Diffstat (limited to 'drivers/net/ethernet')
638 files changed, 54504 insertions, 12072 deletions
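The first hunk below, in el3_netdev_get_ecmd() of the 3c509 driver, adds a missing `break;` so that `case 3` no longer falls through into the `default:` arm. The fallthrough happens to be harmless here (the default arm only breaks), so this is a -Wimplicit-fallthrough hardening cleanup rather than a behavior change. A minimal, self-contained sketch of the pattern being fixed; the enum values are hypothetical stand-ins, not the kernel's ethtool PORT_* constants:

#include <stdio.h>

/* Hypothetical stand-ins for the kernel's ethtool PORT_* constants. */
enum demo_port { DEMO_PORT_TP, DEMO_PORT_AUI, DEMO_PORT_BNC, DEMO_PORT_OTHER };

static enum demo_port decode_xcvr(int xcvr)
{
	enum demo_port port = DEMO_PORT_OTHER;

	switch (xcvr) {
	case 0:
		port = DEMO_PORT_TP;
		break;
	case 3:
		port = DEMO_PORT_BNC;
		break;	/* the patch adds this break; without it, control
			 * falls through into the "default" arm below
			 */
	default:
		break;
	}

	return port;
}

int main(void)
{
	/* With the break in place, xcvr 3 decodes to DEMO_PORT_BNC (2). */
	printf("xcvr 3 decodes to port %d\n", decode_xcvr(3));
	return 0;
}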
diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c index 53e1f7e07959..96cc5fc36eb5 100644 --- a/drivers/net/ethernet/3com/3c509.c +++ b/drivers/net/ethernet/3com/3c509.c @@ -1051,6 +1051,7 @@ el3_netdev_get_ecmd(struct net_device *dev, struct ethtool_link_ksettings *cmd) break; case 3: cmd->base.port = PORT_BNC; + break; default: break; } diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index ad04660b97b8..1cdff1dca790 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -19,6 +19,7 @@ config SUNGEM_PHY tristate source "drivers/net/ethernet/3com/Kconfig" +source "drivers/net/ethernet/actions/Kconfig" source "drivers/net/ethernet/adaptec/Kconfig" source "drivers/net/ethernet/aeroflex/Kconfig" source "drivers/net/ethernet/agere/Kconfig" @@ -81,6 +82,7 @@ source "drivers/net/ethernet/huawei/Kconfig" source "drivers/net/ethernet/i825xx/Kconfig" source "drivers/net/ethernet/ibm/Kconfig" source "drivers/net/ethernet/intel/Kconfig" +source "drivers/net/ethernet/microsoft/Kconfig" source "drivers/net/ethernet/xscale/Kconfig" config JME @@ -97,7 +99,8 @@ config JME config KORINA tristate "Korina (IDT RC32434) Ethernet support" - depends on MIKROTIK_RB532 + depends on MIKROTIK_RB532 || COMPILE_TEST + select MII help If you have a Mikrotik RouterBoard 500 or IDT RC32434 based system say Y. Otherwise say N. diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index 1e7dc8a7762d..cb3f9084a21b 100644 --- a/drivers/net/ethernet/Makefile +++ b/drivers/net/ethernet/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_NET_VENDOR_3COM) += 3com/ obj-$(CONFIG_NET_VENDOR_8390) += 8390/ +obj-$(CONFIG_NET_VENDOR_ACTIONS) += actions/ obj-$(CONFIG_NET_VENDOR_ADAPTEC) += adaptec/ obj-$(CONFIG_GRETH) += aeroflex/ obj-$(CONFIG_NET_VENDOR_AGERE) += agere/ @@ -44,6 +45,7 @@ obj-$(CONFIG_NET_VENDOR_HUAWEI) += huawei/ obj-$(CONFIG_NET_VENDOR_IBM) += ibm/ obj-$(CONFIG_NET_VENDOR_INTEL) += intel/ obj-$(CONFIG_NET_VENDOR_I825XX) += i825xx/ +obj-$(CONFIG_NET_VENDOR_MICROSOFT) += microsoft/ obj-$(CONFIG_NET_VENDOR_XSCALE) += xscale/ obj-$(CONFIG_JME) += jme.o obj-$(CONFIG_KORINA) += korina.o diff --git a/drivers/net/ethernet/actions/Kconfig b/drivers/net/ethernet/actions/Kconfig new file mode 100644 index 000000000000..ccad6a3f4d6f --- /dev/null +++ b/drivers/net/ethernet/actions/Kconfig @@ -0,0 +1,26 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config NET_VENDOR_ACTIONS + bool "Actions Semi devices" + default y + depends on ARCH_ACTIONS + help + If you have a network (Ethernet) card belonging to this class, say Y. + + Note that the answer to this question doesn't directly affect the + kernel: saying N will just cause the configurator to skip all the + questions about Actions Semi devices. If you say Y, you will be + asked for your specific card in the following questions. + +if NET_VENDOR_ACTIONS + +config OWL_EMAC + tristate "Actions Semi Owl Ethernet MAC support" + select PHYLIB + help + This driver supports the Actions Semi Ethernet Media Access + Controller (EMAC) found on the S500 and S900 SoCs. The controller + is compliant with the IEEE 802.3 CSMA/CD standard and supports + both half-duplex and full-duplex operation modes at 10/100 Mb/s. 
+ +endif # NET_VENDOR_ACTIONS diff --git a/drivers/net/ethernet/actions/Makefile b/drivers/net/ethernet/actions/Makefile new file mode 100644 index 000000000000..fde8001d538a --- /dev/null +++ b/drivers/net/ethernet/actions/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Makefile for the Actions Semi Owl SoCs built-in ethernet macs +# + +obj-$(CONFIG_OWL_EMAC) += owl-emac.o diff --git a/drivers/net/ethernet/actions/owl-emac.c b/drivers/net/ethernet/actions/owl-emac.c new file mode 100644 index 000000000000..b8e771c2bc40 --- /dev/null +++ b/drivers/net/ethernet/actions/owl-emac.c @@ -0,0 +1,1625 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Actions Semi Owl SoCs Ethernet MAC driver + * + * Copyright (c) 2012 Actions Semi Inc. + * Copyright (c) 2021 Cristian Ciocaltea <cristian.ciocaltea@gmail.com> + */ + +#include <linux/circ_buf.h> +#include <linux/clk.h> +#include <linux/dma-mapping.h> +#include <linux/etherdevice.h> +#include <linux/of_mdio.h> +#include <linux/of_net.h> +#include <linux/platform_device.h> +#include <linux/pm.h> +#include <linux/reset.h> + +#include "owl-emac.h" + +#define OWL_EMAC_DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | \ + NETIF_MSG_PROBE | \ + NETIF_MSG_LINK) + +static u32 owl_emac_reg_read(struct owl_emac_priv *priv, u32 reg) +{ + return readl(priv->base + reg); +} + +static void owl_emac_reg_write(struct owl_emac_priv *priv, u32 reg, u32 data) +{ + writel(data, priv->base + reg); +} + +static u32 owl_emac_reg_update(struct owl_emac_priv *priv, + u32 reg, u32 mask, u32 val) +{ + u32 data, old_val; + + data = owl_emac_reg_read(priv, reg); + old_val = data & mask; + + data &= ~mask; + data |= val & mask; + + owl_emac_reg_write(priv, reg, data); + + return old_val; +} + +static void owl_emac_reg_set(struct owl_emac_priv *priv, u32 reg, u32 bits) +{ + owl_emac_reg_update(priv, reg, bits, bits); +} + +static void owl_emac_reg_clear(struct owl_emac_priv *priv, u32 reg, u32 bits) +{ + owl_emac_reg_update(priv, reg, bits, 0); +} + +static struct device *owl_emac_get_dev(struct owl_emac_priv *priv) +{ + return priv->netdev->dev.parent; +} + +static void owl_emac_irq_enable(struct owl_emac_priv *priv) +{ + /* Enable all interrupts except TU. + * + * Note the NIE and AIE bits shall also be set in order to actually + * enable the selected interrupts. + */ + owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR7, + OWL_EMAC_BIT_MAC_CSR7_NIE | + OWL_EMAC_BIT_MAC_CSR7_AIE | + OWL_EMAC_BIT_MAC_CSR7_ALL_NOT_TUE); +} + +static void owl_emac_irq_disable(struct owl_emac_priv *priv) +{ + /* Disable all interrupts. + * + * WARNING: Unset only the NIE and AIE bits in CSR7 to workaround an + * unexpected side effect (MAC hardware bug?!) where some bits in the + * status register (CSR5) are cleared automatically before being able + * to read them via owl_emac_irq_clear(). + */ + owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR7, + OWL_EMAC_BIT_MAC_CSR7_ALL_NOT_TUE); +} + +static u32 owl_emac_irq_status(struct owl_emac_priv *priv) +{ + return owl_emac_reg_read(priv, OWL_EMAC_REG_MAC_CSR5); +} + +static u32 owl_emac_irq_clear(struct owl_emac_priv *priv) +{ + u32 val = owl_emac_irq_status(priv); + + owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR5, val); + + return val; +} + +static dma_addr_t owl_emac_dma_map_rx(struct owl_emac_priv *priv, + struct sk_buff *skb) +{ + struct device *dev = owl_emac_get_dev(priv); + + /* Buffer pointer for the RX DMA descriptor must be word aligned. 
*/ + return dma_map_single(dev, skb_tail_pointer(skb), + skb_tailroom(skb), DMA_FROM_DEVICE); +} + +static void owl_emac_dma_unmap_rx(struct owl_emac_priv *priv, + struct sk_buff *skb, dma_addr_t dma_addr) +{ + struct device *dev = owl_emac_get_dev(priv); + + dma_unmap_single(dev, dma_addr, skb_tailroom(skb), DMA_FROM_DEVICE); +} + +static dma_addr_t owl_emac_dma_map_tx(struct owl_emac_priv *priv, + struct sk_buff *skb) +{ + struct device *dev = owl_emac_get_dev(priv); + + return dma_map_single(dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE); +} + +static void owl_emac_dma_unmap_tx(struct owl_emac_priv *priv, + struct sk_buff *skb, dma_addr_t dma_addr) +{ + struct device *dev = owl_emac_get_dev(priv); + + dma_unmap_single(dev, dma_addr, skb_headlen(skb), DMA_TO_DEVICE); +} + +static unsigned int owl_emac_ring_num_unused(struct owl_emac_ring *ring) +{ + return CIRC_SPACE(ring->head, ring->tail, ring->size); +} + +static unsigned int owl_emac_ring_get_next(struct owl_emac_ring *ring, + unsigned int cur) +{ + return (cur + 1) & (ring->size - 1); +} + +static void owl_emac_ring_push_head(struct owl_emac_ring *ring) +{ + ring->head = owl_emac_ring_get_next(ring, ring->head); +} + +static void owl_emac_ring_pop_tail(struct owl_emac_ring *ring) +{ + ring->tail = owl_emac_ring_get_next(ring, ring->tail); +} + +static struct sk_buff *owl_emac_alloc_skb(struct net_device *netdev) +{ + struct sk_buff *skb; + int offset; + + skb = netdev_alloc_skb(netdev, OWL_EMAC_RX_FRAME_MAX_LEN + + OWL_EMAC_SKB_RESERVE); + if (unlikely(!skb)) + return NULL; + + /* Ensure 4 bytes DMA alignment. */ + offset = ((uintptr_t)skb->data) & (OWL_EMAC_SKB_ALIGN - 1); + if (unlikely(offset)) + skb_reserve(skb, OWL_EMAC_SKB_ALIGN - offset); + + return skb; +} + +static int owl_emac_ring_prepare_rx(struct owl_emac_priv *priv) +{ + struct owl_emac_ring *ring = &priv->rx_ring; + struct device *dev = owl_emac_get_dev(priv); + struct net_device *netdev = priv->netdev; + struct owl_emac_ring_desc *desc; + struct sk_buff *skb; + dma_addr_t dma_addr; + int i; + + for (i = 0; i < ring->size; i++) { + skb = owl_emac_alloc_skb(netdev); + if (!skb) + return -ENOMEM; + + dma_addr = owl_emac_dma_map_rx(priv, skb); + if (dma_mapping_error(dev, dma_addr)) { + dev_kfree_skb(skb); + return -ENOMEM; + } + + desc = &ring->descs[i]; + desc->status = OWL_EMAC_BIT_RDES0_OWN; + desc->control = skb_tailroom(skb) & OWL_EMAC_MSK_RDES1_RBS1; + desc->buf_addr = dma_addr; + desc->reserved = 0; + + ring->skbs[i] = skb; + ring->skbs_dma[i] = dma_addr; + } + + desc->control |= OWL_EMAC_BIT_RDES1_RER; + + ring->head = 0; + ring->tail = 0; + + return 0; +} + +static void owl_emac_ring_prepare_tx(struct owl_emac_priv *priv) +{ + struct owl_emac_ring *ring = &priv->tx_ring; + struct owl_emac_ring_desc *desc; + int i; + + for (i = 0; i < ring->size; i++) { + desc = &ring->descs[i]; + + desc->status = 0; + desc->control = OWL_EMAC_BIT_TDES1_IC; + desc->buf_addr = 0; + desc->reserved = 0; + } + + desc->control |= OWL_EMAC_BIT_TDES1_TER; + + memset(ring->skbs_dma, 0, sizeof(dma_addr_t) * ring->size); + + ring->head = 0; + ring->tail = 0; +} + +static void owl_emac_ring_unprepare_rx(struct owl_emac_priv *priv) +{ + struct owl_emac_ring *ring = &priv->rx_ring; + int i; + + for (i = 0; i < ring->size; i++) { + ring->descs[i].status = 0; + + if (!ring->skbs_dma[i]) + continue; + + owl_emac_dma_unmap_rx(priv, ring->skbs[i], ring->skbs_dma[i]); + ring->skbs_dma[i] = 0; + + dev_kfree_skb(ring->skbs[i]); + ring->skbs[i] = NULL; + } +} + +static void 
owl_emac_ring_unprepare_tx(struct owl_emac_priv *priv) +{ + struct owl_emac_ring *ring = &priv->tx_ring; + int i; + + for (i = 0; i < ring->size; i++) { + ring->descs[i].status = 0; + + if (!ring->skbs_dma[i]) + continue; + + owl_emac_dma_unmap_tx(priv, ring->skbs[i], ring->skbs_dma[i]); + ring->skbs_dma[i] = 0; + + dev_kfree_skb(ring->skbs[i]); + ring->skbs[i] = NULL; + } +} + +static int owl_emac_ring_alloc(struct device *dev, struct owl_emac_ring *ring, + unsigned int size) +{ + ring->descs = dmam_alloc_coherent(dev, + sizeof(struct owl_emac_ring_desc) * size, + &ring->descs_dma, GFP_KERNEL); + if (!ring->descs) + return -ENOMEM; + + ring->skbs = devm_kcalloc(dev, size, sizeof(struct sk_buff *), + GFP_KERNEL); + if (!ring->skbs) + return -ENOMEM; + + ring->skbs_dma = devm_kcalloc(dev, size, sizeof(dma_addr_t), + GFP_KERNEL); + if (!ring->skbs_dma) + return -ENOMEM; + + ring->size = size; + + return 0; +} + +static void owl_emac_dma_cmd_resume_rx(struct owl_emac_priv *priv) +{ + owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR2, + OWL_EMAC_VAL_MAC_CSR2_RPD); +} + +static void owl_emac_dma_cmd_resume_tx(struct owl_emac_priv *priv) +{ + owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR1, + OWL_EMAC_VAL_MAC_CSR1_TPD); +} + +static u32 owl_emac_dma_cmd_set_tx(struct owl_emac_priv *priv, u32 status) +{ + return owl_emac_reg_update(priv, OWL_EMAC_REG_MAC_CSR6, + OWL_EMAC_BIT_MAC_CSR6_ST, status); +} + +static u32 owl_emac_dma_cmd_start_tx(struct owl_emac_priv *priv) +{ + return owl_emac_dma_cmd_set_tx(priv, ~0); +} + +static u32 owl_emac_dma_cmd_set(struct owl_emac_priv *priv, u32 status) +{ + return owl_emac_reg_update(priv, OWL_EMAC_REG_MAC_CSR6, + OWL_EMAC_MSK_MAC_CSR6_STSR, status); +} + +static u32 owl_emac_dma_cmd_start(struct owl_emac_priv *priv) +{ + return owl_emac_dma_cmd_set(priv, ~0); +} + +static u32 owl_emac_dma_cmd_stop(struct owl_emac_priv *priv) +{ + return owl_emac_dma_cmd_set(priv, 0); +} + +static void owl_emac_set_hw_mac_addr(struct net_device *netdev) +{ + struct owl_emac_priv *priv = netdev_priv(netdev); + u8 *mac_addr = netdev->dev_addr; + u32 addr_high, addr_low; + + addr_high = mac_addr[0] << 8 | mac_addr[1]; + addr_low = mac_addr[2] << 24 | mac_addr[3] << 16 | + mac_addr[4] << 8 | mac_addr[5]; + + owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR17, addr_high); + owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR16, addr_low); +} + +static void owl_emac_update_link_state(struct owl_emac_priv *priv) +{ + u32 val, status; + + if (priv->pause) { + val = OWL_EMAC_BIT_MAC_CSR20_FCE | OWL_EMAC_BIT_MAC_CSR20_TUE; + val |= OWL_EMAC_BIT_MAC_CSR20_TPE | OWL_EMAC_BIT_MAC_CSR20_RPE; + val |= OWL_EMAC_BIT_MAC_CSR20_BPE; + } else { + val = 0; + } + + /* Update flow control. */ + owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR20, val); + + val = (priv->speed == SPEED_100) ? OWL_EMAC_VAL_MAC_CSR6_SPEED_100M : + OWL_EMAC_VAL_MAC_CSR6_SPEED_10M; + val <<= OWL_EMAC_OFF_MAC_CSR6_SPEED; + + if (priv->duplex == DUPLEX_FULL) + val |= OWL_EMAC_BIT_MAC_CSR6_FD; + + spin_lock_bh(&priv->lock); + + /* Temporarily stop DMA TX & RX. */ + status = owl_emac_dma_cmd_stop(priv); + + /* Update operation modes. */ + owl_emac_reg_update(priv, OWL_EMAC_REG_MAC_CSR6, + OWL_EMAC_MSK_MAC_CSR6_SPEED | + OWL_EMAC_BIT_MAC_CSR6_FD, val); + + /* Restore DMA TX & RX status. 
*/ + owl_emac_dma_cmd_set(priv, status); + + spin_unlock_bh(&priv->lock); +} + +static void owl_emac_adjust_link(struct net_device *netdev) +{ + struct owl_emac_priv *priv = netdev_priv(netdev); + struct phy_device *phydev = netdev->phydev; + bool state_changed = false; + + if (phydev->link) { + if (!priv->link) { + priv->link = phydev->link; + state_changed = true; + } + + if (priv->speed != phydev->speed) { + priv->speed = phydev->speed; + state_changed = true; + } + + if (priv->duplex != phydev->duplex) { + priv->duplex = phydev->duplex; + state_changed = true; + } + + if (priv->pause != phydev->pause) { + priv->pause = phydev->pause; + state_changed = true; + } + } else { + if (priv->link) { + priv->link = phydev->link; + state_changed = true; + } + } + + if (state_changed) { + if (phydev->link) + owl_emac_update_link_state(priv); + + if (netif_msg_link(priv)) + phy_print_status(phydev); + } +} + +static irqreturn_t owl_emac_handle_irq(int irq, void *data) +{ + struct net_device *netdev = data; + struct owl_emac_priv *priv = netdev_priv(netdev); + + if (netif_running(netdev)) { + owl_emac_irq_disable(priv); + napi_schedule(&priv->napi); + } + + return IRQ_HANDLED; +} + +static void owl_emac_ether_addr_push(u8 **dst, const u8 *src) +{ + u32 *a = (u32 *)(*dst); + const u16 *b = (const u16 *)src; + + a[0] = b[0]; + a[1] = b[1]; + a[2] = b[2]; + + *dst += 12; +} + +static void +owl_emac_setup_frame_prepare(struct owl_emac_priv *priv, struct sk_buff *skb) +{ + const u8 bcast_addr[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; + const u8 *mac_addr = priv->netdev->dev_addr; + u8 *frame; + int i; + + skb_put(skb, OWL_EMAC_SETUP_FRAME_LEN); + + frame = skb->data; + memset(frame, 0, skb->len); + + owl_emac_ether_addr_push(&frame, mac_addr); + owl_emac_ether_addr_push(&frame, bcast_addr); + + /* Fill multicast addresses. */ + WARN_ON(priv->mcaddr_list.count >= OWL_EMAC_MAX_MULTICAST_ADDRS); + for (i = 0; i < priv->mcaddr_list.count; i++) { + mac_addr = priv->mcaddr_list.addrs[i]; + owl_emac_ether_addr_push(&frame, mac_addr); + } +} + +/* The setup frame is a special descriptor which is used to provide physical + * addresses (i.e. mac, broadcast and multicast) to the MAC hardware for + * filtering purposes. To be recognized as a setup frame, the TDES1_SET bit + * must be set in the TX descriptor control field. + */ +static int owl_emac_setup_frame_xmit(struct owl_emac_priv *priv) +{ + struct owl_emac_ring *ring = &priv->tx_ring; + struct net_device *netdev = priv->netdev; + struct owl_emac_ring_desc *desc; + struct sk_buff *skb; + unsigned int tx_head; + u32 status, control; + dma_addr_t dma_addr; + int ret; + + skb = owl_emac_alloc_skb(netdev); + if (!skb) + return -ENOMEM; + + owl_emac_setup_frame_prepare(priv, skb); + + dma_addr = owl_emac_dma_map_tx(priv, skb); + if (dma_mapping_error(owl_emac_get_dev(priv), dma_addr)) { + ret = -ENOMEM; + goto err_free_skb; + } + + spin_lock_bh(&priv->lock); + + tx_head = ring->head; + desc = &ring->descs[tx_head]; + + status = READ_ONCE(desc->status); + control = READ_ONCE(desc->control); + dma_rmb(); /* Ensure data has been read before used. 
*/ + + if (unlikely(status & OWL_EMAC_BIT_TDES0_OWN) || + !owl_emac_ring_num_unused(ring)) { + spin_unlock_bh(&priv->lock); + owl_emac_dma_unmap_tx(priv, skb, dma_addr); + ret = -EBUSY; + goto err_free_skb; + } + + ring->skbs[tx_head] = skb; + ring->skbs_dma[tx_head] = dma_addr; + + control &= OWL_EMAC_BIT_TDES1_IC | OWL_EMAC_BIT_TDES1_TER; /* Maintain bits */ + control |= OWL_EMAC_BIT_TDES1_SET; + control |= OWL_EMAC_MSK_TDES1_TBS1 & skb->len; + + WRITE_ONCE(desc->control, control); + WRITE_ONCE(desc->buf_addr, dma_addr); + dma_wmb(); /* Flush descriptor before changing ownership. */ + WRITE_ONCE(desc->status, OWL_EMAC_BIT_TDES0_OWN); + + owl_emac_ring_push_head(ring); + + /* Temporarily enable DMA TX. */ + status = owl_emac_dma_cmd_start_tx(priv); + + /* Trigger setup frame processing. */ + owl_emac_dma_cmd_resume_tx(priv); + + /* Restore DMA TX status. */ + owl_emac_dma_cmd_set_tx(priv, status); + + /* Stop regular TX until setup frame is processed. */ + netif_stop_queue(netdev); + + spin_unlock_bh(&priv->lock); + + return 0; + +err_free_skb: + dev_kfree_skb(skb); + return ret; +} + +static netdev_tx_t owl_emac_ndo_start_xmit(struct sk_buff *skb, + struct net_device *netdev) +{ + struct owl_emac_priv *priv = netdev_priv(netdev); + struct device *dev = owl_emac_get_dev(priv); + struct owl_emac_ring *ring = &priv->tx_ring; + struct owl_emac_ring_desc *desc; + unsigned int tx_head; + u32 status, control; + dma_addr_t dma_addr; + + dma_addr = owl_emac_dma_map_tx(priv, skb); + if (dma_mapping_error(dev, dma_addr)) { + dev_err_ratelimited(&netdev->dev, "TX DMA mapping failed\n"); + dev_kfree_skb(skb); + netdev->stats.tx_dropped++; + return NETDEV_TX_OK; + } + + spin_lock_bh(&priv->lock); + + tx_head = ring->head; + desc = &ring->descs[tx_head]; + + status = READ_ONCE(desc->status); + control = READ_ONCE(desc->control); + dma_rmb(); /* Ensure data has been read before used. */ + + if (!owl_emac_ring_num_unused(ring) || + unlikely(status & OWL_EMAC_BIT_TDES0_OWN)) { + netif_stop_queue(netdev); + spin_unlock_bh(&priv->lock); + + dev_dbg_ratelimited(&netdev->dev, "TX buffer full, status=0x%08x\n", + owl_emac_irq_status(priv)); + owl_emac_dma_unmap_tx(priv, skb, dma_addr); + netdev->stats.tx_dropped++; + return NETDEV_TX_BUSY; + } + + ring->skbs[tx_head] = skb; + ring->skbs_dma[tx_head] = dma_addr; + + control &= OWL_EMAC_BIT_TDES1_IC | OWL_EMAC_BIT_TDES1_TER; /* Maintain bits */ + control |= OWL_EMAC_BIT_TDES1_FS | OWL_EMAC_BIT_TDES1_LS; + control |= OWL_EMAC_MSK_TDES1_TBS1 & skb->len; + + WRITE_ONCE(desc->control, control); + WRITE_ONCE(desc->buf_addr, dma_addr); + dma_wmb(); /* Flush descriptor before changing ownership. */ + WRITE_ONCE(desc->status, OWL_EMAC_BIT_TDES0_OWN); + + owl_emac_dma_cmd_resume_tx(priv); + owl_emac_ring_push_head(ring); + + /* FIXME: The transmission is currently restricted to a single frame + * at a time as a workaround for a MAC hardware bug that causes random + * freeze of the TX queue processor. + */ + netif_stop_queue(netdev); + + spin_unlock_bh(&priv->lock); + + return NETDEV_TX_OK; +} + +static bool owl_emac_tx_complete_tail(struct owl_emac_priv *priv) +{ + struct owl_emac_ring *ring = &priv->tx_ring; + struct net_device *netdev = priv->netdev; + struct owl_emac_ring_desc *desc; + struct sk_buff *skb; + unsigned int tx_tail; + u32 status; + + tx_tail = ring->tail; + desc = &ring->descs[tx_tail]; + + status = READ_ONCE(desc->status); + dma_rmb(); /* Ensure data has been read before used. 
*/ + + if (status & OWL_EMAC_BIT_TDES0_OWN) + return false; + + /* Check for errors. */ + if (status & OWL_EMAC_BIT_TDES0_ES) { + dev_dbg_ratelimited(&netdev->dev, + "TX complete error status: 0x%08x\n", + status); + + netdev->stats.tx_errors++; + + if (status & OWL_EMAC_BIT_TDES0_UF) + netdev->stats.tx_fifo_errors++; + + if (status & OWL_EMAC_BIT_TDES0_EC) + netdev->stats.tx_aborted_errors++; + + if (status & OWL_EMAC_BIT_TDES0_LC) + netdev->stats.tx_window_errors++; + + if (status & OWL_EMAC_BIT_TDES0_NC) + netdev->stats.tx_heartbeat_errors++; + + if (status & OWL_EMAC_BIT_TDES0_LO) + netdev->stats.tx_carrier_errors++; + } else { + netdev->stats.tx_packets++; + netdev->stats.tx_bytes += ring->skbs[tx_tail]->len; + } + + /* Some collisions occurred, but pkt has been transmitted. */ + if (status & OWL_EMAC_BIT_TDES0_DE) + netdev->stats.collisions++; + + skb = ring->skbs[tx_tail]; + owl_emac_dma_unmap_tx(priv, skb, ring->skbs_dma[tx_tail]); + dev_kfree_skb(skb); + + ring->skbs[tx_tail] = NULL; + ring->skbs_dma[tx_tail] = 0; + + owl_emac_ring_pop_tail(ring); + + if (unlikely(netif_queue_stopped(netdev))) + netif_wake_queue(netdev); + + return true; +} + +static void owl_emac_tx_complete(struct owl_emac_priv *priv) +{ + struct owl_emac_ring *ring = &priv->tx_ring; + struct net_device *netdev = priv->netdev; + unsigned int tx_next; + u32 status; + + spin_lock(&priv->lock); + + while (ring->tail != ring->head) { + if (!owl_emac_tx_complete_tail(priv)) + break; + } + + /* FIXME: This is a workaround for a MAC hardware bug not clearing + * (sometimes) the OWN bit for a transmitted frame descriptor. + * + * At this point, when TX queue is full, the tail descriptor has the + * OWN bit set, which normally means the frame has not been processed + * or transmitted yet. But if there is at least one descriptor in the + * queue having the OWN bit cleared, we can safely assume the tail + * frame has been also processed by the MAC hardware. + * + * If that's the case, let's force the frame completion by manually + * clearing the OWN bit. + */ + if (unlikely(!owl_emac_ring_num_unused(ring))) { + tx_next = ring->tail; + + while ((tx_next = owl_emac_ring_get_next(ring, tx_next)) != ring->head) { + status = READ_ONCE(ring->descs[tx_next].status); + dma_rmb(); /* Ensure data has been read before used. */ + + if (status & OWL_EMAC_BIT_TDES0_OWN) + continue; + + netdev_dbg(netdev, "Found uncleared TX desc OWN bit\n"); + + status = READ_ONCE(ring->descs[ring->tail].status); + dma_rmb(); /* Ensure data has been read before used. */ + status &= ~OWL_EMAC_BIT_TDES0_OWN; + WRITE_ONCE(ring->descs[ring->tail].status, status); + + owl_emac_tx_complete_tail(priv); + break; + } + } + + spin_unlock(&priv->lock); +} + +static int owl_emac_rx_process(struct owl_emac_priv *priv, int budget) +{ + struct owl_emac_ring *ring = &priv->rx_ring; + struct device *dev = owl_emac_get_dev(priv); + struct net_device *netdev = priv->netdev; + struct owl_emac_ring_desc *desc; + struct sk_buff *curr_skb, *new_skb; + dma_addr_t curr_dma, new_dma; + unsigned int rx_tail, len; + u32 status; + int recv = 0; + + while (recv < budget) { + spin_lock(&priv->lock); + + rx_tail = ring->tail; + desc = &ring->descs[rx_tail]; + + status = READ_ONCE(desc->status); + dma_rmb(); /* Ensure data has been read before used. 
*/ + + if (status & OWL_EMAC_BIT_RDES0_OWN) { + spin_unlock(&priv->lock); + break; + } + + curr_skb = ring->skbs[rx_tail]; + curr_dma = ring->skbs_dma[rx_tail]; + owl_emac_ring_pop_tail(ring); + + spin_unlock(&priv->lock); + + if (status & (OWL_EMAC_BIT_RDES0_DE | OWL_EMAC_BIT_RDES0_RF | + OWL_EMAC_BIT_RDES0_TL | OWL_EMAC_BIT_RDES0_CS | + OWL_EMAC_BIT_RDES0_DB | OWL_EMAC_BIT_RDES0_CE | + OWL_EMAC_BIT_RDES0_ZERO)) { + dev_dbg_ratelimited(&netdev->dev, + "RX desc error status: 0x%08x\n", + status); + + if (status & OWL_EMAC_BIT_RDES0_DE) + netdev->stats.rx_over_errors++; + + if (status & (OWL_EMAC_BIT_RDES0_RF | OWL_EMAC_BIT_RDES0_DB)) + netdev->stats.rx_frame_errors++; + + if (status & OWL_EMAC_BIT_RDES0_TL) + netdev->stats.rx_length_errors++; + + if (status & OWL_EMAC_BIT_RDES0_CS) + netdev->stats.collisions++; + + if (status & OWL_EMAC_BIT_RDES0_CE) + netdev->stats.rx_crc_errors++; + + if (status & OWL_EMAC_BIT_RDES0_ZERO) + netdev->stats.rx_fifo_errors++; + + goto drop_skb; + } + + len = (status & OWL_EMAC_MSK_RDES0_FL) >> OWL_EMAC_OFF_RDES0_FL; + if (unlikely(len > OWL_EMAC_RX_FRAME_MAX_LEN)) { + netdev->stats.rx_length_errors++; + netdev_err(netdev, "invalid RX frame len: %u\n", len); + goto drop_skb; + } + + /* Prepare new skb before receiving the current one. */ + new_skb = owl_emac_alloc_skb(netdev); + if (unlikely(!new_skb)) + goto drop_skb; + + new_dma = owl_emac_dma_map_rx(priv, new_skb); + if (dma_mapping_error(dev, new_dma)) { + dev_kfree_skb(new_skb); + netdev_err(netdev, "RX DMA mapping failed\n"); + goto drop_skb; + } + + owl_emac_dma_unmap_rx(priv, curr_skb, curr_dma); + + skb_put(curr_skb, len - ETH_FCS_LEN); + curr_skb->ip_summed = CHECKSUM_NONE; + curr_skb->protocol = eth_type_trans(curr_skb, netdev); + curr_skb->dev = netdev; + + netif_receive_skb(curr_skb); + + netdev->stats.rx_packets++; + netdev->stats.rx_bytes += len; + recv++; + goto push_skb; + +drop_skb: + netdev->stats.rx_dropped++; + netdev->stats.rx_errors++; + /* Reuse the current skb. */ + new_skb = curr_skb; + new_dma = curr_dma; + +push_skb: + spin_lock(&priv->lock); + + ring->skbs[ring->head] = new_skb; + ring->skbs_dma[ring->head] = new_dma; + + WRITE_ONCE(desc->buf_addr, new_dma); + dma_wmb(); /* Flush descriptor before changing ownership. */ + WRITE_ONCE(desc->status, OWL_EMAC_BIT_RDES0_OWN); + + owl_emac_ring_push_head(ring); + + spin_unlock(&priv->lock); + } + + return recv; +} + +static int owl_emac_poll(struct napi_struct *napi, int budget) +{ + int work_done = 0, ru_cnt = 0, recv; + static int tx_err_cnt, rx_err_cnt; + struct owl_emac_priv *priv; + u32 status, proc_status; + + priv = container_of(napi, struct owl_emac_priv, napi); + + while ((status = owl_emac_irq_clear(priv)) & + (OWL_EMAC_BIT_MAC_CSR5_NIS | OWL_EMAC_BIT_MAC_CSR5_AIS)) { + recv = 0; + + /* TX setup frame raises ETI instead of TI. */ + if (status & (OWL_EMAC_BIT_MAC_CSR5_TI | OWL_EMAC_BIT_MAC_CSR5_ETI)) { + owl_emac_tx_complete(priv); + tx_err_cnt = 0; + + /* Count MAC internal RX errors. */ + proc_status = status & OWL_EMAC_MSK_MAC_CSR5_RS; + proc_status >>= OWL_EMAC_OFF_MAC_CSR5_RS; + if (proc_status == OWL_EMAC_VAL_MAC_CSR5_RS_DATA || + proc_status == OWL_EMAC_VAL_MAC_CSR5_RS_CDES || + proc_status == OWL_EMAC_VAL_MAC_CSR5_RS_FDES) + rx_err_cnt++; + } + + if (status & OWL_EMAC_BIT_MAC_CSR5_RI) { + recv = owl_emac_rx_process(priv, budget - work_done); + rx_err_cnt = 0; + + /* Count MAC internal TX errors. 
*/ + proc_status = status & OWL_EMAC_MSK_MAC_CSR5_TS; + proc_status >>= OWL_EMAC_OFF_MAC_CSR5_TS; + if (proc_status == OWL_EMAC_VAL_MAC_CSR5_TS_DATA || + proc_status == OWL_EMAC_VAL_MAC_CSR5_TS_CDES) + tx_err_cnt++; + } else if (status & OWL_EMAC_BIT_MAC_CSR5_RU) { + /* MAC AHB is in suspended state, will return to RX + * descriptor processing when the host changes ownership + * of the descriptor and either an RX poll demand CMD is + * issued or a new frame is recognized by the MAC AHB. + */ + if (++ru_cnt == 2) + owl_emac_dma_cmd_resume_rx(priv); + + recv = owl_emac_rx_process(priv, budget - work_done); + + /* Guard against too many RU interrupts. */ + if (ru_cnt > 3) + break; + } + + work_done += recv; + if (work_done >= budget) + break; + } + + if (work_done < budget) { + napi_complete_done(napi, work_done); + owl_emac_irq_enable(priv); + } + + /* Reset MAC when getting too many internal TX or RX errors. */ + if (tx_err_cnt > 10 || rx_err_cnt > 10) { + netdev_dbg(priv->netdev, "%s error status: 0x%08x\n", + tx_err_cnt > 10 ? "TX" : "RX", status); + rx_err_cnt = 0; + tx_err_cnt = 0; + schedule_work(&priv->mac_reset_task); + } + + return work_done; +} + +static void owl_emac_mdio_clock_enable(struct owl_emac_priv *priv) +{ + u32 val; + + /* Enable MDC clock generation by adjusting CLKDIV according to + * the vendor implementation of the original driver. + */ + val = owl_emac_reg_read(priv, OWL_EMAC_REG_MAC_CSR10); + val &= OWL_EMAC_MSK_MAC_CSR10_CLKDIV; + val |= OWL_EMAC_VAL_MAC_CSR10_CLKDIV_128 << OWL_EMAC_OFF_MAC_CSR10_CLKDIV; + + val |= OWL_EMAC_BIT_MAC_CSR10_SB; + val |= OWL_EMAC_VAL_MAC_CSR10_OPCODE_CDS << OWL_EMAC_OFF_MAC_CSR10_OPCODE; + owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR10, val); +} + +static void owl_emac_core_hw_reset(struct owl_emac_priv *priv) +{ + /* Trigger hardware reset. */ + reset_control_assert(priv->reset); + usleep_range(10, 20); + reset_control_deassert(priv->reset); + usleep_range(100, 200); +} + +static int owl_emac_core_sw_reset(struct owl_emac_priv *priv) +{ + u32 val; + int ret; + + /* Trigger software reset. */ + owl_emac_reg_set(priv, OWL_EMAC_REG_MAC_CSR0, OWL_EMAC_BIT_MAC_CSR0_SWR); + ret = readl_poll_timeout(priv->base + OWL_EMAC_REG_MAC_CSR0, + val, !(val & OWL_EMAC_BIT_MAC_CSR0_SWR), + OWL_EMAC_POLL_DELAY_USEC, + OWL_EMAC_RESET_POLL_TIMEOUT_USEC); + if (ret) + return ret; + + if (priv->phy_mode == PHY_INTERFACE_MODE_RMII) { + /* Enable RMII and use the 50MHz rmii clk as output to PHY. */ + val = 0; + } else { + /* Enable SMII and use the 125MHz rmii clk as output to PHY. + * Additionally set SMII SYNC delay to 4 half cycle. + */ + val = 0x04 << OWL_EMAC_OFF_MAC_CTRL_SSDC; + val |= OWL_EMAC_BIT_MAC_CTRL_RSIS; + } + owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CTRL, val); + + /* MDC is disabled after reset. */ + owl_emac_mdio_clock_enable(priv); + + /* Set FIFO pause & restart threshold levels. */ + val = 0x40 << OWL_EMAC_OFF_MAC_CSR19_FPTL; + val |= 0x10 << OWL_EMAC_OFF_MAC_CSR19_FRTL; + owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR19, val); + + /* Set flow control pause quanta time to ~100 ms. */ + val = 0x4FFF << OWL_EMAC_OFF_MAC_CSR18_PQT; + owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR18, val); + + /* Setup interrupt mitigation. */ + val = 7 << OWL_EMAC_OFF_MAC_CSR11_NRP; + val |= 4 << OWL_EMAC_OFF_MAC_CSR11_RT; + owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR11, val); + + /* Set RX/TX rings base addresses. 
*/ + owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR3, + (u32)(priv->rx_ring.descs_dma)); + owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR4, + (u32)(priv->tx_ring.descs_dma)); + + /* Setup initial operation mode. */ + val = OWL_EMAC_VAL_MAC_CSR6_SPEED_100M << OWL_EMAC_OFF_MAC_CSR6_SPEED; + val |= OWL_EMAC_BIT_MAC_CSR6_FD; + owl_emac_reg_update(priv, OWL_EMAC_REG_MAC_CSR6, + OWL_EMAC_MSK_MAC_CSR6_SPEED | + OWL_EMAC_BIT_MAC_CSR6_FD, val); + owl_emac_reg_clear(priv, OWL_EMAC_REG_MAC_CSR6, + OWL_EMAC_BIT_MAC_CSR6_PR | OWL_EMAC_BIT_MAC_CSR6_PM); + + priv->link = 0; + priv->speed = SPEED_UNKNOWN; + priv->duplex = DUPLEX_UNKNOWN; + priv->pause = 0; + priv->mcaddr_list.count = 0; + + return 0; +} + +static int owl_emac_enable(struct net_device *netdev, bool start_phy) +{ + struct owl_emac_priv *priv = netdev_priv(netdev); + int ret; + + owl_emac_dma_cmd_stop(priv); + owl_emac_irq_disable(priv); + owl_emac_irq_clear(priv); + + owl_emac_ring_prepare_tx(priv); + ret = owl_emac_ring_prepare_rx(priv); + if (ret) + goto err_unprep; + + ret = owl_emac_core_sw_reset(priv); + if (ret) { + netdev_err(netdev, "failed to soft reset MAC core: %d\n", ret); + goto err_unprep; + } + + owl_emac_set_hw_mac_addr(netdev); + owl_emac_setup_frame_xmit(priv); + + netdev_reset_queue(netdev); + napi_enable(&priv->napi); + + owl_emac_irq_enable(priv); + owl_emac_dma_cmd_start(priv); + + if (start_phy) + phy_start(netdev->phydev); + + netif_start_queue(netdev); + + return 0; + +err_unprep: + owl_emac_ring_unprepare_rx(priv); + owl_emac_ring_unprepare_tx(priv); + + return ret; +} + +static void owl_emac_disable(struct net_device *netdev, bool stop_phy) +{ + struct owl_emac_priv *priv = netdev_priv(netdev); + + owl_emac_dma_cmd_stop(priv); + owl_emac_irq_disable(priv); + + netif_stop_queue(netdev); + napi_disable(&priv->napi); + + if (stop_phy) + phy_stop(netdev->phydev); + + owl_emac_ring_unprepare_rx(priv); + owl_emac_ring_unprepare_tx(priv); +} + +static int owl_emac_ndo_open(struct net_device *netdev) +{ + return owl_emac_enable(netdev, true); +} + +static int owl_emac_ndo_stop(struct net_device *netdev) +{ + owl_emac_disable(netdev, true); + + return 0; +} + +static void owl_emac_set_multicast(struct net_device *netdev, int count) +{ + struct owl_emac_priv *priv = netdev_priv(netdev); + struct netdev_hw_addr *ha; + int index = 0; + + if (count <= 0) { + priv->mcaddr_list.count = 0; + return; + } + + netdev_for_each_mc_addr(ha, netdev) { + if (!is_multicast_ether_addr(ha->addr)) + continue; + + WARN_ON(index >= OWL_EMAC_MAX_MULTICAST_ADDRS); + ether_addr_copy(priv->mcaddr_list.addrs[index++], ha->addr); + } + + priv->mcaddr_list.count = index; + + owl_emac_setup_frame_xmit(priv); +} + +static void owl_emac_ndo_set_rx_mode(struct net_device *netdev) +{ + struct owl_emac_priv *priv = netdev_priv(netdev); + u32 status, val = 0; + int mcast_count = 0; + + if (netdev->flags & IFF_PROMISC) { + val = OWL_EMAC_BIT_MAC_CSR6_PR; + } else if (netdev->flags & IFF_ALLMULTI) { + val = OWL_EMAC_BIT_MAC_CSR6_PM; + } else if (netdev->flags & IFF_MULTICAST) { + mcast_count = netdev_mc_count(netdev); + + if (mcast_count > OWL_EMAC_MAX_MULTICAST_ADDRS) { + val = OWL_EMAC_BIT_MAC_CSR6_PM; + mcast_count = 0; + } + } + + spin_lock_bh(&priv->lock); + + /* Temporarily stop DMA TX & RX. */ + status = owl_emac_dma_cmd_stop(priv); + + /* Update operation modes. */ + owl_emac_reg_update(priv, OWL_EMAC_REG_MAC_CSR6, + OWL_EMAC_BIT_MAC_CSR6_PR | OWL_EMAC_BIT_MAC_CSR6_PM, + val); + + /* Restore DMA TX & RX status. 
*/ + owl_emac_dma_cmd_set(priv, status); + + spin_unlock_bh(&priv->lock); + + /* Set/reset multicast addr list. */ + owl_emac_set_multicast(netdev, mcast_count); +} + +static int owl_emac_ndo_set_mac_addr(struct net_device *netdev, void *addr) +{ + struct sockaddr *skaddr = addr; + + if (!is_valid_ether_addr(skaddr->sa_data)) + return -EADDRNOTAVAIL; + + if (netif_running(netdev)) + return -EBUSY; + + memcpy(netdev->dev_addr, skaddr->sa_data, netdev->addr_len); + owl_emac_set_hw_mac_addr(netdev); + + return owl_emac_setup_frame_xmit(netdev_priv(netdev)); +} + +static int owl_emac_ndo_do_ioctl(struct net_device *netdev, + struct ifreq *req, int cmd) +{ + if (!netif_running(netdev)) + return -EINVAL; + + return phy_mii_ioctl(netdev->phydev, req, cmd); +} + +static void owl_emac_ndo_tx_timeout(struct net_device *netdev, + unsigned int txqueue) +{ + struct owl_emac_priv *priv = netdev_priv(netdev); + + schedule_work(&priv->mac_reset_task); +} + +static void owl_emac_reset_task(struct work_struct *work) +{ + struct owl_emac_priv *priv; + + priv = container_of(work, struct owl_emac_priv, mac_reset_task); + + netdev_dbg(priv->netdev, "resetting MAC\n"); + owl_emac_disable(priv->netdev, false); + owl_emac_enable(priv->netdev, false); +} + +static struct net_device_stats * +owl_emac_ndo_get_stats(struct net_device *netdev) +{ + /* FIXME: If possible, try to get stats from MAC hardware registers + * instead of tracking them manually in the driver. + */ + + return &netdev->stats; +} + +static const struct net_device_ops owl_emac_netdev_ops = { + .ndo_open = owl_emac_ndo_open, + .ndo_stop = owl_emac_ndo_stop, + .ndo_start_xmit = owl_emac_ndo_start_xmit, + .ndo_set_rx_mode = owl_emac_ndo_set_rx_mode, + .ndo_set_mac_address = owl_emac_ndo_set_mac_addr, + .ndo_validate_addr = eth_validate_addr, + .ndo_do_ioctl = owl_emac_ndo_do_ioctl, + .ndo_tx_timeout = owl_emac_ndo_tx_timeout, + .ndo_get_stats = owl_emac_ndo_get_stats, +}; + +static void owl_emac_ethtool_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + strscpy(info->driver, OWL_EMAC_DRVNAME, sizeof(info->driver)); +} + +static u32 owl_emac_ethtool_get_msglevel(struct net_device *netdev) +{ + struct owl_emac_priv *priv = netdev_priv(netdev); + + return priv->msg_enable; +} + +static void owl_emac_ethtool_set_msglevel(struct net_device *ndev, u32 val) +{ + struct owl_emac_priv *priv = netdev_priv(ndev); + + priv->msg_enable = val; +} + +static const struct ethtool_ops owl_emac_ethtool_ops = { + .get_drvinfo = owl_emac_ethtool_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_link_ksettings = phy_ethtool_get_link_ksettings, + .set_link_ksettings = phy_ethtool_set_link_ksettings, + .get_msglevel = owl_emac_ethtool_get_msglevel, + .set_msglevel = owl_emac_ethtool_set_msglevel, +}; + +static int owl_emac_mdio_wait(struct owl_emac_priv *priv) +{ + u32 val; + + /* Wait while data transfer is in progress. 
*/ + return readl_poll_timeout(priv->base + OWL_EMAC_REG_MAC_CSR10, + val, !(val & OWL_EMAC_BIT_MAC_CSR10_SB), + OWL_EMAC_POLL_DELAY_USEC, + OWL_EMAC_MDIO_POLL_TIMEOUT_USEC); +} + +static int owl_emac_mdio_read(struct mii_bus *bus, int addr, int regnum) +{ + struct owl_emac_priv *priv = bus->priv; + u32 data, tmp; + int ret; + + if (regnum & MII_ADDR_C45) + return -EOPNOTSUPP; + + data = OWL_EMAC_BIT_MAC_CSR10_SB; + data |= OWL_EMAC_VAL_MAC_CSR10_OPCODE_RD << OWL_EMAC_OFF_MAC_CSR10_OPCODE; + + tmp = addr << OWL_EMAC_OFF_MAC_CSR10_PHYADD; + data |= tmp & OWL_EMAC_MSK_MAC_CSR10_PHYADD; + + tmp = regnum << OWL_EMAC_OFF_MAC_CSR10_REGADD; + data |= tmp & OWL_EMAC_MSK_MAC_CSR10_REGADD; + + owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR10, data); + + ret = owl_emac_mdio_wait(priv); + if (ret) + return ret; + + data = owl_emac_reg_read(priv, OWL_EMAC_REG_MAC_CSR10); + data &= OWL_EMAC_MSK_MAC_CSR10_DATA; + + return data; +} + +static int +owl_emac_mdio_write(struct mii_bus *bus, int addr, int regnum, u16 val) +{ + struct owl_emac_priv *priv = bus->priv; + u32 data, tmp; + + if (regnum & MII_ADDR_C45) + return -EOPNOTSUPP; + + data = OWL_EMAC_BIT_MAC_CSR10_SB; + data |= OWL_EMAC_VAL_MAC_CSR10_OPCODE_WR << OWL_EMAC_OFF_MAC_CSR10_OPCODE; + + tmp = addr << OWL_EMAC_OFF_MAC_CSR10_PHYADD; + data |= tmp & OWL_EMAC_MSK_MAC_CSR10_PHYADD; + + tmp = regnum << OWL_EMAC_OFF_MAC_CSR10_REGADD; + data |= tmp & OWL_EMAC_MSK_MAC_CSR10_REGADD; + + data |= val & OWL_EMAC_MSK_MAC_CSR10_DATA; + + owl_emac_reg_write(priv, OWL_EMAC_REG_MAC_CSR10, data); + + return owl_emac_mdio_wait(priv); +} + +static int owl_emac_mdio_init(struct net_device *netdev) +{ + struct owl_emac_priv *priv = netdev_priv(netdev); + struct device *dev = owl_emac_get_dev(priv); + struct device_node *mdio_node; + int ret; + + mdio_node = of_get_child_by_name(dev->of_node, "mdio"); + if (!mdio_node) + return -ENODEV; + + if (!of_device_is_available(mdio_node)) { + ret = -ENODEV; + goto err_put_node; + } + + priv->mii = devm_mdiobus_alloc(dev); + if (!priv->mii) { + ret = -ENOMEM; + goto err_put_node; + } + + snprintf(priv->mii->id, MII_BUS_ID_SIZE, "%s", dev_name(dev)); + priv->mii->name = "owl-emac-mdio"; + priv->mii->parent = dev; + priv->mii->read = owl_emac_mdio_read; + priv->mii->write = owl_emac_mdio_write; + priv->mii->phy_mask = ~0; /* Mask out all PHYs from auto probing. 
*/ + priv->mii->priv = priv; + + ret = devm_of_mdiobus_register(dev, priv->mii, mdio_node); + +err_put_node: + of_node_put(mdio_node); + return ret; +} + +static int owl_emac_phy_init(struct net_device *netdev) +{ + struct owl_emac_priv *priv = netdev_priv(netdev); + struct device *dev = owl_emac_get_dev(priv); + struct phy_device *phy; + + phy = of_phy_get_and_connect(netdev, dev->of_node, + owl_emac_adjust_link); + if (!phy) + return -ENODEV; + + phy_set_sym_pause(phy, true, true, true); + + if (netif_msg_link(priv)) + phy_attached_info(phy); + + return 0; +} + +static void owl_emac_get_mac_addr(struct net_device *netdev) +{ + struct device *dev = netdev->dev.parent; + int ret; + + ret = eth_platform_get_mac_address(dev, netdev->dev_addr); + if (!ret && is_valid_ether_addr(netdev->dev_addr)) + return; + + eth_hw_addr_random(netdev); + dev_warn(dev, "using random MAC address %pM\n", netdev->dev_addr); +} + +static __maybe_unused int owl_emac_suspend(struct device *dev) +{ + struct net_device *netdev = dev_get_drvdata(dev); + struct owl_emac_priv *priv = netdev_priv(netdev); + + disable_irq(netdev->irq); + + if (netif_running(netdev)) { + owl_emac_disable(netdev, true); + netif_device_detach(netdev); + } + + clk_bulk_disable_unprepare(OWL_EMAC_NCLKS, priv->clks); + + return 0; +} + +static __maybe_unused int owl_emac_resume(struct device *dev) +{ + struct net_device *netdev = dev_get_drvdata(dev); + struct owl_emac_priv *priv = netdev_priv(netdev); + int ret; + + ret = clk_bulk_prepare_enable(OWL_EMAC_NCLKS, priv->clks); + if (ret) + return ret; + + if (netif_running(netdev)) { + owl_emac_core_hw_reset(priv); + owl_emac_core_sw_reset(priv); + + ret = owl_emac_enable(netdev, true); + if (ret) { + clk_bulk_disable_unprepare(OWL_EMAC_NCLKS, priv->clks); + return ret; + } + + netif_device_attach(netdev); + } + + enable_irq(netdev->irq); + + return 0; +} + +static void owl_emac_clk_disable_unprepare(void *data) +{ + struct owl_emac_priv *priv = data; + + clk_bulk_disable_unprepare(OWL_EMAC_NCLKS, priv->clks); +} + +static int owl_emac_clk_set_rate(struct owl_emac_priv *priv) +{ + struct device *dev = owl_emac_get_dev(priv); + unsigned long rate; + int ret; + + switch (priv->phy_mode) { + case PHY_INTERFACE_MODE_RMII: + rate = 50000000; + break; + + case PHY_INTERFACE_MODE_SMII: + rate = 125000000; + break; + + default: + dev_err(dev, "unsupported phy interface mode %d\n", + priv->phy_mode); + return -EOPNOTSUPP; + } + + ret = clk_set_rate(priv->clks[OWL_EMAC_CLK_RMII].clk, rate); + if (ret) + dev_err(dev, "failed to set RMII clock rate: %d\n", ret); + + return ret; +} + +static int owl_emac_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct net_device *netdev; + struct owl_emac_priv *priv; + int ret, i; + + netdev = devm_alloc_etherdev(dev, sizeof(*priv)); + if (!netdev) + return -ENOMEM; + + platform_set_drvdata(pdev, netdev); + SET_NETDEV_DEV(netdev, dev); + + priv = netdev_priv(netdev); + priv->netdev = netdev; + priv->msg_enable = netif_msg_init(-1, OWL_EMAC_DEFAULT_MSG_ENABLE); + + ret = of_get_phy_mode(dev->of_node, &priv->phy_mode); + if (ret) { + dev_err(dev, "failed to get phy mode: %d\n", ret); + return ret; + } + + spin_lock_init(&priv->lock); + + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); + if (ret) { + dev_err(dev, "unsupported DMA mask\n"); + return ret; + } + + ret = owl_emac_ring_alloc(dev, &priv->rx_ring, OWL_EMAC_RX_RING_SIZE); + if (ret) + return ret; + + ret = owl_emac_ring_alloc(dev, &priv->tx_ring, OWL_EMAC_TX_RING_SIZE); + if 
(ret) + return ret; + + priv->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(priv->base)) + return PTR_ERR(priv->base); + + netdev->irq = platform_get_irq(pdev, 0); + if (netdev->irq < 0) + return netdev->irq; + + ret = devm_request_irq(dev, netdev->irq, owl_emac_handle_irq, + IRQF_SHARED, netdev->name, netdev); + if (ret) { + dev_err(dev, "failed to request irq: %d\n", netdev->irq); + return ret; + } + + for (i = 0; i < OWL_EMAC_NCLKS; i++) + priv->clks[i].id = owl_emac_clk_names[i]; + + ret = devm_clk_bulk_get(dev, OWL_EMAC_NCLKS, priv->clks); + if (ret) + return ret; + + ret = clk_bulk_prepare_enable(OWL_EMAC_NCLKS, priv->clks); + if (ret) + return ret; + + ret = devm_add_action_or_reset(dev, owl_emac_clk_disable_unprepare, priv); + if (ret) + return ret; + + ret = owl_emac_clk_set_rate(priv); + if (ret) + return ret; + + priv->reset = devm_reset_control_get_exclusive(dev, NULL); + if (IS_ERR(priv->reset)) + return dev_err_probe(dev, PTR_ERR(priv->reset), + "failed to get reset control"); + + owl_emac_get_mac_addr(netdev); + + owl_emac_core_hw_reset(priv); + owl_emac_mdio_clock_enable(priv); + + ret = owl_emac_mdio_init(netdev); + if (ret) { + dev_err(dev, "failed to initialize MDIO bus\n"); + return ret; + } + + ret = owl_emac_phy_init(netdev); + if (ret) { + dev_err(dev, "failed to initialize PHY\n"); + return ret; + } + + INIT_WORK(&priv->mac_reset_task, owl_emac_reset_task); + + netdev->min_mtu = OWL_EMAC_MTU_MIN; + netdev->max_mtu = OWL_EMAC_MTU_MAX; + netdev->watchdog_timeo = OWL_EMAC_TX_TIMEOUT; + netdev->netdev_ops = &owl_emac_netdev_ops; + netdev->ethtool_ops = &owl_emac_ethtool_ops; + netif_napi_add(netdev, &priv->napi, owl_emac_poll, NAPI_POLL_WEIGHT); + + ret = devm_register_netdev(dev, netdev); + if (ret) { + netif_napi_del(&priv->napi); + phy_disconnect(netdev->phydev); + return ret; + } + + return 0; +} + +static int owl_emac_remove(struct platform_device *pdev) +{ + struct owl_emac_priv *priv = platform_get_drvdata(pdev); + + netif_napi_del(&priv->napi); + phy_disconnect(priv->netdev->phydev); + cancel_work_sync(&priv->mac_reset_task); + + return 0; +} + +static const struct of_device_id owl_emac_of_match[] = { + { .compatible = "actions,owl-emac", }, + { } +}; +MODULE_DEVICE_TABLE(of, owl_emac_of_match); + +static SIMPLE_DEV_PM_OPS(owl_emac_pm_ops, + owl_emac_suspend, owl_emac_resume); + +static struct platform_driver owl_emac_driver = { + .driver = { + .name = OWL_EMAC_DRVNAME, + .of_match_table = owl_emac_of_match, + .pm = &owl_emac_pm_ops, + }, + .probe = owl_emac_probe, + .remove = owl_emac_remove, +}; +module_platform_driver(owl_emac_driver); + +MODULE_DESCRIPTION("Actions Semi Owl SoCs Ethernet MAC Driver"); +MODULE_AUTHOR("Actions Semi Inc."); +MODULE_AUTHOR("Cristian Ciocaltea <cristian.ciocaltea@gmail.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/actions/owl-emac.h b/drivers/net/ethernet/actions/owl-emac.h new file mode 100644 index 000000000000..9eb0d1a30242 --- /dev/null +++ b/drivers/net/ethernet/actions/owl-emac.h @@ -0,0 +1,280 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Actions Semi Owl SoCs Ethernet MAC driver + * + * Copyright (c) 2012 Actions Semi Inc. 
+ * Copyright (c) 2021 Cristian Ciocaltea <cristian.ciocaltea@gmail.com> + */ + +#ifndef __OWL_EMAC_H__ +#define __OWL_EMAC_H__ + +#define OWL_EMAC_DRVNAME "owl-emac" + +#define OWL_EMAC_POLL_DELAY_USEC 5 +#define OWL_EMAC_MDIO_POLL_TIMEOUT_USEC 1000 +#define OWL_EMAC_RESET_POLL_TIMEOUT_USEC 2000 +#define OWL_EMAC_TX_TIMEOUT (2 * HZ) + +#define OWL_EMAC_MTU_MIN ETH_MIN_MTU +#define OWL_EMAC_MTU_MAX ETH_DATA_LEN +#define OWL_EMAC_RX_FRAME_MAX_LEN (ETH_FRAME_LEN + ETH_FCS_LEN) +#define OWL_EMAC_SKB_ALIGN 4 +#define OWL_EMAC_SKB_RESERVE 18 + +#define OWL_EMAC_MAX_MULTICAST_ADDRS 14 +#define OWL_EMAC_SETUP_FRAME_LEN 192 + +#define OWL_EMAC_RX_RING_SIZE 64 +#define OWL_EMAC_TX_RING_SIZE 32 + +/* Bus mode register */ +#define OWL_EMAC_REG_MAC_CSR0 0x0000 +#define OWL_EMAC_BIT_MAC_CSR0_SWR BIT(0) /* Software reset */ + +/* Transmit/receive poll demand registers */ +#define OWL_EMAC_REG_MAC_CSR1 0x0008 +#define OWL_EMAC_VAL_MAC_CSR1_TPD 0x01 +#define OWL_EMAC_REG_MAC_CSR2 0x0010 +#define OWL_EMAC_VAL_MAC_CSR2_RPD 0x01 + +/* Receive/transmit descriptor list base address registers */ +#define OWL_EMAC_REG_MAC_CSR3 0x0018 +#define OWL_EMAC_REG_MAC_CSR4 0x0020 + +/* Status register */ +#define OWL_EMAC_REG_MAC_CSR5 0x0028 +#define OWL_EMAC_MSK_MAC_CSR5_TS GENMASK(22, 20) /* Transmit process state */ +#define OWL_EMAC_OFF_MAC_CSR5_TS 20 +#define OWL_EMAC_VAL_MAC_CSR5_TS_DATA 0x03 /* Transferring data HOST -> FIFO */ +#define OWL_EMAC_VAL_MAC_CSR5_TS_CDES 0x07 /* Closing transmit descriptor */ +#define OWL_EMAC_MSK_MAC_CSR5_RS GENMASK(19, 17) /* Receive process state */ +#define OWL_EMAC_OFF_MAC_CSR5_RS 17 +#define OWL_EMAC_VAL_MAC_CSR5_RS_FDES 0x01 /* Fetching receive descriptor */ +#define OWL_EMAC_VAL_MAC_CSR5_RS_CDES 0x05 /* Closing receive descriptor */ +#define OWL_EMAC_VAL_MAC_CSR5_RS_DATA 0x07 /* Transferring data FIFO -> HOST */ +#define OWL_EMAC_BIT_MAC_CSR5_NIS BIT(16) /* Normal interrupt summary */ +#define OWL_EMAC_BIT_MAC_CSR5_AIS BIT(15) /* Abnormal interrupt summary */ +#define OWL_EMAC_BIT_MAC_CSR5_ERI BIT(14) /* Early receive interrupt */ +#define OWL_EMAC_BIT_MAC_CSR5_GTE BIT(11) /* General-purpose timer expiration */ +#define OWL_EMAC_BIT_MAC_CSR5_ETI BIT(10) /* Early transmit interrupt */ +#define OWL_EMAC_BIT_MAC_CSR5_RPS BIT(8) /* Receive process stopped */ +#define OWL_EMAC_BIT_MAC_CSR5_RU BIT(7) /* Receive buffer unavailable */ +#define OWL_EMAC_BIT_MAC_CSR5_RI BIT(6) /* Receive interrupt */ +#define OWL_EMAC_BIT_MAC_CSR5_UNF BIT(5) /* Transmit underflow */ +#define OWL_EMAC_BIT_MAC_CSR5_LCIS BIT(4) /* Link change status */ +#define OWL_EMAC_BIT_MAC_CSR5_LCIQ BIT(3) /* Link change interrupt */ +#define OWL_EMAC_BIT_MAC_CSR5_TU BIT(2) /* Transmit buffer unavailable */ +#define OWL_EMAC_BIT_MAC_CSR5_TPS BIT(1) /* Transmit process stopped */ +#define OWL_EMAC_BIT_MAC_CSR5_TI BIT(0) /* Transmit interrupt */ + +/* Operation mode register */ +#define OWL_EMAC_REG_MAC_CSR6 0x0030 +#define OWL_EMAC_BIT_MAC_CSR6_RA BIT(30) /* Receive all */ +#define OWL_EMAC_BIT_MAC_CSR6_TTM BIT(22) /* Transmit threshold mode */ +#define OWL_EMAC_BIT_MAC_CSR6_SF BIT(21) /* Store and forward */ +#define OWL_EMAC_MSK_MAC_CSR6_SPEED GENMASK(17, 16) /* Eth speed selection */ +#define OWL_EMAC_OFF_MAC_CSR6_SPEED 16 +#define OWL_EMAC_VAL_MAC_CSR6_SPEED_100M 0x00 +#define OWL_EMAC_VAL_MAC_CSR6_SPEED_10M 0x02 +#define OWL_EMAC_BIT_MAC_CSR6_ST BIT(13) /* Start/stop transmit command */ +#define OWL_EMAC_BIT_MAC_CSR6_LP BIT(10) /* Loopback mode */ +#define OWL_EMAC_BIT_MAC_CSR6_FD BIT(9) /* Full duplex mode */ 
+#define OWL_EMAC_BIT_MAC_CSR6_PM BIT(7) /* Pass all multicast */ +#define OWL_EMAC_BIT_MAC_CSR6_PR BIT(6) /* Promiscuous mode */ +#define OWL_EMAC_BIT_MAC_CSR6_IF BIT(4) /* Inverse filtering */ +#define OWL_EMAC_BIT_MAC_CSR6_PB BIT(3) /* Pass bad frames */ +#define OWL_EMAC_BIT_MAC_CSR6_HO BIT(2) /* Hash only filtering mode */ +#define OWL_EMAC_BIT_MAC_CSR6_SR BIT(1) /* Start/stop receive command */ +#define OWL_EMAC_BIT_MAC_CSR6_HP BIT(0) /* Hash/perfect receive filtering mode */ +#define OWL_EMAC_MSK_MAC_CSR6_STSR (OWL_EMAC_BIT_MAC_CSR6_ST | \ + OWL_EMAC_BIT_MAC_CSR6_SR) + +/* Interrupt enable register */ +#define OWL_EMAC_REG_MAC_CSR7 0x0038 +#define OWL_EMAC_BIT_MAC_CSR7_NIE BIT(16) /* Normal interrupt summary enable */ +#define OWL_EMAC_BIT_MAC_CSR7_AIE BIT(15) /* Abnormal interrupt summary enable */ +#define OWL_EMAC_BIT_MAC_CSR7_ERE BIT(14) /* Early receive interrupt enable */ +#define OWL_EMAC_BIT_MAC_CSR7_GTE BIT(11) /* General-purpose timer overflow */ +#define OWL_EMAC_BIT_MAC_CSR7_ETE BIT(10) /* Early transmit interrupt enable */ +#define OWL_EMAC_BIT_MAC_CSR7_RSE BIT(8) /* Receive stopped enable */ +#define OWL_EMAC_BIT_MAC_CSR7_RUE BIT(7) /* Receive buffer unavailable enable */ +#define OWL_EMAC_BIT_MAC_CSR7_RIE BIT(6) /* Receive interrupt enable */ +#define OWL_EMAC_BIT_MAC_CSR7_UNE BIT(5) /* Underflow interrupt enable */ +#define OWL_EMAC_BIT_MAC_CSR7_TUE BIT(2) /* Transmit buffer unavailable enable */ +#define OWL_EMAC_BIT_MAC_CSR7_TSE BIT(1) /* Transmit stopped enable */ +#define OWL_EMAC_BIT_MAC_CSR7_TIE BIT(0) /* Transmit interrupt enable */ +#define OWL_EMAC_BIT_MAC_CSR7_ALL_NOT_TUE (OWL_EMAC_BIT_MAC_CSR7_ERE | \ + OWL_EMAC_BIT_MAC_CSR7_GTE | \ + OWL_EMAC_BIT_MAC_CSR7_ETE | \ + OWL_EMAC_BIT_MAC_CSR7_RSE | \ + OWL_EMAC_BIT_MAC_CSR7_RUE | \ + OWL_EMAC_BIT_MAC_CSR7_RIE | \ + OWL_EMAC_BIT_MAC_CSR7_UNE | \ + OWL_EMAC_BIT_MAC_CSR7_TSE | \ + OWL_EMAC_BIT_MAC_CSR7_TIE) + +/* Missed frames and overflow counter register */ +#define OWL_EMAC_REG_MAC_CSR8 0x0040 +/* MII management and serial ROM register */ +#define OWL_EMAC_REG_MAC_CSR9 0x0048 + +/* MII serial management register */ +#define OWL_EMAC_REG_MAC_CSR10 0x0050 +#define OWL_EMAC_BIT_MAC_CSR10_SB BIT(31) /* Start transfer or busy */ +#define OWL_EMAC_MSK_MAC_CSR10_CLKDIV GENMASK(30, 28) /* Clock divider */ +#define OWL_EMAC_OFF_MAC_CSR10_CLKDIV 28 +#define OWL_EMAC_VAL_MAC_CSR10_CLKDIV_128 0x04 +#define OWL_EMAC_VAL_MAC_CSR10_OPCODE_WR 0x01 /* Register write command */ +#define OWL_EMAC_OFF_MAC_CSR10_OPCODE 26 /* Operation mode */ +#define OWL_EMAC_VAL_MAC_CSR10_OPCODE_DCG 0x00 /* Disable clock generation */ +#define OWL_EMAC_VAL_MAC_CSR10_OPCODE_WR 0x01 /* Register write command */ +#define OWL_EMAC_VAL_MAC_CSR10_OPCODE_RD 0x02 /* Register read command */ +#define OWL_EMAC_VAL_MAC_CSR10_OPCODE_CDS 0x03 /* Clock divider set */ +#define OWL_EMAC_MSK_MAC_CSR10_PHYADD GENMASK(25, 21) /* Physical layer address */ +#define OWL_EMAC_OFF_MAC_CSR10_PHYADD 21 +#define OWL_EMAC_MSK_MAC_CSR10_REGADD GENMASK(20, 16) /* Register address */ +#define OWL_EMAC_OFF_MAC_CSR10_REGADD 16 +#define OWL_EMAC_MSK_MAC_CSR10_DATA GENMASK(15, 0) /* Register data */ + +/* General-purpose timer and interrupt mitigation control register */ +#define OWL_EMAC_REG_MAC_CSR11 0x0058 +#define OWL_EMAC_OFF_MAC_CSR11_TT 27 /* Transmit timer */ +#define OWL_EMAC_OFF_MAC_CSR11_NTP 24 /* No. of transmit packets */ +#define OWL_EMAC_OFF_MAC_CSR11_RT 20 /* Receive timer */ +#define OWL_EMAC_OFF_MAC_CSR11_NRP 17 /* No. 
of receive packets */ + +/* MAC address low/high registers */ +#define OWL_EMAC_REG_MAC_CSR16 0x0080 +#define OWL_EMAC_REG_MAC_CSR17 0x0088 + +/* Pause time & cache thresholds register */ +#define OWL_EMAC_REG_MAC_CSR18 0x0090 +#define OWL_EMAC_OFF_MAC_CSR18_CPTL 24 /* Cache pause threshold level */ +#define OWL_EMAC_OFF_MAC_CSR18_CRTL 16 /* Cache restart threshold level */ +#define OWL_EMAC_OFF_MAC_CSR18_PQT 0 /* Flow control pause quanta time */ + +/* FIFO pause & restart threshold register */ +#define OWL_EMAC_REG_MAC_CSR19 0x0098 +#define OWL_EMAC_OFF_MAC_CSR19_FPTL 16 /* FIFO pause threshold level */ +#define OWL_EMAC_OFF_MAC_CSR19_FRTL 0 /* FIFO restart threshold level */ + +/* Flow control setup & status register */ +#define OWL_EMAC_REG_MAC_CSR20 0x00A0 +#define OWL_EMAC_BIT_MAC_CSR20_FCE BIT(31) /* Flow Control Enable */ +#define OWL_EMAC_BIT_MAC_CSR20_TUE BIT(30) /* Transmit Un-pause frames Enable */ +#define OWL_EMAC_BIT_MAC_CSR20_TPE BIT(29) /* Transmit Pause frames Enable */ +#define OWL_EMAC_BIT_MAC_CSR20_RPE BIT(28) /* Receive Pause frames Enable */ +#define OWL_EMAC_BIT_MAC_CSR20_BPE BIT(27) /* Back pressure (half-duplex) Enable */ + +/* MII control register */ +#define OWL_EMAC_REG_MAC_CTRL 0x00B0 +#define OWL_EMAC_BIT_MAC_CTRL_RRSB BIT(8) /* RMII_REFCLK select bit */ +#define OWL_EMAC_OFF_MAC_CTRL_SSDC 4 /* SMII SYNC delay cycle */ +#define OWL_EMAC_BIT_MAC_CTRL_RCPS BIT(1) /* REF_CLK phase select */ +#define OWL_EMAC_BIT_MAC_CTRL_RSIS BIT(0) /* RMII/SMII interface select */ + +/* Receive descriptor status field */ +#define OWL_EMAC_BIT_RDES0_OWN BIT(31) /* Ownership bit */ +#define OWL_EMAC_BIT_RDES0_FF BIT(30) /* Filtering fail */ +#define OWL_EMAC_MSK_RDES0_FL GENMASK(29, 16) /* Frame length */ +#define OWL_EMAC_OFF_RDES0_FL 16 +#define OWL_EMAC_BIT_RDES0_ES BIT(15) /* Error summary */ +#define OWL_EMAC_BIT_RDES0_DE BIT(14) /* Descriptor error */ +#define OWL_EMAC_BIT_RDES0_RF BIT(11) /* Runt frame */ +#define OWL_EMAC_BIT_RDES0_MF BIT(10) /* Multicast frame */ +#define OWL_EMAC_BIT_RDES0_FS BIT(9) /* First descriptor */ +#define OWL_EMAC_BIT_RDES0_LS BIT(8) /* Last descriptor */ +#define OWL_EMAC_BIT_RDES0_TL BIT(7) /* Frame too long */ +#define OWL_EMAC_BIT_RDES0_CS BIT(6) /* Collision seen */ +#define OWL_EMAC_BIT_RDES0_FT BIT(5) /* Frame type */ +#define OWL_EMAC_BIT_RDES0_RE BIT(3) /* Report on MII error */ +#define OWL_EMAC_BIT_RDES0_DB BIT(2) /* Dribbling bit */ +#define OWL_EMAC_BIT_RDES0_CE BIT(1) /* CRC error */ +#define OWL_EMAC_BIT_RDES0_ZERO BIT(0) /* Legal frame length indicator */ + +/* Receive descriptor control and count field */ +#define OWL_EMAC_BIT_RDES1_RER BIT(25) /* Receive end of ring */ +#define OWL_EMAC_MSK_RDES1_RBS1 GENMASK(10, 0) /* Buffer 1 size */ + +/* Transmit descriptor status field */ +#define OWL_EMAC_BIT_TDES0_OWN BIT(31) /* Ownership bit */ +#define OWL_EMAC_BIT_TDES0_ES BIT(15) /* Error summary */ +#define OWL_EMAC_BIT_TDES0_LO BIT(11) /* Loss of carrier */ +#define OWL_EMAC_BIT_TDES0_NC BIT(10) /* No carrier */ +#define OWL_EMAC_BIT_TDES0_LC BIT(9) /* Late collision */ +#define OWL_EMAC_BIT_TDES0_EC BIT(8) /* Excessive collisions */ +#define OWL_EMAC_MSK_TDES0_CC GENMASK(6, 3) /* Collision count */ +#define OWL_EMAC_BIT_TDES0_UF BIT(1) /* Underflow error */ +#define OWL_EMAC_BIT_TDES0_DE BIT(0) /* Deferred */ + +/* Transmit descriptor control and count field */ +#define OWL_EMAC_BIT_TDES1_IC BIT(31) /* Interrupt on completion */ +#define OWL_EMAC_BIT_TDES1_LS BIT(30) /* Last descriptor */ +#define OWL_EMAC_BIT_TDES1_FS BIT(29) /* 
First descriptor */ +#define OWL_EMAC_BIT_TDES1_FT1 BIT(28) /* Filtering type */ +#define OWL_EMAC_BIT_TDES1_SET BIT(27) /* Setup packet */ +#define OWL_EMAC_BIT_TDES1_AC BIT(26) /* Add CRC disable */ +#define OWL_EMAC_BIT_TDES1_TER BIT(25) /* Transmit end of ring */ +#define OWL_EMAC_BIT_TDES1_DPD BIT(23) /* Disabled padding */ +#define OWL_EMAC_BIT_TDES1_FT0 BIT(22) /* Filtering type */ +#define OWL_EMAC_MSK_TDES1_TBS1 GENMASK(10, 0) /* Buffer 1 size */ + +static const char *const owl_emac_clk_names[] = { "eth", "rmii" }; +#define OWL_EMAC_NCLKS ARRAY_SIZE(owl_emac_clk_names) + +enum owl_emac_clk_map { + OWL_EMAC_CLK_ETH = 0, + OWL_EMAC_CLK_RMII +}; + +struct owl_emac_addr_list { + u8 addrs[OWL_EMAC_MAX_MULTICAST_ADDRS][ETH_ALEN]; + int count; +}; + +/* TX/RX descriptors */ +struct owl_emac_ring_desc { + u32 status; + u32 control; + u32 buf_addr; + u32 reserved; /* 2nd buffer address is not used */ +}; + +struct owl_emac_ring { + struct owl_emac_ring_desc *descs; + dma_addr_t descs_dma; + struct sk_buff **skbs; + dma_addr_t *skbs_dma; + unsigned int size; + unsigned int head; + unsigned int tail; +}; + +struct owl_emac_priv { + struct net_device *netdev; + void __iomem *base; + + struct clk_bulk_data clks[OWL_EMAC_NCLKS]; + struct reset_control *reset; + + struct owl_emac_ring rx_ring; + struct owl_emac_ring tx_ring; + + struct mii_bus *mii; + struct napi_struct napi; + + phy_interface_t phy_mode; + unsigned int link; + int speed; + int duplex; + int pause; + struct owl_emac_addr_list mcaddr_list; + + struct work_struct mac_reset_task; + + u32 msg_enable; /* Debug message level */ + spinlock_t lock; /* Sync concurrent ring access */ +}; + +#endif /* __OWL_EMAC_H__ */ diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c index 9c5891bbfe61..d77fafbc1530 100644 --- a/drivers/net/ethernet/aeroflex/greth.c +++ b/drivers/net/ethernet/aeroflex/greth.c @@ -1449,10 +1449,10 @@ static int greth_of_probe(struct platform_device *ofdev) break; } if (i == 6) { - const u8 *addr; + u8 addr[ETH_ALEN]; - addr = of_get_mac_address(ofdev->dev.of_node); - if (!IS_ERR(addr)) { + err = of_get_mac_address(ofdev->dev.of_node, addr); + if (!err) { for (i = 0; i < 6; i++) macaddr[i] = (unsigned int) addr[i]; } else { diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c index 5ed80d9a6b9f..f99ae317c188 100644 --- a/drivers/net/ethernet/allwinner/sun4i-emac.c +++ b/drivers/net/ethernet/allwinner/sun4i-emac.c @@ -790,7 +790,6 @@ static int emac_probe(struct platform_device *pdev) struct emac_board_info *db; struct net_device *ndev; int ret = 0; - const char *mac_addr; ndev = alloc_etherdev(sizeof(struct emac_board_info)); if (!ndev) { @@ -853,12 +852,9 @@ static int emac_probe(struct platform_device *pdev) } /* Read MAC-address from DT */ - mac_addr = of_get_mac_address(np); - if (!IS_ERR(mac_addr)) - ether_addr_copy(ndev->dev_addr, mac_addr); - - /* Check if the MAC address is valid, if not get a random one */ - if (!is_valid_ether_addr(ndev->dev_addr)) { + ret = of_get_mac_address(np, ndev->dev_addr); + if (ret) { + /* if the MAC address is invalid get a random one */ eth_hw_addr_random(ndev); dev_warn(&pdev->dev, "using random MAC address %pM\n", ndev->dev_addr); diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 907125abef2c..1c00d719e5d7 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -1351,7 +1351,6 
@@ static int altera_tse_probe(struct platform_device *pdev) struct resource *control_port; struct resource *dma_res; struct altera_tse_private *priv; - const unsigned char *macaddr; void __iomem *descmap; const struct of_device_id *of_id = NULL; @@ -1525,10 +1524,8 @@ static int altera_tse_probe(struct platform_device *pdev) priv->rx_dma_buf_sz = ALTERA_RXDMABUFFER_SIZE; /* get default MAC address from device tree */ - macaddr = of_get_mac_address(pdev->dev.of_node); - if (!IS_ERR(macaddr)) - ether_addr_copy(ndev->dev_addr, macaddr); - else + ret = of_get_mac_address(pdev->dev.of_node, ndev->dev_addr); + if (ret) eth_hw_addr_random(ndev); /* get phy addr and create mdio */ diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 02087d443e73..764852ead1d6 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -863,7 +863,7 @@ static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset) if (unlikely(i == timeout)) { netdev_err(ena_dev->net_device, - "Reading reg failed for timeout. expected: req id[%hu] offset[%hu] actual: req id[%hu] offset[%hu]\n", + "Reading reg failed for timeout. expected: req id[%u] offset[%u] actual: req id[%u] offset[%u]\n", mmio_read->seq_num, offset, read_resp->req_id, read_resp->reg_off); ret = ENA_MMIO_READ_TIMEOUT; @@ -2396,7 +2396,7 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, if (key) { if (key_len != sizeof(hash_key->key)) { netdev_err(ena_dev->net_device, - "key len (%hu) doesn't equal the supported size (%zu)\n", + "key len (%u) doesn't equal the supported size (%zu)\n", key_len, sizeof(hash_key->key)); return -EINVAL; } diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h index 343caf41e709..73b03ce59412 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.h +++ b/drivers/net/ethernet/amazon/ena/ena_com.h @@ -124,7 +124,7 @@ struct ena_com_io_cq { /* holds the number of cdesc of the current packet */ u16 cur_rx_pkt_cdesc_count; - /* save the firt cdesc idx of the current packet */ + /* save the first cdesc idx of the current packet */ u16 cur_rx_pkt_cdesc_start_idx; u16 q_depth; diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index d6cc7aa612b7..2fe7ccee55b2 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -251,10 +251,10 @@ static void ena_queue_strings(struct ena_adapter *adapter, u8 **data) for (j = 0; j < ENA_STATS_ARRAY_TX; j++) { ena_stats = &ena_stats_tx_strings[j]; - snprintf(*data, ETH_GSTRING_LEN, - "queue_%u_%s_%s", i, - is_xdp ? "xdp_tx" : "tx", ena_stats->name); - (*data) += ETH_GSTRING_LEN; + ethtool_sprintf(data, + "queue_%u_%s_%s", i, + is_xdp ? 
"xdp_tx" : "tx", + ena_stats->name); } if (!is_xdp) { @@ -264,9 +264,9 @@ static void ena_queue_strings(struct ena_adapter *adapter, u8 **data) for (j = 0; j < ENA_STATS_ARRAY_RX; j++) { ena_stats = &ena_stats_rx_strings[j]; - snprintf(*data, ETH_GSTRING_LEN, - "queue_%u_rx_%s", i, ena_stats->name); - (*data) += ETH_GSTRING_LEN; + ethtool_sprintf(data, + "queue_%u_rx_%s", i, + ena_stats->name); } } } @@ -280,9 +280,8 @@ static void ena_com_dev_strings(u8 **data) for (i = 0; i < ENA_STATS_ARRAY_ENA_COM; i++) { ena_stats = &ena_stats_ena_com_strings[i]; - snprintf(*data, ETH_GSTRING_LEN, - "ena_admin_q_%s", ena_stats->name); - (*data) += ETH_GSTRING_LEN; + ethtool_sprintf(data, + "ena_admin_q_%s", ena_stats->name); } } @@ -295,15 +294,13 @@ static void ena_get_strings(struct ena_adapter *adapter, for (i = 0; i < ENA_STATS_ARRAY_GLOBAL; i++) { ena_stats = &ena_stats_global_strings[i]; - memcpy(data, ena_stats->name, ETH_GSTRING_LEN); - data += ETH_GSTRING_LEN; + ethtool_sprintf(&data, ena_stats->name); } if (eni_stats_needed) { for (i = 0; i < ENA_STATS_ARRAY_ENI(adapter); i++) { ena_stats = &ena_stats_eni_strings[i]; - memcpy(data, ena_stats->name, ETH_GSTRING_LEN); - data += ETH_GSTRING_LEN; + ethtool_sprintf(&data, ena_stats->name); } } diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 102f2c91fdb8..881f88754bf6 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -300,7 +300,7 @@ static int ena_xdp_xmit_frame(struct ena_ring *xdp_ring, rc = ena_xdp_tx_map_frame(xdp_ring, tx_info, xdpf, &push_hdr, &push_len); if (unlikely(rc)) - goto error_drop_packet; + return rc; ena_tx_ctx.ena_bufs = tx_info->bufs; ena_tx_ctx.push_header = push_hdr; @@ -330,8 +330,6 @@ static int ena_xdp_xmit_frame(struct ena_ring *xdp_ring, error_unmap_dma: ena_unmap_tx_buff(xdp_ring, tx_info); tx_info->xdpf = NULL; -error_drop_packet: - xdp_return_frame(xdpf); return rc; } @@ -339,8 +337,8 @@ static int ena_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, u32 flags) { struct ena_adapter *adapter = netdev_priv(dev); - int qid, i, err, drops = 0; struct ena_ring *xdp_ring; + int qid, i, nxmit = 0; if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) return -EINVAL; @@ -360,12 +358,9 @@ static int ena_xdp_xmit(struct net_device *dev, int n, spin_lock(&xdp_ring->xdp_tx_lock); for (i = 0; i < n; i++) { - err = ena_xdp_xmit_frame(xdp_ring, dev, frames[i], 0); - /* The descriptor is freed by ena_xdp_xmit_frame in case - * of an error. 
- */ - if (err) - drops++; + if (ena_xdp_xmit_frame(xdp_ring, dev, frames[i], 0)) + break; + nxmit++; } /* Ring doorbell to make device aware of the packets */ @@ -378,7 +373,7 @@ static int ena_xdp_xmit(struct net_device *dev, int n, spin_unlock(&xdp_ring->xdp_tx_lock); /* Return number of packets sent */ - return n - drops; + return nxmit; } static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp) @@ -415,7 +410,9 @@ static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp) /* The XDP queues are shared between XDP_TX and XDP_REDIRECT */ spin_lock(&xdp_ring->xdp_tx_lock); - ena_xdp_xmit_frame(xdp_ring, rx_ring->netdev, xdpf, XDP_XMIT_FLUSH); + if (ena_xdp_xmit_frame(xdp_ring, rx_ring->netdev, xdpf, + XDP_XMIT_FLUSH)) + xdp_return_frame(xdpf); spin_unlock(&xdp_ring->xdp_tx_lock); xdp_stat = &rx_ring->rx_stats.xdp_tx; @@ -3978,7 +3975,7 @@ static u32 ena_calc_max_io_queue_num(struct pci_dev *pdev, max_num_io_queues = min_t(u32, max_num_io_queues, io_rx_num); max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_sq_num); max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_cq_num); - /* 1 IRQ for for mgmnt and 1 IRQs for each IO direction */ + /* 1 IRQ for mgmnt and 1 IRQs for each IO direction */ max_num_io_queues = min_t(u32, max_num_io_queues, pci_msix_vec_count(pdev) - 1); if (unlikely(!max_num_io_queues)) { dev_err(&pdev->dev, "The device doesn't have io queues\n"); diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index 960d483e8997..4a1220cc6f10 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -100,19 +100,19 @@ static int amd8111e_read_phy(struct amd8111e_priv *lp, { void __iomem *mmio = lp->mmio; unsigned int reg_val; - unsigned int repeat= REPEAT_CNT; + unsigned int repeat = REPEAT_CNT; reg_val = readl(mmio + PHY_ACCESS); while (reg_val & PHY_CMD_ACTIVE) - reg_val = readl( mmio + PHY_ACCESS ); + reg_val = readl(mmio + PHY_ACCESS); - writel( PHY_RD_CMD | ((phy_id & 0x1f) << 21) | - ((reg & 0x1f) << 16), mmio +PHY_ACCESS); - do{ + writel(PHY_RD_CMD | ((phy_id & 0x1f) << 21) | + ((reg & 0x1f) << 16), mmio + PHY_ACCESS); + do { reg_val = readl(mmio + PHY_ACCESS); udelay(30); /* It takes 30 us to read/write data */ } while (--repeat && (reg_val & PHY_CMD_ACTIVE)); - if(reg_val & PHY_RD_ERR) + if (reg_val & PHY_RD_ERR) goto err_phy_read; *val = reg_val & 0xffff; @@ -133,17 +133,17 @@ static int amd8111e_write_phy(struct amd8111e_priv *lp, reg_val = readl(mmio + PHY_ACCESS); while (reg_val & PHY_CMD_ACTIVE) - reg_val = readl( mmio + PHY_ACCESS ); + reg_val = readl(mmio + PHY_ACCESS); - writel( PHY_WR_CMD | ((phy_id & 0x1f) << 21) | + writel(PHY_WR_CMD | ((phy_id & 0x1f) << 21) | ((reg & 0x1f) << 16)|val, mmio + PHY_ACCESS); - do{ + do { reg_val = readl(mmio + PHY_ACCESS); udelay(30); /* It takes 30 us to read/write the data */ } while (--repeat && (reg_val & PHY_CMD_ACTIVE)); - if(reg_val & PHY_RD_ERR) + if (reg_val & PHY_RD_ERR) goto err_phy_write; return 0; @@ -159,7 +159,7 @@ static int amd8111e_mdio_read(struct net_device *dev, int phy_id, int reg_num) struct amd8111e_priv *lp = netdev_priv(dev); unsigned int reg_val; - amd8111e_read_phy(lp,phy_id,reg_num,®_val); + amd8111e_read_phy(lp, phy_id, reg_num, ®_val); return reg_val; } @@ -179,17 +179,17 @@ static void amd8111e_mdio_write(struct net_device *dev, static void amd8111e_set_ext_phy(struct net_device *dev) { struct amd8111e_priv *lp = netdev_priv(dev); - u32 bmcr,advert,tmp; + u32 bmcr, advert, tmp; /* 
Determine mii register values to set the speed */ advert = amd8111e_mdio_read(dev, lp->ext_phy_addr, MII_ADVERTISE); tmp = advert & ~(ADVERTISE_ALL | ADVERTISE_100BASE4); - switch (lp->ext_phy_option){ + switch (lp->ext_phy_option) { default: case SPEED_AUTONEG: /* advertise all values */ - tmp |= ( ADVERTISE_10HALF|ADVERTISE_10FULL| - ADVERTISE_100HALF|ADVERTISE_100FULL) ; + tmp |= (ADVERTISE_10HALF | ADVERTISE_10FULL | + ADVERTISE_100HALF | ADVERTISE_100FULL); break; case SPEED10_HALF: tmp |= ADVERTISE_10HALF; @@ -224,20 +224,20 @@ static int amd8111e_free_skbs(struct net_device *dev) int i; /* Freeing transmit skbs */ - for(i = 0; i < NUM_TX_BUFFERS; i++){ - if(lp->tx_skbuff[i]){ + for (i = 0; i < NUM_TX_BUFFERS; i++) { + if (lp->tx_skbuff[i]) { dma_unmap_single(&lp->pci_dev->dev, lp->tx_dma_addr[i], lp->tx_skbuff[i]->len, DMA_TO_DEVICE); - dev_kfree_skb (lp->tx_skbuff[i]); + dev_kfree_skb(lp->tx_skbuff[i]); lp->tx_skbuff[i] = NULL; lp->tx_dma_addr[i] = 0; } } /* Freeing previously allocated receive buffers */ - for (i = 0; i < NUM_RX_BUFFERS; i++){ + for (i = 0; i < NUM_RX_BUFFERS; i++) { rx_skbuff = lp->rx_skbuff[i]; - if(rx_skbuff != NULL){ + if (rx_skbuff != NULL) { dma_unmap_single(&lp->pci_dev->dev, lp->rx_dma_addr[i], lp->rx_buff_len - 2, DMA_FROM_DEVICE); @@ -258,13 +258,13 @@ static inline void amd8111e_set_rx_buff_len(struct net_device *dev) struct amd8111e_priv *lp = netdev_priv(dev); unsigned int mtu = dev->mtu; - if (mtu > ETH_DATA_LEN){ + if (mtu > ETH_DATA_LEN) { /* MTU + ethernet header + FCS * + optional VLAN tag + skb reserve space 2 */ lp->rx_buff_len = mtu + ETH_HLEN + 10; lp->options |= OPTION_JUMBO_ENABLE; - } else{ + } else { lp->rx_buff_len = PKT_BUFF_SZ; lp->options &= ~OPTION_JUMBO_ENABLE; } @@ -285,11 +285,11 @@ static int amd8111e_init_ring(struct net_device *dev) lp->tx_ring_idx = 0; - if(lp->opened) + if (lp->opened) /* Free previously allocated transmit and receive skbs */ amd8111e_free_skbs(dev); - else{ + else { /* allocate the tx and rx descriptors */ lp->tx_ring = dma_alloc_coherent(&lp->pci_dev->dev, sizeof(struct amd8111e_tx_dr) * NUM_TX_RING_DR, @@ -312,12 +312,12 @@ static int amd8111e_init_ring(struct net_device *dev) lp->rx_skbuff[i] = netdev_alloc_skb(dev, lp->rx_buff_len); if (!lp->rx_skbuff[i]) { - /* Release previos allocated skbs */ - for(--i; i >= 0 ;i--) - dev_kfree_skb(lp->rx_skbuff[i]); - goto err_free_rx_ring; + /* Release previos allocated skbs */ + for (--i; i >= 0; i--) + dev_kfree_skb(lp->rx_skbuff[i]); + goto err_free_rx_ring; } - skb_reserve(lp->rx_skbuff[i],2); + skb_reserve(lp->rx_skbuff[i], 2); } /* Initilaizing receive descriptors */ for (i = 0; i < NUM_RX_BUFFERS; i++) { @@ -375,40 +375,40 @@ static int amd8111e_set_coalesce(struct net_device *dev, enum coal_mode cmod) case RX_INTR_COAL : timeout = coal_conf->rx_timeout; event_count = coal_conf->rx_event_count; - if( timeout > MAX_TIMEOUT || - event_count > MAX_EVENT_COUNT ) + if (timeout > MAX_TIMEOUT || + event_count > MAX_EVENT_COUNT) return -EINVAL; timeout = timeout * DELAY_TIMER_CONV; writel(VAL0|STINTEN, mmio+INTEN0); - writel((u32)DLY_INT_A_R0|( event_count<< 16 )|timeout, - mmio+DLY_INT_A); + writel((u32)DLY_INT_A_R0 | (event_count << 16) | + timeout, mmio + DLY_INT_A); break; - case TX_INTR_COAL : + case TX_INTR_COAL: timeout = coal_conf->tx_timeout; event_count = coal_conf->tx_event_count; - if( timeout > MAX_TIMEOUT || - event_count > MAX_EVENT_COUNT ) + if (timeout > MAX_TIMEOUT || + event_count > MAX_EVENT_COUNT) return -EINVAL; timeout = timeout * 
DELAY_TIMER_CONV; - writel(VAL0|STINTEN,mmio+INTEN0); - writel((u32)DLY_INT_B_T0|( event_count<< 16 )|timeout, - mmio+DLY_INT_B); + writel(VAL0 | STINTEN, mmio + INTEN0); + writel((u32)DLY_INT_B_T0 | (event_count << 16) | + timeout, mmio + DLY_INT_B); break; case DISABLE_COAL: - writel(0,mmio+STVAL); - writel(STINTEN, mmio+INTEN0); - writel(0, mmio +DLY_INT_B); - writel(0, mmio+DLY_INT_A); + writel(0, mmio + STVAL); + writel(STINTEN, mmio + INTEN0); + writel(0, mmio + DLY_INT_B); + writel(0, mmio + DLY_INT_A); break; case ENABLE_COAL: /* Start the timer */ - writel((u32)SOFT_TIMER_FREQ, mmio+STVAL); /* 0.5 sec */ - writel(VAL0|STINTEN, mmio+INTEN0); + writel((u32)SOFT_TIMER_FREQ, mmio + STVAL); /* 0.5 sec */ + writel(VAL0 | STINTEN, mmio + INTEN0); break; default: break; @@ -423,67 +423,67 @@ static int amd8111e_restart(struct net_device *dev) { struct amd8111e_priv *lp = netdev_priv(dev); void __iomem *mmio = lp->mmio; - int i,reg_val; + int i, reg_val; /* stop the chip */ writel(RUN, mmio + CMD0); - if(amd8111e_init_ring(dev)) + if (amd8111e_init_ring(dev)) return -ENOMEM; /* enable the port manager and set auto negotiation always */ - writel((u32) VAL1|EN_PMGR, mmio + CMD3 ); - writel((u32)XPHYANE|XPHYRST , mmio + CTRL2); + writel((u32)VAL1 | EN_PMGR, mmio + CMD3); + writel((u32)XPHYANE | XPHYRST, mmio + CTRL2); amd8111e_set_ext_phy(dev); /* set control registers */ reg_val = readl(mmio + CTRL1); reg_val &= ~XMTSP_MASK; - writel( reg_val| XMTSP_128 | CACHE_ALIGN, mmio + CTRL1 ); + writel(reg_val | XMTSP_128 | CACHE_ALIGN, mmio + CTRL1); /* enable interrupt */ - writel( APINT5EN | APINT4EN | APINT3EN | APINT2EN | APINT1EN | + writel(APINT5EN | APINT4EN | APINT3EN | APINT2EN | APINT1EN | APINT0EN | MIIPDTINTEN | MCCIINTEN | MCCINTEN | MREINTEN | SPNDINTEN | MPINTEN | SINTEN | STINTEN, mmio + INTEN0); writel(VAL3 | LCINTEN | VAL1 | TINTEN0 | VAL0 | RINTEN0, mmio + INTEN0); /* initialize tx and rx ring base addresses */ - writel((u32)lp->tx_ring_dma_addr,mmio + XMT_RING_BASE_ADDR0); - writel((u32)lp->rx_ring_dma_addr,mmio+ RCV_RING_BASE_ADDR0); + writel((u32)lp->tx_ring_dma_addr, mmio + XMT_RING_BASE_ADDR0); + writel((u32)lp->rx_ring_dma_addr, mmio + RCV_RING_BASE_ADDR0); writew((u32)NUM_TX_RING_DR, mmio + XMT_RING_LEN0); writew((u16)NUM_RX_RING_DR, mmio + RCV_RING_LEN0); /* set default IPG to 96 */ - writew((u32)DEFAULT_IPG,mmio+IPG); + writew((u32)DEFAULT_IPG, mmio + IPG); writew((u32)(DEFAULT_IPG-IFS1_DELTA), mmio + IFS1); - if(lp->options & OPTION_JUMBO_ENABLE){ + if (lp->options & OPTION_JUMBO_ENABLE) { writel((u32)VAL2|JUMBO, mmio + CMD3); /* Reset REX_UFLO */ - writel( REX_UFLO, mmio + CMD2); + writel(REX_UFLO, mmio + CMD2); /* Should not set REX_UFLO for jumbo frames */ - writel( VAL0 | APAD_XMT|REX_RTRY , mmio + CMD2); - }else{ - writel( VAL0 | APAD_XMT | REX_RTRY|REX_UFLO, mmio + CMD2); + writel(VAL0 | APAD_XMT | REX_RTRY, mmio + CMD2); + } else { + writel(VAL0 | APAD_XMT | REX_RTRY | REX_UFLO, mmio + CMD2); writel((u32)JUMBO, mmio + CMD3); } #if AMD8111E_VLAN_TAG_USED - writel((u32) VAL2|VSIZE|VL_TAG_DEL, mmio + CMD3); + writel((u32)VAL2 | VSIZE | VL_TAG_DEL, mmio + CMD3); #endif - writel( VAL0 | APAD_XMT | REX_RTRY, mmio + CMD2 ); + writel(VAL0 | APAD_XMT | REX_RTRY, mmio + CMD2); /* Setting the MAC address to the device */ for (i = 0; i < ETH_ALEN; i++) - writeb( dev->dev_addr[i], mmio + PADR + i ); + writeb(dev->dev_addr[i], mmio + PADR + i); /* Enable interrupt coalesce */ - if(lp->options & OPTION_INTR_COAL_ENABLE){ + if (lp->options & OPTION_INTR_COAL_ENABLE) { 
netdev_info(dev, "Interrupt Coalescing Enabled.\n"); - amd8111e_set_coalesce(dev,ENABLE_COAL); + amd8111e_set_coalesce(dev, ENABLE_COAL); } /* set RUN bit to start the chip */ @@ -499,11 +499,11 @@ static int amd8111e_restart(struct net_device *dev) static void amd8111e_init_hw_default(struct amd8111e_priv *lp) { unsigned int reg_val; - unsigned int logic_filter[2] ={0,}; + unsigned int logic_filter[2] = {0,}; void __iomem *mmio = lp->mmio; - /* stop the chip */ + /* stop the chip */ writel(RUN, mmio + CMD0); /* AUTOPOLL0 Register *//*TBD default value is 8100 in FPS */ @@ -519,13 +519,13 @@ static void amd8111e_init_hw_default(struct amd8111e_priv *lp) writel(0, mmio + XMT_RING_BASE_ADDR3); /* Clear CMD0 */ - writel(CMD0_CLEAR,mmio + CMD0); + writel(CMD0_CLEAR, mmio + CMD0); /* Clear CMD2 */ - writel(CMD2_CLEAR, mmio +CMD2); + writel(CMD2_CLEAR, mmio + CMD2); /* Clear CMD7 */ - writel(CMD7_CLEAR , mmio + CMD7); + writel(CMD7_CLEAR, mmio + CMD7); /* Clear DLY_INT_A and DLY_INT_B */ writel(0x0, mmio + DLY_INT_A); @@ -542,16 +542,16 @@ static void amd8111e_init_hw_default(struct amd8111e_priv *lp) writel(0x0, mmio + STVAL); /* Clear INTEN0 */ - writel( INTEN0_CLEAR, mmio + INTEN0); + writel(INTEN0_CLEAR, mmio + INTEN0); /* Clear LADRF */ - writel(0x0 , mmio + LADRF); + writel(0x0, mmio + LADRF); /* Set SRAM_SIZE & SRAM_BOUNDARY registers */ - writel( 0x80010,mmio + SRAM_SIZE); + writel(0x80010, mmio + SRAM_SIZE); /* Clear RCV_RING0_LEN */ - writel(0x0, mmio + RCV_RING_LEN0); + writel(0x0, mmio + RCV_RING_LEN0); /* Clear XMT_RING0/1/2/3_LEN */ writel(0x0, mmio + XMT_RING_LEN0); @@ -571,10 +571,10 @@ static void amd8111e_init_hw_default(struct amd8111e_priv *lp) /* SRAM_SIZE register */ reg_val = readl(mmio + SRAM_SIZE); - if(lp->options & OPTION_JUMBO_ENABLE) - writel( VAL2|JUMBO, mmio + CMD3); + if (lp->options & OPTION_JUMBO_ENABLE) + writel(VAL2 | JUMBO, mmio + CMD3); #if AMD8111E_VLAN_TAG_USED - writel(VAL2|VSIZE|VL_TAG_DEL, mmio + CMD3 ); + writel(VAL2 | VSIZE | VL_TAG_DEL, mmio + CMD3); #endif /* Set default value to CTRL1 Register */ writel(CTRL1_DEFAULT, mmio + CTRL1); @@ -616,14 +616,14 @@ static void amd8111e_stop_chip(struct amd8111e_priv *lp) static void amd8111e_free_ring(struct amd8111e_priv *lp) { /* Free transmit and receive descriptor rings */ - if(lp->rx_ring){ + if (lp->rx_ring) { dma_free_coherent(&lp->pci_dev->dev, sizeof(struct amd8111e_rx_dr) * NUM_RX_RING_DR, lp->rx_ring, lp->rx_ring_dma_addr); lp->rx_ring = NULL; } - if(lp->tx_ring){ + if (lp->tx_ring) { dma_free_coherent(&lp->pci_dev->dev, sizeof(struct amd8111e_tx_dr) * NUM_TX_RING_DR, lp->tx_ring, lp->tx_ring_dma_addr); @@ -643,11 +643,11 @@ static int amd8111e_tx(struct net_device *dev) int tx_index; int status; /* Complete all the transmit packet */ - while (lp->tx_complete_idx != lp->tx_idx){ + while (lp->tx_complete_idx != lp->tx_idx) { tx_index = lp->tx_complete_idx & TX_RING_DR_MOD_MASK; status = le16_to_cpu(lp->tx_ring[tx_index].tx_flags); - if(status & OWN_BIT) + if (status & OWN_BIT) break; /* It still hasn't been Txed */ lp->tx_ring[tx_index].buff_phy_addr = 0; @@ -669,10 +669,10 @@ static int amd8111e_tx(struct net_device *dev) le16_to_cpu(lp->tx_ring[tx_index].buff_count); if (netif_queue_stopped(dev) && - lp->tx_complete_idx > lp->tx_idx - NUM_TX_BUFFERS +2){ + lp->tx_complete_idx > lp->tx_idx - NUM_TX_BUFFERS + 2) { /* The ring is no longer full, clear tbusy. 
*/ /* lp->tx_full = 0; */ - netif_wake_queue (dev); + netif_wake_queue(dev); } } return 0; @@ -685,7 +685,7 @@ static int amd8111e_rx_poll(struct napi_struct *napi, int budget) struct net_device *dev = lp->amd8111e_net_dev; int rx_index = lp->rx_idx & RX_RING_DR_MOD_MASK; void __iomem *mmio = lp->mmio; - struct sk_buff *skb,*new_skb; + struct sk_buff *skb, *new_skb; int min_pkt_len, status; int num_rx_pkt = 0; short pkt_len; @@ -710,7 +710,7 @@ static int amd8111e_rx_poll(struct napi_struct *napi, int budget) goto err_next_pkt; } /* check for STP and ENP */ - if (!((status & STP_BIT) && (status & ENP_BIT))){ + if (!((status & STP_BIT) && (status & ENP_BIT))) { /* resetting flags */ lp->rx_ring[rx_index].rx_flags &= RESET_RX_FLAGS; goto err_next_pkt; @@ -755,7 +755,7 @@ static int amd8111e_rx_poll(struct napi_struct *napi, int budget) skb->protocol = eth_type_trans(skb, dev); #if AMD8111E_VLAN_TAG_USED - if (vtag == TT_VLAN_TAGGED){ + if (vtag == TT_VLAN_TAGGED) { u16 vlan_tag = le16_to_cpu(lp->rx_ring[rx_index].tag_ctrl_info); __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); } @@ -793,25 +793,25 @@ err_next_pkt: static int amd8111e_link_change(struct net_device *dev) { struct amd8111e_priv *lp = netdev_priv(dev); - int status0,speed; + int status0, speed; /* read the link change */ - status0 = readl(lp->mmio + STAT0); + status0 = readl(lp->mmio + STAT0); - if(status0 & LINK_STATS){ - if(status0 & AUTONEG_COMPLETE) + if (status0 & LINK_STATS) { + if (status0 & AUTONEG_COMPLETE) lp->link_config.autoneg = AUTONEG_ENABLE; else lp->link_config.autoneg = AUTONEG_DISABLE; - if(status0 & FULL_DPLX) + if (status0 & FULL_DPLX) lp->link_config.duplex = DUPLEX_FULL; else lp->link_config.duplex = DUPLEX_HALF; speed = (status0 & SPEED_MASK) >> 7; - if(speed == PHY_SPEED_10) + if (speed == PHY_SPEED_10) lp->link_config.speed = SPEED_10; - else if(speed == PHY_SPEED_100) + else if (speed == PHY_SPEED_100) lp->link_config.speed = SPEED_100; netdev_info(dev, "Link is Up. 
Speed is %s Mbps %s Duplex\n", @@ -821,8 +821,7 @@ static int amd8111e_link_change(struct net_device *dev) "Full" : "Half"); netif_carrier_on(dev); - } - else{ + } else { lp->link_config.speed = SPEED_INVALID; lp->link_config.duplex = DUPLEX_INVALID; lp->link_config.autoneg = AUTONEG_INVALID; @@ -840,7 +839,7 @@ static int amd8111e_read_mib(void __iomem *mmio, u8 MIB_COUNTER) unsigned int data; unsigned int repeat = REPEAT_CNT; - writew( MIB_RD_CMD | MIB_COUNTER, mmio + MIB_ADDR); + writew(MIB_RD_CMD | MIB_COUNTER, mmio + MIB_ADDR); do { status = readw(mmio + MIB_ADDR); udelay(2); /* controller takes MAX 2 us to get mib data */ @@ -863,7 +862,7 @@ static struct net_device_stats *amd8111e_get_stats(struct net_device *dev) if (!lp->opened) return new_stats; - spin_lock_irqsave (&lp->lock, flags); + spin_lock_irqsave(&lp->lock, flags); /* stats.rx_packets */ new_stats->rx_packets = amd8111e_read_mib(mmio, rcv_broadcast_pkts)+ @@ -943,7 +942,7 @@ static struct net_device_stats *amd8111e_get_stats(struct net_device *dev) /* Reset the mibs for collecting new statistics */ /* writew(MIB_CLEAR, mmio + MIB_ADDR);*/ - spin_unlock_irqrestore (&lp->lock, flags); + spin_unlock_irqrestore(&lp->lock, flags); return new_stats; } @@ -974,96 +973,90 @@ static int amd8111e_calc_coalesce(struct net_device *dev) rx_data_rate = coal_conf->rx_bytes - coal_conf->rx_prev_bytes; coal_conf->rx_prev_bytes = coal_conf->rx_bytes; - if(rx_pkt_rate < 800){ - if(coal_conf->rx_coal_type != NO_COALESCE){ + if (rx_pkt_rate < 800) { + if (coal_conf->rx_coal_type != NO_COALESCE) { coal_conf->rx_timeout = 0x0; coal_conf->rx_event_count = 0; - amd8111e_set_coalesce(dev,RX_INTR_COAL); + amd8111e_set_coalesce(dev, RX_INTR_COAL); coal_conf->rx_coal_type = NO_COALESCE; } - } - else{ + } else { rx_pkt_size = rx_data_rate/rx_pkt_rate; - if (rx_pkt_size < 128){ - if(coal_conf->rx_coal_type != NO_COALESCE){ + if (rx_pkt_size < 128) { + if (coal_conf->rx_coal_type != NO_COALESCE) { coal_conf->rx_timeout = 0; coal_conf->rx_event_count = 0; - amd8111e_set_coalesce(dev,RX_INTR_COAL); + amd8111e_set_coalesce(dev, RX_INTR_COAL); coal_conf->rx_coal_type = NO_COALESCE; } - } - else if ( (rx_pkt_size >= 128) && (rx_pkt_size < 512) ){ + } else if ((rx_pkt_size >= 128) && (rx_pkt_size < 512)) { - if(coal_conf->rx_coal_type != LOW_COALESCE){ + if (coal_conf->rx_coal_type != LOW_COALESCE) { coal_conf->rx_timeout = 1; coal_conf->rx_event_count = 4; - amd8111e_set_coalesce(dev,RX_INTR_COAL); + amd8111e_set_coalesce(dev, RX_INTR_COAL); coal_conf->rx_coal_type = LOW_COALESCE; } - } - else if ((rx_pkt_size >= 512) && (rx_pkt_size < 1024)){ + } else if ((rx_pkt_size >= 512) && (rx_pkt_size < 1024)) { - if(coal_conf->rx_coal_type != MEDIUM_COALESCE){ + if (coal_conf->rx_coal_type != MEDIUM_COALESCE) { coal_conf->rx_timeout = 1; coal_conf->rx_event_count = 4; - amd8111e_set_coalesce(dev,RX_INTR_COAL); + amd8111e_set_coalesce(dev, RX_INTR_COAL); coal_conf->rx_coal_type = MEDIUM_COALESCE; } - } - else if(rx_pkt_size >= 1024){ - if(coal_conf->rx_coal_type != HIGH_COALESCE){ + } else if (rx_pkt_size >= 1024) { + + if (coal_conf->rx_coal_type != HIGH_COALESCE) { coal_conf->rx_timeout = 2; coal_conf->rx_event_count = 3; - amd8111e_set_coalesce(dev,RX_INTR_COAL); + amd8111e_set_coalesce(dev, RX_INTR_COAL); coal_conf->rx_coal_type = HIGH_COALESCE; } } } - /* NOW FOR TX INTR COALESC */ - if(tx_pkt_rate < 800){ - if(coal_conf->tx_coal_type != NO_COALESCE){ + /* NOW FOR TX INTR COALESC */ + if (tx_pkt_rate < 800) { + if (coal_conf->tx_coal_type != NO_COALESCE) { 
coal_conf->tx_timeout = 0x0; coal_conf->tx_event_count = 0; - amd8111e_set_coalesce(dev,TX_INTR_COAL); + amd8111e_set_coalesce(dev, TX_INTR_COAL); coal_conf->tx_coal_type = NO_COALESCE; } - } - else{ + } else { tx_pkt_size = tx_data_rate/tx_pkt_rate; - if (tx_pkt_size < 128){ + if (tx_pkt_size < 128) { - if(coal_conf->tx_coal_type != NO_COALESCE){ + if (coal_conf->tx_coal_type != NO_COALESCE) { coal_conf->tx_timeout = 0; coal_conf->tx_event_count = 0; - amd8111e_set_coalesce(dev,TX_INTR_COAL); + amd8111e_set_coalesce(dev, TX_INTR_COAL); coal_conf->tx_coal_type = NO_COALESCE; } - } - else if ( (tx_pkt_size >= 128) && (tx_pkt_size < 512) ){ + } else if ((tx_pkt_size >= 128) && (tx_pkt_size < 512)) { - if(coal_conf->tx_coal_type != LOW_COALESCE){ + if (coal_conf->tx_coal_type != LOW_COALESCE) { coal_conf->tx_timeout = 1; coal_conf->tx_event_count = 2; - amd8111e_set_coalesce(dev,TX_INTR_COAL); + amd8111e_set_coalesce(dev, TX_INTR_COAL); coal_conf->tx_coal_type = LOW_COALESCE; } - } - else if ((tx_pkt_size >= 512) && (tx_pkt_size < 1024)){ + } else if ((tx_pkt_size >= 512) && (tx_pkt_size < 1024)) { - if(coal_conf->tx_coal_type != MEDIUM_COALESCE){ + if (coal_conf->tx_coal_type != MEDIUM_COALESCE) { coal_conf->tx_timeout = 2; coal_conf->tx_event_count = 5; - amd8111e_set_coalesce(dev,TX_INTR_COAL); + amd8111e_set_coalesce(dev, TX_INTR_COAL); coal_conf->tx_coal_type = MEDIUM_COALESCE; } } else if (tx_pkt_size >= 1024) { @@ -1091,7 +1084,7 @@ static irqreturn_t amd8111e_interrupt(int irq, void *dev_id) unsigned int intr0, intren0; unsigned int handled = 1; - if(unlikely(dev == NULL)) + if (unlikely(dev == NULL)) return IRQ_NONE; spin_lock(&lp->lock); @@ -1105,7 +1098,7 @@ static irqreturn_t amd8111e_interrupt(int irq, void *dev_id) /* Process all the INT event until INTR bit is clear. */ - if (!(intr0 & INTR)){ + if (!(intr0 & INTR)) { handled = 0; goto err_no_interrupt; } @@ -1140,7 +1133,7 @@ static irqreturn_t amd8111e_interrupt(int irq, void *dev_id) amd8111e_calc_coalesce(dev); err_no_interrupt: - writel( VAL0 | INTREN,mmio + CMD0); + writel(VAL0 | INTREN, mmio + CMD0); spin_unlock(&lp->lock); @@ -1180,7 +1173,7 @@ static int amd8111e_close(struct net_device *dev) netif_carrier_off(lp->amd8111e_net_dev); /* Delete ipg timer */ - if(lp->options & OPTION_DYN_IPG_ENABLE) + if (lp->options & OPTION_DYN_IPG_ENABLE) del_timer_sync(&lp->ipg_data.ipg_timer); spin_unlock_irq(&lp->lock); @@ -1200,8 +1193,8 @@ static int amd8111e_open(struct net_device *dev) { struct amd8111e_priv *lp = netdev_priv(dev); - if(dev->irq ==0 || request_irq(dev->irq, amd8111e_interrupt, IRQF_SHARED, - dev->name, dev)) + if (dev->irq == 0 || request_irq(dev->irq, amd8111e_interrupt, + IRQF_SHARED, dev->name, dev)) return -EAGAIN; napi_enable(&lp->napi); @@ -1210,7 +1203,7 @@ static int amd8111e_open(struct net_device *dev) amd8111e_init_hw_default(lp); - if(amd8111e_restart(dev)){ + if (amd8111e_restart(dev)) { spin_unlock_irq(&lp->lock); napi_disable(&lp->napi); if (dev->irq) @@ -1218,7 +1211,7 @@ static int amd8111e_open(struct net_device *dev) return -ENOMEM; } /* Start ipg timer */ - if(lp->options & OPTION_DYN_IPG_ENABLE){ + if (lp->options & OPTION_DYN_IPG_ENABLE) { add_timer(&lp->ipg_data.ipg_timer); netdev_info(dev, "Dynamic IPG Enabled\n"); } @@ -1289,10 +1282,10 @@ static netdev_tx_t amd8111e_start_xmit(struct sk_buff *skb, lp->tx_idx++; /* Trigger an immediate send poll. 
*/ - writel( VAL1 | TDMD0, lp->mmio + CMD0); - writel( VAL2 | RDMD0,lp->mmio + CMD0); + writel(VAL1 | TDMD0, lp->mmio + CMD0); + writel(VAL2 | RDMD0, lp->mmio + CMD0); - if(amd8111e_tx_queue_avail(lp) < 0){ + if (amd8111e_tx_queue_avail(lp) < 0) { netif_stop_queue(dev); } spin_unlock_irqrestore(&lp->lock, flags); @@ -1326,15 +1319,15 @@ static void amd8111e_set_multicast_list(struct net_device *dev) { struct netdev_hw_addr *ha; struct amd8111e_priv *lp = netdev_priv(dev); - u32 mc_filter[2] ; + u32 mc_filter[2]; int bit_num; - if(dev->flags & IFF_PROMISC){ - writel( VAL2 | PROM, lp->mmio + CMD2); + if (dev->flags & IFF_PROMISC) { + writel(VAL2 | PROM, lp->mmio + CMD2); return; } else - writel( PROM, lp->mmio + CMD2); + writel(PROM, lp->mmio + CMD2); if (dev->flags & IFF_ALLMULTI || netdev_mc_count(dev) > MAX_FILTER_SIZE) { /* get all multicast packet */ @@ -1439,7 +1432,7 @@ static int amd8111e_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol_ if (wol_info->wolopts & WAKE_MAGIC) lp->options |= (OPTION_WOL_ENABLE | OPTION_WAKE_MAGIC_ENABLE); - else if(wol_info->wolopts & WAKE_PHY) + else if (wol_info->wolopts & WAKE_PHY) lp->options |= (OPTION_WOL_ENABLE | OPTION_WAKE_PHY_ENABLE); else @@ -1464,14 +1457,14 @@ static const struct ethtool_ops ops = { * gets/sets driver speed, gets memory mapped register values, forces * auto negotiation, sets/gets WOL options for ethtool application. */ -static int amd8111e_ioctl(struct net_device *dev , struct ifreq *ifr, int cmd) +static int amd8111e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct mii_ioctl_data *data = if_mii(ifr); struct amd8111e_priv *lp = netdev_priv(dev); int err; u32 mii_regval; - switch(cmd) { + switch (cmd) { case SIOCGMIIPHY: data->phy_id = lp->ext_phy_addr; @@ -1511,7 +1504,7 @@ static int amd8111e_set_mac_address(struct net_device *dev, void *p) spin_lock_irq(&lp->lock); /* Setting the MAC address to the device */ for (i = 0; i < ETH_ALEN; i++) - writeb( dev->dev_addr[i], lp->mmio + PADR + i ); + writeb(dev->dev_addr[i], lp->mmio + PADR + i); spin_unlock_irq(&lp->lock); @@ -1536,22 +1529,22 @@ static int amd8111e_change_mtu(struct net_device *dev, int new_mtu) spin_lock_irq(&lp->lock); - /* stop the chip */ + /* stop the chip */ writel(RUN, lp->mmio + CMD0); dev->mtu = new_mtu; err = amd8111e_restart(dev); spin_unlock_irq(&lp->lock); - if(!err) + if (!err) netif_start_queue(dev); return err; } static int amd8111e_enable_magicpkt(struct amd8111e_priv *lp) { - writel( VAL1|MPPLBA, lp->mmio + CMD3); - writel( VAL0|MPEN_SW, lp->mmio + CMD7); + writel(VAL1 | MPPLBA, lp->mmio + CMD3); + writel(VAL0 | MPEN_SW, lp->mmio + CMD7); /* To eliminate PCI posting bug */ readl(lp->mmio + CMD7); @@ -1562,7 +1555,7 @@ static int amd8111e_enable_link_change(struct amd8111e_priv *lp) { /* Adapter is already stoped/suspended/interrupt-disabled */ - writel(VAL0|LCMODE_SW,lp->mmio + CMD7); + writel(VAL0 | LCMODE_SW, lp->mmio + CMD7); /* To eliminate PCI posting bug */ readl(lp->mmio + CMD7); @@ -1584,7 +1577,7 @@ static void amd8111e_tx_timeout(struct net_device *dev, unsigned int txqueue) spin_lock_irq(&lp->lock); err = amd8111e_restart(dev); spin_unlock_irq(&lp->lock); - if(!err) + if (!err) netif_wake_queue(dev); } @@ -1605,22 +1598,21 @@ static int __maybe_unused amd8111e_suspend(struct device *dev_d) /* stop chip */ spin_lock_irq(&lp->lock); - if(lp->options & OPTION_DYN_IPG_ENABLE) + if (lp->options & OPTION_DYN_IPG_ENABLE) del_timer_sync(&lp->ipg_data.ipg_timer); amd8111e_stop_chip(lp); spin_unlock_irq(&lp->lock); - 
if(lp->options & OPTION_WOL_ENABLE){ + if (lp->options & OPTION_WOL_ENABLE) { /* enable wol */ - if(lp->options & OPTION_WAKE_MAGIC_ENABLE) + if (lp->options & OPTION_WAKE_MAGIC_ENABLE) amd8111e_enable_magicpkt(lp); - if(lp->options & OPTION_WAKE_PHY_ENABLE) + if (lp->options & OPTION_WAKE_PHY_ENABLE) amd8111e_enable_link_change(lp); device_set_wakeup_enable(dev_d, 1); - } - else{ + } else { device_set_wakeup_enable(dev_d, 0); } @@ -1640,7 +1632,7 @@ static int __maybe_unused amd8111e_resume(struct device *dev_d) spin_lock_irq(&lp->lock); amd8111e_restart(dev); /* Restart ipg timer */ - if(lp->options & OPTION_DYN_IPG_ENABLE) + if (lp->options & OPTION_DYN_IPG_ENABLE) mod_timer(&lp->ipg_data.ipg_timer, jiffies + IPG_CONVERGE_JIFFIES); spin_unlock_irq(&lp->lock); @@ -1657,14 +1649,14 @@ static void amd8111e_config_ipg(struct timer_list *t) unsigned int total_col_cnt; unsigned int tmp_ipg; - if(lp->link_config.duplex == DUPLEX_FULL){ + if (lp->link_config.duplex == DUPLEX_FULL) { ipg_data->ipg = DEFAULT_IPG; return; } - if(ipg_data->ipg_state == SSTATE){ + if (ipg_data->ipg_state == SSTATE) { - if(ipg_data->timer_tick == IPG_STABLE_TIME){ + if (ipg_data->timer_tick == IPG_STABLE_TIME) { ipg_data->timer_tick = 0; ipg_data->ipg = MIN_IPG - IPG_STEP; @@ -1676,7 +1668,7 @@ static void amd8111e_config_ipg(struct timer_list *t) ipg_data->timer_tick++; } - if(ipg_data->ipg_state == CSTATE){ + if (ipg_data->ipg_state == CSTATE) { /* Get the current collision count */ @@ -1684,10 +1676,10 @@ static void amd8111e_config_ipg(struct timer_list *t) amd8111e_read_mib(mmio, xmt_collisions); if ((total_col_cnt - prev_col_cnt) < - (ipg_data->diff_col_cnt)){ + (ipg_data->diff_col_cnt)) { ipg_data->diff_col_cnt = - total_col_cnt - prev_col_cnt ; + total_col_cnt - prev_col_cnt; ipg_data->ipg = ipg_data->current_ipg; } @@ -1696,7 +1688,7 @@ static void amd8111e_config_ipg(struct timer_list *t) if (ipg_data->current_ipg <= MAX_IPG) tmp_ipg = ipg_data->current_ipg; - else{ + else { tmp_ipg = ipg_data->ipg; ipg_data->ipg_state = SSTATE; } @@ -1748,24 +1740,24 @@ static int amd8111e_probe_one(struct pci_dev *pdev, const struct pci_device_id *ent) { int err, i; - unsigned long reg_addr,reg_len; + unsigned long reg_addr, reg_len; struct amd8111e_priv *lp; struct net_device *dev; err = pci_enable_device(pdev); - if(err){ + if (err) { dev_err(&pdev->dev, "Cannot enable new PCI device\n"); return err; } - if(!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)){ + if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { dev_err(&pdev->dev, "Cannot find PCI base address\n"); err = -ENODEV; goto err_disable_pdev; } err = pci_request_regions(pdev, MODULE_NAME); - if(err){ + if (err) { dev_err(&pdev->dev, "Cannot obtain PCI resources\n"); goto err_disable_pdev; } @@ -1798,7 +1790,7 @@ static int amd8111e_probe_one(struct pci_dev *pdev, SET_NETDEV_DEV(dev, &pdev->dev); #if AMD8111E_VLAN_TAG_USED - dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX ; + dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; #endif lp = netdev_priv(dev); @@ -1821,16 +1813,16 @@ static int amd8111e_probe_one(struct pci_dev *pdev, /* Setting user defined parametrs */ lp->ext_phy_option = speed_duplex[card_idx]; - if(coalesce[card_idx]) + if (coalesce[card_idx]) lp->options |= OPTION_INTR_COAL_ENABLE; - if(dynamic_ipg[card_idx++]) + if (dynamic_ipg[card_idx++]) lp->options |= OPTION_DYN_IPG_ENABLE; /* Initialize driver entry points */ dev->netdev_ops = &amd8111e_netdev_ops; dev->ethtool_ops = &ops; - dev->irq =pdev->irq; + 
dev->irq = pdev->irq; dev->watchdog_timeo = AMD8111E_TX_TIMEOUT; dev->min_mtu = AMD8111E_MIN_MTU; dev->max_mtu = AMD8111E_MAX_MTU; @@ -1861,7 +1853,7 @@ static int amd8111e_probe_one(struct pci_dev *pdev, pci_set_drvdata(pdev, dev); /* Initialize software ipg timer */ - if(lp->options & OPTION_DYN_IPG_ENABLE){ + if (lp->options & OPTION_DYN_IPG_ENABLE) { timer_setup(&lp->ipg_data.ipg_timer, amd8111e_config_ipg, 0); lp->ipg_data.ipg_timer.expires = jiffies + IPG_CONVERGE_JIFFIES; @@ -1870,7 +1862,7 @@ static int amd8111e_probe_one(struct pci_dev *pdev, } /* display driver and device information */ - chip_version = (readl(lp->mmio + CHIPID) & 0xf0000000)>>28; + chip_version = (readl(lp->mmio + CHIPID) & 0xf0000000) >> 28; dev_info(&pdev->dev, "[ Rev %x ] PCI 10/100BaseT Ethernet %pM\n", chip_version, dev->dev_addr); if (lp->ext_phy_id) @@ -1879,7 +1871,7 @@ static int amd8111e_probe_one(struct pci_dev *pdev, else dev_info(&pdev->dev, "Couldn't detect MII PHY, assuming address 0x01\n"); - return 0; + return 0; err_free_dev: free_netdev(dev); @@ -1919,7 +1911,7 @@ MODULE_DEVICE_TABLE(pci, amd8111e_pci_tbl); static SIMPLE_DEV_PM_OPS(amd8111e_pm_ops, amd8111e_suspend, amd8111e_resume); static struct pci_driver amd8111e_driver = { - .name = MODULE_NAME, + .name = MODULE_NAME, .id_table = amd8111e_pci_tbl, .probe = amd8111e_probe_one, .remove = amd8111e_remove_one, diff --git a/drivers/net/ethernet/amd/hplance.c b/drivers/net/ethernet/amd/hplance.c index e10aceb2b767..6784f8748638 100644 --- a/drivers/net/ethernet/amd/hplance.c +++ b/drivers/net/ethernet/amd/hplance.c @@ -170,6 +170,7 @@ static void hplance_init(struct net_device *dev, struct dio_dev *d) static void hplance_writerap(void *priv, unsigned short value) { struct lance_private *lp = (struct lance_private *)priv; + do { out_be16(lp->base + HPLANCE_REGOFF + LANCE_RAP, value); } while ((in_8(lp->base + HPLANCE_STATUS) & LE_ACK) == 0); @@ -178,6 +179,7 @@ static void hplance_writerap(void *priv, unsigned short value) static void hplance_writerdp(void *priv, unsigned short value) { struct lance_private *lp = (struct lance_private *)priv; + do { out_be16(lp->base + HPLANCE_REGOFF + LANCE_RDP, value); } while ((in_8(lp->base + HPLANCE_STATUS) & LE_ACK) == 0); @@ -187,6 +189,7 @@ static unsigned short hplance_readrdp(void *priv) { struct lance_private *lp = (struct lance_private *)priv; __u16 value; + do { value = in_be16(lp->base + HPLANCE_REGOFF + LANCE_RDP); } while ((in_8(lp->base + HPLANCE_STATUS) & LE_ACK) == 0); diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c index 187b0b9a6e1d..aa412506832d 100644 --- a/drivers/net/ethernet/amd/pcnet32.c +++ b/drivers/net/ethernet/amd/pcnet32.c @@ -1534,8 +1534,7 @@ pcnet32_probe_pci(struct pci_dev *pdev, const struct pci_device_id *ent) } pci_set_master(pdev); - ioaddr = pci_resource_start(pdev, 0); - if (!ioaddr) { + if (!pci_resource_len(pdev, 0)) { if (pcnet32_debug & NETIF_MSG_PROBE) pr_err("card has no PCI IO resources, aborting\n"); err = -ENODEV; @@ -1548,6 +1547,8 @@ pcnet32_probe_pci(struct pci_dev *pdev, const struct pci_device_id *ent) pr_err("architecture does not support 32bit PCI busmaster DMA\n"); goto err_disable_dev; } + + ioaddr = pci_resource_start(pdev, 0); if (!request_region(ioaddr, PCNET32_TOTAL_SIZE, "pcnet32_probe_pci")) { if (pcnet32_debug & NETIF_MSG_PROBE) pr_err("io address range already allocated\n"); @@ -2852,8 +2853,7 @@ static void pcnet32_check_media(struct net_device *dev, int verbose) netif_info(lp, link, dev, "link down\n"); } 
if (lp->phycount > 1) { - curr_link = pcnet32_check_otherphy(dev); - prev_link = 0; + pcnet32_check_otherphy(dev); } } else if (verbose || !prev_link) { netif_carrier_on(dev); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index ba8321ec1ee7..3305979a9f7c 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -180,9 +180,9 @@ #define XGBE_DMA_SYS_AWCR 0x30303030 /* DMA cache settings - PCI device */ -#define XGBE_DMA_PCI_ARCR 0x00000003 -#define XGBE_DMA_PCI_AWCR 0x13131313 -#define XGBE_DMA_PCI_AWARCR 0x00000313 +#define XGBE_DMA_PCI_ARCR 0x000f0f0f +#define XGBE_DMA_PCI_AWCR 0x0f0f0f0f +#define XGBE_DMA_PCI_AWARCR 0x00000f0f /* DMA channel interrupt modes */ #define XGBE_IRQ_MODE_EDGE 0 diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c index b56a9e2aecd9..67b8113a2b53 100644 --- a/drivers/net/ethernet/arc/emac_main.c +++ b/drivers/net/ethernet/arc/emac_main.c @@ -857,7 +857,6 @@ int arc_emac_probe(struct net_device *ndev, int interface) struct device_node *phy_node; struct phy_device *phydev = NULL; struct arc_emac_priv *priv; - const char *mac_addr; unsigned int id, clock_frequency, irq; int err; @@ -942,11 +941,8 @@ int arc_emac_probe(struct net_device *ndev, int interface) } /* Get MAC address from device tree */ - mac_addr = of_get_mac_address(dev->of_node); - - if (!IS_ERR(mac_addr)) - ether_addr_copy(ndev->dev_addr, mac_addr); - else + err = of_get_mac_address(dev->of_node, ndev->dev_addr); + if (err) eth_hw_addr_random(ndev); arc_emac_set_address_internal(ndev); diff --git a/drivers/net/ethernet/atheros/Kconfig b/drivers/net/ethernet/atheros/Kconfig index fb803bf92ded..482c58c4c584 100644 --- a/drivers/net/ethernet/atheros/Kconfig +++ b/drivers/net/ethernet/atheros/Kconfig @@ -21,6 +21,7 @@ config AG71XX tristate "Atheros AR7XXX/AR9XXX built-in ethernet mac support" depends on ATH79 select PHYLINK + imply NET_SELFTESTS help If you wish to compile a kernel for AR7XXX/91XXX and enable ethernet support, then you should always answer Y to this. 
diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index a60ce9030581..1ba81b1eb6fd 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -37,6 +37,7 @@ #include <linux/reset.h> #include <linux/clk.h> #include <linux/io.h> +#include <net/selftests.h> /* For our NAPI weight bigger does *NOT* mean better - it means more * D-cache misses and lots more wasted cycles than we'll ever @@ -497,12 +498,17 @@ static int ag71xx_ethtool_set_pauseparam(struct net_device *ndev, static void ag71xx_ethtool_get_strings(struct net_device *netdev, u32 sset, u8 *data) { - if (sset == ETH_SS_STATS) { - int i; + int i; + switch (sset) { + case ETH_SS_STATS: for (i = 0; i < ARRAY_SIZE(ag71xx_statistics); i++) memcpy(data + i * ETH_GSTRING_LEN, ag71xx_statistics[i].name, ETH_GSTRING_LEN); + break; + case ETH_SS_TEST: + net_selftest_get_strings(data); + break; } } @@ -519,9 +525,14 @@ static void ag71xx_ethtool_get_stats(struct net_device *ndev, static int ag71xx_ethtool_get_sset_count(struct net_device *ndev, int sset) { - if (sset == ETH_SS_STATS) + switch (sset) { + case ETH_SS_STATS: return ARRAY_SIZE(ag71xx_statistics); - return -EOPNOTSUPP; + case ETH_SS_TEST: + return net_selftest_get_count(); + default: + return -EOPNOTSUPP; + } } static const struct ethtool_ops ag71xx_ethtool_ops = { @@ -536,6 +547,7 @@ static const struct ethtool_ops ag71xx_ethtool_ops = { .get_strings = ag71xx_ethtool_get_strings, .get_ethtool_stats = ag71xx_ethtool_get_stats, .get_sset_count = ag71xx_ethtool_get_sset_count, + .self_test = net_selftest, }; static int ag71xx_mdio_wait_busy(struct ag71xx *ag) @@ -1658,9 +1670,9 @@ static int ag71xx_rx_packets(struct ag71xx *ag, int limit) struct net_device *ndev = ag->ndev; int ring_mask, ring_size, done = 0; unsigned int pktlen_mask, offset; - struct sk_buff *next, *skb; struct ag71xx_ring *ring; struct list_head rx_list; + struct sk_buff *skb; ring = &ag->rx_ring; pktlen_mask = ag->dcfg->desc_pktlen_mask; @@ -1725,7 +1737,7 @@ next: ag71xx_ring_rx_refill(ag); - list_for_each_entry_safe(skb, next, &rx_list, list) + list_for_each_entry(skb, &rx_list, list) skb->protocol = eth_type_trans(skb, ndev); netif_receive_skb_list(&rx_list); @@ -1856,7 +1868,6 @@ static int ag71xx_probe(struct platform_device *pdev) const struct ag71xx_dcfg *dcfg; struct net_device *ndev; struct resource *res; - const void *mac_addr; int tx_size, err, i; struct ag71xx *ag; @@ -1957,10 +1968,8 @@ static int ag71xx_probe(struct platform_device *pdev) ag->stop_desc->ctrl = 0; ag->stop_desc->next = (u32)ag->stop_desc_dma; - mac_addr = of_get_mac_address(np); - if (!IS_ERR(mac_addr)) - memcpy(ndev->dev_addr, mac_addr, ETH_ALEN); - if (IS_ERR(mac_addr) || !is_valid_ether_addr(ndev->dev_addr)) { + err = of_get_mac_address(np, ndev->dev_addr); + if (err) { netif_err(ag, probe, ndev, "invalid MAC address, using random address\n"); eth_random_addr(ndev->dev_addr); } diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c.h b/drivers/net/ethernet/atheros/atl1c/atl1c.h index a0562a90fb6d..28ae5c16831e 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c.h +++ b/drivers/net/ethernet/atheros/atl1c/atl1c.h @@ -367,6 +367,7 @@ struct atl1c_hw { u16 phy_id1; u16 phy_id2; + spinlock_t intr_mask_lock; /* protect the intr_mask */ u32 intr_mask; u8 preamble_len; @@ -506,6 +507,7 @@ struct atl1c_adapter { struct net_device *netdev; struct pci_dev *pdev; struct napi_struct napi; + struct napi_struct tx_napi; struct page *rx_page; unsigned int 
rx_page_offset; unsigned int rx_frag_size; diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 3f65f2b370c5..1d17c24e6d75 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -47,7 +47,7 @@ static void atl1c_down(struct atl1c_adapter *adapter); static int atl1c_reset_mac(struct atl1c_hw *hw); static void atl1c_reset_dma_ring(struct atl1c_adapter *adapter); static int atl1c_configure(struct atl1c_adapter *adapter); -static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter); +static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter, bool napi_mode); static const u32 atl1c_default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | @@ -470,7 +470,7 @@ static void atl1c_set_rxbufsize(struct atl1c_adapter *adapter, adapter->rx_buffer_len = mtu > AT_RX_BUF_SIZE ? roundup(mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN, 8) : AT_RX_BUF_SIZE; - head_size = SKB_DATA_ALIGN(adapter->rx_buffer_len + NET_SKB_PAD) + + head_size = SKB_DATA_ALIGN(adapter->rx_buffer_len + NET_SKB_PAD + NET_IP_ALIGN) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); adapter->rx_frag_size = roundup_pow_of_two(head_size); } @@ -813,6 +813,7 @@ static int atl1c_sw_init(struct atl1c_adapter *adapter) atl1c_set_rxbufsize(adapter, adapter->netdev); atomic_set(&adapter->irq_sem, 1); spin_lock_init(&adapter->mdio_lock); + spin_lock_init(&adapter->hw.intr_mask_lock); set_bit(__AT_DOWN, &adapter->flags); return 0; @@ -1434,7 +1435,7 @@ static int atl1c_configure(struct atl1c_adapter *adapter) atl1c_set_multi(netdev); atl1c_restore_vlan(adapter); - num = atl1c_alloc_rx_buffer(adapter); + num = atl1c_alloc_rx_buffer(adapter, false); if (unlikely(num == 0)) return -ENOMEM; @@ -1530,20 +1531,19 @@ static inline void atl1c_clear_phy_int(struct atl1c_adapter *adapter) spin_unlock(&adapter->mdio_lock); } -static bool atl1c_clean_tx_irq(struct atl1c_adapter *adapter, - enum atl1c_trans_queue type) +static int atl1c_clean_tx(struct napi_struct *napi, int budget) { - struct atl1c_tpd_ring *tpd_ring = &adapter->tpd_ring[type]; + struct atl1c_adapter *adapter = + container_of(napi, struct atl1c_adapter, tx_napi); + struct atl1c_tpd_ring *tpd_ring = &adapter->tpd_ring[atl1c_trans_normal]; struct atl1c_buffer *buffer_info; struct pci_dev *pdev = adapter->pdev; u16 next_to_clean = atomic_read(&tpd_ring->next_to_clean); u16 hw_next_to_clean; - u16 reg; unsigned int total_bytes = 0, total_packets = 0; + unsigned long flags; - reg = type == atl1c_trans_high ? 
REG_TPD_PRI1_CIDX : REG_TPD_PRI0_CIDX; - - AT_READ_REGW(&adapter->hw, reg, &hw_next_to_clean); + AT_READ_REGW(&adapter->hw, REG_TPD_PRI0_CIDX, &hw_next_to_clean); while (next_to_clean != hw_next_to_clean) { buffer_info = &tpd_ring->buffer_info[next_to_clean]; @@ -1564,7 +1564,15 @@ static bool atl1c_clean_tx_irq(struct atl1c_adapter *adapter, netif_wake_queue(adapter->netdev); } - return true; + if (total_packets < budget) { + napi_complete_done(napi, total_packets); + spin_lock_irqsave(&adapter->hw.intr_mask_lock, flags); + adapter->hw.intr_mask |= ISR_TX_PKT; + AT_WRITE_REG(&adapter->hw, REG_IMR, adapter->hw.intr_mask); + spin_unlock_irqrestore(&adapter->hw.intr_mask_lock, flags); + return total_packets; + } + return budget; } /** @@ -1599,13 +1607,22 @@ static irqreturn_t atl1c_intr(int irq, void *data) AT_WRITE_REG(hw, REG_ISR, status | ISR_DIS_INT); if (status & ISR_RX_PKT) { if (likely(napi_schedule_prep(&adapter->napi))) { + spin_lock(&hw->intr_mask_lock); hw->intr_mask &= ~ISR_RX_PKT; AT_WRITE_REG(hw, REG_IMR, hw->intr_mask); + spin_unlock(&hw->intr_mask_lock); __napi_schedule(&adapter->napi); } } - if (status & ISR_TX_PKT) - atl1c_clean_tx_irq(adapter, atl1c_trans_normal); + if (status & ISR_TX_PKT) { + if (napi_schedule_prep(&adapter->tx_napi)) { + spin_lock(&hw->intr_mask_lock); + hw->intr_mask &= ~ISR_TX_PKT; + AT_WRITE_REG(hw, REG_IMR, hw->intr_mask); + spin_unlock(&hw->intr_mask_lock); + __napi_schedule(&adapter->tx_napi); + } + } handled = IRQ_HANDLED; /* check if PCIE PHY Link down */ @@ -1650,14 +1667,20 @@ static inline void atl1c_rx_checksum(struct atl1c_adapter *adapter, skb_checksum_none_assert(skb); } -static struct sk_buff *atl1c_alloc_skb(struct atl1c_adapter *adapter) +static struct sk_buff *atl1c_alloc_skb(struct atl1c_adapter *adapter, + bool napi_mode) { struct sk_buff *skb; struct page *page; - if (adapter->rx_frag_size > PAGE_SIZE) - return netdev_alloc_skb(adapter->netdev, - adapter->rx_buffer_len); + if (adapter->rx_frag_size > PAGE_SIZE) { + if (likely(napi_mode)) + return napi_alloc_skb(&adapter->napi, + adapter->rx_buffer_len); + else + return netdev_alloc_skb_ip_align(adapter->netdev, + adapter->rx_buffer_len); + } page = adapter->rx_page; if (!page) { @@ -1670,7 +1693,7 @@ static struct sk_buff *atl1c_alloc_skb(struct atl1c_adapter *adapter) skb = build_skb(page_address(page) + adapter->rx_page_offset, adapter->rx_frag_size); if (likely(skb)) { - skb_reserve(skb, NET_SKB_PAD); + skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); adapter->rx_page_offset += adapter->rx_frag_size; if (adapter->rx_page_offset >= PAGE_SIZE) adapter->rx_page = NULL; @@ -1680,7 +1703,7 @@ static struct sk_buff *atl1c_alloc_skb(struct atl1c_adapter *adapter) return skb; } -static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter) +static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter, bool napi_mode) { struct atl1c_rfd_ring *rfd_ring = &adapter->rfd_ring; struct pci_dev *pdev = adapter->pdev; @@ -1701,7 +1724,7 @@ static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter) while (next_info->flags & ATL1C_BUFFER_FREE) { rfd_desc = ATL1C_RFD_DESC(rfd_ring, rfd_next_to_use); - skb = atl1c_alloc_skb(adapter); + skb = atl1c_alloc_skb(adapter, napi_mode); if (unlikely(!skb)) { if (netif_msg_rx_err(adapter)) dev_warn(&pdev->dev, "alloc rx buffer failed\n"); @@ -1851,13 +1874,13 @@ rrs_checked: vlan = le16_to_cpu(vlan); __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan); } - netif_receive_skb(skb); + napi_gro_receive(&adapter->napi, skb); (*work_done)++; count++; } if 
(count) - atl1c_alloc_rx_buffer(adapter); + atl1c_alloc_rx_buffer(adapter, true); } /** @@ -1870,6 +1893,7 @@ static int atl1c_clean(struct napi_struct *napi, int budget) struct atl1c_adapter *adapter = container_of(napi, struct atl1c_adapter, napi); int work_done = 0; + unsigned long flags; /* Keep link state information with original netdev */ if (!netif_carrier_ok(adapter->netdev)) @@ -1880,8 +1904,10 @@ static int atl1c_clean(struct napi_struct *napi, int budget) if (work_done < budget) { quit_polling: napi_complete_done(napi, work_done); + spin_lock_irqsave(&adapter->hw.intr_mask_lock, flags); adapter->hw.intr_mask |= ISR_RX_PKT; AT_WRITE_REG(&adapter->hw, REG_IMR, adapter->hw.intr_mask); + spin_unlock_irqrestore(&adapter->hw.intr_mask_lock, flags); } return work_done; } @@ -2319,6 +2345,7 @@ static int atl1c_up(struct atl1c_adapter *adapter) atl1c_check_link_status(adapter); clear_bit(__AT_DOWN, &adapter->flags); napi_enable(&adapter->napi); + napi_enable(&adapter->tx_napi); atl1c_irq_enable(adapter); netif_start_queue(netdev); return err; @@ -2339,6 +2366,7 @@ static void atl1c_down(struct atl1c_adapter *adapter) set_bit(__AT_DOWN, &adapter->flags); netif_carrier_off(netdev); napi_disable(&adapter->napi); + napi_disable(&adapter->tx_napi); atl1c_irq_disable(adapter); atl1c_free_irq(adapter); /* disable ASPM if device inactive */ @@ -2587,7 +2615,9 @@ static int atl1c_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->mii.mdio_write = atl1c_mdio_write; adapter->mii.phy_id_mask = 0x1f; adapter->mii.reg_num_mask = MDIO_CTRL_REG_MASK; + dev_set_threaded(netdev, true); netif_napi_add(netdev, &adapter->napi, atl1c_clean, 64); + netif_napi_add(netdev, &adapter->tx_napi, atl1c_clean_tx, 64); timer_setup(&adapter->phy_config_timer, atl1c_phy_config, 0); /* setup the private structure */ err = atl1c_sw_init(adapter); diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c index f016f2e12ee7..0cc0db04c27d 100644 --- a/drivers/net/ethernet/atheros/atlx/atl2.c +++ b/drivers/net/ethernet/atheros/atlx/atl2.c @@ -1675,29 +1675,7 @@ static struct pci_driver atl2_driver = { .shutdown = atl2_shutdown, }; -/** - * atl2_init_module - Driver Registration Routine - * - * atl2_init_module is the first routine called when the driver is - * loaded. All it does is register with the PCI subsystem. - */ -static int __init atl2_init_module(void) -{ - return pci_register_driver(&atl2_driver); -} -module_init(atl2_init_module); - -/** - * atl2_exit_module - Driver Exit Cleanup Routine - * - * atl2_exit_module is called just before the driver is removed - * from memory. - */ -static void __exit atl2_exit_module(void) -{ - pci_unregister_driver(&atl2_driver); -} -module_exit(atl2_exit_module); +module_pci_driver(atl2_driver); static void atl2_read_pci_cfg(struct atl2_hw *hw, u32 reg, u16 *value) { diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index f8a168b73307..cb88ffb8f12f 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -54,7 +54,7 @@ config B44_PCI config BCM4908_ENET tristate "Broadcom BCM4908 internal mac support" depends on ARCH_BCM4908 || COMPILE_TEST - default y + default y if ARCH_BCM4908 help This driver supports Ethernet controller integrated into Broadcom BCM4908 family SoCs. 
diff --git a/drivers/net/ethernet/broadcom/bcm4908_enet.c b/drivers/net/ethernet/broadcom/bcm4908_enet.c index 98cf82dea3e4..60d908507f51 100644 --- a/drivers/net/ethernet/broadcom/bcm4908_enet.c +++ b/drivers/net/ethernet/broadcom/bcm4908_enet.c @@ -9,6 +9,7 @@ #include <linux/interrupt.h> #include <linux/module.h> #include <linux/of.h> +#include <linux/of_net.h> #include <linux/platform_device.h> #include <linux/slab.h> #include <linux/string.h> @@ -53,6 +54,7 @@ struct bcm4908_enet_dma_ring { int length; u16 cfg_block; u16 st_ram_block; + struct napi_struct napi; union { void *cpu_addr; @@ -66,8 +68,8 @@ struct bcm4908_enet_dma_ring { struct bcm4908_enet { struct device *dev; struct net_device *netdev; - struct napi_struct napi; void __iomem *base; + int irq_tx; struct bcm4908_enet_dma_ring tx_ring; struct bcm4908_enet_dma_ring rx_ring; @@ -122,24 +124,31 @@ static void enet_umac_set(struct bcm4908_enet *enet, u16 offset, u32 set) * Helpers */ -static void bcm4908_enet_intrs_on(struct bcm4908_enet *enet) +static void bcm4908_enet_set_mtu(struct bcm4908_enet *enet, int mtu) { - enet_write(enet, ENET_DMA_CH_RX_CFG + ENET_DMA_CH_CFG_INT_MASK, ENET_DMA_INT_DEFAULTS); + enet_umac_write(enet, UMAC_MAX_FRAME_LEN, mtu + ENET_MAX_ETH_OVERHEAD); } -static void bcm4908_enet_intrs_off(struct bcm4908_enet *enet) +/*** + * DMA ring ops + */ + +static void bcm4908_enet_dma_ring_intrs_on(struct bcm4908_enet *enet, + struct bcm4908_enet_dma_ring *ring) { - enet_write(enet, ENET_DMA_CH_RX_CFG + ENET_DMA_CH_CFG_INT_MASK, 0); + enet_write(enet, ring->cfg_block + ENET_DMA_CH_CFG_INT_MASK, ENET_DMA_INT_DEFAULTS); } -static void bcm4908_enet_intrs_ack(struct bcm4908_enet *enet) +static void bcm4908_enet_dma_ring_intrs_off(struct bcm4908_enet *enet, + struct bcm4908_enet_dma_ring *ring) { - enet_write(enet, ENET_DMA_CH_RX_CFG + ENET_DMA_CH_CFG_INT_STAT, ENET_DMA_INT_DEFAULTS); + enet_write(enet, ring->cfg_block + ENET_DMA_CH_CFG_INT_MASK, 0); } -static void bcm4908_enet_set_mtu(struct bcm4908_enet *enet, int mtu) +static void bcm4908_enet_dma_ring_intrs_ack(struct bcm4908_enet *enet, + struct bcm4908_enet_dma_ring *ring) { - enet_umac_write(enet, UMAC_MAX_FRAME_LEN, mtu + ENET_MAX_ETH_OVERHEAD); + enet_write(enet, ring->cfg_block + ENET_DMA_CH_CFG_INT_STAT, ENET_DMA_INT_DEFAULTS); } /*** @@ -172,6 +181,7 @@ static int bcm4908_dma_alloc_buf_descs(struct bcm4908_enet *enet, err_free_buf_descs: dma_free_coherent(dev, size, ring->cpu_addr, ring->dma_addr); + ring->cpu_addr = NULL; return -ENOMEM; } @@ -413,11 +423,14 @@ static void bcm4908_enet_gmac_init(struct bcm4908_enet *enet) static irqreturn_t bcm4908_enet_irq_handler(int irq, void *dev_id) { struct bcm4908_enet *enet = dev_id; + struct bcm4908_enet_dma_ring *ring; - bcm4908_enet_intrs_off(enet); - bcm4908_enet_intrs_ack(enet); + ring = (irq == enet->irq_tx) ? 
&enet->tx_ring : &enet->rx_ring; - napi_schedule(&enet->napi); + bcm4908_enet_dma_ring_intrs_off(enet, ring); + bcm4908_enet_dma_ring_intrs_ack(enet, ring); + + napi_schedule(&ring->napi); return IRQ_HANDLED; } @@ -425,6 +438,8 @@ static irqreturn_t bcm4908_enet_irq_handler(int irq, void *dev_id) static int bcm4908_enet_open(struct net_device *netdev) { struct bcm4908_enet *enet = netdev_priv(netdev); + struct bcm4908_enet_dma_ring *tx_ring = &enet->tx_ring; + struct bcm4908_enet_dma_ring *rx_ring = &enet->rx_ring; struct device *dev = enet->dev; int err; @@ -434,6 +449,17 @@ static int bcm4908_enet_open(struct net_device *netdev) return err; } + if (enet->irq_tx > 0) { + err = request_irq(enet->irq_tx, bcm4908_enet_irq_handler, 0, + "tx", enet); + if (err) { + dev_err(dev, "Failed to request IRQ %d: %d\n", + enet->irq_tx, err); + free_irq(netdev->irq, enet); + return err; + } + } + bcm4908_enet_gmac_init(enet); bcm4908_enet_dma_reset(enet); bcm4908_enet_dma_init(enet); @@ -442,14 +468,19 @@ static int bcm4908_enet_open(struct net_device *netdev) enet_set(enet, ENET_DMA_CONTROLLER_CFG, ENET_DMA_CTRL_CFG_MASTER_EN); enet_maskset(enet, ENET_DMA_CONTROLLER_CFG, ENET_DMA_CTRL_CFG_FLOWC_CH1_EN, 0); - bcm4908_enet_dma_rx_ring_enable(enet, &enet->rx_ring); - napi_enable(&enet->napi); + if (enet->irq_tx > 0) { + napi_enable(&tx_ring->napi); + bcm4908_enet_dma_ring_intrs_ack(enet, tx_ring); + bcm4908_enet_dma_ring_intrs_on(enet, tx_ring); + } + + bcm4908_enet_dma_rx_ring_enable(enet, rx_ring); + napi_enable(&rx_ring->napi); netif_carrier_on(netdev); netif_start_queue(netdev); - - bcm4908_enet_intrs_ack(enet); - bcm4908_enet_intrs_on(enet); + bcm4908_enet_dma_ring_intrs_ack(enet, rx_ring); + bcm4908_enet_dma_ring_intrs_on(enet, rx_ring); return 0; } @@ -457,16 +488,20 @@ static int bcm4908_enet_open(struct net_device *netdev) static int bcm4908_enet_stop(struct net_device *netdev) { struct bcm4908_enet *enet = netdev_priv(netdev); + struct bcm4908_enet_dma_ring *tx_ring = &enet->tx_ring; + struct bcm4908_enet_dma_ring *rx_ring = &enet->rx_ring; netif_stop_queue(netdev); netif_carrier_off(netdev); - napi_disable(&enet->napi); + napi_disable(&rx_ring->napi); + napi_disable(&tx_ring->napi); bcm4908_enet_dma_rx_ring_disable(enet, &enet->rx_ring); bcm4908_enet_dma_tx_ring_disable(enet, &enet->tx_ring); bcm4908_enet_dma_uninit(enet); + free_irq(enet->irq_tx, enet); free_irq(enet->netdev->irq, enet); return 0; @@ -483,25 +518,19 @@ static int bcm4908_enet_start_xmit(struct sk_buff *skb, struct net_device *netde u32 tmp; /* Free transmitted skbs */ - while (ring->read_idx != ring->write_idx) { - buf_desc = &ring->buf_desc[ring->read_idx]; - if (le32_to_cpu(buf_desc->ctl) & DMA_CTL_STATUS_OWN) - break; - slot = &ring->slots[ring->read_idx]; - - dma_unmap_single(dev, slot->dma_addr, slot->len, DMA_TO_DEVICE); - dev_kfree_skb(slot->skb); - if (++ring->read_idx == ring->length) - ring->read_idx = 0; - } + if (enet->irq_tx < 0 && + !(le32_to_cpu(ring->buf_desc[ring->read_idx].ctl) & DMA_CTL_STATUS_OWN)) + napi_schedule(&enet->tx_ring.napi); /* Don't use the last empty buf descriptor */ if (ring->read_idx <= ring->write_idx) free_buf_descs = ring->read_idx - ring->write_idx + ring->length; else free_buf_descs = ring->read_idx - ring->write_idx; - if (free_buf_descs < 2) + if (free_buf_descs < 2) { + netif_stop_queue(netdev); return NETDEV_TX_BUSY; + } /* Hardware removes OWN bit after sending data */ buf_desc = &ring->buf_desc[ring->write_idx]; @@ -538,9 +567,10 @@ static int bcm4908_enet_start_xmit(struct 
sk_buff *skb, struct net_device *netde return NETDEV_TX_OK; } -static int bcm4908_enet_poll(struct napi_struct *napi, int weight) +static int bcm4908_enet_poll_rx(struct napi_struct *napi, int weight) { - struct bcm4908_enet *enet = container_of(napi, struct bcm4908_enet, napi); + struct bcm4908_enet_dma_ring *rx_ring = container_of(napi, struct bcm4908_enet_dma_ring, napi); + struct bcm4908_enet *enet = container_of(rx_ring, struct bcm4908_enet, rx_ring); struct device *dev = enet->dev; int handled = 0; @@ -589,7 +619,7 @@ static int bcm4908_enet_poll(struct napi_struct *napi, int weight) if (handled < weight) { napi_complete_done(napi, handled); - bcm4908_enet_intrs_on(enet); + bcm4908_enet_dma_ring_intrs_on(enet, rx_ring); } /* Hardware could disable ring if it run out of descriptors */ @@ -598,6 +628,42 @@ static int bcm4908_enet_poll(struct napi_struct *napi, int weight) return handled; } +static int bcm4908_enet_poll_tx(struct napi_struct *napi, int weight) +{ + struct bcm4908_enet_dma_ring *tx_ring = container_of(napi, struct bcm4908_enet_dma_ring, napi); + struct bcm4908_enet *enet = container_of(tx_ring, struct bcm4908_enet, tx_ring); + struct bcm4908_enet_dma_ring_bd *buf_desc; + struct bcm4908_enet_dma_ring_slot *slot; + struct device *dev = enet->dev; + unsigned int bytes = 0; + int handled = 0; + + while (handled < weight && tx_ring->read_idx != tx_ring->write_idx) { + buf_desc = &tx_ring->buf_desc[tx_ring->read_idx]; + if (le32_to_cpu(buf_desc->ctl) & DMA_CTL_STATUS_OWN) + break; + slot = &tx_ring->slots[tx_ring->read_idx]; + + dma_unmap_single(dev, slot->dma_addr, slot->len, DMA_TO_DEVICE); + dev_kfree_skb(slot->skb); + bytes += slot->len; + if (++tx_ring->read_idx == tx_ring->length) + tx_ring->read_idx = 0; + + handled++; + } + + if (handled < weight) { + napi_complete_done(napi, handled); + bcm4908_enet_dma_ring_intrs_on(enet, tx_ring); + } + + if (netif_queue_stopped(enet->netdev)) + netif_wake_queue(enet->netdev); + + return handled; +} + static int bcm4908_enet_change_mtu(struct net_device *netdev, int new_mtu) { struct bcm4908_enet *enet = netdev_priv(netdev); @@ -640,6 +706,8 @@ static int bcm4908_enet_probe(struct platform_device *pdev) if (netdev->irq < 0) return netdev->irq; + enet->irq_tx = platform_get_irq_byname(pdev, "tx"); + dma_set_coherent_mask(dev, DMA_BIT_MASK(32)); err = bcm4908_enet_dma_alloc(enet); @@ -647,12 +715,15 @@ static int bcm4908_enet_probe(struct platform_device *pdev) return err; SET_NETDEV_DEV(netdev, &pdev->dev); - eth_hw_addr_random(netdev); + err = of_get_mac_address(dev->of_node, netdev->dev_addr); + if (err) + eth_hw_addr_random(netdev); netdev->netdev_ops = &bcm4908_enet_netdev_ops; netdev->min_mtu = ETH_ZLEN; netdev->mtu = ETH_DATA_LEN; netdev->max_mtu = ENET_MTU_MAX; - netif_napi_add(netdev, &enet->napi, bcm4908_enet_poll, 64); + netif_tx_napi_add(netdev, &enet->tx_ring.napi, bcm4908_enet_poll_tx, NAPI_POLL_WEIGHT); + netif_napi_add(netdev, &enet->rx_ring.napi, bcm4908_enet_poll_rx, NAPI_POLL_WEIGHT); err = register_netdev(netdev); if (err) { @@ -670,7 +741,8 @@ static int bcm4908_enet_remove(struct platform_device *pdev) struct bcm4908_enet *enet = platform_get_drvdata(pdev); unregister_netdev(enet->netdev); - netif_napi_del(&enet->napi); + netif_napi_del(&enet->rx_ring.napi); + netif_napi_del(&enet->tx_ring.napi); bcm4908_enet_dma_free(enet); return 0; diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 777bbf6d2586..d9f0f0df8f7b 100644 --- 
a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -2457,7 +2457,6 @@ static int bcm_sysport_probe(struct platform_device *pdev) struct bcm_sysport_priv *priv; struct device_node *dn; struct net_device *dev; - const void *macaddr; u32 txq, rxq; int ret; @@ -2552,12 +2551,10 @@ static int bcm_sysport_probe(struct platform_device *pdev) } /* Initialize netdevice members */ - macaddr = of_get_mac_address(dn); - if (IS_ERR(macaddr)) { + ret = of_get_mac_address(dn, dev->dev_addr); + if (ret) { dev_warn(&pdev->dev, "using random Ethernet MAC\n"); eth_hw_addr_random(dev); - } else { - ether_addr_copy(dev->dev_addr, macaddr); } SET_NETDEV_DEV(dev, &pdev->dev); diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c index a5fd161ab5ee..85fa0ab7201c 100644 --- a/drivers/net/ethernet/broadcom/bgmac-bcma.c +++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c @@ -115,7 +115,7 @@ static int bgmac_probe(struct bcma_device *core) struct ssb_sprom *sprom = &core->bus->sprom; struct mii_bus *mii_bus; struct bgmac *bgmac; - const u8 *mac = NULL; + const u8 *mac; int err; bgmac = bgmac_alloc(&core->dev); @@ -128,11 +128,10 @@ static int bgmac_probe(struct bcma_device *core) bcma_set_drvdata(core, bgmac); - if (bgmac->dev->of_node) - mac = of_get_mac_address(bgmac->dev->of_node); + err = of_get_mac_address(bgmac->dev->of_node, bgmac->net_dev->dev_addr); /* If no MAC address assigned via device tree, check SPROM */ - if (IS_ERR_OR_NULL(mac)) { + if (err) { switch (core->core_unit) { case 0: mac = sprom->et0mac; @@ -149,10 +148,9 @@ static int bgmac_probe(struct bcma_device *core) err = -ENOTSUPP; goto err; } + ether_addr_copy(bgmac->net_dev->dev_addr, mac); } - ether_addr_copy(bgmac->net_dev->dev_addr, mac); - /* On BCM4706 we need common core to access PHY */ if (core->id.id == BCMA_CORE_4706_MAC_GBIT && !core->bus->drv_gmac_cmn.core) { diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c b/drivers/net/ethernet/broadcom/bgmac-platform.c index f37f1c58f368..9834b77cf4b6 100644 --- a/drivers/net/ethernet/broadcom/bgmac-platform.c +++ b/drivers/net/ethernet/broadcom/bgmac-platform.c @@ -173,7 +173,7 @@ static int bgmac_probe(struct platform_device *pdev) struct device_node *np = pdev->dev.of_node; struct bgmac *bgmac; struct resource *regs; - const u8 *mac_addr; + int ret; bgmac = bgmac_alloc(&pdev->dev); if (!bgmac) @@ -192,11 +192,10 @@ static int bgmac_probe(struct platform_device *pdev) bgmac->dev = &pdev->dev; bgmac->dma_dev = &pdev->dev; - mac_addr = of_get_mac_address(np); - if (!IS_ERR(mac_addr)) - ether_addr_copy(bgmac->net_dev->dev_addr, mac_addr); - else - dev_warn(&pdev->dev, "MAC address not present in device tree\n"); + ret = of_get_mac_address(np, bgmac->net_dev->dev_addr); + if (ret) + dev_warn(&pdev->dev, + "MAC address not present in device tree\n"); bgmac->irq = platform_get_irq(pdev, 0); if (bgmac->irq < 0) diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 3e8a179f39db..c0986096c701 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -8057,7 +8057,7 @@ bnx2_read_vpd_fw_ver(struct bnx2 *bp) data[i + 3] = data[i + BNX2_VPD_LEN]; } - i = pci_vpd_find_tag(data, 0, BNX2_VPD_LEN, PCI_VPD_LRDT_RO_DATA); + i = pci_vpd_find_tag(data, BNX2_VPD_LEN, PCI_VPD_LRDT_RO_DATA); if (i < 0) goto vpd_done; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 
b652ed72a621..281b1c2e04a7 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -1395,7 +1395,6 @@ int bnx2x_send_final_clnup(struct bnx2x *bp, u8 clnup_func, u32 poll_cnt) u32 op_gen_command = 0; u32 comp_addr = BAR_CSTRORM_INTMEM + CSTORM_FINAL_CLEANUP_COMPLETE_OFFSET(clnup_func); - int ret = 0; if (REG_RD(bp, comp_addr)) { BNX2X_ERR("Cleanup complete was not 0 before sending\n"); @@ -1420,7 +1419,7 @@ int bnx2x_send_final_clnup(struct bnx2x *bp, u8 clnup_func, u32 poll_cnt) /* Zero completion for next FLR */ REG_WR(bp, comp_addr, 0); - return ret; + return 0; } u8 bnx2x_is_pcie_pending(struct pci_dev *dev) @@ -12207,8 +12206,7 @@ static void bnx2x_read_fwinfo(struct bnx2x *bp) /* VPD RO tag should be first tag after identifier string, hence * we should be able to find it in first BNX2X_VPD_LEN chars */ - i = pci_vpd_find_tag(vpd_start, 0, BNX2X_VPD_LEN, - PCI_VPD_LRDT_RO_DATA); + i = pci_vpd_find_tag(vpd_start, BNX2X_VPD_LEN, PCI_VPD_LRDT_RO_DATA); if (i < 0) goto out_not_found; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b53a0d87371a..2985844634c8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -122,7 +122,10 @@ enum board_idx { NETXTREME_E_VF, NETXTREME_C_VF, NETXTREME_S_VF, + NETXTREME_C_VF_HV, + NETXTREME_E_VF_HV, NETXTREME_E_P5_VF, + NETXTREME_E_P5_VF_HV, }; /* indexed by enum above */ @@ -170,7 +173,10 @@ static const struct { [NETXTREME_E_VF] = { "Broadcom NetXtreme-E Ethernet Virtual Function" }, [NETXTREME_C_VF] = { "Broadcom NetXtreme-C Ethernet Virtual Function" }, [NETXTREME_S_VF] = { "Broadcom NetXtreme-S Ethernet Virtual Function" }, + [NETXTREME_C_VF_HV] = { "Broadcom NetXtreme-C Virtual Function for Hyper-V" }, + [NETXTREME_E_VF_HV] = { "Broadcom NetXtreme-E Virtual Function for Hyper-V" }, [NETXTREME_E_P5_VF] = { "Broadcom BCM5750X NetXtreme-E Ethernet Virtual Function" }, + [NETXTREME_E_P5_VF_HV] = { "Broadcom BCM5750X NetXtreme-E Virtual Function for Hyper-V" }, }; static const struct pci_device_id bnxt_pci_tbl[] = { @@ -222,15 +228,25 @@ static const struct pci_device_id bnxt_pci_tbl[] = { { PCI_VDEVICE(BROADCOM, 0xd804), .driver_data = BCM58804 }, #ifdef CONFIG_BNXT_SRIOV { PCI_VDEVICE(BROADCOM, 0x1606), .driver_data = NETXTREME_E_VF }, + { PCI_VDEVICE(BROADCOM, 0x1607), .driver_data = NETXTREME_E_VF_HV }, + { PCI_VDEVICE(BROADCOM, 0x1608), .driver_data = NETXTREME_E_VF_HV }, { PCI_VDEVICE(BROADCOM, 0x1609), .driver_data = NETXTREME_E_VF }, + { PCI_VDEVICE(BROADCOM, 0x16bd), .driver_data = NETXTREME_E_VF_HV }, { PCI_VDEVICE(BROADCOM, 0x16c1), .driver_data = NETXTREME_E_VF }, + { PCI_VDEVICE(BROADCOM, 0x16c2), .driver_data = NETXTREME_C_VF_HV }, + { PCI_VDEVICE(BROADCOM, 0x16c3), .driver_data = NETXTREME_C_VF_HV }, + { PCI_VDEVICE(BROADCOM, 0x16c4), .driver_data = NETXTREME_E_VF_HV }, + { PCI_VDEVICE(BROADCOM, 0x16c5), .driver_data = NETXTREME_E_VF_HV }, { PCI_VDEVICE(BROADCOM, 0x16cb), .driver_data = NETXTREME_C_VF }, { PCI_VDEVICE(BROADCOM, 0x16d3), .driver_data = NETXTREME_E_VF }, { PCI_VDEVICE(BROADCOM, 0x16dc), .driver_data = NETXTREME_E_VF }, { PCI_VDEVICE(BROADCOM, 0x16e1), .driver_data = NETXTREME_C_VF }, { PCI_VDEVICE(BROADCOM, 0x16e5), .driver_data = NETXTREME_C_VF }, + { PCI_VDEVICE(BROADCOM, 0x16e6), .driver_data = NETXTREME_C_VF_HV }, { PCI_VDEVICE(BROADCOM, 0x1806), .driver_data = NETXTREME_E_P5_VF }, { PCI_VDEVICE(BROADCOM, 0x1807), .driver_data = NETXTREME_E_P5_VF }, + 
{ PCI_VDEVICE(BROADCOM, 0x1808), .driver_data = NETXTREME_E_P5_VF_HV }, + { PCI_VDEVICE(BROADCOM, 0x1809), .driver_data = NETXTREME_E_P5_VF_HV }, { PCI_VDEVICE(BROADCOM, 0xd800), .driver_data = NETXTREME_S_VF }, #endif { 0 } @@ -265,7 +281,8 @@ static struct workqueue_struct *bnxt_pf_wq; static bool bnxt_vf_pciid(enum board_idx idx) { return (idx == NETXTREME_C_VF || idx == NETXTREME_E_VF || - idx == NETXTREME_S_VF || idx == NETXTREME_E_P5_VF); + idx == NETXTREME_S_VF || idx == NETXTREME_C_VF_HV || + idx == NETXTREME_E_VF_HV || idx == NETXTREME_E_P5_VF); } #define DB_CP_REARM_FLAGS (DB_KEY_CP | DB_IDX_VALID) @@ -358,6 +375,7 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) struct pci_dev *pdev = bp->pdev; struct bnxt_tx_ring_info *txr; struct bnxt_sw_tx_bd *tx_buf; + __le32 lflags = 0; i = skb_get_queue_mapping(skb); if (unlikely(i >= bp->tx_nr_rings)) { @@ -399,6 +417,11 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) vlan_tag_flags |= 1 << TX_BD_CFA_META_TPID_SHIFT; } + if (unlikely(skb->no_fcs)) { + lflags |= cpu_to_le32(TX_BD_FLAGS_NO_CRC); + goto normal_tx; + } + if (free_size == bp->tx_ring_size && length <= bp->tx_push_thresh) { struct tx_push_buffer *tx_push_buf = txr->tx_push; struct tx_push_bd *tx_push = &tx_push_buf->push_bd; @@ -500,7 +523,7 @@ normal_tx: txbd1 = (struct tx_bd_ext *) &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)]; - txbd1->tx_bd_hsize_lflags = 0; + txbd1->tx_bd_hsize_lflags = lflags; if (skb_is_gso(skb)) { u32 hdr_len; @@ -512,14 +535,14 @@ normal_tx: hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); - txbd1->tx_bd_hsize_lflags = cpu_to_le32(TX_BD_FLAGS_LSO | + txbd1->tx_bd_hsize_lflags |= cpu_to_le32(TX_BD_FLAGS_LSO | TX_BD_FLAGS_T_IPID | (hdr_len << (TX_BD_HSIZE_SHIFT - 1))); length = skb_shinfo(skb)->gso_size; txbd1->tx_bd_mss = cpu_to_le32(length); length += hdr_len; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { - txbd1->tx_bd_hsize_lflags = + txbd1->tx_bd_hsize_lflags |= cpu_to_le32(TX_BD_FLAGS_TCP_UDP_CHKSUM); txbd1->tx_bd_mss = 0; } @@ -1732,14 +1755,16 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, cons = rxcmp->rx_cmp_opaque; if (unlikely(cons != rxr->rx_next_cons)) { - int rc1 = bnxt_discard_rx(bp, cpr, raw_cons, rxcmp); + int rc1 = bnxt_discard_rx(bp, cpr, &tmp_raw_cons, rxcmp); /* 0xffff is forced error, don't print it */ if (rxr->rx_next_cons != 0xffff) netdev_warn(bp->dev, "RX cons %x != expected cons %x\n", cons, rxr->rx_next_cons); bnxt_sched_reset(bp, rxr); - return rc1; + if (rc1) + return rc1; + goto next_rx_no_prod_no_len; } rx_buf = &rxr->rx_buf_ring[cons]; data = rx_buf->data; @@ -4145,7 +4170,7 @@ static void bnxt_free_mem(struct bnxt *bp, bool irq_re_init) bnxt_free_ntp_fltrs(bp, irq_re_init); if (irq_re_init) { bnxt_free_ring_stats(bp); - if (!(bp->fw_cap & BNXT_FW_CAP_PORT_STATS_NO_RESET) || + if (!(bp->phy_flags & BNXT_PHY_FL_PORT_STATS_NO_RESET) || test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) bnxt_free_port_stats(bp); bnxt_free_ring_grps(bp); @@ -4470,7 +4495,7 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len, writel(1, bp->bar0 + doorbell_offset); if (!pci_is_enabled(bp->pdev)) - return 0; + return -ENODEV; if (!timeout) timeout = DFLT_HWRM_CMD_TIMEOUT; @@ -4500,12 +4525,15 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len, if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) return -EBUSY; /* on first few passes, just barely sleep */ - if (i < HWRM_SHORT_TIMEOUT_COUNTER) + if 
(i < HWRM_SHORT_TIMEOUT_COUNTER) { usleep_range(HWRM_SHORT_MIN_TIMEOUT, HWRM_SHORT_MAX_TIMEOUT); - else + } else { + if (HWRM_WAIT_MUST_ABORT(bp, req)) + break; usleep_range(HWRM_MIN_TIMEOUT, HWRM_MAX_TIMEOUT); + } } if (bp->hwrm_intr_seq_id != (u16)~seq_id) { @@ -4530,15 +4558,19 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len, if (len) break; /* on first few passes, just barely sleep */ - if (i < HWRM_SHORT_TIMEOUT_COUNTER) + if (i < HWRM_SHORT_TIMEOUT_COUNTER) { usleep_range(HWRM_SHORT_MIN_TIMEOUT, HWRM_SHORT_MAX_TIMEOUT); - else + } else { + if (HWRM_WAIT_MUST_ABORT(bp, req)) + goto timeout_abort; usleep_range(HWRM_MIN_TIMEOUT, HWRM_MAX_TIMEOUT); + } } if (i >= tmo_count) { +timeout_abort: if (!silent) netdev_err(bp->dev, "Error (timeout: %d) msg {0x%x 0x%x} len:%d\n", HWRM_TOTAL_TIMEOUT(i), @@ -7540,6 +7572,32 @@ static void __bnxt_map_fw_health_reg(struct bnxt *bp, u32 reg) BNXT_FW_HEALTH_WIN_MAP_OFF); } +bool bnxt_is_fw_healthy(struct bnxt *bp) +{ + if (bp->fw_health && bp->fw_health->status_reliable) { + u32 fw_status; + + fw_status = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG); + if (fw_status && !BNXT_FW_IS_HEALTHY(fw_status)) + return false; + } + + return true; +} + +static void bnxt_inv_fw_health_reg(struct bnxt *bp) +{ + struct bnxt_fw_health *fw_health = bp->fw_health; + u32 reg_type; + + if (!fw_health || !fw_health->status_reliable) + return; + + reg_type = BNXT_FW_HEALTH_REG_TYPE(fw_health->regs[BNXT_FW_HEALTH_REG]); + if (reg_type == BNXT_FW_HEALTH_REG_TYPE_GRC) + fw_health->status_reliable = false; +} + static void bnxt_try_map_fw_health_reg(struct bnxt *bp) { void __iomem *hs; @@ -7547,6 +7605,9 @@ static void bnxt_try_map_fw_health_reg(struct bnxt *bp) u32 reg_type; u32 sig; + if (bp->fw_health) + bp->fw_health->status_reliable = false; + __bnxt_map_fw_health_reg(bp, HCOMM_STATUS_STRUCT_LOC); hs = bp->bar0 + BNXT_FW_HEALTH_WIN_OFF(HCOMM_STATUS_STRUCT_LOC); @@ -7558,11 +7619,9 @@ static void bnxt_try_map_fw_health_reg(struct bnxt *bp) BNXT_FW_HEALTH_WIN_BASE + BNXT_GRC_REG_CHIP_NUM); } - if (!BNXT_CHIP_P5(bp)) { - if (bp->fw_health) - bp->fw_health->status_reliable = false; + if (!BNXT_CHIP_P5(bp)) return; - } + status_loc = BNXT_GRC_REG_STATUS_P5 | BNXT_FW_HEALTH_REG_TYPE_BAR0; } else { @@ -7592,6 +7651,7 @@ static int bnxt_map_fw_health_regs(struct bnxt *bp) u32 reg_base = 0xffffffff; int i; + bp->fw_health->status_reliable = false; /* Only pre-map the monitoring GRC registers using window 3 */ for (i = 0; i < 4; i++) { u32 reg = fw_health->regs[i]; @@ -7604,6 +7664,7 @@ static int bnxt_map_fw_health_regs(struct bnxt *bp) return -ERANGE; fw_health->mapped_regs[i] = BNXT_FW_HEALTH_WIN_OFF(reg); } + bp->fw_health->status_reliable = true; if (reg_base == 0xffffffff) return 0; @@ -8304,11 +8365,11 @@ static int bnxt_alloc_rfs_vnics(struct bnxt *bp) #endif } -/* Allow PF and VF with default VLAN to be in promiscuous mode */ +/* Allow PF, trusted VFs and VFs with default VLAN to be in promiscuous mode */ static bool bnxt_promisc_ok(struct bnxt *bp) { #ifdef CONFIG_BNXT_SRIOV - if (BNXT_VF(bp) && !bp->vf.vlan) + if (BNXT_VF(bp) && !bp->vf.vlan && !bnxt_is_trusted_vf(bp, &bp->vf)) return false; #endif return true; @@ -8405,7 +8466,7 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init) if (bp->dev->flags & IFF_BROADCAST) vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_BCAST; - if ((bp->dev->flags & IFF_PROMISC) && bnxt_promisc_ok(bp)) + if (bp->dev->flags & IFF_PROMISC) vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS; if 
(bp->dev->flags & IFF_ALLMULTI) { @@ -9039,8 +9100,9 @@ static char *bnxt_report_fec(struct bnxt_link_info *link_info) static void bnxt_report_link(struct bnxt *bp) { if (bp->link_info.link_up) { - const char *duplex; + const char *signal = ""; const char *flow_ctrl; + const char *duplex; u32 speed; u16 fec; @@ -9062,9 +9124,24 @@ static void bnxt_report_link(struct bnxt *bp) flow_ctrl = "ON - receive"; else flow_ctrl = "none"; - netdev_info(bp->dev, "NIC Link is Up, %u Mbps %s duplex, Flow control: %s\n", - speed, duplex, flow_ctrl); - if (bp->flags & BNXT_FLAG_EEE_CAP) + if (bp->link_info.phy_qcfg_resp.option_flags & + PORT_PHY_QCFG_RESP_OPTION_FLAGS_SIGNAL_MODE_KNOWN) { + u8 sig_mode = bp->link_info.active_fec_sig_mode & + PORT_PHY_QCFG_RESP_SIGNAL_MODE_MASK; + switch (sig_mode) { + case PORT_PHY_QCFG_RESP_SIGNAL_MODE_NRZ: + signal = "(NRZ) "; + break; + case PORT_PHY_QCFG_RESP_SIGNAL_MODE_PAM4: + signal = "(PAM4) "; + break; + default: + break; + } + } + netdev_info(bp->dev, "NIC Link is Up, %u Mbps %s%s duplex, Flow control: %s\n", + speed, signal, duplex, flow_ctrl); + if (bp->phy_flags & BNXT_PHY_FL_EEE_CAP) netdev_info(bp->dev, "EEE is %s\n", bp->eee.eee_active ? "active" : "not active"); @@ -9096,10 +9173,6 @@ static int bnxt_hwrm_phy_qcaps(struct bnxt *bp) struct hwrm_port_phy_qcaps_output *resp = bp->hwrm_cmd_resp_addr; struct bnxt_link_info *link_info = &bp->link_info; - bp->flags &= ~BNXT_FLAG_EEE_CAP; - if (bp->test_info) - bp->test_info->flags &= ~(BNXT_TEST_FL_EXT_LPBK | - BNXT_TEST_FL_AN_PHY_LPBK); if (bp->hwrm_spec_code < 0x10201) return 0; @@ -9110,31 +9183,17 @@ static int bnxt_hwrm_phy_qcaps(struct bnxt *bp) if (rc) goto hwrm_phy_qcaps_exit; + bp->phy_flags = resp->flags; if (resp->flags & PORT_PHY_QCAPS_RESP_FLAGS_EEE_SUPPORTED) { struct ethtool_eee *eee = &bp->eee; u16 fw_speeds = le16_to_cpu(resp->supported_speeds_eee_mode); - bp->flags |= BNXT_FLAG_EEE_CAP; eee->supported = _bnxt_fw_to_ethtool_adv_spds(fw_speeds, 0); bp->lpi_tmr_lo = le32_to_cpu(resp->tx_lpi_timer_low) & PORT_PHY_QCAPS_RESP_TX_LPI_TIMER_LOW_MASK; bp->lpi_tmr_hi = le32_to_cpu(resp->valid_tx_lpi_timer_high) & PORT_PHY_QCAPS_RESP_TX_LPI_TIMER_HIGH_MASK; } - if (resp->flags & PORT_PHY_QCAPS_RESP_FLAGS_EXTERNAL_LPBK_SUPPORTED) { - if (bp->test_info) - bp->test_info->flags |= BNXT_TEST_FL_EXT_LPBK; - } - if (resp->flags & PORT_PHY_QCAPS_RESP_FLAGS_AUTONEG_LPBK_SUPPORTED) { - if (bp->test_info) - bp->test_info->flags |= BNXT_TEST_FL_AN_PHY_LPBK; - } - if (resp->flags & PORT_PHY_QCAPS_RESP_FLAGS_SHARED_PHY_CFG_SUPPORTED) { - if (BNXT_PF(bp)) - bp->fw_cap |= BNXT_FW_CAP_SHARED_PORT_CFG; - } - if (resp->flags & PORT_PHY_QCAPS_RESP_FLAGS_CUMULATIVE_COUNTERS_ON_RESET) - bp->fw_cap |= BNXT_FW_CAP_PORT_STATS_NO_RESET; if (bp->hwrm_spec_code >= 0x10a01) { if (bnxt_phy_qcaps_no_speed(resp)) { @@ -9225,7 +9284,7 @@ int bnxt_update_link(struct bnxt *bp, bool chng_link_state) PORT_PHY_QCFG_RESP_PHY_ADDR_MASK; link_info->module_status = resp->module_status; - if (bp->flags & BNXT_FLAG_EEE_CAP) { + if (bp->phy_flags & BNXT_PHY_FL_EEE_CAP) { struct ethtool_eee *eee = &bp->eee; u16 fw_speeds; @@ -9461,7 +9520,8 @@ static int bnxt_hwrm_shutdown_link(struct bnxt *bp) if (!BNXT_SINGLE_PF(bp)) return 0; - if (pci_num_vf(bp->pdev)) + if (pci_num_vf(bp->pdev) && + !(bp->phy_flags & BNXT_PHY_FL_FW_MANAGED_LKDN)) return 0; bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_CFG, -1, -1); @@ -9494,9 +9554,10 @@ static int bnxt_try_recover_fw(struct bnxt *bp) mutex_lock(&bp->hwrm_cmd_lock); do { - rc = __bnxt_hwrm_ver_get(bp, true); 
sts = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG); - if (!sts || !BNXT_FW_IS_BOOTING(sts)) + rc = __bnxt_hwrm_ver_get(bp, true); + if (!BNXT_FW_IS_BOOTING(sts) && + !BNXT_FW_IS_RECOVERING(sts)) break; retry++; } while (rc == -EBUSY && retry < BNXT_FW_RETRY); @@ -9556,13 +9617,17 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) if (rc) return rc; - if (!up) + if (!up) { + bnxt_inv_fw_health_reg(bp); return 0; + } if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_RESC_CHANGE) resc_reinit = true; if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_HOT_FW_RESET_DONE) fw_reset = true; + else if (bp->fw_health && !bp->fw_health->status_reliable) + bnxt_try_map_fw_health_reg(bp); if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state) && !fw_reset) { netdev_err(bp->dev, "RESET_DONE not set during FW reset.\n"); @@ -9571,6 +9636,7 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) } if (resc_reinit || fw_reset) { if (fw_reset) { + set_bit(BNXT_STATE_FW_RESET_DET, &bp->state); if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) bnxt_ulp_stop(bp); bnxt_free_ctx_mem(bp); @@ -9579,21 +9645,25 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) bnxt_dcb_free(bp); rc = bnxt_fw_init_one(bp); if (rc) { + clear_bit(BNXT_STATE_FW_RESET_DET, &bp->state); set_bit(BNXT_STATE_ABORT_ERR, &bp->state); return rc; } bnxt_clear_int_mode(bp); rc = bnxt_init_int_mode(bp); if (rc) { + clear_bit(BNXT_STATE_FW_RESET_DET, &bp->state); netdev_err(bp->dev, "init int mode failed\n"); return rc; } - set_bit(BNXT_STATE_FW_RESET_DET, &bp->state); } if (BNXT_NEW_RM(bp)) { struct bnxt_hw_resc *hw_resc = &bp->hw_resc; rc = bnxt_hwrm_func_resc_qcaps(bp, true); + if (rc) + netdev_err(bp->dev, "resc_qcaps failed\n"); + hw_resc->resv_cp_rings = 0; hw_resc->resv_stat_ctxs = 0; hw_resc->resv_irqs = 0; @@ -9607,7 +9677,7 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) } } } - return 0; + return rc; } static int bnxt_hwrm_port_led_qcaps(struct bnxt *bp) @@ -9736,7 +9806,9 @@ static ssize_t bnxt_show_temp(struct device *dev, if (!rc) len = sprintf(buf, "%u\n", resp->temp * 1000); /* display millidegree */ mutex_unlock(&bp->hwrm_cmd_lock); - return rc ?: len; + if (rc) + return rc; + return len; } static SENSOR_DEVICE_ATTR(temp1_input, 0444, bnxt_show_temp, NULL, 0); @@ -9793,7 +9865,7 @@ static bool bnxt_eee_config_ok(struct bnxt *bp) struct ethtool_eee *eee = &bp->eee; struct bnxt_link_info *link_info = &bp->link_info; - if (!(bp->flags & BNXT_FLAG_EEE_CAP)) + if (!(bp->phy_flags & BNXT_PHY_FL_EEE_CAP)) return true; if (eee->eee_enabled) { @@ -10440,7 +10512,7 @@ static void bnxt_set_rx_mode(struct net_device *dev) CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST | CFA_L2_SET_RX_MASK_REQ_MASK_BCAST); - if ((dev->flags & IFF_PROMISC) && bnxt_promisc_ok(bp)) + if (dev->flags & IFF_PROMISC) mask |= CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS; uc_update = bnxt_uc_list_updated(bp); @@ -10516,6 +10588,9 @@ static int bnxt_cfg_rx_mode(struct bnxt *bp) } skip_uc: + if ((vnic->rx_mask & CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS) && + !bnxt_promisc_ok(bp)) + vnic->rx_mask &= ~CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS; rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0); if (rc && vnic->mc_list_count) { netdev_info(bp->dev, "Failed setting MC filters rc: %d, turning on ALL_MCAST mode\n", @@ -10710,6 +10785,40 @@ static int bnxt_set_features(struct net_device *dev, netdev_features_t features) return rc; } +static netdev_features_t bnxt_features_check(struct sk_buff *skb, + struct net_device *dev, + netdev_features_t features) +{ + struct bnxt *bp; + __be16 
udp_port; + u8 l4_proto = 0; + + features = vlan_features_check(skb, features); + if (!skb->encapsulation) + return features; + + switch (vlan_get_protocol(skb)) { + case htons(ETH_P_IP): + l4_proto = ip_hdr(skb)->protocol; + break; + case htons(ETH_P_IPV6): + l4_proto = ipv6_hdr(skb)->nexthdr; + break; + default: + return features; + } + + if (l4_proto != IPPROTO_UDP) + return features; + + bp = netdev_priv(dev); + /* For UDP, we can only handle 1 Vxlan port and 1 Geneve port. */ + udp_port = udp_hdr(skb)->dest; + if (udp_port == bp->vxlan_port || udp_port == bp->nge_port) + return features; + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); +} + int bnxt_dbg_hwrm_rd_reg(struct bnxt *bp, u32 reg_off, u16 num_words, u32 *reg_buf) { @@ -11035,6 +11144,7 @@ static void bnxt_fw_reset_close(struct bnxt *bp) pci_disable_device(bp->pdev); } __bnxt_close_nic(bp, true, false); + bnxt_vf_reps_free(bp); bnxt_clear_int_mode(bp); bnxt_hwrm_func_drv_unrgtr(bp); if (pci_is_enabled(bp->pdev)) @@ -11640,7 +11750,7 @@ static void bnxt_reset_all(struct bnxt *bp) req.selfrst_status = FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP; req.flags = FW_RESET_REQ_FLAGS_RESET_GRACEFUL; rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); - if (rc) + if (rc != -ENODEV) netdev_warn(bp->dev, "Unable to reset FW rc=%d\n", rc); } bp->fw_reset_timestamp = jiffies; @@ -11723,28 +11833,20 @@ static void bnxt_fw_reset_task(struct work_struct *work) bnxt_queue_fw_reset_work(bp, bp->fw_reset_min_dsecs * HZ / 10); return; case BNXT_FW_RESET_STATE_ENABLE_DEV: - if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) { - u32 val; - - if (!bp->fw_reset_min_dsecs) { - u16 val; - - pci_read_config_word(bp->pdev, PCI_SUBSYSTEM_ID, - &val); - if (val == 0xffff) { - if (bnxt_fw_reset_timeout(bp)) { - netdev_err(bp->dev, "Firmware reset aborted, PCI config space invalid\n"); - goto fw_reset_abort; - } - bnxt_queue_fw_reset_work(bp, HZ / 1000); - return; + bnxt_inv_fw_health_reg(bp); + if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state) && + !bp->fw_reset_min_dsecs) { + u16 val; + + pci_read_config_word(bp->pdev, PCI_SUBSYSTEM_ID, &val); + if (val == 0xffff) { + if (bnxt_fw_reset_timeout(bp)) { + netdev_err(bp->dev, "Firmware reset aborted, PCI config space invalid\n"); + goto fw_reset_abort; } + bnxt_queue_fw_reset_work(bp, HZ / 1000); + return; } - val = bnxt_fw_health_readl(bp, - BNXT_FW_RESET_INPROG_REG); - if (val) - netdev_warn(bp->dev, "FW reset inprog %x after min wait time.\n", - val); } clear_bit(BNXT_STATE_FW_FATAL_COND, &bp->state); if (pci_enable_device(bp->pdev)) { @@ -11787,6 +11889,8 @@ static void bnxt_fw_reset_task(struct work_struct *work) bnxt_ulp_start(bp, rc); if (!rc) bnxt_reenable_sriov(bp); + bnxt_vf_reps_alloc(bp); + bnxt_vf_reps_open(bp); bnxt_dl_health_recovery_done(bp); bnxt_dl_health_status_update(bp, true); rtnl_unlock(); @@ -12222,10 +12326,13 @@ static int bnxt_udp_tunnel_sync(struct net_device *netdev, unsigned int table) unsigned int cmd; udp_tunnel_nic_get_port(netdev, table, 0, &ti); - if (ti.type == UDP_TUNNEL_TYPE_VXLAN) + if (ti.type == UDP_TUNNEL_TYPE_VXLAN) { + bp->vxlan_port = ti.port; cmd = TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN; - else + } else { + bp->nge_port = ti.port; cmd = TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE; + } if (ti.port) return bnxt_hwrm_tunnel_dst_port_alloc(bp, ti.port, cmd); @@ -12325,6 +12432,7 @@ static const struct net_device_ops bnxt_netdev_ops = { .ndo_change_mtu = bnxt_change_mtu, .ndo_fix_features = bnxt_fix_features, .ndo_set_features = 
bnxt_set_features, + .ndo_features_check = bnxt_features_check, .ndo_tx_timeout = bnxt_tx_timeout, #ifdef CONFIG_BNXT_SRIOV .ndo_get_vf_config = bnxt_get_vf_config, @@ -12393,12 +12501,17 @@ static int bnxt_probe_phy(struct bnxt *bp, bool fw_dflt) int rc = 0; struct bnxt_link_info *link_info = &bp->link_info; + bp->phy_flags = 0; rc = bnxt_hwrm_phy_qcaps(bp); if (rc) { netdev_err(bp->dev, "Probe phy can't get phy capabilities (rc: %x)\n", rc); return rc; } + if (bp->phy_flags & BNXT_PHY_FL_NO_FCS) + bp->dev->priv_flags |= IFF_SUPP_NOFCS; + else + bp->dev->priv_flags &= ~IFF_SUPP_NOFCS; if (!fw_dflt) return 0; @@ -12681,7 +12794,7 @@ static void bnxt_vpd_read_info(struct bnxt *bp) goto exit; } - i = pci_vpd_find_tag(vpd_data, 0, vpd_size, PCI_VPD_LRDT_RO_DATA); + i = pci_vpd_find_tag(vpd_data, vpd_size, PCI_VPD_LRDT_RO_DATA); if (i < 0) { netdev_err(bp->dev, "VPD READ-Only not found\n"); goto exit; @@ -12872,8 +12985,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (!BNXT_CHIP_P4_PLUS(bp)) bp->flags |= BNXT_FLAG_DOUBLE_DB; - bp->ulp_probe = bnxt_ulp_probe; - rc = bnxt_init_mac_addr(bp); if (rc) { dev_err(&pdev->dev, "Unable to initialize mac address.\n"); @@ -12934,6 +13045,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) rc); } + bnxt_inv_fw_health_reg(bp); bnxt_dl_register(bp); rc = register_netdev(dev); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 1259e68cba2a..98e0cef4532c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -671,6 +671,10 @@ struct nqe_cn { #define HWRM_MIN_TIMEOUT 25 #define HWRM_MAX_TIMEOUT 40 +#define HWRM_WAIT_MUST_ABORT(bp, req) \ + (le16_to_cpu((req)->req_type) != HWRM_VER_GET && \ + !bnxt_is_fw_healthy(bp)) + #define HWRM_TOTAL_TIMEOUT(n) (((n) <= HWRM_SHORT_TIMEOUT_COUNTER) ? 
\ ((n) * HWRM_SHORT_MIN_TIMEOUT) : \ (HWRM_SHORT_TIMEOUT_COUNTER * HWRM_SHORT_MIN_TIMEOUT + \ @@ -1337,9 +1341,6 @@ struct bnxt_led_info { struct bnxt_test_info { u8 offline_mask; - u8 flags; -#define BNXT_TEST_FL_EXT_LPBK 0x1 -#define BNXT_TEST_FL_AN_PHY_LPBK 0x2 u16 timeout; char string[BNXT_MAX_TEST][ETH_GSTRING_LEN]; }; @@ -1560,6 +1561,7 @@ struct bnxt_fw_reporter_ctx { #define BNXT_FW_STATUS_HEALTH_MSK 0xffff #define BNXT_FW_STATUS_HEALTHY 0x8000 #define BNXT_FW_STATUS_SHUTDOWN 0x100000 +#define BNXT_FW_STATUS_RECOVERING 0x400000 #define BNXT_FW_IS_HEALTHY(sts) (((sts) & BNXT_FW_STATUS_HEALTH_MSK) ==\ BNXT_FW_STATUS_HEALTHY) @@ -1570,6 +1572,9 @@ struct bnxt_fw_reporter_ctx { #define BNXT_FW_IS_ERR(sts) (((sts) & BNXT_FW_STATUS_HEALTH_MSK) > \ BNXT_FW_STATUS_HEALTHY) +#define BNXT_FW_IS_RECOVERING(sts) (BNXT_FW_IS_ERR(sts) && \ + ((sts) & BNXT_FW_STATUS_RECOVERING)) + #define BNXT_FW_RETRY 5 #define BNXT_FW_IF_RETRY 10 @@ -1685,7 +1690,6 @@ struct bnxt { #define BNXT_FLAG_SHARED_RINGS 0x200 #define BNXT_FLAG_PORT_STATS 0x400 #define BNXT_FLAG_UDP_RSS_CAP 0x800 - #define BNXT_FLAG_EEE_CAP 0x1000 #define BNXT_FLAG_NEW_RSS_CAP 0x2000 #define BNXT_FLAG_WOL_CAP 0x4000 #define BNXT_FLAG_ROCEV1_CAP 0x8000 @@ -1712,8 +1716,10 @@ struct bnxt { #define BNXT_NPAR(bp) ((bp)->port_partition_type) #define BNXT_MH(bp) ((bp)->flags & BNXT_FLAG_MULTI_HOST) #define BNXT_SINGLE_PF(bp) (BNXT_PF(bp) && !BNXT_NPAR(bp) && !BNXT_MH(bp)) +#define BNXT_SH_PORT_CFG_OK(bp) (BNXT_PF(bp) && \ + ((bp)->phy_flags & BNXT_PHY_FL_SHARED_PORT_CFG)) #define BNXT_PHY_CFG_ABLE(bp) ((BNXT_SINGLE_PF(bp) || \ - ((bp)->fw_cap & BNXT_FW_CAP_SHARED_PORT_CFG)) && \ + BNXT_SH_PORT_CFG_OK(bp)) && \ (bp)->link_info.phy_state == BNXT_PHY_STATE_ENABLED) #define BNXT_CHIP_TYPE_NITRO_A0(bp) ((bp)->flags & BNXT_FLAG_CHIP_NITRO_A0) #define BNXT_RX_PAGE_MODE(bp) ((bp)->flags & BNXT_FLAG_RX_PAGE_MODE) @@ -1745,7 +1751,6 @@ struct bnxt { (BNXT_CHIP_P4(bp) || BNXT_CHIP_P5(bp)) struct bnxt_en_dev *edev; - struct bnxt_en_dev * (*ulp_probe)(struct net_device *); struct bnxt_napi **bnapi; @@ -1863,11 +1868,9 @@ struct bnxt { #define BNXT_FW_CAP_EXT_STATS_SUPPORTED 0x00040000 #define BNXT_FW_CAP_ERR_RECOVER_RELOAD 0x00100000 #define BNXT_FW_CAP_HOT_RESET 0x00200000 - #define BNXT_FW_CAP_SHARED_PORT_CFG 0x00400000 #define BNXT_FW_CAP_VLAN_RX_STRIP 0x01000000 #define BNXT_FW_CAP_VLAN_TX_INSERT 0x02000000 #define BNXT_FW_CAP_EXT_HW_STATS_SUPPORTED 0x04000000 - #define BNXT_FW_CAP_PORT_STATS_NO_RESET 0x10000000 #define BNXT_FW_CAP_RING_MONITOR 0x40000000 #define BNXT_NEW_RM(bp) ((bp)->fw_cap & BNXT_FW_CAP_NEW_RM) @@ -1910,6 +1913,8 @@ struct bnxt { u16 vxlan_fw_dst_port_id; u16 nge_fw_dst_port_id; + __be16 vxlan_port; + __be16 nge_port; u8 port_partition_type; u8 port_count; u16 br_mode; @@ -2002,6 +2007,17 @@ struct bnxt { u32 lpi_tmr_lo; u32 lpi_tmr_hi; + /* copied from flags in hwrm_port_phy_qcaps_output */ + u8 phy_flags; +#define BNXT_PHY_FL_EEE_CAP PORT_PHY_QCAPS_RESP_FLAGS_EEE_SUPPORTED +#define BNXT_PHY_FL_EXT_LPBK PORT_PHY_QCAPS_RESP_FLAGS_EXTERNAL_LPBK_SUPPORTED +#define BNXT_PHY_FL_AN_PHY_LPBK PORT_PHY_QCAPS_RESP_FLAGS_AUTONEG_LPBK_SUPPORTED +#define BNXT_PHY_FL_SHARED_PORT_CFG PORT_PHY_QCAPS_RESP_FLAGS_SHARED_PHY_CFG_SUPPORTED +#define BNXT_PHY_FL_PORT_STATS_NO_RESET PORT_PHY_QCAPS_RESP_FLAGS_CUMULATIVE_COUNTERS_ON_RESET +#define BNXT_PHY_FL_NO_PHY_LPBK PORT_PHY_QCAPS_RESP_FLAGS_LOCAL_LPBK_NOT_SUPPORTED +#define BNXT_PHY_FL_FW_MANAGED_LKDN PORT_PHY_QCAPS_RESP_FLAGS_FW_MANAGED_LINK_DOWN +#define BNXT_PHY_FL_NO_FCS 
PORT_PHY_QCAPS_RESP_FLAGS_NO_FCS + u8 num_tests; struct bnxt_test_info *test_info; @@ -2228,6 +2244,7 @@ int bnxt_hwrm_set_link_setting(struct bnxt *, bool, bool); int bnxt_hwrm_alloc_wol_fltr(struct bnxt *bp); int bnxt_hwrm_free_wol_fltr(struct bnxt *bp); int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp, bool all); +bool bnxt_is_fw_healthy(struct bnxt *bp); int bnxt_hwrm_fw_set_time(struct bnxt *); int bnxt_open_nic(struct bnxt *, bool, bool); int bnxt_half_open_nic(struct bnxt *bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 2f8b193a772d..c664ec52ebcf 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1930,6 +1930,20 @@ static int bnxt_get_fecparam(struct net_device *dev, return 0; } +static void bnxt_get_fec_stats(struct net_device *dev, + struct ethtool_fec_stats *fec_stats) +{ + struct bnxt *bp = netdev_priv(dev); + u64 *rx; + + if (BNXT_VF(bp) || !(bp->flags & BNXT_FLAG_PORT_STATS_EXT)) + return; + + rx = bp->rx_port_stats_ext.sw_stats; + fec_stats->corrected_bits.total = + *(rx + BNXT_RX_STATS_EXT_OFFSET(rx_corrected_bits)); +} + static u32 bnxt_ethtool_forced_fec_to_fw(struct bnxt_link_info *link_info, u32 fec) { @@ -2898,7 +2912,7 @@ static int bnxt_set_eee(struct net_device *dev, struct ethtool_eee *edata) if (!BNXT_PHY_CFG_ABLE(bp)) return -EOPNOTSUPP; - if (!(bp->flags & BNXT_FLAG_EEE_CAP)) + if (!(bp->phy_flags & BNXT_PHY_FL_EEE_CAP)) return -EOPNOTSUPP; mutex_lock(&bp->link_lock); @@ -2949,7 +2963,7 @@ static int bnxt_get_eee(struct net_device *dev, struct ethtool_eee *edata) { struct bnxt *bp = netdev_priv(dev); - if (!(bp->flags & BNXT_FLAG_EEE_CAP)) + if (!(bp->phy_flags & BNXT_PHY_FL_EEE_CAP)) return -EOPNOTSUPP; *edata = bp->eee; @@ -3201,7 +3215,7 @@ static int bnxt_disable_an_for_lpbk(struct bnxt *bp, int rc; if (!link_info->autoneg || - (bp->test_info->flags & BNXT_TEST_FL_AN_PHY_LPBK)) + (bp->phy_flags & BNXT_PHY_FL_AN_PHY_LPBK)) return 0; rc = bnxt_query_force_speeds(bp, &fw_advertising); @@ -3402,7 +3416,7 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, } if ((etest->flags & ETH_TEST_FL_EXTERNAL_LB) && - (bp->test_info->flags & BNXT_TEST_FL_EXT_LPBK)) + (bp->phy_flags & BNXT_PHY_FL_EXT_LPBK)) do_ext_lpbk = true; if (etest->flags & ETH_TEST_FL_OFFLINE) { @@ -3976,6 +3990,133 @@ ethtool_init_exit: mutex_unlock(&bp->hwrm_cmd_lock); } +static void bnxt_get_eth_phy_stats(struct net_device *dev, + struct ethtool_eth_phy_stats *phy_stats) +{ + struct bnxt *bp = netdev_priv(dev); + u64 *rx; + + if (BNXT_VF(bp) || !(bp->flags & BNXT_FLAG_PORT_STATS_EXT)) + return; + + rx = bp->rx_port_stats_ext.sw_stats; + phy_stats->SymbolErrorDuringCarrier = + *(rx + BNXT_RX_STATS_EXT_OFFSET(rx_pcs_symbol_err)); +} + +static void bnxt_get_eth_mac_stats(struct net_device *dev, + struct ethtool_eth_mac_stats *mac_stats) +{ + struct bnxt *bp = netdev_priv(dev); + u64 *rx, *tx; + + if (BNXT_VF(bp) || !(bp->flags & BNXT_FLAG_PORT_STATS)) + return; + + rx = bp->port_stats.sw_stats; + tx = bp->port_stats.sw_stats + BNXT_TX_PORT_STATS_BYTE_OFFSET / 8; + + mac_stats->FramesReceivedOK = + BNXT_GET_RX_PORT_STATS64(rx, rx_good_frames); + mac_stats->FramesTransmittedOK = + BNXT_GET_TX_PORT_STATS64(tx, tx_good_frames); + mac_stats->FrameCheckSequenceErrors = + BNXT_GET_RX_PORT_STATS64(rx, rx_fcs_err_frames); + mac_stats->AlignmentErrors = + BNXT_GET_RX_PORT_STATS64(rx, rx_align_err_frames); + mac_stats->OutOfRangeLengthField 
= + BNXT_GET_RX_PORT_STATS64(rx, rx_oor_len_frames); +} + +static void bnxt_get_eth_ctrl_stats(struct net_device *dev, + struct ethtool_eth_ctrl_stats *ctrl_stats) +{ + struct bnxt *bp = netdev_priv(dev); + u64 *rx; + + if (BNXT_VF(bp) || !(bp->flags & BNXT_FLAG_PORT_STATS)) + return; + + rx = bp->port_stats.sw_stats; + ctrl_stats->MACControlFramesReceived = + BNXT_GET_RX_PORT_STATS64(rx, rx_ctrl_frames); +} + +static const struct ethtool_rmon_hist_range bnxt_rmon_ranges[] = { + { 0, 64 }, + { 65, 127 }, + { 128, 255 }, + { 256, 511 }, + { 512, 1023 }, + { 1024, 1518 }, + { 1519, 2047 }, + { 2048, 4095 }, + { 4096, 9216 }, + { 9217, 16383 }, + {} +}; + +static void bnxt_get_rmon_stats(struct net_device *dev, + struct ethtool_rmon_stats *rmon_stats, + const struct ethtool_rmon_hist_range **ranges) +{ + struct bnxt *bp = netdev_priv(dev); + u64 *rx, *tx; + + if (BNXT_VF(bp) || !(bp->flags & BNXT_FLAG_PORT_STATS)) + return; + + rx = bp->port_stats.sw_stats; + tx = bp->port_stats.sw_stats + BNXT_TX_PORT_STATS_BYTE_OFFSET / 8; + + rmon_stats->jabbers = + BNXT_GET_RX_PORT_STATS64(rx, rx_jbr_frames); + rmon_stats->oversize_pkts = + BNXT_GET_RX_PORT_STATS64(rx, rx_ovrsz_frames); + rmon_stats->undersize_pkts = + BNXT_GET_RX_PORT_STATS64(rx, rx_undrsz_frames); + + rmon_stats->hist[0] = BNXT_GET_RX_PORT_STATS64(rx, rx_64b_frames); + rmon_stats->hist[1] = BNXT_GET_RX_PORT_STATS64(rx, rx_65b_127b_frames); + rmon_stats->hist[2] = BNXT_GET_RX_PORT_STATS64(rx, rx_128b_255b_frames); + rmon_stats->hist[3] = BNXT_GET_RX_PORT_STATS64(rx, rx_256b_511b_frames); + rmon_stats->hist[4] = + BNXT_GET_RX_PORT_STATS64(rx, rx_512b_1023b_frames); + rmon_stats->hist[5] = + BNXT_GET_RX_PORT_STATS64(rx, rx_1024b_1518b_frames); + rmon_stats->hist[6] = + BNXT_GET_RX_PORT_STATS64(rx, rx_1519b_2047b_frames); + rmon_stats->hist[7] = + BNXT_GET_RX_PORT_STATS64(rx, rx_2048b_4095b_frames); + rmon_stats->hist[8] = + BNXT_GET_RX_PORT_STATS64(rx, rx_4096b_9216b_frames); + rmon_stats->hist[9] = + BNXT_GET_RX_PORT_STATS64(rx, rx_9217b_16383b_frames); + + rmon_stats->hist_tx[0] = + BNXT_GET_TX_PORT_STATS64(tx, tx_64b_frames); + rmon_stats->hist_tx[1] = + BNXT_GET_TX_PORT_STATS64(tx, tx_65b_127b_frames); + rmon_stats->hist_tx[2] = + BNXT_GET_TX_PORT_STATS64(tx, tx_128b_255b_frames); + rmon_stats->hist_tx[3] = + BNXT_GET_TX_PORT_STATS64(tx, tx_256b_511b_frames); + rmon_stats->hist_tx[4] = + BNXT_GET_TX_PORT_STATS64(tx, tx_512b_1023b_frames); + rmon_stats->hist_tx[5] = + BNXT_GET_TX_PORT_STATS64(tx, tx_1024b_1518b_frames); + rmon_stats->hist_tx[6] = + BNXT_GET_TX_PORT_STATS64(tx, tx_1519b_2047b_frames); + rmon_stats->hist_tx[7] = + BNXT_GET_TX_PORT_STATS64(tx, tx_2048b_4095b_frames); + rmon_stats->hist_tx[8] = + BNXT_GET_TX_PORT_STATS64(tx, tx_4096b_9216b_frames); + rmon_stats->hist_tx[9] = + BNXT_GET_TX_PORT_STATS64(tx, tx_9217b_16383b_frames); + + *ranges = bnxt_rmon_ranges; +} + void bnxt_ethtool_free(struct bnxt *bp) { kfree(bp->test_info); @@ -3991,6 +4132,7 @@ const struct ethtool_ops bnxt_ethtool_ops = { ETHTOOL_COALESCE_USE_ADAPTIVE_RX, .get_link_ksettings = bnxt_get_link_ksettings, .set_link_ksettings = bnxt_set_link_ksettings, + .get_fec_stats = bnxt_get_fec_stats, .get_fecparam = bnxt_get_fecparam, .set_fecparam = bnxt_set_fecparam, .get_pause_stats = bnxt_get_pause_stats, @@ -4034,4 +4176,8 @@ const struct ethtool_ops bnxt_ethtool_ops = { .set_dump = bnxt_set_dump, .get_dump_flag = bnxt_get_dump_flag, .get_dump_data = bnxt_get_dump_data, + .get_eth_phy_stats = bnxt_get_eth_phy_stats, + .get_eth_mac_stats = 
bnxt_get_eth_mac_stats, + .get_eth_ctrl_stats = bnxt_get_eth_ctrl_stats, + .get_rmon_stats = bnxt_get_rmon_stats, }; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index a217316228f4..eb00a219aa51 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -49,10 +49,6 @@ static int bnxt_hwrm_fwd_async_event_cmpl(struct bnxt *bp, static int bnxt_vf_ndo_prep(struct bnxt *bp, int vf_id) { - if (!test_bit(BNXT_STATE_OPEN, &bp->state)) { - netdev_err(bp->dev, "vf ndo called though PF is down\n"); - return -EINVAL; - } if (!bp->pf.active_vfs) { netdev_err(bp->dev, "vf ndo called though sriov is disabled\n"); return -EINVAL; @@ -113,7 +109,7 @@ static int bnxt_hwrm_func_qcfg_flags(struct bnxt *bp, struct bnxt_vf_info *vf) int rc; bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QCFG, -1, -1); - req.fid = cpu_to_le16(vf->fw_fid); + req.fid = cpu_to_le16(BNXT_PF(bp) ? vf->fw_fid : 0xffff); mutex_lock(&bp->hwrm_cmd_lock); rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (rc) { @@ -125,9 +121,9 @@ static int bnxt_hwrm_func_qcfg_flags(struct bnxt *bp, struct bnxt_vf_info *vf) return 0; } -static bool bnxt_is_trusted_vf(struct bnxt *bp, struct bnxt_vf_info *vf) +bool bnxt_is_trusted_vf(struct bnxt *bp, struct bnxt_vf_info *vf) { - if (!(bp->fw_cap & BNXT_FW_CAP_TRUSTED_VF)) + if (BNXT_PF(bp) && !(bp->fw_cap & BNXT_FW_CAP_TRUSTED_VF)) return !!(vf->flags & BNXT_VF_TRUST); bnxt_hwrm_func_qcfg_flags(bp, vf); @@ -1120,10 +1116,38 @@ void bnxt_hwrm_exec_fwd_req(struct bnxt *bp) } } +int bnxt_approve_mac(struct bnxt *bp, u8 *mac, bool strict) +{ + struct hwrm_func_vf_cfg_input req = {0}; + int rc = 0; + + if (!BNXT_VF(bp)) + return 0; + + if (bp->hwrm_spec_code < 0x10202) { + if (is_valid_ether_addr(bp->vf.mac_addr)) + rc = -EADDRNOTAVAIL; + goto mac_done; + } + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_VF_CFG, -1, -1); + req.enables = cpu_to_le32(FUNC_VF_CFG_REQ_ENABLES_DFLT_MAC_ADDR); + memcpy(req.dflt_mac_addr, mac, ETH_ALEN); + rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); +mac_done: + if (rc && strict) { + rc = -EADDRNOTAVAIL; + netdev_warn(bp->dev, "VF MAC address %pM not approved by the PF\n", + mac); + return rc; + } + return 0; +} + void bnxt_update_vf_mac(struct bnxt *bp) { struct hwrm_func_qcaps_input req = {0}; struct hwrm_func_qcaps_output *resp = bp->hwrm_cmd_resp_addr; + bool inform_pf = false; bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QCAPS, -1, -1); req.fid = cpu_to_le16(0xffff); @@ -1139,42 +1163,24 @@ void bnxt_update_vf_mac(struct bnxt *bp) * default but the stored zero MAC will allow the VF user to change * the random MAC address using ndo_set_mac_address() if he wants. */ - if (!ether_addr_equal(resp->mac_address, bp->vf.mac_addr)) + if (!ether_addr_equal(resp->mac_address, bp->vf.mac_addr)) { memcpy(bp->vf.mac_addr, resp->mac_address, ETH_ALEN); + /* This means we are now using our own MAC address, let + * the PF know about this MAC address. 
+ */ + if (!is_valid_ether_addr(bp->vf.mac_addr)) + inform_pf = true; + } /* overwrite netdev dev_addr with admin VF MAC */ if (is_valid_ether_addr(bp->vf.mac_addr)) memcpy(bp->dev->dev_addr, bp->vf.mac_addr, ETH_ALEN); update_vf_mac_exit: mutex_unlock(&bp->hwrm_cmd_lock); + if (inform_pf) + bnxt_approve_mac(bp, bp->dev->dev_addr, false); } -int bnxt_approve_mac(struct bnxt *bp, u8 *mac, bool strict) -{ - struct hwrm_func_vf_cfg_input req = {0}; - int rc = 0; - - if (!BNXT_VF(bp)) - return 0; - - if (bp->hwrm_spec_code < 0x10202) { - if (is_valid_ether_addr(bp->vf.mac_addr)) - rc = -EADDRNOTAVAIL; - goto mac_done; - } - bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_VF_CFG, -1, -1); - req.enables = cpu_to_le32(FUNC_VF_CFG_REQ_ENABLES_DFLT_MAC_ADDR); - memcpy(req.dflt_mac_addr, mac, ETH_ALEN); - rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); -mac_done: - if (rc && strict) { - rc = -EADDRNOTAVAIL; - netdev_warn(bp->dev, "VF MAC address %pM not approved by the PF\n", - mac); - return rc; - } - return 0; -} #else int bnxt_cfg_hw_sriov(struct bnxt *bp, int *num_vfs, bool reset) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h index 629641bf6fc5..995535e4c11b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h @@ -34,6 +34,7 @@ int bnxt_set_vf_vlan(struct net_device *, int, u16, u8, __be16); int bnxt_set_vf_bw(struct net_device *, int, int, int); int bnxt_set_vf_link_state(struct net_device *, int, int); int bnxt_set_vf_spoofchk(struct net_device *, int, bool); +bool bnxt_is_trusted_vf(struct bnxt *bp, struct bnxt_vf_info *vf); int bnxt_set_vf_trust(struct net_device *dev, int vf_id, bool trust); int bnxt_sriov_configure(struct pci_dev *pdev, int num_vfs); int bnxt_cfg_hw_sriov(struct bnxt *bp, int *num_vfs, bool reset); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index 64dbbb04b043..a918e374f3c5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -491,3 +491,4 @@ struct bnxt_en_dev *bnxt_ulp_probe(struct net_device *dev) } return bp->edev; } +EXPORT_SYMBOL(bnxt_ulp_probe); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c index 4b5c8fd76a51..dd66302343a2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c @@ -284,8 +284,26 @@ void bnxt_vf_reps_open(struct bnxt *bp) if (bp->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV) return; - for (i = 0; i < pci_num_vf(bp->pdev); i++) - bnxt_vf_rep_open(bp->vf_reps[i]->dev); + for (i = 0; i < pci_num_vf(bp->pdev); i++) { + /* Open the VF-Rep only if it is allocated in the FW */ + if (bp->vf_reps[i]->tx_cfa_action != CFA_HANDLE_INVALID) + bnxt_vf_rep_open(bp->vf_reps[i]->dev); + } +} + +static void __bnxt_free_one_vf_rep(struct bnxt *bp, struct bnxt_vf_rep *vf_rep) +{ + if (!vf_rep) + return; + + if (vf_rep->dst) { + dst_release((struct dst_entry *)vf_rep->dst); + vf_rep->dst = NULL; + } + if (vf_rep->tx_cfa_action != CFA_HANDLE_INVALID) { + hwrm_cfa_vfr_free(bp, vf_rep->vf_idx); + vf_rep->tx_cfa_action = CFA_HANDLE_INVALID; + } } static void __bnxt_vf_reps_destroy(struct bnxt *bp) @@ -297,11 +315,7 @@ static void __bnxt_vf_reps_destroy(struct bnxt *bp) for (i = 0; i < num_vfs; i++) { vf_rep = bp->vf_reps[i]; if (vf_rep) { - dst_release((struct dst_entry *)vf_rep->dst); - - if 
(vf_rep->tx_cfa_action != CFA_HANDLE_INVALID) - hwrm_cfa_vfr_free(bp, vf_rep->vf_idx); - + __bnxt_free_one_vf_rep(bp, vf_rep); if (vf_rep->dev) { /* if register_netdev failed, then netdev_ops * would have been set to NULL @@ -350,6 +364,80 @@ void bnxt_vf_reps_destroy(struct bnxt *bp) __bnxt_vf_reps_destroy(bp); } +/* Free the VF-Reps in firmware, during firmware hot-reset processing. + * Note that the VF-Rep netdevs are still active (not unregistered) during + * this process. As the mode transition from SWITCHDEV to LEGACY happens + * under the rtnl_lock() this routine is safe under the rtnl_lock(). + */ +void bnxt_vf_reps_free(struct bnxt *bp) +{ + u16 num_vfs = pci_num_vf(bp->pdev); + int i; + + if (bp->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV) + return; + + for (i = 0; i < num_vfs; i++) + __bnxt_free_one_vf_rep(bp, bp->vf_reps[i]); +} + +static int bnxt_alloc_vf_rep(struct bnxt *bp, struct bnxt_vf_rep *vf_rep, + u16 *cfa_code_map) +{ + /* get cfa handles from FW */ + if (hwrm_cfa_vfr_alloc(bp, vf_rep->vf_idx, &vf_rep->tx_cfa_action, + &vf_rep->rx_cfa_code)) + return -ENOLINK; + + cfa_code_map[vf_rep->rx_cfa_code] = vf_rep->vf_idx; + vf_rep->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX, GFP_KERNEL); + if (!vf_rep->dst) + return -ENOMEM; + + /* only cfa_action is needed to mux a packet while TXing */ + vf_rep->dst->u.port_info.port_id = vf_rep->tx_cfa_action; + vf_rep->dst->u.port_info.lower_dev = bp->dev; + + return 0; +} + +/* Allocate the VF-Reps in firmware, during firmware hot-reset processing. + * Note that the VF-Rep netdevs are still active (not unregistered) during + * this process. As the mode transition from SWITCHDEV to LEGACY happens + * under the rtnl_lock() this routine is safe under the rtnl_lock(). + */ +int bnxt_vf_reps_alloc(struct bnxt *bp) +{ + u16 *cfa_code_map = bp->cfa_code_map, num_vfs = pci_num_vf(bp->pdev); + struct bnxt_vf_rep *vf_rep; + int rc, i; + + if (bp->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV) + return 0; + + if (!cfa_code_map) + return -EINVAL; + + for (i = 0; i < MAX_CFA_CODE; i++) + cfa_code_map[i] = VF_IDX_INVALID; + + for (i = 0; i < num_vfs; i++) { + vf_rep = bp->vf_reps[i]; + vf_rep->vf_idx = i; + + rc = bnxt_alloc_vf_rep(bp, vf_rep, cfa_code_map); + if (rc) + goto err; + } + + return 0; + +err: + netdev_info(bp->dev, "%s error=%d\n", __func__, rc); + bnxt_vf_reps_free(bp); + return rc; +} + /* Use the OUI of the PF's perm addr and report the same mac addr * for the same VF-rep each time */ @@ -428,25 +516,9 @@ static int bnxt_vf_reps_create(struct bnxt *bp) vf_rep->vf_idx = i; vf_rep->tx_cfa_action = CFA_HANDLE_INVALID; - /* get cfa handles from FW */ - rc = hwrm_cfa_vfr_alloc(bp, vf_rep->vf_idx, - &vf_rep->tx_cfa_action, - &vf_rep->rx_cfa_code); - if (rc) { - rc = -ENOLINK; + rc = bnxt_alloc_vf_rep(bp, vf_rep, cfa_code_map); + if (rc) goto err; - } - cfa_code_map[vf_rep->rx_cfa_code] = vf_rep->vf_idx; - - vf_rep->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX, - GFP_KERNEL); - if (!vf_rep->dst) { - rc = -ENOMEM; - goto err; - } - /* only cfa_action is needed to mux a packet while TXing */ - vf_rep->dst->u.port_info.port_id = vf_rep->tx_cfa_action; - vf_rep->dst->u.port_info.lower_dev = bp->dev; bnxt_vf_rep_netdev_init(bp, vf_rep, dev); rc = register_netdev(dev); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h index d7287651422f..5637a84884d7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h @@ -19,6 +19,8 @@ 
void bnxt_vf_reps_close(struct bnxt *bp); void bnxt_vf_reps_open(struct bnxt *bp); void bnxt_vf_rep_rx(struct bnxt *bp, struct sk_buff *skb); struct net_device *bnxt_get_vf_rep(struct bnxt *bp, u16 cfa_code); +int bnxt_vf_reps_alloc(struct bnxt *bp); +void bnxt_vf_reps_free(struct bnxt *bp); static inline u16 bnxt_vf_rep_get_fid(struct net_device *dev) { @@ -61,5 +63,15 @@ static inline bool bnxt_dev_is_vf_rep(struct net_device *dev) { return false; } + +static inline int bnxt_vf_reps_alloc(struct bnxt *bp) +{ + return 0; +} + +static inline void bnxt_vf_reps_free(struct bnxt *bp) +{ +} + #endif /* CONFIG_BNXT_SRIOV */ #endif /* BNXT_VFR_H */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 641303894341..ec9564e584e0 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -217,7 +217,7 @@ int bnxt_xdp_xmit(struct net_device *dev, int num_frames, struct pci_dev *pdev = bp->pdev; struct bnxt_tx_ring_info *txr; dma_addr_t mapping; - int drops = 0; + int nxmit = 0; int ring; int i; @@ -233,21 +233,17 @@ int bnxt_xdp_xmit(struct net_device *dev, int num_frames, struct xdp_frame *xdp = frames[i]; if (!txr || !bnxt_tx_avail(bp, txr) || - !(bp->bnapi[ring]->flags & BNXT_NAPI_FLAG_XDP)) { - xdp_return_frame_rx_napi(xdp); - drops++; - continue; - } + !(bp->bnapi[ring]->flags & BNXT_NAPI_FLAG_XDP)) + break; mapping = dma_map_single(&pdev->dev, xdp->data, xdp->len, DMA_TO_DEVICE); - if (dma_mapping_error(&pdev->dev, mapping)) { - xdp_return_frame_rx_napi(xdp); - drops++; - continue; - } + if (dma_mapping_error(&pdev->dev, mapping)) + break; + __bnxt_xmit_xdp_redirect(bp, txr, mapping, xdp->len, xdp); + nxmit++; } if (flags & XDP_XMIT_FLUSH) { @@ -256,7 +252,7 @@ int bnxt_xdp_xmit(struct net_device *dev, int num_frames, bnxt_db_write(bp, &txr->tx_db, txr->tx_prod); } - return num_frames - drops; + return nxmit; } /* Under rtnl_lock */ diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c index 1c86eddb1b51..facde824bcaa 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c @@ -18,7 +18,6 @@ #include <linux/delay.h> #include <linux/pm.h> #include <linux/clk.h> -#include <linux/version.h> #include <linux/platform_device.h> #include <net/arp.h> diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index d2381929931b..b0e49643f483 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -13016,7 +13016,7 @@ static int tg3_test_nvram(struct tg3 *tp) if (!buf) return -ENOMEM; - i = pci_vpd_find_tag((u8 *)buf, 0, len, PCI_VPD_LRDT_RO_DATA); + i = pci_vpd_find_tag((u8 *)buf, len, PCI_VPD_LRDT_RO_DATA); if (i > 0) { j = pci_vpd_lrdt_size(&((u8 *)buf)[i]); if (j < 0) @@ -15629,7 +15629,7 @@ static void tg3_read_vpd(struct tg3 *tp) if (!vpd_data) goto out_no_vpd; - i = pci_vpd_find_tag(vpd_data, 0, vpdlen, PCI_VPD_LRDT_RO_DATA); + i = pci_vpd_find_tag(vpd_data, vpdlen, PCI_VPD_LRDT_RO_DATA); if (i < 0) goto out_not_found; diff --git a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c index 588c4804d10a..265c2fa6bbe0 100644 --- a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c +++ b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c @@ -524,6 +524,68 @@ bnad_set_pauseparam(struct net_device *netdev, return 0; } +static void bnad_get_txf_strings(u8 
**string, int f_num) +{ + ethtool_sprintf(string, "txf%d_ucast_octets", f_num); + ethtool_sprintf(string, "txf%d_ucast", f_num); + ethtool_sprintf(string, "txf%d_ucast_vlan", f_num); + ethtool_sprintf(string, "txf%d_mcast_octets", f_num); + ethtool_sprintf(string, "txf%d_mcast", f_num); + ethtool_sprintf(string, "txf%d_mcast_vlan", f_num); + ethtool_sprintf(string, "txf%d_bcast_octets", f_num); + ethtool_sprintf(string, "txf%d_bcast", f_num); + ethtool_sprintf(string, "txf%d_bcast_vlan", f_num); + ethtool_sprintf(string, "txf%d_errors", f_num); + ethtool_sprintf(string, "txf%d_filter_vlan", f_num); + ethtool_sprintf(string, "txf%d_filter_mac_sa", f_num); +} + +static void bnad_get_rxf_strings(u8 **string, int f_num) +{ + ethtool_sprintf(string, "rxf%d_ucast_octets", f_num); + ethtool_sprintf(string, "rxf%d_ucast", f_num); + ethtool_sprintf(string, "rxf%d_ucast_vlan", f_num); + ethtool_sprintf(string, "rxf%d_mcast_octets", f_num); + ethtool_sprintf(string, "rxf%d_mcast", f_num); + ethtool_sprintf(string, "rxf%d_mcast_vlan", f_num); + ethtool_sprintf(string, "rxf%d_bcast_octets", f_num); + ethtool_sprintf(string, "rxf%d_bcast", f_num); + ethtool_sprintf(string, "rxf%d_bcast_vlan", f_num); + ethtool_sprintf(string, "rxf%d_frame_drops", f_num); +} + +static void bnad_get_cq_strings(u8 **string, int q_num) +{ + ethtool_sprintf(string, "cq%d_producer_index", q_num); + ethtool_sprintf(string, "cq%d_consumer_index", q_num); + ethtool_sprintf(string, "cq%d_hw_producer_index", q_num); + ethtool_sprintf(string, "cq%d_intr", q_num); + ethtool_sprintf(string, "cq%d_poll", q_num); + ethtool_sprintf(string, "cq%d_schedule", q_num); + ethtool_sprintf(string, "cq%d_keep_poll", q_num); + ethtool_sprintf(string, "cq%d_complete", q_num); +} + +static void bnad_get_rxq_strings(u8 **string, int q_num) +{ + ethtool_sprintf(string, "rxq%d_packets", q_num); + ethtool_sprintf(string, "rxq%d_bytes", q_num); + ethtool_sprintf(string, "rxq%d_packets_with_error", q_num); + ethtool_sprintf(string, "rxq%d_allocbuf_failed", q_num); + ethtool_sprintf(string, "rxq%d_mapbuf_failed", q_num); + ethtool_sprintf(string, "rxq%d_producer_index", q_num); + ethtool_sprintf(string, "rxq%d_consumer_index", q_num); +} + +static void bnad_get_txq_strings(u8 **string, int q_num) +{ + ethtool_sprintf(string, "txq%d_packets", q_num); + ethtool_sprintf(string, "txq%d_bytes", q_num); + ethtool_sprintf(string, "txq%d_producer_index", q_num); + ethtool_sprintf(string, "txq%d_consumer_index", q_num); + ethtool_sprintf(string, "txq%d_hw_consumer_index", q_num); +} + static void bnad_get_strings(struct net_device *netdev, u32 stringset, u8 *string) { @@ -531,175 +593,57 @@ bnad_get_strings(struct net_device *netdev, u32 stringset, u8 *string) int i, j, q_num; u32 bmap; + if (stringset != ETH_SS_STATS) + return; + mutex_lock(&bnad->conf_mutex); - switch (stringset) { - case ETH_SS_STATS: - for (i = 0; i < BNAD_ETHTOOL_STATS_NUM; i++) { - BUG_ON(!(strlen(bnad_net_stats_strings[i]) < - ETH_GSTRING_LEN)); - strncpy(string, bnad_net_stats_strings[i], - ETH_GSTRING_LEN); - string += ETH_GSTRING_LEN; - } - bmap = bna_tx_rid_mask(&bnad->bna); - for (i = 0; bmap; i++) { - if (bmap & 1) { - sprintf(string, "txf%d_ucast_octets", i); - string += ETH_GSTRING_LEN; - sprintf(string, "txf%d_ucast", i); - string += ETH_GSTRING_LEN; - sprintf(string, "txf%d_ucast_vlan", i); - string += ETH_GSTRING_LEN; - sprintf(string, "txf%d_mcast_octets", i); - string += ETH_GSTRING_LEN; - sprintf(string, "txf%d_mcast", i); - string += ETH_GSTRING_LEN; - sprintf(string, 
"txf%d_mcast_vlan", i); - string += ETH_GSTRING_LEN; - sprintf(string, "txf%d_bcast_octets", i); - string += ETH_GSTRING_LEN; - sprintf(string, "txf%d_bcast", i); - string += ETH_GSTRING_LEN; - sprintf(string, "txf%d_bcast_vlan", i); - string += ETH_GSTRING_LEN; - sprintf(string, "txf%d_errors", i); - string += ETH_GSTRING_LEN; - sprintf(string, "txf%d_filter_vlan", i); - string += ETH_GSTRING_LEN; - sprintf(string, "txf%d_filter_mac_sa", i); - string += ETH_GSTRING_LEN; - } - bmap >>= 1; - } + for (i = 0; i < BNAD_ETHTOOL_STATS_NUM; i++) { + BUG_ON(!(strlen(bnad_net_stats_strings[i]) < ETH_GSTRING_LEN)); + ethtool_sprintf(&string, bnad_net_stats_strings[i]); + } - bmap = bna_rx_rid_mask(&bnad->bna); - for (i = 0; bmap; i++) { - if (bmap & 1) { - sprintf(string, "rxf%d_ucast_octets", i); - string += ETH_GSTRING_LEN; - sprintf(string, "rxf%d_ucast", i); - string += ETH_GSTRING_LEN; - sprintf(string, "rxf%d_ucast_vlan", i); - string += ETH_GSTRING_LEN; - sprintf(string, "rxf%d_mcast_octets", i); - string += ETH_GSTRING_LEN; - sprintf(string, "rxf%d_mcast", i); - string += ETH_GSTRING_LEN; - sprintf(string, "rxf%d_mcast_vlan", i); - string += ETH_GSTRING_LEN; - sprintf(string, "rxf%d_bcast_octets", i); - string += ETH_GSTRING_LEN; - sprintf(string, "rxf%d_bcast", i); - string += ETH_GSTRING_LEN; - sprintf(string, "rxf%d_bcast_vlan", i); - string += ETH_GSTRING_LEN; - sprintf(string, "rxf%d_frame_drops", i); - string += ETH_GSTRING_LEN; - } - bmap >>= 1; - } + bmap = bna_tx_rid_mask(&bnad->bna); + for (i = 0; bmap; i++) { + if (bmap & 1) + bnad_get_txf_strings(&string, i); + bmap >>= 1; + } - q_num = 0; - for (i = 0; i < bnad->num_rx; i++) { - if (!bnad->rx_info[i].rx) - continue; - for (j = 0; j < bnad->num_rxp_per_rx; j++) { - sprintf(string, "cq%d_producer_index", q_num); - string += ETH_GSTRING_LEN; - sprintf(string, "cq%d_consumer_index", q_num); - string += ETH_GSTRING_LEN; - sprintf(string, "cq%d_hw_producer_index", - q_num); - string += ETH_GSTRING_LEN; - sprintf(string, "cq%d_intr", q_num); - string += ETH_GSTRING_LEN; - sprintf(string, "cq%d_poll", q_num); - string += ETH_GSTRING_LEN; - sprintf(string, "cq%d_schedule", q_num); - string += ETH_GSTRING_LEN; - sprintf(string, "cq%d_keep_poll", q_num); - string += ETH_GSTRING_LEN; - sprintf(string, "cq%d_complete", q_num); - string += ETH_GSTRING_LEN; - q_num++; - } - } + bmap = bna_rx_rid_mask(&bnad->bna); + for (i = 0; bmap; i++, bmap >>= 1) { + if (bmap & 1) + bnad_get_rxf_strings(&string, i); + bmap >>= 1; + } - q_num = 0; - for (i = 0; i < bnad->num_rx; i++) { - if (!bnad->rx_info[i].rx) - continue; - for (j = 0; j < bnad->num_rxp_per_rx; j++) { - sprintf(string, "rxq%d_packets", q_num); - string += ETH_GSTRING_LEN; - sprintf(string, "rxq%d_bytes", q_num); - string += ETH_GSTRING_LEN; - sprintf(string, "rxq%d_packets_with_error", - q_num); - string += ETH_GSTRING_LEN; - sprintf(string, "rxq%d_allocbuf_failed", q_num); - string += ETH_GSTRING_LEN; - sprintf(string, "rxq%d_mapbuf_failed", q_num); - string += ETH_GSTRING_LEN; - sprintf(string, "rxq%d_producer_index", q_num); - string += ETH_GSTRING_LEN; - sprintf(string, "rxq%d_consumer_index", q_num); - string += ETH_GSTRING_LEN; - q_num++; - if (bnad->rx_info[i].rx_ctrl[j].ccb && - bnad->rx_info[i].rx_ctrl[j].ccb-> - rcb[1] && - bnad->rx_info[i].rx_ctrl[j].ccb-> - rcb[1]->rxq) { - sprintf(string, "rxq%d_packets", q_num); - string += ETH_GSTRING_LEN; - sprintf(string, "rxq%d_bytes", q_num); - string += ETH_GSTRING_LEN; - sprintf(string, - "rxq%d_packets_with_error", q_num); - string 
+= ETH_GSTRING_LEN; - sprintf(string, "rxq%d_allocbuf_failed", - q_num); - string += ETH_GSTRING_LEN; - sprintf(string, "rxq%d_mapbuf_failed", - q_num); - string += ETH_GSTRING_LEN; - sprintf(string, "rxq%d_producer_index", - q_num); - string += ETH_GSTRING_LEN; - sprintf(string, "rxq%d_consumer_index", - q_num); - string += ETH_GSTRING_LEN; - q_num++; - } - } - } + q_num = 0; + for (i = 0; i < bnad->num_rx; i++) { + if (!bnad->rx_info[i].rx) + continue; + for (j = 0; j < bnad->num_rxp_per_rx; j++) + bnad_get_cq_strings(&string, q_num++); + } - q_num = 0; - for (i = 0; i < bnad->num_tx; i++) { - if (!bnad->tx_info[i].tx) - continue; - for (j = 0; j < bnad->num_txq_per_tx; j++) { - sprintf(string, "txq%d_packets", q_num); - string += ETH_GSTRING_LEN; - sprintf(string, "txq%d_bytes", q_num); - string += ETH_GSTRING_LEN; - sprintf(string, "txq%d_producer_index", q_num); - string += ETH_GSTRING_LEN; - sprintf(string, "txq%d_consumer_index", q_num); - string += ETH_GSTRING_LEN; - sprintf(string, "txq%d_hw_consumer_index", - q_num); - string += ETH_GSTRING_LEN; - q_num++; - } + q_num = 0; + for (i = 0; i < bnad->num_rx; i++) { + if (!bnad->rx_info[i].rx) + continue; + for (j = 0; j < bnad->num_rxp_per_rx; j++) { + bnad_get_rxq_strings(&string, q_num++); + if (bnad->rx_info[i].rx_ctrl[j].ccb && + bnad->rx_info[i].rx_ctrl[j].ccb->rcb[1] && + bnad->rx_info[i].rx_ctrl[j].ccb->rcb[1]->rxq) + bnad_get_rxq_strings(&string, q_num++); } + } - break; - - default: - break; + q_num = 0; + for (i = 0; i < bnad->num_tx; i++) { + if (!bnad->tx_info[i].tx) + continue; + for (j = 0; j < bnad->num_txq_per_tx; j++) + bnad_get_txq_strings(&string, q_num++); } mutex_unlock(&bnad->conf_mutex); diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index d8c68906525a..d8d87213697c 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -159,6 +159,16 @@ #define GEM_PEFTN 0x01f4 /* PTP Peer Event Frame Tx Ns */ #define GEM_PEFRSL 0x01f8 /* PTP Peer Event Frame Rx Sec Low */ #define GEM_PEFRN 0x01fc /* PTP Peer Event Frame Rx Ns */ +#define GEM_PCSCNTRL 0x0200 /* PCS Control */ +#define GEM_PCSSTS 0x0204 /* PCS Status */ +#define GEM_PCSPHYTOPID 0x0208 /* PCS PHY Top ID */ +#define GEM_PCSPHYBOTID 0x020c /* PCS PHY Bottom ID */ +#define GEM_PCSANADV 0x0210 /* PCS AN Advertisement */ +#define GEM_PCSANLPBASE 0x0214 /* PCS AN Link Partner Base */ +#define GEM_PCSANEXP 0x0218 /* PCS AN Expansion */ +#define GEM_PCSANNPTX 0x021c /* PCS AN Next Page TX */ +#define GEM_PCSANNPLP 0x0220 /* PCS AN Next Page LP */ +#define GEM_PCSANEXTSTS 0x023c /* PCS AN Extended Status */ #define GEM_DCFG1 0x0280 /* Design Config 1 */ #define GEM_DCFG2 0x0284 /* Design Config 2 */ #define GEM_DCFG3 0x0288 /* Design Config 3 */ @@ -478,6 +488,10 @@ #define GEM_HS_MAC_SPEED_OFFSET 0 #define GEM_HS_MAC_SPEED_SIZE 3 +/* Bitfields in PCSCNTRL */ +#define GEM_PCSAUTONEG_OFFSET 12 +#define GEM_PCSAUTONEG_SIZE 1 + /* Bitfields in DCFG1. */ #define GEM_IRQCOR_OFFSET 23 #define GEM_IRQCOR_SIZE 1 diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 15362d016a87..0e94db9cd45d 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -694,6 +694,22 @@ static void macb_mac_config(struct phylink_config *config, unsigned int mode, if (old_ncr ^ ncr) macb_or_gem_writel(bp, NCR, ncr); + /* Disable AN for SGMII fixed link configuration, enable otherwise. 
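A note on the ethtool_sprintf() conversion in the bnad hunk above: the helper formats into the current ETH_GSTRING_LEN-sized slot and advances the cursor itself, which is what lets the per-queue helpers take a u8 ** and be chained without manual pointer arithmetic. A minimal sketch with a hypothetical fixed queue count:

	static void example_get_strings(u8 *data)
	{
		u8 *s = data;
		int q;

		for (q = 0; q < 4; q++) {
			/* each call advances s by ETH_GSTRING_LEN */
			ethtool_sprintf(&s, "q%d_packets", q);
			ethtool_sprintf(&s, "q%d_bytes", q);
		}
	}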
+ * Must be written after PCSSEL is set in NCFGR, + * otherwise writes will not take effect. + */ + if (macb_is_gem(bp) && state->interface == PHY_INTERFACE_MODE_SGMII) { + u32 pcsctrl, old_pcsctrl; + + old_pcsctrl = gem_readl(bp, PCSCNTRL); + if (mode == MLO_AN_FIXED) + pcsctrl = old_pcsctrl & ~GEM_BIT(PCSAUTONEG); + else + pcsctrl = old_pcsctrl | GEM_BIT(PCSAUTONEG); + if (old_pcsctrl != pcsctrl) + gem_writel(bp, PCSCNTRL, pcsctrl); + } + spin_unlock_irqrestore(&bp->lock, flags); } @@ -847,6 +863,15 @@ static int macb_phylink_connect(struct macb *bp) return 0; } +static void macb_get_pcs_fixed_state(struct phylink_config *config, + struct phylink_link_state *state) +{ + struct net_device *ndev = to_net_dev(config->dev); + struct macb *bp = netdev_priv(ndev); + + state->link = (macb_readl(bp, NSR) & MACB_BIT(NSR_LINK)) != 0; +} + /* based on au1000_eth. c*/ static int macb_mii_probe(struct net_device *dev) { @@ -855,6 +880,11 @@ static int macb_mii_probe(struct net_device *dev) bp->phylink_config.dev = &dev->dev; bp->phylink_config.type = PHYLINK_NETDEV; + if (bp->phy_interface == PHY_INTERFACE_MODE_SGMII) { + bp->phylink_config.poll_fixed_state = true; + bp->phylink_config.get_fixed_state = macb_get_pcs_fixed_state; + } + bp->phylink = phylink_create(&bp->phylink_config, bp->pdev->dev.fwnode, bp->phy_interface, &macb_phylink_ops); if (IS_ERR(bp->phylink)) { @@ -3239,6 +3269,9 @@ static void gem_prog_cmp_regs(struct macb *bp, struct ethtool_rx_flow_spec *fs) bool cmp_b = false; bool cmp_c = false; + if (!macb_is_gem(bp)) + return; + tp4sp_v = &(fs->h_u.tcp_ip4_spec); tp4sp_m = &(fs->m_u.tcp_ip4_spec); @@ -3607,6 +3640,7 @@ static void macb_restore_features(struct macb *bp) { struct net_device *netdev = bp->dev; netdev_features_t features = netdev->features; + struct ethtool_rx_fs_item *item; /* TX checksum offload */ macb_set_txcsum_feature(bp, features); @@ -3615,6 +3649,9 @@ static void macb_restore_features(struct macb *bp) macb_set_rxcsum_feature(bp, features); /* RX Flow Filters */ + list_for_each_entry(item, &bp->rx_fs_list.list, list) + gem_prog_cmp_regs(bp, &item->fs); + macb_set_rxflow_feature(bp, features); } @@ -3728,17 +3765,15 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk, *hclk = devm_clk_get(&pdev->dev, "hclk"); } - if (IS_ERR_OR_NULL(*pclk)) { - err = IS_ERR(*pclk) ? PTR_ERR(*pclk) : -ENODEV; - dev_err(&pdev->dev, "failed to get macb_clk (%d)\n", err); - return err; - } + if (IS_ERR_OR_NULL(*pclk)) + return dev_err_probe(&pdev->dev, + IS_ERR(*pclk) ? PTR_ERR(*pclk) : -ENODEV, + "failed to get pclk\n"); - if (IS_ERR_OR_NULL(*hclk)) { - err = IS_ERR(*hclk) ? PTR_ERR(*hclk) : -ENODEV; - dev_err(&pdev->dev, "failed to get hclk (%d)\n", err); - return err; - } + if (IS_ERR_OR_NULL(*hclk)) + return dev_err_probe(&pdev->dev, + IS_ERR(*hclk) ? 
PTR_ERR(*hclk) : -ENODEV, + "failed to get hclk\n"); *tx_clk = devm_clk_get_optional(&pdev->dev, "tx_clk"); if (IS_ERR(*tx_clk)) @@ -3911,6 +3946,7 @@ static int macb_init(struct platform_device *pdev) reg = gem_readl(bp, DCFG8); bp->max_tuples = min((GEM_BFEXT(SCR2CMP, reg) / 3), GEM_BFEXT(T2SCR, reg)); + INIT_LIST_HEAD(&bp->rx_fs_list.list); if (bp->max_tuples > 0) { /* also needs one ethtype match to check IPv4 */ if (GEM_BFEXT(SCR2ETH, reg) > 0) { @@ -3921,7 +3957,6 @@ static int macb_init(struct platform_device *pdev) /* Filtering is supported in hw but don't enable it in kernel now */ dev->hw_features |= NETIF_F_NTUPLE; /* init Rx flow definitions */ - INIT_LIST_HEAD(&bp->rx_fs_list.list); bp->rx_fs_list.count = 0; spin_lock_init(&bp->rx_fs_lock); } else @@ -4614,7 +4649,6 @@ static int macb_probe(struct platform_device *pdev) struct net_device *dev; struct resource *regs; void __iomem *mem; - const char *mac; struct macb *bp; int err, val; @@ -4729,15 +4763,11 @@ static int macb_probe(struct platform_device *pdev) if (bp->caps & MACB_CAPS_NEEDS_RSTONUBR) bp->rx_intr_mask |= MACB_BIT(RXUBR); - mac = of_get_mac_address(np); - if (PTR_ERR(mac) == -EPROBE_DEFER) { - err = -EPROBE_DEFER; + err = of_get_mac_address(np, bp->dev->dev_addr); + if (err == -EPROBE_DEFER) goto err_out_free_netdev; - } else if (!IS_ERR_OR_NULL(mac)) { - ether_addr_copy(bp->dev->dev_addr, mac); - } else { + else if (err) macb_get_hwaddr(bp); - } err = of_get_phy_mode(np, &interface); if (err) diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_regs.h b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_regs.h index e6d4ad99cc38..3f1c189646f4 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_regs.h +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_regs.h @@ -521,7 +521,7 @@ #define CN23XX_BAR1_INDEX_OFFSET 3 #define CN23XX_PEM_BAR1_INDEX_REG(port, idx) \ - (CN23XX_PEM_BAR1_INDEX_START + ((port) << CN23XX_PEM_OFFSET) + \ + (CN23XX_PEM_BAR1_INDEX_START + (((u64)port) << CN23XX_PEM_OFFSET) + \ ((idx) << CN23XX_BAR1_INDEX_OFFSET)) /*############################ DPI #########################*/ diff --git a/drivers/net/ethernet/cavium/liquidio/cn66xx_regs.h b/drivers/net/ethernet/cavium/liquidio/cn66xx_regs.h index b248966837b4..7aad40b2aa73 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn66xx_regs.h +++ b/drivers/net/ethernet/cavium/liquidio/cn66xx_regs.h @@ -412,7 +412,7 @@ | CN6XXX_INTR_M0UNWI_ERR \ | CN6XXX_INTR_M1UPB0_ERR \ | CN6XXX_INTR_M1UPWI_ERR \ - | CN6XXX_INTR_M1UPB0_ERR \ + | CN6XXX_INTR_M1UNB0_ERR \ | CN6XXX_INTR_M1UNWI_ERR \ | CN6XXX_INTR_INSTR_DB_OF_ERR \ | CN6XXX_INTR_SLIST_DB_OF_ERR \ diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c index ecffebd513be..48ff6fb0eed9 100644 --- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c +++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c @@ -1385,7 +1385,6 @@ static int octeon_mgmt_probe(struct platform_device *pdev) struct net_device *netdev; struct octeon_mgmt *p; const __be32 *data; - const u8 *mac; struct resource *res_mix; struct resource *res_agl; struct resource *res_agl_prt_ctl; @@ -1502,11 +1501,8 @@ static int octeon_mgmt_probe(struct platform_device *pdev) netdev->min_mtu = 64 - OCTEON_MGMT_RX_HEADROOM; netdev->max_mtu = 16383 - OCTEON_MGMT_RX_HEADROOM - VLAN_HLEN; - mac = of_get_mac_address(pdev->dev.of_node); - - if (!IS_ERR(mac)) - ether_addr_copy(netdev->dev_addr, mac); - else + result = of_get_mac_address(pdev->dev.of_node, netdev->dev_addr); + if (result) 
eth_hw_addr_random(netdev); p->phy_np = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index f782e6af45e9..50bbe79fb93d 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -776,7 +776,7 @@ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs, mbx.rq.msg = NIC_MBOX_MSG_RQ_CFG; mbx.rq.qs_num = qs->vnic_id; mbx.rq.rq_num = qidx; - mbx.rq.cfg = (rq->caching << 26) | (rq->cq_qs << 19) | + mbx.rq.cfg = ((u64)rq->caching << 26) | (rq->cq_qs << 19) | (rq->cq_idx << 16) | (rq->cont_rbdr_qs << 9) | (rq->cont_qs_rbdr_idx << 8) | (rq->start_rbdr_qs << 1) | (rq->start_qs_rbdr_idx); diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 8ff28ed04b7f..0c783aadf393 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -1474,7 +1474,6 @@ static int bgx_init_of_phy(struct bgx *bgx) device_for_each_child_node(&bgx->pdev->dev, fwn) { struct phy_device *pd; struct device_node *phy_np; - const char *mac; /* Should always be an OF node. But if it is not, we * cannot handle it, so exit the loop. @@ -1483,9 +1482,7 @@ static int bgx_init_of_phy(struct bgx *bgx) if (!node) break; - mac = of_get_mac_address(node); - if (!IS_ERR(mac)) - ether_addr_copy(bgx->lmac[lmac].mac, mac); + of_get_mac_address(node, bgx->lmac[lmac].mac); SET_NETDEV_DEV(&bgx->lmac[lmac].netdev, &bgx->pdev->dev); bgx->lmac[lmac].lmacid = lmac; diff --git a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c index ce28820c57c9..12fcf84d67ad 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c +++ b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c @@ -323,8 +323,7 @@ void t4_cleanup_clip_tbl(struct adapter *adap) struct clip_tbl *ctbl = adap->clipt; if (ctbl) { - if (ctbl->cl_list) - kvfree(ctbl->cl_list); + kvfree(ctbl->cl_list); kvfree(ctbl); } } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index 6c85a10f465c..a7f291c89702 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -1794,11 +1794,25 @@ int cudbg_collect_sge_indirect(struct cudbg_init *pdbg_init, struct cudbg_buffer temp_buff = { 0 }; struct sge_qbase_reg_field *sge_qbase; struct ireg_buf *ch_sge_dbg; + u8 padap_running = 0; int i, rc; + u32 size; - rc = cudbg_get_buff(pdbg_init, dbg_buff, - sizeof(*ch_sge_dbg) * 2 + sizeof(*sge_qbase), - &temp_buff); + /* Accessing SGE_QBASE_MAP[0-3] and SGE_QBASE_INDEX regs can + * lead to SGE missing doorbells under heavy traffic. So, only + * collect them when adapter is idle. 
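The (u64) casts in the nicvf hunk above and in the cxgb4 filter hunks below all address the same C pitfall: a u8 operand is promoted to signed int before the shift, so once the shifted value reaches bit 31 it sign-extends when widened into a 64-bit field. A standalone userspace illustration (demonstration only; shifting into the sign bit is formally undefined behavior, but this is exactly what the casts guard against):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint8_t v = 0xff;
		uint64_t bad = v << 24;            /* promoted int goes negative and
						      widens to 0xffffffffff000000 */
		uint64_t good = (uint64_t)v << 24; /* stays 0x00000000ff000000 */

		printf("bad=%llx good=%llx\n",
		       (unsigned long long)bad, (unsigned long long)good);
		return 0;
	}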
+ */ + for_each_port(padap, i) { + padap_running = netif_running(padap->port[i]); + if (padap_running) + break; + } + + size = sizeof(*ch_sge_dbg) * 2; + if (!padap_running) + size += sizeof(*sge_qbase); + + rc = cudbg_get_buff(pdbg_init, dbg_buff, size, &temp_buff); if (rc) return rc; @@ -1820,7 +1834,8 @@ int cudbg_collect_sge_indirect(struct cudbg_init *pdbg_init, ch_sge_dbg++; } - if (CHELSIO_CHIP_VERSION(padap->params.chip) > CHELSIO_T5) { + if (CHELSIO_CHIP_VERSION(padap->params.chip) > CHELSIO_T5 && + !padap_running) { sge_qbase = (struct sge_qbase_reg_field *)ch_sge_dbg; /* 1 addr reg SGE_QBASE_INDEX and 4 data reg * SGE_QBASE_MAP[0-3] @@ -3536,8 +3551,7 @@ out: } out_free: - if (data) - kvfree(data); + kvfree(data); #undef QDESC_GET_FLQ #undef QDESC_GET_RXQ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c index 77648e4ab4cc..dd66b244466d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c @@ -157,8 +157,7 @@ static int cudbg_alloc_compress_buff(struct cudbg_init *pdbg_init) static void cudbg_free_compress_buff(struct cudbg_init *pdbg_init) { - if (pdbg_init->compress_buff) - vfree(pdbg_init->compress_buff); + vfree(pdbg_init->compress_buff); } int cxgb4_cudbg_collect(struct adapter *adap, void *buf, u32 *buf_size, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 83b46440408b..bc581b149b11 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -174,31 +174,31 @@ static void set_nat_params(struct adapter *adap, struct filter_entry *f, WORD_MASK, f->fs.nat_lip[15] | f->fs.nat_lip[14] << 8 | f->fs.nat_lip[13] << 16 | - f->fs.nat_lip[12] << 24, 1); + (u64)f->fs.nat_lip[12] << 24, 1); set_tcb_field(adap, f, tid, TCB_SND_UNA_RAW_W + 1, WORD_MASK, f->fs.nat_lip[11] | f->fs.nat_lip[10] << 8 | f->fs.nat_lip[9] << 16 | - f->fs.nat_lip[8] << 24, 1); + (u64)f->fs.nat_lip[8] << 24, 1); set_tcb_field(adap, f, tid, TCB_SND_UNA_RAW_W + 2, WORD_MASK, f->fs.nat_lip[7] | f->fs.nat_lip[6] << 8 | f->fs.nat_lip[5] << 16 | - f->fs.nat_lip[4] << 24, 1); + (u64)f->fs.nat_lip[4] << 24, 1); set_tcb_field(adap, f, tid, TCB_SND_UNA_RAW_W + 3, WORD_MASK, f->fs.nat_lip[3] | f->fs.nat_lip[2] << 8 | f->fs.nat_lip[1] << 16 | - f->fs.nat_lip[0] << 24, 1); + (u64)f->fs.nat_lip[0] << 24, 1); } else { set_tcb_field(adap, f, tid, TCB_RX_FRAG3_LEN_RAW_W, WORD_MASK, f->fs.nat_lip[3] | f->fs.nat_lip[2] << 8 | f->fs.nat_lip[1] << 16 | - f->fs.nat_lip[0] << 24, 1); + (u64)f->fs.nat_lip[0] << 24, 1); } } @@ -208,25 +208,25 @@ static void set_nat_params(struct adapter *adap, struct filter_entry *f, WORD_MASK, f->fs.nat_fip[15] | f->fs.nat_fip[14] << 8 | f->fs.nat_fip[13] << 16 | - f->fs.nat_fip[12] << 24, 1); + (u64)f->fs.nat_fip[12] << 24, 1); set_tcb_field(adap, f, tid, TCB_RX_FRAG2_PTR_RAW_W + 1, WORD_MASK, f->fs.nat_fip[11] | f->fs.nat_fip[10] << 8 | f->fs.nat_fip[9] << 16 | - f->fs.nat_fip[8] << 24, 1); + (u64)f->fs.nat_fip[8] << 24, 1); set_tcb_field(adap, f, tid, TCB_RX_FRAG2_PTR_RAW_W + 2, WORD_MASK, f->fs.nat_fip[7] | f->fs.nat_fip[6] << 8 | f->fs.nat_fip[5] << 16 | - f->fs.nat_fip[4] << 24, 1); + (u64)f->fs.nat_fip[4] << 24, 1); set_tcb_field(adap, f, tid, TCB_RX_FRAG2_PTR_RAW_W + 3, WORD_MASK, f->fs.nat_fip[3] | f->fs.nat_fip[2] << 8 | f->fs.nat_fip[1] << 16 | - f->fs.nat_fip[0] << 24, 1); + (u64)f->fs.nat_fip[0] << 24, 1); } else { set_tcb_field(adap, 
f, tid, @@ -234,13 +234,13 @@ static void set_nat_params(struct adapter *adap, struct filter_entry *f, WORD_MASK, f->fs.nat_fip[3] | f->fs.nat_fip[2] << 8 | f->fs.nat_fip[1] << 16 | - f->fs.nat_fip[0] << 24, 1); + (u64)f->fs.nat_fip[0] << 24, 1); } } set_tcb_field(adap, f, tid, TCB_PDU_HDR_LEN_W, WORD_MASK, (dp ? (nat_lp[1] | nat_lp[0] << 8) : 0) | - (sp ? (nat_fp[1] << 16 | nat_fp[0] << 24) : 0), + (sp ? (nat_fp[1] << 16 | (u64)nat_fp[0] << 24) : 0), 1); } @@ -979,7 +979,7 @@ void clear_filter(struct adapter *adap, struct filter_entry *f) { struct port_info *pi = netdev_priv(f->dev); - /* If the new or old filter have loopback rewriteing rules then we'll + /* If the new or old filter have loopback rewriting rules then we'll * need to free any existing L2T, SMT, CLIP entries of filter * rule. */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c index 2e309f6673f7..28fd2de9e4cf 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c @@ -48,6 +48,11 @@ static int cxgb4_matchall_egress_validate(struct net_device *dev, flow_action_for_each(i, entry, actions) { switch (entry->id) { case FLOW_ACTION_POLICE: + if (entry->police.rate_pkt_ps) { + NL_SET_ERR_MSG_MOD(extack, + "QoS offload not support packets per second"); + return -EOPNOTSUPP; + } /* Convert bytes per second to bits per second */ if (entry->police.rate_bytes_ps * 8 > max_link_rate) { NL_SET_ERR_MSG_MOD(extack, @@ -145,7 +150,11 @@ static int cxgb4_matchall_alloc_tc(struct net_device *dev, flow_action_for_each(i, entry, &cls->rule->action) if (entry->id == FLOW_ACTION_POLICE) break; - + if (entry->police.rate_pkt_ps) { + NL_SET_ERR_MSG_MOD(extack, + "QoS offload not support packets per second"); + return -EOPNOTSUPP; + } /* Convert from bytes per second to Kbps */ p.u.params.maxrate = div_u64(entry->police.rate_bytes_ps * 8, 1000); p.u.params.channel = pi->tx_chan; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c index dede02505ceb..a5d2f84dcdd5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c @@ -524,13 +524,9 @@ struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap) out_no_mem: for (i = 0; i < t->size; i++) { struct cxgb4_link *link = &t->table[i]; - - if (link->tid_map) - kvfree(link->tid_map); + kvfree(link->tid_map); } - - if (t) - kvfree(t); + kvfree(t); return NULL; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 98829e482bfa..9428ef1f04a8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -2090,7 +2090,8 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) 0x1190, 0x1194, 0x11a0, 0x11a4, 0x11b0, 0x11b4, - 0x11fc, 0x1274, + 0x11fc, 0x123c, + 0x1254, 0x1274, 0x1280, 0x133c, 0x1800, 0x18fc, 0x3000, 0x302c, @@ -2774,7 +2775,7 @@ int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p) if (id_len > ID_LEN) id_len = ID_LEN; - i = pci_vpd_find_tag(vpd, 0, VPD_LEN, PCI_VPD_LRDT_RO_DATA); + i = pci_vpd_find_tag(vpd, VPD_LEN, PCI_VPD_LRDT_RO_DATA); if (i < 0) { dev_err(adapter->pdev_dev, "missing VPD-R section\n"); ret = -EINVAL; diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index 
169e10c91378..ef3f1e92632f 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -33,7 +33,6 @@ static int chcr_get_nfrags_to_send(struct sk_buff *skb, u32 start, u32 len) if (unlikely(start < skb_linear_data_len)) { frag_size = min(len, skb_linear_data_len - start); - start = 0; } else { start -= skb_linear_data_len; @@ -350,18 +349,6 @@ static int chcr_set_tcb_field(struct chcr_ktls_info *tx_info, u16 word, } /* - * chcr_ktls_mark_tcb_close: mark tcb state to CLOSE - * @tx_info - driver specific tls info. - * return: NET_TX_OK/NET_XMIT_DROP. - */ -static int chcr_ktls_mark_tcb_close(struct chcr_ktls_info *tx_info) -{ - return chcr_set_tcb_field(tx_info, TCB_T_STATE_W, - TCB_T_STATE_V(TCB_T_STATE_M), - CHCR_TCB_STATE_CLOSED, 1); -} - -/* * chcr_ktls_dev_del: call back for tls_dev_del. * Remove the tid and l2t entry and close the connection. * it per connection basis. @@ -395,8 +382,6 @@ static void chcr_ktls_dev_del(struct net_device *netdev, /* clear tid */ if (tx_info->tid != -1) { - /* clear tcb state and then release tid */ - chcr_ktls_mark_tcb_close(tx_info); cxgb4_remove_tid(&tx_info->adap->tids, tx_info->tx_chan, tx_info->tid, tx_info->ip_family); } @@ -574,7 +559,6 @@ static int chcr_ktls_dev_add(struct net_device *netdev, struct sock *sk, return 0; free_tid: - chcr_ktls_mark_tcb_close(tx_info); #if IS_ENABLED(CONFIG_IPV6) /* clear clip entry */ if (tx_info->ip_family == AF_INET6) @@ -672,10 +656,6 @@ static int chcr_ktls_cpl_act_open_rpl(struct adapter *adap, if (tx_info->pending_close) { spin_unlock(&tx_info->lock); if (!status) { - /* it's a late success, tcb status is established, - * mark it close. - */ - chcr_ktls_mark_tcb_close(tx_info); cxgb4_remove_tid(&tx_info->adap->tids, tx_info->tx_chan, tid, tx_info->ip_family); } @@ -722,7 +702,7 @@ static int chcr_ktls_cpl_set_tcb_rpl(struct adapter *adap, unsigned char *input) kvfree(tx_info); return 0; } - tx_info->open_state = false; + tx_info->open_state = CH_KTLS_OPEN_SUCCESS; spin_unlock(&tx_info->lock); complete(&tx_info->completion); @@ -892,10 +872,10 @@ static int chcr_ktls_xmit_tcb_cpls(struct chcr_ktls_info *tx_info, } /* update receive window */ if (first_wr || tx_info->prev_win != tcp_win) { - pos = chcr_write_cpl_set_tcb_ulp(tx_info, q, tx_info->tid, pos, - TCB_RCV_WND_W, - TCB_RCV_WND_V(TCB_RCV_WND_M), - TCB_RCV_WND_V(tcp_win), 0); + chcr_write_cpl_set_tcb_ulp(tx_info, q, tx_info->tid, pos, + TCB_RCV_WND_W, + TCB_RCV_WND_V(TCB_RCV_WND_M), + TCB_RCV_WND_V(tcp_win), 0); tx_info->prev_win = tcp_win; cpl++; } @@ -1504,7 +1484,6 @@ static int chcr_ktls_tx_plaintxt(struct chcr_ktls_info *tx_info, wr->op_to_compl = htonl(FW_WR_OP_V(FW_ULPTX_WR)); wr->flowid_len16 = htonl(wr_mid | FW_WR_LEN16_V(len16)); wr->cookie = 0; - pos += sizeof(*wr); /* ULP_TXPKT */ ulptx = (struct ulp_txpkt *)(wr + 1); ulptx->cmd_dest = htonl(ULPTX_CMD_V(ULP_TX_PKT) | @@ -1664,54 +1643,6 @@ static void chcr_ktls_copy_record_in_skb(struct sk_buff *nskb, } /* - * chcr_ktls_update_snd_una: Reset the SEND_UNA. It will be done to avoid - * sending the same segment again. It will discard the segment which is before - * the current tx max. - * @tx_info - driver specific tls info. - * @q - TX queue. - * return: NET_TX_OK/NET_XMIT_DROP. 
- */ -static int chcr_ktls_update_snd_una(struct chcr_ktls_info *tx_info, - struct sge_eth_txq *q) -{ - struct fw_ulptx_wr *wr; - unsigned int ndesc; - int credits; - void *pos; - u32 len; - - len = sizeof(*wr) + roundup(CHCR_SET_TCB_FIELD_LEN, 16); - ndesc = DIV_ROUND_UP(len, 64); - - credits = chcr_txq_avail(&q->q) - ndesc; - if (unlikely(credits < 0)) { - chcr_eth_txq_stop(q); - return NETDEV_TX_BUSY; - } - - pos = &q->q.desc[q->q.pidx]; - - wr = pos; - /* ULPTX wr */ - wr->op_to_compl = htonl(FW_WR_OP_V(FW_ULPTX_WR)); - wr->cookie = 0; - /* fill len in wr field */ - wr->flowid_len16 = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(len, 16))); - - pos += sizeof(*wr); - - pos = chcr_write_cpl_set_tcb_ulp(tx_info, q, tx_info->tid, pos, - TCB_SND_UNA_RAW_W, - TCB_SND_UNA_RAW_V(TCB_SND_UNA_RAW_M), - TCB_SND_UNA_RAW_V(0), 0); - - chcr_txq_advance(&q->q, ndesc); - cxgb4_ring_tx_db(tx_info->adap, &q->q, ndesc); - - return 0; -} - -/* * chcr_end_part_handler: This handler will handle the record which * is complete or if record's end part is received. T6 adapter has a issue that * it can't send out TAG with partial record so if its an end part then we have @@ -1735,7 +1666,9 @@ static int chcr_end_part_handler(struct chcr_ktls_info *tx_info, struct sge_eth_txq *q, u32 skb_offset, u32 tls_end_offset, bool last_wr) { + bool free_skb_if_tx_fails = false; struct sk_buff *nskb = NULL; + /* check if it is a complete record */ if (tls_end_offset == record->len) { nskb = skb; @@ -1758,6 +1691,8 @@ static int chcr_end_part_handler(struct chcr_ktls_info *tx_info, if (last_wr) dev_kfree_skb_any(skb); + else + free_skb_if_tx_fails = true; last_wr = true; @@ -1769,6 +1704,8 @@ static int chcr_end_part_handler(struct chcr_ktls_info *tx_info, record->num_frags, (last_wr && tcp_push_no_fin), mss)) { + if (free_skb_if_tx_fails) + dev_kfree_skb_any(skb); goto out; } tx_info->prev_seq = record->end_seq; @@ -1905,11 +1842,6 @@ static int chcr_short_record_handler(struct chcr_ktls_info *tx_info, /* reset tcp_seq as per the prior_data_required len */ tcp_seq -= prior_data_len; } - /* reset snd una, so the middle record won't send the already - * sent part. - */ - if (chcr_ktls_update_snd_una(tx_info, q)) - goto out; atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_middle_pkts); } else { atomic64_inc(&tx_info->adap->ch_ktls_stats.ktls_tx_start_pkts); @@ -2010,12 +1942,11 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) * we will send the complete record again. */ + spin_lock_irqsave(&tx_ctx->base.lock, flags); + do { - int i; cxgb4_reclaim_completed_tx(adap, &q->q, true); - /* lock taken */ - spin_lock_irqsave(&tx_ctx->base.lock, flags); /* fetch the tls record */ record = tls_get_record(&tx_ctx->base, tcp_seq, &tx_info->record_no); @@ -2074,11 +2005,11 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) tls_end_offset, skb_offset, 0); - spin_unlock_irqrestore(&tx_ctx->base.lock, flags); if (ret) { /* free the refcount taken earlier */ if (tls_end_offset < data_len) dev_kfree_skb_any(skb); + spin_unlock_irqrestore(&tx_ctx->base.lock, flags); goto out; } @@ -2088,16 +2019,6 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) continue; } - /* increase page reference count of the record, so that there - * won't be any chance of page free in middle if in case stack - * receives ACK and try to delete the record. 
- */ - for (i = 0; i < record->num_frags; i++) - __skb_frag_ref(&record->frags[i]); - /* lock cleared */ - spin_unlock_irqrestore(&tx_ctx->base.lock, flags); - - /* if a tls record is finishing in this SKB */ if (tls_end_offset <= data_len) { ret = chcr_end_part_handler(tx_info, skb, record, @@ -2122,13 +2043,9 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) data_len = 0; } - /* clear the frag ref count which increased locally before */ - for (i = 0; i < record->num_frags; i++) { - /* clear the frag ref count */ - __skb_frag_unref(&record->frags[i]); - } /* if any failure, come out from the loop. */ if (ret) { + spin_unlock_irqrestore(&tx_ctx->base.lock, flags); if (th->fin) dev_kfree_skb_any(skb); @@ -2143,6 +2060,7 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) } while (data_len > 0); + spin_unlock_irqrestore(&tx_ctx->base.lock, flags); atomic64_inc(&port_stats->ktls_tx_encrypted_packets); atomic64_add(skb_data_len, &port_stats->ktls_tx_encrypted_bytes); diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index f04ec53544ae..f48957a17c3a 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -3040,15 +3040,4 @@ static struct pci_driver enic_driver = { .remove = enic_remove, }; -static int __init enic_init_module(void) -{ - return pci_register_driver(&enic_driver); -} - -static void __exit enic_cleanup_module(void) -{ - pci_unregister_driver(&enic_driver); -} - -module_init(enic_init_module); -module_exit(enic_cleanup_module); +module_pci_driver(enic_driver); diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index 252adfa5d837..2a8bf53c2f75 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -1385,7 +1385,7 @@ static struct dm9000_plat_data *dm9000_parse_dt(struct device *dev) { struct dm9000_plat_data *pdata; struct device_node *np = dev->of_node; - const void *mac_addr; + int ret; if (!IS_ENABLED(CONFIG_OF) || !np) return ERR_PTR(-ENXIO); @@ -1399,11 +1399,9 @@ static struct dm9000_plat_data *dm9000_parse_dt(struct device *dev) if (of_find_property(np, "davicom,no-eeprom", NULL)) pdata->flags |= DM9000_PLATF_NO_EEPROM; - mac_addr = of_get_mac_address(np); - if (!IS_ERR(mac_addr)) - ether_addr_copy(pdata->dev_addr, mac_addr); - else if (PTR_ERR(mac_addr) == -EPROBE_DEFER) - return ERR_CAST(mac_addr); + ret = of_get_mac_address(np, pdata->dev_addr); + if (ret == -EPROBE_DEFER) + return ERR_PTR(ret); return pdata; } @@ -1471,8 +1469,10 @@ dm9000_probe(struct platform_device *pdev) /* Init network device */ ndev = alloc_etherdev(sizeof(struct board_info)); - if (!ndev) - return -ENOMEM; + if (!ndev) { + ret = -ENOMEM; + goto out_regulator_disable; + } SET_NETDEV_DEV(ndev, &pdev->dev); @@ -1522,7 +1522,6 @@ dm9000_probe(struct platform_device *pdev) if (ret) { dev_err(db->dev, "irq %d cannot set wakeup (%d)\n", db->irq_wake, ret); - ret = 0; } else { irq_set_irq_wake(db->irq_wake, 0); db->wake_supported = 1; diff --git a/drivers/net/ethernet/dec/tulip/de2104x.c b/drivers/net/ethernet/dec/tulip/de2104x.c index c3cbe55205a7..b018195f0243 100644 --- a/drivers/net/ethernet/dec/tulip/de2104x.c +++ b/drivers/net/ethernet/dec/tulip/de2104x.c @@ -2193,15 +2193,4 @@ static struct pci_driver de_driver = { .driver.pm = &de_pm_ops, }; -static int __init de_init (void) -{ - return pci_register_driver(&de_driver); -} - -static void __exit de_exit (void) -{ - 
pci_unregister_driver (&de_driver); -} - -module_init(de_init); -module_exit(de_exit); +module_pci_driver(de_driver); diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c index 89cbdc1f4857..514df170ec5d 100644 --- a/drivers/net/ethernet/dec/tulip/winbond-840.c +++ b/drivers/net/ethernet/dec/tulip/winbond-840.c @@ -1629,15 +1629,4 @@ static struct pci_driver w840_driver = { .driver.pm = &w840_pm_ops, }; -static int __init w840_init(void) -{ - return pci_register_driver(&w840_driver); -} - -static void __exit w840_exit(void) -{ - pci_unregister_driver(&w840_driver); -} - -module_init(w840_init); -module_exit(w840_exit); +module_pci_driver(w840_driver); diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c index e3a8858915b3..ce61f79f3b7c 100644 --- a/drivers/net/ethernet/dlink/sundance.c +++ b/drivers/net/ethernet/dlink/sundance.c @@ -963,7 +963,7 @@ static void tx_timeout(struct net_device *dev, unsigned int txqueue) unsigned long flag; netif_stop_queue(dev); - tasklet_disable(&np->tx_tasklet); + tasklet_disable_in_atomic(&np->tx_tasklet); iowrite16(0, ioaddr + IntrEnable); printk(KERN_WARNING "%s: Transmit timed out, TxStatus %2.2x " "TxFrameId %2.2x," @@ -1982,17 +1982,4 @@ static struct pci_driver sundance_driver = { .driver.pm = &sundance_pm_ops, }; -static int __init sundance_init(void) -{ - return pci_register_driver(&sundance_driver); -} - -static void __exit sundance_exit(void) -{ - pci_unregister_driver(&sundance_driver); -} - -module_init(sundance_init); -module_exit(sundance_exit); - - +module_pci_driver(sundance_driver); diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c index 3d9b0b161e24..e1b43b07755b 100644 --- a/drivers/net/ethernet/ethoc.c +++ b/drivers/net/ethernet/ethoc.c @@ -1151,11 +1151,7 @@ static int ethoc_probe(struct platform_device *pdev) ether_addr_copy(netdev->dev_addr, pdata->hwaddr); priv->phy_id = pdata->phy_id; } else { - const void *mac; - - mac = of_get_mac_address(pdev->dev.of_node); - if (!IS_ERR(mac)) - ether_addr_copy(netdev->dev_addr, mac); + of_get_mac_address(pdev->dev.of_node, netdev->dev_addr); priv->phy_id = -1; } diff --git a/drivers/net/ethernet/ezchip/nps_enet.c b/drivers/net/ethernet/ezchip/nps_enet.c index 815fb62c4b02..e3954d8835e7 100644 --- a/drivers/net/ethernet/ezchip/nps_enet.c +++ b/drivers/net/ethernet/ezchip/nps_enet.c @@ -575,7 +575,6 @@ static s32 nps_enet_probe(struct platform_device *pdev) struct net_device *ndev; struct nps_enet_priv *priv; s32 err = 0; - const char *mac_addr; if (!dev->of_node) return -ENODEV; @@ -602,10 +601,8 @@ static s32 nps_enet_probe(struct platform_device *pdev) dev_dbg(dev, "Registers base address is 0x%p\n", priv->regs_base); /* set kernel MAC address to dev */ - mac_addr = of_get_mac_address(dev->of_node); - if (!IS_ERR(mac_addr)) - ether_addr_copy(ndev->dev_addr, mac_addr); - else + err = of_get_mac_address(dev->of_node, ndev->dev_addr); + if (err) eth_hw_addr_random(ndev); /* Get IRQ number */ diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 88bfe2107938..04421aec2dfd 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1337,6 +1337,7 @@ static int ftgmac100_poll(struct napi_struct *napi, int budget) */ if (unlikely(priv->need_mac_restart)) { ftgmac100_start_hw(priv); + priv->need_mac_restart = false; /* Re-enable "bad" interrupts */ iowrite32(FTGMAC100_INT_BAD, diff --git 
a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c index 473b337b2e3b..5a1a8f2ea63c 100644 --- a/drivers/net/ethernet/faraday/ftmac100.c +++ b/drivers/net/ethernet/faraday/ftmac100.c @@ -1177,18 +1177,7 @@ static struct platform_driver ftmac100_driver = { /****************************************************************************** * initialization / finalization *****************************************************************************/ -static int __init ftmac100_init(void) -{ - return platform_driver_register(&ftmac100_driver); -} - -static void __exit ftmac100_exit(void) -{ - platform_driver_unregister(&ftmac100_driver); -} - -module_init(ftmac100_init); -module_exit(ftmac100_exit); +module_platform_driver(ftmac100_driver); MODULE_AUTHOR("Po-Yu Chuang <ratbert@faraday-tech.com>"); MODULE_DESCRIPTION("FTMAC100 driver"); diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c index c696651dd735..0908771aa9ac 100644 --- a/drivers/net/ethernet/fealnx.c +++ b/drivers/net/ethernet/fealnx.c @@ -1948,15 +1948,4 @@ static struct pci_driver fealnx_driver = { .remove = fealnx_remove_one, }; -static int __init fealnx_init(void) -{ - return pci_register_driver(&fealnx_driver); -} - -static void __exit fealnx_exit(void) -{ - pci_unregister_driver(&fealnx_driver); -} - -module_init(fealnx_init); -module_exit(fealnx_exit); +module_pci_driver(fealnx_driver); diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig index 3f9175bdce77..2d1abdd58fab 100644 --- a/drivers/net/ethernet/freescale/Kconfig +++ b/drivers/net/ethernet/freescale/Kconfig @@ -27,6 +27,7 @@ config FEC default ARCH_MXC || SOC_IMX28 if ARM select CRC32 select PHYLIB + imply NET_SELFTESTS imply PTP_1588_CLOCK help Say Y here if you want to use the built-in 10/100 Fast ethernet diff --git a/drivers/net/ethernet/freescale/Makefile b/drivers/net/ethernet/freescale/Makefile index 67c436400352..de7b31842233 100644 --- a/drivers/net/ethernet/freescale/Makefile +++ b/drivers/net/ethernet/freescale/Makefile @@ -24,6 +24,4 @@ obj-$(CONFIG_FSL_DPAA_ETH) += dpaa/ obj-$(CONFIG_FSL_DPAA2_ETH) += dpaa2/ -obj-$(CONFIG_FSL_ENETC) += enetc/ -obj-$(CONFIG_FSL_ENETC_MDIO) += enetc/ -obj-$(CONFIG_FSL_ENETC_VF) += enetc/ +obj-y += enetc/ diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 720dc99bd1fc..177c020bf34a 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -3081,7 +3081,7 @@ static int dpaa_xdp_xmit(struct net_device *net_dev, int n, struct xdp_frame **frames, u32 flags) { struct xdp_frame *xdpf; - int i, err, drops = 0; + int i, nxmit = 0; if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) return -EINVAL; @@ -3091,14 +3091,12 @@ static int dpaa_xdp_xmit(struct net_device *net_dev, int n, for (i = 0; i < n; i++) { xdpf = frames[i]; - err = dpaa_xdp_xmit_frame(net_dev, xdpf); - if (err) { - xdp_return_frame_rx_napi(xdpf); - drops++; - } + if (dpaa_xdp_xmit_frame(net_dev, xdpf)) + break; + nxmit++; } - return n - drops; + return nxmit; } static int dpaa_ts_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) diff --git a/drivers/net/ethernet/freescale/dpaa2/Kconfig b/drivers/net/ethernet/freescale/dpaa2/Kconfig index ee7a906e30b3..d029b69c3f18 100644 --- a/drivers/net/ethernet/freescale/dpaa2/Kconfig +++ b/drivers/net/ethernet/freescale/dpaa2/Kconfig @@ -29,3 +29,11 @@ config FSL_DPAA2_PTP_CLOCK help This driver adds support for using 
the DPAA2 1588 timer module as a PTP clock. + +config FSL_DPAA2_SWITCH + tristate "Freescale DPAA2 Ethernet Switch" + depends on BRIDGE || BRIDGE=n + depends on NET_SWITCHDEV + help + Driver for Freescale DPAA2 Ethernet Switch. This driver manages + switch objects discovered on the Freeescale MC bus. diff --git a/drivers/net/ethernet/freescale/dpaa2/Makefile b/drivers/net/ethernet/freescale/dpaa2/Makefile index 146cb3540e61..c2ef74052ef8 100644 --- a/drivers/net/ethernet/freescale/dpaa2/Makefile +++ b/drivers/net/ethernet/freescale/dpaa2/Makefile @@ -5,11 +5,13 @@ obj-$(CONFIG_FSL_DPAA2_ETH) += fsl-dpaa2-eth.o obj-$(CONFIG_FSL_DPAA2_PTP_CLOCK) += fsl-dpaa2-ptp.o +obj-$(CONFIG_FSL_DPAA2_SWITCH) += fsl-dpaa2-switch.o fsl-dpaa2-eth-objs := dpaa2-eth.o dpaa2-ethtool.o dpni.o dpaa2-mac.o dpmac.o dpaa2-eth-devlink.o fsl-dpaa2-eth-${CONFIG_FSL_DPAA2_ETH_DCB} += dpaa2-eth-dcb.o fsl-dpaa2-eth-${CONFIG_DEBUG_FS} += dpaa2-eth-debugfs.o fsl-dpaa2-ptp-objs := dpaa2-ptp.o dprtc.o +fsl-dpaa2-switch-objs := dpaa2-switch.o dpaa2-switch-ethtool.o dpsw.o dpaa2-switch-flower.o # Needed by the tracing framework CFLAGS_dpaa2-eth.o := -I$(src) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 492943bb9c48..e0c3c58e2ac7 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -223,31 +223,31 @@ static void dpaa2_eth_free_bufs(struct dpaa2_eth_priv *priv, u64 *buf_array, } } -static void dpaa2_eth_xdp_release_buf(struct dpaa2_eth_priv *priv, - struct dpaa2_eth_channel *ch, - dma_addr_t addr) +static void dpaa2_eth_recycle_buf(struct dpaa2_eth_priv *priv, + struct dpaa2_eth_channel *ch, + dma_addr_t addr) { int retries = 0; int err; - ch->xdp.drop_bufs[ch->xdp.drop_cnt++] = addr; - if (ch->xdp.drop_cnt < DPAA2_ETH_BUFS_PER_CMD) + ch->recycled_bufs[ch->recycled_bufs_cnt++] = addr; + if (ch->recycled_bufs_cnt < DPAA2_ETH_BUFS_PER_CMD) return; while ((err = dpaa2_io_service_release(ch->dpio, priv->bpid, - ch->xdp.drop_bufs, - ch->xdp.drop_cnt)) == -EBUSY) { + ch->recycled_bufs, + ch->recycled_bufs_cnt)) == -EBUSY) { if (retries++ >= DPAA2_ETH_SWP_BUSY_RETRIES) break; cpu_relax(); } if (err) { - dpaa2_eth_free_bufs(priv, ch->xdp.drop_bufs, ch->xdp.drop_cnt); - ch->buf_count -= ch->xdp.drop_cnt; + dpaa2_eth_free_bufs(priv, ch->recycled_bufs, ch->recycled_bufs_cnt); + ch->buf_count -= ch->recycled_bufs_cnt; } - ch->xdp.drop_cnt = 0; + ch->recycled_bufs_cnt = 0; } static int dpaa2_eth_xdp_flush(struct dpaa2_eth_priv *priv, @@ -300,7 +300,7 @@ static void dpaa2_eth_xdp_tx_flush(struct dpaa2_eth_priv *priv, ch->stats.xdp_tx++; } for (i = enqueued; i < fq->xdp_tx_fds.num; i++) { - dpaa2_eth_xdp_release_buf(priv, ch, dpaa2_fd_get_addr(&fds[i])); + dpaa2_eth_recycle_buf(priv, ch, dpaa2_fd_get_addr(&fds[i])); percpu_stats->tx_errors++; ch->stats.xdp_tx_err++; } @@ -382,7 +382,7 @@ static u32 dpaa2_eth_run_xdp(struct dpaa2_eth_priv *priv, trace_xdp_exception(priv->net_dev, xdp_prog, xdp_act); fallthrough; case XDP_DROP: - dpaa2_eth_xdp_release_buf(priv, ch, addr); + dpaa2_eth_recycle_buf(priv, ch, addr); ch->stats.xdp_drop++; break; case XDP_REDIRECT: @@ -403,7 +403,7 @@ static u32 dpaa2_eth_run_xdp(struct dpaa2_eth_priv *priv, free_pages((unsigned long)vaddr, 0); } else { ch->buf_count++; - dpaa2_eth_xdp_release_buf(priv, ch, addr); + dpaa2_eth_recycle_buf(priv, ch, addr); } ch->stats.xdp_drop++; } else { @@ -418,6 +418,35 @@ out: return xdp_act; } +static struct sk_buff *dpaa2_eth_copybreak(struct 
dpaa2_eth_channel *ch, + const struct dpaa2_fd *fd, + void *fd_vaddr) +{ + u16 fd_offset = dpaa2_fd_get_offset(fd); + struct dpaa2_eth_priv *priv = ch->priv; + u32 fd_length = dpaa2_fd_get_len(fd); + struct sk_buff *skb = NULL; + unsigned int skb_len; + + if (fd_length > priv->rx_copybreak) + return NULL; + + skb_len = fd_length + dpaa2_eth_needed_headroom(NULL); + + skb = napi_alloc_skb(&ch->napi, skb_len); + if (!skb) + return NULL; + + skb_reserve(skb, dpaa2_eth_needed_headroom(NULL)); + skb_put(skb, fd_length); + + memcpy(skb->data, fd_vaddr + fd_offset, fd_length); + + dpaa2_eth_recycle_buf(priv, ch, dpaa2_fd_get_addr(fd)); + + return skb; +} + /* Main Rx frame processing routine */ static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv, struct dpaa2_eth_channel *ch, @@ -459,9 +488,12 @@ static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv, return; } - dma_unmap_page(dev, addr, priv->rx_buf_size, - DMA_BIDIRECTIONAL); - skb = dpaa2_eth_build_linear_skb(ch, fd, vaddr); + skb = dpaa2_eth_copybreak(ch, fd, vaddr); + if (!skb) { + dma_unmap_page(dev, addr, priv->rx_buf_size, + DMA_BIDIRECTIONAL); + skb = dpaa2_eth_build_linear_skb(ch, fd, vaddr); + } } else if (fd_format == dpaa2_fd_sg) { WARN_ON(priv->xdp_prog); @@ -2431,8 +2463,6 @@ static int dpaa2_eth_xdp_xmit(struct net_device *net_dev, int n, percpu_stats->tx_packets += enqueued; for (i = 0; i < enqueued; i++) percpu_stats->tx_bytes += dpaa2_fd_get_len(&fds[i]); - for (i = enqueued; i < n; i++) - xdp_return_frame_rx_napi(frames[i]); return enqueued; } @@ -4304,6 +4334,8 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev) skb_queue_head_init(&priv->tx_skbs); + priv->rx_copybreak = DPAA2_ETH_DEFAULT_COPYBREAK; + /* Obtain a MC portal */ err = fsl_mc_portal_allocate(dpni_dev, FSL_MC_IO_ATOMIC_CONTEXT_PORTAL, &priv->mc_io); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h index 9b6a89709ce1..cdb623d5f2c1 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h @@ -438,8 +438,6 @@ struct dpaa2_eth_fq { struct dpaa2_eth_ch_xdp { struct bpf_prog *prog; - u64 drop_bufs[DPAA2_ETH_BUFS_PER_CMD]; - int drop_cnt; unsigned int res; }; @@ -457,6 +455,10 @@ struct dpaa2_eth_channel { struct dpaa2_eth_ch_xdp xdp; struct xdp_rxq_info xdp_rxq; struct list_head *rx_list; + + /* Buffers to be recycled back in the buffer pool */ + u64 recycled_bufs[DPAA2_ETH_BUFS_PER_CMD]; + int recycled_bufs_cnt; }; struct dpaa2_eth_dist_fields { @@ -487,6 +489,8 @@ struct dpaa2_eth_trap_data { struct dpaa2_eth_priv *priv; }; +#define DPAA2_ETH_DEFAULT_COPYBREAK 512 + /* Driver private data */ struct dpaa2_eth_priv { struct net_device *net_dev; @@ -567,6 +571,8 @@ struct dpaa2_eth_priv { struct devlink *devlink; struct dpaa2_eth_trap_data *trap_data; struct devlink_port devlink_port; + + u32 rx_copybreak; }; struct dpaa2_eth_devlink_priv { diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c index bf59708b869e..ad5e374eeccf 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c @@ -782,6 +782,44 @@ static int dpaa2_eth_get_ts_info(struct net_device *dev, return 0; } +static int dpaa2_eth_get_tunable(struct net_device *net_dev, + const struct ethtool_tunable *tuna, + void *data) +{ + struct dpaa2_eth_priv *priv = netdev_priv(net_dev); + int err = 0; + + switch (tuna->id) { + case ETHTOOL_RX_COPYBREAK: + 
*(u32 *)data = priv->rx_copybreak; + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static int dpaa2_eth_set_tunable(struct net_device *net_dev, + const struct ethtool_tunable *tuna, + const void *data) +{ + struct dpaa2_eth_priv *priv = netdev_priv(net_dev); + int err = 0; + + switch (tuna->id) { + case ETHTOOL_RX_COPYBREAK: + priv->rx_copybreak = *(u32 *)data; + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + const struct ethtool_ops dpaa2_ethtool_ops = { .get_drvinfo = dpaa2_eth_get_drvinfo, .nway_reset = dpaa2_eth_nway_reset, @@ -796,4 +834,6 @@ const struct ethtool_ops dpaa2_ethtool_ops = { .get_rxnfc = dpaa2_eth_get_rxnfc, .set_rxnfc = dpaa2_eth_set_rxnfc, .get_ts_info = dpaa2_eth_get_ts_info, + .get_tunable = dpaa2_eth_get_tunable, + .set_tunable = dpaa2_eth_set_tunable, }; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c new file mode 100644 index 000000000000..70e04321c420 --- /dev/null +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c @@ -0,0 +1,189 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DPAA2 Ethernet Switch ethtool support + * + * Copyright 2014-2016 Freescale Semiconductor Inc. + * Copyright 2017-2018 NXP + * + */ + +#include <linux/ethtool.h> + +#include "dpaa2-switch.h" + +static struct { + enum dpsw_counter id; + char name[ETH_GSTRING_LEN]; +} dpaa2_switch_ethtool_counters[] = { + {DPSW_CNT_ING_FRAME, "rx frames"}, + {DPSW_CNT_ING_BYTE, "rx bytes"}, + {DPSW_CNT_ING_FLTR_FRAME, "rx filtered frames"}, + {DPSW_CNT_ING_FRAME_DISCARD, "rx discarded frames"}, + {DPSW_CNT_ING_BCAST_FRAME, "rx b-cast frames"}, + {DPSW_CNT_ING_BCAST_BYTES, "rx b-cast bytes"}, + {DPSW_CNT_ING_MCAST_FRAME, "rx m-cast frames"}, + {DPSW_CNT_ING_MCAST_BYTE, "rx m-cast bytes"}, + {DPSW_CNT_EGR_FRAME, "tx frames"}, + {DPSW_CNT_EGR_BYTE, "tx bytes"}, + {DPSW_CNT_EGR_FRAME_DISCARD, "tx discarded frames"}, + {DPSW_CNT_ING_NO_BUFF_DISCARD, "rx discarded no buffer frames"}, +}; + +#define DPAA2_SWITCH_NUM_COUNTERS ARRAY_SIZE(dpaa2_switch_ethtool_counters) + +static void dpaa2_switch_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *drvinfo) +{ + struct ethsw_port_priv *port_priv = netdev_priv(netdev); + u16 version_major, version_minor; + int err; + + strscpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver)); + + err = dpsw_get_api_version(port_priv->ethsw_data->mc_io, 0, + &version_major, + &version_minor); + if (err) + strscpy(drvinfo->fw_version, "N/A", + sizeof(drvinfo->fw_version)); + else + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%u.%u", version_major, version_minor); + + strscpy(drvinfo->bus_info, dev_name(netdev->dev.parent->parent), + sizeof(drvinfo->bus_info)); +} + +static int +dpaa2_switch_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *link_ksettings) +{ + struct ethsw_port_priv *port_priv = netdev_priv(netdev); + struct dpsw_link_state state = {0}; + int err = 0; + + err = dpsw_if_get_link_state(port_priv->ethsw_data->mc_io, 0, + port_priv->ethsw_data->dpsw_handle, + port_priv->idx, + &state); + if (err) { + netdev_err(netdev, "ERROR %d getting link state\n", err); + goto out; + } + + /* At the moment, we have no way of interrogating the DPMAC + * from the DPSW side or there may not exist a DPMAC at all. + * Report only autoneg state, duplexity and speed. 
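The rx_copybreak tunable wired up above is driven through the standard ethtool tunable interface, for example `ethtool --set-tunable eth0 rx-copybreak 256` to lower the threshold and `ethtool --get-tunable eth0 rx-copybreak` to read it back (interface name assumed). Frames no longer than the threshold are copied into a freshly allocated skb and their DMA buffer is recycled in place, trading one memcpy for a dma unmap/remap cycle, which is typically a win for small frames.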
+ */ + if (state.options & DPSW_LINK_OPT_AUTONEG) + link_ksettings->base.autoneg = AUTONEG_ENABLE; + if (!(state.options & DPSW_LINK_OPT_HALF_DUPLEX)) + link_ksettings->base.duplex = DUPLEX_FULL; + link_ksettings->base.speed = state.rate; + +out: + return err; +} + +static int +dpaa2_switch_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *link_ksettings) +{ + struct ethsw_port_priv *port_priv = netdev_priv(netdev); + struct ethsw_core *ethsw = port_priv->ethsw_data; + struct dpsw_link_cfg cfg = {0}; + bool if_running; + int err = 0, ret; + + /* Interface needs to be down to change link settings */ + if_running = netif_running(netdev); + if (if_running) { + err = dpsw_if_disable(ethsw->mc_io, 0, + ethsw->dpsw_handle, + port_priv->idx); + if (err) { + netdev_err(netdev, "dpsw_if_disable err %d\n", err); + return err; + } + } + + cfg.rate = link_ksettings->base.speed; + if (link_ksettings->base.autoneg == AUTONEG_ENABLE) + cfg.options |= DPSW_LINK_OPT_AUTONEG; + else + cfg.options &= ~DPSW_LINK_OPT_AUTONEG; + if (link_ksettings->base.duplex == DUPLEX_HALF) + cfg.options |= DPSW_LINK_OPT_HALF_DUPLEX; + else + cfg.options &= ~DPSW_LINK_OPT_HALF_DUPLEX; + + err = dpsw_if_set_link_cfg(port_priv->ethsw_data->mc_io, 0, + port_priv->ethsw_data->dpsw_handle, + port_priv->idx, + &cfg); + + if (if_running) { + ret = dpsw_if_enable(ethsw->mc_io, 0, + ethsw->dpsw_handle, + port_priv->idx); + if (ret) { + netdev_err(netdev, "dpsw_if_enable err %d\n", ret); + return ret; + } + } + return err; +} + +static int dpaa2_switch_ethtool_get_sset_count(struct net_device *dev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return DPAA2_SWITCH_NUM_COUNTERS; + default: + return -EOPNOTSUPP; + } +} + +static void dpaa2_switch_ethtool_get_strings(struct net_device *netdev, + u32 stringset, u8 *data) +{ + int i; + + switch (stringset) { + case ETH_SS_STATS: + for (i = 0; i < DPAA2_SWITCH_NUM_COUNTERS; i++) + memcpy(data + i * ETH_GSTRING_LEN, + dpaa2_switch_ethtool_counters[i].name, + ETH_GSTRING_LEN); + break; + } +} + +static void dpaa2_switch_ethtool_get_stats(struct net_device *netdev, + struct ethtool_stats *stats, + u64 *data) +{ + struct ethsw_port_priv *port_priv = netdev_priv(netdev); + int i, err; + + for (i = 0; i < DPAA2_SWITCH_NUM_COUNTERS; i++) { + err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0, + port_priv->ethsw_data->dpsw_handle, + port_priv->idx, + dpaa2_switch_ethtool_counters[i].id, + &data[i]); + if (err) + netdev_err(netdev, "dpsw_if_get_counter[%s] err %d\n", + dpaa2_switch_ethtool_counters[i].name, err); + } +} + +const struct ethtool_ops dpaa2_switch_port_ethtool_ops = { + .get_drvinfo = dpaa2_switch_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_link_ksettings = dpaa2_switch_get_link_ksettings, + .set_link_ksettings = dpaa2_switch_set_link_ksettings, + .get_strings = dpaa2_switch_ethtool_get_strings, + .get_ethtool_stats = dpaa2_switch_ethtool_get_stats, + .get_sset_count = dpaa2_switch_ethtool_get_sset_count, +}; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c new file mode 100644 index 000000000000..f9451ec5f2cb --- /dev/null +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c @@ -0,0 +1,492 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DPAA2 Ethernet Switch flower support + * + * Copyright 2021 NXP + * + */ + +#include "dpaa2-switch.h" + +static int dpaa2_switch_flower_parse_key(struct flow_cls_offload *cls, + struct dpsw_acl_key 
*acl_key) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(cls); + struct flow_dissector *dissector = rule->match.dissector; + struct netlink_ext_ack *extack = cls->common.extack; + struct dpsw_acl_fields *acl_h, *acl_m; + + if (dissector->used_keys & + ~(BIT(FLOW_DISSECTOR_KEY_BASIC) | + BIT(FLOW_DISSECTOR_KEY_CONTROL) | + BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_VLAN) | + BIT(FLOW_DISSECTOR_KEY_PORTS) | + BIT(FLOW_DISSECTOR_KEY_IP) | + BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS))) { + NL_SET_ERR_MSG_MOD(extack, + "Unsupported keys used"); + return -EOPNOTSUPP; + } + + acl_h = &acl_key->match; + acl_m = &acl_key->mask; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + acl_h->l3_protocol = match.key->ip_proto; + acl_h->l2_ether_type = be16_to_cpu(match.key->n_proto); + acl_m->l3_protocol = match.mask->ip_proto; + acl_m->l2_ether_type = be16_to_cpu(match.mask->n_proto); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_match_eth_addrs match; + + flow_rule_match_eth_addrs(rule, &match); + ether_addr_copy(acl_h->l2_dest_mac, &match.key->dst[0]); + ether_addr_copy(acl_h->l2_source_mac, &match.key->src[0]); + ether_addr_copy(acl_m->l2_dest_mac, &match.mask->dst[0]); + ether_addr_copy(acl_m->l2_source_mac, &match.mask->src[0]); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan match; + + flow_rule_match_vlan(rule, &match); + acl_h->l2_vlan_id = match.key->vlan_id; + acl_h->l2_tpid = be16_to_cpu(match.key->vlan_tpid); + acl_h->l2_pcp_dei = match.key->vlan_priority << 1 | + match.key->vlan_dei; + + acl_m->l2_vlan_id = match.mask->vlan_id; + acl_m->l2_tpid = be16_to_cpu(match.mask->vlan_tpid); + acl_m->l2_pcp_dei = match.mask->vlan_priority << 1 | + match.mask->vlan_dei; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_ipv4_addrs(rule, &match); + acl_h->l3_source_ip = be32_to_cpu(match.key->src); + acl_h->l3_dest_ip = be32_to_cpu(match.key->dst); + acl_m->l3_source_ip = be32_to_cpu(match.mask->src); + acl_m->l3_dest_ip = be32_to_cpu(match.mask->dst); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_match_ports match; + + flow_rule_match_ports(rule, &match); + acl_h->l4_source_port = be16_to_cpu(match.key->src); + acl_h->l4_dest_port = be16_to_cpu(match.key->dst); + acl_m->l4_source_port = be16_to_cpu(match.mask->src); + acl_m->l4_dest_port = be16_to_cpu(match.mask->dst); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { + struct flow_match_ip match; + + flow_rule_match_ip(rule, &match); + if (match.mask->ttl != 0) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on TTL not supported"); + return -EOPNOTSUPP; + } + + if ((match.mask->tos & 0x3) != 0) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on ECN not supported, only DSCP"); + return -EOPNOTSUPP; + } + + acl_h->l3_dscp = match.key->tos >> 2; + acl_m->l3_dscp = match.mask->tos >> 2; + } + + return 0; +} + +int dpaa2_switch_acl_entry_add(struct dpaa2_switch_acl_tbl *acl_tbl, + struct dpaa2_switch_acl_entry *entry) +{ + struct dpsw_acl_entry_cfg *acl_entry_cfg = &entry->cfg; + struct ethsw_core *ethsw = acl_tbl->ethsw; + struct dpsw_acl_key *acl_key = &entry->key; + struct device *dev = ethsw->dev; + u8 *cmd_buff; + int err; + + cmd_buff = kzalloc(DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE, GFP_KERNEL); + if (!cmd_buff) + 
		return -ENOMEM;
+
+	dpsw_acl_prepare_entry_cfg(acl_key, cmd_buff);
+
+	acl_entry_cfg->key_iova = dma_map_single(dev, cmd_buff,
+						 DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE,
+						 DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(dev, acl_entry_cfg->key_iova))) {
+		dev_err(dev, "DMA mapping failed\n");
+		kfree(cmd_buff);
+		return -EFAULT;
+	}
+
+	err = dpsw_acl_add_entry(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				 acl_tbl->id, acl_entry_cfg);
+
+	dma_unmap_single(dev, acl_entry_cfg->key_iova,
+			 DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE, DMA_TO_DEVICE);
+	if (err) {
+		dev_err(dev, "dpsw_acl_add_entry() failed %d\n", err);
+		kfree(cmd_buff);
+		return err;
+	}
+
+	kfree(cmd_buff);
+
+	return 0;
+}
+
+static int dpaa2_switch_acl_entry_remove(struct dpaa2_switch_acl_tbl *acl_tbl,
+					 struct dpaa2_switch_acl_entry *entry)
+{
+	struct dpsw_acl_entry_cfg *acl_entry_cfg = &entry->cfg;
+	struct dpsw_acl_key *acl_key = &entry->key;
+	struct ethsw_core *ethsw = acl_tbl->ethsw;
+	struct device *dev = ethsw->dev;
+	u8 *cmd_buff;
+	int err;
+
+	cmd_buff = kzalloc(DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE, GFP_KERNEL);
+	if (!cmd_buff)
+		return -ENOMEM;
+
+	dpsw_acl_prepare_entry_cfg(acl_key, cmd_buff);
+
+	acl_entry_cfg->key_iova = dma_map_single(dev, cmd_buff,
+						 DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE,
+						 DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(dev, acl_entry_cfg->key_iova))) {
+		dev_err(dev, "DMA mapping failed\n");
+		kfree(cmd_buff);
+		return -EFAULT;
+	}
+
+	err = dpsw_acl_remove_entry(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				    acl_tbl->id, acl_entry_cfg);
+
+	dma_unmap_single(dev, acl_entry_cfg->key_iova,
+			 DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE, DMA_TO_DEVICE);
+	if (err) {
+		dev_err(dev, "dpsw_acl_remove_entry() failed %d\n", err);
+		kfree(cmd_buff);
+		return err;
+	}
+
+	kfree(cmd_buff);
+
+	return 0;
+}
+
+static int
+dpaa2_switch_acl_entry_add_to_list(struct dpaa2_switch_acl_tbl *acl_tbl,
+				   struct dpaa2_switch_acl_entry *entry)
+{
+	struct dpaa2_switch_acl_entry *tmp;
+	struct list_head *pos, *n;
+	int index = 0;
+
+	if (list_empty(&acl_tbl->entries)) {
+		list_add(&entry->list, &acl_tbl->entries);
+		return index;
+	}
+
+	list_for_each_safe(pos, n, &acl_tbl->entries) {
+		tmp = list_entry(pos, struct dpaa2_switch_acl_entry, list);
+		if (entry->prio < tmp->prio)
+			break;
+		index++;
+	}
+	list_add(&entry->list, pos->prev);
+	return index;
+}
+
+static struct dpaa2_switch_acl_entry*
+dpaa2_switch_acl_entry_get_by_index(struct dpaa2_switch_acl_tbl *acl_tbl,
+				    int index)
+{
+	struct dpaa2_switch_acl_entry *tmp;
+	int i = 0;
+
+	list_for_each_entry(tmp, &acl_tbl->entries, list) {
+		if (i == index)
+			return tmp;
+		++i;
+	}
+
+	return NULL;
+}
+
+static int
+dpaa2_switch_acl_entry_set_precedence(struct dpaa2_switch_acl_tbl *acl_tbl,
+				      struct dpaa2_switch_acl_entry *entry,
+				      int precedence)
+{
+	int err;
+
+	err = dpaa2_switch_acl_entry_remove(acl_tbl, entry);
+	if (err)
+		return err;
+
+	entry->cfg.precedence = precedence;
+	return dpaa2_switch_acl_entry_add(acl_tbl, entry);
+}
+
+static int dpaa2_switch_acl_tbl_add_entry(struct dpaa2_switch_acl_tbl *acl_tbl,
+					  struct dpaa2_switch_acl_entry *entry)
+{
+	struct dpaa2_switch_acl_entry *tmp;
+	int index, i, precedence, err;
+
+	/* Add the new ACL entry to the linked list and get its index */
+	index = dpaa2_switch_acl_entry_add_to_list(acl_tbl, entry);
+
+	/* Move up in priority the ACL entries to make space
+	 * for the new filter.
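+	 * Precedence values grow from the bottom of the table: e.g. with
+	 * DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES == 16 and 3 rules already
+	 * installed, the base below evaluates to 12 and the 4 rules end up
+	 * on precedences 12..15, with numerically smaller precedences
+	 * taking priority, so the head of the prio-sorted list remains the
+	 * first filter checked.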
+ */ + precedence = DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES - acl_tbl->num_rules - 1; + for (i = 0; i < index; i++) { + tmp = dpaa2_switch_acl_entry_get_by_index(acl_tbl, i); + + err = dpaa2_switch_acl_entry_set_precedence(acl_tbl, tmp, + precedence); + if (err) + return err; + + precedence++; + } + + /* Add the new entry to hardware */ + entry->cfg.precedence = precedence; + err = dpaa2_switch_acl_entry_add(acl_tbl, entry); + acl_tbl->num_rules++; + + return err; +} + +static struct dpaa2_switch_acl_entry * +dpaa2_switch_acl_tbl_find_entry_by_cookie(struct dpaa2_switch_acl_tbl *acl_tbl, + unsigned long cookie) +{ + struct dpaa2_switch_acl_entry *tmp, *n; + + list_for_each_entry_safe(tmp, n, &acl_tbl->entries, list) { + if (tmp->cookie == cookie) + return tmp; + } + return NULL; +} + +static int +dpaa2_switch_acl_entry_get_index(struct dpaa2_switch_acl_tbl *acl_tbl, + struct dpaa2_switch_acl_entry *entry) +{ + struct dpaa2_switch_acl_entry *tmp, *n; + int index = 0; + + list_for_each_entry_safe(tmp, n, &acl_tbl->entries, list) { + if (tmp->cookie == entry->cookie) + return index; + index++; + } + return -ENOENT; +} + +static int +dpaa2_switch_acl_tbl_remove_entry(struct dpaa2_switch_acl_tbl *acl_tbl, + struct dpaa2_switch_acl_entry *entry) +{ + struct dpaa2_switch_acl_entry *tmp; + int index, i, precedence, err; + + index = dpaa2_switch_acl_entry_get_index(acl_tbl, entry); + + /* Remove from hardware the ACL entry */ + err = dpaa2_switch_acl_entry_remove(acl_tbl, entry); + if (err) + return err; + + acl_tbl->num_rules--; + + /* Remove it from the list also */ + list_del(&entry->list); + + /* Move down in priority the entries over the deleted one */ + precedence = entry->cfg.precedence; + for (i = index - 1; i >= 0; i--) { + tmp = dpaa2_switch_acl_entry_get_by_index(acl_tbl, i); + err = dpaa2_switch_acl_entry_set_precedence(acl_tbl, tmp, + precedence); + if (err) + return err; + + precedence--; + } + + kfree(entry); + + return 0; +} + +static int dpaa2_switch_tc_parse_action(struct ethsw_core *ethsw, + struct flow_action_entry *cls_act, + struct dpsw_acl_result *dpsw_act, + struct netlink_ext_ack *extack) +{ + int err = 0; + + switch (cls_act->id) { + case FLOW_ACTION_TRAP: + dpsw_act->action = DPSW_ACL_ACTION_REDIRECT_TO_CTRL_IF; + break; + case FLOW_ACTION_REDIRECT: + if (!dpaa2_switch_port_dev_check(cls_act->dev)) { + NL_SET_ERR_MSG_MOD(extack, + "Destination not a DPAA2 switch port"); + return -EOPNOTSUPP; + } + + dpsw_act->if_id = dpaa2_switch_get_index(ethsw, cls_act->dev); + dpsw_act->action = DPSW_ACL_ACTION_REDIRECT; + break; + case FLOW_ACTION_DROP: + dpsw_act->action = DPSW_ACL_ACTION_DROP; + break; + default: + NL_SET_ERR_MSG_MOD(extack, + "Action not supported"); + err = -EOPNOTSUPP; + goto out; + } + +out: + return err; +} + +int dpaa2_switch_cls_flower_replace(struct dpaa2_switch_acl_tbl *acl_tbl, + struct flow_cls_offload *cls) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(cls); + struct netlink_ext_ack *extack = cls->common.extack; + struct ethsw_core *ethsw = acl_tbl->ethsw; + struct dpaa2_switch_acl_entry *acl_entry; + struct flow_action_entry *act; + int err; + + if (!flow_offload_has_one_action(&rule->action)) { + NL_SET_ERR_MSG(extack, "Only singular actions are supported"); + return -EOPNOTSUPP; + } + + if (dpaa2_switch_acl_tbl_is_full(acl_tbl)) { + NL_SET_ERR_MSG(extack, "Maximum filter capacity reached"); + return -ENOMEM; + } + + acl_entry = kzalloc(sizeof(*acl_entry), GFP_KERNEL); + if (!acl_entry) + return -ENOMEM; + + err = dpaa2_switch_flower_parse_key(cls, 
&acl_entry->key); + if (err) + goto free_acl_entry; + + act = &rule->action.entries[0]; + err = dpaa2_switch_tc_parse_action(ethsw, act, + &acl_entry->cfg.result, extack); + if (err) + goto free_acl_entry; + + acl_entry->prio = cls->common.prio; + acl_entry->cookie = cls->cookie; + + err = dpaa2_switch_acl_tbl_add_entry(acl_tbl, acl_entry); + if (err) + goto free_acl_entry; + + return 0; + +free_acl_entry: + kfree(acl_entry); + + return err; +} + +int dpaa2_switch_cls_flower_destroy(struct dpaa2_switch_acl_tbl *acl_tbl, + struct flow_cls_offload *cls) +{ + struct dpaa2_switch_acl_entry *entry; + + entry = dpaa2_switch_acl_tbl_find_entry_by_cookie(acl_tbl, cls->cookie); + if (!entry) + return 0; + + return dpaa2_switch_acl_tbl_remove_entry(acl_tbl, entry); +} + +int dpaa2_switch_cls_matchall_replace(struct dpaa2_switch_acl_tbl *acl_tbl, + struct tc_cls_matchall_offload *cls) +{ + struct netlink_ext_ack *extack = cls->common.extack; + struct ethsw_core *ethsw = acl_tbl->ethsw; + struct dpaa2_switch_acl_entry *acl_entry; + struct flow_action_entry *act; + int err; + + if (!flow_offload_has_one_action(&cls->rule->action)) { + NL_SET_ERR_MSG(extack, "Only singular actions are supported"); + return -EOPNOTSUPP; + } + + if (dpaa2_switch_acl_tbl_is_full(acl_tbl)) { + NL_SET_ERR_MSG(extack, "Maximum filter capacity reached"); + return -ENOMEM; + } + + acl_entry = kzalloc(sizeof(*acl_entry), GFP_KERNEL); + if (!acl_entry) + return -ENOMEM; + + act = &cls->rule->action.entries[0]; + err = dpaa2_switch_tc_parse_action(ethsw, act, + &acl_entry->cfg.result, extack); + if (err) + goto free_acl_entry; + + acl_entry->prio = cls->common.prio; + acl_entry->cookie = cls->cookie; + + err = dpaa2_switch_acl_tbl_add_entry(acl_tbl, acl_entry); + if (err) + goto free_acl_entry; + + return 0; + +free_acl_entry: + kfree(acl_entry); + + return err; +} + +int dpaa2_switch_cls_matchall_destroy(struct dpaa2_switch_acl_tbl *acl_tbl, + struct tc_cls_matchall_offload *cls) +{ + struct dpaa2_switch_acl_entry *entry; + + entry = dpaa2_switch_acl_tbl_find_entry_by_cookie(acl_tbl, cls->cookie); + if (!entry) + return 0; + + return dpaa2_switch_acl_tbl_remove_entry(acl_tbl, entry); +} diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c new file mode 100644 index 000000000000..05de37c3b64c --- /dev/null +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -0,0 +1,3394 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DPAA2 Ethernet Switch driver + * + * Copyright 2014-2016 Freescale Semiconductor Inc. 
+ * Copyright 2017-2021 NXP
+ *
+ */
+
+#include <linux/module.h>
+
+#include <linux/interrupt.h>
+#include <linux/msi.h>
+#include <linux/kthread.h>
+#include <linux/workqueue.h>
+#include <linux/iommu.h>
+#include <net/pkt_cls.h>
+
+#include <linux/fsl/mc.h>
+
+#include "dpaa2-switch.h"
+
+/* Minimal supported DPSW version */
+#define DPSW_MIN_VER_MAJOR		8
+#define DPSW_MIN_VER_MINOR		9
+
+#define DEFAULT_VLAN_ID			1
+
+static u16 dpaa2_switch_port_get_fdb_id(struct ethsw_port_priv *port_priv)
+{
+	return port_priv->fdb->fdb_id;
+}
+
+static struct dpaa2_switch_fdb *dpaa2_switch_fdb_get_unused(struct ethsw_core *ethsw)
+{
+	int i;
+
+	for (i = 0; i < ethsw->sw_attr.num_ifs; i++)
+		if (!ethsw->fdbs[i].in_use)
+			return &ethsw->fdbs[i];
+	return NULL;
+}
+
+static struct dpaa2_switch_acl_tbl *
+dpaa2_switch_acl_tbl_get_unused(struct ethsw_core *ethsw)
+{
+	int i;
+
+	for (i = 0; i < ethsw->sw_attr.num_ifs; i++)
+		if (!ethsw->acls[i].in_use)
+			return &ethsw->acls[i];
+	return NULL;
+}
+
+static u16 dpaa2_switch_port_set_fdb(struct ethsw_port_priv *port_priv,
+				     struct net_device *bridge_dev)
+{
+	struct ethsw_port_priv *other_port_priv = NULL;
+	struct dpaa2_switch_fdb *fdb;
+	struct net_device *other_dev;
+	struct list_head *iter;
+
+	/* If we leave a bridge (bridge_dev is NULL), find an unused
+	 * FDB and use that.
+	 */
+	if (!bridge_dev) {
+		fdb = dpaa2_switch_fdb_get_unused(port_priv->ethsw_data);
+
+		/* If there is no unused FDB, we must be the last port that
+		 * leaves the last bridge, all the others are standalone. We
+		 * can just keep the FDB that we already have.
+		 */
+
+		if (!fdb) {
+			port_priv->fdb->bridge_dev = NULL;
+			return 0;
+		}
+
+		port_priv->fdb = fdb;
+		port_priv->fdb->in_use = true;
+		port_priv->fdb->bridge_dev = NULL;
+		return 0;
+	}
+
+	/* The below call to netdev_for_each_lower_dev() demands the RTNL lock
+	 * being held. Assert on it so that it's easier to catch new code
+	 * paths that reach this point without the RTNL lock.
+	 */
+	ASSERT_RTNL();
+
+	/* If part of a bridge, use the FDB of the first dpaa2 switch interface
+	 * to be present in that bridge
+	 */
+	netdev_for_each_lower_dev(bridge_dev, other_dev, iter) {
+		if (!dpaa2_switch_port_dev_check(other_dev))
+			continue;
+
+		if (other_dev == port_priv->netdev)
+			continue;
+
+		other_port_priv = netdev_priv(other_dev);
+		break;
+	}
+
+	/* The current port is about to change its FDB to the one used by the
+	 * first port that joined the bridge.
+	 */
+	if (other_port_priv) {
+		/* The previous FDB is about to become unused, since the
+		 * interface is no longer standalone.
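+		 * Example: if swp0 is already under br0 and swp1 now joins
+		 * br0, swp1 releases its private FDB and starts sharing
+		 * swp0's, so addresses learned on either port end up in the
+		 * same table (interface names are illustrative).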
+ */ + port_priv->fdb->in_use = false; + port_priv->fdb->bridge_dev = NULL; + + /* Get a reference to the new FDB */ + port_priv->fdb = other_port_priv->fdb; + } + + /* Keep track of the new upper bridge device */ + port_priv->fdb->bridge_dev = bridge_dev; + + return 0; +} + +static void dpaa2_switch_fdb_get_flood_cfg(struct ethsw_core *ethsw, u16 fdb_id, + enum dpsw_flood_type type, + struct dpsw_egress_flood_cfg *cfg) +{ + int i = 0, j; + + memset(cfg, 0, sizeof(*cfg)); + + /* Add all the DPAA2 switch ports found in the same bridging domain to + * the egress flooding domain + */ + for (j = 0; j < ethsw->sw_attr.num_ifs; j++) { + if (!ethsw->ports[j]) + continue; + if (ethsw->ports[j]->fdb->fdb_id != fdb_id) + continue; + + if (type == DPSW_BROADCAST && ethsw->ports[j]->bcast_flood) + cfg->if_id[i++] = ethsw->ports[j]->idx; + else if (type == DPSW_FLOODING && ethsw->ports[j]->ucast_flood) + cfg->if_id[i++] = ethsw->ports[j]->idx; + } + + /* Add the CTRL interface to the egress flooding domain */ + cfg->if_id[i++] = ethsw->sw_attr.num_ifs; + + cfg->fdb_id = fdb_id; + cfg->flood_type = type; + cfg->num_ifs = i; +} + +static int dpaa2_switch_fdb_set_egress_flood(struct ethsw_core *ethsw, u16 fdb_id) +{ + struct dpsw_egress_flood_cfg flood_cfg; + int err; + + /* Setup broadcast flooding domain */ + dpaa2_switch_fdb_get_flood_cfg(ethsw, fdb_id, DPSW_BROADCAST, &flood_cfg); + err = dpsw_set_egress_flood(ethsw->mc_io, 0, ethsw->dpsw_handle, + &flood_cfg); + if (err) { + dev_err(ethsw->dev, "dpsw_set_egress_flood() = %d\n", err); + return err; + } + + /* Setup unknown flooding domain */ + dpaa2_switch_fdb_get_flood_cfg(ethsw, fdb_id, DPSW_FLOODING, &flood_cfg); + err = dpsw_set_egress_flood(ethsw->mc_io, 0, ethsw->dpsw_handle, + &flood_cfg); + if (err) { + dev_err(ethsw->dev, "dpsw_set_egress_flood() = %d\n", err); + return err; + } + + return 0; +} + +static void *dpaa2_iova_to_virt(struct iommu_domain *domain, + dma_addr_t iova_addr) +{ + phys_addr_t phys_addr; + + phys_addr = domain ? iommu_iova_to_phys(domain, iova_addr) : iova_addr; + + return phys_to_virt(phys_addr); +} + +static int dpaa2_switch_add_vlan(struct ethsw_port_priv *port_priv, u16 vid) +{ + struct ethsw_core *ethsw = port_priv->ethsw_data; + struct dpsw_vlan_cfg vcfg = {0}; + int err; + + vcfg.fdb_id = dpaa2_switch_port_get_fdb_id(port_priv); + err = dpsw_vlan_add(ethsw->mc_io, 0, + ethsw->dpsw_handle, vid, &vcfg); + if (err) { + dev_err(ethsw->dev, "dpsw_vlan_add err %d\n", err); + return err; + } + ethsw->vlans[vid] = ETHSW_VLAN_MEMBER; + + return 0; +} + +static bool dpaa2_switch_port_is_up(struct ethsw_port_priv *port_priv) +{ + struct net_device *netdev = port_priv->netdev; + struct dpsw_link_state state; + int err; + + err = dpsw_if_get_link_state(port_priv->ethsw_data->mc_io, 0, + port_priv->ethsw_data->dpsw_handle, + port_priv->idx, &state); + if (err) { + netdev_err(netdev, "dpsw_if_get_link_state() err %d\n", err); + return true; + } + + WARN_ONCE(state.up > 1, "Garbage read into link_state"); + + return state.up ? 
true : false; +} + +static int dpaa2_switch_port_set_pvid(struct ethsw_port_priv *port_priv, u16 pvid) +{ + struct ethsw_core *ethsw = port_priv->ethsw_data; + struct net_device *netdev = port_priv->netdev; + struct dpsw_tci_cfg tci_cfg = { 0 }; + bool up; + int err, ret; + + err = dpsw_if_get_tci(ethsw->mc_io, 0, ethsw->dpsw_handle, + port_priv->idx, &tci_cfg); + if (err) { + netdev_err(netdev, "dpsw_if_get_tci err %d\n", err); + return err; + } + + tci_cfg.vlan_id = pvid; + + /* Interface needs to be down to change PVID */ + up = dpaa2_switch_port_is_up(port_priv); + if (up) { + err = dpsw_if_disable(ethsw->mc_io, 0, + ethsw->dpsw_handle, + port_priv->idx); + if (err) { + netdev_err(netdev, "dpsw_if_disable err %d\n", err); + return err; + } + } + + err = dpsw_if_set_tci(ethsw->mc_io, 0, ethsw->dpsw_handle, + port_priv->idx, &tci_cfg); + if (err) { + netdev_err(netdev, "dpsw_if_set_tci err %d\n", err); + goto set_tci_error; + } + + /* Delete previous PVID info and mark the new one */ + port_priv->vlans[port_priv->pvid] &= ~ETHSW_VLAN_PVID; + port_priv->vlans[pvid] |= ETHSW_VLAN_PVID; + port_priv->pvid = pvid; + +set_tci_error: + if (up) { + ret = dpsw_if_enable(ethsw->mc_io, 0, + ethsw->dpsw_handle, + port_priv->idx); + if (ret) { + netdev_err(netdev, "dpsw_if_enable err %d\n", ret); + return ret; + } + } + + return err; +} + +static int dpaa2_switch_port_add_vlan(struct ethsw_port_priv *port_priv, + u16 vid, u16 flags) +{ + struct ethsw_core *ethsw = port_priv->ethsw_data; + struct net_device *netdev = port_priv->netdev; + struct dpsw_vlan_if_cfg vcfg = {0}; + int err; + + if (port_priv->vlans[vid]) { + netdev_warn(netdev, "VLAN %d already configured\n", vid); + return -EEXIST; + } + + /* If hit, this VLAN rule will lead the packet into the FDB table + * specified in the vlan configuration below + */ + vcfg.num_ifs = 1; + vcfg.if_id[0] = port_priv->idx; + vcfg.fdb_id = dpaa2_switch_port_get_fdb_id(port_priv); + vcfg.options |= DPSW_VLAN_ADD_IF_OPT_FDB_ID; + err = dpsw_vlan_add_if(ethsw->mc_io, 0, ethsw->dpsw_handle, vid, &vcfg); + if (err) { + netdev_err(netdev, "dpsw_vlan_add_if err %d\n", err); + return err; + } + + port_priv->vlans[vid] = ETHSW_VLAN_MEMBER; + + if (flags & BRIDGE_VLAN_INFO_UNTAGGED) { + err = dpsw_vlan_add_if_untagged(ethsw->mc_io, 0, + ethsw->dpsw_handle, + vid, &vcfg); + if (err) { + netdev_err(netdev, + "dpsw_vlan_add_if_untagged err %d\n", err); + return err; + } + port_priv->vlans[vid] |= ETHSW_VLAN_UNTAGGED; + } + + if (flags & BRIDGE_VLAN_INFO_PVID) { + err = dpaa2_switch_port_set_pvid(port_priv, vid); + if (err) + return err; + } + + return 0; +} + +static enum dpsw_stp_state br_stp_state_to_dpsw(u8 state) +{ + switch (state) { + case BR_STATE_DISABLED: + return DPSW_STP_STATE_DISABLED; + case BR_STATE_LISTENING: + return DPSW_STP_STATE_LISTENING; + case BR_STATE_LEARNING: + return DPSW_STP_STATE_LEARNING; + case BR_STATE_FORWARDING: + return DPSW_STP_STATE_FORWARDING; + case BR_STATE_BLOCKING: + return DPSW_STP_STATE_BLOCKING; + default: + return DPSW_STP_STATE_DISABLED; + } +} + +static int dpaa2_switch_port_set_stp_state(struct ethsw_port_priv *port_priv, u8 state) +{ + struct dpsw_stp_cfg stp_cfg = {0}; + int err; + u16 vid; + + if (!netif_running(port_priv->netdev) || state == port_priv->stp_state) + return 0; /* Nothing to do */ + + stp_cfg.state = br_stp_state_to_dpsw(state); + for (vid = 0; vid <= VLAN_VID_MASK; vid++) { + if (port_priv->vlans[vid] & ETHSW_VLAN_MEMBER) { + stp_cfg.vlan_id = vid; + err = dpsw_if_set_stp(port_priv->ethsw_data->mc_io, 0, 
+ port_priv->ethsw_data->dpsw_handle, + port_priv->idx, &stp_cfg); + if (err) { + netdev_err(port_priv->netdev, + "dpsw_if_set_stp err %d\n", err); + return err; + } + } + } + + port_priv->stp_state = state; + + return 0; +} + +static int dpaa2_switch_dellink(struct ethsw_core *ethsw, u16 vid) +{ + struct ethsw_port_priv *ppriv_local = NULL; + int i, err; + + if (!ethsw->vlans[vid]) + return -ENOENT; + + err = dpsw_vlan_remove(ethsw->mc_io, 0, ethsw->dpsw_handle, vid); + if (err) { + dev_err(ethsw->dev, "dpsw_vlan_remove err %d\n", err); + return err; + } + ethsw->vlans[vid] = 0; + + for (i = 0; i < ethsw->sw_attr.num_ifs; i++) { + ppriv_local = ethsw->ports[i]; + ppriv_local->vlans[vid] = 0; + } + + return 0; +} + +static int dpaa2_switch_port_fdb_add_uc(struct ethsw_port_priv *port_priv, + const unsigned char *addr) +{ + struct dpsw_fdb_unicast_cfg entry = {0}; + u16 fdb_id; + int err; + + entry.if_egress = port_priv->idx; + entry.type = DPSW_FDB_ENTRY_STATIC; + ether_addr_copy(entry.mac_addr, addr); + + fdb_id = dpaa2_switch_port_get_fdb_id(port_priv); + err = dpsw_fdb_add_unicast(port_priv->ethsw_data->mc_io, 0, + port_priv->ethsw_data->dpsw_handle, + fdb_id, &entry); + if (err) + netdev_err(port_priv->netdev, + "dpsw_fdb_add_unicast err %d\n", err); + return err; +} + +static int dpaa2_switch_port_fdb_del_uc(struct ethsw_port_priv *port_priv, + const unsigned char *addr) +{ + struct dpsw_fdb_unicast_cfg entry = {0}; + u16 fdb_id; + int err; + + entry.if_egress = port_priv->idx; + entry.type = DPSW_FDB_ENTRY_STATIC; + ether_addr_copy(entry.mac_addr, addr); + + fdb_id = dpaa2_switch_port_get_fdb_id(port_priv); + err = dpsw_fdb_remove_unicast(port_priv->ethsw_data->mc_io, 0, + port_priv->ethsw_data->dpsw_handle, + fdb_id, &entry); + /* Silently discard error for calling multiple times the del command */ + if (err && err != -ENXIO) + netdev_err(port_priv->netdev, + "dpsw_fdb_remove_unicast err %d\n", err); + return err; +} + +static int dpaa2_switch_port_fdb_add_mc(struct ethsw_port_priv *port_priv, + const unsigned char *addr) +{ + struct dpsw_fdb_multicast_cfg entry = {0}; + u16 fdb_id; + int err; + + ether_addr_copy(entry.mac_addr, addr); + entry.type = DPSW_FDB_ENTRY_STATIC; + entry.num_ifs = 1; + entry.if_id[0] = port_priv->idx; + + fdb_id = dpaa2_switch_port_get_fdb_id(port_priv); + err = dpsw_fdb_add_multicast(port_priv->ethsw_data->mc_io, 0, + port_priv->ethsw_data->dpsw_handle, + fdb_id, &entry); + /* Silently discard error for calling multiple times the add command */ + if (err && err != -ENXIO) + netdev_err(port_priv->netdev, "dpsw_fdb_add_multicast err %d\n", + err); + return err; +} + +static int dpaa2_switch_port_fdb_del_mc(struct ethsw_port_priv *port_priv, + const unsigned char *addr) +{ + struct dpsw_fdb_multicast_cfg entry = {0}; + u16 fdb_id; + int err; + + ether_addr_copy(entry.mac_addr, addr); + entry.type = DPSW_FDB_ENTRY_STATIC; + entry.num_ifs = 1; + entry.if_id[0] = port_priv->idx; + + fdb_id = dpaa2_switch_port_get_fdb_id(port_priv); + err = dpsw_fdb_remove_multicast(port_priv->ethsw_data->mc_io, 0, + port_priv->ethsw_data->dpsw_handle, + fdb_id, &entry); + /* Silently discard error for calling multiple times the del command */ + if (err && err != -ENAVAIL) + netdev_err(port_priv->netdev, + "dpsw_fdb_remove_multicast err %d\n", err); + return err; +} + +static void dpaa2_switch_port_get_stats(struct net_device *netdev, + struct rtnl_link_stats64 *stats) +{ + struct ethsw_port_priv *port_priv = netdev_priv(netdev); + u64 tmp; + int err; + + err = 
dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0, + port_priv->ethsw_data->dpsw_handle, + port_priv->idx, + DPSW_CNT_ING_FRAME, &stats->rx_packets); + if (err) + goto error; + + err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0, + port_priv->ethsw_data->dpsw_handle, + port_priv->idx, + DPSW_CNT_EGR_FRAME, &stats->tx_packets); + if (err) + goto error; + + err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0, + port_priv->ethsw_data->dpsw_handle, + port_priv->idx, + DPSW_CNT_ING_BYTE, &stats->rx_bytes); + if (err) + goto error; + + err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0, + port_priv->ethsw_data->dpsw_handle, + port_priv->idx, + DPSW_CNT_EGR_BYTE, &stats->tx_bytes); + if (err) + goto error; + + err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0, + port_priv->ethsw_data->dpsw_handle, + port_priv->idx, + DPSW_CNT_ING_FRAME_DISCARD, + &stats->rx_dropped); + if (err) + goto error; + + err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0, + port_priv->ethsw_data->dpsw_handle, + port_priv->idx, + DPSW_CNT_ING_FLTR_FRAME, + &tmp); + if (err) + goto error; + stats->rx_dropped += tmp; + + err = dpsw_if_get_counter(port_priv->ethsw_data->mc_io, 0, + port_priv->ethsw_data->dpsw_handle, + port_priv->idx, + DPSW_CNT_EGR_FRAME_DISCARD, + &stats->tx_dropped); + if (err) + goto error; + + return; + +error: + netdev_err(netdev, "dpsw_if_get_counter err %d\n", err); +} + +static bool dpaa2_switch_port_has_offload_stats(const struct net_device *netdev, + int attr_id) +{ + return (attr_id == IFLA_OFFLOAD_XSTATS_CPU_HIT); +} + +static int dpaa2_switch_port_get_offload_stats(int attr_id, + const struct net_device *netdev, + void *sp) +{ + switch (attr_id) { + case IFLA_OFFLOAD_XSTATS_CPU_HIT: + dpaa2_switch_port_get_stats((struct net_device *)netdev, sp); + return 0; + } + + return -EINVAL; +} + +static int dpaa2_switch_port_change_mtu(struct net_device *netdev, int mtu) +{ + struct ethsw_port_priv *port_priv = netdev_priv(netdev); + int err; + + err = dpsw_if_set_max_frame_length(port_priv->ethsw_data->mc_io, + 0, + port_priv->ethsw_data->dpsw_handle, + port_priv->idx, + (u16)ETHSW_L2_MAX_FRM(mtu)); + if (err) { + netdev_err(netdev, + "dpsw_if_set_max_frame_length() err %d\n", err); + return err; + } + + netdev->mtu = mtu; + return 0; +} + +static int dpaa2_switch_port_carrier_state_sync(struct net_device *netdev) +{ + struct ethsw_port_priv *port_priv = netdev_priv(netdev); + struct dpsw_link_state state; + int err; + + /* Interrupts are received even though no one issued an 'ifconfig up' + * on the switch interface. Ignore these link state update interrupts + */ + if (!netif_running(netdev)) + return 0; + + err = dpsw_if_get_link_state(port_priv->ethsw_data->mc_io, 0, + port_priv->ethsw_data->dpsw_handle, + port_priv->idx, &state); + if (err) { + netdev_err(netdev, "dpsw_if_get_link_state() err %d\n", err); + return err; + } + + WARN_ONCE(state.up > 1, "Garbage read into link_state"); + + if (state.up != port_priv->link_state) { + if (state.up) { + netif_carrier_on(netdev); + netif_tx_start_all_queues(netdev); + } else { + netif_carrier_off(netdev); + netif_tx_stop_all_queues(netdev); + } + port_priv->link_state = state.up; + } + + return 0; +} + +/* Manage all NAPI instances for the control interface. + * + * We only have one RX queue and one Tx Conf queue for all + * switch ports. Therefore, we only need to enable the NAPI instance once, the + * first time one of the switch ports runs .dev_open(). 
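+ * The napi_users counter below implements this: the first .dev_open()
+ * call enables NAPI on all control interface frame queues, subsequent
+ * opens only increment the counter, and the last .dev_stop() brings it
+ * back to zero and disables NAPI again, all serialized by the RTNL lock.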
+ */
+
+static void dpaa2_switch_enable_ctrl_if_napi(struct ethsw_core *ethsw)
+{
+	int i;
+
+	/* Access to the ethsw->napi_users relies on the RTNL lock */
+	ASSERT_RTNL();
+
+	/* a new interface is using the NAPI instance */
+	ethsw->napi_users++;
+
+	/* if there is already a user of the instance, return */
+	if (ethsw->napi_users > 1)
+		return;
+
+	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++)
+		napi_enable(&ethsw->fq[i].napi);
+}
+
+static void dpaa2_switch_disable_ctrl_if_napi(struct ethsw_core *ethsw)
+{
+	int i;
+
+	/* Access to the ethsw->napi_users relies on the RTNL lock */
+	ASSERT_RTNL();
+
+	/* If we are not the last interface using the NAPI, return */
+	ethsw->napi_users--;
+	if (ethsw->napi_users)
+		return;
+
+	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++)
+		napi_disable(&ethsw->fq[i].napi);
+}
+
+static int dpaa2_switch_port_open(struct net_device *netdev)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	int err;
+
+	/* Explicitly set carrier off, otherwise
+	 * netif_carrier_ok() will return true and cause 'ip link show'
+	 * to report the LOWER_UP flag, even though the link
+	 * notification wasn't even received.
+	 */
+	netif_carrier_off(netdev);
+
+	err = dpsw_if_enable(port_priv->ethsw_data->mc_io, 0,
+			     port_priv->ethsw_data->dpsw_handle,
+			     port_priv->idx);
+	if (err) {
+		netdev_err(netdev, "dpsw_if_enable err %d\n", err);
+		return err;
+	}
+
+	/* sync carrier state */
+	err = dpaa2_switch_port_carrier_state_sync(netdev);
+	if (err) {
+		netdev_err(netdev,
+			   "dpaa2_switch_port_carrier_state_sync err %d\n", err);
+		goto err_carrier_sync;
+	}
+
+	dpaa2_switch_enable_ctrl_if_napi(ethsw);
+
+	return 0;
+
+err_carrier_sync:
+	dpsw_if_disable(port_priv->ethsw_data->mc_io, 0,
+			port_priv->ethsw_data->dpsw_handle,
+			port_priv->idx);
+	return err;
+}
+
+static int dpaa2_switch_port_stop(struct net_device *netdev)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	int err;
+
+	err = dpsw_if_disable(port_priv->ethsw_data->mc_io, 0,
+			      port_priv->ethsw_data->dpsw_handle,
+			      port_priv->idx);
+	if (err) {
+		netdev_err(netdev, "dpsw_if_disable err %d\n", err);
+		return err;
+	}
+
+	dpaa2_switch_disable_ctrl_if_napi(ethsw);
+
+	return 0;
+}
+
+static int dpaa2_switch_port_parent_id(struct net_device *dev,
+				       struct netdev_phys_item_id *ppid)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(dev);
+
+	ppid->id_len = 1;
+	ppid->id[0] = port_priv->ethsw_data->dev_id;
+
+	return 0;
+}
+
+static int dpaa2_switch_port_get_phys_name(struct net_device *netdev, char *name,
+					   size_t len)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	int err;
+
+	err = snprintf(name, len, "p%d", port_priv->idx);
+	if (err >= len)
+		return -EINVAL;
+
+	return 0;
+}
+
+struct ethsw_dump_ctx {
+	struct net_device *dev;
+	struct sk_buff *skb;
+	struct netlink_callback *cb;
+	int idx;
+};
+
+static int dpaa2_switch_fdb_dump_nl(struct fdb_dump_entry *entry,
+				    struct ethsw_dump_ctx *dump)
+{
+	int is_dynamic = entry->type & DPSW_FDB_ENTRY_DINAMIC;
+	u32 portid = NETLINK_CB(dump->cb->skb).portid;
+	u32 seq = dump->cb->nlh->nlmsg_seq;
+	struct nlmsghdr *nlh;
+	struct ndmsg *ndm;
+
+	if (dump->idx < dump->cb->args[2])
+		goto skip;
+
+	nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
+			sizeof(*ndm), NLM_F_MULTI);
+	if (!nlh)
+		return -EMSGSIZE;
+
+	ndm = nlmsg_data(nlh);
+	ndm->ndm_family = AF_BRIDGE;
+	ndm->ndm_pad1 = 0;
+	ndm->ndm_pad2 = 0;
+	ndm->ndm_flags = NTF_SELF;
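+	/* Dynamically learned entries are reported as NUD_REACHABLE while
+	 * static ones are reported as NUD_NOARP, letting userspace dump
+	 * tools distinguish the two kinds.
+	 */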
+	ndm->ndm_type = 0;
+	ndm->ndm_ifindex = dump->dev->ifindex;
+	ndm->ndm_state = is_dynamic ? NUD_REACHABLE : NUD_NOARP;
+
+	if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, entry->mac_addr))
+		goto nla_put_failure;
+
+	nlmsg_end(dump->skb, nlh);
+
+skip:
+	dump->idx++;
+	return 0;
+
+nla_put_failure:
+	nlmsg_cancel(dump->skb, nlh);
+	return -EMSGSIZE;
+}
+
+static int dpaa2_switch_port_fdb_valid_entry(struct fdb_dump_entry *entry,
+					     struct ethsw_port_priv *port_priv)
+{
+	int idx = port_priv->idx;
+	int valid;
+
+	if (entry->type & DPSW_FDB_ENTRY_TYPE_UNICAST)
+		valid = entry->if_info == port_priv->idx;
+	else
+		valid = entry->if_mask[idx / 8] & BIT(idx % 8);
+
+	return valid;
+}
+
+static int dpaa2_switch_fdb_iterate(struct ethsw_port_priv *port_priv,
+				    dpaa2_switch_fdb_cb_t cb, void *data)
+{
+	struct net_device *net_dev = port_priv->netdev;
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	struct device *dev = net_dev->dev.parent;
+	struct fdb_dump_entry *fdb_entries;
+	struct fdb_dump_entry fdb_entry;
+	dma_addr_t fdb_dump_iova;
+	u16 num_fdb_entries;
+	u32 fdb_dump_size;
+	int err = 0, i;
+	u8 *dma_mem;
+	u16 fdb_id;
+
+	fdb_dump_size = ethsw->sw_attr.max_fdb_entries * sizeof(fdb_entry);
+	dma_mem = kzalloc(fdb_dump_size, GFP_KERNEL);
+	if (!dma_mem)
+		return -ENOMEM;
+
+	fdb_dump_iova = dma_map_single(dev, dma_mem, fdb_dump_size,
+				       DMA_FROM_DEVICE);
+	if (dma_mapping_error(dev, fdb_dump_iova)) {
+		netdev_err(net_dev, "dma_map_single() failed\n");
+		err = -ENOMEM;
+		goto err_map;
+	}
+
+	fdb_id = dpaa2_switch_port_get_fdb_id(port_priv);
+	err = dpsw_fdb_dump(ethsw->mc_io, 0, ethsw->dpsw_handle, fdb_id,
+			    fdb_dump_iova, fdb_dump_size, &num_fdb_entries);
+	if (err) {
+		netdev_err(net_dev, "dpsw_fdb_dump() = %d\n", err);
+		goto err_dump;
+	}
+
+	dma_unmap_single(dev, fdb_dump_iova, fdb_dump_size, DMA_FROM_DEVICE);
+
+	fdb_entries = (struct fdb_dump_entry *)dma_mem;
+	for (i = 0; i < num_fdb_entries; i++) {
+		fdb_entry = fdb_entries[i];
+
+		err = cb(port_priv, &fdb_entry, data);
+		if (err)
+			goto end;
+	}
+
+end:
+	kfree(dma_mem);
+
+	return 0;
+
+err_dump:
+	dma_unmap_single(dev, fdb_dump_iova, fdb_dump_size, DMA_FROM_DEVICE);
+err_map:
+	kfree(dma_mem);
+	return err;
+}
+
+static int dpaa2_switch_fdb_entry_dump(struct ethsw_port_priv *port_priv,
+				       struct fdb_dump_entry *fdb_entry,
+				       void *data)
+{
+	if (!dpaa2_switch_port_fdb_valid_entry(fdb_entry, port_priv))
+		return 0;
+
+	return dpaa2_switch_fdb_dump_nl(fdb_entry, data);
+}
+
+static int dpaa2_switch_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
+				      struct net_device *net_dev,
+				      struct net_device *filter_dev, int *idx)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(net_dev);
+	struct ethsw_dump_ctx dump = {
+		.dev = net_dev,
+		.skb = skb,
+		.cb = cb,
+		.idx = *idx,
+	};
+	int err;
+
+	err = dpaa2_switch_fdb_iterate(port_priv, dpaa2_switch_fdb_entry_dump, &dump);
+	*idx = dump.idx;
+
+	return err;
+}
+
+static int dpaa2_switch_fdb_entry_fast_age(struct ethsw_port_priv *port_priv,
+					   struct fdb_dump_entry *fdb_entry,
+					   void *data __always_unused)
+{
+	if (!dpaa2_switch_port_fdb_valid_entry(fdb_entry, port_priv))
+		return 0;
+
+	if (!(fdb_entry->type & DPSW_FDB_ENTRY_TYPE_DYNAMIC))
+		return 0;
+
+	if (fdb_entry->type & DPSW_FDB_ENTRY_TYPE_UNICAST)
+		dpaa2_switch_port_fdb_del_uc(port_priv, fdb_entry->mac_addr);
+	else
+		dpaa2_switch_port_fdb_del_mc(port_priv, fdb_entry->mac_addr);
+
+	return 0;
+}
+
+static void dpaa2_switch_port_fast_age(struct ethsw_port_priv *port_priv)
+{
+	dpaa2_switch_fdb_iterate(port_priv,
dpaa2_switch_fdb_entry_fast_age, NULL); +} + +static int dpaa2_switch_port_vlan_add(struct net_device *netdev, __be16 proto, + u16 vid) +{ + struct switchdev_obj_port_vlan vlan = { + .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, + .vid = vid, + .obj.orig_dev = netdev, + /* This API only allows programming tagged, non-PVID VIDs */ + .flags = 0, + }; + + return dpaa2_switch_port_vlans_add(netdev, &vlan); +} + +static int dpaa2_switch_port_vlan_kill(struct net_device *netdev, __be16 proto, + u16 vid) +{ + struct switchdev_obj_port_vlan vlan = { + .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, + .vid = vid, + .obj.orig_dev = netdev, + /* This API only allows programming tagged, non-PVID VIDs */ + .flags = 0, + }; + + return dpaa2_switch_port_vlans_del(netdev, &vlan); +} + +static int dpaa2_switch_port_set_mac_addr(struct ethsw_port_priv *port_priv) +{ + struct ethsw_core *ethsw = port_priv->ethsw_data; + struct net_device *net_dev = port_priv->netdev; + struct device *dev = net_dev->dev.parent; + u8 mac_addr[ETH_ALEN]; + int err; + + if (!(ethsw->features & ETHSW_FEATURE_MAC_ADDR)) + return 0; + + /* Get firmware address, if any */ + err = dpsw_if_get_port_mac_addr(ethsw->mc_io, 0, ethsw->dpsw_handle, + port_priv->idx, mac_addr); + if (err) { + dev_err(dev, "dpsw_if_get_port_mac_addr() failed\n"); + return err; + } + + /* First check if firmware has any address configured by bootloader */ + if (!is_zero_ether_addr(mac_addr)) { + memcpy(net_dev->dev_addr, mac_addr, net_dev->addr_len); + } else { + /* No MAC address configured, fill in net_dev->dev_addr + * with a random one + */ + eth_hw_addr_random(net_dev); + dev_dbg_once(dev, "device(s) have all-zero hwaddr, replaced with random\n"); + + /* Override NET_ADDR_RANDOM set by eth_hw_addr_random(); for all + * practical purposes, this will be our "permanent" mac address, + * at least until the next reboot. This move will also permit + * register_netdevice() to properly fill up net_dev->perm_addr. + */ + net_dev->addr_assign_type = NET_ADDR_PERM; + } + + return 0; +} + +static void dpaa2_switch_free_fd(const struct ethsw_core *ethsw, + const struct dpaa2_fd *fd) +{ + struct device *dev = ethsw->dev; + unsigned char *buffer_start; + struct sk_buff **skbh, *skb; + dma_addr_t fd_addr; + + fd_addr = dpaa2_fd_get_addr(fd); + skbh = dpaa2_iova_to_virt(ethsw->iommu_domain, fd_addr); + + skb = *skbh; + buffer_start = (unsigned char *)skbh; + + dma_unmap_single(dev, fd_addr, + skb_tail_pointer(skb) - buffer_start, + DMA_TO_DEVICE); + + /* Move on with skb release */ + dev_kfree_skb(skb); +} + +static int dpaa2_switch_build_single_fd(struct ethsw_core *ethsw, + struct sk_buff *skb, + struct dpaa2_fd *fd) +{ + struct device *dev = ethsw->dev; + struct sk_buff **skbh; + dma_addr_t addr; + u8 *buff_start; + void *hwa; + + buff_start = PTR_ALIGN(skb->data - DPAA2_SWITCH_TX_DATA_OFFSET - + DPAA2_SWITCH_TX_BUF_ALIGN, + DPAA2_SWITCH_TX_BUF_ALIGN); + + /* Clear FAS to have consistent values for TX confirmation. 
It is + * located in the first 8 bytes of the buffer's hardware annotation + * area + */ + hwa = buff_start + DPAA2_SWITCH_SWA_SIZE; + memset(hwa, 0, 8); + + /* Store a backpointer to the skb at the beginning of the buffer + * (in the private data area) such that we can release it + * on Tx confirm + */ + skbh = (struct sk_buff **)buff_start; + *skbh = skb; + + addr = dma_map_single(dev, buff_start, + skb_tail_pointer(skb) - buff_start, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, addr))) + return -ENOMEM; + + /* Setup the FD fields */ + memset(fd, 0, sizeof(*fd)); + + dpaa2_fd_set_addr(fd, addr); + dpaa2_fd_set_offset(fd, (u16)(skb->data - buff_start)); + dpaa2_fd_set_len(fd, skb->len); + dpaa2_fd_set_format(fd, dpaa2_fd_single); + + return 0; +} + +static netdev_tx_t dpaa2_switch_port_tx(struct sk_buff *skb, + struct net_device *net_dev) +{ + struct ethsw_port_priv *port_priv = netdev_priv(net_dev); + struct ethsw_core *ethsw = port_priv->ethsw_data; + int retries = DPAA2_SWITCH_SWP_BUSY_RETRIES; + struct dpaa2_fd fd; + int err; + + if (unlikely(skb_headroom(skb) < DPAA2_SWITCH_NEEDED_HEADROOM)) { + struct sk_buff *ns; + + ns = skb_realloc_headroom(skb, DPAA2_SWITCH_NEEDED_HEADROOM); + if (unlikely(!ns)) { + net_err_ratelimited("%s: Error reallocating skb headroom\n", net_dev->name); + goto err_free_skb; + } + dev_consume_skb_any(skb); + skb = ns; + } + + /* We'll be holding a back-reference to the skb until Tx confirmation */ + skb = skb_unshare(skb, GFP_ATOMIC); + if (unlikely(!skb)) { + /* skb_unshare() has already freed the skb */ + net_err_ratelimited("%s: Error copying the socket buffer\n", net_dev->name); + goto err_exit; + } + + /* At this stage, we do not support non-linear skbs so just try to + * linearize the skb and if that's not working, just drop the packet. 
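+	 * Handling fragmented skbs would instead require building a
+	 * scatter/gather frame descriptor (format dpaa2_fd_sg) rather than
+	 * the single-buffer layout set up in dpaa2_switch_build_single_fd().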
+ */
+	err = skb_linearize(skb);
+	if (err) {
+		net_err_ratelimited("%s: skb_linearize error (%d)!\n", net_dev->name, err);
+		goto err_free_skb;
+	}
+
+	err = dpaa2_switch_build_single_fd(ethsw, skb, &fd);
+	if (unlikely(err)) {
+		net_err_ratelimited("%s: ethsw_build_*_fd() %d\n", net_dev->name, err);
+		goto err_free_skb;
+	}
+
+	do {
+		err = dpaa2_io_service_enqueue_qd(NULL,
+						  port_priv->tx_qdid,
+						  8, 0, &fd);
+		retries--;
+	} while (err == -EBUSY && retries);
+
+	if (unlikely(err < 0)) {
+		dpaa2_switch_free_fd(ethsw, &fd);
+		goto err_exit;
+	}
+
+	return NETDEV_TX_OK;
+
+err_free_skb:
+	dev_kfree_skb(skb);
+err_exit:
+	return NETDEV_TX_OK;
+}
+
+static int
+dpaa2_switch_setup_tc_cls_flower(struct dpaa2_switch_acl_tbl *acl_tbl,
+				 struct flow_cls_offload *f)
+{
+	switch (f->command) {
+	case FLOW_CLS_REPLACE:
+		return dpaa2_switch_cls_flower_replace(acl_tbl, f);
+	case FLOW_CLS_DESTROY:
+		return dpaa2_switch_cls_flower_destroy(acl_tbl, f);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int
+dpaa2_switch_setup_tc_cls_matchall(struct dpaa2_switch_acl_tbl *acl_tbl,
+				   struct tc_cls_matchall_offload *f)
+{
+	switch (f->command) {
+	case TC_CLSMATCHALL_REPLACE:
+		return dpaa2_switch_cls_matchall_replace(acl_tbl, f);
+	case TC_CLSMATCHALL_DESTROY:
+		return dpaa2_switch_cls_matchall_destroy(acl_tbl, f);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int dpaa2_switch_port_setup_tc_block_cb_ig(enum tc_setup_type type,
+						  void *type_data,
+						  void *cb_priv)
+{
+	switch (type) {
+	case TC_SETUP_CLSFLOWER:
+		return dpaa2_switch_setup_tc_cls_flower(cb_priv, type_data);
+	case TC_SETUP_CLSMATCHALL:
+		return dpaa2_switch_setup_tc_cls_matchall(cb_priv, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static LIST_HEAD(dpaa2_switch_block_cb_list);
+
+static int dpaa2_switch_port_acl_tbl_bind(struct ethsw_port_priv *port_priv,
+					  struct dpaa2_switch_acl_tbl *acl_tbl)
+{
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	struct net_device *netdev = port_priv->netdev;
+	struct dpsw_acl_if_cfg acl_if_cfg;
+	int err;
+
+	if (port_priv->acl_tbl)
+		return -EINVAL;
+
+	acl_if_cfg.if_id[0] = port_priv->idx;
+	acl_if_cfg.num_ifs = 1;
+	err = dpsw_acl_add_if(ethsw->mc_io, 0, ethsw->dpsw_handle,
+			      acl_tbl->id, &acl_if_cfg);
+	if (err) {
+		netdev_err(netdev, "dpsw_acl_add_if err %d\n", err);
+		return err;
+	}
+
+	acl_tbl->ports |= BIT(port_priv->idx);
+	port_priv->acl_tbl = acl_tbl;
+
+	return 0;
+}
+
+static int
+dpaa2_switch_port_acl_tbl_unbind(struct ethsw_port_priv *port_priv,
+				 struct dpaa2_switch_acl_tbl *acl_tbl)
+{
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	struct net_device *netdev = port_priv->netdev;
+	struct dpsw_acl_if_cfg acl_if_cfg;
+	int err;
+
+	if (port_priv->acl_tbl != acl_tbl)
+		return -EINVAL;
+
+	acl_if_cfg.if_id[0] = port_priv->idx;
+	acl_if_cfg.num_ifs = 1;
+	err = dpsw_acl_remove_if(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				 acl_tbl->id, &acl_if_cfg);
+	if (err) {
+		netdev_err(netdev, "dpsw_acl_remove_if err %d\n", err);
+		return err;
+	}
+
+	acl_tbl->ports &= ~BIT(port_priv->idx);
+	port_priv->acl_tbl = NULL;
+	return 0;
+}
+
+static int dpaa2_switch_port_block_bind(struct ethsw_port_priv *port_priv,
+					struct dpaa2_switch_acl_tbl *acl_tbl)
+{
+	struct dpaa2_switch_acl_tbl *old_acl_tbl = port_priv->acl_tbl;
+	int err;
+
+	/* If the port is already bound to this ACL table then do nothing.
This + * can happen when this port is the first one to join a tc block + */ + if (port_priv->acl_tbl == acl_tbl) + return 0; + + err = dpaa2_switch_port_acl_tbl_unbind(port_priv, old_acl_tbl); + if (err) + return err; + + /* Mark the previous ACL table as being unused if this was the last + * port that was using it. + */ + if (old_acl_tbl->ports == 0) + old_acl_tbl->in_use = false; + + return dpaa2_switch_port_acl_tbl_bind(port_priv, acl_tbl); +} + +static int dpaa2_switch_port_block_unbind(struct ethsw_port_priv *port_priv, + struct dpaa2_switch_acl_tbl *acl_tbl) +{ + struct ethsw_core *ethsw = port_priv->ethsw_data; + struct dpaa2_switch_acl_tbl *new_acl_tbl; + int err; + + /* We are the last port that leaves a block (an ACL table). + * We'll continue to use this table. + */ + if (acl_tbl->ports == BIT(port_priv->idx)) + return 0; + + err = dpaa2_switch_port_acl_tbl_unbind(port_priv, acl_tbl); + if (err) + return err; + + if (acl_tbl->ports == 0) + acl_tbl->in_use = false; + + new_acl_tbl = dpaa2_switch_acl_tbl_get_unused(ethsw); + new_acl_tbl->in_use = true; + return dpaa2_switch_port_acl_tbl_bind(port_priv, new_acl_tbl); +} + +static int dpaa2_switch_setup_tc_block_bind(struct net_device *netdev, + struct flow_block_offload *f) +{ + struct ethsw_port_priv *port_priv = netdev_priv(netdev); + struct ethsw_core *ethsw = port_priv->ethsw_data; + struct dpaa2_switch_acl_tbl *acl_tbl; + struct flow_block_cb *block_cb; + bool register_block = false; + int err; + + block_cb = flow_block_cb_lookup(f->block, + dpaa2_switch_port_setup_tc_block_cb_ig, + ethsw); + + if (!block_cb) { + /* If the ACL table is not already known, then this port must + * be the first to join it. In this case, we can just continue + * to use our private table + */ + acl_tbl = port_priv->acl_tbl; + + block_cb = flow_block_cb_alloc(dpaa2_switch_port_setup_tc_block_cb_ig, + ethsw, acl_tbl, NULL); + if (IS_ERR(block_cb)) + return PTR_ERR(block_cb); + + register_block = true; + } else { + acl_tbl = flow_block_cb_priv(block_cb); + } + + flow_block_cb_incref(block_cb); + err = dpaa2_switch_port_block_bind(port_priv, acl_tbl); + if (err) + goto err_block_bind; + + if (register_block) { + flow_block_cb_add(block_cb, f); + list_add_tail(&block_cb->driver_list, + &dpaa2_switch_block_cb_list); + } + + return 0; + +err_block_bind: + if (!flow_block_cb_decref(block_cb)) + flow_block_cb_free(block_cb); + return err; +} + +static void dpaa2_switch_setup_tc_block_unbind(struct net_device *netdev, + struct flow_block_offload *f) +{ + struct ethsw_port_priv *port_priv = netdev_priv(netdev); + struct ethsw_core *ethsw = port_priv->ethsw_data; + struct dpaa2_switch_acl_tbl *acl_tbl; + struct flow_block_cb *block_cb; + int err; + + block_cb = flow_block_cb_lookup(f->block, + dpaa2_switch_port_setup_tc_block_cb_ig, + ethsw); + if (!block_cb) + return; + + acl_tbl = flow_block_cb_priv(block_cb); + err = dpaa2_switch_port_block_unbind(port_priv, acl_tbl); + if (!err && !flow_block_cb_decref(block_cb)) { + flow_block_cb_remove(block_cb, f); + list_del(&block_cb->driver_list); + } +} + +static int dpaa2_switch_setup_tc_block(struct net_device *netdev, + struct flow_block_offload *f) +{ + if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + f->driver_block_list = &dpaa2_switch_block_cb_list; + + switch (f->command) { + case FLOW_BLOCK_BIND: + return dpaa2_switch_setup_tc_block_bind(netdev, f); + case FLOW_BLOCK_UNBIND: + dpaa2_switch_setup_tc_block_unbind(netdev, f); + return 0; + default: + return -EOPNOTSUPP; + } 
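+
+	/* Example usage from userspace (interface names and block index are
+	 * illustrative): a block shared by two ports funnels all of its
+	 * filters into a single ACL table:
+	 *
+	 *   tc qdisc add dev swp0 ingress_block 1 clsact
+	 *   tc qdisc add dev swp1 ingress_block 1 clsact
+	 *   tc filter add block 1 protocol ip flower skip_sw \
+	 *	src_ip 192.168.0.1 action drop
+	 */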
+} + +static int dpaa2_switch_port_setup_tc(struct net_device *netdev, + enum tc_setup_type type, + void *type_data) +{ + switch (type) { + case TC_SETUP_BLOCK: { + return dpaa2_switch_setup_tc_block(netdev, type_data); + } + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static const struct net_device_ops dpaa2_switch_port_ops = { + .ndo_open = dpaa2_switch_port_open, + .ndo_stop = dpaa2_switch_port_stop, + + .ndo_set_mac_address = eth_mac_addr, + .ndo_get_stats64 = dpaa2_switch_port_get_stats, + .ndo_change_mtu = dpaa2_switch_port_change_mtu, + .ndo_has_offload_stats = dpaa2_switch_port_has_offload_stats, + .ndo_get_offload_stats = dpaa2_switch_port_get_offload_stats, + .ndo_fdb_dump = dpaa2_switch_port_fdb_dump, + .ndo_vlan_rx_add_vid = dpaa2_switch_port_vlan_add, + .ndo_vlan_rx_kill_vid = dpaa2_switch_port_vlan_kill, + + .ndo_start_xmit = dpaa2_switch_port_tx, + .ndo_get_port_parent_id = dpaa2_switch_port_parent_id, + .ndo_get_phys_port_name = dpaa2_switch_port_get_phys_name, + .ndo_setup_tc = dpaa2_switch_port_setup_tc, +}; + +bool dpaa2_switch_port_dev_check(const struct net_device *netdev) +{ + return netdev->netdev_ops == &dpaa2_switch_port_ops; +} + +static void dpaa2_switch_links_state_update(struct ethsw_core *ethsw) +{ + int i; + + for (i = 0; i < ethsw->sw_attr.num_ifs; i++) { + dpaa2_switch_port_carrier_state_sync(ethsw->ports[i]->netdev); + dpaa2_switch_port_set_mac_addr(ethsw->ports[i]); + } +} + +static irqreturn_t dpaa2_switch_irq0_handler_thread(int irq_num, void *arg) +{ + struct device *dev = (struct device *)arg; + struct ethsw_core *ethsw = dev_get_drvdata(dev); + + /* Mask the events and the if_id reserved bits to be cleared on read */ + u32 status = DPSW_IRQ_EVENT_LINK_CHANGED | 0xFFFF0000; + int err; + + err = dpsw_get_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle, + DPSW_IRQ_INDEX_IF, &status); + if (err) { + dev_err(dev, "Can't get irq status (err %d)\n", err); + + err = dpsw_clear_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle, + DPSW_IRQ_INDEX_IF, 0xFFFFFFFF); + if (err) + dev_err(dev, "Can't clear irq status (err %d)\n", err); + goto out; + } + + if (status & DPSW_IRQ_EVENT_LINK_CHANGED) + dpaa2_switch_links_state_update(ethsw); + +out: + return IRQ_HANDLED; +} + +static int dpaa2_switch_setup_irqs(struct fsl_mc_device *sw_dev) +{ + struct device *dev = &sw_dev->dev; + struct ethsw_core *ethsw = dev_get_drvdata(dev); + u32 mask = DPSW_IRQ_EVENT_LINK_CHANGED; + struct fsl_mc_device_irq *irq; + int err; + + err = fsl_mc_allocate_irqs(sw_dev); + if (err) { + dev_err(dev, "MC irqs allocation failed\n"); + return err; + } + + if (WARN_ON(sw_dev->obj_desc.irq_count != DPSW_IRQ_NUM)) { + err = -EINVAL; + goto free_irq; + } + + err = dpsw_set_irq_enable(ethsw->mc_io, 0, ethsw->dpsw_handle, + DPSW_IRQ_INDEX_IF, 0); + if (err) { + dev_err(dev, "dpsw_set_irq_enable err %d\n", err); + goto free_irq; + } + + irq = sw_dev->irqs[DPSW_IRQ_INDEX_IF]; + + err = devm_request_threaded_irq(dev, irq->msi_desc->irq, + NULL, + dpaa2_switch_irq0_handler_thread, + IRQF_NO_SUSPEND | IRQF_ONESHOT, + dev_name(dev), dev); + if (err) { + dev_err(dev, "devm_request_threaded_irq(): %d\n", err); + goto free_irq; + } + + err = dpsw_set_irq_mask(ethsw->mc_io, 0, ethsw->dpsw_handle, + DPSW_IRQ_INDEX_IF, mask); + if (err) { + dev_err(dev, "dpsw_set_irq_mask(): %d\n", err); + goto free_devm_irq; + } + + err = dpsw_set_irq_enable(ethsw->mc_io, 0, ethsw->dpsw_handle, + DPSW_IRQ_INDEX_IF, 1); + if (err) { + dev_err(dev, "dpsw_set_irq_enable(): %d\n", err); + goto free_devm_irq; + } + + 
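+	/* From this point on, a link state change on any switch interface
+	 * raises DPSW_IRQ_EVENT_LINK_CHANGED and is serviced by
+	 * dpaa2_switch_irq0_handler_thread() above.
+	 */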
return 0; + +free_devm_irq: + devm_free_irq(dev, irq->msi_desc->irq, dev); +free_irq: + fsl_mc_free_irqs(sw_dev); + return err; +} + +static void dpaa2_switch_teardown_irqs(struct fsl_mc_device *sw_dev) +{ + struct device *dev = &sw_dev->dev; + struct ethsw_core *ethsw = dev_get_drvdata(dev); + int err; + + err = dpsw_set_irq_enable(ethsw->mc_io, 0, ethsw->dpsw_handle, + DPSW_IRQ_INDEX_IF, 0); + if (err) + dev_err(dev, "dpsw_set_irq_enable err %d\n", err); + + fsl_mc_free_irqs(sw_dev); +} + +static int dpaa2_switch_port_set_learning(struct ethsw_port_priv *port_priv, bool enable) +{ + struct ethsw_core *ethsw = port_priv->ethsw_data; + enum dpsw_learning_mode learn_mode; + int err; + + if (enable) + learn_mode = DPSW_LEARNING_MODE_HW; + else + learn_mode = DPSW_LEARNING_MODE_DIS; + + err = dpsw_if_set_learning_mode(ethsw->mc_io, 0, ethsw->dpsw_handle, + port_priv->idx, learn_mode); + if (err) + netdev_err(port_priv->netdev, "dpsw_if_set_learning_mode err %d\n", err); + + if (!enable) + dpaa2_switch_port_fast_age(port_priv); + + return err; +} + +static int dpaa2_switch_port_attr_stp_state_set(struct net_device *netdev, + u8 state) +{ + struct ethsw_port_priv *port_priv = netdev_priv(netdev); + int err; + + err = dpaa2_switch_port_set_stp_state(port_priv, state); + if (err) + return err; + + switch (state) { + case BR_STATE_DISABLED: + case BR_STATE_BLOCKING: + case BR_STATE_LISTENING: + err = dpaa2_switch_port_set_learning(port_priv, false); + break; + case BR_STATE_LEARNING: + case BR_STATE_FORWARDING: + err = dpaa2_switch_port_set_learning(port_priv, + port_priv->learn_ena); + break; + } + + return err; +} + +static int dpaa2_switch_port_flood(struct ethsw_port_priv *port_priv, + struct switchdev_brport_flags flags) +{ + struct ethsw_core *ethsw = port_priv->ethsw_data; + + if (flags.mask & BR_BCAST_FLOOD) + port_priv->bcast_flood = !!(flags.val & BR_BCAST_FLOOD); + + if (flags.mask & BR_FLOOD) + port_priv->ucast_flood = !!(flags.val & BR_FLOOD); + + return dpaa2_switch_fdb_set_egress_flood(ethsw, port_priv->fdb->fdb_id); +} + +static int dpaa2_switch_port_pre_bridge_flags(struct net_device *netdev, + struct switchdev_brport_flags flags, + struct netlink_ext_ack *extack) +{ + if (flags.mask & ~(BR_LEARNING | BR_BCAST_FLOOD | BR_FLOOD | + BR_MCAST_FLOOD)) + return -EINVAL; + + if (flags.mask & (BR_FLOOD | BR_MCAST_FLOOD)) { + bool multicast = !!(flags.val & BR_MCAST_FLOOD); + bool unicast = !!(flags.val & BR_FLOOD); + + if (unicast != multicast) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot configure multicast flooding independently of unicast"); + return -EINVAL; + } + } + + return 0; +} + +static int dpaa2_switch_port_bridge_flags(struct net_device *netdev, + struct switchdev_brport_flags flags, + struct netlink_ext_ack *extack) +{ + struct ethsw_port_priv *port_priv = netdev_priv(netdev); + int err; + + if (flags.mask & BR_LEARNING) { + bool learn_ena = !!(flags.val & BR_LEARNING); + + err = dpaa2_switch_port_set_learning(port_priv, learn_ena); + if (err) + return err; + port_priv->learn_ena = learn_ena; + } + + if (flags.mask & (BR_BCAST_FLOOD | BR_FLOOD | BR_MCAST_FLOOD)) { + err = dpaa2_switch_port_flood(port_priv, flags); + if (err) + return err; + } + + return 0; +} + +static int dpaa2_switch_port_attr_set(struct net_device *netdev, + const struct switchdev_attr *attr, + struct netlink_ext_ack *extack) +{ + int err = 0; + + switch (attr->id) { + case SWITCHDEV_ATTR_ID_PORT_STP_STATE: + err = dpaa2_switch_port_attr_stp_state_set(netdev, + attr->u.stp_state); + break; + case 
SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
+		if (!attr->u.vlan_filtering) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "The DPAA2 switch does not support VLAN-unaware operation");
+			return -EOPNOTSUPP;
+		}
+		break;
+	case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS:
+		err = dpaa2_switch_port_pre_bridge_flags(netdev, attr->u.brport_flags, extack);
+		break;
+	case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
+		err = dpaa2_switch_port_bridge_flags(netdev, attr->u.brport_flags, extack);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+
+	return err;
+}
+
+int dpaa2_switch_port_vlans_add(struct net_device *netdev,
+				const struct switchdev_obj_port_vlan *vlan)
+{
+	struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+	struct ethsw_core *ethsw = port_priv->ethsw_data;
+	struct dpsw_attr *attr = &ethsw->sw_attr;
+	int err = 0;
+
+	/* Make sure that the VLAN is not already configured
+	 * on the switch port
+	 */
+	if (port_priv->vlans[vlan->vid] & ETHSW_VLAN_MEMBER)
+		return -EEXIST;
+
+	/* Check if there is space for a new VLAN */
+	err = dpsw_get_attributes(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				  &ethsw->sw_attr);
+	if (err) {
+		netdev_err(netdev, "dpsw_get_attributes err %d\n", err);
+		return err;
+	}
+	if (attr->max_vlans - attr->num_vlans < 1)
+		return -ENOSPC;
+
+	if (!port_priv->ethsw_data->vlans[vlan->vid]) {
+		/* this is a new VLAN */
+		err = dpaa2_switch_add_vlan(port_priv, vlan->vid);
+		if (err)
+			return err;
+
+		port_priv->ethsw_data->vlans[vlan->vid] |= ETHSW_VLAN_GLOBAL;
+	}
+
+	return dpaa2_switch_port_add_vlan(port_priv, vlan->vid, vlan->flags);
+}
+
+static int dpaa2_switch_port_lookup_address(struct net_device *netdev, int is_uc,
+					    const unsigned char *addr)
+{
+	struct netdev_hw_addr_list *list = (is_uc) ?
&netdev->uc : &netdev->mc; + struct netdev_hw_addr *ha; + + netif_addr_lock_bh(netdev); + list_for_each_entry(ha, &list->list, list) { + if (ether_addr_equal(ha->addr, addr)) { + netif_addr_unlock_bh(netdev); + return 1; + } + } + netif_addr_unlock_bh(netdev); + return 0; +} + +static int dpaa2_switch_port_mdb_add(struct net_device *netdev, + const struct switchdev_obj_port_mdb *mdb) +{ + struct ethsw_port_priv *port_priv = netdev_priv(netdev); + int err; + + /* Check if address is already set on this port */ + if (dpaa2_switch_port_lookup_address(netdev, 0, mdb->addr)) + return -EEXIST; + + err = dpaa2_switch_port_fdb_add_mc(port_priv, mdb->addr); + if (err) + return err; + + err = dev_mc_add(netdev, mdb->addr); + if (err) { + netdev_err(netdev, "dev_mc_add err %d\n", err); + dpaa2_switch_port_fdb_del_mc(port_priv, mdb->addr); + } + + return err; +} + +static int dpaa2_switch_port_obj_add(struct net_device *netdev, + const struct switchdev_obj *obj) +{ + int err; + + switch (obj->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + err = dpaa2_switch_port_vlans_add(netdev, + SWITCHDEV_OBJ_PORT_VLAN(obj)); + break; + case SWITCHDEV_OBJ_ID_PORT_MDB: + err = dpaa2_switch_port_mdb_add(netdev, + SWITCHDEV_OBJ_PORT_MDB(obj)); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static int dpaa2_switch_port_del_vlan(struct ethsw_port_priv *port_priv, u16 vid) +{ + struct ethsw_core *ethsw = port_priv->ethsw_data; + struct net_device *netdev = port_priv->netdev; + struct dpsw_vlan_if_cfg vcfg; + int i, err; + + if (!port_priv->vlans[vid]) + return -ENOENT; + + if (port_priv->vlans[vid] & ETHSW_VLAN_PVID) { + /* If we are deleting the PVID of a port, use VLAN 4095 instead + * as we are sure that neither the bridge nor the 8021q module + * will use it + */ + err = dpaa2_switch_port_set_pvid(port_priv, 4095); + if (err) + return err; + } + + vcfg.num_ifs = 1; + vcfg.if_id[0] = port_priv->idx; + if (port_priv->vlans[vid] & ETHSW_VLAN_UNTAGGED) { + err = dpsw_vlan_remove_if_untagged(ethsw->mc_io, 0, + ethsw->dpsw_handle, + vid, &vcfg); + if (err) { + netdev_err(netdev, + "dpsw_vlan_remove_if_untagged err %d\n", + err); + } + port_priv->vlans[vid] &= ~ETHSW_VLAN_UNTAGGED; + } + + if (port_priv->vlans[vid] & ETHSW_VLAN_MEMBER) { + err = dpsw_vlan_remove_if(ethsw->mc_io, 0, ethsw->dpsw_handle, + vid, &vcfg); + if (err) { + netdev_err(netdev, + "dpsw_vlan_remove_if err %d\n", err); + return err; + } + port_priv->vlans[vid] &= ~ETHSW_VLAN_MEMBER; + + /* Delete VLAN from switch if it is no longer configured on + * any port + */ + for (i = 0; i < ethsw->sw_attr.num_ifs; i++) + if (ethsw->ports[i]->vlans[vid] & ETHSW_VLAN_MEMBER) + return 0; /* Found a port member in VID */ + + ethsw->vlans[vid] &= ~ETHSW_VLAN_GLOBAL; + + err = dpaa2_switch_dellink(ethsw, vid); + if (err) + return err; + } + + return 0; +} + +int dpaa2_switch_port_vlans_del(struct net_device *netdev, + const struct switchdev_obj_port_vlan *vlan) +{ + struct ethsw_port_priv *port_priv = netdev_priv(netdev); + + if (netif_is_bridge_master(vlan->obj.orig_dev)) + return -EOPNOTSUPP; + + return dpaa2_switch_port_del_vlan(port_priv, vlan->vid); +} + +static int dpaa2_switch_port_mdb_del(struct net_device *netdev, + const struct switchdev_obj_port_mdb *mdb) +{ + struct ethsw_port_priv *port_priv = netdev_priv(netdev); + int err; + + if (!dpaa2_switch_port_lookup_address(netdev, 0, mdb->addr)) + return -ENOENT; + + err = dpaa2_switch_port_fdb_del_mc(port_priv, mdb->addr); + if (err) + return err; + + err = dev_mc_del(netdev, mdb->addr); 
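+	/* The hardware FDB entry is already gone at this point; a failure
+	 * below only leaves the netdev's own multicast list out of sync.
+	 */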
+ if (err) { + netdev_err(netdev, "dev_mc_del err %d\n", err); + return err; + } + + return err; +} + +static int dpaa2_switch_port_obj_del(struct net_device *netdev, + const struct switchdev_obj *obj) +{ + int err; + + switch (obj->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + err = dpaa2_switch_port_vlans_del(netdev, SWITCHDEV_OBJ_PORT_VLAN(obj)); + break; + case SWITCHDEV_OBJ_ID_PORT_MDB: + err = dpaa2_switch_port_mdb_del(netdev, SWITCHDEV_OBJ_PORT_MDB(obj)); + break; + default: + err = -EOPNOTSUPP; + break; + } + return err; +} + +static int dpaa2_switch_port_attr_set_event(struct net_device *netdev, + struct switchdev_notifier_port_attr_info *ptr) +{ + int err; + + err = switchdev_handle_port_attr_set(netdev, ptr, + dpaa2_switch_port_dev_check, + dpaa2_switch_port_attr_set); + return notifier_from_errno(err); +} + +static int dpaa2_switch_port_bridge_join(struct net_device *netdev, + struct net_device *upper_dev) +{ + struct ethsw_port_priv *port_priv = netdev_priv(netdev); + struct ethsw_core *ethsw = port_priv->ethsw_data; + struct ethsw_port_priv *other_port_priv; + struct net_device *other_dev; + struct list_head *iter; + bool learn_ena; + int err; + + netdev_for_each_lower_dev(upper_dev, other_dev, iter) { + if (!dpaa2_switch_port_dev_check(other_dev)) + continue; + + other_port_priv = netdev_priv(other_dev); + if (other_port_priv->ethsw_data != port_priv->ethsw_data) { + netdev_err(netdev, + "Interface from a different DPSW is in the bridge already!\n"); + return -EINVAL; + } + } + + /* Delete the previously manually installed VLAN 1 */ + err = dpaa2_switch_port_del_vlan(port_priv, 1); + if (err) + return err; + + dpaa2_switch_port_set_fdb(port_priv, upper_dev); + + /* Inherit the initial bridge port learning state */ + learn_ena = br_port_flag_is_set(netdev, BR_LEARNING); + err = dpaa2_switch_port_set_learning(port_priv, learn_ena); + port_priv->learn_ena = learn_ena; + + /* Setup the egress flood policy (broadcast, unknown unicast) */ + err = dpaa2_switch_fdb_set_egress_flood(ethsw, port_priv->fdb->fdb_id); + if (err) + goto err_egress_flood; + + return 0; + +err_egress_flood: + dpaa2_switch_port_set_fdb(port_priv, NULL); + return err; +} + +static int dpaa2_switch_port_clear_rxvlan(struct net_device *vdev, int vid, void *arg) +{ + __be16 vlan_proto = htons(ETH_P_8021Q); + + if (vdev) + vlan_proto = vlan_dev_vlan_proto(vdev); + + return dpaa2_switch_port_vlan_kill(arg, vlan_proto, vid); +} + +static int dpaa2_switch_port_restore_rxvlan(struct net_device *vdev, int vid, void *arg) +{ + __be16 vlan_proto = htons(ETH_P_8021Q); + + if (vdev) + vlan_proto = vlan_dev_vlan_proto(vdev); + + return dpaa2_switch_port_vlan_add(arg, vlan_proto, vid); +} + +static int dpaa2_switch_port_bridge_leave(struct net_device *netdev) +{ + struct ethsw_port_priv *port_priv = netdev_priv(netdev); + struct dpaa2_switch_fdb *old_fdb = port_priv->fdb; + struct ethsw_core *ethsw = port_priv->ethsw_data; + int err; + + /* First of all, fast age any learn FDB addresses on this switch port */ + dpaa2_switch_port_fast_age(port_priv); + + /* Clear all RX VLANs installed through vlan_vid_add() either as VLAN + * upper devices or otherwise from the FDB table that we are about to + * leave + */ + err = vlan_for_each(netdev, dpaa2_switch_port_clear_rxvlan, netdev); + if (err) + netdev_err(netdev, "Unable to clear RX VLANs from old FDB table, err (%d)\n", err); + + dpaa2_switch_port_set_fdb(port_priv, NULL); + + /* Restore all RX VLANs into the new FDB table that we just joined */ + err = vlan_for_each(netdev, 
dpaa2_switch_port_restore_rxvlan, netdev); + if (err) + netdev_err(netdev, "Unable to restore RX VLANs to the new FDB, err (%d)\n", err); + + /* Reset the flooding state to denote that this port can send any + * packet in standalone mode. With this, we are also ensuring that any + * later bridge join will have the flooding flag on. + */ + port_priv->bcast_flood = true; + port_priv->ucast_flood = true; + + /* Setup the egress flood policy (broadcast, unknown unicast). + * When the port is not under a bridge, only the CTRL interface is part + * of the flooding domain besides the actual port + */ + err = dpaa2_switch_fdb_set_egress_flood(ethsw, port_priv->fdb->fdb_id); + if (err) + return err; + + /* Recreate the egress flood domain of the FDB that we just left */ + err = dpaa2_switch_fdb_set_egress_flood(ethsw, old_fdb->fdb_id); + if (err) + return err; + + /* No HW learning when not under a bridge */ + err = dpaa2_switch_port_set_learning(port_priv, false); + if (err) + return err; + port_priv->learn_ena = false; + + /* Add the VLAN 1 as PVID when not under a bridge. We need this since + * the dpaa2 switch interfaces are not capable to be VLAN unaware + */ + return dpaa2_switch_port_add_vlan(port_priv, DEFAULT_VLAN_ID, + BRIDGE_VLAN_INFO_UNTAGGED | BRIDGE_VLAN_INFO_PVID); +} + +static int dpaa2_switch_prevent_bridging_with_8021q_upper(struct net_device *netdev) +{ + struct net_device *upper_dev; + struct list_head *iter; + + /* RCU read lock not necessary because we have write-side protection + * (rtnl_mutex), however a non-rcu iterator does not exist. + */ + netdev_for_each_upper_dev_rcu(netdev, upper_dev, iter) + if (is_vlan_dev(upper_dev)) + return -EOPNOTSUPP; + + return 0; +} + +static int dpaa2_switch_port_netdevice_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + struct netdev_notifier_changeupper_info *info = ptr; + struct netlink_ext_ack *extack; + struct net_device *upper_dev; + int err = 0; + + if (!dpaa2_switch_port_dev_check(netdev)) + return NOTIFY_DONE; + + extack = netdev_notifier_info_to_extack(&info->info); + + switch (event) { + case NETDEV_PRECHANGEUPPER: + upper_dev = info->upper_dev; + if (!netif_is_bridge_master(upper_dev)) + break; + + if (!br_vlan_enabled(upper_dev)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot join a VLAN-unaware bridge"); + err = -EOPNOTSUPP; + goto out; + } + + err = dpaa2_switch_prevent_bridging_with_8021q_upper(netdev); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot join a bridge while VLAN uppers are present"); + goto out; + } + + break; + case NETDEV_CHANGEUPPER: + upper_dev = info->upper_dev; + if (netif_is_bridge_master(upper_dev)) { + if (info->linking) + err = dpaa2_switch_port_bridge_join(netdev, upper_dev); + else + err = dpaa2_switch_port_bridge_leave(netdev); + } + break; + } + +out: + return notifier_from_errno(err); +} + +struct ethsw_switchdev_event_work { + struct work_struct work; + struct switchdev_notifier_fdb_info fdb_info; + struct net_device *dev; + unsigned long event; +}; + +static void dpaa2_switch_event_work(struct work_struct *work) +{ + struct ethsw_switchdev_event_work *switchdev_work = + container_of(work, struct ethsw_switchdev_event_work, work); + struct net_device *dev = switchdev_work->dev; + struct switchdev_notifier_fdb_info *fdb_info; + int err; + + rtnl_lock(); + fdb_info = &switchdev_work->fdb_info; + + switch (switchdev_work->event) { + case SWITCHDEV_FDB_ADD_TO_DEVICE: + if (!fdb_info->added_by_user || fdb_info->is_local) 
+ break; + if (is_unicast_ether_addr(fdb_info->addr)) + err = dpaa2_switch_port_fdb_add_uc(netdev_priv(dev), + fdb_info->addr); + else + err = dpaa2_switch_port_fdb_add_mc(netdev_priv(dev), + fdb_info->addr); + if (err) + break; + fdb_info->offloaded = true; + call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, dev, + &fdb_info->info, NULL); + break; + case SWITCHDEV_FDB_DEL_TO_DEVICE: + if (!fdb_info->added_by_user || fdb_info->is_local) + break; + if (is_unicast_ether_addr(fdb_info->addr)) + dpaa2_switch_port_fdb_del_uc(netdev_priv(dev), fdb_info->addr); + else + dpaa2_switch_port_fdb_del_mc(netdev_priv(dev), fdb_info->addr); + break; + } + + rtnl_unlock(); + kfree(switchdev_work->fdb_info.addr); + kfree(switchdev_work); + dev_put(dev); +} + +/* Called under rcu_read_lock() */ +static int dpaa2_switch_port_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *dev = switchdev_notifier_info_to_dev(ptr); + struct ethsw_port_priv *port_priv = netdev_priv(dev); + struct ethsw_switchdev_event_work *switchdev_work; + struct switchdev_notifier_fdb_info *fdb_info = ptr; + struct ethsw_core *ethsw = port_priv->ethsw_data; + + if (event == SWITCHDEV_PORT_ATTR_SET) + return dpaa2_switch_port_attr_set_event(dev, ptr); + + if (!dpaa2_switch_port_dev_check(dev)) + return NOTIFY_DONE; + + switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC); + if (!switchdev_work) + return NOTIFY_BAD; + + INIT_WORK(&switchdev_work->work, dpaa2_switch_event_work); + switchdev_work->dev = dev; + switchdev_work->event = event; + + switch (event) { + case SWITCHDEV_FDB_ADD_TO_DEVICE: + case SWITCHDEV_FDB_DEL_TO_DEVICE: + memcpy(&switchdev_work->fdb_info, ptr, + sizeof(switchdev_work->fdb_info)); + switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC); + if (!switchdev_work->fdb_info.addr) + goto err_addr_alloc; + + ether_addr_copy((u8 *)switchdev_work->fdb_info.addr, + fdb_info->addr); + + /* Take a reference on the device to avoid being freed. 
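+		 * The reference is released by dev_put() at the end of
+		 * dpaa2_switch_event_work(), once the deferred FDB update ran.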
+		 */
+		dev_hold(dev);
+		break;
+	default:
+		kfree(switchdev_work);
+		return NOTIFY_DONE;
+	}
+
+	queue_work(ethsw->workqueue, &switchdev_work->work);
+
+	return NOTIFY_DONE;
+
+err_addr_alloc:
+	kfree(switchdev_work);
+	return NOTIFY_BAD;
+}
+
+static int dpaa2_switch_port_obj_event(unsigned long event,
+				       struct net_device *netdev,
+				       struct switchdev_notifier_port_obj_info *port_obj_info)
+{
+	int err = -EOPNOTSUPP;
+
+	if (!dpaa2_switch_port_dev_check(netdev))
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case SWITCHDEV_PORT_OBJ_ADD:
+		err = dpaa2_switch_port_obj_add(netdev, port_obj_info->obj);
+		break;
+	case SWITCHDEV_PORT_OBJ_DEL:
+		err = dpaa2_switch_port_obj_del(netdev, port_obj_info->obj);
+		break;
+	}
+
+	port_obj_info->handled = true;
+	return notifier_from_errno(err);
+}
+
+static int dpaa2_switch_port_blocking_event(struct notifier_block *nb,
+					    unsigned long event, void *ptr)
+{
+	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+
+	switch (event) {
+	case SWITCHDEV_PORT_OBJ_ADD:
+	case SWITCHDEV_PORT_OBJ_DEL:
+		return dpaa2_switch_port_obj_event(event, dev, ptr);
+	case SWITCHDEV_PORT_ATTR_SET:
+		return dpaa2_switch_port_attr_set_event(dev, ptr);
+	}
+
+	return NOTIFY_DONE;
+}
+
+/* Build a linear skb based on a single-buffer frame descriptor */
+static struct sk_buff *dpaa2_switch_build_linear_skb(struct ethsw_core *ethsw,
+						     const struct dpaa2_fd *fd)
+{
+	u16 fd_offset = dpaa2_fd_get_offset(fd);
+	dma_addr_t addr = dpaa2_fd_get_addr(fd);
+	u32 fd_length = dpaa2_fd_get_len(fd);
+	struct device *dev = ethsw->dev;
+	struct sk_buff *skb = NULL;
+	void *fd_vaddr;
+
+	fd_vaddr = dpaa2_iova_to_virt(ethsw->iommu_domain, addr);
+	dma_unmap_page(dev, addr, DPAA2_SWITCH_RX_BUF_SIZE,
+		       DMA_FROM_DEVICE);
+
+	skb = build_skb(fd_vaddr, DPAA2_SWITCH_RX_BUF_SIZE +
+			SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
+	if (unlikely(!skb)) {
+		dev_err(dev, "build_skb() failed\n");
+		return NULL;
+	}
+
+	skb_reserve(skb, fd_offset);
+	skb_put(skb, fd_length);
+
+	ethsw->buf_count--;
+
+	return skb;
+}
+
+static void dpaa2_switch_tx_conf(struct dpaa2_switch_fq *fq,
+				 const struct dpaa2_fd *fd)
+{
+	dpaa2_switch_free_fd(fq->ethsw, fd);
+}
+
+static void dpaa2_switch_rx(struct dpaa2_switch_fq *fq,
+			    const struct dpaa2_fd *fd)
+{
+	struct ethsw_core *ethsw = fq->ethsw;
+	struct ethsw_port_priv *port_priv;
+	struct net_device *netdev;
+	struct vlan_ethhdr *hdr;
+	struct sk_buff *skb;
+	u16 vlan_tci, vid;
+	int if_id, err;
+
+	/* get switch ingress interface ID */
+	if_id = upper_32_bits(dpaa2_fd_get_flc(fd)) & 0x0000FFFF;
+
+	if (if_id >= ethsw->sw_attr.num_ifs) {
+		dev_err(ethsw->dev, "Frame received from unknown interface!\n");
+		goto err_free_fd;
+	}
+	port_priv = ethsw->ports[if_id];
+	netdev = port_priv->netdev;
+
+	/* build the SKB based on the FD received; drop any frame that is not
+	 * a single-buffer FD, rate-limiting only the error message
+	 */
+	if (dpaa2_fd_get_format(fd) != dpaa2_fd_single) {
+		if (net_ratelimit())
+			netdev_err(netdev, "Received invalid frame format\n");
+		goto err_free_fd;
+	}
+
+	skb = dpaa2_switch_build_linear_skb(ethsw, fd);
+	if (unlikely(!skb))
+		goto err_free_fd;
+
+	skb_reset_mac_header(skb);
+
+	/* Remove the VLAN header if the packet that we just received has a vid
+	 * equal to the port PVID. Since the dpaa2-switch can operate only in
+	 * VLAN-aware mode and no alterations are made on the packet when it's
+	 * redirected/mirrored to the control interface, we are sure that there
+	 * will always be a VLAN header present.
+ */ + hdr = vlan_eth_hdr(skb); + vid = ntohs(hdr->h_vlan_TCI) & VLAN_VID_MASK; + if (vid == port_priv->pvid) { + err = __skb_vlan_pop(skb, &vlan_tci); + if (err) { + dev_info(ethsw->dev, "__skb_vlan_pop() returned %d", err); + goto err_free_fd; + } + } + + skb->dev = netdev; + skb->protocol = eth_type_trans(skb, skb->dev); + + /* Setup the offload_fwd_mark only if the port is under a bridge */ + skb->offload_fwd_mark = !!(port_priv->fdb->bridge_dev); + + netif_receive_skb(skb); + + return; + +err_free_fd: + dpaa2_switch_free_fd(ethsw, fd); +} + +static void dpaa2_switch_detect_features(struct ethsw_core *ethsw) +{ + ethsw->features = 0; + + if (ethsw->major > 8 || (ethsw->major == 8 && ethsw->minor >= 6)) + ethsw->features |= ETHSW_FEATURE_MAC_ADDR; +} + +static int dpaa2_switch_setup_fqs(struct ethsw_core *ethsw) +{ + struct dpsw_ctrl_if_attr ctrl_if_attr; + struct device *dev = ethsw->dev; + int i = 0; + int err; + + err = dpsw_ctrl_if_get_attributes(ethsw->mc_io, 0, ethsw->dpsw_handle, + &ctrl_if_attr); + if (err) { + dev_err(dev, "dpsw_ctrl_if_get_attributes() = %d\n", err); + return err; + } + + ethsw->fq[i].fqid = ctrl_if_attr.rx_fqid; + ethsw->fq[i].ethsw = ethsw; + ethsw->fq[i++].type = DPSW_QUEUE_RX; + + ethsw->fq[i].fqid = ctrl_if_attr.tx_err_conf_fqid; + ethsw->fq[i].ethsw = ethsw; + ethsw->fq[i++].type = DPSW_QUEUE_TX_ERR_CONF; + + return 0; +} + +/* Free buffers acquired from the buffer pool or which were meant to + * be released in the pool + */ +static void dpaa2_switch_free_bufs(struct ethsw_core *ethsw, u64 *buf_array, int count) +{ + struct device *dev = ethsw->dev; + void *vaddr; + int i; + + for (i = 0; i < count; i++) { + vaddr = dpaa2_iova_to_virt(ethsw->iommu_domain, buf_array[i]); + dma_unmap_page(dev, buf_array[i], DPAA2_SWITCH_RX_BUF_SIZE, + DMA_FROM_DEVICE); + free_pages((unsigned long)vaddr, 0); + } +} + +/* Perform a single release command to add buffers + * to the specified buffer pool + */ +static int dpaa2_switch_add_bufs(struct ethsw_core *ethsw, u16 bpid) +{ + struct device *dev = ethsw->dev; + u64 buf_array[BUFS_PER_CMD]; + struct page *page; + int retries = 0; + dma_addr_t addr; + int err; + int i; + + for (i = 0; i < BUFS_PER_CMD; i++) { + /* Allocate one page for each Rx buffer. WRIOP sees + * the entire page except for a tailroom reserved for + * skb shared info + */ + page = dev_alloc_pages(0); + if (!page) { + dev_err(dev, "buffer allocation failed\n"); + goto err_alloc; + } + + addr = dma_map_page(dev, page, 0, DPAA2_SWITCH_RX_BUF_SIZE, + DMA_FROM_DEVICE); + if (dma_mapping_error(dev, addr)) { + dev_err(dev, "dma_map_single() failed\n"); + goto err_map; + } + buf_array[i] = addr; + } + +release_bufs: + /* In case the portal is busy, retry until successful or + * max retries hit. + */ + while ((err = dpaa2_io_service_release(NULL, bpid, + buf_array, i)) == -EBUSY) { + if (retries++ >= DPAA2_SWITCH_SWP_BUSY_RETRIES) + break; + + cpu_relax(); + } + + /* If release command failed, clean up and bail out. 
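+	 * Unmap and free all buffers mapped for this command; returning
+	 * zero tells the caller that no buffers were added to the pool.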
+	 */
+	if (err) {
+		dpaa2_switch_free_bufs(ethsw, buf_array, i);
+		return 0;
+	}
+
+	return i;
+
+err_map:
+	__free_pages(page, 0);
+err_alloc:
+	/* If we managed to allocate at least some buffers,
+	 * release them to hardware
+	 */
+	if (i)
+		goto release_bufs;
+
+	return 0;
+}
+
+static int dpaa2_switch_refill_bp(struct ethsw_core *ethsw)
+{
+	int *count = &ethsw->buf_count;
+	int new_count;
+	int err = 0;
+
+	if (unlikely(*count < DPAA2_ETHSW_REFILL_THRESH)) {
+		do {
+			new_count = dpaa2_switch_add_bufs(ethsw, ethsw->bpid);
+			if (unlikely(!new_count)) {
+				/* Out of memory; abort for now, we'll
+				 * try later on
+				 */
+				break;
+			}
+			*count += new_count;
+		} while (*count < DPAA2_ETHSW_NUM_BUFS);
+
+		if (unlikely(*count < DPAA2_ETHSW_NUM_BUFS))
+			err = -ENOMEM;
+	}
+
+	return err;
+}
+
+static int dpaa2_switch_seed_bp(struct ethsw_core *ethsw)
+{
+	int *count, i;
+
+	for (i = 0; i < DPAA2_ETHSW_NUM_BUFS; i += BUFS_PER_CMD) {
+		count = &ethsw->buf_count;
+		*count += dpaa2_switch_add_bufs(ethsw, ethsw->bpid);
+
+		if (unlikely(*count < BUFS_PER_CMD))
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void dpaa2_switch_drain_bp(struct ethsw_core *ethsw)
+{
+	u64 buf_array[BUFS_PER_CMD];
+	int ret;
+
+	do {
+		ret = dpaa2_io_service_acquire(NULL, ethsw->bpid,
+					       buf_array, BUFS_PER_CMD);
+		if (ret < 0) {
+			dev_err(ethsw->dev,
+				"dpaa2_io_service_acquire() = %d\n", ret);
+			return;
+		}
+		dpaa2_switch_free_bufs(ethsw, buf_array, ret);
+
+	} while (ret);
+}
+
+static int dpaa2_switch_setup_dpbp(struct ethsw_core *ethsw)
+{
+	struct dpsw_ctrl_if_pools_cfg dpsw_ctrl_if_pools_cfg = { 0 };
+	struct device *dev = ethsw->dev;
+	struct fsl_mc_device *dpbp_dev;
+	struct dpbp_attr dpbp_attrs;
+	int err;
+
+	err = fsl_mc_object_allocate(to_fsl_mc_device(dev), FSL_MC_POOL_DPBP,
+				     &dpbp_dev);
+	if (err) {
+		if (err == -ENXIO)
+			err = -EPROBE_DEFER;
+		else
+			dev_err(dev, "DPBP device allocation failed\n");
+		return err;
+	}
+	ethsw->dpbp_dev = dpbp_dev;
+
+	err = dpbp_open(ethsw->mc_io, 0, dpbp_dev->obj_desc.id,
+			&dpbp_dev->mc_handle);
+	if (err) {
+		dev_err(dev, "dpbp_open() failed\n");
+		goto err_open;
+	}
+
+	err = dpbp_reset(ethsw->mc_io, 0, dpbp_dev->mc_handle);
+	if (err) {
+		dev_err(dev, "dpbp_reset() failed\n");
+		goto err_reset;
+	}
+
+	err = dpbp_enable(ethsw->mc_io, 0, dpbp_dev->mc_handle);
+	if (err) {
+		dev_err(dev, "dpbp_enable() failed\n");
+		goto err_enable;
+	}
+
+	err = dpbp_get_attributes(ethsw->mc_io, 0, dpbp_dev->mc_handle,
+				  &dpbp_attrs);
+	if (err) {
+		dev_err(dev, "dpbp_get_attributes() failed\n");
+		goto err_get_attr;
+	}
+
+	dpsw_ctrl_if_pools_cfg.num_dpbp = 1;
+	dpsw_ctrl_if_pools_cfg.pools[0].dpbp_id = dpbp_attrs.id;
+	dpsw_ctrl_if_pools_cfg.pools[0].buffer_size = DPAA2_SWITCH_RX_BUF_SIZE;
+	dpsw_ctrl_if_pools_cfg.pools[0].backup_pool = 0;
+
+	err = dpsw_ctrl_if_set_pools(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				     &dpsw_ctrl_if_pools_cfg);
+	if (err) {
+		dev_err(dev, "dpsw_ctrl_if_set_pools() failed\n");
+		goto err_get_attr;
+	}
+	ethsw->bpid = dpbp_attrs.id;
+
+	return 0;
+
+err_get_attr:
+	dpbp_disable(ethsw->mc_io, 0, dpbp_dev->mc_handle);
+err_enable:
+err_reset:
+	dpbp_close(ethsw->mc_io, 0, dpbp_dev->mc_handle);
+err_open:
+	fsl_mc_object_free(dpbp_dev);
+	return err;
+}
+
+static void dpaa2_switch_free_dpbp(struct ethsw_core *ethsw)
+{
+	dpbp_disable(ethsw->mc_io, 0, ethsw->dpbp_dev->mc_handle);
+	dpbp_close(ethsw->mc_io, 0, ethsw->dpbp_dev->mc_handle);
+	fsl_mc_object_free(ethsw->dpbp_dev);
+}
+
+static int dpaa2_switch_alloc_rings(struct ethsw_core *ethsw)
+{
+	int i;
+
+	for (i = 0; i <
DPAA2_SWITCH_RX_NUM_FQS; i++) {
+		ethsw->fq[i].store =
+			dpaa2_io_store_create(DPAA2_SWITCH_STORE_SIZE,
+					      ethsw->dev);
+		if (!ethsw->fq[i].store) {
+			dev_err(ethsw->dev, "dpaa2_io_store_create failed\n");
+			while (--i >= 0)
+				dpaa2_io_store_destroy(ethsw->fq[i].store);
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+static void dpaa2_switch_destroy_rings(struct ethsw_core *ethsw)
+{
+	int i;
+
+	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++)
+		dpaa2_io_store_destroy(ethsw->fq[i].store);
+}
+
+static int dpaa2_switch_pull_fq(struct dpaa2_switch_fq *fq)
+{
+	int err, retries = 0;
+
+	/* Try to pull from the FQ while the portal is busy and we didn't hit
+	 * the maximum number of retries
+	 */
+	do {
+		err = dpaa2_io_service_pull_fq(NULL, fq->fqid, fq->store);
+		cpu_relax();
+	} while (err == -EBUSY && retries++ < DPAA2_SWITCH_SWP_BUSY_RETRIES);
+
+	if (unlikely(err))
+		dev_err(fq->ethsw->dev, "dpaa2_io_service_pull err %d\n", err);
+
+	return err;
+}
+
+/* Consume all frames pull-dequeued into the store */
+static int dpaa2_switch_store_consume(struct dpaa2_switch_fq *fq)
+{
+	struct ethsw_core *ethsw = fq->ethsw;
+	int cleaned = 0, is_last;
+	struct dpaa2_dq *dq;
+	int retries = 0;
+
+	do {
+		/* Get the next available FD from the store */
+		dq = dpaa2_io_store_next(fq->store, &is_last);
+		if (unlikely(!dq)) {
+			if (retries++ >= DPAA2_SWITCH_SWP_BUSY_RETRIES) {
+				dev_err_once(ethsw->dev,
+					     "No valid dequeue response\n");
+				return -ETIMEDOUT;
+			}
+			continue;
+		}
+
+		if (fq->type == DPSW_QUEUE_RX)
+			dpaa2_switch_rx(fq, dpaa2_dq_fd(dq));
+		else
+			dpaa2_switch_tx_conf(fq, dpaa2_dq_fd(dq));
+		cleaned++;
+
+	} while (!is_last);
+
+	return cleaned;
+}
+
+/* NAPI poll routine */
+static int dpaa2_switch_poll(struct napi_struct *napi, int budget)
+{
+	int err, cleaned = 0, store_cleaned, work_done;
+	struct dpaa2_switch_fq *fq;
+	int retries = 0;
+
+	fq = container_of(napi, struct dpaa2_switch_fq, napi);
+
+	do {
+		err = dpaa2_switch_pull_fq(fq);
+		if (unlikely(err))
+			break;
+
+		/* Refill pool if appropriate */
+		dpaa2_switch_refill_bp(fq->ethsw);
+
+		store_cleaned = dpaa2_switch_store_consume(fq);
+		cleaned += store_cleaned;
+
+		if (cleaned >= budget) {
+			work_done = budget;
+			goto out;
+		}
+
+	} while (store_cleaned);
+
+	/* We didn't consume the entire budget, so finish napi and re-enable
+	 * data availability notifications
+	 */
+	napi_complete_done(napi, cleaned);
+	do {
+		err = dpaa2_io_service_rearm(NULL, &fq->nctx);
+		cpu_relax();
+	} while (err == -EBUSY && retries++ < DPAA2_SWITCH_SWP_BUSY_RETRIES);
+
+	work_done = max(cleaned, 1);
+out:
+
+	return work_done;
+}
+
+static void dpaa2_switch_fqdan_cb(struct dpaa2_io_notification_ctx *nctx)
+{
+	struct dpaa2_switch_fq *fq;
+
+	fq = container_of(nctx, struct dpaa2_switch_fq, nctx);
+
+	napi_schedule(&fq->napi);
+}
+
+static int dpaa2_switch_setup_dpio(struct ethsw_core *ethsw)
+{
+	struct dpsw_ctrl_if_queue_cfg queue_cfg;
+	struct dpaa2_io_notification_ctx *nctx;
+	int err, i, j;
+
+	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++) {
+		nctx = &ethsw->fq[i].nctx;
+
+		/* Register a new software context for the FQID.
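+		 * The context routes the FQ's data availability notifications
+		 * to our NAPI instance through dpaa2_switch_fqdan_cb().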
+		 * By using NULL as the first parameter, we specify that we do
+		 * not care on which CPU the interrupts for this queue are
+		 * received.
+		 */
+		nctx->is_cdan = 0;
+		nctx->id = ethsw->fq[i].fqid;
+		nctx->desired_cpu = DPAA2_IO_ANY_CPU;
+		nctx->cb = dpaa2_switch_fqdan_cb;
+		err = dpaa2_io_service_register(NULL, nctx, ethsw->dev);
+		if (err) {
+			err = -EPROBE_DEFER;
+			goto err_register;
+		}
+
+		queue_cfg.options = DPSW_CTRL_IF_QUEUE_OPT_DEST |
+				    DPSW_CTRL_IF_QUEUE_OPT_USER_CTX;
+		queue_cfg.dest_cfg.dest_type = DPSW_CTRL_IF_DEST_DPIO;
+		queue_cfg.dest_cfg.dest_id = nctx->dpio_id;
+		queue_cfg.dest_cfg.priority = 0;
+		queue_cfg.user_ctx = nctx->qman64;
+
+		err = dpsw_ctrl_if_set_queue(ethsw->mc_io, 0,
+					     ethsw->dpsw_handle,
+					     ethsw->fq[i].type,
+					     &queue_cfg);
+		if (err)
+			goto err_set_queue;
+	}
+
+	return 0;
+
+err_set_queue:
+	dpaa2_io_service_deregister(NULL, nctx, ethsw->dev);
+err_register:
+	for (j = 0; j < i; j++)
+		dpaa2_io_service_deregister(NULL, &ethsw->fq[j].nctx,
+					    ethsw->dev);
+
+	return err;
+}
+
+static void dpaa2_switch_free_dpio(struct ethsw_core *ethsw)
+{
+	int i;
+
+	for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++)
+		dpaa2_io_service_deregister(NULL, &ethsw->fq[i].nctx,
+					    ethsw->dev);
+}
+
+static int dpaa2_switch_ctrl_if_setup(struct ethsw_core *ethsw)
+{
+	int err;
+
+	/* setup FQs for Rx and Tx Conf */
+	err = dpaa2_switch_setup_fqs(ethsw);
+	if (err)
+		return err;
+
+	/* setup the buffer pool needed on the Rx path */
+	err = dpaa2_switch_setup_dpbp(ethsw);
+	if (err)
+		return err;
+
+	err = dpaa2_switch_seed_bp(ethsw);
+	if (err)
+		goto err_free_dpbp;
+
+	err = dpaa2_switch_alloc_rings(ethsw);
+	if (err)
+		goto err_drain_dpbp;
+
+	err = dpaa2_switch_setup_dpio(ethsw);
+	if (err)
+		goto err_destroy_rings;
+
+	err = dpsw_ctrl_if_enable(ethsw->mc_io, 0, ethsw->dpsw_handle);
+	if (err) {
+		dev_err(ethsw->dev, "dpsw_ctrl_if_enable err %d\n", err);
+		goto err_deregister_dpio;
+	}
+
+	return 0;
+
+err_deregister_dpio:
+	dpaa2_switch_free_dpio(ethsw);
+err_destroy_rings:
+	dpaa2_switch_destroy_rings(ethsw);
+err_drain_dpbp:
+	dpaa2_switch_drain_bp(ethsw);
+err_free_dpbp:
+	dpaa2_switch_free_dpbp(ethsw);
+
+	return err;
+}
+
+static int dpaa2_switch_init(struct fsl_mc_device *sw_dev)
+{
+	struct device *dev = &sw_dev->dev;
+	struct ethsw_core *ethsw = dev_get_drvdata(dev);
+	struct dpsw_vlan_if_cfg vcfg = {0};
+	struct dpsw_tci_cfg tci_cfg = {0};
+	struct dpsw_stp_cfg stp_cfg;
+	int err;
+	u16 i;
+
+	ethsw->dev_id = sw_dev->obj_desc.id;
+
+	err = dpsw_open(ethsw->mc_io, 0, ethsw->dev_id, &ethsw->dpsw_handle);
+	if (err) {
+		dev_err(dev, "dpsw_open err %d\n", err);
+		return err;
+	}
+
+	err = dpsw_get_attributes(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				  &ethsw->sw_attr);
+	if (err) {
+		dev_err(dev, "dpsw_get_attributes err %d\n", err);
+		goto err_close;
+	}
+
+	err = dpsw_get_api_version(ethsw->mc_io, 0,
+				   &ethsw->major,
+				   &ethsw->minor);
+	if (err) {
+		dev_err(dev, "dpsw_get_api_version err %d\n", err);
+		goto err_close;
+	}
+
+	/* Minimum supported DPSW version check */
+	if (ethsw->major < DPSW_MIN_VER_MAJOR ||
+	    (ethsw->major == DPSW_MIN_VER_MAJOR &&
+	     ethsw->minor < DPSW_MIN_VER_MINOR)) {
+		dev_err(dev, "DPSW version %d:%d not supported. 
Use firmware 10.28.0 or greater.\n", + ethsw->major, ethsw->minor); + err = -EOPNOTSUPP; + goto err_close; + } + + if (!dpaa2_switch_supports_cpu_traffic(ethsw)) { + err = -EOPNOTSUPP; + goto err_close; + } + + dpaa2_switch_detect_features(ethsw); + + err = dpsw_reset(ethsw->mc_io, 0, ethsw->dpsw_handle); + if (err) { + dev_err(dev, "dpsw_reset err %d\n", err); + goto err_close; + } + + stp_cfg.vlan_id = DEFAULT_VLAN_ID; + stp_cfg.state = DPSW_STP_STATE_FORWARDING; + + for (i = 0; i < ethsw->sw_attr.num_ifs; i++) { + err = dpsw_if_disable(ethsw->mc_io, 0, ethsw->dpsw_handle, i); + if (err) { + dev_err(dev, "dpsw_if_disable err %d\n", err); + goto err_close; + } + + err = dpsw_if_set_stp(ethsw->mc_io, 0, ethsw->dpsw_handle, i, + &stp_cfg); + if (err) { + dev_err(dev, "dpsw_if_set_stp err %d for port %d\n", + err, i); + goto err_close; + } + + /* Switch starts with all ports configured to VLAN 1. Need to + * remove this setting to allow configuration at bridge join + */ + vcfg.num_ifs = 1; + vcfg.if_id[0] = i; + err = dpsw_vlan_remove_if_untagged(ethsw->mc_io, 0, ethsw->dpsw_handle, + DEFAULT_VLAN_ID, &vcfg); + if (err) { + dev_err(dev, "dpsw_vlan_remove_if_untagged err %d\n", + err); + goto err_close; + } + + tci_cfg.vlan_id = 4095; + err = dpsw_if_set_tci(ethsw->mc_io, 0, ethsw->dpsw_handle, i, &tci_cfg); + if (err) { + dev_err(dev, "dpsw_if_set_tci err %d\n", err); + goto err_close; + } + + err = dpsw_vlan_remove_if(ethsw->mc_io, 0, ethsw->dpsw_handle, + DEFAULT_VLAN_ID, &vcfg); + if (err) { + dev_err(dev, "dpsw_vlan_remove_if err %d\n", err); + goto err_close; + } + } + + err = dpsw_vlan_remove(ethsw->mc_io, 0, ethsw->dpsw_handle, DEFAULT_VLAN_ID); + if (err) { + dev_err(dev, "dpsw_vlan_remove err %d\n", err); + goto err_close; + } + + ethsw->workqueue = alloc_ordered_workqueue("%s_%d_ordered", + WQ_MEM_RECLAIM, "ethsw", + ethsw->sw_attr.id); + if (!ethsw->workqueue) { + err = -ENOMEM; + goto err_close; + } + + err = dpsw_fdb_remove(ethsw->mc_io, 0, ethsw->dpsw_handle, 0); + if (err) + goto err_destroy_ordered_workqueue; + + err = dpaa2_switch_ctrl_if_setup(ethsw); + if (err) + goto err_destroy_ordered_workqueue; + + return 0; + +err_destroy_ordered_workqueue: + destroy_workqueue(ethsw->workqueue); + +err_close: + dpsw_close(ethsw->mc_io, 0, ethsw->dpsw_handle); + return err; +} + +/* Add an ACL to redirect frames with specific destination MAC address to + * control interface + */ +static int dpaa2_switch_port_trap_mac_addr(struct ethsw_port_priv *port_priv, + const char *mac) +{ + struct dpaa2_switch_acl_entry acl_entry = {0}; + + /* Match on the destination MAC address */ + ether_addr_copy(acl_entry.key.match.l2_dest_mac, mac); + eth_broadcast_addr(acl_entry.key.mask.l2_dest_mac); + + /* Trap to CPU */ + acl_entry.cfg.precedence = 0; + acl_entry.cfg.result.action = DPSW_ACL_ACTION_REDIRECT_TO_CTRL_IF; + + return dpaa2_switch_acl_entry_add(port_priv->acl_tbl, &acl_entry); +} + +static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port) +{ + const char stpa[ETH_ALEN] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00}; + struct switchdev_obj_port_vlan vlan = { + .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, + .vid = DEFAULT_VLAN_ID, + .flags = BRIDGE_VLAN_INFO_UNTAGGED | BRIDGE_VLAN_INFO_PVID, + }; + struct net_device *netdev = port_priv->netdev; + struct ethsw_core *ethsw = port_priv->ethsw_data; + struct dpaa2_switch_acl_tbl *acl_tbl; + struct dpsw_fdb_cfg fdb_cfg = {0}; + struct dpsw_if_attr dpsw_if_attr; + struct dpaa2_switch_fdb *fdb; + struct dpsw_acl_cfg acl_cfg; + u16 fdb_id, 
acl_tbl_id;
+	int err;
+
+	/* Get the Tx queue for this specific port */
+	err = dpsw_if_get_attributes(ethsw->mc_io, 0, ethsw->dpsw_handle,
+				     port_priv->idx, &dpsw_if_attr);
+	if (err) {
+		netdev_err(netdev, "dpsw_if_get_attributes err %d\n", err);
+		return err;
+	}
+	port_priv->tx_qdid = dpsw_if_attr.qdid;
+
+	/* Create a FDB table for this particular switch port */
+	fdb_cfg.num_fdb_entries = ethsw->sw_attr.max_fdb_entries / ethsw->sw_attr.num_ifs;
+	err = dpsw_fdb_add(ethsw->mc_io, 0, ethsw->dpsw_handle,
+			   &fdb_id, &fdb_cfg);
+	if (err) {
+		netdev_err(netdev, "dpsw_fdb_add err %d\n", err);
+		return err;
+	}
+
+	/* Find an unused dpaa2_switch_fdb structure and use it */
+	fdb = dpaa2_switch_fdb_get_unused(ethsw);
+	fdb->fdb_id = fdb_id;
+	fdb->in_use = true;
+	fdb->bridge_dev = NULL;
+	port_priv->fdb = fdb;
+
+	/* We need to add VLAN 1 as the PVID on this port until it is under a
+	 * bridge since the DPAA2 switch is not able to handle the traffic in a
+	 * VLAN unaware fashion
+	 */
+	err = dpaa2_switch_port_vlans_add(netdev, &vlan);
+	if (err)
+		return err;
+
+	/* Setup the egress flooding domains (broadcast, unknown unicast) */
+	err = dpaa2_switch_fdb_set_egress_flood(ethsw, port_priv->fdb->fdb_id);
+	if (err)
+		return err;
+
+	/* Create an ACL table to be used by this switch port */
+	acl_cfg.max_entries = DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES;
+	err = dpsw_acl_add(ethsw->mc_io, 0, ethsw->dpsw_handle,
+			   &acl_tbl_id, &acl_cfg);
+	if (err) {
+		netdev_err(netdev, "dpsw_acl_add err %d\n", err);
+		return err;
+	}
+
+	acl_tbl = dpaa2_switch_acl_tbl_get_unused(ethsw);
+	acl_tbl->ethsw = ethsw;
+	acl_tbl->id = acl_tbl_id;
+	acl_tbl->in_use = true;
+	acl_tbl->num_rules = 0;
+	INIT_LIST_HEAD(&acl_tbl->entries);
+
+	err = dpaa2_switch_port_acl_tbl_bind(port_priv, acl_tbl);
+	if (err)
+		return err;
+
+	err = dpaa2_switch_port_trap_mac_addr(port_priv, stpa);
+	if (err)
+		return err;
+
+	return err;
+}
+
+static void dpaa2_switch_takedown(struct fsl_mc_device *sw_dev)
+{
+	struct device *dev = &sw_dev->dev;
+	struct ethsw_core *ethsw = dev_get_drvdata(dev);
+	int err;
+
+	err = dpsw_close(ethsw->mc_io, 0, ethsw->dpsw_handle);
+	if (err)
+		dev_warn(dev, "dpsw_close err %d\n", err);
+}
+
+static void dpaa2_switch_ctrl_if_teardown(struct ethsw_core *ethsw)
+{
+	dpsw_ctrl_if_disable(ethsw->mc_io, 0, ethsw->dpsw_handle);
+	dpaa2_switch_free_dpio(ethsw);
+	dpaa2_switch_destroy_rings(ethsw);
+	dpaa2_switch_drain_bp(ethsw);
+	dpaa2_switch_free_dpbp(ethsw);
+}
+
+static int dpaa2_switch_remove(struct fsl_mc_device *sw_dev)
+{
+	struct ethsw_port_priv *port_priv;
+	struct ethsw_core *ethsw;
+	struct device *dev;
+	int i;
+
+	dev = &sw_dev->dev;
+	ethsw = dev_get_drvdata(dev);
+
+	dpaa2_switch_ctrl_if_teardown(ethsw);
+
+	dpaa2_switch_teardown_irqs(sw_dev);
+
+	dpsw_disable(ethsw->mc_io, 0, ethsw->dpsw_handle);
+
+	for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
+		port_priv = ethsw->ports[i];
+		unregister_netdev(port_priv->netdev);
+		free_netdev(port_priv->netdev);
+	}
+
+	kfree(ethsw->fdbs);
+	kfree(ethsw->acls);
+	kfree(ethsw->ports);
+
+	dpaa2_switch_takedown(sw_dev);
+
+	destroy_workqueue(ethsw->workqueue);
+
+	fsl_mc_portal_free(ethsw->mc_io);
+
+	kfree(ethsw);
+
+	dev_set_drvdata(dev, NULL);
+
+	return 0;
+}
+
+static int dpaa2_switch_probe_port(struct ethsw_core *ethsw,
+				   u16 port_idx)
+{
+	struct ethsw_port_priv *port_priv;
+	struct device *dev = ethsw->dev;
+	struct net_device *port_netdev;
+	int err;
+
+	port_netdev = alloc_etherdev(sizeof(struct ethsw_port_priv));
+	if (!port_netdev) {
+		dev_err(dev,
"alloc_etherdev error\n"); + return -ENOMEM; + } + + port_priv = netdev_priv(port_netdev); + port_priv->netdev = port_netdev; + port_priv->ethsw_data = ethsw; + + port_priv->idx = port_idx; + port_priv->stp_state = BR_STATE_FORWARDING; + + SET_NETDEV_DEV(port_netdev, dev); + port_netdev->netdev_ops = &dpaa2_switch_port_ops; + port_netdev->ethtool_ops = &dpaa2_switch_port_ethtool_ops; + + port_netdev->needed_headroom = DPAA2_SWITCH_NEEDED_HEADROOM; + + port_priv->bcast_flood = true; + port_priv->ucast_flood = true; + + /* Set MTU limits */ + port_netdev->min_mtu = ETH_MIN_MTU; + port_netdev->max_mtu = ETHSW_MAX_FRAME_LENGTH; + + /* Populate the private port structure so that later calls to + * dpaa2_switch_port_init() can use it. + */ + ethsw->ports[port_idx] = port_priv; + + /* The DPAA2 switch's ingress path depends on the VLAN table, + * thus we are not able to disable VLAN filtering. + */ + port_netdev->features = NETIF_F_HW_VLAN_CTAG_FILTER | + NETIF_F_HW_VLAN_STAG_FILTER | + NETIF_F_HW_TC; + + err = dpaa2_switch_port_init(port_priv, port_idx); + if (err) + goto err_port_probe; + + err = dpaa2_switch_port_set_mac_addr(port_priv); + if (err) + goto err_port_probe; + + err = dpaa2_switch_port_set_learning(port_priv, false); + if (err) + goto err_port_probe; + port_priv->learn_ena = false; + + return 0; + +err_port_probe: + free_netdev(port_netdev); + ethsw->ports[port_idx] = NULL; + + return err; +} + +static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev) +{ + struct device *dev = &sw_dev->dev; + struct ethsw_core *ethsw; + int i, err; + + /* Allocate switch core*/ + ethsw = kzalloc(sizeof(*ethsw), GFP_KERNEL); + + if (!ethsw) + return -ENOMEM; + + ethsw->dev = dev; + ethsw->iommu_domain = iommu_get_domain_for_dev(dev); + dev_set_drvdata(dev, ethsw); + + err = fsl_mc_portal_allocate(sw_dev, FSL_MC_IO_ATOMIC_CONTEXT_PORTAL, + ðsw->mc_io); + if (err) { + if (err == -ENXIO) + err = -EPROBE_DEFER; + else + dev_err(dev, "fsl_mc_portal_allocate err %d\n", err); + goto err_free_drvdata; + } + + err = dpaa2_switch_init(sw_dev); + if (err) + goto err_free_cmdport; + + ethsw->ports = kcalloc(ethsw->sw_attr.num_ifs, sizeof(*ethsw->ports), + GFP_KERNEL); + if (!(ethsw->ports)) { + err = -ENOMEM; + goto err_takedown; + } + + ethsw->fdbs = kcalloc(ethsw->sw_attr.num_ifs, sizeof(*ethsw->fdbs), + GFP_KERNEL); + if (!ethsw->fdbs) { + err = -ENOMEM; + goto err_free_ports; + } + + ethsw->acls = kcalloc(ethsw->sw_attr.num_ifs, sizeof(*ethsw->acls), + GFP_KERNEL); + if (!ethsw->acls) { + err = -ENOMEM; + goto err_free_fdbs; + } + + for (i = 0; i < ethsw->sw_attr.num_ifs; i++) { + err = dpaa2_switch_probe_port(ethsw, i); + if (err) + goto err_free_netdev; + } + + /* Add a NAPI instance for each of the Rx queues. The first port's + * net_device will be associated with the instances since we do not have + * different queues for each switch ports. 
+ */ + for (i = 0; i < DPAA2_SWITCH_RX_NUM_FQS; i++) + netif_napi_add(ethsw->ports[0]->netdev, + ðsw->fq[i].napi, dpaa2_switch_poll, + NAPI_POLL_WEIGHT); + + err = dpsw_enable(ethsw->mc_io, 0, ethsw->dpsw_handle); + if (err) { + dev_err(ethsw->dev, "dpsw_enable err %d\n", err); + goto err_free_netdev; + } + + /* Setup IRQs */ + err = dpaa2_switch_setup_irqs(sw_dev); + if (err) + goto err_stop; + + /* Register the netdev only when the entire setup is done and the + * switch port interfaces are ready to receive traffic + */ + for (i = 0; i < ethsw->sw_attr.num_ifs; i++) { + err = register_netdev(ethsw->ports[i]->netdev); + if (err < 0) { + dev_err(dev, "register_netdev error %d\n", err); + goto err_unregister_ports; + } + } + + return 0; + +err_unregister_ports: + for (i--; i >= 0; i--) + unregister_netdev(ethsw->ports[i]->netdev); + dpaa2_switch_teardown_irqs(sw_dev); +err_stop: + dpsw_disable(ethsw->mc_io, 0, ethsw->dpsw_handle); +err_free_netdev: + for (i--; i >= 0; i--) + free_netdev(ethsw->ports[i]->netdev); + kfree(ethsw->acls); +err_free_fdbs: + kfree(ethsw->fdbs); +err_free_ports: + kfree(ethsw->ports); + +err_takedown: + dpaa2_switch_takedown(sw_dev); + +err_free_cmdport: + fsl_mc_portal_free(ethsw->mc_io); + +err_free_drvdata: + kfree(ethsw); + dev_set_drvdata(dev, NULL); + + return err; +} + +static const struct fsl_mc_device_id dpaa2_switch_match_id_table[] = { + { + .vendor = FSL_MC_VENDOR_FREESCALE, + .obj_type = "dpsw", + }, + { .vendor = 0x0 } +}; +MODULE_DEVICE_TABLE(fslmc, dpaa2_switch_match_id_table); + +static struct fsl_mc_driver dpaa2_switch_drv = { + .driver = { + .name = KBUILD_MODNAME, + .owner = THIS_MODULE, + }, + .probe = dpaa2_switch_probe, + .remove = dpaa2_switch_remove, + .match_id_table = dpaa2_switch_match_id_table +}; + +static struct notifier_block dpaa2_switch_port_nb __read_mostly = { + .notifier_call = dpaa2_switch_port_netdevice_event, +}; + +static struct notifier_block dpaa2_switch_port_switchdev_nb = { + .notifier_call = dpaa2_switch_port_event, +}; + +static struct notifier_block dpaa2_switch_port_switchdev_blocking_nb = { + .notifier_call = dpaa2_switch_port_blocking_event, +}; + +static int dpaa2_switch_register_notifiers(void) +{ + int err; + + err = register_netdevice_notifier(&dpaa2_switch_port_nb); + if (err) { + pr_err("dpaa2-switch: failed to register net_device notifier (%d)\n", err); + return err; + } + + err = register_switchdev_notifier(&dpaa2_switch_port_switchdev_nb); + if (err) { + pr_err("dpaa2-switch: failed to register switchdev notifier (%d)\n", err); + goto err_switchdev_nb; + } + + err = register_switchdev_blocking_notifier(&dpaa2_switch_port_switchdev_blocking_nb); + if (err) { + pr_err("dpaa2-switch: failed to register switchdev blocking notifier (%d)\n", err); + goto err_switchdev_blocking_nb; + } + + return 0; + +err_switchdev_blocking_nb: + unregister_switchdev_notifier(&dpaa2_switch_port_switchdev_nb); +err_switchdev_nb: + unregister_netdevice_notifier(&dpaa2_switch_port_nb); + + return err; +} + +static void dpaa2_switch_unregister_notifiers(void) +{ + int err; + + err = unregister_switchdev_blocking_notifier(&dpaa2_switch_port_switchdev_blocking_nb); + if (err) + pr_err("dpaa2-switch: failed to unregister switchdev blocking notifier (%d)\n", + err); + + err = unregister_switchdev_notifier(&dpaa2_switch_port_switchdev_nb); + if (err) + pr_err("dpaa2-switch: failed to unregister switchdev notifier (%d)\n", err); + + err = unregister_netdevice_notifier(&dpaa2_switch_port_nb); + if (err) + pr_err("dpaa2-switch: failed to 
unregister net_device notifier (%d)\n", err); +} + +static int __init dpaa2_switch_driver_init(void) +{ + int err; + + err = fsl_mc_driver_register(&dpaa2_switch_drv); + if (err) + return err; + + err = dpaa2_switch_register_notifiers(); + if (err) { + fsl_mc_driver_unregister(&dpaa2_switch_drv); + return err; + } + + return 0; +} + +static void __exit dpaa2_switch_driver_exit(void) +{ + dpaa2_switch_unregister_notifiers(); + fsl_mc_driver_unregister(&dpaa2_switch_drv); +} + +module_init(dpaa2_switch_driver_init); +module_exit(dpaa2_switch_driver_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("DPAA2 Ethernet Switch Driver"); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h new file mode 100644 index 000000000000..bdef71f234cb --- /dev/null +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h @@ -0,0 +1,246 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * DPAA2 Ethernet Switch declarations + * + * Copyright 2014-2016 Freescale Semiconductor Inc. + * Copyright 2017-2021 NXP + * + */ + +#ifndef __ETHSW_H +#define __ETHSW_H + +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/rtnetlink.h> +#include <linux/if_vlan.h> +#include <uapi/linux/if_bridge.h> +#include <net/switchdev.h> +#include <linux/if_bridge.h> +#include <linux/fsl/mc.h> +#include <net/pkt_cls.h> +#include <soc/fsl/dpaa2-io.h> + +#include "dpsw.h" + +/* Number of IRQs supported */ +#define DPSW_IRQ_NUM 2 + +/* Port is member of VLAN */ +#define ETHSW_VLAN_MEMBER 1 +/* VLAN to be treated as untagged on egress */ +#define ETHSW_VLAN_UNTAGGED 2 +/* Untagged frames will be assigned to this VLAN */ +#define ETHSW_VLAN_PVID 4 +/* VLAN configured on the switch */ +#define ETHSW_VLAN_GLOBAL 8 + +/* Maximum Frame Length supported by HW (currently 10k) */ +#define DPAA2_MFL (10 * 1024) +#define ETHSW_MAX_FRAME_LENGTH (DPAA2_MFL - VLAN_ETH_HLEN - ETH_FCS_LEN) +#define ETHSW_L2_MAX_FRM(mtu) ((mtu) + VLAN_ETH_HLEN + ETH_FCS_LEN) + +#define ETHSW_FEATURE_MAC_ADDR BIT(0) + +/* Number of receive queues (one RX and one TX_CONF) */ +#define DPAA2_SWITCH_RX_NUM_FQS 2 + +/* Hardware requires alignment for ingress/egress buffer addresses */ +#define DPAA2_SWITCH_RX_BUF_RAW_SIZE PAGE_SIZE +#define DPAA2_SWITCH_RX_BUF_TAILROOM \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +#define DPAA2_SWITCH_RX_BUF_SIZE \ + (DPAA2_SWITCH_RX_BUF_RAW_SIZE - DPAA2_SWITCH_RX_BUF_TAILROOM) + +#define DPAA2_SWITCH_STORE_SIZE 16 + +/* Buffer management */ +#define BUFS_PER_CMD 7 +#define DPAA2_ETHSW_NUM_BUFS (1024 * BUFS_PER_CMD) +#define DPAA2_ETHSW_REFILL_THRESH (DPAA2_ETHSW_NUM_BUFS * 5 / 6) + +/* Number of times to retry DPIO portal operations while waiting + * for portal to finish executing current command and become + * available. 
We want to avoid being stuck in a while loop in case + * hardware becomes unresponsive, but not give up too easily if + * the portal really is busy for valid reasons + */ +#define DPAA2_SWITCH_SWP_BUSY_RETRIES 1000 + +/* Hardware annotation buffer size */ +#define DPAA2_SWITCH_HWA_SIZE 64 +/* Software annotation buffer size */ +#define DPAA2_SWITCH_SWA_SIZE 64 + +#define DPAA2_SWITCH_TX_BUF_ALIGN 64 + +#define DPAA2_SWITCH_TX_DATA_OFFSET \ + (DPAA2_SWITCH_HWA_SIZE + DPAA2_SWITCH_SWA_SIZE) + +#define DPAA2_SWITCH_NEEDED_HEADROOM \ + (DPAA2_SWITCH_TX_DATA_OFFSET + DPAA2_SWITCH_TX_BUF_ALIGN) + +#define DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES 16 +#define DPAA2_ETHSW_PORT_DEFAULT_TRAPS 1 + +#define DPAA2_ETHSW_PORT_ACL_CMD_BUF_SIZE 256 + +extern const struct ethtool_ops dpaa2_switch_port_ethtool_ops; + +struct ethsw_core; + +struct dpaa2_switch_fq { + struct ethsw_core *ethsw; + enum dpsw_queue_type type; + struct dpaa2_io_store *store; + struct dpaa2_io_notification_ctx nctx; + struct napi_struct napi; + u32 fqid; +}; + +struct dpaa2_switch_fdb { + struct net_device *bridge_dev; + u16 fdb_id; + bool in_use; +}; + +struct dpaa2_switch_acl_entry { + struct list_head list; + u16 prio; + unsigned long cookie; + + struct dpsw_acl_entry_cfg cfg; + struct dpsw_acl_key key; +}; + +struct dpaa2_switch_acl_tbl { + struct list_head entries; + struct ethsw_core *ethsw; + u64 ports; + + u16 id; + u8 num_rules; + bool in_use; +}; + +static inline bool +dpaa2_switch_acl_tbl_is_full(struct dpaa2_switch_acl_tbl *acl_tbl) +{ + if ((acl_tbl->num_rules + DPAA2_ETHSW_PORT_DEFAULT_TRAPS) >= + DPAA2_ETHSW_PORT_MAX_ACL_ENTRIES) + return true; + return false; +} + +/* Per port private data */ +struct ethsw_port_priv { + struct net_device *netdev; + u16 idx; + struct ethsw_core *ethsw_data; + u8 link_state; + u8 stp_state; + + u8 vlans[VLAN_VID_MASK + 1]; + u16 pvid; + u16 tx_qdid; + + struct dpaa2_switch_fdb *fdb; + bool bcast_flood; + bool ucast_flood; + bool learn_ena; + + struct dpaa2_switch_acl_tbl *acl_tbl; +}; + +/* Switch data */ +struct ethsw_core { + struct device *dev; + struct fsl_mc_io *mc_io; + u16 dpsw_handle; + struct dpsw_attr sw_attr; + u16 major, minor; + unsigned long features; + int dev_id; + struct ethsw_port_priv **ports; + struct iommu_domain *iommu_domain; + + u8 vlans[VLAN_VID_MASK + 1]; + + struct workqueue_struct *workqueue; + + struct dpaa2_switch_fq fq[DPAA2_SWITCH_RX_NUM_FQS]; + struct fsl_mc_device *dpbp_dev; + int buf_count; + u16 bpid; + int napi_users; + + struct dpaa2_switch_fdb *fdbs; + struct dpaa2_switch_acl_tbl *acls; +}; + +static inline int dpaa2_switch_get_index(struct ethsw_core *ethsw, + struct net_device *netdev) +{ + int i; + + for (i = 0; i < ethsw->sw_attr.num_ifs; i++) + if (ethsw->ports[i]->netdev == netdev) + return ethsw->ports[i]->idx; + + return -EINVAL; +} + +static inline bool dpaa2_switch_supports_cpu_traffic(struct ethsw_core *ethsw) +{ + if (ethsw->sw_attr.options & DPSW_OPT_CTRL_IF_DIS) { + dev_err(ethsw->dev, "Control Interface is disabled, cannot probe\n"); + return false; + } + + if (ethsw->sw_attr.flooding_cfg != DPSW_FLOODING_PER_FDB) { + dev_err(ethsw->dev, "Flooding domain is not per FDB, cannot probe\n"); + return false; + } + + if (ethsw->sw_attr.broadcast_cfg != DPSW_BROADCAST_PER_FDB) { + dev_err(ethsw->dev, "Broadcast domain is not per FDB, cannot probe\n"); + return false; + } + + if (ethsw->sw_attr.max_fdbs < ethsw->sw_attr.num_ifs) { + dev_err(ethsw->dev, "The number of FDBs is lower than the number of ports, cannot probe\n"); + return false; + } + + 
return true; +} + +bool dpaa2_switch_port_dev_check(const struct net_device *netdev); + +int dpaa2_switch_port_vlans_add(struct net_device *netdev, + const struct switchdev_obj_port_vlan *vlan); + +int dpaa2_switch_port_vlans_del(struct net_device *netdev, + const struct switchdev_obj_port_vlan *vlan); + +typedef int dpaa2_switch_fdb_cb_t(struct ethsw_port_priv *port_priv, + struct fdb_dump_entry *fdb_entry, + void *data); + +/* TC offload */ + +int dpaa2_switch_cls_flower_replace(struct dpaa2_switch_acl_tbl *acl_tbl, + struct flow_cls_offload *cls); + +int dpaa2_switch_cls_flower_destroy(struct dpaa2_switch_acl_tbl *acl_tbl, + struct flow_cls_offload *cls); + +int dpaa2_switch_cls_matchall_replace(struct dpaa2_switch_acl_tbl *acl_tbl, + struct tc_cls_matchall_offload *cls); + +int dpaa2_switch_cls_matchall_destroy(struct dpaa2_switch_acl_tbl *acl_tbl, + struct tc_cls_matchall_offload *cls); + +int dpaa2_switch_acl_entry_add(struct dpaa2_switch_acl_tbl *acl_tbl, + struct dpaa2_switch_acl_entry *entry); +#endif /* __ETHSW_H */ diff --git a/drivers/net/ethernet/freescale/dpaa2/dpkg.h b/drivers/net/ethernet/freescale/dpaa2/dpkg.h index 6de613b13e4d..6f596a5fbeeb 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpkg.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpkg.h @@ -13,11 +13,12 @@ /** Key Generator properties */ /** - * Number of masks per key extraction + * DPKG_NUM_OF_MASKS - Number of masks per key extraction */ #define DPKG_NUM_OF_MASKS 4 + /** - * Number of extractions per key profile + * DPKG_MAX_NUM_OF_EXTRACTS - Number of extractions per key profile */ #define DPKG_MAX_NUM_OF_EXTRACTS 10 diff --git a/drivers/net/ethernet/freescale/dpaa2/dpmac.h b/drivers/net/ethernet/freescale/dpaa2/dpmac.h index 135f143097a5..8f7ceb731282 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpmac.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpmac.h @@ -83,39 +83,21 @@ int dpmac_get_attributes(struct fsl_mc_io *mc_io, u16 token, struct dpmac_attr *attr); -/** - * DPMAC link configuration/state options - */ +/* DPMAC link configuration/state options */ -/** - * Enable auto-negotiation - */ #define DPMAC_LINK_OPT_AUTONEG BIT_ULL(0) -/** - * Enable half-duplex mode - */ #define DPMAC_LINK_OPT_HALF_DUPLEX BIT_ULL(1) -/** - * Enable pause frames - */ #define DPMAC_LINK_OPT_PAUSE BIT_ULL(2) -/** - * Enable a-symmetric pause frames - */ #define DPMAC_LINK_OPT_ASYM_PAUSE BIT_ULL(3) -/** - * Advertised link speeds - */ +/* Advertised link speeds */ #define DPMAC_ADVERTISED_10BASET_FULL BIT_ULL(0) #define DPMAC_ADVERTISED_100BASET_FULL BIT_ULL(1) #define DPMAC_ADVERTISED_1000BASET_FULL BIT_ULL(2) #define DPMAC_ADVERTISED_10000BASET_FULL BIT_ULL(4) #define DPMAC_ADVERTISED_2500BASEX_FULL BIT_ULL(5) -/** - * Advertise auto-negotiation enable - */ +/* Advertise auto-negotiation enable */ #define DPMAC_ADVERTISED_AUTONEG BIT_ULL(3) /** diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.c b/drivers/net/ethernet/freescale/dpaa2/dpni.c index aa429c17c343..d6afada99fb6 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpni.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpni.c @@ -17,6 +17,8 @@ * This function has to be called before the following functions: * - dpni_set_rx_tc_dist() * - dpni_set_qos_table() + * + * Return: '0' on Success; Error code otherwise. */ int dpni_prepare_key_cfg(const struct dpkg_profile_cfg *cfg, u8 *key_cfg_buf) { @@ -1793,6 +1795,8 @@ int dpni_get_api_version(struct fsl_mc_io *mc_io, * If cfg.enable is set to 0 the command will clear flow steering table. 
* The packets will be classified according to settings made in * dpni_set_rx_hash_dist() + * + * Return: '0' on Success; Error code otherwise. */ int dpni_set_rx_fs_dist(struct fsl_mc_io *mc_io, u32 cmd_flags, @@ -1826,6 +1830,8 @@ int dpni_set_rx_fs_dist(struct fsl_mc_io *mc_io, * If cfg.enable is set to 1 the packets will be classified using a hash * function based on the key received in cfg.key_cfg_iova parameter. * If cfg.enable is set to 0 the packets will be sent to the default queue + * + * Return: '0' on Success; Error code otherwise. */ int dpni_set_rx_hash_dist(struct fsl_mc_io *mc_io, u32 cmd_flags, diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.h b/drivers/net/ethernet/freescale/dpaa2/dpni.h index 4e96d9362dd2..7de0562bbf59 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpni.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpni.h @@ -10,73 +10,76 @@ struct fsl_mc_io; -/** - * Data Path Network Interface API +/* Data Path Network Interface API * Contains initialization APIs and runtime control APIs for DPNI */ /** General DPNI macros */ /** - * Maximum number of traffic classes + * DPNI_MAX_TC - Maximum number of traffic classes */ #define DPNI_MAX_TC 8 /** - * Maximum number of buffer pools per DPNI + * DPNI_MAX_DPBP - Maximum number of buffer pools per DPNI */ #define DPNI_MAX_DPBP 8 /** - * All traffic classes considered; see dpni_set_queue() + * DPNI_ALL_TCS - All traffic classes considered; see dpni_set_queue() */ #define DPNI_ALL_TCS (u8)(-1) /** - * All flows within traffic class considered; see dpni_set_queue() + * DPNI_ALL_TC_FLOWS - All flows within traffic class considered; see + * dpni_set_queue() */ #define DPNI_ALL_TC_FLOWS (u16)(-1) /** - * Generate new flow ID; see dpni_set_queue() + * DPNI_NEW_FLOW_ID - Generate new flow ID; see dpni_set_queue() */ #define DPNI_NEW_FLOW_ID (u16)(-1) /** - * Tx traffic is always released to a buffer pool on transmit, there are no - * resources allocated to have the frames confirmed back to the source after - * transmission. + * DPNI_OPT_TX_FRM_RELEASE - Tx traffic is always released to a buffer pool on + * transmit, there are no resources allocated to have the frames confirmed back + * to the source after transmission. */ #define DPNI_OPT_TX_FRM_RELEASE 0x000001 /** - * Disables support for MAC address filtering for addresses other than primary - * MAC address. This affects both unicast and multicast. Promiscuous mode can - * still be enabled/disabled for both unicast and multicast. If promiscuous mode - * is disabled, only traffic matching the primary MAC address will be accepted. + * DPNI_OPT_NO_MAC_FILTER - Disables support for MAC address filtering for + * addresses other than primary MAC address. This affects both unicast and + * multicast. Promiscuous mode can still be enabled/disabled for both unicast + * and multicast. If promiscuous mode is disabled, only traffic matching the + * primary MAC address will be accepted. */ #define DPNI_OPT_NO_MAC_FILTER 0x000002 /** - * Allocate policers for this DPNI. They can be used to rate-limit traffic per - * traffic class (TC) basis. + * DPNI_OPT_HAS_POLICING - Allocate policers for this DPNI. They can be used to + * rate-limit traffic per traffic class (TC) basis. */ #define DPNI_OPT_HAS_POLICING 0x000004 /** - * Congestion can be managed in several ways, allowing the buffer pool to - * deplete on ingress, taildrop on each queue or use congestion groups for sets - * of queues. If set, it configures a single congestion groups across all TCs. 
- * If reset, a congestion group is allocated for each TC. Only relevant if the
- * DPNI has multiple traffic classes.
+ * DPNI_OPT_SHARED_CONGESTION - Congestion can be managed in several ways,
+ * allowing the buffer pool to deplete on ingress, taildrop on each queue or
+ * use congestion groups for sets of queues. If set, it configures a single
+ * congestion group across all TCs. If reset, a congestion group is allocated
+ * for each TC. Only relevant if the DPNI has multiple traffic classes.
  */
 #define DPNI_OPT_SHARED_CONGESTION	0x000008
 /**
- * Enables TCAM for Flow Steering and QoS look-ups. If not specified, all
- * look-ups are exact match. Note that TCAM is not available on LS1088 and its
- * variants. Setting this bit on these SoCs will trigger an error.
+ * DPNI_OPT_HAS_KEY_MASKING - Enables TCAM for Flow Steering and QoS look-ups.
+ * If not specified, all look-ups are exact match. Note that TCAM is not
+ * available on LS1088 and its variants. Setting this bit on these SoCs will
+ * trigger an error.
  */
 #define DPNI_OPT_HAS_KEY_MASKING	0x000010
 /**
- * Disables the flow steering table.
+ * DPNI_OPT_NO_FS - Disables the flow steering table.
  */
 #define DPNI_OPT_NO_FS			0x000020
 /**
- * Flow steering table is shared between all traffic classes
+ * DPNI_OPT_SHARED_FS - Flow steering table is shared between all traffic
+ * classes
  */
 #define DPNI_OPT_SHARED_FS		0x001000
 
@@ -129,20 +132,14 @@ int dpni_reset(struct fsl_mc_io *mc_io,
 	       u32 cmd_flags,
 	       u16 token);
 
-/**
- * DPNI IRQ Index and Events
- */
+/* DPNI IRQ Index and Events */
 
-/**
- * IRQ index
- */
 #define DPNI_IRQ_INDEX				0
-/**
- * IRQ events:
- * indicates a change in link state
- * indicates a change in endpoint
- */
+
+/* DPNI_IRQ_EVENT_LINK_CHANGED - indicates a change in link state */
 #define DPNI_IRQ_EVENT_LINK_CHANGED		0x00000001
+
+/* DPNI_IRQ_EVENT_ENDPOINT_CHANGED - indicates a change in endpoint */
 #define DPNI_IRQ_EVENT_ENDPOINT_CHANGED	0x00000002
 
 int dpni_set_irq_enable(struct fsl_mc_io *mc_io,
@@ -222,32 +219,30 @@ int dpni_get_attributes(struct fsl_mc_io *mc_io,
 			u16 token,
 			struct dpni_attr *attr);
 
-/**
- * DPNI errors
- */
+/* DPNI errors */
 
 /**
- * Extract out of frame header error
+ * DPNI_ERROR_EOFHE - Extract out of frame header error
  */
 #define DPNI_ERROR_EOFHE	0x00020000
 /**
- * Frame length error
+ * DPNI_ERROR_FLE - Frame length error
  */
 #define DPNI_ERROR_FLE		0x00002000
 /**
- * Frame physical error
+ * DPNI_ERROR_FPE - Frame physical error
  */
 #define DPNI_ERROR_FPE		0x00001000
 /**
- * Parsing header error
+ * DPNI_ERROR_PHE - Parsing header error
  */
 #define DPNI_ERROR_PHE		0x00000020
 /**
- * Parser L3 checksum error
+ * DPNI_ERROR_L3CE - Parser L3 checksum error
  */
 #define DPNI_ERROR_L3CE		0x00000004
 /**
- * Parser L3 checksum error
+ * DPNI_ERROR_L4CE - Parser L4 checksum error
  */
 #define DPNI_ERROR_L4CE		0x00000001
 
@@ -281,36 +276,35 @@ int dpni_set_errors_behavior(struct fsl_mc_io *mc_io,
 			     u16 token,
 			     struct dpni_error_cfg *cfg);
 
-/**
- * DPNI buffer layout modification options
- */
+/* DPNI buffer layout modification options */
 
 /**
- * Select to modify the time-stamp setting
+ * DPNI_BUF_LAYOUT_OPT_TIMESTAMP - Select to modify the time-stamp setting
  */
 #define DPNI_BUF_LAYOUT_OPT_TIMESTAMP	0x00000001
 /**
- * Select to modify the parser-result setting; not applicable for Tx
+ * DPNI_BUF_LAYOUT_OPT_PARSER_RESULT - Select to modify the parser-result
+ * setting; not applicable for Tx
  */
 #define DPNI_BUF_LAYOUT_OPT_PARSER_RESULT	0x00000002
 /**
- * Select to modify the frame-status setting
+ * 
DPNI_BUF_LAYOUT_OPT_FRAME_STATUS - Select to modify the frame-status setting */ #define DPNI_BUF_LAYOUT_OPT_FRAME_STATUS 0x00000004 /** - * Select to modify the private-data-size setting + * DPNI_BUF_LAYOUT_OPT_PRIVATE_DATA_SIZE - Select to modify the private-data-size setting */ #define DPNI_BUF_LAYOUT_OPT_PRIVATE_DATA_SIZE 0x00000008 /** - * Select to modify the data-alignment setting + * DPNI_BUF_LAYOUT_OPT_DATA_ALIGN - Select to modify the data-alignment setting */ #define DPNI_BUF_LAYOUT_OPT_DATA_ALIGN 0x00000010 /** - * Select to modify the data-head-room setting + * DPNI_BUF_LAYOUT_OPT_DATA_HEAD_ROOM - Select to modify the data-head-room setting */ #define DPNI_BUF_LAYOUT_OPT_DATA_HEAD_ROOM 0x00000020 /** - * Select to modify the data-tail-room setting + * DPNI_BUF_LAYOUT_OPT_DATA_TAIL_ROOM - Select to modify the data-tail-room setting */ #define DPNI_BUF_LAYOUT_OPT_DATA_TAIL_ROOM 0x00000040 @@ -343,7 +337,8 @@ struct dpni_buffer_layout { * @DPNI_QUEUE_TX: Tx queue * @DPNI_QUEUE_TX_CONFIRM: Tx confirmation queue * @DPNI_QUEUE_RX_ERR: Rx error queue - */enum dpni_queue_type { + */ +enum dpni_queue_type { DPNI_QUEUE_RX, DPNI_QUEUE_TX, DPNI_QUEUE_TX_CONFIRM, @@ -424,7 +419,7 @@ int dpni_get_tx_data_offset(struct fsl_mc_io *mc_io, * lack of buffers * @page_2.egress_discarded_frames: Egress discarded frame count * @page_2.egress_confirmed_frames: Egress confirmed frame count - * @page3: Page_3 statistics structure + * @page_3: Page_3 statistics structure * @page_3.egress_dequeue_bytes: Cumulative count of the number of bytes * dequeued from egress FQs * @page_3.egress_dequeue_frames: Cumulative count of the number of frames @@ -501,30 +496,14 @@ int dpni_get_statistics(struct fsl_mc_io *mc_io, u8 page, union dpni_statistics *stat); -/** - * Enable auto-negotiation - */ #define DPNI_LINK_OPT_AUTONEG 0x0000000000000001ULL -/** - * Enable half-duplex mode - */ #define DPNI_LINK_OPT_HALF_DUPLEX 0x0000000000000002ULL -/** - * Enable pause frames - */ #define DPNI_LINK_OPT_PAUSE 0x0000000000000004ULL -/** - * Enable a-symmetric pause frames - */ #define DPNI_LINK_OPT_ASYM_PAUSE 0x0000000000000008ULL - -/** - * Enable priority flow control pause frames - */ #define DPNI_LINK_OPT_PFC_PAUSE 0x0000000000000010ULL /** - * struct - Structure representing DPNI link configuration + * struct dpni_link_cfg - Structure representing DPNI link configuration * @rate: Rate * @options: Mask of available options; use 'DPNI_LINK_OPT_<X>' values */ @@ -687,8 +666,8 @@ int dpni_set_rx_tc_dist(struct fsl_mc_io *mc_io, const struct dpni_rx_tc_dist_cfg *cfg); /** - * When used for fs_miss_flow_id in function dpni_set_rx_dist, - * will signal to dpni to drop all unclassified frames + * DPNI_FS_MISS_DROP - When used for fs_miss_flow_id in function + * dpni_set_rx_dist, will signal to dpni to drop all unclassified frames */ #define DPNI_FS_MISS_DROP ((uint16_t)-1) @@ -766,7 +745,7 @@ enum dpni_dest { /** * struct dpni_queue - Queue structure - * @destination - Destination structure + * @destination: - Destination structure * @destination.id: ID of the destination, only relevant if DEST_TYPE is > 0. * Identifies either a DPIO or a DPCON object. * Not relevant for Tx queues. 
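A quick aside on the link options consolidated above: the DPNI_LINK_OPT_* bits are OR-ed together into the options mask of struct dpni_link_cfg. The sketch below is illustrative only and not part of this patch; example_enable_pause() is a hypothetical caller, and the dpni_set_link_cfg() prototype (MC portal, command flags, token, configuration) is assumed from its declaration elsewhere in dpni.h.

static int example_enable_pause(struct fsl_mc_io *mc_io, u16 token)
{
	struct dpni_link_cfg cfg = {
		.rate = 1000,	/* Mbps */
		/* autonegotiate and enable symmetric pause frames */
		.options = DPNI_LINK_OPT_AUTONEG | DPNI_LINK_OPT_PAUSE,
	};

	/* Note: DPNI_CONG_OPT_FLOW_CONTROL (further down) only takes
	 * effect once pause frames are enabled here.
	 */
	return dpni_set_link_cfg(mc_io, 0, token, &cfg);
}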
@@ -837,9 +816,7 @@ struct dpni_queue_id { u16 qdbin; }; -/** - * Set User Context - */ +/* Set User Context */ #define DPNI_QUEUE_OPT_USER_CTX 0x00000001 #define DPNI_QUEUE_OPT_DEST 0x00000002 #define DPNI_QUEUE_OPT_FLC 0x00000004 @@ -904,9 +881,9 @@ struct dpni_dest_cfg { /* DPNI congestion options */ /** - * This congestion will trigger flow control or priority flow control. - * This will have effect only if flow control is enabled with - * dpni_set_link_cfg(). + * DPNI_CONG_OPT_FLOW_CONTROL - This congestion will trigger flow control or + * priority flow control. This will have effect only if flow control is + * enabled with dpni_set_link_cfg(). */ #define DPNI_CONG_OPT_FLOW_CONTROL 0x00000040 @@ -990,23 +967,24 @@ struct dpni_rule_cfg { }; /** - * Discard matching traffic. If set, this takes precedence over any other - * configuration and matching traffic is always discarded. + * DPNI_FS_OPT_DISCARD - Discard matching traffic. If set, this takes + * precedence over any other configuration and matching traffic is always + * discarded. */ #define DPNI_FS_OPT_DISCARD 0x1 /** - * Set FLC value. If set, flc member of struct dpni_fs_action_cfg is used to - * override the FLC value set per queue. + * DPNI_FS_OPT_SET_FLC - Set FLC value. If set, flc member of struct + * dpni_fs_action_cfg is used to override the FLC value set per queue. * For more details check the Frame Descriptor section in the hardware * documentation. */ #define DPNI_FS_OPT_SET_FLC 0x2 /** - * Indicates whether the 6 lowest significant bits of FLC are used for stash - * control. If set, the 6 least significant bits in value are interpreted as - * follows: + * DPNI_FS_OPT_SET_STASH_CONTROL - Indicates whether the 6 least significant + * bits of FLC are used for stash control. If set, the 6 least significant bits + * in value are interpreted as follows: * - bits 0-1: indicates the number of 64 byte units of context that are * stashed. FLC value is interpreted as a memory address in this case, * excluding the 6 LS bits. @@ -1068,7 +1046,7 @@ int dpni_get_api_version(struct fsl_mc_io *mc_io, u16 *major_ver, u16 *minor_ver); /** - * struct dpni_tx_shaping - Structure representing DPNI tx shaping configuration + * struct dpni_tx_shaping_cfg - Structure representing DPNI tx shaping configuration * @rate_limit: Rate in Mbps * @max_burst_size: Burst size in bytes (up to 64KB) */ diff --git a/drivers/net/ethernet/freescale/dpaa2/dprtc.h b/drivers/net/ethernet/freescale/dpaa2/dprtc.h index 05c413719e55..01d77c685a5b 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dprtc.h +++ b/drivers/net/ethernet/freescale/dpaa2/dprtc.h @@ -13,9 +13,6 @@ struct fsl_mc_io; -/** - * Number of irq's - */ #define DPRTC_MAX_IRQ_NUM 1 #define DPRTC_IRQ_INDEX 0 diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h new file mode 100644 index 000000000000..cb13e740f72b --- /dev/null +++ b/drivers/net/ethernet/freescale/dpaa2/dpsw-cmd.h @@ -0,0 +1,537 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2014-2016 Freescale Semiconductor Inc.
+ * Copyright 2017-2021 NXP + * + */ + +#ifndef __FSL_DPSW_CMD_H +#define __FSL_DPSW_CMD_H + +#include "dpsw.h" + +/* DPSW Version */ +#define DPSW_VER_MAJOR 8 +#define DPSW_VER_MINOR 9 + +#define DPSW_CMD_BASE_VERSION 1 +#define DPSW_CMD_VERSION_2 2 +#define DPSW_CMD_ID_OFFSET 4 + +#define DPSW_CMD_ID(id) (((id) << DPSW_CMD_ID_OFFSET) | DPSW_CMD_BASE_VERSION) +#define DPSW_CMD_V2(id) (((id) << DPSW_CMD_ID_OFFSET) | DPSW_CMD_VERSION_2) + +/* Command IDs */ +#define DPSW_CMDID_CLOSE DPSW_CMD_ID(0x800) +#define DPSW_CMDID_OPEN DPSW_CMD_ID(0x802) + +#define DPSW_CMDID_GET_API_VERSION DPSW_CMD_ID(0xa02) + +#define DPSW_CMDID_ENABLE DPSW_CMD_ID(0x002) +#define DPSW_CMDID_DISABLE DPSW_CMD_ID(0x003) +#define DPSW_CMDID_GET_ATTR DPSW_CMD_V2(0x004) +#define DPSW_CMDID_RESET DPSW_CMD_ID(0x005) + +#define DPSW_CMDID_SET_IRQ_ENABLE DPSW_CMD_ID(0x012) + +#define DPSW_CMDID_SET_IRQ_MASK DPSW_CMD_ID(0x014) + +#define DPSW_CMDID_GET_IRQ_STATUS DPSW_CMD_ID(0x016) +#define DPSW_CMDID_CLEAR_IRQ_STATUS DPSW_CMD_ID(0x017) + +#define DPSW_CMDID_IF_SET_TCI DPSW_CMD_ID(0x030) +#define DPSW_CMDID_IF_SET_STP DPSW_CMD_ID(0x031) + +#define DPSW_CMDID_IF_GET_COUNTER DPSW_CMD_V2(0x034) + +#define DPSW_CMDID_IF_ENABLE DPSW_CMD_ID(0x03D) +#define DPSW_CMDID_IF_DISABLE DPSW_CMD_ID(0x03E) + +#define DPSW_CMDID_IF_GET_ATTR DPSW_CMD_ID(0x042) + +#define DPSW_CMDID_IF_SET_MAX_FRAME_LENGTH DPSW_CMD_ID(0x044) + +#define DPSW_CMDID_IF_GET_LINK_STATE DPSW_CMD_ID(0x046) + +#define DPSW_CMDID_IF_GET_TCI DPSW_CMD_ID(0x04A) + +#define DPSW_CMDID_IF_SET_LINK_CFG DPSW_CMD_ID(0x04C) + +#define DPSW_CMDID_VLAN_ADD DPSW_CMD_ID(0x060) +#define DPSW_CMDID_VLAN_ADD_IF DPSW_CMD_V2(0x061) +#define DPSW_CMDID_VLAN_ADD_IF_UNTAGGED DPSW_CMD_ID(0x062) + +#define DPSW_CMDID_VLAN_REMOVE_IF DPSW_CMD_ID(0x064) +#define DPSW_CMDID_VLAN_REMOVE_IF_UNTAGGED DPSW_CMD_ID(0x065) +#define DPSW_CMDID_VLAN_REMOVE_IF_FLOODING DPSW_CMD_ID(0x066) +#define DPSW_CMDID_VLAN_REMOVE DPSW_CMD_ID(0x067) + +#define DPSW_CMDID_FDB_ADD DPSW_CMD_ID(0x082) +#define DPSW_CMDID_FDB_REMOVE DPSW_CMD_ID(0x083) +#define DPSW_CMDID_FDB_ADD_UNICAST DPSW_CMD_ID(0x084) +#define DPSW_CMDID_FDB_REMOVE_UNICAST DPSW_CMD_ID(0x085) +#define DPSW_CMDID_FDB_ADD_MULTICAST DPSW_CMD_ID(0x086) +#define DPSW_CMDID_FDB_REMOVE_MULTICAST DPSW_CMD_ID(0x087) +#define DPSW_CMDID_FDB_DUMP DPSW_CMD_ID(0x08A) + +#define DPSW_CMDID_ACL_ADD DPSW_CMD_ID(0x090) +#define DPSW_CMDID_ACL_REMOVE DPSW_CMD_ID(0x091) +#define DPSW_CMDID_ACL_ADD_ENTRY DPSW_CMD_ID(0x092) +#define DPSW_CMDID_ACL_REMOVE_ENTRY DPSW_CMD_ID(0x093) +#define DPSW_CMDID_ACL_ADD_IF DPSW_CMD_ID(0x094) +#define DPSW_CMDID_ACL_REMOVE_IF DPSW_CMD_ID(0x095) + +#define DPSW_CMDID_IF_GET_PORT_MAC_ADDR DPSW_CMD_ID(0x0A7) + +#define DPSW_CMDID_CTRL_IF_GET_ATTR DPSW_CMD_ID(0x0A0) +#define DPSW_CMDID_CTRL_IF_SET_POOLS DPSW_CMD_ID(0x0A1) +#define DPSW_CMDID_CTRL_IF_ENABLE DPSW_CMD_ID(0x0A2) +#define DPSW_CMDID_CTRL_IF_DISABLE DPSW_CMD_ID(0x0A3) +#define DPSW_CMDID_CTRL_IF_SET_QUEUE DPSW_CMD_ID(0x0A6) + +#define DPSW_CMDID_SET_EGRESS_FLOOD DPSW_CMD_ID(0x0AC) +#define DPSW_CMDID_IF_SET_LEARNING_MODE DPSW_CMD_ID(0x0AD) + +/* Macros for accessing command fields smaller than 1byte */ +#define DPSW_MASK(field) \ + GENMASK(DPSW_##field##_SHIFT + DPSW_##field##_SIZE - 1, \ + DPSW_##field##_SHIFT) +#define dpsw_set_field(var, field, val) \ + ((var) |= (((val) << DPSW_##field##_SHIFT) & DPSW_MASK(field))) +#define dpsw_get_field(var, field) \ + (((var) & DPSW_MASK(field)) >> DPSW_##field##_SHIFT) +#define dpsw_get_bit(var, bit) \ + (((var) >> (bit)) & GENMASK(0, 0)) 
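Before the packed command structures that rely on them, a quick illustration of the field helpers just defined: each DPSW_<FIELD>_SHIFT/SIZE pair describes a sub-byte field, DPSW_MASK() derives its bit mask via GENMASK(), and dpsw_set_field() ORs a value into place (it does not clear first, so the destination should start zeroed, as freshly prepared commands do). The sketch below is illustrative only and not part of this patch; it uses the TCI layout (VLAN_ID:12 DEI:1 PCP:3 from the LSB) defined further down and exercised by dpsw_if_set_tci() in dpsw.c.

static u16 example_pack_tci(void)
{
	u16 conf = 0;

	dpsw_set_field(conf, VLAN_ID, 100);	/* bits 0-11  -> 0x0064 */
	dpsw_set_field(conf, DEI, 1);		/* bit 12     -> 0x1000 */
	dpsw_set_field(conf, PCP, 5);		/* bits 13-15 -> 0xa000 */

	/* conf is now 0xb064; dpsw_get_field(conf, PCP) recovers 5 */
	return conf;
}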
+ +#pragma pack(push, 1) +struct dpsw_cmd_open { + __le32 dpsw_id; +}; + +#define DPSW_COMPONENT_TYPE_SHIFT 0 +#define DPSW_COMPONENT_TYPE_SIZE 4 + +struct dpsw_cmd_create { + /* cmd word 0 */ + __le16 num_ifs; + u8 max_fdbs; + u8 max_meters_per_if; + /* from LSB: only the first 4 bits */ + u8 component_type; + u8 pad[3]; + /* cmd word 1 */ + __le16 max_vlans; + __le16 max_fdb_entries; + __le16 fdb_aging_time; + __le16 max_fdb_mc_groups; + /* cmd word 2 */ + __le64 options; +}; + +struct dpsw_cmd_destroy { + __le32 dpsw_id; +}; + +#define DPSW_ENABLE_SHIFT 0 +#define DPSW_ENABLE_SIZE 1 + +struct dpsw_rsp_is_enabled { + /* from LSB: enable:1 */ + u8 enabled; +}; + +struct dpsw_cmd_set_irq_enable { + u8 enable_state; + u8 pad[3]; + u8 irq_index; +}; + +struct dpsw_cmd_get_irq_enable { + __le32 pad; + u8 irq_index; +}; + +struct dpsw_rsp_get_irq_enable { + u8 enable_state; +}; + +struct dpsw_cmd_set_irq_mask { + __le32 mask; + u8 irq_index; +}; + +struct dpsw_cmd_get_irq_mask { + __le32 pad; + u8 irq_index; +}; + +struct dpsw_rsp_get_irq_mask { + __le32 mask; +}; + +struct dpsw_cmd_get_irq_status { + __le32 status; + u8 irq_index; +}; + +struct dpsw_rsp_get_irq_status { + __le32 status; +}; + +struct dpsw_cmd_clear_irq_status { + __le32 status; + u8 irq_index; +}; + +#define DPSW_COMPONENT_TYPE_SHIFT 0 +#define DPSW_COMPONENT_TYPE_SIZE 4 + +#define DPSW_FLOODING_CFG_SHIFT 0 +#define DPSW_FLOODING_CFG_SIZE 4 + +#define DPSW_BROADCAST_CFG_SHIFT 4 +#define DPSW_BROADCAST_CFG_SIZE 4 + +struct dpsw_rsp_get_attr { + /* cmd word 0 */ + __le16 num_ifs; + u8 max_fdbs; + u8 num_fdbs; + __le16 max_vlans; + __le16 num_vlans; + /* cmd word 1 */ + __le16 max_fdb_entries; + __le16 fdb_aging_time; + __le32 dpsw_id; + /* cmd word 2 */ + __le16 mem_size; + __le16 max_fdb_mc_groups; + u8 max_meters_per_if; + /* from LSB only the first 4 bits */ + u8 component_type; + /* [0:3] - flooding configuration + * [4:7] - broadcast configuration + */ + u8 repl_cfg; + u8 pad; + /* cmd word 3 */ + __le64 options; +}; + +#define DPSW_VLAN_ID_SHIFT 0 +#define DPSW_VLAN_ID_SIZE 12 +#define DPSW_DEI_SHIFT 12 +#define DPSW_DEI_SIZE 1 +#define DPSW_PCP_SHIFT 13 +#define DPSW_PCP_SIZE 3 + +struct dpsw_cmd_if_set_tci { + __le16 if_id; + /* from LSB: VLAN_ID:12 DEI:1 PCP:3 */ + __le16 conf; +}; + +struct dpsw_cmd_if_get_tci { + __le16 if_id; +}; + +struct dpsw_rsp_if_get_tci { + __le16 pad; + __le16 vlan_id; + u8 dei; + u8 pcp; +}; + +#define DPSW_STATE_SHIFT 0 +#define DPSW_STATE_SIZE 4 + +struct dpsw_cmd_if_set_stp { + __le16 if_id; + __le16 vlan_id; + /* only the first LSB 4 bits */ + u8 state; +}; + +#define DPSW_COUNTER_TYPE_SHIFT 0 +#define DPSW_COUNTER_TYPE_SIZE 5 + +struct dpsw_cmd_if_get_counter { + __le16 if_id; + /* from LSB: type:5 */ + u8 type; +}; + +struct dpsw_rsp_if_get_counter { + __le64 pad; + __le64 counter; +}; + +struct dpsw_cmd_if { + __le16 if_id; +}; + +#define DPSW_ADMIT_UNTAGGED_SHIFT 0 +#define DPSW_ADMIT_UNTAGGED_SIZE 4 +#define DPSW_ENABLED_SHIFT 5 +#define DPSW_ENABLED_SIZE 1 +#define DPSW_ACCEPT_ALL_VLAN_SHIFT 6 +#define DPSW_ACCEPT_ALL_VLAN_SIZE 1 + +struct dpsw_rsp_if_get_attr { + /* cmd word 0 */ + /* from LSB: admit_untagged:4 enabled:1 accept_all_vlan:1 */ + u8 conf; + u8 pad1; + u8 num_tcs; + u8 pad2; + __le16 qdid; + /* cmd word 1 */ + __le32 options; + __le32 pad3; + /* cmd word 2 */ + __le32 rate; +}; + +struct dpsw_cmd_if_set_max_frame_length { + __le16 if_id; + __le16 frame_length; +}; + +struct dpsw_cmd_if_set_link_cfg { + /* cmd word 0 */ + __le16 if_id; + u8 pad[6]; + /* cmd word 1 */ + 
__le32 rate; + __le32 pad1; + /* cmd word 2 */ + __le64 options; +}; + +struct dpsw_cmd_if_get_link_state { + __le16 if_id; +}; + +#define DPSW_UP_SHIFT 0 +#define DPSW_UP_SIZE 1 + +struct dpsw_rsp_if_get_link_state { + /* cmd word 0 */ + __le32 pad0; + u8 up; + u8 pad1[3]; + /* cmd word 1 */ + __le32 rate; + __le32 pad2; + /* cmd word 2 */ + __le64 options; +}; + +struct dpsw_vlan_add { + __le16 fdb_id; + __le16 vlan_id; +}; + +struct dpsw_cmd_vlan_add_if { + /* cmd word 0 */ + __le16 options; + __le16 vlan_id; + __le16 fdb_id; + __le16 pad0; + /* cmd word 1-4 */ + __le64 if_id; +}; + +struct dpsw_cmd_vlan_manage_if { + /* cmd word 0 */ + __le16 pad0; + __le16 vlan_id; + __le32 pad1; + /* cmd word 1-4 */ + __le64 if_id; +}; + +struct dpsw_cmd_vlan_remove { + __le16 pad; + __le16 vlan_id; +}; + +struct dpsw_cmd_fdb_add { + __le32 pad; + __le16 fdb_ageing_time; + __le16 num_fdb_entries; +}; + +struct dpsw_rsp_fdb_add { + __le16 fdb_id; +}; + +struct dpsw_cmd_fdb_remove { + __le16 fdb_id; +}; + +#define DPSW_ENTRY_TYPE_SHIFT 0 +#define DPSW_ENTRY_TYPE_SIZE 4 + +struct dpsw_cmd_fdb_unicast_op { + /* cmd word 0 */ + __le16 fdb_id; + u8 mac_addr[6]; + /* cmd word 1 */ + __le16 if_egress; + /* only the first 4 bits from LSB */ + u8 type; +}; + +struct dpsw_cmd_fdb_multicast_op { + /* cmd word 0 */ + __le16 fdb_id; + __le16 num_ifs; + /* only the first 4 bits from LSB */ + u8 type; + u8 pad[3]; + /* cmd word 1 */ + u8 mac_addr[6]; + __le16 pad2; + /* cmd word 2-5 */ + __le64 if_id; +}; + +struct dpsw_cmd_fdb_dump { + __le16 fdb_id; + __le16 pad0; + __le32 pad1; + __le64 iova_addr; + __le32 iova_size; +}; + +struct dpsw_rsp_fdb_dump { + __le16 num_entries; +}; + +struct dpsw_rsp_ctrl_if_get_attr { + __le64 pad; + __le32 rx_fqid; + __le32 rx_err_fqid; + __le32 tx_err_conf_fqid; +}; + +#define DPSW_BACKUP_POOL(val, order) (((val) & 0x1) << (order)) +struct dpsw_cmd_ctrl_if_set_pools { + u8 num_dpbp; + u8 backup_pool_mask; + __le16 pad; + __le32 dpbp_id[DPSW_MAX_DPBP]; + __le16 buffer_size[DPSW_MAX_DPBP]; +}; + +#define DPSW_DEST_TYPE_SHIFT 0 +#define DPSW_DEST_TYPE_SIZE 4 + +struct dpsw_cmd_ctrl_if_set_queue { + __le32 dest_id; + u8 dest_priority; + u8 pad; + /* from LSB: dest_type:4 */ + u8 dest_type; + u8 qtype; + __le64 user_ctx; + __le32 options; +}; + +struct dpsw_rsp_get_api_version { + __le16 version_major; + __le16 version_minor; +}; + +struct dpsw_rsp_if_get_mac_addr { + __le16 pad; + u8 mac_addr[6]; +}; + +struct dpsw_cmd_set_egress_flood { + __le16 fdb_id; + u8 flood_type; + u8 pad[5]; + __le64 if_id; +}; + +#define DPSW_LEARNING_MODE_SHIFT 0 +#define DPSW_LEARNING_MODE_SIZE 4 + +struct dpsw_cmd_if_set_learning_mode { + __le16 if_id; + /* only the first 4 bits from LSB */ + u8 mode; +}; + +struct dpsw_cmd_acl_add { + __le16 pad; + __le16 max_entries; +}; + +struct dpsw_rsp_acl_add { + __le16 acl_id; +}; + +struct dpsw_cmd_acl_remove { + __le16 acl_id; +}; + +struct dpsw_cmd_acl_if { + __le16 acl_id; + __le16 num_ifs; + __le32 pad; + __le64 if_id; +}; + +struct dpsw_prep_acl_entry { + u8 match_l2_dest_mac[6]; + __le16 match_l2_tpid; + + u8 match_l2_source_mac[6]; + __le16 match_l2_vlan_id; + + __le32 match_l3_dest_ip; + __le32 match_l3_source_ip; + + __le16 match_l4_dest_port; + __le16 match_l4_source_port; + __le16 match_l2_ether_type; + u8 match_l2_pcp_dei; + u8 match_l3_dscp; + + u8 mask_l2_dest_mac[6]; + __le16 mask_l2_tpid; + + u8 mask_l2_source_mac[6]; + __le16 mask_l2_vlan_id; + + __le32 mask_l3_dest_ip; + __le32 mask_l3_source_ip; + + __le16 mask_l4_dest_port; + __le16 
mask_l4_source_port; + __le16 mask_l2_ether_type; + u8 mask_l2_pcp_dei; + u8 mask_l3_dscp; + + u8 match_l3_protocol; + u8 mask_l3_protocol; +}; + +#define DPSW_RESULT_ACTION_SHIFT 0 +#define DPSW_RESULT_ACTION_SIZE 4 + +struct dpsw_cmd_acl_entry { + __le16 acl_id; + __le16 result_if_id; + __le32 precedence; + /* from LSB only the first 4 bits */ + u8 result_action; + u8 pad[7]; + __le64 pad2[4]; + __le64 key_iova; +}; +#pragma pack(pop) +#endif /* __FSL_DPSW_CMD_H */ diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.c b/drivers/net/ethernet/freescale/dpaa2/dpsw.c new file mode 100644 index 000000000000..6352d6d1ecba --- /dev/null +++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.c @@ -0,0 +1,1581 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2014-2016 Freescale Semiconductor Inc. + * Copyright 2017-2021 NXP + * + */ + +#include <linux/fsl/mc.h> +#include "dpsw.h" +#include "dpsw-cmd.h" + +static void build_if_id_bitmap(__le64 *bmap, const u16 *id, const u16 num_ifs) +{ + int i; + + for (i = 0; (i < num_ifs) && (i < DPSW_MAX_IF); i++) { + if (id[i] < DPSW_MAX_IF) + bmap[id[i] / 64] |= cpu_to_le64(BIT_MASK(id[i] % 64)); + } +} + +/** + * dpsw_open() - Open a control session for the specified object + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @dpsw_id: DPSW unique ID + * @token: Returned token; use in subsequent API calls + * + * This function can be used to open a control session for an + * already created object; an object may have been declared in + * the DPL or by calling the dpsw_create() function. + * This function returns a unique authentication token, + * associated with the specific object ID and the specific MC + * portal; this token must be used in all subsequent commands for + * this specific object + * + * Return: '0' on Success; Error code otherwise. + */ +int dpsw_open(struct fsl_mc_io *mc_io, u32 cmd_flags, int dpsw_id, u16 *token) +{ + struct fsl_mc_command cmd = { 0 }; + struct dpsw_cmd_open *cmd_params; + int err; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_OPEN, + cmd_flags, + 0); + cmd_params = (struct dpsw_cmd_open *)cmd.params; + cmd_params->dpsw_id = cpu_to_le32(dpsw_id); + + /* send command to mc*/ + err = mc_send_command(mc_io, &cmd); + if (err) + return err; + + /* retrieve response parameters */ + *token = mc_cmd_hdr_read_token(&cmd); + + return 0; +} + +/** + * dpsw_close() - Close the control session of the object + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * + * After this function is called, no further operations are + * allowed on the object without opening a new control session. + * + * Return: '0' on Success; Error code otherwise. + */ +int dpsw_close(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token) +{ + struct fsl_mc_command cmd = { 0 }; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_CLOSE, + cmd_flags, + token); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_enable() - Enable DPSW functionality + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * + * Return: Completion status. '0' on Success; Error code otherwise. 
+ */ +int dpsw_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token) +{ + struct fsl_mc_command cmd = { 0 }; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_ENABLE, + cmd_flags, + token); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_disable() - Disable DPSW functionality + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * + * Return: Completion status. '0' on Success; Error code otherwise. + */ +int dpsw_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token) +{ + struct fsl_mc_command cmd = { 0 }; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_DISABLE, + cmd_flags, + token); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_reset() - Reset the DPSW, returns the object to initial state. + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * + * Return: '0' on Success; Error code otherwise. + */ +int dpsw_reset(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token) +{ + struct fsl_mc_command cmd = { 0 }; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_RESET, + cmd_flags, + token); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_set_irq_enable() - Set overall interrupt state. + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @irq_index: The interrupt index to configure + * @en: Interrupt state - enable = 1, disable = 0 + * + * Allows GPP software to control when interrupts are generated. + * Each interrupt can have up to 32 causes. The enable/disable controls the + * overall interrupt state. If the interrupt is disabled no causes will cause + * an interrupt. + * + * Return: '0' on Success; Error code otherwise. + */ +int dpsw_set_irq_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u8 irq_index, u8 en) +{ + struct fsl_mc_command cmd = { 0 }; + struct dpsw_cmd_set_irq_enable *cmd_params; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_SET_IRQ_ENABLE, + cmd_flags, + token); + cmd_params = (struct dpsw_cmd_set_irq_enable *)cmd.params; + dpsw_set_field(cmd_params->enable_state, ENABLE, en); + cmd_params->irq_index = irq_index; + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_set_irq_mask() - Set interrupt mask. + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @irq_index: The interrupt index to configure + * @mask: Event mask to trigger interrupt; + * each bit: + * 0 = ignore event + * 1 = consider event for asserting IRQ + * + * Every interrupt can have up to 32 causes and the interrupt model supports + * masking/unmasking each cause independently + * + * Return: '0' on Success; Error code otherwise.
+ */ +int dpsw_set_irq_mask(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u8 irq_index, u32 mask) +{ + struct fsl_mc_command cmd = { 0 }; + struct dpsw_cmd_set_irq_mask *cmd_params; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_SET_IRQ_MASK, + cmd_flags, + token); + cmd_params = (struct dpsw_cmd_set_irq_mask *)cmd.params; + cmd_params->mask = cpu_to_le32(mask); + cmd_params->irq_index = irq_index; + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_get_irq_status() - Get the current status of any pending interrupts + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @irq_index: The interrupt index to configure + * @status: Returned interrupts status - one bit per cause: + * 0 = no interrupt pending + * 1 = interrupt pending + * + * Return: '0' on Success; Error code otherwise. + */ +int dpsw_get_irq_status(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u8 irq_index, u32 *status) +{ + struct fsl_mc_command cmd = { 0 }; + struct dpsw_cmd_get_irq_status *cmd_params; + struct dpsw_rsp_get_irq_status *rsp_params; + int err; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_GET_IRQ_STATUS, + cmd_flags, + token); + cmd_params = (struct dpsw_cmd_get_irq_status *)cmd.params; + cmd_params->status = cpu_to_le32(*status); + cmd_params->irq_index = irq_index; + + /* send command to mc*/ + err = mc_send_command(mc_io, &cmd); + if (err) + return err; + + /* retrieve response parameters */ + rsp_params = (struct dpsw_rsp_get_irq_status *)cmd.params; + *status = le32_to_cpu(rsp_params->status); + + return 0; +} + +/** + * dpsw_clear_irq_status() - Clear a pending interrupt's status + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @irq_index: The interrupt index to configure + * @status: bits to clear (W1C) - one bit per cause: + * 0 = don't change + * 1 = clear status bit + * + * Return: '0' on Success; Error code otherwise. + */ +int dpsw_clear_irq_status(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u8 irq_index, u32 status) +{ + struct fsl_mc_command cmd = { 0 }; + struct dpsw_cmd_clear_irq_status *cmd_params; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_CLEAR_IRQ_STATUS, + cmd_flags, + token); + cmd_params = (struct dpsw_cmd_clear_irq_status *)cmd.params; + cmd_params->status = cpu_to_le32(status); + cmd_params->irq_index = irq_index; + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_get_attributes() - Retrieve DPSW attributes + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @attr: Returned DPSW attributes + * + * Return: Completion status. '0' on Success; Error code otherwise.
+ */ +int dpsw_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + struct dpsw_attr *attr) +{ + struct fsl_mc_command cmd = { 0 }; + struct dpsw_rsp_get_attr *rsp_params; + int err; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_GET_ATTR, + cmd_flags, + token); + + /* send command to mc*/ + err = mc_send_command(mc_io, &cmd); + if (err) + return err; + + /* retrieve response parameters */ + rsp_params = (struct dpsw_rsp_get_attr *)cmd.params; + attr->num_ifs = le16_to_cpu(rsp_params->num_ifs); + attr->max_fdbs = rsp_params->max_fdbs; + attr->num_fdbs = rsp_params->num_fdbs; + attr->max_vlans = le16_to_cpu(rsp_params->max_vlans); + attr->num_vlans = le16_to_cpu(rsp_params->num_vlans); + attr->max_fdb_entries = le16_to_cpu(rsp_params->max_fdb_entries); + attr->fdb_aging_time = le16_to_cpu(rsp_params->fdb_aging_time); + attr->id = le32_to_cpu(rsp_params->dpsw_id); + attr->mem_size = le16_to_cpu(rsp_params->mem_size); + attr->max_fdb_mc_groups = le16_to_cpu(rsp_params->max_fdb_mc_groups); + attr->max_meters_per_if = rsp_params->max_meters_per_if; + attr->options = le64_to_cpu(rsp_params->options); + attr->component_type = dpsw_get_field(rsp_params->component_type, COMPONENT_TYPE); + attr->flooding_cfg = dpsw_get_field(rsp_params->repl_cfg, FLOODING_CFG); + attr->broadcast_cfg = dpsw_get_field(rsp_params->repl_cfg, BROADCAST_CFG); + return 0; +} + +/** + * dpsw_if_set_link_cfg() - Set the link configuration. + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @if_id: Interface id + * @cfg: Link configuration + * + * Return: '0' on Success; Error code otherwise. + */ +int dpsw_if_set_link_cfg(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id, + struct dpsw_link_cfg *cfg) +{ + struct fsl_mc_command cmd = { 0 }; + struct dpsw_cmd_if_set_link_cfg *cmd_params; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_LINK_CFG, + cmd_flags, + token); + cmd_params = (struct dpsw_cmd_if_set_link_cfg *)cmd.params; + cmd_params->if_id = cpu_to_le16(if_id); + cmd_params->rate = cpu_to_le32(cfg->rate); + cmd_params->options = cpu_to_le64(cfg->options); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_if_get_link_state - Return the link state + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @if_id: Interface id + * @state: Link state 1 - linkup, 0 - link down or disconnected + * + * Return: '0' on Success; Error code otherwise. 
+ */ +int dpsw_if_get_link_state(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 if_id, struct dpsw_link_state *state) +{ + struct fsl_mc_command cmd = { 0 }; + struct dpsw_cmd_if_get_link_state *cmd_params; + struct dpsw_rsp_if_get_link_state *rsp_params; + int err; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_GET_LINK_STATE, + cmd_flags, + token); + cmd_params = (struct dpsw_cmd_if_get_link_state *)cmd.params; + cmd_params->if_id = cpu_to_le16(if_id); + + /* send command to mc*/ + err = mc_send_command(mc_io, &cmd); + if (err) + return err; + + /* retrieve response parameters */ + rsp_params = (struct dpsw_rsp_if_get_link_state *)cmd.params; + state->rate = le32_to_cpu(rsp_params->rate); + state->options = le64_to_cpu(rsp_params->options); + state->up = dpsw_get_field(rsp_params->up, UP); + + return 0; +} + +/** + * dpsw_if_set_tci() - Set default VLAN Tag Control Information (TCI) + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @if_id: Interface Identifier + * @cfg: Tag Control Information Configuration + * + * Return: Completion status. '0' on Success; Error code otherwise. + */ +int dpsw_if_set_tci(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id, + const struct dpsw_tci_cfg *cfg) +{ + struct fsl_mc_command cmd = { 0 }; + struct dpsw_cmd_if_set_tci *cmd_params; + u16 tmp_conf = 0; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_TCI, + cmd_flags, + token); + cmd_params = (struct dpsw_cmd_if_set_tci *)cmd.params; + cmd_params->if_id = cpu_to_le16(if_id); + dpsw_set_field(tmp_conf, VLAN_ID, cfg->vlan_id); + dpsw_set_field(tmp_conf, DEI, cfg->dei); + dpsw_set_field(tmp_conf, PCP, cfg->pcp); + cmd_params->conf = cpu_to_le16(tmp_conf); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_if_get_tci() - Get default VLAN Tag Control Information (TCI) + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @if_id: Interface Identifier + * @cfg: Tag Control Information Configuration + * + * Return: Completion status. '0' on Success; Error code otherwise. + */ +int dpsw_if_get_tci(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id, + struct dpsw_tci_cfg *cfg) +{ + struct fsl_mc_command cmd = { 0 }; + struct dpsw_cmd_if_get_tci *cmd_params; + struct dpsw_rsp_if_get_tci *rsp_params; + int err; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_GET_TCI, + cmd_flags, + token); + cmd_params = (struct dpsw_cmd_if_get_tci *)cmd.params; + cmd_params->if_id = cpu_to_le16(if_id); + + /* send command to mc*/ + err = mc_send_command(mc_io, &cmd); + if (err) + return err; + + /* retrieve response parameters */ + rsp_params = (struct dpsw_rsp_if_get_tci *)cmd.params; + cfg->pcp = rsp_params->pcp; + cfg->dei = rsp_params->dei; + cfg->vlan_id = le16_to_cpu(rsp_params->vlan_id); + + return 0; +} + +/** + * dpsw_if_set_stp() - Function sets Spanning Tree Protocol (STP) state. + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @if_id: Interface Identifier + * @cfg: STP State configuration parameters + * + * The following STP states are supported - + * blocking, listening, learning, forwarding and disabled. + * + * Return: Completion status. '0' on Success; Error code otherwise. 
+ */ +int dpsw_if_set_stp(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id, + const struct dpsw_stp_cfg *cfg) +{ + struct fsl_mc_command cmd = { 0 }; + struct dpsw_cmd_if_set_stp *cmd_params; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_STP, + cmd_flags, + token); + cmd_params = (struct dpsw_cmd_if_set_stp *)cmd.params; + cmd_params->if_id = cpu_to_le16(if_id); + cmd_params->vlan_id = cpu_to_le16(cfg->vlan_id); + dpsw_set_field(cmd_params->state, STATE, cfg->state); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_if_get_counter() - Get specific counter of particular interface + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @if_id: Interface Identifier + * @type: Counter type + * @counter: return value + * + * Return: Completion status. '0' on Success; Error code otherwise. + */ +int dpsw_if_get_counter(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 if_id, enum dpsw_counter type, u64 *counter) +{ + struct fsl_mc_command cmd = { 0 }; + struct dpsw_cmd_if_get_counter *cmd_params; + struct dpsw_rsp_if_get_counter *rsp_params; + int err; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_GET_COUNTER, + cmd_flags, + token); + cmd_params = (struct dpsw_cmd_if_get_counter *)cmd.params; + cmd_params->if_id = cpu_to_le16(if_id); + dpsw_set_field(cmd_params->type, COUNTER_TYPE, type); + + /* send command to mc*/ + err = mc_send_command(mc_io, &cmd); + if (err) + return err; + + /* retrieve response parameters */ + rsp_params = (struct dpsw_rsp_if_get_counter *)cmd.params; + *counter = le64_to_cpu(rsp_params->counter); + + return 0; +} + +/** + * dpsw_if_enable() - Enable Interface + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @if_id: Interface Identifier + * + * Return: Completion status. '0' on Success; Error code otherwise. + */ +int dpsw_if_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id) +{ + struct fsl_mc_command cmd = { 0 }; + struct dpsw_cmd_if *cmd_params; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_ENABLE, + cmd_flags, + token); + cmd_params = (struct dpsw_cmd_if *)cmd.params; + cmd_params->if_id = cpu_to_le16(if_id); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_if_disable() - Disable Interface + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @if_id: Interface Identifier + * + * Return: Completion status. '0' on Success; Error code otherwise. + */ +int dpsw_if_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id) +{ + struct fsl_mc_command cmd = { 0 }; + struct dpsw_cmd_if *cmd_params; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_DISABLE, + cmd_flags, + token); + cmd_params = (struct dpsw_cmd_if *)cmd.params; + cmd_params->if_id = cpu_to_le16(if_id); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_if_get_attributes() - Function obtains attributes of interface + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @if_id: Interface Identifier + * @attr: Returned interface attributes + * + * Return: Completion status. 
'0' on Success; Error code otherwise. + */ +int dpsw_if_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 if_id, struct dpsw_if_attr *attr) +{ + struct dpsw_rsp_if_get_attr *rsp_params; + struct fsl_mc_command cmd = { 0 }; + struct dpsw_cmd_if *cmd_params; + int err; + + cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_GET_ATTR, cmd_flags, + token); + cmd_params = (struct dpsw_cmd_if *)cmd.params; + cmd_params->if_id = cpu_to_le16(if_id); + + err = mc_send_command(mc_io, &cmd); + if (err) + return err; + + rsp_params = (struct dpsw_rsp_if_get_attr *)cmd.params; + attr->num_tcs = rsp_params->num_tcs; + attr->rate = le32_to_cpu(rsp_params->rate); + attr->options = le32_to_cpu(rsp_params->options); + attr->qdid = le16_to_cpu(rsp_params->qdid); + attr->enabled = dpsw_get_field(rsp_params->conf, ENABLED); + attr->accept_all_vlan = dpsw_get_field(rsp_params->conf, + ACCEPT_ALL_VLAN); + attr->admit_untagged = dpsw_get_field(rsp_params->conf, + ADMIT_UNTAGGED); + + return 0; +} + +/** + * dpsw_if_set_max_frame_length() - Set Maximum Receive frame length. + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @if_id: Interface Identifier + * @frame_length: Maximum Frame Length + * + * Return: Completion status. '0' on Success; Error code otherwise. + */ +int dpsw_if_set_max_frame_length(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 if_id, u16 frame_length) +{ + struct fsl_mc_command cmd = { 0 }; + struct dpsw_cmd_if_set_max_frame_length *cmd_params; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_MAX_FRAME_LENGTH, + cmd_flags, + token); + cmd_params = (struct dpsw_cmd_if_set_max_frame_length *)cmd.params; + cmd_params->if_id = cpu_to_le16(if_id); + cmd_params->frame_length = cpu_to_le16(frame_length); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_vlan_add() - Add a new VLAN to the DPSW. + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @vlan_id: VLAN Identifier + * @cfg: VLAN configuration + * + * Only VLAN ID and FDB ID are required parameters here. + * 12 bit VLAN ID is defined in IEEE802.1Q. + * Adding a duplicate VLAN ID is not allowed. + * FDB ID can be shared across multiple VLANs. Shared learning + * is obtained by calling dpsw_vlan_add for multiple VLAN IDs + * with the same fdb_id. + * + * Return: Completion status. '0' on Success; Error code otherwise. + */ +int dpsw_vlan_add(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 vlan_id, const struct dpsw_vlan_cfg *cfg) +{ + struct fsl_mc_command cmd = { 0 }; + struct dpsw_vlan_add *cmd_params; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_VLAN_ADD, + cmd_flags, + token); + cmd_params = (struct dpsw_vlan_add *)cmd.params; + cmd_params->fdb_id = cpu_to_le16(cfg->fdb_id); + cmd_params->vlan_id = cpu_to_le16(vlan_id); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_vlan_add_if() - Add a set of interfaces to an existing VLAN. + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @vlan_id: VLAN Identifier + * @cfg: Set of interfaces to add + * + * It adds only interfaces not belonging to this VLAN yet, + * otherwise an error is generated and the entire command is + * ignored.
This function can be called numerous times, always + * providing the required interface delta. + * + * Return: Completion status. '0' on Success; Error code otherwise. + */ +int dpsw_vlan_add_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 vlan_id, const struct dpsw_vlan_if_cfg *cfg) +{ + struct dpsw_cmd_vlan_add_if *cmd_params; + struct fsl_mc_command cmd = { 0 }; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_VLAN_ADD_IF, + cmd_flags, + token); + cmd_params = (struct dpsw_cmd_vlan_add_if *)cmd.params; + cmd_params->vlan_id = cpu_to_le16(vlan_id); + cmd_params->options = cpu_to_le16(cfg->options); + cmd_params->fdb_id = cpu_to_le16(cfg->fdb_id); + build_if_id_bitmap(&cmd_params->if_id, cfg->if_id, cfg->num_ifs); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_vlan_add_if_untagged() - Define a set of interfaces that should be + * transmitted as untagged. + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @vlan_id: VLAN Identifier + * @cfg: Set of interfaces that should be transmitted as untagged + * + * These interfaces should already belong to this VLAN. + * By default all interfaces are transmitted as tagged. + * Providing a nonexistent interface or an interface that is + * already configured untagged generates an error and the entire + * command is ignored. + * + * Return: Completion status. '0' on Success; Error code otherwise. + */ +int dpsw_vlan_add_if_untagged(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 vlan_id, const struct dpsw_vlan_if_cfg *cfg) +{ + struct fsl_mc_command cmd = { 0 }; + struct dpsw_cmd_vlan_manage_if *cmd_params; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_VLAN_ADD_IF_UNTAGGED, + cmd_flags, + token); + cmd_params = (struct dpsw_cmd_vlan_manage_if *)cmd.params; + cmd_params->vlan_id = cpu_to_le16(vlan_id); + build_if_id_bitmap(&cmd_params->if_id, cfg->if_id, cfg->num_ifs); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_vlan_remove_if() - Remove interfaces from an existing VLAN. + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @vlan_id: VLAN Identifier + * @cfg: Set of interfaces that should be removed + * + * Interfaces must belong to this VLAN, otherwise an error + * is returned and the command is ignored + * + * Return: Completion status. '0' on Success; Error code otherwise. + */ +int dpsw_vlan_remove_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 vlan_id, const struct dpsw_vlan_if_cfg *cfg) +{ + struct fsl_mc_command cmd = { 0 }; + struct dpsw_cmd_vlan_manage_if *cmd_params; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_VLAN_REMOVE_IF, + cmd_flags, + token); + cmd_params = (struct dpsw_cmd_vlan_manage_if *)cmd.params; + cmd_params->vlan_id = cpu_to_le16(vlan_id); + build_if_id_bitmap(&cmd_params->if_id, cfg->if_id, cfg->num_ifs); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_vlan_remove_if_untagged() - Define a set of interfaces that should be + * converted from being transmitted untagged to being transmitted tagged.
+ * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @vlan_id: VLAN Identifier + * @cfg: Set of interfaces that should be removed + * + * Interfaces provided to this API have to belong to this VLAN and + * be configured untagged, otherwise an error is returned and the + * command is ignored + * + * Return: Completion status. '0' on Success; Error code otherwise. + */ +int dpsw_vlan_remove_if_untagged(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 vlan_id, const struct dpsw_vlan_if_cfg *cfg) +{ + struct fsl_mc_command cmd = { 0 }; + struct dpsw_cmd_vlan_manage_if *cmd_params; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_VLAN_REMOVE_IF_UNTAGGED, + cmd_flags, + token); + cmd_params = (struct dpsw_cmd_vlan_manage_if *)cmd.params; + cmd_params->vlan_id = cpu_to_le16(vlan_id); + build_if_id_bitmap(&cmd_params->if_id, cfg->if_id, cfg->num_ifs); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_vlan_remove() - Remove an entire VLAN + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @vlan_id: VLAN Identifier + * + * Return: Completion status. '0' on Success; Error code otherwise. + */ +int dpsw_vlan_remove(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 vlan_id) +{ + struct fsl_mc_command cmd = { 0 }; + struct dpsw_cmd_vlan_remove *cmd_params; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_VLAN_REMOVE, + cmd_flags, + token); + cmd_params = (struct dpsw_cmd_vlan_remove *)cmd.params; + cmd_params->vlan_id = cpu_to_le16(vlan_id); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_fdb_add() - Add an FDB to the switch and return a handle to the + * FDB table for future reference + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @fdb_id: Returned Forwarding Database Identifier + * @cfg: FDB Configuration + * + * Return: Completion status. '0' on Success; Error code otherwise. + */ +int dpsw_fdb_add(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 *fdb_id, + const struct dpsw_fdb_cfg *cfg) +{ + struct dpsw_cmd_fdb_add *cmd_params; + struct dpsw_rsp_fdb_add *rsp_params; + struct fsl_mc_command cmd = { 0 }; + int err; + + cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_ADD, + cmd_flags, + token); + cmd_params = (struct dpsw_cmd_fdb_add *)cmd.params; + cmd_params->fdb_ageing_time = cpu_to_le16(cfg->fdb_ageing_time); + cmd_params->num_fdb_entries = cpu_to_le16(cfg->num_fdb_entries); + + err = mc_send_command(mc_io, &cmd); + if (err) + return err; + + rsp_params = (struct dpsw_rsp_fdb_add *)cmd.params; + *fdb_id = le16_to_cpu(rsp_params->fdb_id); + + return 0; +} + +/** + * dpsw_fdb_remove() - Remove FDB from switch + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @fdb_id: Forwarding Database Identifier + * + * Return: Completion status. '0' on Success; Error code otherwise.
+ */ +int dpsw_fdb_remove(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 fdb_id) +{ + struct dpsw_cmd_fdb_remove *cmd_params; + struct fsl_mc_command cmd = { 0 }; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_REMOVE, + cmd_flags, + token); + cmd_params = (struct dpsw_cmd_fdb_remove *)cmd.params; + cmd_params->fdb_id = cpu_to_le16(fdb_id); + + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_fdb_add_unicast() - Add a unicast entry to the MAC lookup table + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @fdb_id: Forwarding Database Identifier + * @cfg: Unicast entry configuration + * + * Return: Completion status. '0' on Success; Error code otherwise. + */ +int dpsw_fdb_add_unicast(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 fdb_id, const struct dpsw_fdb_unicast_cfg *cfg) +{ + struct fsl_mc_command cmd = { 0 }; + struct dpsw_cmd_fdb_unicast_op *cmd_params; + int i; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_ADD_UNICAST, + cmd_flags, + token); + cmd_params = (struct dpsw_cmd_fdb_unicast_op *)cmd.params; + cmd_params->fdb_id = cpu_to_le16(fdb_id); + cmd_params->if_egress = cpu_to_le16(cfg->if_egress); + for (i = 0; i < 6; i++) + cmd_params->mac_addr[i] = cfg->mac_addr[5 - i]; + dpsw_set_field(cmd_params->type, ENTRY_TYPE, cfg->type); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_fdb_dump() - Dump the content of the FDB table into memory. + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @fdb_id: Forwarding Database Identifier + * @iova_addr: Data will be stored here as an array of struct fdb_dump_entry + * @iova_size: Memory size allocated at iova_addr + * @num_entries: Number of entries written at iova_addr + * + * Return: Completion status. '0' on Success; Error code otherwise. + * + * The memory allocated at iova_addr must be initialized with zero before + * command execution. If the FDB table does not fit into memory MC will stop + * after the memory is filled up. + * The struct fdb_dump_entry array must be parsed until the end of memory + * area or until an entry with mac_addr set to zero is found. + */ +int dpsw_fdb_dump(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 fdb_id, + u64 iova_addr, u32 iova_size, u16 *num_entries) +{ + struct dpsw_cmd_fdb_dump *cmd_params; + struct dpsw_rsp_fdb_dump *rsp_params; + struct fsl_mc_command cmd = { 0 }; + int err; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_DUMP, + cmd_flags, + token); + cmd_params = (struct dpsw_cmd_fdb_dump *)cmd.params; + cmd_params->fdb_id = cpu_to_le16(fdb_id); + cmd_params->iova_addr = cpu_to_le64(iova_addr); + cmd_params->iova_size = cpu_to_le32(iova_size); + + /* send command to mc */ + err = mc_send_command(mc_io, &cmd); + if (err) + return err; + + rsp_params = (struct dpsw_rsp_fdb_dump *)cmd.params; + *num_entries = le16_to_cpu(rsp_params->num_entries); + + return 0; +} + +/** + * dpsw_fdb_remove_unicast() - Remove an entry from the MAC lookup table + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @fdb_id: Forwarding Database Identifier + * @cfg: Unicast entry configuration + * + * Return: Completion status. '0' on Success; Error code otherwise.
+ */ +int dpsw_fdb_remove_unicast(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 fdb_id, const struct dpsw_fdb_unicast_cfg *cfg) +{ + struct fsl_mc_command cmd = { 0 }; + struct dpsw_cmd_fdb_unicast_op *cmd_params; + int i; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_REMOVE_UNICAST, + cmd_flags, + token); + cmd_params = (struct dpsw_cmd_fdb_unicast_op *)cmd.params; + cmd_params->fdb_id = cpu_to_le16(fdb_id); + for (i = 0; i < 6; i++) + cmd_params->mac_addr[i] = cfg->mac_addr[5 - i]; + cmd_params->if_egress = cpu_to_le16(cfg->if_egress); + dpsw_set_field(cmd_params->type, ENTRY_TYPE, cfg->type); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_fdb_add_multicast() - Add a set of egress interfaces to a multicast group + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @fdb_id: Forwarding Database Identifier + * @cfg: Multicast entry configuration + * + * If the group doesn't exist, it will be created. + * It adds only interfaces not belonging to this multicast group + * yet, otherwise an error is generated and the command is + * ignored. + * This function may be called numerous times, always providing + * the required interface delta. + * + * Return: Completion status. '0' on Success; Error code otherwise. + */ +int dpsw_fdb_add_multicast(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 fdb_id, const struct dpsw_fdb_multicast_cfg *cfg) +{ + struct fsl_mc_command cmd = { 0 }; + struct dpsw_cmd_fdb_multicast_op *cmd_params; + int i; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_ADD_MULTICAST, + cmd_flags, + token); + cmd_params = (struct dpsw_cmd_fdb_multicast_op *)cmd.params; + cmd_params->fdb_id = cpu_to_le16(fdb_id); + cmd_params->num_ifs = cpu_to_le16(cfg->num_ifs); + dpsw_set_field(cmd_params->type, ENTRY_TYPE, cfg->type); + build_if_id_bitmap(&cmd_params->if_id, cfg->if_id, cfg->num_ifs); + for (i = 0; i < 6; i++) + cmd_params->mac_addr[i] = cfg->mac_addr[5 - i]; + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_fdb_remove_multicast() - Remove interfaces from an existing multicast + * group. + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @fdb_id: Forwarding Database Identifier + * @cfg: Multicast entry configuration + * + * Interfaces provided by this API have to exist in the group, + * otherwise an error is returned and the entire command is + * ignored. If there is no interface left in the group, + * the entire group is deleted + * + * Return: Completion status. '0' on Success; Error code otherwise.
+ */ +int dpsw_fdb_remove_multicast(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 fdb_id, const struct dpsw_fdb_multicast_cfg *cfg) +{ + struct fsl_mc_command cmd = { 0 }; + struct dpsw_cmd_fdb_multicast_op *cmd_params; + int i; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_FDB_REMOVE_MULTICAST, + cmd_flags, + token); + cmd_params = (struct dpsw_cmd_fdb_multicast_op *)cmd.params; + cmd_params->fdb_id = cpu_to_le16(fdb_id); + cmd_params->num_ifs = cpu_to_le16(cfg->num_ifs); + dpsw_set_field(cmd_params->type, ENTRY_TYPE, cfg->type); + build_if_id_bitmap(&cmd_params->if_id, cfg->if_id, cfg->num_ifs); + for (i = 0; i < 6; i++) + cmd_params->mac_addr[i] = cfg->mac_addr[5 - i]; + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_ctrl_if_get_attributes() - Obtain control interface attributes + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @attr: Returned control interface attributes + * + * Return: '0' on Success; Error code otherwise. + */ +int dpsw_ctrl_if_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags, + u16 token, struct dpsw_ctrl_if_attr *attr) +{ + struct dpsw_rsp_ctrl_if_get_attr *rsp_params; + struct fsl_mc_command cmd = { 0 }; + int err; + + cmd.header = mc_encode_cmd_header(DPSW_CMDID_CTRL_IF_GET_ATTR, + cmd_flags, token); + + err = mc_send_command(mc_io, &cmd); + if (err) + return err; + + rsp_params = (struct dpsw_rsp_ctrl_if_get_attr *)cmd.params; + attr->rx_fqid = le32_to_cpu(rsp_params->rx_fqid); + attr->rx_err_fqid = le32_to_cpu(rsp_params->rx_err_fqid); + attr->tx_err_conf_fqid = le32_to_cpu(rsp_params->tx_err_conf_fqid); + + return 0; +} + +/** + * dpsw_ctrl_if_set_pools() - Set control interface buffer pools + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @cfg: Buffer pools configuration + * + * Return: '0' on Success; Error code otherwise. + */ +int dpsw_ctrl_if_set_pools(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + const struct dpsw_ctrl_if_pools_cfg *cfg) +{ + struct dpsw_cmd_ctrl_if_set_pools *cmd_params; + struct fsl_mc_command cmd = { 0 }; + int i; + + cmd.header = mc_encode_cmd_header(DPSW_CMDID_CTRL_IF_SET_POOLS, + cmd_flags, token); + cmd_params = (struct dpsw_cmd_ctrl_if_set_pools *)cmd.params; + cmd_params->num_dpbp = cfg->num_dpbp; + for (i = 0; i < DPSW_MAX_DPBP; i++) { + cmd_params->dpbp_id[i] = cpu_to_le32(cfg->pools[i].dpbp_id); + cmd_params->buffer_size[i] = + cpu_to_le16(cfg->pools[i].buffer_size); + cmd_params->backup_pool_mask |= + DPSW_BACKUP_POOL(cfg->pools[i].backup_pool, i); + } + + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_ctrl_if_set_queue() - Set Rx queue configuration + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of dpsw object + * @qtype: dpsw_queue_type of the targeted queue + * @cfg: Rx queue configuration + * + * Return: '0' on Success; Error code otherwise. 
+ */ +int dpsw_ctrl_if_set_queue(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + enum dpsw_queue_type qtype, + const struct dpsw_ctrl_if_queue_cfg *cfg) +{ + struct dpsw_cmd_ctrl_if_set_queue *cmd_params; + struct fsl_mc_command cmd = { 0 }; + + cmd.header = mc_encode_cmd_header(DPSW_CMDID_CTRL_IF_SET_QUEUE, + cmd_flags, + token); + cmd_params = (struct dpsw_cmd_ctrl_if_set_queue *)cmd.params; + cmd_params->dest_id = cpu_to_le32(cfg->dest_cfg.dest_id); + cmd_params->dest_priority = cfg->dest_cfg.priority; + cmd_params->qtype = qtype; + cmd_params->user_ctx = cpu_to_le64(cfg->user_ctx); + cmd_params->options = cpu_to_le32(cfg->options); + dpsw_set_field(cmd_params->dest_type, + DEST_TYPE, + cfg->dest_cfg.dest_type); + + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_get_api_version() - Get Data Path Switch API version + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @major_ver: Major version of data path switch API + * @minor_ver: Minor version of data path switch API + * + * Return: '0' on Success; Error code otherwise. + */ +int dpsw_get_api_version(struct fsl_mc_io *mc_io, u32 cmd_flags, + u16 *major_ver, u16 *minor_ver) +{ + struct fsl_mc_command cmd = { 0 }; + struct dpsw_rsp_get_api_version *rsp_params; + int err; + + cmd.header = mc_encode_cmd_header(DPSW_CMDID_GET_API_VERSION, + cmd_flags, + 0); + + err = mc_send_command(mc_io, &cmd); + if (err) + return err; + + rsp_params = (struct dpsw_rsp_get_api_version *)cmd.params; + *major_ver = le16_to_cpu(rsp_params->version_major); + *minor_ver = le16_to_cpu(rsp_params->version_minor); + + return 0; +} + +/** + * dpsw_if_get_port_mac_addr() - Retrieve MAC address associated to the physical port + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @if_id: Interface Identifier + * @mac_addr: MAC address of the physical port, if any, otherwise 0 + * + * Return: Completion status. '0' on Success; Error code otherwise. + */ +int dpsw_if_get_port_mac_addr(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 if_id, u8 mac_addr[6]) +{ + struct dpsw_rsp_if_get_mac_addr *rsp_params; + struct fsl_mc_command cmd = { 0 }; + struct dpsw_cmd_if *cmd_params; + int err, i; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_GET_PORT_MAC_ADDR, + cmd_flags, + token); + cmd_params = (struct dpsw_cmd_if *)cmd.params; + cmd_params->if_id = cpu_to_le16(if_id); + + /* send command to mc*/ + err = mc_send_command(mc_io, &cmd); + if (err) + return err; + + /* retrieve response parameters */ + rsp_params = (struct dpsw_rsp_if_get_mac_addr *)cmd.params; + for (i = 0; i < 6; i++) + mac_addr[5 - i] = rsp_params->mac_addr[i]; + + return 0; +} + +/** + * dpsw_ctrl_if_enable() - Enable control interface + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * + * Return: '0' on Success; Error code otherwise. 
+ */ +int dpsw_ctrl_if_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token) +{ + struct fsl_mc_command cmd = { 0 }; + + cmd.header = mc_encode_cmd_header(DPSW_CMDID_CTRL_IF_ENABLE, cmd_flags, + token); + + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_ctrl_if_disable() - Function disables control interface + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * + * Return: '0' on Success; Error code otherwise. + */ +int dpsw_ctrl_if_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token) +{ + struct fsl_mc_command cmd = { 0 }; + + cmd.header = mc_encode_cmd_header(DPSW_CMDID_CTRL_IF_DISABLE, + cmd_flags, + token); + + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_set_egress_flood() - Set egress parameters associated with an FDB ID + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @cfg: Egress flooding configuration + * + * Return: '0' on Success; Error code otherwise. + */ +int dpsw_set_egress_flood(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + const struct dpsw_egress_flood_cfg *cfg) +{ + struct dpsw_cmd_set_egress_flood *cmd_params; + struct fsl_mc_command cmd = { 0 }; + + cmd.header = mc_encode_cmd_header(DPSW_CMDID_SET_EGRESS_FLOOD, cmd_flags, token); + cmd_params = (struct dpsw_cmd_set_egress_flood *)cmd.params; + cmd_params->fdb_id = cpu_to_le16(cfg->fdb_id); + cmd_params->flood_type = cfg->flood_type; + build_if_id_bitmap(&cmd_params->if_id, cfg->if_id, cfg->num_ifs); + + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_if_set_learning_mode() - Configure the learning mode on an interface. + * If this API is used, it will take precedence over the FDB configuration. + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @if_id: InterfaceID + * @mode: Learning mode + * + * Return: Completion status. '0' on Success; Error code otherwise. + */ +int dpsw_if_set_learning_mode(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 if_id, enum dpsw_learning_mode mode) +{ + struct dpsw_cmd_if_set_learning_mode *cmd_params; + struct fsl_mc_command cmd = { 0 }; + + cmd.header = mc_encode_cmd_header(DPSW_CMDID_IF_SET_LEARNING_MODE, + cmd_flags, + token); + cmd_params = (struct dpsw_cmd_if_set_learning_mode *)cmd.params; + cmd_params->if_id = cpu_to_le16(if_id); + dpsw_set_field(cmd_params->mode, LEARNING_MODE, mode); + + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_acl_add() - Create an ACL table + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @acl_id: Returned ACL ID, for future references + * @cfg: ACL configuration + * + * Create Access Control List table. Multiple ACLs can be created and + * co-exist in L2 switch + * + * Return: '0' on Success; Error code otherwise. 
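+ *
+ * A minimal sketch of creating a table and binding it to one port (the
+ * table size and port index below are arbitrary examples):
+ *
+ *	struct dpsw_acl_cfg acl_cfg = { .max_entries = 16 };
+ *	struct dpsw_acl_if_cfg if_cfg = { .num_ifs = 1, .if_id = { 0 } };
+ *	u16 acl_id;
+ *
+ *	err = dpsw_acl_add(mc_io, 0, token, &acl_id, &acl_cfg);
+ *	if (!err)
+ *		err = dpsw_acl_add_if(mc_io, 0, token, acl_id, &if_cfg);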
+ */ +int dpsw_acl_add(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 *acl_id, + const struct dpsw_acl_cfg *cfg) +{ + struct dpsw_cmd_acl_add *cmd_params; + struct dpsw_rsp_acl_add *rsp_params; + struct fsl_mc_command cmd = { 0 }; + int err; + + cmd.header = mc_encode_cmd_header(DPSW_CMDID_ACL_ADD, cmd_flags, token); + cmd_params = (struct dpsw_cmd_acl_add *)cmd.params; + cmd_params->max_entries = cpu_to_le16(cfg->max_entries); + + err = mc_send_command(mc_io, &cmd); + if (err) + return err; + + rsp_params = (struct dpsw_rsp_acl_add *)cmd.params; + *acl_id = le16_to_cpu(rsp_params->acl_id); + + return 0; +} + +/** + * dpsw_acl_remove() - Remove an ACL table from L2 switch. + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @acl_id: ACL ID + * + * Return: '0' on Success; Error code otherwise. + */ +int dpsw_acl_remove(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 acl_id) +{ + struct dpsw_cmd_acl_remove *cmd_params; + struct fsl_mc_command cmd = { 0 }; + + cmd.header = mc_encode_cmd_header(DPSW_CMDID_ACL_REMOVE, cmd_flags, + token); + cmd_params = (struct dpsw_cmd_acl_remove *)cmd.params; + cmd_params->acl_id = cpu_to_le16(acl_id); + + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_acl_add_if() - Associate interface/interfaces with an ACL table. + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @acl_id: ACL ID + * @cfg: Interfaces list + * + * Return: '0' on Success; Error code otherwise. + */ +int dpsw_acl_add_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 acl_id, const struct dpsw_acl_if_cfg *cfg) +{ + struct dpsw_cmd_acl_if *cmd_params; + struct fsl_mc_command cmd = { 0 }; + + cmd.header = mc_encode_cmd_header(DPSW_CMDID_ACL_ADD_IF, cmd_flags, + token); + cmd_params = (struct dpsw_cmd_acl_if *)cmd.params; + cmd_params->acl_id = cpu_to_le16(acl_id); + cmd_params->num_ifs = cpu_to_le16(cfg->num_ifs); + build_if_id_bitmap(&cmd_params->if_id, cfg->if_id, cfg->num_ifs); + + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_acl_remove_if() - De-associate interface/interfaces from an ACL table + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @acl_id: ACL ID + * @cfg: Interfaces list + * + * Return: '0' on Success; Error code otherwise. 
+ */ +int dpsw_acl_remove_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 acl_id, const struct dpsw_acl_if_cfg *cfg) +{ + struct dpsw_cmd_acl_if *cmd_params; + struct fsl_mc_command cmd = { 0 }; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPSW_CMDID_ACL_REMOVE_IF, cmd_flags, + token); + cmd_params = (struct dpsw_cmd_acl_if *)cmd.params; + cmd_params->acl_id = cpu_to_le16(acl_id); + cmd_params->num_ifs = cpu_to_le16(cfg->num_ifs); + build_if_id_bitmap(&cmd_params->if_id, cfg->if_id, cfg->num_ifs); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} + +/** + * dpsw_acl_prepare_entry_cfg() - Setup an ACL entry + * @key: Key + * @entry_cfg_buf: Zeroed 256 bytes of memory before mapping it to DMA + * + * This function has to be called before adding or removing acl_entry + * + */ +void dpsw_acl_prepare_entry_cfg(const struct dpsw_acl_key *key, + u8 *entry_cfg_buf) +{ + struct dpsw_prep_acl_entry *ext_params; + int i; + + ext_params = (struct dpsw_prep_acl_entry *)entry_cfg_buf; + + for (i = 0; i < 6; i++) { + ext_params->match_l2_dest_mac[i] = key->match.l2_dest_mac[5 - i]; + ext_params->match_l2_source_mac[i] = key->match.l2_source_mac[5 - i]; + ext_params->mask_l2_dest_mac[i] = key->mask.l2_dest_mac[5 - i]; + ext_params->mask_l2_source_mac[i] = key->mask.l2_source_mac[5 - i]; + } + + ext_params->match_l2_tpid = cpu_to_le16(key->match.l2_tpid); + ext_params->match_l2_vlan_id = cpu_to_le16(key->match.l2_vlan_id); + ext_params->match_l3_dest_ip = cpu_to_le32(key->match.l3_dest_ip); + ext_params->match_l3_source_ip = cpu_to_le32(key->match.l3_source_ip); + ext_params->match_l4_dest_port = cpu_to_le16(key->match.l4_dest_port); + ext_params->match_l4_source_port = cpu_to_le16(key->match.l4_source_port); + ext_params->match_l2_ether_type = cpu_to_le16(key->match.l2_ether_type); + ext_params->match_l2_pcp_dei = key->match.l2_pcp_dei; + ext_params->match_l3_dscp = key->match.l3_dscp; + + ext_params->mask_l2_tpid = cpu_to_le16(key->mask.l2_tpid); + ext_params->mask_l2_vlan_id = cpu_to_le16(key->mask.l2_vlan_id); + ext_params->mask_l3_dest_ip = cpu_to_le32(key->mask.l3_dest_ip); + ext_params->mask_l3_source_ip = cpu_to_le32(key->mask.l3_source_ip); + ext_params->mask_l4_dest_port = cpu_to_le16(key->mask.l4_dest_port); + ext_params->mask_l4_source_port = cpu_to_le16(key->mask.l4_source_port); + ext_params->mask_l2_ether_type = cpu_to_le16(key->mask.l2_ether_type); + ext_params->mask_l2_pcp_dei = key->mask.l2_pcp_dei; + ext_params->mask_l3_dscp = key->mask.l3_dscp; + ext_params->match_l3_protocol = key->match.l3_protocol; + ext_params->mask_l3_protocol = key->mask.l3_protocol; +} + +/** + * dpsw_acl_add_entry() - Add a rule to the ACL table. + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPSW object + * @acl_id: ACL ID + * @cfg: Entry configuration + * + * warning: This function has to be called after dpsw_acl_prepare_entry_cfg() + * + * Return: '0' on Success; Error code otherwise. 
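+ *
+ * A sketch of the expected flow (illustrative; the 256-byte buffer size
+ * matches the dpsw_acl_prepare_entry_cfg() requirement, while the device
+ * used for the DMA mapping is the caller's choice):
+ *
+ *	u8 *buf = kzalloc(256, GFP_KERNEL);
+ *
+ *	dpsw_acl_prepare_entry_cfg(&key, buf);
+ *	cfg.key_iova = dma_map_single(dev, buf, 256, DMA_TO_DEVICE);
+ *	err = dpsw_acl_add_entry(mc_io, 0, token, acl_id, &cfg);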
+ */
+int dpsw_acl_add_entry(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+		       u16 acl_id, const struct dpsw_acl_entry_cfg *cfg)
+{
+	struct dpsw_cmd_acl_entry *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_ACL_ADD_ENTRY, cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_acl_entry *)cmd.params;
+	cmd_params->acl_id = cpu_to_le16(acl_id);
+	cmd_params->result_if_id = cpu_to_le16(cfg->result.if_id);
+	cmd_params->precedence = cpu_to_le32(cfg->precedence);
+	cmd_params->key_iova = cpu_to_le64(cfg->key_iova);
+	dpsw_set_field(cmd_params->result_action,
+		       RESULT_ACTION,
+		       cfg->result.action);
+
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpsw_acl_remove_entry() - Remove an entry from the ACL table.
+ * @mc_io: Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token: Token of DPSW object
+ * @acl_id: ACL ID
+ * @cfg: Entry configuration
+ *
+ * warning: This function has to be called after dpsw_acl_prepare_entry_cfg()
+ *
+ * Return: '0' on Success; Error code otherwise.
+ */
+int dpsw_acl_remove_entry(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			  u16 acl_id, const struct dpsw_acl_entry_cfg *cfg)
+{
+	struct dpsw_cmd_acl_entry *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPSW_CMDID_ACL_REMOVE_ENTRY,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpsw_cmd_acl_entry *)cmd.params;
+	cmd_params->acl_id = cpu_to_le16(acl_id);
+	cmd_params->result_if_id = cpu_to_le16(cfg->result.if_id);
+	cmd_params->precedence = cpu_to_le32(cfg->precedence);
+	cmd_params->key_iova = cpu_to_le64(cfg->key_iova);
+	dpsw_set_field(cmd_params->result_action,
+		       RESULT_ACTION,
+		       cfg->result.action);
+
+	/* send command to mc */
+	return mc_send_command(mc_io, &cmd);
+}
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.h b/drivers/net/ethernet/freescale/dpaa2/dpsw.h
new file mode 100644
index 000000000000..5ef221a25b02
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.h
@@ -0,0 +1,755 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2014-2016 Freescale Semiconductor Inc.
+ * Copyright 2017-2021 NXP + * + */ + +#ifndef __FSL_DPSW_H +#define __FSL_DPSW_H + +/* Data Path L2-Switch API + * Contains API for handling DPSW topology and functionality + */ + +struct fsl_mc_io; + +/* DPSW general definitions */ + +#define DPSW_MAX_PRIORITIES 8 + +#define DPSW_MAX_IF 64 + +int dpsw_open(struct fsl_mc_io *mc_io, u32 cmd_flags, int dpsw_id, u16 *token); + +int dpsw_close(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token); + +/* DPSW options */ + +/** + * DPSW_OPT_FLOODING_DIS - Flooding was disabled at device create + */ +#define DPSW_OPT_FLOODING_DIS 0x0000000000000001ULL +/** + * DPSW_OPT_MULTICAST_DIS - Multicast was disabled at device create + */ +#define DPSW_OPT_MULTICAST_DIS 0x0000000000000004ULL +/** + * DPSW_OPT_CTRL_IF_DIS - Control interface support is disabled + */ +#define DPSW_OPT_CTRL_IF_DIS 0x0000000000000010ULL + +/** + * enum dpsw_component_type - component type of a bridge + * @DPSW_COMPONENT_TYPE_C_VLAN: A C-VLAN component of an + * enterprise VLAN bridge or of a Provider Bridge used + * to process C-tagged frames + * @DPSW_COMPONENT_TYPE_S_VLAN: An S-VLAN component of a + * Provider Bridge + * + */ +enum dpsw_component_type { + DPSW_COMPONENT_TYPE_C_VLAN = 0, + DPSW_COMPONENT_TYPE_S_VLAN +}; + +/** + * enum dpsw_flooding_cfg - flooding configuration requested + * @DPSW_FLOODING_PER_VLAN: Flooding replicators are allocated per VLAN and + * interfaces present in each of them can be configured using + * dpsw_vlan_add_if_flooding()/dpsw_vlan_remove_if_flooding(). + * This is the default configuration. + * + * @DPSW_FLOODING_PER_FDB: Flooding replicators are allocated per FDB and + * interfaces present in each of them can be configured using + * dpsw_set_egress_flood(). + */ +enum dpsw_flooding_cfg { + DPSW_FLOODING_PER_VLAN = 0, + DPSW_FLOODING_PER_FDB, +}; + +/** + * enum dpsw_broadcast_cfg - broadcast configuration requested + * @DPSW_BROADCAST_PER_OBJECT: There is only one broadcast replicator per DPSW + * object. This is the default configuration. + * @DPSW_BROADCAST_PER_FDB: Broadcast replicators are allocated per FDB and + * interfaces present in each of them can be configured using + * dpsw_set_egress_flood(). 
+ */ +enum dpsw_broadcast_cfg { + DPSW_BROADCAST_PER_OBJECT = 0, + DPSW_BROADCAST_PER_FDB, +}; + +int dpsw_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token); + +int dpsw_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token); + +int dpsw_reset(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token); + +/* DPSW IRQ Index and Events */ + +#define DPSW_IRQ_INDEX_IF 0x0000 +#define DPSW_IRQ_INDEX_L2SW 0x0001 + +/** + * DPSW_IRQ_EVENT_LINK_CHANGED - Indicates that the link state changed + */ +#define DPSW_IRQ_EVENT_LINK_CHANGED 0x0001 + +/** + * struct dpsw_irq_cfg - IRQ configuration + * @addr: Address that must be written to signal a message-based interrupt + * @val: Value to write into irq_addr address + * @irq_num: A user defined number associated with this IRQ + */ +struct dpsw_irq_cfg { + u64 addr; + u32 val; + int irq_num; +}; + +int dpsw_set_irq_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u8 irq_index, u8 en); + +int dpsw_set_irq_mask(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u8 irq_index, u32 mask); + +int dpsw_get_irq_status(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u8 irq_index, u32 *status); + +int dpsw_clear_irq_status(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u8 irq_index, u32 status); + +/** + * struct dpsw_attr - Structure representing DPSW attributes + * @id: DPSW object ID + * @options: Enable/Disable DPSW features + * @max_vlans: Maximum Number of VLANs + * @max_meters_per_if: Number of meters per interface + * @max_fdbs: Maximum Number of FDBs + * @max_fdb_entries: Number of FDB entries for default FDB table; + * 0 - indicates default 1024 entries. + * @fdb_aging_time: Default FDB aging time for default FDB table; + * 0 - indicates default 300 seconds + * @max_fdb_mc_groups: Number of multicast groups in each FDB table; + * 0 - indicates default 32 + * @mem_size: DPSW frame storage memory size + * @num_ifs: Number of interfaces + * @num_vlans: Current number of VLANs + * @num_fdbs: Current number of FDBs + * @component_type: Component type of this bridge + * @flooding_cfg: Flooding configuration (PER_VLAN - default, PER_FDB) + * @broadcast_cfg: Broadcast configuration (PER_OBJECT - default, PER_FDB) + */ +struct dpsw_attr { + int id; + u64 options; + u16 max_vlans; + u8 max_meters_per_if; + u8 max_fdbs; + u16 max_fdb_entries; + u16 fdb_aging_time; + u16 max_fdb_mc_groups; + u16 num_ifs; + u16 mem_size; + u16 num_vlans; + u8 num_fdbs; + enum dpsw_component_type component_type; + enum dpsw_flooding_cfg flooding_cfg; + enum dpsw_broadcast_cfg broadcast_cfg; +}; + +int dpsw_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + struct dpsw_attr *attr); + +/** + * struct dpsw_ctrl_if_attr - Control interface attributes + * @rx_fqid: Receive FQID + * @rx_err_fqid: Receive error FQID + * @tx_err_conf_fqid: Transmit error and confirmation FQID + */ +struct dpsw_ctrl_if_attr { + u32 rx_fqid; + u32 rx_err_fqid; + u32 tx_err_conf_fqid; +}; + +int dpsw_ctrl_if_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags, + u16 token, struct dpsw_ctrl_if_attr *attr); + +enum dpsw_queue_type { + DPSW_QUEUE_RX, + DPSW_QUEUE_TX_ERR_CONF, + DPSW_QUEUE_RX_ERR, +}; + +#define DPSW_MAX_DPBP 8 + +/** + * struct dpsw_ctrl_if_pools_cfg - Control interface buffer pools configuration + * @num_dpbp: Number of DPBPs + * @pools: Array of buffer pools parameters; The number of valid entries + * must match 'num_dpbp' value + * @pools.dpbp_id: DPBP object ID + * @pools.buffer_size: Buffer size + * @pools.backup_pool: Backup pool + */ +struct 
dpsw_ctrl_if_pools_cfg { + u8 num_dpbp; + struct { + int dpbp_id; + u16 buffer_size; + int backup_pool; + } pools[DPSW_MAX_DPBP]; +}; + +int dpsw_ctrl_if_set_pools(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + const struct dpsw_ctrl_if_pools_cfg *cfg); + +#define DPSW_CTRL_IF_QUEUE_OPT_USER_CTX 0x00000001 +#define DPSW_CTRL_IF_QUEUE_OPT_DEST 0x00000002 + +enum dpsw_ctrl_if_dest { + DPSW_CTRL_IF_DEST_NONE = 0, + DPSW_CTRL_IF_DEST_DPIO = 1, +}; + +struct dpsw_ctrl_if_dest_cfg { + enum dpsw_ctrl_if_dest dest_type; + int dest_id; + u8 priority; +}; + +struct dpsw_ctrl_if_queue_cfg { + u32 options; + u64 user_ctx; + struct dpsw_ctrl_if_dest_cfg dest_cfg; +}; + +int dpsw_ctrl_if_set_queue(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + enum dpsw_queue_type qtype, + const struct dpsw_ctrl_if_queue_cfg *cfg); + +int dpsw_ctrl_if_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token); + +int dpsw_ctrl_if_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token); + +/** + * enum dpsw_action - Action selection for special/control frames + * @DPSW_ACTION_DROP: Drop frame + * @DPSW_ACTION_REDIRECT: Redirect frame to control port + */ +enum dpsw_action { + DPSW_ACTION_DROP = 0, + DPSW_ACTION_REDIRECT = 1 +}; + +#define DPSW_LINK_OPT_AUTONEG 0x0000000000000001ULL +#define DPSW_LINK_OPT_HALF_DUPLEX 0x0000000000000002ULL +#define DPSW_LINK_OPT_PAUSE 0x0000000000000004ULL +#define DPSW_LINK_OPT_ASYM_PAUSE 0x0000000000000008ULL + +/** + * struct dpsw_link_cfg - Structure representing DPSW link configuration + * @rate: Rate + * @options: Mask of available options; use 'DPSW_LINK_OPT_<X>' values + */ +struct dpsw_link_cfg { + u32 rate; + u64 options; +}; + +int dpsw_if_set_link_cfg(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id, + struct dpsw_link_cfg *cfg); + +/** + * struct dpsw_link_state - Structure representing DPSW link state + * @rate: Rate + * @options: Mask of available options; use 'DPSW_LINK_OPT_<X>' values + * @up: 0 - covers two cases: down and disconnected, 1 - up + */ +struct dpsw_link_state { + u32 rate; + u64 options; + u8 up; +}; + +int dpsw_if_get_link_state(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 if_id, struct dpsw_link_state *state); + +/** + * struct dpsw_tci_cfg - Tag Control Information (TCI) configuration + * @pcp: Priority Code Point (PCP): a 3-bit field which refers + * to the IEEE 802.1p priority + * @dei: Drop Eligible Indicator (DEI): a 1-bit field. May be used + * separately or in conjunction with PCP to indicate frames + * eligible to be dropped in the presence of congestion + * @vlan_id: VLAN Identifier (VID): a 12-bit field specifying the VLAN + * to which the frame belongs. 
The hexadecimal values + * of 0x000 and 0xFFF are reserved; + * all other values may be used as VLAN identifiers, + * allowing up to 4,094 VLANs + */ +struct dpsw_tci_cfg { + u8 pcp; + u8 dei; + u16 vlan_id; +}; + +int dpsw_if_set_tci(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id, + const struct dpsw_tci_cfg *cfg); + +int dpsw_if_get_tci(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id, + struct dpsw_tci_cfg *cfg); + +/** + * enum dpsw_stp_state - Spanning Tree Protocol (STP) states + * @DPSW_STP_STATE_DISABLED: Disabled state + * @DPSW_STP_STATE_LISTENING: Listening state + * @DPSW_STP_STATE_LEARNING: Learning state + * @DPSW_STP_STATE_FORWARDING: Forwarding state + * @DPSW_STP_STATE_BLOCKING: Blocking state + * + */ +enum dpsw_stp_state { + DPSW_STP_STATE_DISABLED = 0, + DPSW_STP_STATE_LISTENING = 1, + DPSW_STP_STATE_LEARNING = 2, + DPSW_STP_STATE_FORWARDING = 3, + DPSW_STP_STATE_BLOCKING = 0 +}; + +/** + * struct dpsw_stp_cfg - Spanning Tree Protocol (STP) Configuration + * @vlan_id: VLAN ID STP state + * @state: STP state + */ +struct dpsw_stp_cfg { + u16 vlan_id; + enum dpsw_stp_state state; +}; + +int dpsw_if_set_stp(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id, + const struct dpsw_stp_cfg *cfg); + +/** + * enum dpsw_accepted_frames - Types of frames to accept + * @DPSW_ADMIT_ALL: The device accepts VLAN tagged, untagged and + * priority tagged frames + * @DPSW_ADMIT_ONLY_VLAN_TAGGED: The device discards untagged frames or + * Priority-Tagged frames received on this interface. + * + */ +enum dpsw_accepted_frames { + DPSW_ADMIT_ALL = 1, + DPSW_ADMIT_ONLY_VLAN_TAGGED = 3 +}; + +/** + * enum dpsw_counter - Counters types + * @DPSW_CNT_ING_FRAME: Counts ingress frames + * @DPSW_CNT_ING_BYTE: Counts ingress bytes + * @DPSW_CNT_ING_FLTR_FRAME: Counts filtered ingress frames + * @DPSW_CNT_ING_FRAME_DISCARD: Counts discarded ingress frame + * @DPSW_CNT_ING_MCAST_FRAME: Counts ingress multicast frames + * @DPSW_CNT_ING_MCAST_BYTE: Counts ingress multicast bytes + * @DPSW_CNT_ING_BCAST_FRAME: Counts ingress broadcast frames + * @DPSW_CNT_ING_BCAST_BYTES: Counts ingress broadcast bytes + * @DPSW_CNT_EGR_FRAME: Counts egress frames + * @DPSW_CNT_EGR_BYTE: Counts egress bytes + * @DPSW_CNT_EGR_FRAME_DISCARD: Counts discarded egress frames + * @DPSW_CNT_EGR_STP_FRAME_DISCARD: Counts egress STP discarded frames + * @DPSW_CNT_ING_NO_BUFF_DISCARD: Counts ingress no buffer discarded frames + */ +enum dpsw_counter { + DPSW_CNT_ING_FRAME = 0x0, + DPSW_CNT_ING_BYTE = 0x1, + DPSW_CNT_ING_FLTR_FRAME = 0x2, + DPSW_CNT_ING_FRAME_DISCARD = 0x3, + DPSW_CNT_ING_MCAST_FRAME = 0x4, + DPSW_CNT_ING_MCAST_BYTE = 0x5, + DPSW_CNT_ING_BCAST_FRAME = 0x6, + DPSW_CNT_ING_BCAST_BYTES = 0x7, + DPSW_CNT_EGR_FRAME = 0x8, + DPSW_CNT_EGR_BYTE = 0x9, + DPSW_CNT_EGR_FRAME_DISCARD = 0xa, + DPSW_CNT_EGR_STP_FRAME_DISCARD = 0xb, + DPSW_CNT_ING_NO_BUFF_DISCARD = 0xc, +}; + +int dpsw_if_get_counter(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 if_id, enum dpsw_counter type, u64 *counter); + +int dpsw_if_enable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id); + +int dpsw_if_disable(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 if_id); + +/** + * struct dpsw_if_attr - Structure representing DPSW interface attributes + * @num_tcs: Number of traffic classes + * @rate: Transmit rate in bits per second + * @options: Interface configuration options (bitmap) + * @enabled: Indicates if interface is enabled + * @accept_all_vlan: The device discards/accepts incoming 
frames + * for VLANs that do not include this interface + * @admit_untagged: When set to 'DPSW_ADMIT_ONLY_VLAN_TAGGED', the device + * discards untagged frames or priority-tagged frames received on + * this interface; + * When set to 'DPSW_ADMIT_ALL', untagged frames or priority- + * tagged frames received on this interface are accepted + * @qdid: control frames transmit qdid + */ +struct dpsw_if_attr { + u8 num_tcs; + u32 rate; + u32 options; + int enabled; + int accept_all_vlan; + enum dpsw_accepted_frames admit_untagged; + u16 qdid; +}; + +int dpsw_if_get_attributes(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 if_id, struct dpsw_if_attr *attr); + +int dpsw_if_set_max_frame_length(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 if_id, u16 frame_length); + +/** + * struct dpsw_vlan_cfg - VLAN Configuration + * @fdb_id: Forwarding Data Base + */ +struct dpsw_vlan_cfg { + u16 fdb_id; +}; + +int dpsw_vlan_add(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 vlan_id, const struct dpsw_vlan_cfg *cfg); + +#define DPSW_VLAN_ADD_IF_OPT_FDB_ID 0x0001 + +/** + * struct dpsw_vlan_if_cfg - Set of VLAN Interfaces + * @num_ifs: The number of interfaces that are assigned to the egress + * list for this VLAN + * @if_id: The set of interfaces that are + * assigned to the egress list for this VLAN + * @options: Options map for this command (DPSW_VLAN_ADD_IF_OPT_FDB_ID) + * @fdb_id: FDB id to be used by this VLAN on these specific interfaces + * (taken into account only if the DPSW_VLAN_ADD_IF_OPT_FDB_ID is + * specified in the options field) + */ +struct dpsw_vlan_if_cfg { + u16 num_ifs; + u16 options; + u16 if_id[DPSW_MAX_IF]; + u16 fdb_id; +}; + +int dpsw_vlan_add_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 vlan_id, const struct dpsw_vlan_if_cfg *cfg); + +int dpsw_vlan_add_if_untagged(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 vlan_id, const struct dpsw_vlan_if_cfg *cfg); + +int dpsw_vlan_remove_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 vlan_id, const struct dpsw_vlan_if_cfg *cfg); + +int dpsw_vlan_remove_if_untagged(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 vlan_id, const struct dpsw_vlan_if_cfg *cfg); + +int dpsw_vlan_remove(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 vlan_id); + +/** + * enum dpsw_fdb_entry_type - FDB Entry type - Static/Dynamic + * @DPSW_FDB_ENTRY_STATIC: Static entry + * @DPSW_FDB_ENTRY_DINAMIC: Dynamic entry + */ +enum dpsw_fdb_entry_type { + DPSW_FDB_ENTRY_STATIC = 0, + DPSW_FDB_ENTRY_DINAMIC = 1 +}; + +/** + * struct dpsw_fdb_unicast_cfg - Unicast entry configuration + * @type: Select static or dynamic entry + * @mac_addr: MAC address + * @if_egress: Egress interface ID + */ +struct dpsw_fdb_unicast_cfg { + enum dpsw_fdb_entry_type type; + u8 mac_addr[6]; + u16 if_egress; +}; + +int dpsw_fdb_add_unicast(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 fdb_id, const struct dpsw_fdb_unicast_cfg *cfg); + +int dpsw_fdb_remove_unicast(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 fdb_id, const struct dpsw_fdb_unicast_cfg *cfg); + +#define DPSW_FDB_ENTRY_TYPE_DYNAMIC BIT(0) +#define DPSW_FDB_ENTRY_TYPE_UNICAST BIT(1) + +/** + * struct fdb_dump_entry - fdb snapshot entry + * @mac_addr: MAC address + * @type: bit0 - DINAMIC(1)/STATIC(0), bit1 - UNICAST(1)/MULTICAST(0) + * @if_info: unicast - egress interface, multicast - number of egress interfaces + * @if_mask: multicast - egress interface mask + */ +struct fdb_dump_entry { + u8 mac_addr[6]; + u8 type; + u8 
if_info;
+	u8 if_mask[8];
+};
+
+int dpsw_fdb_dump(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 fdb_id,
+		  u64 iova_addr, u32 iova_size, u16 *num_entries);
+
+/**
+ * struct dpsw_fdb_multicast_cfg - Multicast entry configuration
+ * @type: Select static or dynamic entry
+ * @mac_addr: MAC address
+ * @num_ifs: Number of external and internal interfaces
+ * @if_id: Egress interface IDs
+ */
+struct dpsw_fdb_multicast_cfg {
+	enum dpsw_fdb_entry_type type;
+	u8 mac_addr[6];
+	u16 num_ifs;
+	u16 if_id[DPSW_MAX_IF];
+};
+
+int dpsw_fdb_add_multicast(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			   u16 fdb_id, const struct dpsw_fdb_multicast_cfg *cfg);
+
+int dpsw_fdb_remove_multicast(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			      u16 fdb_id, const struct dpsw_fdb_multicast_cfg *cfg);
+
+/**
+ * enum dpsw_learning_mode - Auto-learning modes
+ * @DPSW_LEARNING_MODE_DIS: Disable auto-learning
+ * @DPSW_LEARNING_MODE_HW: Enable HW auto-learning
+ * @DPSW_LEARNING_MODE_NON_SECURE: Enable non-secure learning by CPU
+ * @DPSW_LEARNING_MODE_SECURE: Enable secure learning by CPU
+ *
+ * NON-SECURE LEARNING
+ * SMAC found	DMAC found	CTLU Action
+ * v		v		Forward frame to
+ *				1.  DMAC destination
+ * -		v		Forward frame to
+ *				1.  DMAC destination
+ *				2.  Control interface
+ * v		-		Forward frame to
+ *				1.  Flooding list of interfaces
+ * -		-		Forward frame to
+ *				1.  Flooding list of interfaces
+ *				2.  Control interface
+ * SECURE LEARNING
+ * SMAC found	DMAC found	CTLU Action
+ * v		v		Forward frame to
+ *				1.  DMAC destination
+ * -		v		Forward frame to
+ *				1.  Control interface
+ * v		-		Forward frame to
+ *				1.  Flooding list of interfaces
+ * -		-		Forward frame to
+ *				1.  Control interface
+ */
+enum dpsw_learning_mode {
+	DPSW_LEARNING_MODE_DIS = 0,
+	DPSW_LEARNING_MODE_HW = 1,
+	DPSW_LEARNING_MODE_NON_SECURE = 2,
+	DPSW_LEARNING_MODE_SECURE = 3
+};
+
+/**
+ * struct dpsw_fdb_attr - FDB Attributes
+ * @max_fdb_entries: Number of FDB entries
+ * @fdb_ageing_time: Ageing time in seconds
+ * @learning_mode: Learning mode
+ * @num_fdb_mc_groups: Current number of multicast groups
+ * @max_fdb_mc_groups: Maximum number of multicast groups
+ */
+struct dpsw_fdb_attr {
+	u16 max_fdb_entries;
+	u16 fdb_ageing_time;
+	enum dpsw_learning_mode learning_mode;
+	u16 num_fdb_mc_groups;
+	u16 max_fdb_mc_groups;
+};
+
+int dpsw_get_api_version(struct fsl_mc_io *mc_io, u32 cmd_flags,
+			 u16 *major_ver, u16 *minor_ver);
+
+int dpsw_if_get_port_mac_addr(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			      u16 if_id, u8 mac_addr[6]);
+
+/**
+ * struct dpsw_fdb_cfg - FDB Configuration
+ * @num_fdb_entries: Number of FDB entries
+ * @fdb_ageing_time: Ageing time in seconds
+ */
+struct dpsw_fdb_cfg {
+	u16 num_fdb_entries;
+	u16 fdb_ageing_time;
+};
+
+int dpsw_fdb_add(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 *fdb_id,
+		 const struct dpsw_fdb_cfg *cfg);
+
+int dpsw_fdb_remove(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 fdb_id);
+
+/**
+ * enum dpsw_flood_type - Define the flood type of a DPSW object
+ * @DPSW_BROADCAST: Broadcast flooding
+ * @DPSW_FLOODING: Unknown flooding
+ */
+enum dpsw_flood_type {
+	DPSW_BROADCAST = 0,
+	DPSW_FLOODING,
+};
+
+struct dpsw_egress_flood_cfg {
+	u16 fdb_id;
+	enum dpsw_flood_type flood_type;
+	u16 num_ifs;
+	u16 if_id[DPSW_MAX_IF];
+};
+
+int dpsw_set_egress_flood(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			  const struct dpsw_egress_flood_cfg *cfg);
+
+int dpsw_if_set_learning_mode(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+			      u16 if_id, enum dpsw_learning_mode mode);
+
+/**
+ * struct dpsw_acl_cfg - ACL Configuration
+ * @max_entries: Number of ACL rules
+ */
+struct dpsw_acl_cfg {
+	u16 max_entries;
+};
+
+int dpsw_acl_add(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, u16 *acl_id,
+		 const struct dpsw_acl_cfg *cfg);
+
+int dpsw_acl_remove(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+		    u16 acl_id);
+
+/**
+ * struct dpsw_acl_if_cfg - List of interfaces to associate with an ACL table
+ * @num_ifs: Number of interfaces
+ * @if_id: List of interfaces
+ */
+struct dpsw_acl_if_cfg {
+	u16 num_ifs;
+	u16 if_id[DPSW_MAX_IF];
+};
+
+int dpsw_acl_add_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+		    u16 acl_id, const struct dpsw_acl_if_cfg *cfg);
+
+int dpsw_acl_remove_if(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token,
+		       u16 acl_id, const struct dpsw_acl_if_cfg *cfg);
+
+/**
+ * struct dpsw_acl_fields - ACL fields.
+ * @l2_dest_mac: Destination MAC address: BPDU, Multicast, Broadcast, Unicast,
+ *		 slow protocols, MVRP, STP
+ * @l2_source_mac: Source MAC address
+ * @l2_tpid: Layer 2 (Ethernet) protocol type, used to identify the following
+ *	     protocols: MPLS, PTP, PFC, ARP, Jumbo frames, LLDP, IEEE802.1ae,
+ *	     Q-in-Q, IPv4, IPv6, PPPoE
+ * @l2_pcp_dei: Priority Code Point (PCP) and Drop Eligible Indicator (DEI)
+ *		fields of the VLAN tag
+ * @l2_vlan_id: Layer 2 VLAN ID
+ * @l2_ether_type: Layer 2 Ethernet type
+ * @l3_dscp: Layer 3 differentiated services code point
+ * @l3_protocol: Tells the network layer at the destination host which
+ *		 protocol this packet belongs to. The following protocols are
+ *		 supported: ICMP, IGMP, IPv4 (encapsulation), TCP, IPv6
+ *		 (encapsulation), GRE, PTP
+ * @l3_source_ip: Source IPv4 IP
+ * @l3_dest_ip: Destination IPv4 IP
+ * @l4_source_port: Source TCP/UDP Port
+ * @l4_dest_port: Destination TCP/UDP Port
+ */
+struct dpsw_acl_fields {
+	u8 l2_dest_mac[6];
+	u8 l2_source_mac[6];
+	u16 l2_tpid;
+	u8 l2_pcp_dei;
+	u16 l2_vlan_id;
+	u16 l2_ether_type;
+	u8 l3_dscp;
+	u8 l3_protocol;
+	u32 l3_source_ip;
+	u32 l3_dest_ip;
+	u16 l4_source_port;
+	u16 l4_dest_port;
+};
+
+/**
+ * struct dpsw_acl_key - ACL key
+ * @match: Match fields
+ * @mask: Mask: b'1 - valid, b'0 - don't care
+ */
+struct dpsw_acl_key {
+	struct dpsw_acl_fields match;
+	struct dpsw_acl_fields mask;
+};
+
+/**
+ * enum dpsw_acl_action - Action to be taken on an ACL rule match
+ * @DPSW_ACL_ACTION_DROP: Drop frame
+ * @DPSW_ACL_ACTION_REDIRECT: Redirect to a certain port
+ * @DPSW_ACL_ACTION_ACCEPT: Accept frame
+ * @DPSW_ACL_ACTION_REDIRECT_TO_CTRL_IF: Redirect to control interface
+ */
+enum dpsw_acl_action {
+	DPSW_ACL_ACTION_DROP,
+	DPSW_ACL_ACTION_REDIRECT,
+	DPSW_ACL_ACTION_ACCEPT,
+	DPSW_ACL_ACTION_REDIRECT_TO_CTRL_IF
+};
+
+/**
+ * struct dpsw_acl_result - ACL action
+ * @action: Action to be taken when the ACL entry is hit
+ * @if_id: Interface ID to redirect the frame to; valid only if a redirect
+ *	   action is selected
+ */
+struct dpsw_acl_result {
+	enum dpsw_acl_action action;
+	u16 if_id;
+};
+
+/**
+ * struct dpsw_acl_entry_cfg - ACL entry
+ * @key_iova: I/O virtual address of DMA-able memory filled with key after call
+ *	      to dpsw_acl_prepare_entry_cfg()
+ * @result: Required action when an entry hit occurs
+ * @precedence: Precedence inside the ACL; 0 is lowest. This priority cannot
+ *		change during the lifetime of a policy. It is the user's
+ *		responsibility to space the priorities according to subsequent
+ *		rule additions.
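+ *
+ * For example (values illustrative), an entry that redirects matching
+ * frames to interface 2 could be filled in as:
+ *
+ *	struct dpsw_acl_entry_cfg cfg = {
+ *		.key_iova = key_iova,
+ *		.precedence = 0,
+ *		.result = {
+ *			.action = DPSW_ACL_ACTION_REDIRECT,
+ *			.if_id = 2,
+ *		},
+ *	};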
+ */ +struct dpsw_acl_entry_cfg { + u64 key_iova; + struct dpsw_acl_result result; + int precedence; +}; + +void dpsw_acl_prepare_entry_cfg(const struct dpsw_acl_key *key, + u8 *entry_cfg_buf); + +int dpsw_acl_add_entry(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 acl_id, const struct dpsw_acl_entry_cfg *cfg); + +int dpsw_acl_remove_entry(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 acl_id, const struct dpsw_acl_entry_cfg *cfg); +#endif /* __FSL_DPSW_H */ diff --git a/drivers/net/ethernet/freescale/enetc/Kconfig b/drivers/net/ethernet/freescale/enetc/Kconfig index ab92382c399a..cdc0ff89388a 100644 --- a/drivers/net/ethernet/freescale/enetc/Kconfig +++ b/drivers/net/ethernet/freescale/enetc/Kconfig @@ -2,6 +2,7 @@ config FSL_ENETC tristate "ENETC PF driver" depends on PCI && PCI_MSI + select FSL_ENETC_IERB select FSL_ENETC_MDIO select PHYLINK select PCS_LYNX @@ -25,6 +26,14 @@ config FSL_ENETC_VF If compiled as module (M), the module name is fsl-enetc-vf. +config FSL_ENETC_IERB + tristate "ENETC IERB driver" + help + This driver configures the Integrated Endpoint Register Block on NXP + LS1028A. + + If compiled as module (M), the module name is fsl-enetc-ierb. + config FSL_ENETC_MDIO tristate "ENETC MDIO driver" depends on PCI && MDIO_DEVRES && MDIO_BUS diff --git a/drivers/net/ethernet/freescale/enetc/Makefile b/drivers/net/ethernet/freescale/enetc/Makefile index 74f7ac253b8b..a139f2e9d59f 100644 --- a/drivers/net/ethernet/freescale/enetc/Makefile +++ b/drivers/net/ethernet/freescale/enetc/Makefile @@ -11,6 +11,9 @@ obj-$(CONFIG_FSL_ENETC_VF) += fsl-enetc-vf.o fsl-enetc-vf-y := enetc_vf.o $(common-objs) fsl-enetc-vf-$(CONFIG_FSL_ENETC_QOS) += enetc_qos.o +obj-$(CONFIG_FSL_ENETC_IERB) += fsl-enetc-ierb.o +fsl-enetc-ierb-y := enetc_ierb.o + obj-$(CONFIG_FSL_ENETC_MDIO) += fsl-enetc-mdio.o fsl-enetc-mdio-y := enetc_pci_mdio.o enetc_mdio.o diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 09471329f3a3..3ca93adb9662 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -2,88 +2,138 @@ /* Copyright 2017-2019 NXP */ #include "enetc.h" +#include <linux/bpf_trace.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/vmalloc.h> +#include <linux/ptp_classify.h> +#include <net/pkt_sched.h> -/* ENETC overhead: optional extension BD + 1 BD gap */ -#define ENETC_TXBDS_NEEDED(val) ((val) + 2) -/* max # of chained Tx BDs is 15, including head and extension BD */ -#define ENETC_MAX_SKB_FRAGS 13 -#define ENETC_TXBDS_MAX_NEEDED ENETC_TXBDS_NEEDED(ENETC_MAX_SKB_FRAGS + 1) - -static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb, - int active_offloads); - -netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev) +static int enetc_num_stack_tx_queues(struct enetc_ndev_priv *priv) { - struct enetc_ndev_priv *priv = netdev_priv(ndev); - struct enetc_bdr *tx_ring; - int count; + int num_tx_rings = priv->num_tx_rings; + int i; - tx_ring = priv->tx_ring[skb->queue_mapping]; + for (i = 0; i < priv->num_rx_rings; i++) + if (priv->rx_ring[i]->xdp.prog) + return num_tx_rings - num_possible_cpus(); - if (unlikely(skb_shinfo(skb)->nr_frags > ENETC_MAX_SKB_FRAGS)) - if (unlikely(skb_linearize(skb))) - goto drop_packet_err; + return num_tx_rings; +} - count = skb_shinfo(skb)->nr_frags + 1; /* fragments + head */ - if (enetc_bd_unused(tx_ring) < ENETC_TXBDS_NEEDED(count)) { - netif_stop_subqueue(ndev, tx_ring->index); - return 
NETDEV_TX_BUSY; - } +static struct enetc_bdr *enetc_rx_ring_from_xdp_tx_ring(struct enetc_ndev_priv *priv, + struct enetc_bdr *tx_ring) +{ + int index = &priv->tx_ring[tx_ring->index] - priv->xdp_tx_ring; - enetc_lock_mdio(); - count = enetc_map_tx_buffs(tx_ring, skb, priv->active_offloads); - enetc_unlock_mdio(); + return priv->rx_ring[index]; +} - if (unlikely(!count)) - goto drop_packet_err; +static struct sk_buff *enetc_tx_swbd_get_skb(struct enetc_tx_swbd *tx_swbd) +{ + if (tx_swbd->is_xdp_tx || tx_swbd->is_xdp_redirect) + return NULL; - if (enetc_bd_unused(tx_ring) < ENETC_TXBDS_MAX_NEEDED) - netif_stop_subqueue(ndev, tx_ring->index); + return tx_swbd->skb; +} - return NETDEV_TX_OK; +static struct xdp_frame * +enetc_tx_swbd_get_xdp_frame(struct enetc_tx_swbd *tx_swbd) +{ + if (tx_swbd->is_xdp_redirect) + return tx_swbd->xdp_frame; -drop_packet_err: - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; + return NULL; } static void enetc_unmap_tx_buff(struct enetc_bdr *tx_ring, struct enetc_tx_swbd *tx_swbd) { + /* For XDP_TX, pages come from RX, whereas for the other contexts where + * we have is_dma_page_set, those come from skb_frag_dma_map. We need + * to match the DMA mapping length, so we need to differentiate those. + */ if (tx_swbd->is_dma_page) dma_unmap_page(tx_ring->dev, tx_swbd->dma, - tx_swbd->len, DMA_TO_DEVICE); + tx_swbd->is_xdp_tx ? PAGE_SIZE : tx_swbd->len, + tx_swbd->dir); else dma_unmap_single(tx_ring->dev, tx_swbd->dma, - tx_swbd->len, DMA_TO_DEVICE); + tx_swbd->len, tx_swbd->dir); tx_swbd->dma = 0; } -static void enetc_free_tx_skb(struct enetc_bdr *tx_ring, - struct enetc_tx_swbd *tx_swbd) +static void enetc_free_tx_frame(struct enetc_bdr *tx_ring, + struct enetc_tx_swbd *tx_swbd) { + struct xdp_frame *xdp_frame = enetc_tx_swbd_get_xdp_frame(tx_swbd); + struct sk_buff *skb = enetc_tx_swbd_get_skb(tx_swbd); + if (tx_swbd->dma) enetc_unmap_tx_buff(tx_ring, tx_swbd); - if (tx_swbd->skb) { - dev_kfree_skb_any(tx_swbd->skb); + if (xdp_frame) { + xdp_return_frame(tx_swbd->xdp_frame); + tx_swbd->xdp_frame = NULL; + } else if (skb) { + dev_kfree_skb_any(skb); tx_swbd->skb = NULL; } } -static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb, - int active_offloads) +/* Let H/W know BD ring has been updated */ +static void enetc_update_tx_ring_tail(struct enetc_bdr *tx_ring) +{ + /* includes wmb() */ + enetc_wr_reg_hot(tx_ring->tpir, tx_ring->next_to_use); +} + +static int enetc_ptp_parse(struct sk_buff *skb, u8 *udp, + u8 *msgtype, u8 *twostep, + u16 *correction_offset, u16 *body_offset) +{ + unsigned int ptp_class; + struct ptp_header *hdr; + unsigned int type; + u8 *base; + + ptp_class = ptp_classify_raw(skb); + if (ptp_class == PTP_CLASS_NONE) + return -EINVAL; + + hdr = ptp_parse_header(skb, ptp_class); + if (!hdr) + return -EINVAL; + + type = ptp_class & PTP_CLASS_PMASK; + if (type == PTP_CLASS_IPV4 || type == PTP_CLASS_IPV6) + *udp = 1; + else + *udp = 0; + + *msgtype = ptp_get_msgtype(hdr, ptp_class); + *twostep = hdr->flag_field[0] & 0x2; + + base = skb_mac_header(skb); + *correction_offset = (u8 *)&hdr->correction - base; + *body_offset = (u8 *)hdr + sizeof(struct ptp_header) - base; + + return 0; +} + +static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb) { + bool do_vlan, do_onestep_tstamp = false, do_twostep_tstamp = false; + struct enetc_ndev_priv *priv = netdev_priv(tx_ring->ndev); + struct enetc_hw *hw = &priv->si->hw; struct enetc_tx_swbd *tx_swbd; - skb_frag_t *frag; int len = skb_headlen(skb); union enetc_tx_bd 
temp_bd; + u8 msgtype, twostep, udp; union enetc_tx_bd *txbd; - bool do_vlan, do_tstamp; + u16 offset1, offset2; int i, count = 0; + skb_frag_t *frag; unsigned int f; dma_addr_t dma; u8 flags = 0; @@ -104,15 +154,25 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb, tx_swbd->dma = dma; tx_swbd->len = len; tx_swbd->is_dma_page = 0; + tx_swbd->dir = DMA_TO_DEVICE; count++; do_vlan = skb_vlan_tag_present(skb); - do_tstamp = (active_offloads & ENETC_F_TX_TSTAMP) && - (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP); - tx_swbd->do_tstamp = do_tstamp; - tx_swbd->check_wb = tx_swbd->do_tstamp; + if (skb->cb[0] & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) { + if (enetc_ptp_parse(skb, &udp, &msgtype, &twostep, &offset1, + &offset2) || + msgtype != PTP_MSGTYPE_SYNC || twostep) + WARN_ONCE(1, "Bad packet for one-step timestamping\n"); + else + do_onestep_tstamp = true; + } else if (skb->cb[0] & ENETC_F_TX_TSTAMP) { + do_twostep_tstamp = true; + } - if (do_vlan || do_tstamp) + tx_swbd->do_twostep_tstamp = do_twostep_tstamp; + tx_swbd->check_wb = tx_swbd->do_twostep_tstamp; + + if (do_vlan || do_onestep_tstamp || do_twostep_tstamp) flags |= ENETC_TXBD_FLAGS_EX; if (tx_ring->tsd_enable) @@ -149,7 +209,40 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb, e_flags |= ENETC_TXBD_E_FLAGS_VLAN_INS; } - if (do_tstamp) { + if (do_onestep_tstamp) { + u32 lo, hi, val; + u64 sec, nsec; + u8 *data; + + lo = enetc_rd_hot(hw, ENETC_SICTR0); + hi = enetc_rd_hot(hw, ENETC_SICTR1); + sec = (u64)hi << 32 | lo; + nsec = do_div(sec, 1000000000); + + /* Configure extension BD */ + temp_bd.ext.tstamp = cpu_to_le32(lo & 0x3fffffff); + e_flags |= ENETC_TXBD_E_FLAGS_ONE_STEP_PTP; + + /* Update originTimestamp field of Sync packet + * - 48 bits seconds field + * - 32 bits nanseconds field + */ + data = skb_mac_header(skb); + *(__be16 *)(data + offset2) = + htons((sec >> 32) & 0xffff); + *(__be32 *)(data + offset2 + 2) = + htonl(sec & 0xffffffff); + *(__be32 *)(data + offset2 + 6) = htonl(nsec); + + /* Configure single-step register */ + val = ENETC_PM0_SINGLE_STEP_EN; + val |= ENETC_SET_SINGLE_STEP_OFFSET(offset1); + if (udp) + val |= ENETC_PM0_SINGLE_STEP_CH; + + enetc_port_wr(hw, ENETC_PM0_SINGLE_STEP, val); + enetc_port_wr(hw, ENETC_PM1_SINGLE_STEP, val); + } else if (do_twostep_tstamp) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; e_flags |= ENETC_TXBD_E_FLAGS_TWO_STEP_PTP; } @@ -186,6 +279,7 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb, tx_swbd->dma = dma; tx_swbd->len = len; tx_swbd->is_dma_page = 1; + tx_swbd->dir = DMA_TO_DEVICE; count++; } @@ -194,6 +288,7 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb, temp_bd.flags = flags; *txbd = temp_bd; + tx_ring->tx_swbd[i].is_eof = true; tx_ring->tx_swbd[i].skb = skb; enetc_bdr_idx_inc(tx_ring, &i); @@ -201,8 +296,7 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb, skb_tx_timestamp(skb); - /* let H/W know BD ring has been updated */ - enetc_wr_reg_hot(tx_ring->tpir, i); /* includes wmb() */ + enetc_update_tx_ring_tail(tx_ring); return count; @@ -211,7 +305,7 @@ dma_err: do { tx_swbd = &tx_ring->tx_swbd[i]; - enetc_free_tx_skb(tx_ring, tx_swbd); + enetc_free_tx_frame(tx_ring, tx_swbd); if (i == 0) i = tx_ring->bd_count; i--; @@ -220,6 +314,76 @@ dma_err: return 0; } +static netdev_tx_t enetc_start_xmit(struct sk_buff *skb, + struct net_device *ndev) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_bdr *tx_ring; 
+ int count; + + /* Queue one-step Sync packet if already locked */ + if (skb->cb[0] & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) { + if (test_and_set_bit_lock(ENETC_TX_ONESTEP_TSTAMP_IN_PROGRESS, + &priv->flags)) { + skb_queue_tail(&priv->tx_skbs, skb); + return NETDEV_TX_OK; + } + } + + tx_ring = priv->tx_ring[skb->queue_mapping]; + + if (unlikely(skb_shinfo(skb)->nr_frags > ENETC_MAX_SKB_FRAGS)) + if (unlikely(skb_linearize(skb))) + goto drop_packet_err; + + count = skb_shinfo(skb)->nr_frags + 1; /* fragments + head */ + if (enetc_bd_unused(tx_ring) < ENETC_TXBDS_NEEDED(count)) { + netif_stop_subqueue(ndev, tx_ring->index); + return NETDEV_TX_BUSY; + } + + enetc_lock_mdio(); + count = enetc_map_tx_buffs(tx_ring, skb); + enetc_unlock_mdio(); + + if (unlikely(!count)) + goto drop_packet_err; + + if (enetc_bd_unused(tx_ring) < ENETC_TXBDS_MAX_NEEDED) + netif_stop_subqueue(ndev, tx_ring->index); + + return NETDEV_TX_OK; + +drop_packet_err: + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +} + +netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + u8 udp, msgtype, twostep; + u16 offset1, offset2; + + /* Mark tx timestamp type on skb->cb[0] if requires */ + if ((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + (priv->active_offloads & ENETC_F_TX_TSTAMP_MASK)) { + skb->cb[0] = priv->active_offloads & ENETC_F_TX_TSTAMP_MASK; + } else { + skb->cb[0] = 0; + } + + /* Fall back to two-step timestamp if not one-step Sync packet */ + if (skb->cb[0] & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) { + if (enetc_ptp_parse(skb, &udp, &msgtype, &twostep, + &offset1, &offset2) || + msgtype != PTP_MSGTYPE_SYNC || twostep != 0) + skb->cb[0] = ENETC_F_TX_TSTAMP; + } + + return enetc_start_xmit(skb, ndev); +} + static irqreturn_t enetc_msix(int irq, void *data) { struct enetc_int_vector *v = data; @@ -241,10 +405,6 @@ static irqreturn_t enetc_msix(int irq, void *data) return IRQ_HANDLED; } -static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget); -static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring, - struct napi_struct *napi, int work_limit); - static void enetc_rx_dim_work(struct work_struct *w) { struct dim *dim = container_of(w, struct dim, work); @@ -273,55 +433,30 @@ static void enetc_rx_net_dim(struct enetc_int_vector *v) net_dim(&v->rx_dim, dim_sample); } -static int enetc_poll(struct napi_struct *napi, int budget) +static int enetc_bd_ready_count(struct enetc_bdr *tx_ring, int ci) { - struct enetc_int_vector - *v = container_of(napi, struct enetc_int_vector, napi); - bool complete = true; - int work_done; - int i; - - enetc_lock_mdio(); - - for (i = 0; i < v->count_tx_rings; i++) - if (!enetc_clean_tx_ring(&v->tx_ring[i], budget)) - complete = false; - - work_done = enetc_clean_rx_ring(&v->rx_ring, napi, budget); - if (work_done == budget) - complete = false; - if (work_done) - v->rx_napi_work = true; - - if (!complete) { - enetc_unlock_mdio(); - return budget; - } - - napi_complete_done(napi, work_done); - - if (likely(v->rx_dim_en)) - enetc_rx_net_dim(v); - - v->rx_napi_work = false; - - /* enable interrupts */ - enetc_wr_reg_hot(v->rbier, ENETC_RBIER_RXTIE); - - for_each_set_bit(i, &v->tx_rings_map, ENETC_MAX_NUM_TXQS) - enetc_wr_reg_hot(v->tbier_base + ENETC_BDR_OFF(i), - ENETC_TBIER_TXTIE); + int pi = enetc_rd_reg_hot(tx_ring->tcir) & ENETC_TBCIR_IDX_MASK; - enetc_unlock_mdio(); + return pi >= ci ? 
pi - ci : tx_ring->bd_count - ci + pi; +} - return work_done; +static bool enetc_page_reusable(struct page *page) +{ + return (!page_is_pfmemalloc(page) && page_ref_count(page) == 1); } -static int enetc_bd_ready_count(struct enetc_bdr *tx_ring, int ci) +static void enetc_reuse_page(struct enetc_bdr *rx_ring, + struct enetc_rx_swbd *old) { - int pi = enetc_rd_reg_hot(tx_ring->tcir) & ENETC_TBCIR_IDX_MASK; + struct enetc_rx_swbd *new; - return pi >= ci ? pi - ci : tx_ring->bd_count - ci + pi; + new = &rx_ring->rx_swbd[rx_ring->next_to_alloc]; + + /* next buf that may reuse a page */ + enetc_bdr_idx_inc(rx_ring, &rx_ring->next_to_alloc); + + /* copy page reference */ + *new = *old; } static void enetc_get_tx_tstamp(struct enetc_hw *hw, union enetc_tx_bd *txbd, @@ -344,23 +479,58 @@ static void enetc_tstamp_tx(struct sk_buff *skb, u64 tstamp) if (skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) { memset(&shhwtstamps, 0, sizeof(shhwtstamps)); shhwtstamps.hwtstamp = ns_to_ktime(tstamp); - /* Ensure skb_mstamp_ns, which might have been populated with - * the txtime, is not mistaken for a software timestamp, - * because this will prevent the dispatch of our hardware - * timestamp to the socket. - */ - skb->tstamp = ktime_set(0, 0); + skb_txtime_consumed(skb); skb_tstamp_tx(skb, &shhwtstamps); } } +static void enetc_recycle_xdp_tx_buff(struct enetc_bdr *tx_ring, + struct enetc_tx_swbd *tx_swbd) +{ + struct enetc_ndev_priv *priv = netdev_priv(tx_ring->ndev); + struct enetc_rx_swbd rx_swbd = { + .dma = tx_swbd->dma, + .page = tx_swbd->page, + .page_offset = tx_swbd->page_offset, + .dir = tx_swbd->dir, + .len = tx_swbd->len, + }; + struct enetc_bdr *rx_ring; + + rx_ring = enetc_rx_ring_from_xdp_tx_ring(priv, tx_ring); + + if (likely(enetc_swbd_unused(rx_ring))) { + enetc_reuse_page(rx_ring, &rx_swbd); + + /* sync for use by the device */ + dma_sync_single_range_for_device(rx_ring->dev, rx_swbd.dma, + rx_swbd.page_offset, + ENETC_RXB_DMA_SIZE_XDP, + rx_swbd.dir); + + rx_ring->stats.recycles++; + } else { + /* RX ring is already full, we need to unmap and free the + * page, since there's nothing useful we can do with it. 
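+		 * Unmapping with the buffer's original DMA direction and
+		 * freeing the page keeps the DMA API usage balanced with
+		 * the dma_map_page() done at allocation time in
+		 * enetc_new_page().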
+ */ + rx_ring->stats.recycle_failures++; + + dma_unmap_page(rx_ring->dev, rx_swbd.dma, PAGE_SIZE, + rx_swbd.dir); + __free_page(rx_swbd.page); + } + + rx_ring->xdp.xdp_tx_in_flight--; +} + static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget) { struct net_device *ndev = tx_ring->ndev; + struct enetc_ndev_priv *priv = netdev_priv(ndev); int tx_frm_cnt = 0, tx_byte_cnt = 0; struct enetc_tx_swbd *tx_swbd; int i, bds_to_clean; - bool do_tstamp; + bool do_twostep_tstamp; u64 tstamp = 0; i = tx_ring->next_to_clean; @@ -368,10 +538,12 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget) bds_to_clean = enetc_bd_ready_count(tx_ring, i); - do_tstamp = false; + do_twostep_tstamp = false; while (bds_to_clean && tx_frm_cnt < ENETC_DEFAULT_TX_WORK) { - bool is_eof = !!tx_swbd->skb; + struct xdp_frame *xdp_frame = enetc_tx_swbd_get_xdp_frame(tx_swbd); + struct sk_buff *skb = enetc_tx_swbd_get_skb(tx_swbd); + bool is_eof = tx_swbd->is_eof; if (unlikely(tx_swbd->check_wb)) { struct enetc_ndev_priv *priv = netdev_priv(ndev); @@ -380,26 +552,40 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget) txbd = ENETC_TXBD(*tx_ring, i); if (txbd->flags & ENETC_TXBD_FLAGS_W && - tx_swbd->do_tstamp) { + tx_swbd->do_twostep_tstamp) { enetc_get_tx_tstamp(&priv->si->hw, txbd, &tstamp); - do_tstamp = true; + do_twostep_tstamp = true; } } - if (likely(tx_swbd->dma)) + if (tx_swbd->is_xdp_tx) + enetc_recycle_xdp_tx_buff(tx_ring, tx_swbd); + else if (likely(tx_swbd->dma)) enetc_unmap_tx_buff(tx_ring, tx_swbd); - if (is_eof) { - if (unlikely(do_tstamp)) { - enetc_tstamp_tx(tx_swbd->skb, tstamp); - do_tstamp = false; + if (xdp_frame) { + xdp_return_frame(xdp_frame); + } else if (skb) { + if (unlikely(tx_swbd->skb->cb[0] & + ENETC_F_TX_ONESTEP_SYNC_TSTAMP)) { + /* Start work to release lock for next one-step + * timestamping packet. And send one skb in + * tx_skbs queue if has. + */ + schedule_work(&priv->tx_onestep_tstamp); + } else if (unlikely(do_twostep_tstamp)) { + enetc_tstamp_tx(skb, tstamp); + do_twostep_tstamp = false; } - napi_consume_skb(tx_swbd->skb, napi_budget); - tx_swbd->skb = NULL; + napi_consume_skb(skb, napi_budget); } tx_byte_cnt += tx_swbd->len; + /* Scrub the swbd here so we don't have to do that + * when we reuse it during xmit + */ + memset(tx_swbd, 0, sizeof(*tx_swbd)); bds_to_clean--; tx_swbd++; @@ -437,6 +623,7 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget) static bool enetc_new_page(struct enetc_bdr *rx_ring, struct enetc_rx_swbd *rx_swbd) { + bool xdp = !!(rx_ring->xdp.prog); struct page *page; dma_addr_t addr; @@ -444,7 +631,10 @@ static bool enetc_new_page(struct enetc_bdr *rx_ring, if (unlikely(!page)) return false; - addr = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); + /* For XDP_TX, we forgo dma_unmap -> dma_map */ + rx_swbd->dir = xdp ? 
DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; + + addr = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, rx_swbd->dir); if (unlikely(dma_mapping_error(rx_ring->dev, addr))) { __free_page(page); @@ -453,7 +643,7 @@ static bool enetc_new_page(struct enetc_bdr *rx_ring, rx_swbd->dma = addr; rx_swbd->page = page; - rx_swbd->page_offset = ENETC_RXB_PAD; + rx_swbd->page_offset = rx_ring->buffer_offset; return true; } @@ -483,18 +673,16 @@ static int enetc_refill_rx_ring(struct enetc_bdr *rx_ring, const int buff_cnt) /* clear 'R" as well */ rxbd->r.lstatus = 0; - rxbd = enetc_rxbd_next(rx_ring, rxbd, i); - rx_swbd++; - i++; - if (unlikely(i == rx_ring->bd_count)) { - i = 0; - rx_swbd = rx_ring->rx_swbd; - } + enetc_rxbd_next(rx_ring, &rxbd, &i); + rx_swbd = &rx_ring->rx_swbd[i]; } if (likely(j)) { rx_ring->next_to_alloc = i; /* keep track from page reuse */ rx_ring->next_to_use = i; + + /* update ENETC's consumer index */ + enetc_wr_reg_hot(rx_ring->rcir, rx_ring->next_to_use); } return j; @@ -570,32 +758,10 @@ static void enetc_get_offloads(struct enetc_bdr *rx_ring, #endif } -static void enetc_process_skb(struct enetc_bdr *rx_ring, - struct sk_buff *skb) -{ - skb_record_rx_queue(skb, rx_ring->index); - skb->protocol = eth_type_trans(skb, rx_ring->ndev); -} - -static bool enetc_page_reusable(struct page *page) -{ - return (!page_is_pfmemalloc(page) && page_ref_count(page) == 1); -} - -static void enetc_reuse_page(struct enetc_bdr *rx_ring, - struct enetc_rx_swbd *old) -{ - struct enetc_rx_swbd *new; - - new = &rx_ring->rx_swbd[rx_ring->next_to_alloc]; - - /* next buf that may reuse a page */ - enetc_bdr_idx_inc(rx_ring, &rx_ring->next_to_alloc); - - /* copy page reference */ - *new = *old; -} - +/* This gets called during the non-XDP NAPI poll cycle as well as on XDP_PASS, + * so it needs to work with both DMA_FROM_DEVICE as well as DMA_BIDIRECTIONAL + * mapped buffers. 
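+ *
+ * Relying on rx_swbd->dir for the dma_sync_single_range_for_cpu() call below,
+ * rather than hardcoding a direction, is what lets the same helper serve both
+ * configurations.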
+ */ static struct enetc_rx_swbd *enetc_get_rx_buff(struct enetc_bdr *rx_ring, int i, u16 size) { @@ -603,30 +769,39 @@ static struct enetc_rx_swbd *enetc_get_rx_buff(struct enetc_bdr *rx_ring, dma_sync_single_range_for_cpu(rx_ring->dev, rx_swbd->dma, rx_swbd->page_offset, - size, DMA_FROM_DEVICE); + size, rx_swbd->dir); return rx_swbd; } +/* Reuse the current page without performing half-page buffer flipping */ static void enetc_put_rx_buff(struct enetc_bdr *rx_ring, struct enetc_rx_swbd *rx_swbd) { + size_t buffer_size = ENETC_RXB_TRUESIZE - rx_ring->buffer_offset; + + enetc_reuse_page(rx_ring, rx_swbd); + + dma_sync_single_range_for_device(rx_ring->dev, rx_swbd->dma, + rx_swbd->page_offset, + buffer_size, rx_swbd->dir); + + rx_swbd->page = NULL; +} + +/* Reuse the current page by performing half-page buffer flipping */ +static void enetc_flip_rx_buff(struct enetc_bdr *rx_ring, + struct enetc_rx_swbd *rx_swbd) +{ if (likely(enetc_page_reusable(rx_swbd->page))) { rx_swbd->page_offset ^= ENETC_RXB_TRUESIZE; page_ref_inc(rx_swbd->page); - enetc_reuse_page(rx_ring, rx_swbd); - - /* sync for use by the device */ - dma_sync_single_range_for_device(rx_ring->dev, rx_swbd->dma, - rx_swbd->page_offset, - ENETC_RXB_DMA_SIZE, - DMA_FROM_DEVICE); + enetc_put_rx_buff(rx_ring, rx_swbd); } else { - dma_unmap_page(rx_ring->dev, rx_swbd->dma, - PAGE_SIZE, DMA_FROM_DEVICE); + dma_unmap_page(rx_ring->dev, rx_swbd->dma, PAGE_SIZE, + rx_swbd->dir); + rx_swbd->page = NULL; } - - rx_swbd->page = NULL; } static struct sk_buff *enetc_map_rx_buff_to_skb(struct enetc_bdr *rx_ring, @@ -637,16 +812,16 @@ static struct sk_buff *enetc_map_rx_buff_to_skb(struct enetc_bdr *rx_ring, void *ba; ba = page_address(rx_swbd->page) + rx_swbd->page_offset; - skb = build_skb(ba - ENETC_RXB_PAD, ENETC_RXB_TRUESIZE); + skb = build_skb(ba - rx_ring->buffer_offset, ENETC_RXB_TRUESIZE); if (unlikely(!skb)) { rx_ring->stats.rx_alloc_errs++; return NULL; } - skb_reserve(skb, ENETC_RXB_PAD); + skb_reserve(skb, rx_ring->buffer_offset); __skb_put(skb, size); - enetc_put_rx_buff(rx_ring, rx_swbd); + enetc_flip_rx_buff(rx_ring, rx_swbd); return skb; } @@ -659,7 +834,72 @@ static void enetc_add_rx_buff_to_skb(struct enetc_bdr *rx_ring, int i, skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_swbd->page, rx_swbd->page_offset, size, ENETC_RXB_TRUESIZE); - enetc_put_rx_buff(rx_ring, rx_swbd); + enetc_flip_rx_buff(rx_ring, rx_swbd); +} + +static bool enetc_check_bd_errors_and_consume(struct enetc_bdr *rx_ring, + u32 bd_status, + union enetc_rx_bd **rxbd, int *i) +{ + if (likely(!(bd_status & ENETC_RXBD_LSTATUS(ENETC_RXBD_ERR_MASK)))) + return false; + + enetc_put_rx_buff(rx_ring, &rx_ring->rx_swbd[*i]); + enetc_rxbd_next(rx_ring, rxbd, i); + + while (!(bd_status & ENETC_RXBD_LSTATUS_F)) { + dma_rmb(); + bd_status = le32_to_cpu((*rxbd)->r.lstatus); + + enetc_put_rx_buff(rx_ring, &rx_ring->rx_swbd[*i]); + enetc_rxbd_next(rx_ring, rxbd, i); + } + + rx_ring->ndev->stats.rx_dropped++; + rx_ring->ndev->stats.rx_errors++; + + return true; +} + +static struct sk_buff *enetc_build_skb(struct enetc_bdr *rx_ring, + u32 bd_status, union enetc_rx_bd **rxbd, + int *i, int *cleaned_cnt, int buffer_size) +{ + struct sk_buff *skb; + u16 size; + + size = le16_to_cpu((*rxbd)->r.buf_len); + skb = enetc_map_rx_buff_to_skb(rx_ring, *i, size); + if (!skb) + return NULL; + + enetc_get_offloads(rx_ring, *rxbd, skb); + + (*cleaned_cnt)++; + + enetc_rxbd_next(rx_ring, rxbd, i); + + /* not last BD in frame? 
*/ + while (!(bd_status & ENETC_RXBD_LSTATUS_F)) { + bd_status = le32_to_cpu((*rxbd)->r.lstatus); + size = buffer_size; + + if (bd_status & ENETC_RXBD_LSTATUS_F) { + dma_rmb(); + size = le16_to_cpu((*rxbd)->r.buf_len); + } + + enetc_add_rx_buff_to_skb(rx_ring, *i, size, skb); + + (*cleaned_cnt)++; + + enetc_rxbd_next(rx_ring, rxbd, i); + } + + skb_record_rx_queue(skb, rx_ring->index); + skb->protocol = eth_type_trans(skb, rx_ring->ndev); + + return skb; } #define ENETC_RXBD_BUNDLE 16 /* # of BDs to update at once */ @@ -678,15 +918,10 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring, union enetc_rx_bd *rxbd; struct sk_buff *skb; u32 bd_status; - u16 size; - if (cleaned_cnt >= ENETC_RXBD_BUNDLE) { - int count = enetc_refill_rx_ring(rx_ring, cleaned_cnt); - - /* update ENETC's consumer index */ - enetc_wr_reg_hot(rx_ring->rcir, rx_ring->next_to_use); - cleaned_cnt -= count; - } + if (cleaned_cnt >= ENETC_RXBD_BUNDLE) + cleaned_cnt -= enetc_refill_rx_ring(rx_ring, + cleaned_cnt); rxbd = enetc_rxbd(rx_ring, i); bd_status = le32_to_cpu(rxbd->r.lstatus); @@ -695,73 +930,511 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring, enetc_wr_reg_hot(rx_ring->idr, BIT(rx_ring->index)); dma_rmb(); /* for reading other rxbd fields */ - size = le16_to_cpu(rxbd->r.buf_len); - skb = enetc_map_rx_buff_to_skb(rx_ring, i, size); + + if (enetc_check_bd_errors_and_consume(rx_ring, bd_status, + &rxbd, &i)) + break; + + skb = enetc_build_skb(rx_ring, bd_status, &rxbd, &i, + &cleaned_cnt, ENETC_RXB_DMA_SIZE); if (!skb) break; - enetc_get_offloads(rx_ring, rxbd, skb); + rx_byte_cnt += skb->len; + rx_frm_cnt++; - cleaned_cnt++; + napi_gro_receive(napi, skb); + } - rxbd = enetc_rxbd_next(rx_ring, rxbd, i); - if (unlikely(++i == rx_ring->bd_count)) - i = 0; + rx_ring->next_to_clean = i; - if (unlikely(bd_status & - ENETC_RXBD_LSTATUS(ENETC_RXBD_ERR_MASK))) { - dev_kfree_skb(skb); - while (!(bd_status & ENETC_RXBD_LSTATUS_F)) { - dma_rmb(); - bd_status = le32_to_cpu(rxbd->r.lstatus); + rx_ring->stats.packets += rx_frm_cnt; + rx_ring->stats.bytes += rx_byte_cnt; - rxbd = enetc_rxbd_next(rx_ring, rxbd, i); - if (unlikely(++i == rx_ring->bd_count)) - i = 0; - } + return rx_frm_cnt; +} + +static void enetc_xdp_map_tx_buff(struct enetc_bdr *tx_ring, int i, + struct enetc_tx_swbd *tx_swbd, + int frm_len) +{ + union enetc_tx_bd *txbd = ENETC_TXBD(*tx_ring, i); + + prefetchw(txbd); + + enetc_clear_tx_bd(txbd); + txbd->addr = cpu_to_le64(tx_swbd->dma + tx_swbd->page_offset); + txbd->buf_len = cpu_to_le16(tx_swbd->len); + txbd->frm_len = cpu_to_le16(frm_len); + + memcpy(&tx_ring->tx_swbd[i], tx_swbd, sizeof(*tx_swbd)); +} + +/* Puts in the TX ring one XDP frame, mapped as an array of TX software buffer + * descriptors. 
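+ *
+ * Editorial sketch of the contract, not text from the original patch:
+ * entry 0 of @xdp_tx_arr holds the frame head and the entry with
+ * is_eof set marks the last fragment; the frame is refused up front
+ * unless the ring can absorb the worst case, per the
+ * ENETC_TXBDS_NEEDED() overhead of one optional extension BD plus one
+ * BD of gap:
+ *
+ *	if (enetc_bd_unused(tx_ring) < ENETC_TXBDS_NEEDED(num_tx_swbd))
+ *		return false;	// ring full: the caller drops the frame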
+ */ +static bool enetc_xdp_tx(struct enetc_bdr *tx_ring, + struct enetc_tx_swbd *xdp_tx_arr, int num_tx_swbd) +{ + struct enetc_tx_swbd *tmp_tx_swbd = xdp_tx_arr; + int i, k, frm_len = tmp_tx_swbd->len; + + if (unlikely(enetc_bd_unused(tx_ring) < ENETC_TXBDS_NEEDED(num_tx_swbd))) + return false; + + while (unlikely(!tmp_tx_swbd->is_eof)) { + tmp_tx_swbd++; + frm_len += tmp_tx_swbd->len; + } + + i = tx_ring->next_to_use; + + for (k = 0; k < num_tx_swbd; k++) { + struct enetc_tx_swbd *xdp_tx_swbd = &xdp_tx_arr[k]; + + enetc_xdp_map_tx_buff(tx_ring, i, xdp_tx_swbd, frm_len); + + /* last BD needs 'F' bit set */ + if (xdp_tx_swbd->is_eof) { + union enetc_tx_bd *txbd = ENETC_TXBD(*tx_ring, i); + + txbd->flags = ENETC_TXBD_FLAGS_F; + } + + enetc_bdr_idx_inc(tx_ring, &i); + } + + tx_ring->next_to_use = i; + + return true; +} + +static int enetc_xdp_frame_to_xdp_tx_swbd(struct enetc_bdr *tx_ring, + struct enetc_tx_swbd *xdp_tx_arr, + struct xdp_frame *xdp_frame) +{ + struct enetc_tx_swbd *xdp_tx_swbd = &xdp_tx_arr[0]; + struct skb_shared_info *shinfo; + void *data = xdp_frame->data; + int len = xdp_frame->len; + skb_frag_t *frag; + dma_addr_t dma; + unsigned int f; + int n = 0; + + dma = dma_map_single(tx_ring->dev, data, len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(tx_ring->dev, dma))) { + netdev_err(tx_ring->ndev, "DMA map error\n"); + return -1; + } + + xdp_tx_swbd->dma = dma; + xdp_tx_swbd->dir = DMA_TO_DEVICE; + xdp_tx_swbd->len = len; + xdp_tx_swbd->is_xdp_redirect = true; + xdp_tx_swbd->is_eof = false; + xdp_tx_swbd->xdp_frame = NULL; + + n++; + xdp_tx_swbd = &xdp_tx_arr[n]; + + shinfo = xdp_get_shared_info_from_frame(xdp_frame); + + for (f = 0, frag = &shinfo->frags[0]; f < shinfo->nr_frags; + f++, frag++) { + data = skb_frag_address(frag); + len = skb_frag_size(frag); + + dma = dma_map_single(tx_ring->dev, data, len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(tx_ring->dev, dma))) { + /* Undo the DMA mapping for all fragments */ + while (--n >= 0) + enetc_unmap_tx_buff(tx_ring, &xdp_tx_arr[n]); + + netdev_err(tx_ring->ndev, "DMA map error\n"); + return -1; + } + + xdp_tx_swbd->dma = dma; + xdp_tx_swbd->dir = DMA_TO_DEVICE; + xdp_tx_swbd->len = len; + xdp_tx_swbd->is_xdp_redirect = true; + xdp_tx_swbd->is_eof = false; + xdp_tx_swbd->xdp_frame = NULL; + + n++; + xdp_tx_swbd = &xdp_tx_arr[n]; + } + + xdp_tx_arr[n - 1].is_eof = true; + xdp_tx_arr[n - 1].xdp_frame = xdp_frame; + + return n; +} + +int enetc_xdp_xmit(struct net_device *ndev, int num_frames, + struct xdp_frame **frames, u32 flags) +{ + struct enetc_tx_swbd xdp_redirect_arr[ENETC_MAX_SKB_FRAGS] = {0}; + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_bdr *tx_ring; + int xdp_tx_bd_cnt, i, k; + int xdp_tx_frm_cnt = 0; + + enetc_lock_mdio(); + + tx_ring = priv->xdp_tx_ring[smp_processor_id()]; - rx_ring->ndev->stats.rx_dropped++; - rx_ring->ndev->stats.rx_errors++; + prefetchw(ENETC_TXBD(*tx_ring, tx_ring->next_to_use)); + for (k = 0; k < num_frames; k++) { + xdp_tx_bd_cnt = enetc_xdp_frame_to_xdp_tx_swbd(tx_ring, + xdp_redirect_arr, + frames[k]); + if (unlikely(xdp_tx_bd_cnt < 0)) + break; + + if (unlikely(!enetc_xdp_tx(tx_ring, xdp_redirect_arr, + xdp_tx_bd_cnt))) { + for (i = 0; i < xdp_tx_bd_cnt; i++) + enetc_unmap_tx_buff(tx_ring, + &xdp_redirect_arr[i]); + tx_ring->stats.xdp_tx_drops++; break; } - /* not last BD in frame? 
*/ - while (!(bd_status & ENETC_RXBD_LSTATUS_F)) { - bd_status = le32_to_cpu(rxbd->r.lstatus); - size = ENETC_RXB_DMA_SIZE; + xdp_tx_frm_cnt++; + } - if (bd_status & ENETC_RXBD_LSTATUS_F) { - dma_rmb(); - size = le16_to_cpu(rxbd->r.buf_len); - } + if (unlikely((flags & XDP_XMIT_FLUSH) || k != xdp_tx_frm_cnt)) + enetc_update_tx_ring_tail(tx_ring); + + tx_ring->stats.xdp_tx += xdp_tx_frm_cnt; + + enetc_unlock_mdio(); + + return xdp_tx_frm_cnt; +} + +static void enetc_map_rx_buff_to_xdp(struct enetc_bdr *rx_ring, int i, + struct xdp_buff *xdp_buff, u16 size) +{ + struct enetc_rx_swbd *rx_swbd = enetc_get_rx_buff(rx_ring, i, size); + void *hard_start = page_address(rx_swbd->page) + rx_swbd->page_offset; + struct skb_shared_info *shinfo; + + /* To be used for XDP_TX */ + rx_swbd->len = size; - enetc_add_rx_buff_to_skb(rx_ring, i, size, skb); + xdp_prepare_buff(xdp_buff, hard_start - rx_ring->buffer_offset, + rx_ring->buffer_offset, size, false); - cleaned_cnt++; + shinfo = xdp_get_shared_info_from_buff(xdp_buff); + shinfo->nr_frags = 0; +} + +static void enetc_add_rx_buff_to_xdp(struct enetc_bdr *rx_ring, int i, + u16 size, struct xdp_buff *xdp_buff) +{ + struct skb_shared_info *shinfo = xdp_get_shared_info_from_buff(xdp_buff); + struct enetc_rx_swbd *rx_swbd = enetc_get_rx_buff(rx_ring, i, size); + skb_frag_t *frag = &shinfo->frags[shinfo->nr_frags]; + + /* To be used for XDP_TX */ + rx_swbd->len = size; + + skb_frag_off_set(frag, rx_swbd->page_offset); + skb_frag_size_set(frag, size); + __skb_frag_set_page(frag, rx_swbd->page); + + shinfo->nr_frags++; +} + +static void enetc_build_xdp_buff(struct enetc_bdr *rx_ring, u32 bd_status, + union enetc_rx_bd **rxbd, int *i, + int *cleaned_cnt, struct xdp_buff *xdp_buff) +{ + u16 size = le16_to_cpu((*rxbd)->r.buf_len); + + xdp_init_buff(xdp_buff, ENETC_RXB_TRUESIZE, &rx_ring->xdp.rxq); + + enetc_map_rx_buff_to_xdp(rx_ring, *i, xdp_buff, size); + (*cleaned_cnt)++; + enetc_rxbd_next(rx_ring, rxbd, i); - rxbd = enetc_rxbd_next(rx_ring, rxbd, i); - if (unlikely(++i == rx_ring->bd_count)) - i = 0; + /* not last BD in frame? */ + while (!(bd_status & ENETC_RXBD_LSTATUS_F)) { + bd_status = le32_to_cpu((*rxbd)->r.lstatus); + size = ENETC_RXB_DMA_SIZE_XDP; + + if (bd_status & ENETC_RXBD_LSTATUS_F) { + dma_rmb(); + size = le16_to_cpu((*rxbd)->r.buf_len); } - rx_byte_cnt += skb->len; + enetc_add_rx_buff_to_xdp(rx_ring, *i, size, xdp_buff); + (*cleaned_cnt)++; + enetc_rxbd_next(rx_ring, rxbd, i); + } +} - enetc_process_skb(rx_ring, skb); +/* Convert RX buffer descriptors to TX buffer descriptors. These will be + * recycled back into the RX ring in enetc_clean_tx_ring. 
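+ *
+ * Because the pages are mapped DMA_BIDIRECTIONAL while an XDP program
+ * is attached, ownership can migrate to the TX ring without a
+ * dma_unmap/dma_map round trip. Editorial sketch of the recycle step
+ * at TX completion time, assuming it mirrors the recycles and
+ * recycle_failures counters added to enetc_ring_stats (recycled_swbd
+ * below is a placeholder name):
+ *
+ *	if (likely(enetc_swbd_unused(rx_ring))) {
+ *		rx_ring->rx_swbd[rx_ring->next_to_alloc] = *recycled_swbd;
+ *		rx_ring->stats.recycles++;	// page returns to RX
+ *	} else {
+ *		// RX ring already full: unmap and free the page
+ *		rx_ring->stats.recycle_failures++;
+ *	}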
+ */ +static int enetc_rx_swbd_to_xdp_tx_swbd(struct enetc_tx_swbd *xdp_tx_arr, + struct enetc_bdr *rx_ring, + int rx_ring_first, int rx_ring_last) +{ + int n = 0; + + for (; rx_ring_first != rx_ring_last; + n++, enetc_bdr_idx_inc(rx_ring, &rx_ring_first)) { + struct enetc_rx_swbd *rx_swbd = &rx_ring->rx_swbd[rx_ring_first]; + struct enetc_tx_swbd *tx_swbd = &xdp_tx_arr[n]; + + /* No need to dma_map, we already have DMA_BIDIRECTIONAL */ + tx_swbd->dma = rx_swbd->dma; + tx_swbd->dir = rx_swbd->dir; + tx_swbd->page = rx_swbd->page; + tx_swbd->page_offset = rx_swbd->page_offset; + tx_swbd->len = rx_swbd->len; + tx_swbd->is_dma_page = true; + tx_swbd->is_xdp_tx = true; + tx_swbd->is_eof = false; + } - napi_gro_receive(napi, skb); + /* We rely on caller providing an rx_ring_last > rx_ring_first */ + xdp_tx_arr[n - 1].is_eof = true; + + return n; +} + +static void enetc_xdp_drop(struct enetc_bdr *rx_ring, int rx_ring_first, + int rx_ring_last) +{ + while (rx_ring_first != rx_ring_last) { + enetc_put_rx_buff(rx_ring, + &rx_ring->rx_swbd[rx_ring_first]); + enetc_bdr_idx_inc(rx_ring, &rx_ring_first); + } + rx_ring->stats.xdp_drops++; +} + +static void enetc_xdp_free(struct enetc_bdr *rx_ring, int rx_ring_first, + int rx_ring_last) +{ + while (rx_ring_first != rx_ring_last) { + struct enetc_rx_swbd *rx_swbd = &rx_ring->rx_swbd[rx_ring_first]; + + if (rx_swbd->page) { + dma_unmap_page(rx_ring->dev, rx_swbd->dma, PAGE_SIZE, + rx_swbd->dir); + __free_page(rx_swbd->page); + rx_swbd->page = NULL; + } + enetc_bdr_idx_inc(rx_ring, &rx_ring_first); + } + rx_ring->stats.xdp_redirect_failures++; +} + +static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, + struct napi_struct *napi, int work_limit, + struct bpf_prog *prog) +{ + int xdp_tx_bd_cnt, xdp_tx_frm_cnt = 0, xdp_redirect_frm_cnt = 0; + struct enetc_tx_swbd xdp_tx_arr[ENETC_MAX_SKB_FRAGS] = {0}; + struct enetc_ndev_priv *priv = netdev_priv(rx_ring->ndev); + int rx_frm_cnt = 0, rx_byte_cnt = 0; + struct enetc_bdr *tx_ring; + int cleaned_cnt, i; + u32 xdp_act; + + cleaned_cnt = enetc_bd_unused(rx_ring); + /* next descriptor to process */ + i = rx_ring->next_to_clean; + + while (likely(rx_frm_cnt < work_limit)) { + union enetc_rx_bd *rxbd, *orig_rxbd; + int orig_i, orig_cleaned_cnt; + struct xdp_buff xdp_buff; + struct sk_buff *skb; + int tmp_orig_i, err; + u32 bd_status; + + rxbd = enetc_rxbd(rx_ring, i); + bd_status = le32_to_cpu(rxbd->r.lstatus); + if (!bd_status) + break; + + enetc_wr_reg_hot(rx_ring->idr, BIT(rx_ring->index)); + dma_rmb(); /* for reading other rxbd fields */ + + if (enetc_check_bd_errors_and_consume(rx_ring, bd_status, + &rxbd, &i)) + break; + + orig_rxbd = rxbd; + orig_cleaned_cnt = cleaned_cnt; + orig_i = i; + + enetc_build_xdp_buff(rx_ring, bd_status, &rxbd, &i, + &cleaned_cnt, &xdp_buff); + + xdp_act = bpf_prog_run_xdp(prog, &xdp_buff); + + switch (xdp_act) { + default: + bpf_warn_invalid_xdp_action(xdp_act); + fallthrough; + case XDP_ABORTED: + trace_xdp_exception(rx_ring->ndev, prog, xdp_act); + fallthrough; + case XDP_DROP: + enetc_xdp_drop(rx_ring, orig_i, i); + break; + case XDP_PASS: + rxbd = orig_rxbd; + cleaned_cnt = orig_cleaned_cnt; + i = orig_i; + + skb = enetc_build_skb(rx_ring, bd_status, &rxbd, + &i, &cleaned_cnt, + ENETC_RXB_DMA_SIZE_XDP); + if (unlikely(!skb)) + goto out; + + napi_gro_receive(napi, skb); + break; + case XDP_TX: + tx_ring = priv->xdp_tx_ring[rx_ring->index]; + xdp_tx_bd_cnt = enetc_rx_swbd_to_xdp_tx_swbd(xdp_tx_arr, + rx_ring, + orig_i, i); + + if (!enetc_xdp_tx(tx_ring, xdp_tx_arr, 
xdp_tx_bd_cnt)) { + enetc_xdp_drop(rx_ring, orig_i, i); + tx_ring->stats.xdp_tx_drops++; + } else { + tx_ring->stats.xdp_tx += xdp_tx_bd_cnt; + rx_ring->xdp.xdp_tx_in_flight += xdp_tx_bd_cnt; + xdp_tx_frm_cnt++; + /* The XDP_TX enqueue was successful, so we + * need to scrub the RX software BDs because + * the ownership of the buffers no longer + * belongs to the RX ring, and we must prevent + * enetc_refill_rx_ring() from reusing + * rx_swbd->page. + */ + while (orig_i != i) { + rx_ring->rx_swbd[orig_i].page = NULL; + enetc_bdr_idx_inc(rx_ring, &orig_i); + } + } + break; + case XDP_REDIRECT: + /* xdp_return_frame does not support S/G in the sense + * that it leaks the fragments (__xdp_return should not + * call page_frag_free only for the initial buffer). + * Until XDP_REDIRECT gains support for S/G let's keep + * the code structure in place, but dead. We drop the + * S/G frames ourselves to avoid memory leaks which + * would otherwise leave the kernel OOM. + */ + if (unlikely(cleaned_cnt - orig_cleaned_cnt != 1)) { + enetc_xdp_drop(rx_ring, orig_i, i); + rx_ring->stats.xdp_redirect_sg++; + break; + } + + tmp_orig_i = orig_i; + + while (orig_i != i) { + enetc_flip_rx_buff(rx_ring, + &rx_ring->rx_swbd[orig_i]); + enetc_bdr_idx_inc(rx_ring, &orig_i); + } + + err = xdp_do_redirect(rx_ring->ndev, &xdp_buff, prog); + if (unlikely(err)) { + enetc_xdp_free(rx_ring, tmp_orig_i, i); + } else { + xdp_redirect_frm_cnt++; + rx_ring->stats.xdp_redirect++; + } + } rx_frm_cnt++; } +out: rx_ring->next_to_clean = i; rx_ring->stats.packets += rx_frm_cnt; rx_ring->stats.bytes += rx_byte_cnt; + if (xdp_redirect_frm_cnt) + xdp_do_flush_map(); + + if (xdp_tx_frm_cnt) + enetc_update_tx_ring_tail(tx_ring); + + if (cleaned_cnt > rx_ring->xdp.xdp_tx_in_flight) + enetc_refill_rx_ring(rx_ring, enetc_bd_unused(rx_ring) - + rx_ring->xdp.xdp_tx_in_flight); + return rx_frm_cnt; } +static int enetc_poll(struct napi_struct *napi, int budget) +{ + struct enetc_int_vector + *v = container_of(napi, struct enetc_int_vector, napi); + struct enetc_bdr *rx_ring = &v->rx_ring; + struct bpf_prog *prog; + bool complete = true; + int work_done; + int i; + + enetc_lock_mdio(); + + for (i = 0; i < v->count_tx_rings; i++) + if (!enetc_clean_tx_ring(&v->tx_ring[i], budget)) + complete = false; + + prog = rx_ring->xdp.prog; + if (prog) + work_done = enetc_clean_rx_ring_xdp(rx_ring, napi, budget, prog); + else + work_done = enetc_clean_rx_ring(rx_ring, napi, budget); + if (work_done == budget) + complete = false; + if (work_done) + v->rx_napi_work = true; + + if (!complete) { + enetc_unlock_mdio(); + return budget; + } + + napi_complete_done(napi, work_done); + + if (likely(v->rx_dim_en)) + enetc_rx_net_dim(v); + + v->rx_napi_work = false; + + /* enable interrupts */ + enetc_wr_reg_hot(v->rbier, ENETC_RBIER_RXTIE); + + for_each_set_bit(i, &v->tx_rings_map, ENETC_MAX_NUM_TXQS) + enetc_wr_reg_hot(v->tbier_base + ENETC_BDR_OFF(i), + ENETC_TBIER_TXTIE); + + enetc_unlock_mdio(); + + return work_done; +} + /* Probing and Init */ #define ENETC_MAX_RFS_SIZE 64 void enetc_get_si_caps(struct enetc_si *si) @@ -836,7 +1509,7 @@ static void enetc_free_txbdr(struct enetc_bdr *txr) int size, i; for (i = 0; i < txr->bd_count; i++) - enetc_free_tx_skb(txr, &txr->tx_swbd[i]); + enetc_free_tx_frame(txr, &txr->tx_swbd[i]); size = txr->bd_count * sizeof(union enetc_tx_bd); @@ -953,7 +1626,7 @@ static void enetc_free_tx_ring(struct enetc_bdr *tx_ring) for (i = 0; i < tx_ring->bd_count; i++) { struct enetc_tx_swbd *tx_swbd = &tx_ring->tx_swbd[i]; - 
enetc_free_tx_skb(tx_ring, tx_swbd); + enetc_free_tx_frame(tx_ring, tx_swbd); } tx_ring->next_to_clean = 0; @@ -973,8 +1646,8 @@ static void enetc_free_rx_ring(struct enetc_bdr *rx_ring) if (!rx_swbd->page) continue; - dma_unmap_page(rx_ring->dev, rx_swbd->dma, - PAGE_SIZE, DMA_FROM_DEVICE); + dma_unmap_page(rx_ring->dev, rx_swbd->dma, PAGE_SIZE, + rx_swbd->dir); __free_page(rx_swbd->page); rx_swbd->page = NULL; } @@ -995,60 +1668,6 @@ static void enetc_free_rxtx_rings(struct enetc_ndev_priv *priv) enetc_free_tx_ring(priv->tx_ring[i]); } -int enetc_alloc_cbdr(struct device *dev, struct enetc_cbdr *cbdr) -{ - int size = cbdr->bd_count * sizeof(struct enetc_cbd); - - cbdr->bd_base = dma_alloc_coherent(dev, size, &cbdr->bd_dma_base, - GFP_KERNEL); - if (!cbdr->bd_base) - return -ENOMEM; - - /* h/w requires 128B alignment */ - if (!IS_ALIGNED(cbdr->bd_dma_base, 128)) { - dma_free_coherent(dev, size, cbdr->bd_base, cbdr->bd_dma_base); - return -EINVAL; - } - - cbdr->next_to_clean = 0; - cbdr->next_to_use = 0; - - return 0; -} - -void enetc_free_cbdr(struct device *dev, struct enetc_cbdr *cbdr) -{ - int size = cbdr->bd_count * sizeof(struct enetc_cbd); - - dma_free_coherent(dev, size, cbdr->bd_base, cbdr->bd_dma_base); - cbdr->bd_base = NULL; -} - -void enetc_setup_cbdr(struct enetc_hw *hw, struct enetc_cbdr *cbdr) -{ - /* set CBDR cache attributes */ - enetc_wr(hw, ENETC_SICAR2, - ENETC_SICAR_RD_COHERENT | ENETC_SICAR_WR_COHERENT); - - enetc_wr(hw, ENETC_SICBDRBAR0, lower_32_bits(cbdr->bd_dma_base)); - enetc_wr(hw, ENETC_SICBDRBAR1, upper_32_bits(cbdr->bd_dma_base)); - enetc_wr(hw, ENETC_SICBDRLENR, ENETC_RTBLENR_LEN(cbdr->bd_count)); - - enetc_wr(hw, ENETC_SICBDRPIR, 0); - enetc_wr(hw, ENETC_SICBDRCIR, 0); - - /* enable ring */ - enetc_wr(hw, ENETC_SICBDRMR, BIT(31)); - - cbdr->pir = hw->reg + ENETC_SICBDRPIR; - cbdr->cir = hw->reg + ENETC_SICBDRCIR; -} - -void enetc_clear_cbdr(struct enetc_hw *hw) -{ - enetc_wr(hw, ENETC_SICBDRMR, 0); -} - static int enetc_setup_default_rss_table(struct enetc_si *si, int num_groups) { int *rss_table; @@ -1108,45 +1727,22 @@ void enetc_init_si_rings_params(struct enetc_ndev_priv *priv) priv->bdr_int_num = cpus; priv->ic_mode = ENETC_IC_RX_ADAPTIVE | ENETC_IC_TX_MANUAL; priv->tx_ictt = ENETC_TXIC_TIMETHR; - - /* SI specific */ - si->cbd_ring.bd_count = ENETC_CBDR_DEFAULT_SIZE; } int enetc_alloc_si_resources(struct enetc_ndev_priv *priv) { struct enetc_si *si = priv->si; - int err; - - err = enetc_alloc_cbdr(priv->dev, &si->cbd_ring); - if (err) - return err; - - enetc_setup_cbdr(&si->hw, &si->cbd_ring); priv->cls_rules = kcalloc(si->num_fs_entries, sizeof(*priv->cls_rules), GFP_KERNEL); - if (!priv->cls_rules) { - err = -ENOMEM; - goto err_alloc_cls; - } + if (!priv->cls_rules) + return -ENOMEM; return 0; - -err_alloc_cls: - enetc_clear_cbdr(&si->hw); - enetc_free_cbdr(priv->dev, &si->cbd_ring); - - return err; } void enetc_free_si_resources(struct enetc_ndev_priv *priv) { - struct enetc_si *si = priv->si; - - enetc_clear_cbdr(&si->hw); - enetc_free_cbdr(priv->dev, &si->cbd_ring); - kfree(priv->cls_rules); } @@ -1199,7 +1795,10 @@ static void enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring) enetc_rxbdr_wr(hw, idx, ENETC_RBLENR, ENETC_RTBLENR_LEN(rx_ring->bd_count)); - enetc_rxbdr_wr(hw, idx, ENETC_RBBSR, ENETC_RXB_DMA_SIZE); + if (rx_ring->xdp.prog) + enetc_rxbdr_wr(hw, idx, ENETC_RBBSR, ENETC_RXB_DMA_SIZE_XDP); + else + enetc_rxbdr_wr(hw, idx, ENETC_RBBSR, ENETC_RXB_DMA_SIZE); enetc_rxbdr_wr(hw, idx, ENETC_RBPIR, 0); @@ -1217,9 +1816,9 @@ 
static void enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring) rx_ring->rcir = hw->reg + ENETC_BDR(RX, idx, ENETC_RBCIR); rx_ring->idr = hw->reg + ENETC_SIRXIDR; + enetc_lock_mdio(); enetc_refill_rx_ring(rx_ring, enetc_bd_unused(rx_ring)); - /* update ENETC's consumer index */ - enetc_rxbdr_wr(hw, idx, ENETC_RBCIR, rx_ring->next_to_use); + enetc_unlock_mdio(); /* enable ring */ enetc_rxbdr_wr(hw, idx, ENETC_RBMR, rbmr); @@ -1408,6 +2007,29 @@ static int enetc_phylink_connect(struct net_device *ndev) return 0; } +static void enetc_tx_onestep_tstamp(struct work_struct *work) +{ + struct enetc_ndev_priv *priv; + struct sk_buff *skb; + + priv = container_of(work, struct enetc_ndev_priv, tx_onestep_tstamp); + + netif_tx_lock(priv->ndev); + + clear_bit_unlock(ENETC_TX_ONESTEP_TSTAMP_IN_PROGRESS, &priv->flags); + skb = skb_dequeue(&priv->tx_skbs); + if (skb) + enetc_start_xmit(skb, priv->ndev); + + netif_tx_unlock(priv->ndev); +} + +static void enetc_tx_onestep_tstamp_init(struct enetc_ndev_priv *priv) +{ + INIT_WORK(&priv->tx_onestep_tstamp, enetc_tx_onestep_tstamp); + skb_queue_head_init(&priv->tx_skbs); +} + void enetc_start(struct net_device *ndev) { struct enetc_ndev_priv *priv = netdev_priv(ndev); @@ -1434,6 +2056,7 @@ void enetc_start(struct net_device *ndev) int enetc_open(struct net_device *ndev) { struct enetc_ndev_priv *priv = netdev_priv(ndev); + int num_stack_tx_queues; int err; err = enetc_setup_irqs(priv); @@ -1452,7 +2075,9 @@ int enetc_open(struct net_device *ndev) if (err) goto err_alloc_rx; - err = netif_set_real_num_tx_queues(ndev, priv->num_tx_rings); + num_stack_tx_queues = enetc_num_stack_tx_queues(priv); + + err = netif_set_real_num_tx_queues(ndev, num_stack_tx_queues); if (err) goto err_set_queues; @@ -1460,6 +2085,7 @@ int enetc_open(struct net_device *ndev) if (err) goto err_set_queues; + enetc_tx_onestep_tstamp_init(priv); enetc_setup_bdrs(priv); enetc_start(ndev); @@ -1524,15 +2150,17 @@ static int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data) struct enetc_ndev_priv *priv = netdev_priv(ndev); struct tc_mqprio_qopt *mqprio = type_data; struct enetc_bdr *tx_ring; + int num_stack_tx_queues; u8 num_tc; int i; + num_stack_tx_queues = enetc_num_stack_tx_queues(priv); mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; num_tc = mqprio->num_tc; if (!num_tc) { netdev_reset_tc(ndev); - netif_set_real_num_tx_queues(ndev, priv->num_tx_rings); + netif_set_real_num_tx_queues(ndev, num_stack_tx_queues); /* Reset all ring priorities to 0 */ for (i = 0; i < priv->num_tx_rings; i++) { @@ -1544,7 +2172,7 @@ static int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data) } /* Check if we have enough BD rings available to accommodate all TCs */ - if (num_tc > priv->num_tx_rings) { + if (num_tc > num_stack_tx_queues) { netdev_err(ndev, "Max %d traffic classes supported\n", priv->num_tx_rings); return -EINVAL; @@ -1590,6 +2218,54 @@ int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type, } } +static int enetc_setup_xdp_prog(struct net_device *dev, struct bpf_prog *prog, + struct netlink_ext_ack *extack) +{ + struct enetc_ndev_priv *priv = netdev_priv(dev); + struct bpf_prog *old_prog; + bool is_up; + int i; + + /* The buffer layout is changing, so we need to drain the old + * RX buffers and seed new ones. 
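+ *
+ * With a program attached, each buffer reserves XDP_PACKET_HEADROOM
+ * (256 bytes) in front of the frame so the program may grow headers;
+ * without one, only ENETC_RXB_PAD (NET_SKB_PAD) is reserved. Since the
+ * buffer size advertised to hardware via ENETC_RBBSR depends on that
+ * offset, the swap is done with the interface down; roughly:
+ *
+ *	rx_ring->buffer_offset = prog ? XDP_PACKET_HEADROOM
+ *				      : ENETC_RXB_PAD;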
+ */ + is_up = netif_running(dev); + if (is_up) + dev_close(dev); + + old_prog = xchg(&priv->xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + + for (i = 0; i < priv->num_rx_rings; i++) { + struct enetc_bdr *rx_ring = priv->rx_ring[i]; + + rx_ring->xdp.prog = prog; + + if (prog) + rx_ring->buffer_offset = XDP_PACKET_HEADROOM; + else + rx_ring->buffer_offset = ENETC_RXB_PAD; + } + + if (is_up) + return dev_open(dev, extack); + + return 0; +} + +int enetc_setup_bpf(struct net_device *dev, struct netdev_bpf *xdp) +{ + switch (xdp->command) { + case XDP_SETUP_PROG: + return enetc_setup_xdp_prog(dev, xdp->prog, xdp->extack); + default: + return -EINVAL; + } + + return 0; +} + struct net_device_stats *enetc_get_stats(struct net_device *ndev) { struct enetc_ndev_priv *priv = netdev_priv(ndev); @@ -1710,11 +2386,16 @@ static int enetc_hwtstamp_set(struct net_device *ndev, struct ifreq *ifr) switch (config.tx_type) { case HWTSTAMP_TX_OFF: - priv->active_offloads &= ~ENETC_F_TX_TSTAMP; + priv->active_offloads &= ~ENETC_F_TX_TSTAMP_MASK; break; case HWTSTAMP_TX_ON: + priv->active_offloads &= ~ENETC_F_TX_TSTAMP_MASK; priv->active_offloads |= ENETC_F_TX_TSTAMP; break; + case HWTSTAMP_TX_ONESTEP_SYNC: + priv->active_offloads &= ~ENETC_F_TX_TSTAMP_MASK; + priv->active_offloads |= ENETC_F_TX_ONESTEP_SYNC_TSTAMP; + break; default: return -ERANGE; } @@ -1745,7 +2426,9 @@ static int enetc_hwtstamp_get(struct net_device *ndev, struct ifreq *ifr) config.flags = 0; - if (priv->active_offloads & ENETC_F_TX_TSTAMP) + if (priv->active_offloads & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) + config.tx_type = HWTSTAMP_TX_ONESTEP_SYNC; + else if (priv->active_offloads & ENETC_F_TX_TSTAMP) config.tx_type = HWTSTAMP_TX_ON; else config.tx_type = HWTSTAMP_TX_OFF; @@ -1777,8 +2460,9 @@ int enetc_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd) int enetc_alloc_msix(struct enetc_ndev_priv *priv) { struct pci_dev *pdev = priv->si->pdev; - int v_tx_rings; + int first_xdp_tx_ring; int i, n, err, nvec; + int v_tx_rings; nvec = ENETC_BDR_INT_BASE_IDX + priv->bdr_int_num; /* allocate MSIX for both messaging and Rx/Tx interrupts */ @@ -1806,6 +2490,28 @@ int enetc_alloc_msix(struct enetc_ndev_priv *priv) priv->int_vector[i] = v; + bdr = &v->rx_ring; + bdr->index = i; + bdr->ndev = priv->ndev; + bdr->dev = priv->dev; + bdr->bd_count = priv->rx_bd_count; + bdr->buffer_offset = ENETC_RXB_PAD; + priv->rx_ring[i] = bdr; + + err = xdp_rxq_info_reg(&bdr->xdp.rxq, priv->ndev, i, 0); + if (err) { + kfree(v); + goto fail; + } + + err = xdp_rxq_info_reg_mem_model(&bdr->xdp.rxq, + MEM_TYPE_PAGE_SHARED, NULL); + if (err) { + xdp_rxq_info_unreg(&bdr->xdp.rxq); + kfree(v); + goto fail; + } + /* init defaults for adaptive IC */ if (priv->ic_mode & ENETC_IC_RX_ADAPTIVE) { v->rx_ictt = 0x1; @@ -1820,11 +2526,7 @@ int enetc_alloc_msix(struct enetc_ndev_priv *priv) int idx; /* default tx ring mapping policy */ - if (priv->bdr_int_num == ENETC_MAX_BDR_INT) - idx = 2 * j + i; /* 2 CPUs */ - else - idx = j + i * v_tx_rings; /* default */ - + idx = priv->bdr_int_num * j + i; __set_bit(idx, &v->tx_rings_map); bdr = &v->tx_ring[j]; bdr->index = idx; @@ -1833,22 +2535,23 @@ int enetc_alloc_msix(struct enetc_ndev_priv *priv) bdr->bd_count = priv->tx_bd_count; priv->tx_ring[idx] = bdr; } - - bdr = &v->rx_ring; - bdr->index = i; - bdr->ndev = priv->ndev; - bdr->dev = priv->dev; - bdr->bd_count = priv->rx_bd_count; - priv->rx_ring[i] = bdr; } + first_xdp_tx_ring = priv->num_tx_rings - num_possible_cpus(); + priv->xdp_tx_ring = 
&priv->tx_ring[first_xdp_tx_ring]; + return 0; fail: while (i--) { - netif_napi_del(&priv->int_vector[i]->napi); - cancel_work_sync(&priv->int_vector[i]->rx_dim.work); - kfree(priv->int_vector[i]); + struct enetc_int_vector *v = priv->int_vector[i]; + struct enetc_bdr *rx_ring = &v->rx_ring; + + xdp_rxq_info_unreg_mem_model(&rx_ring->xdp.rxq); + xdp_rxq_info_unreg(&rx_ring->xdp.rxq); + netif_napi_del(&v->napi); + cancel_work_sync(&v->rx_dim.work); + kfree(v); } pci_free_irq_vectors(pdev); @@ -1862,7 +2565,10 @@ void enetc_free_msix(struct enetc_ndev_priv *priv) for (i = 0; i < priv->bdr_int_num; i++) { struct enetc_int_vector *v = priv->int_vector[i]; + struct enetc_bdr *rx_ring = &v->rx_ring; + xdp_rxq_info_unreg_mem_model(&rx_ring->xdp.rxq); + xdp_rxq_info_unreg(&rx_ring->xdp.rxq); netif_napi_del(&v->napi); cancel_work_sync(&v->rx_dim.work); } diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index 8b380fc13314..08b283347d9c 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -19,12 +19,21 @@ (ETH_FCS_LEN + ETH_HLEN + VLAN_HLEN)) struct enetc_tx_swbd { - struct sk_buff *skb; + union { + struct sk_buff *skb; + struct xdp_frame *xdp_frame; + }; dma_addr_t dma; + struct page *page; /* valid only if is_xdp_tx */ + u16 page_offset; /* valid only if is_xdp_tx */ u16 len; + enum dma_data_direction dir; u8 is_dma_page:1; u8 check_wb:1; - u8 do_tstamp:1; + u8 do_twostep_tstamp:1; + u8 is_eof:1; + u8 is_xdp_tx:1; + u8 is_xdp_redirect:1; }; #define ENETC_RX_MAXFRM_SIZE ENETC_MAC_MAXFRM_SIZE @@ -32,21 +41,45 @@ struct enetc_tx_swbd { #define ENETC_RXB_PAD NET_SKB_PAD /* add extra space if needed */ #define ENETC_RXB_DMA_SIZE \ (SKB_WITH_OVERHEAD(ENETC_RXB_TRUESIZE) - ENETC_RXB_PAD) +#define ENETC_RXB_DMA_SIZE_XDP \ + (SKB_WITH_OVERHEAD(ENETC_RXB_TRUESIZE) - XDP_PACKET_HEADROOM) struct enetc_rx_swbd { dma_addr_t dma; struct page *page; u16 page_offset; + enum dma_data_direction dir; + u16 len; }; +/* ENETC overhead: optional extension BD + 1 BD gap */ +#define ENETC_TXBDS_NEEDED(val) ((val) + 2) +/* max # of chained Tx BDs is 15, including head and extension BD */ +#define ENETC_MAX_SKB_FRAGS 13 +#define ENETC_TXBDS_MAX_NEEDED ENETC_TXBDS_NEEDED(ENETC_MAX_SKB_FRAGS + 1) + struct enetc_ring_stats { unsigned int packets; unsigned int bytes; unsigned int rx_alloc_errs; + unsigned int xdp_drops; + unsigned int xdp_tx; + unsigned int xdp_tx_drops; + unsigned int xdp_redirect; + unsigned int xdp_redirect_failures; + unsigned int xdp_redirect_sg; + unsigned int recycles; + unsigned int recycle_failures; +}; + +struct enetc_xdp_data { + struct xdp_rxq_info rxq; + struct bpf_prog *prog; + int xdp_tx_in_flight; }; -#define ENETC_RX_RING_DEFAULT_SIZE 512 -#define ENETC_TX_RING_DEFAULT_SIZE 256 +#define ENETC_RX_RING_DEFAULT_SIZE 2048 +#define ENETC_TX_RING_DEFAULT_SIZE 2048 #define ENETC_DEFAULT_TX_WORK (ENETC_TX_RING_DEFAULT_SIZE / 2) struct enetc_bdr { @@ -71,6 +104,9 @@ struct enetc_bdr { }; void __iomem *idr; /* Interrupt Detect Register pointer */ + int buffer_offset; + struct enetc_xdp_data xdp; + struct enetc_ring_stats stats; dma_addr_t bd_dma_base; @@ -92,18 +128,28 @@ static inline int enetc_bd_unused(struct enetc_bdr *bdr) return bdr->bd_count + bdr->next_to_clean - bdr->next_to_use - 1; } +static inline int enetc_swbd_unused(struct enetc_bdr *bdr) +{ + if (bdr->next_to_clean > bdr->next_to_alloc) + return bdr->next_to_clean - bdr->next_to_alloc - 1; + + return bdr->bd_count + 
bdr->next_to_clean - bdr->next_to_alloc - 1; +} + /* Control BD ring */ #define ENETC_CBDR_DEFAULT_SIZE 64 struct enetc_cbdr { void *bd_base; /* points to Rx or Tx BD ring */ void __iomem *pir; void __iomem *cir; + void __iomem *mr; /* mode register */ int bd_count; /* # of BDs */ int next_to_use; int next_to_clean; dma_addr_t bd_dma_base; + struct device *dma_dev; }; #define ENETC_TXBD(BDR, i) (&(((union enetc_tx_bd *)((BDR).bd_base))[i])) @@ -119,19 +165,26 @@ static inline union enetc_rx_bd *enetc_rxbd(struct enetc_bdr *rx_ring, int i) return &(((union enetc_rx_bd *)rx_ring->bd_base)[hw_idx]); } -static inline union enetc_rx_bd *enetc_rxbd_next(struct enetc_bdr *rx_ring, - union enetc_rx_bd *rxbd, - int i) +static inline void enetc_rxbd_next(struct enetc_bdr *rx_ring, + union enetc_rx_bd **old_rxbd, int *old_index) { - rxbd++; + union enetc_rx_bd *new_rxbd = *old_rxbd; + int new_index = *old_index; + + new_rxbd++; + #ifdef CONFIG_FSL_ENETC_PTP_CLOCK if (rx_ring->ext_en) - rxbd++; + new_rxbd++; #endif - if (unlikely(++i == rx_ring->bd_count)) - rxbd = rx_ring->bd_base; - return rxbd; + if (unlikely(++new_index == rx_ring->bd_count)) { + new_rxbd = rx_ring->bd_base; + new_index = 0; + } + + *old_rxbd = new_rxbd; + *old_index = new_index; } static inline union enetc_rx_bd *enetc_rxbd_ext(union enetc_rx_bd *rxbd) @@ -184,6 +237,22 @@ static inline bool enetc_si_is_pf(struct enetc_si *si) return !!(si->hw.port); } +static inline int enetc_pf_to_port(struct pci_dev *pf_pdev) +{ + switch (pf_pdev->devfn) { + case 0: + return 0; + case 1: + return 1; + case 2: + return 2; + case 6: + return 3; + default: + return -1; + } +} + #define ENETC_MAX_NUM_TXQS 8 #define ENETC_INT_NAME_MAX (IFNAMSIZ + 8) @@ -218,12 +287,20 @@ struct psfp_cap { u32 max_psfp_meter; }; +#define ENETC_F_TX_TSTAMP_MASK 0xff /* TODO: more hardware offloads */ enum enetc_active_offloads { - ENETC_F_RX_TSTAMP = BIT(0), - ENETC_F_TX_TSTAMP = BIT(1), - ENETC_F_QBV = BIT(2), - ENETC_F_QCI = BIT(3), + /* 8 bits reserved for TX timestamp types (hwtstamp_tx_types) */ + ENETC_F_TX_TSTAMP = BIT(0), + ENETC_F_TX_ONESTEP_SYNC_TSTAMP = BIT(1), + + ENETC_F_RX_TSTAMP = BIT(8), + ENETC_F_QBV = BIT(9), + ENETC_F_QCI = BIT(10), +}; + +enum enetc_flags_bit { + ENETC_TX_ONESTEP_TSTAMP_IN_PROGRESS = 0, }; /* interrupt coalescing modes */ @@ -252,10 +329,11 @@ struct enetc_ndev_priv { u16 rx_bd_count, tx_bd_count; u16 msg_enable; - int active_offloads; + enum enetc_active_offloads active_offloads; u32 speed; /* store speed for compare update pspeed */ + struct enetc_bdr **xdp_tx_ring; struct enetc_bdr *tx_ring[16]; struct enetc_bdr *rx_ring[16]; @@ -266,6 +344,13 @@ struct enetc_ndev_priv { struct phylink *phylink; int ic_mode; u32 tx_ictt; + + struct bpf_prog *xdp_prog; + + unsigned long flags; + + struct work_struct tx_onestep_tstamp; + struct sk_buff_head tx_skbs; }; /* Messaging */ @@ -305,15 +390,17 @@ int enetc_set_features(struct net_device *ndev, int enetc_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd); int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type, void *type_data); +int enetc_setup_bpf(struct net_device *dev, struct netdev_bpf *xdp); +int enetc_xdp_xmit(struct net_device *ndev, int num_frames, + struct xdp_frame **frames, u32 flags); /* ethtool */ void enetc_set_ethtool_ops(struct net_device *ndev); /* control buffer descriptor ring (CBDR) */ -int enetc_alloc_cbdr(struct device *dev, struct enetc_cbdr *cbdr); -void enetc_free_cbdr(struct device *dev, struct enetc_cbdr *cbdr); -void enetc_setup_cbdr(struct 
enetc_hw *hw, struct enetc_cbdr *cbdr); -void enetc_clear_cbdr(struct enetc_hw *hw); +int enetc_setup_cbdr(struct device *dev, struct enetc_hw *hw, int bd_count, + struct enetc_cbdr *cbdr); +void enetc_teardown_cbdr(struct enetc_cbdr *cbdr); int enetc_set_mac_flt_entry(struct enetc_si *si, int index, char *mac_addr, int si_map); int enetc_clear_mac_flt_entry(struct enetc_si *si, int index); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c index 201cbc362e33..073e56dcca4e 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_cbdr.c @@ -3,9 +3,63 @@ #include "enetc.h" -static void enetc_clean_cbdr(struct enetc_si *si) +int enetc_setup_cbdr(struct device *dev, struct enetc_hw *hw, int bd_count, + struct enetc_cbdr *cbdr) +{ + int size = bd_count * sizeof(struct enetc_cbd); + + cbdr->bd_base = dma_alloc_coherent(dev, size, &cbdr->bd_dma_base, + GFP_KERNEL); + if (!cbdr->bd_base) + return -ENOMEM; + + /* h/w requires 128B alignment */ + if (!IS_ALIGNED(cbdr->bd_dma_base, 128)) { + dma_free_coherent(dev, size, cbdr->bd_base, + cbdr->bd_dma_base); + return -EINVAL; + } + + cbdr->next_to_clean = 0; + cbdr->next_to_use = 0; + cbdr->dma_dev = dev; + cbdr->bd_count = bd_count; + + cbdr->pir = hw->reg + ENETC_SICBDRPIR; + cbdr->cir = hw->reg + ENETC_SICBDRCIR; + cbdr->mr = hw->reg + ENETC_SICBDRMR; + + /* set CBDR cache attributes */ + enetc_wr(hw, ENETC_SICAR2, + ENETC_SICAR_RD_COHERENT | ENETC_SICAR_WR_COHERENT); + + enetc_wr(hw, ENETC_SICBDRBAR0, lower_32_bits(cbdr->bd_dma_base)); + enetc_wr(hw, ENETC_SICBDRBAR1, upper_32_bits(cbdr->bd_dma_base)); + enetc_wr(hw, ENETC_SICBDRLENR, ENETC_RTBLENR_LEN(cbdr->bd_count)); + + enetc_wr_reg(cbdr->pir, cbdr->next_to_clean); + enetc_wr_reg(cbdr->cir, cbdr->next_to_use); + /* enable ring */ + enetc_wr_reg(cbdr->mr, BIT(31)); + + return 0; +} + +void enetc_teardown_cbdr(struct enetc_cbdr *cbdr) +{ + int size = cbdr->bd_count * sizeof(struct enetc_cbd); + + /* disable ring */ + enetc_wr_reg(cbdr->mr, 0); + + dma_free_coherent(cbdr->dma_dev, size, cbdr->bd_base, + cbdr->bd_dma_base); + cbdr->bd_base = NULL; + cbdr->dma_dev = NULL; +} + +static void enetc_clean_cbdr(struct enetc_cbdr *ring) { - struct enetc_cbdr *ring = &si->cbd_ring; struct enetc_cbd *dest_cbd; int i, status; @@ -15,7 +69,7 @@ static void enetc_clean_cbdr(struct enetc_si *si) dest_cbd = ENETC_CBD(*ring, i); status = dest_cbd->status_flags & ENETC_CBD_STATUS_MASK; if (status) - dev_warn(&si->pdev->dev, "CMD err %04x for cmd %04x\n", + dev_warn(ring->dma_dev, "CMD err %04x for cmd %04x\n", status, dest_cbd->cmd); memset(dest_cbd, 0, sizeof(*dest_cbd)); @@ -43,7 +97,7 @@ int enetc_send_cmd(struct enetc_si *si, struct enetc_cbd *cbd) return -EIO; if (unlikely(!enetc_cbd_unused(ring))) - enetc_clean_cbdr(si); + enetc_clean_cbdr(ring); i = ring->next_to_use; dest_cbd = ENETC_CBD(*ring, i); @@ -69,7 +123,7 @@ int enetc_send_cmd(struct enetc_si *si, struct enetc_cbd *cbd) /* CBD may writeback data, feedback up level */ *cbd = *dest_cbd; - enetc_clean_cbdr(si); + enetc_clean_cbdr(ring); return 0; } @@ -117,6 +171,7 @@ int enetc_set_mac_flt_entry(struct enetc_si *si, int index, int enetc_set_fs_entry(struct enetc_si *si, struct enetc_cmd_rfse *rfse, int index) { + struct enetc_cbdr *ring = &si->cbd_ring; struct enetc_cbd cbd = {.cmd = 0}; dma_addr_t dma, dma_align; void *tmp, *tmp_align; @@ -129,10 +184,10 @@ int enetc_set_fs_entry(struct enetc_si *si, struct enetc_cmd_rfse *rfse, 
cbd.length = cpu_to_le16(sizeof(*rfse)); cbd.opt[3] = cpu_to_le32(0); /* SI */ - tmp = dma_alloc_coherent(&si->pdev->dev, sizeof(*rfse) + RFSE_ALIGN, + tmp = dma_alloc_coherent(ring->dma_dev, sizeof(*rfse) + RFSE_ALIGN, &dma, GFP_KERNEL); if (!tmp) { - dev_err(&si->pdev->dev, "DMA mapping of RFS entry failed!\n"); + dev_err(ring->dma_dev, "DMA mapping of RFS entry failed!\n"); return -ENOMEM; } @@ -145,9 +200,9 @@ int enetc_set_fs_entry(struct enetc_si *si, struct enetc_cmd_rfse *rfse, err = enetc_send_cmd(si, &cbd); if (err) - dev_err(&si->pdev->dev, "FS entry add failed (%d)!", err); + dev_err(ring->dma_dev, "FS entry add failed (%d)!", err); - dma_free_coherent(&si->pdev->dev, sizeof(*rfse) + RFSE_ALIGN, + dma_free_coherent(ring->dma_dev, sizeof(*rfse) + RFSE_ALIGN, tmp, dma); return err; @@ -157,6 +212,7 @@ int enetc_set_fs_entry(struct enetc_si *si, struct enetc_cmd_rfse *rfse, static int enetc_cmd_rss_table(struct enetc_si *si, u32 *table, int count, bool read) { + struct enetc_cbdr *ring = &si->cbd_ring; struct enetc_cbd cbd = {.cmd = 0}; dma_addr_t dma, dma_align; u8 *tmp, *tmp_align; @@ -166,10 +222,10 @@ static int enetc_cmd_rss_table(struct enetc_si *si, u32 *table, int count, /* HW only takes in a full 64 entry table */ return -EINVAL; - tmp = dma_alloc_coherent(&si->pdev->dev, count + RSSE_ALIGN, + tmp = dma_alloc_coherent(ring->dma_dev, count + RSSE_ALIGN, &dma, GFP_KERNEL); if (!tmp) { - dev_err(&si->pdev->dev, "DMA mapping of RSS table failed!\n"); + dev_err(ring->dma_dev, "DMA mapping of RSS table failed!\n"); return -ENOMEM; } dma_align = ALIGN(dma, RSSE_ALIGN); @@ -189,13 +245,13 @@ static int enetc_cmd_rss_table(struct enetc_si *si, u32 *table, int count, err = enetc_send_cmd(si, &cbd); if (err) - dev_err(&si->pdev->dev, "RSS cmd failed (%d)!", err); + dev_err(ring->dma_dev, "RSS cmd failed (%d)!", err); if (read) for (i = 0; i < count; i++) table[i] = tmp_align[i]; - dma_free_coherent(&si->pdev->dev, count + RSSE_ALIGN, tmp, dma); + dma_free_coherent(ring->dma_dev, count + RSSE_ALIGN, tmp, dma); return err; } diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index 89e558135432..ebccaf02411c 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -192,10 +192,18 @@ static const struct { static const char rx_ring_stats[][ETH_GSTRING_LEN] = { "Rx ring %2d frames", "Rx ring %2d alloc errors", + "Rx ring %2d XDP drops", + "Rx ring %2d recycles", + "Rx ring %2d recycle failures", + "Rx ring %2d redirects", + "Rx ring %2d redirect failures", + "Rx ring %2d redirect S/G", }; static const char tx_ring_stats[][ETH_GSTRING_LEN] = { "Tx ring %2d frames", + "Tx ring %2d XDP frames", + "Tx ring %2d XDP drops", }; static int enetc_get_sset_count(struct net_device *ndev, int sset) @@ -267,12 +275,21 @@ static void enetc_get_ethtool_stats(struct net_device *ndev, for (i = 0; i < ARRAY_SIZE(enetc_si_counters); i++) data[o++] = enetc_rd64(hw, enetc_si_counters[i].reg); - for (i = 0; i < priv->num_tx_rings; i++) + for (i = 0; i < priv->num_tx_rings; i++) { data[o++] = priv->tx_ring[i]->stats.packets; + data[o++] = priv->tx_ring[i]->stats.xdp_tx; + data[o++] = priv->tx_ring[i]->stats.xdp_tx_drops; + } for (i = 0; i < priv->num_rx_rings; i++) { data[o++] = priv->rx_ring[i]->stats.packets; data[o++] = priv->rx_ring[i]->stats.rx_alloc_errs; + data[o++] = priv->rx_ring[i]->stats.xdp_drops; + data[o++] = priv->rx_ring[i]->stats.recycles; + data[o++] = 
priv->rx_ring[i]->stats.recycle_failures; + data[o++] = priv->rx_ring[i]->stats.xdp_redirect; + data[o++] = priv->rx_ring[i]->stats.xdp_redirect_failures; + data[o++] = priv->rx_ring[i]->stats.xdp_redirect_sg; } if (!enetc_si_is_pf(priv->si)) @@ -654,7 +671,8 @@ static int enetc_get_ts_info(struct net_device *ndev, SOF_TIMESTAMPING_RAW_HARDWARE; info->tx_types = (1 << HWTSTAMP_TX_OFF) | - (1 << HWTSTAMP_TX_ON); + (1 << HWTSTAMP_TX_ON) | + (1 << HWTSTAMP_TX_ONESTEP_SYNC); info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) | (1 << HWTSTAMP_FILTER_ALL); #else @@ -690,6 +708,22 @@ static int enetc_set_wol(struct net_device *dev, return ret; } +static void enetc_get_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pause) +{ + struct enetc_ndev_priv *priv = netdev_priv(dev); + + phylink_ethtool_get_pauseparam(priv->phylink, pause); +} + +static int enetc_set_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pause) +{ + struct enetc_ndev_priv *priv = netdev_priv(dev); + + return phylink_ethtool_set_pauseparam(priv->phylink, pause); +} + static int enetc_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *cmd) { @@ -736,6 +770,8 @@ static const struct ethtool_ops enetc_pf_ethtool_ops = { .get_ts_info = enetc_get_ts_info, .get_wol = enetc_get_wol, .set_wol = enetc_set_wol, + .get_pauseparam = enetc_get_pauseparam, + .set_pauseparam = enetc_set_pauseparam, }; static const struct ethtool_ops enetc_vf_ethtool_ops = { diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index 00938f7960a4..0f5f081a5baf 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -109,6 +109,7 @@ enum enetc_bdr_type {TX, RX}; /* RX BDR reg offsets */ #define ENETC_RBMR 0 #define ENETC_RBMR_BDS BIT(2) +#define ENETC_RBMR_CM BIT(4) #define ENETC_RBMR_VTE BIT(5) #define ENETC_RBMR_EN BIT(31) #define ENETC_RBSR 0x4 @@ -180,6 +181,8 @@ enum enetc_bdr_type {TX, RX}; #define ENETC_PSIVLANR(n) (0x0240 + (n) * 4) /* n = SI index */ #define ENETC_PSIVLAN_EN BIT(31) #define ENETC_PSIVLAN_SET_QOS(val) ((u32)(val) << 12) +#define ENETC_PPAUONTR 0x0410 +#define ENETC_PPAUOFFTR 0x0414 #define ENETC_PTXMBAR 0x0608 #define ENETC_PCAPR0 0x0900 #define ENETC_PCAPR0_RXBDR(val) ((val) >> 24) @@ -227,6 +230,7 @@ enum enetc_bdr_type {TX, RX}; #define ENETC_PM0_TX_EN BIT(0) #define ENETC_PM0_RX_EN BIT(1) #define ENETC_PM0_PROMISC BIT(4) +#define ENETC_PM0_PAUSE_IGN BIT(8) #define ENETC_PM0_CMD_XGLP BIT(10) #define ENETC_PM0_CMD_TXP BIT(11) #define ENETC_PM0_CMD_PHY_TX_EN BIT(15) @@ -239,6 +243,17 @@ enum enetc_bdr_type {TX, RX}; #define ENETC_PM_IMDIO_BASE 0x8030 +#define ENETC_PM0_PAUSE_QUANTA 0x8054 +#define ENETC_PM0_PAUSE_THRESH 0x8064 +#define ENETC_PM1_PAUSE_QUANTA 0x9054 +#define ENETC_PM1_PAUSE_THRESH 0x9064 + +#define ENETC_PM0_SINGLE_STEP 0x80c0 +#define ENETC_PM1_SINGLE_STEP 0x90c0 +#define ENETC_PM0_SINGLE_STEP_CH BIT(7) +#define ENETC_PM0_SINGLE_STEP_EN BIT(31) +#define ENETC_SET_SINGLE_STEP_OFFSET(v) (((v) & 0xff) << 8) + #define ENETC_PM0_IF_MODE 0x8300 #define ENETC_PM0_IFM_RG BIT(2) #define ENETC_PM0_IFM_RLP (BIT(5) | BIT(11)) @@ -548,6 +563,7 @@ static inline void enetc_clear_tx_bd(union enetc_tx_bd *txbd) /* Extension flags */ #define ENETC_TXBD_E_FLAGS_VLAN_INS BIT(0) +#define ENETC_TXBD_E_FLAGS_ONE_STEP_PTP BIT(1) #define ENETC_TXBD_E_FLAGS_TWO_STEP_PTP BIT(2) union enetc_rx_bd { diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ierb.c 
b/drivers/net/ethernet/freescale/enetc/enetc_ierb.c new file mode 100644 index 000000000000..8b356c485507 --- /dev/null +++ b/drivers/net/ethernet/freescale/enetc/enetc_ierb.c @@ -0,0 +1,155 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) +/* Copyright 2021 NXP Semiconductors + * + * The Integrated Endpoint Register Block (IERB) is configured by pre-boot + * software and is supposed to be to ENETC what a NVRAM is to a 'real' PCIe + * card. Upon FLR, values from the IERB are transferred to the ENETC PFs, and + * are read-only in the PF memory space. + * + * This driver fixes up the power-on reset values for the ENETC shared FIFO, + * such that the TX and RX allocations are sufficient for jumbo frames, and + * that intelligent FIFO dropping is enabled before the internal data + * structures are corrupted. + * + * Even though not all ports might be used on a given board, we are not + * concerned with partitioning the FIFO, because the default values configure + * no strict reservations, so the entire FIFO can be used by the RX of a single + * port, or the TX of a single port. + */ + +#include <linux/io.h> +#include <linux/module.h> +#include <linux/of_device.h> +#include <linux/pci.h> +#include <linux/platform_device.h> +#include "enetc.h" +#include "enetc_ierb.h" + +/* IERB registers */ +#define ENETC_IERB_TXMBAR(port) (((port) * 0x100) + 0x8080) +#define ENETC_IERB_RXMBER(port) (((port) * 0x100) + 0x8090) +#define ENETC_IERB_RXMBLR(port) (((port) * 0x100) + 0x8094) +#define ENETC_IERB_RXBCR(port) (((port) * 0x100) + 0x80a0) +#define ENETC_IERB_TXBCR(port) (((port) * 0x100) + 0x80a8) +#define ENETC_IERB_FMBDTR 0xa000 + +#define ENETC_RESERVED_FOR_ICM 1024 + +struct enetc_ierb { + void __iomem *regs; +}; + +static void enetc_ierb_write(struct enetc_ierb *ierb, u32 offset, u32 val) +{ + iowrite32(val, ierb->regs + offset); +} + +int enetc_ierb_register_pf(struct platform_device *pdev, + struct pci_dev *pf_pdev) +{ + struct enetc_ierb *ierb = platform_get_drvdata(pdev); + int port = enetc_pf_to_port(pf_pdev); + u16 tx_credit, rx_credit, tx_alloc; + + if (port < 0) + return -ENODEV; + + if (!ierb) + return -EPROBE_DEFER; + + /* By default, it is recommended to set the Host Transfer Agent + * per port transmit byte credit to "1000 + max_frame_size/2". + * The power-on reset value (1800 bytes) is rounded up to the nearest + * 100 assuming a maximum frame size of 1536 bytes. + */ + tx_credit = roundup(1000 + ENETC_MAC_MAXFRM_SIZE / 2, 100); + + /* Internal memory allocated for transmit buffering is guaranteed but + * not reserved; i.e. if the total transmit allocation is not used, + * then the unused portion is not left idle, it can be used for receive + * buffering but it will be reclaimed, if required, from receive by + * intelligently dropping already stored receive frames in the internal + * memory to ensure that the transmit allocation is respected. + * + * PaTXMBAR must be set to a value larger than + * PaTXBCR + 2 * max_frame_size + 32 + * if frame preemption is not enabled, or to + * 2 * PaTXBCR + 2 * p_max_frame_size (pMAC maximum frame size) + + * 2 * np_max_frame_size (eMAC maximum frame size) + 64 + * if frame preemption is enabled. + */ + tx_alloc = roundup(2 * tx_credit + 4 * ENETC_MAC_MAXFRM_SIZE + 64, 16); + + /* Initial credits, in units of 8 bytes, to the Ingress Congestion + * Manager for the maximum amount of bytes the port is allocated for + * pending traffic. 
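+ * (Editorial worked example, assuming the 9600-byte
+ * ENETC_MAC_MAXFRM_SIZE from enetc_hw.h: the formulas above give
+ * tx_credit = roundup(1000 + 9600 / 2, 100) = 5800 bytes and
+ * tx_alloc = roundup(2 * 5800 + 4 * 9600 + 64, 16) = 50064 bytes,
+ * while the computation below yields
+ * rx_credit = DIV_ROUND_UP(2 * 9600, 8) = 2400 units of 8 bytes.)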
+ * It is recommended to set the initial credits to 2 times the maximum + * frame size (2 frames of maximum size). + */ + rx_credit = DIV_ROUND_UP(ENETC_MAC_MAXFRM_SIZE * 2, 8); + + enetc_ierb_write(ierb, ENETC_IERB_TXBCR(port), tx_credit); + enetc_ierb_write(ierb, ENETC_IERB_TXMBAR(port), tx_alloc); + enetc_ierb_write(ierb, ENETC_IERB_RXBCR(port), rx_credit); + + return 0; +} +EXPORT_SYMBOL(enetc_ierb_register_pf); + +static int enetc_ierb_probe(struct platform_device *pdev) +{ + struct enetc_ierb *ierb; + struct resource *res; + void __iomem *regs; + + ierb = devm_kzalloc(&pdev->dev, sizeof(*ierb), GFP_KERNEL); + if (!ierb) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + regs = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(regs)) + return PTR_ERR(regs); + + ierb->regs = regs; + + /* Free buffer depletion threshold in bytes. + * This sets the minimum amount of free buffer memory that should be + * maintained in the datapath sub system, and when the amount of free + * buffer memory falls below this threshold, a depletion indication is + * asserted, which may trigger "intelligent drop" frame releases from + * the ingress queues in the ICM. + * It is recommended to set the free buffer depletion threshold to 1024 + * bytes, since the ICM needs some FIFO memory for its own use. + */ + enetc_ierb_write(ierb, ENETC_IERB_FMBDTR, ENETC_RESERVED_FOR_ICM); + + platform_set_drvdata(pdev, ierb); + + return 0; +} + +static int enetc_ierb_remove(struct platform_device *pdev) +{ + return 0; +} + +static const struct of_device_id enetc_ierb_match[] = { + { .compatible = "fsl,ls1028a-enetc-ierb", }, + {}, +}; +MODULE_DEVICE_TABLE(of, enetc_ierb_match); + +static struct platform_driver enetc_ierb_driver = { + .driver = { + .name = "fsl-enetc-ierb", + .of_match_table = enetc_ierb_match, + }, + .probe = enetc_ierb_probe, + .remove = enetc_ierb_remove, +}; + +module_platform_driver(enetc_ierb_driver); + +MODULE_DESCRIPTION("NXP ENETC IERB"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ierb.h b/drivers/net/ethernet/freescale/enetc/enetc_ierb.h new file mode 100644 index 000000000000..b3b774e0998a --- /dev/null +++ b/drivers/net/ethernet/freescale/enetc/enetc_ierb.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ +/* Copyright 2021 NXP Semiconductors */ + +#include <linux/pci.h> +#include <linux/platform_device.h> + +#if IS_ENABLED(CONFIG_FSL_ENETC_IERB) + +int enetc_ierb_register_pf(struct platform_device *pdev, + struct pci_dev *pf_pdev); + +#else + +static inline int enetc_ierb_register_pf(struct platform_device *pdev, + struct pci_dev *pf_pdev) +{ + return -EOPNOTSUPP; +} + +#endif diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 224fc37a6757..31274325159a 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -4,8 +4,10 @@ #include <linux/mdio.h> #include <linux/module.h> #include <linux/fsl/enetc_mdio.h> +#include <linux/of_platform.h> #include <linux/of_mdio.h> #include <linux/of_net.h> +#include "enetc_ierb.h" #include "enetc_pf.h" #define ENETC_DRV_NAME_STR "ENETC PF driver" @@ -129,16 +131,20 @@ static void enetc_clear_mac_ht_flt(struct enetc_si *si, int si_idx, int type) } static void enetc_set_mac_ht_flt(struct enetc_si *si, int si_idx, int type, - u32 *hash) + unsigned long hash) { bool err = si->errata & ENETC_ERR_UCMCSWP; if (type == UC) { - 
enetc_port_wr(&si->hw, ENETC_PSIUMHFR0(si_idx, err), *hash); - enetc_port_wr(&si->hw, ENETC_PSIUMHFR1(si_idx), *(hash + 1)); + enetc_port_wr(&si->hw, ENETC_PSIUMHFR0(si_idx, err), + lower_32_bits(hash)); + enetc_port_wr(&si->hw, ENETC_PSIUMHFR1(si_idx), + upper_32_bits(hash)); } else { /* MC */ - enetc_port_wr(&si->hw, ENETC_PSIMMHFR0(si_idx, err), *hash); - enetc_port_wr(&si->hw, ENETC_PSIMMHFR1(si_idx), *(hash + 1)); + enetc_port_wr(&si->hw, ENETC_PSIMMHFR0(si_idx, err), + lower_32_bits(hash)); + enetc_port_wr(&si->hw, ENETC_PSIMMHFR1(si_idx), + upper_32_bits(hash)); } } @@ -182,7 +188,7 @@ static void enetc_sync_mac_filters(struct enetc_pf *pf) if (i == UC) enetc_clear_mac_flt_entry(si, pos); - enetc_set_mac_ht_flt(si, 0, i, (u32 *)f->mac_hash_table); + enetc_set_mac_ht_flt(si, 0, i, *f->mac_hash_table); } } @@ -248,10 +254,10 @@ static void enetc_pf_set_rx_mode(struct net_device *ndev) } static void enetc_set_vlan_ht_filter(struct enetc_hw *hw, int si_idx, - u32 *hash) + unsigned long hash) { - enetc_port_wr(hw, ENETC_PSIVHFR0(si_idx), *hash); - enetc_port_wr(hw, ENETC_PSIVHFR1(si_idx), *(hash + 1)); + enetc_port_wr(hw, ENETC_PSIVHFR0(si_idx), lower_32_bits(hash)); + enetc_port_wr(hw, ENETC_PSIVHFR1(si_idx), upper_32_bits(hash)); } static int enetc_vid_hash_idx(unsigned int vid) @@ -279,7 +285,7 @@ static void enetc_sync_vlan_ht_filter(struct enetc_pf *pf, bool rehash) } } - enetc_set_vlan_ht_filter(&pf->si->hw, 0, (u32 *)pf->vlan_ht_filter); + enetc_set_vlan_ht_filter(&pf->si->hw, 0, *pf->vlan_ht_filter); } static int enetc_vlan_rx_add_vid(struct net_device *ndev, __be16 prot, u16 vid) @@ -386,23 +392,54 @@ static int enetc_pf_set_vf_spoofchk(struct net_device *ndev, int vf, bool en) return 0; } -static void enetc_port_setup_primary_mac_address(struct enetc_si *si) +static int enetc_setup_mac_address(struct device_node *np, struct enetc_pf *pf, + int si) { - unsigned char mac_addr[MAX_ADDR_LEN]; - struct enetc_pf *pf = enetc_si_priv(si); - struct enetc_hw *hw = &si->hw; - int i; + struct device *dev = &pf->si->pdev->dev; + struct enetc_hw *hw = &pf->si->hw; + u8 mac_addr[ETH_ALEN] = { 0 }; + int err; - /* check MAC addresses for PF and all VFs, if any is 0 set it ro rand */ - for (i = 0; i < pf->total_vfs + 1; i++) { - enetc_pf_get_primary_mac_addr(hw, i, mac_addr); - if (!is_zero_ether_addr(mac_addr)) - continue; + /* (1) try to get the MAC address from the device tree */ + if (np) { + err = of_get_mac_address(np, mac_addr); + if (err == -EPROBE_DEFER) + return err; + } + + /* (2) bootloader supplied MAC address */ + if (is_zero_ether_addr(mac_addr)) + enetc_pf_get_primary_mac_addr(hw, si, mac_addr); + + /* (3) choose a random one */ + if (is_zero_ether_addr(mac_addr)) { eth_random_addr(mac_addr); - dev_info(&si->pdev->dev, "no MAC address specified for SI%d, using %pM\n", - i, mac_addr); - enetc_pf_set_primary_mac_addr(hw, i, mac_addr); + dev_info(dev, "no MAC address specified for SI%d, using %pM\n", + si, mac_addr); + } + + enetc_pf_set_primary_mac_addr(hw, si, mac_addr); + + return 0; +} + +static int enetc_setup_mac_addresses(struct device_node *np, + struct enetc_pf *pf) +{ + int err, i; + + /* The PF might take its MAC from the device tree */ + err = enetc_setup_mac_address(np, pf, 0); + if (err) + return err; + + for (i = 0; i < pf->total_vfs; i++) { + err = enetc_setup_mac_address(NULL, pf, i + 1); + if (err) + return err; } + + return 0; } static void enetc_port_assign_rfs_entries(struct enetc_si *si) @@ -483,7 +520,6 @@ static void enetc_configure_port_mac(struct enetc_hw 
*hw) ENETC_SET_MAXFRM(ENETC_RX_MAXFRM_SIZE)); enetc_port_wr(hw, ENETC_PTCMSDUR(0), ENETC_MAC_MAXFRM_SIZE); - enetc_port_wr(hw, ENETC_PTXMBAR, 2 * ENETC_MAC_MAXFRM_SIZE); enetc_port_wr(hw, ENETC_PM0_CMD_CFG, ENETC_PM0_CMD_PHY_TX_EN | ENETC_PM0_CMD_TXP | ENETC_PM0_PROMISC); @@ -558,9 +594,6 @@ static void enetc_configure_port(struct enetc_pf *pf) /* split up RFS entries */ enetc_port_assign_rfs_entries(pf->si); - /* fix-up primary MAC addresses, if not set already */ - enetc_port_setup_primary_mac_address(pf->si); - /* enforce VLAN promisc mode for all SIs */ pf->vlan_promisc_simap = ENETC_VLAN_PROMISC_MAP_ALL; enetc_set_vlan_promisc(hw, pf->vlan_promisc_simap); @@ -703,6 +736,8 @@ static const struct net_device_ops enetc_ndev_ops = { .ndo_set_features = enetc_pf_set_features, .ndo_do_ioctl = enetc_ioctl, .ndo_setup_tc = enetc_setup_tc, + .ndo_bpf = enetc_setup_bpf, + .ndo_xdp_xmit = enetc_xdp_xmit, }; static void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev, @@ -979,7 +1014,12 @@ static void enetc_pl_mac_link_up(struct phylink_config *config, int duplex, bool tx_pause, bool rx_pause) { struct enetc_pf *pf = phylink_to_enetc_pf(config); + u32 pause_off_thresh = 0, pause_on_thresh = 0; + u32 init_quanta = 0, refresh_quanta = 0; + struct enetc_hw *hw = &pf->si->hw; struct enetc_ndev_priv *priv; + u32 rbmr, cmd_cfg; + int idx; priv = netdev_priv(pf->si->ndev); if (priv->active_offloads & ENETC_F_QBV) @@ -987,9 +1027,60 @@ static void enetc_pl_mac_link_up(struct phylink_config *config, if (!phylink_autoneg_inband(mode) && phy_interface_mode_is_rgmii(interface)) - enetc_force_rgmii_mac(&pf->si->hw, speed, duplex); + enetc_force_rgmii_mac(hw, speed, duplex); + + /* Flow control */ + for (idx = 0; idx < priv->num_rx_rings; idx++) { + rbmr = enetc_rxbdr_rd(hw, idx, ENETC_RBMR); + + if (tx_pause) + rbmr |= ENETC_RBMR_CM; + else + rbmr &= ~ENETC_RBMR_CM; - enetc_mac_enable(&pf->si->hw, true); + enetc_rxbdr_wr(hw, idx, ENETC_RBMR, rbmr); + } + + if (tx_pause) { + /* When the port first enters congestion, send a PAUSE request + * with the maximum number of quanta. When the port exits + * congestion, it will automatically send a PAUSE frame with + * zero quanta. + */ + init_quanta = 0xffff; + + /* Also, set up the refresh timer to send follow-up PAUSE + * frames at half the quanta value, in case the congestion + * condition persists. + */ + refresh_quanta = 0xffff / 2; + + /* Start emitting PAUSE frames when 3 large frames (or more + * smaller frames) have accumulated in the FIFO waiting to be + * DMAed to the RX ring. 
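+ * Stop asserting the PAUSE condition once the backlog falls back under
+ * one maximum-sized frame. (Editorial note: a PAUSE quantum is 512 bit
+ * times per IEEE 802.3, so the 0xffff initial value above requests the
+ * longest pause the peer can honor; with the 9600-byte
+ * ENETC_MAC_MAXFRM_SIZE from enetc_hw.h, the thresholds below work out
+ * to 28800 bytes on and 9600 bytes off.)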
+ */ + pause_on_thresh = 3 * ENETC_MAC_MAXFRM_SIZE; + pause_off_thresh = 1 * ENETC_MAC_MAXFRM_SIZE; + } + + enetc_port_wr(hw, ENETC_PM0_PAUSE_QUANTA, init_quanta); + enetc_port_wr(hw, ENETC_PM1_PAUSE_QUANTA, init_quanta); + enetc_port_wr(hw, ENETC_PM0_PAUSE_THRESH, refresh_quanta); + enetc_port_wr(hw, ENETC_PM1_PAUSE_THRESH, refresh_quanta); + enetc_port_wr(hw, ENETC_PPAUONTR, pause_on_thresh); + enetc_port_wr(hw, ENETC_PPAUOFFTR, pause_off_thresh); + + cmd_cfg = enetc_port_rd(hw, ENETC_PM0_CMD_CFG); + + if (rx_pause) + cmd_cfg &= ~ENETC_PM0_PAUSE_IGN; + else + cmd_cfg |= ENETC_PM0_PAUSE_IGN; + + enetc_port_wr(hw, ENETC_PM0_CMD_CFG, cmd_cfg); + enetc_port_wr(hw, ENETC_PM1_CMD_CFG, cmd_cfg); + + enetc_mac_enable(hw, true); } static void enetc_pl_mac_link_down(struct phylink_config *config, @@ -1081,24 +1172,28 @@ static int enetc_init_port_rss_memory(struct enetc_si *si) return err; } -static void enetc_init_unused_port(struct enetc_si *si) +static int enetc_pf_register_with_ierb(struct pci_dev *pdev) { - struct device *dev = &si->pdev->dev; - struct enetc_hw *hw = &si->hw; - int err; + struct device_node *node = pdev->dev.of_node; + struct platform_device *ierb_pdev; + struct device_node *ierb_node; - si->cbd_ring.bd_count = ENETC_CBDR_DEFAULT_SIZE; - err = enetc_alloc_cbdr(dev, &si->cbd_ring); - if (err) - return; + /* Don't register with the IERB if the PF itself is disabled */ + if (!node || !of_device_is_available(node)) + return 0; + + ierb_node = of_find_compatible_node(NULL, NULL, + "fsl,ls1028a-enetc-ierb"); + if (!ierb_node || !of_device_is_available(ierb_node)) + return -ENODEV; - enetc_setup_cbdr(hw, &si->cbd_ring); + ierb_pdev = of_find_device_by_node(ierb_node); + of_node_put(ierb_node); - enetc_init_port_rfs_memory(si); - enetc_init_port_rss_memory(si); + if (!ierb_pdev) + return -EPROBE_DEFER; - enetc_clear_cbdr(hw); - enetc_free_cbdr(dev, &si->cbd_ring); + return enetc_ierb_register_pf(ierb_pdev, pdev); } static int enetc_pf_probe(struct pci_dev *pdev, @@ -1111,6 +1206,14 @@ static int enetc_pf_probe(struct pci_dev *pdev, struct enetc_pf *pf; int err; + err = enetc_pf_register_with_ierb(pdev); + if (err == -EPROBE_DEFER) + return err; + if (err) + dev_warn(&pdev->dev, + "Could not register with IERB driver: %pe, please update the device tree\n", + ERR_PTR(err)); + err = enetc_pci_probe(pdev, KBUILD_MODNAME, sizeof(*pf)); if (err) { dev_err(&pdev->dev, "PCI probing failed\n"); @@ -1124,8 +1227,24 @@ static int enetc_pf_probe(struct pci_dev *pdev, goto err_map_pf_space; } + err = enetc_setup_cbdr(&pdev->dev, &si->hw, ENETC_CBDR_DEFAULT_SIZE, + &si->cbd_ring); + if (err) + goto err_setup_cbdr; + + err = enetc_init_port_rfs_memory(si); + if (err) { + dev_err(&pdev->dev, "Failed to initialize RFS memory\n"); + goto err_init_port_rfs; + } + + err = enetc_init_port_rss_memory(si); + if (err) { + dev_err(&pdev->dev, "Failed to initialize RSS memory\n"); + goto err_init_port_rss; + } + if (node && !of_device_is_available(node)) { - enetc_init_unused_port(si); dev_info(&pdev->dev, "device is disabled, skipping\n"); err = -ENODEV; goto err_device_disabled; @@ -1135,6 +1254,10 @@ static int enetc_pf_probe(struct pci_dev *pdev, pf->si = si; pf->total_vfs = pci_sriov_get_totalvfs(pdev); + err = enetc_setup_mac_addresses(node, pf); + if (err) + goto err_setup_mac_addresses; + enetc_configure_port(pf); enetc_get_si_caps(si); @@ -1158,18 +1281,6 @@ static int enetc_pf_probe(struct pci_dev *pdev, goto err_alloc_si_res; } - err = enetc_init_port_rfs_memory(si); - if (err) { - 
dev_err(&pdev->dev, "Failed to initialize RFS memory\n"); - goto err_init_port_rfs; - } - - err = enetc_init_port_rss_memory(si); - if (err) { - dev_err(&pdev->dev, "Failed to initialize RSS memory\n"); - goto err_init_port_rss; - } - err = enetc_configure_si(priv); if (err) { dev_err(&pdev->dev, "Failed to configure SI\n"); @@ -1205,15 +1316,18 @@ err_phylink_create: err_mdiobus_create: enetc_free_msix(priv); err_config_si: -err_init_port_rss: -err_init_port_rfs: err_alloc_msix: enetc_free_si_resources(priv); err_alloc_si_res: si->ndev = NULL; free_netdev(ndev); err_alloc_netdev: +err_init_port_rss: +err_init_port_rfs: err_device_disabled: +err_setup_mac_addresses: + enetc_teardown_cbdr(&si->cbd_ring); +err_setup_cbdr: err_map_pf_space: enetc_pci_remove(pdev); @@ -1239,6 +1353,7 @@ static void enetc_pf_remove(struct pci_dev *pdev) enetc_free_msix(priv); enetc_free_si_resources(priv); + enetc_teardown_cbdr(&si->cbd_ring); free_netdev(si->ndev); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index a9aee219fb58..af699f2ad095 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -455,11 +455,6 @@ static struct enetc_psfp epsfp = { static LIST_HEAD(enetc_block_cb_list); -static inline int enetc_get_port(struct enetc_ndev_priv *priv) -{ - return priv->si->pdev->devfn & 0x7; -} - /* Stream Identity Entry Set Descriptor */ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, struct enetc_streamid *sid, @@ -504,7 +499,7 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, si_conf = &cbd.sid_set; /* Only one port supported for one entry, set itself */ - si_conf->iports = cpu_to_le32(1 << enetc_get_port(priv)); + si_conf->iports = cpu_to_le32(1 << enetc_pf_to_port(priv->si->pdev)); si_conf->id_type = 1; si_conf->oui[2] = 0x0; si_conf->oui[1] = 0x80; @@ -529,7 +524,7 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, si_conf->en = 0x80; si_conf->stream_handle = cpu_to_le32(sid->handle); - si_conf->iports = cpu_to_le32(1 << enetc_get_port(priv)); + si_conf->iports = cpu_to_le32(1 << enetc_pf_to_port(priv->si->pdev)); si_conf->id_type = sid->filtertype; si_conf->oui[2] = 0x0; si_conf->oui[1] = 0x80; @@ -591,7 +586,8 @@ static int enetc_streamfilter_hw_set(struct enetc_ndev_priv *priv, } sfi_config->sg_inst_table_index = cpu_to_le16(sfi->gate_id); - sfi_config->input_ports = cpu_to_le32(1 << enetc_get_port(priv)); + sfi_config->input_ports = + cpu_to_le32(1 << enetc_pf_to_port(priv->si->pdev)); /* The priority value which may be matched against the * frame’s priority value to determine a match for this entry. 
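[Editorial sketch] The enetc_qos.c hunks above replace the open-coded enetc_get_port() helper with enetc_pf_to_port() wherever the PSFP descriptors need a one-hot ingress-port bitmap. A minimal reconstruction of the pattern being removed, based only on the deleted helper shown above (the devfn masking is the old heuristic, not the new table-based lookup):

#include <linux/pci.h>

/* Reconstructed illustration of the removed enetc_get_port() pattern:
 * the PF index was derived straight from the PCI device/function number
 * and turned into a one-hot mask for the descriptor's iports field.
 */
static u32 example_psfp_iports(struct pci_dev *pdev)
{
	int port = pdev->devfn & 0x7;	/* heuristic deleted by this patch */

	return 1u << port;		/* one-hot: match this ingress port only */
}

The descriptor fields are little-endian, which is why every call site above wraps the mask in cpu_to_le32().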
@@ -1221,6 +1217,11 @@ static int enetc_psfp_parse_clsflower(struct enetc_ndev_priv *priv, /* Flow meter and max frame size */ if (entryp) { + if (entryp->police.rate_pkt_ps) { + NL_SET_ERR_MSG_MOD(extack, "QoS offload not support packets per second"); + err = -EOPNOTSUPP; + goto free_sfi; + } if (entryp->police.burst) { fmi = kzalloc(sizeof(*fmi), GFP_KERNEL); if (!fmi) { @@ -1557,10 +1558,10 @@ int enetc_setup_tc_psfp(struct net_device *ndev, void *type_data) switch (f->command) { case FLOW_BLOCK_BIND: - set_bit(enetc_get_port(priv), &epsfp.dev_bitmap); + set_bit(enetc_pf_to_port(priv->si->pdev), &epsfp.dev_bitmap); break; case FLOW_BLOCK_UNBIND: - clear_bit(enetc_get_port(priv), &epsfp.dev_bitmap); + clear_bit(enetc_pf_to_port(priv->si->pdev), &epsfp.dev_bitmap); if (!epsfp.dev_bitmap) clean_psfp_all(); break; diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c index 9b755a84c2d6..03090ba7e226 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c @@ -165,6 +165,11 @@ static int enetc_vf_probe(struct pci_dev *pdev, enetc_init_si_rings_params(priv); + err = enetc_setup_cbdr(priv->dev, &si->hw, ENETC_CBDR_DEFAULT_SIZE, + &si->cbd_ring); + if (err) + goto err_setup_cbdr; + err = enetc_alloc_si_resources(priv); if (err) { dev_err(&pdev->dev, "SI resource alloc failed\n"); @@ -197,6 +202,8 @@ err_config_si: err_alloc_msix: enetc_free_si_resources(priv); err_alloc_si_res: + enetc_teardown_cbdr(&si->cbd_ring); +err_setup_cbdr: si->ndev = NULL; free_netdev(ndev); err_alloc_netdev: @@ -216,6 +223,7 @@ static void enetc_vf_remove(struct pci_dev *pdev) enetc_free_msix(priv); enetc_free_si_resources(priv); + enetc_teardown_cbdr(&si->cbd_ring); free_netdev(si->ndev); diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 3db882322b2b..f2065f9d02e6 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -38,6 +38,7 @@ #include <linux/in.h> #include <linux/ip.h> #include <net/ip.h> +#include <net/selftests.h> #include <net/tso.h> #include <linux/tcp.h> #include <linux/udp.h> @@ -1665,6 +1666,7 @@ static void fec_get_mac(struct net_device *ndev) { struct fec_enet_private *fep = netdev_priv(ndev); unsigned char *iap, tmpaddr[ETH_ALEN]; + int ret; /* * try to get mac address in following order: @@ -1680,9 +1682,9 @@ static void fec_get_mac(struct net_device *ndev) if (!is_valid_ether_addr(iap)) { struct device_node *np = fep->pdev->dev.of_node; if (np) { - const char *mac = of_get_mac_address(np); - if (!IS_ERR(mac)) - iap = (unsigned char *) mac; + ret = of_get_mac_address(np, tmpaddr); + if (!ret) + iap = tmpaddr; } } @@ -2048,6 +2050,8 @@ static int fec_enet_mii_probe(struct net_device *ndev) fep->link = 0; fep->full_duplex = 0; + phy_dev->mac_managed_pm = 1; + phy_attached_info(phy_dev); return 0; @@ -2479,6 +2483,9 @@ static void fec_enet_get_strings(struct net_device *netdev, memcpy(data + i * ETH_GSTRING_LEN, fec_stats[i].name, ETH_GSTRING_LEN); break; + case ETH_SS_TEST: + net_selftest_get_strings(data); + break; } } @@ -2487,6 +2494,8 @@ static int fec_enet_get_sset_count(struct net_device *dev, int sset) switch (sset) { case ETH_SS_STATS: return ARRAY_SIZE(fec_stats); + case ETH_SS_TEST: + return net_selftest_get_count(); default: return -EOPNOTSUPP; } @@ -2738,6 +2747,7 @@ static const struct ethtool_ops fec_enet_ethtool_ops = { .set_wol = fec_enet_set_wol, .get_link_ksettings = 
phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, + .self_test = net_selftest, }; static int fec_enet_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd) @@ -3864,6 +3874,7 @@ static int __maybe_unused fec_resume(struct device *dev) netif_device_attach(ndev); netif_tx_unlock_bh(ndev); napi_enable(&fep->napi); + phy_init_hw(ndev->phydev); phy_start(ndev->phydev); } rtnl_unlock(); diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c index b3bad429e03b..02c47658a215 100644 --- a/drivers/net/ethernet/freescale/fec_mpc52xx.c +++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c @@ -813,7 +813,6 @@ static int mpc52xx_fec_probe(struct platform_device *op) const u32 *prop; int prop_size; struct device_node *np = op->dev.of_node; - const char *mac_addr; phys_addr_t rx_fifo; phys_addr_t tx_fifo; @@ -891,10 +890,8 @@ static int mpc52xx_fec_probe(struct platform_device *op) * * First try to read MAC address from DT */ - mac_addr = of_get_mac_address(np); - if (!IS_ERR(mac_addr)) { - ether_addr_copy(ndev->dev_addr, mac_addr); - } else { + rv = of_get_mac_address(np, ndev->dev_addr); + if (rv) { struct mpc52xx_fec __iomem *fec = priv->fec; /* diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index 901749a7a318..46ecb42f2ef8 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -605,7 +605,6 @@ static int mac_probe(struct platform_device *_of_dev) struct platform_device *of_dev; struct resource res; struct mac_priv_s *priv; - const u8 *mac_addr; u32 val; u8 fman_id; phy_interface_t phy_if; @@ -723,11 +722,9 @@ static int mac_probe(struct platform_device *_of_dev) priv->cell_index = (u8)val; /* Get the MAC address */ - mac_addr = of_get_mac_address(mac_node); - if (IS_ERR(mac_addr)) + err = of_get_mac_address(mac_node, mac_dev->addr); + if (err) dev_warn(dev, "of_get_mac_address(%pOF) failed\n", mac_node); - else - ether_addr_copy(mac_dev->addr, mac_addr); /* Get the port handles */ nph = of_count_phandle_with_args(mac_node, "fsl,fman-ports", NULL); @@ -853,7 +850,7 @@ static int mac_probe(struct platform_device *_of_dev) if (err < 0) dev_err(dev, "fman_set_mac_active_pause() = %d\n", err); - if (!IS_ERR(mac_addr)) + if (!is_zero_ether_addr(mac_dev->addr)) dev_info(dev, "FMan MAC address: %pM\n", mac_dev->addr); priv->eth_dev = dpaa_eth_add_device(fman_id, mac_dev); diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index 78e008b81374..6ee325ad35c5 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -918,7 +918,6 @@ static int fs_enet_probe(struct platform_device *ofdev) const u32 *data; struct clk *clk; int err; - const u8 *mac_addr; const char *phy_connection_type; int privsize, len, ret = -ENODEV; @@ -1006,9 +1005,7 @@ static int fs_enet_probe(struct platform_device *ofdev) spin_lock_init(&fep->lock); spin_lock_init(&fep->tx_lock); - mac_addr = of_get_mac_address(ofdev->dev.of_node); - if (!IS_ERR(mac_addr)) - ether_addr_copy(ndev->dev_addr, mac_addr); + of_get_mac_address(ofdev->dev.of_node, ndev->dev_addr); ret = fep->ops->allocate_bd(ndev); if (ret) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 1cf8ef717453..f2945abdb041 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ 
b/drivers/net/ethernet/freescale/gianfar.c @@ -175,10 +175,7 @@ static void gfar_mac_rx_config(struct gfar_private *priv) if (priv->rx_filer_enable) { rctrl |= RCTRL_FILREN | RCTRL_PRSDEP_INIT; /* Program the RIR0 reg with the required distribution */ - if (priv->poll_mode == GFAR_SQ_POLLING) - gfar_write(®s->rir0, DEFAULT_2RXQ_RIR0); - else /* GFAR_MQ_POLLING */ - gfar_write(®s->rir0, DEFAULT_8RXQ_RIR0); + gfar_write(®s->rir0, DEFAULT_2RXQ_RIR0); } /* Restore PROMISC mode */ @@ -363,7 +360,11 @@ static void gfar_set_mac_for_addr(struct net_device *dev, int num, static int gfar_set_mac_addr(struct net_device *dev, void *p) { - eth_mac_addr(dev, p); + int ret; + + ret = eth_mac_addr(dev, p); + if (ret) + return ret; gfar_set_mac_for_addr(dev, 0, dev->dev_addr); @@ -517,29 +518,9 @@ static int gfar_parse_group(struct device_node *np, grp->priv = priv; spin_lock_init(&grp->grplock); if (priv->mode == MQ_MG_MODE) { - u32 rxq_mask, txq_mask; - int ret; - + /* One Q per interrupt group: Q0 to G0, Q1 to G1 */ grp->rx_bit_map = (DEFAULT_MAPPING >> priv->num_grps); grp->tx_bit_map = (DEFAULT_MAPPING >> priv->num_grps); - - ret = of_property_read_u32(np, "fsl,rx-bit-map", &rxq_mask); - if (!ret) { - grp->rx_bit_map = rxq_mask ? - rxq_mask : (DEFAULT_MAPPING >> priv->num_grps); - } - - ret = of_property_read_u32(np, "fsl,tx-bit-map", &txq_mask); - if (!ret) { - grp->tx_bit_map = txq_mask ? - txq_mask : (DEFAULT_MAPPING >> priv->num_grps); - } - - if (priv->poll_mode == GFAR_SQ_POLLING) { - /* One Q per interrupt group: Q0 to G0, Q1 to G1 */ - grp->rx_bit_map = (DEFAULT_MAPPING >> priv->num_grps); - grp->tx_bit_map = (DEFAULT_MAPPING >> priv->num_grps); - } } else { grp->rx_bit_map = 0xFF; grp->tx_bit_map = 0xFF; @@ -636,7 +617,6 @@ static phy_interface_t gfar_get_interface(struct net_device *dev) static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev) { const char *model; - const void *mac_addr; int err = 0, i; phy_interface_t interface; struct net_device *dev = NULL; @@ -646,18 +626,15 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev) u32 stash_len = 0; u32 stash_idx = 0; unsigned int num_tx_qs, num_rx_qs; - unsigned short mode, poll_mode; + unsigned short mode; if (!np) return -ENODEV; - if (of_device_is_compatible(np, "fsl,etsec2")) { + if (of_device_is_compatible(np, "fsl,etsec2")) mode = MQ_MG_MODE; - poll_mode = GFAR_SQ_POLLING; - } else { + else mode = SQ_SG_MODE; - poll_mode = GFAR_SQ_POLLING; - } if (mode == SQ_SG_MODE) { num_tx_qs = 1; @@ -673,22 +650,8 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev) return -EINVAL; } - if (poll_mode == GFAR_SQ_POLLING) { - num_tx_qs = num_grps; /* one txq per int group */ - num_rx_qs = num_grps; /* one rxq per int group */ - } else { /* GFAR_MQ_POLLING */ - u32 tx_queues, rx_queues; - int ret; - - /* parse the num of HW tx and rx queues */ - ret = of_property_read_u32(np, "fsl,num_tx_queues", - &tx_queues); - num_tx_qs = ret ? 1 : tx_queues; - - ret = of_property_read_u32(np, "fsl,num_rx_queues", - &rx_queues); - num_rx_qs = ret ? 
1 : rx_queues; - } + num_tx_qs = num_grps; /* one txq per int group */ + num_rx_qs = num_grps; /* one rxq per int group */ } if (num_tx_qs > MAX_TX_QS) { @@ -714,7 +677,6 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev) priv->ndev = dev; priv->mode = mode; - priv->poll_mode = poll_mode; priv->num_tx_queues = num_tx_qs; netif_set_real_num_rx_queues(dev, num_rx_qs); @@ -778,11 +740,8 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev) if (stash_len || stash_idx) priv->device_flags |= FSL_GIANFAR_DEV_HAS_BUF_STASHING; - mac_addr = of_get_mac_address(np); - - if (!IS_ERR(mac_addr)) { - ether_addr_copy(dev->dev_addr, mac_addr); - } else { + err = of_get_mac_address(np, dev->dev_addr); + if (err) { eth_hw_addr_random(dev); dev_info(&ofdev->dev, "Using random MAC address: %pM\n", dev->dev_addr); } @@ -2691,106 +2650,6 @@ static int gfar_poll_tx_sq(struct napi_struct *napi, int budget) return 0; } -static int gfar_poll_rx(struct napi_struct *napi, int budget) -{ - struct gfar_priv_grp *gfargrp = - container_of(napi, struct gfar_priv_grp, napi_rx); - struct gfar_private *priv = gfargrp->priv; - struct gfar __iomem *regs = gfargrp->regs; - struct gfar_priv_rx_q *rx_queue = NULL; - int work_done = 0, work_done_per_q = 0; - int i, budget_per_q = 0; - unsigned long rstat_rxf; - int num_act_queues; - - /* Clear IEVENT, so interrupts aren't called again - * because of the packets that have already arrived - */ - gfar_write(®s->ievent, IEVENT_RX_MASK); - - rstat_rxf = gfar_read(®s->rstat) & RSTAT_RXF_MASK; - - num_act_queues = bitmap_weight(&rstat_rxf, MAX_RX_QS); - if (num_act_queues) - budget_per_q = budget/num_act_queues; - - for_each_set_bit(i, &gfargrp->rx_bit_map, priv->num_rx_queues) { - /* skip queue if not active */ - if (!(rstat_rxf & (RSTAT_CLEAR_RXF0 >> i))) - continue; - - rx_queue = priv->rx_queue[i]; - work_done_per_q = - gfar_clean_rx_ring(rx_queue, budget_per_q); - work_done += work_done_per_q; - - /* finished processing this queue */ - if (work_done_per_q < budget_per_q) { - /* clear active queue hw indication */ - gfar_write(®s->rstat, - RSTAT_CLEAR_RXF0 >> i); - num_act_queues--; - - if (!num_act_queues) - break; - } - } - - if (!num_act_queues) { - u32 imask; - napi_complete_done(napi, work_done); - - /* Clear the halt bit in RSTAT */ - gfar_write(®s->rstat, gfargrp->rstat); - - spin_lock_irq(&gfargrp->grplock); - imask = gfar_read(®s->imask); - imask |= IMASK_RX_DEFAULT; - gfar_write(®s->imask, imask); - spin_unlock_irq(&gfargrp->grplock); - } - - return work_done; -} - -static int gfar_poll_tx(struct napi_struct *napi, int budget) -{ - struct gfar_priv_grp *gfargrp = - container_of(napi, struct gfar_priv_grp, napi_tx); - struct gfar_private *priv = gfargrp->priv; - struct gfar __iomem *regs = gfargrp->regs; - struct gfar_priv_tx_q *tx_queue = NULL; - int has_tx_work = 0; - int i; - - /* Clear IEVENT, so interrupts aren't called again - * because of the packets that have already arrived - */ - gfar_write(®s->ievent, IEVENT_TX_MASK); - - for_each_set_bit(i, &gfargrp->tx_bit_map, priv->num_tx_queues) { - tx_queue = priv->tx_queue[i]; - /* run Tx cleanup to completion */ - if (tx_queue->tx_skbuff[tx_queue->skb_dirtytx]) { - gfar_clean_tx_ring(tx_queue); - has_tx_work = 1; - } - } - - if (!has_tx_work) { - u32 imask; - napi_complete(napi); - - spin_lock_irq(&gfargrp->grplock); - imask = gfar_read(®s->imask); - imask |= IMASK_TX_DEFAULT; - gfar_write(®s->imask, imask); - spin_unlock_irq(&gfargrp->grplock); - } - - return 
0; -} - /* GFAR error interrupt handler */ static irqreturn_t gfar_error(int irq, void *grp_id) { @@ -3348,17 +3207,10 @@ static int gfar_probe(struct platform_device *ofdev) /* Register for napi ...We are registering NAPI for each grp */ for (i = 0; i < priv->num_grps; i++) { - if (priv->poll_mode == GFAR_SQ_POLLING) { - netif_napi_add(dev, &priv->gfargrp[i].napi_rx, - gfar_poll_rx_sq, GFAR_DEV_WEIGHT); - netif_tx_napi_add(dev, &priv->gfargrp[i].napi_tx, - gfar_poll_tx_sq, 2); - } else { - netif_napi_add(dev, &priv->gfargrp[i].napi_rx, - gfar_poll_rx, GFAR_DEV_WEIGHT); - netif_tx_napi_add(dev, &priv->gfargrp[i].napi_tx, - gfar_poll_tx, 2); - } + netif_napi_add(dev, &priv->gfargrp[i].napi_rx, + gfar_poll_rx_sq, GFAR_DEV_WEIGHT); + netif_tx_napi_add(dev, &priv->gfargrp[i].napi_tx, + gfar_poll_tx_sq, 2); } if (priv->device_flags & FSL_GIANFAR_DEV_HAS_CSUM) { diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h index 8ced783f5302..5ea47df93e5e 100644 --- a/drivers/net/ethernet/freescale/gianfar.h +++ b/drivers/net/ethernet/freescale/gianfar.h @@ -909,22 +909,6 @@ enum { MQ_MG_MODE }; -/* GFAR_SQ_POLLING: Single Queue NAPI polling mode - * The driver supports a single pair of RX/Tx queues - * per interrupt group (Rx/Tx int line). MQ_MG mode - * devices have 2 interrupt groups, so the device will - * have a total of 2 Tx and 2 Rx queues in this case. - * GFAR_MQ_POLLING: Multi Queue NAPI polling mode - * The driver supports all the 8 Rx and Tx HW queues - * each queue mapped by the Device Tree to one of - * the 2 interrupt groups. This mode implies significant - * processing overhead (CPU and controller level). - */ -enum gfar_poll_mode { - GFAR_SQ_POLLING = 0, - GFAR_MQ_POLLING -}; - /* * Per TX queue stats */ @@ -1105,7 +1089,6 @@ struct gfar_private { unsigned long state; unsigned short mode; - unsigned short poll_mode; unsigned int num_tx_queues; unsigned int num_rx_queues; unsigned int num_grps; diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index ef4e2febeb5b..e0936510fa34 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -3562,7 +3562,6 @@ static int ucc_geth_probe(struct platform_device* ofdev) struct resource res; int err, ucc_num, max_speed = 0; const unsigned int *prop; - const void *mac_addr; phy_interface_t phy_interface; static const int enet_to_speed[] = { SPEED_10, SPEED_10, SPEED_10, @@ -3733,9 +3732,7 @@ static int ucc_geth_probe(struct platform_device* ofdev) goto err_free_netdev; } - mac_addr = of_get_mac_address(np); - if (!IS_ERR(mac_addr)) - ether_addr_copy(dev->dev_addr, mac_addr); + of_get_mac_address(np, dev->dev_addr); ugeth->ug_info = ug_info; ugeth->dev = device; diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index 0901fa6853ca..5fb05cf36b49 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -14,9 +14,9 @@ static void gve_get_drvinfo(struct net_device *netdev, { struct gve_priv *priv = netdev_priv(netdev); - strlcpy(info->driver, "gve", sizeof(info->driver)); - strlcpy(info->version, gve_version_str, sizeof(info->version)); - strlcpy(info->bus_info, pci_name(priv->pdev), sizeof(info->bus_info)); + strscpy(info->driver, "gve", sizeof(info->driver)); + strscpy(info->version, gve_version_str, sizeof(info->version)); + strscpy(info->bus_info, pci_name(priv->pdev), sizeof(info->bus_info)); } static 
void gve_set_msglevel(struct net_device *netdev, u32 value) @@ -388,7 +388,7 @@ static int gve_set_channels(struct net_device *netdev, gve_get_channels(netdev, &old_settings); - /* Changing combined is not allowed allowed */ + /* Changing combined is not allowed */ if (cmd->combined_count != old_settings.combined_count) return -EINVAL; diff --git a/drivers/net/ethernet/hisilicon/hisi_femac.c b/drivers/net/ethernet/hisilicon/hisi_femac.c index 57c3bc4f7089..3c4db4a6b431 100644 --- a/drivers/net/ethernet/hisilicon/hisi_femac.c +++ b/drivers/net/ethernet/hisilicon/hisi_femac.c @@ -772,7 +772,6 @@ static int hisi_femac_drv_probe(struct platform_device *pdev) struct net_device *ndev; struct hisi_femac_priv *priv; struct phy_device *phy; - const char *mac_addr; int ret; ndev = alloc_etherdev(sizeof(*priv)); @@ -842,10 +841,8 @@ static int hisi_femac_drv_probe(struct platform_device *pdev) (unsigned long)phy->phy_id, phy_modes(phy->interface)); - mac_addr = of_get_mac_address(node); - if (!IS_ERR(mac_addr)) - ether_addr_copy(ndev->dev_addr, mac_addr); - if (!is_valid_ether_addr(ndev->dev_addr)) { + ret = of_get_mac_address(node, ndev->dev_addr); + if (ret) { eth_hw_addr_random(ndev); dev_warn(dev, "using random MAC address %pM\n", ndev->dev_addr); diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c index 8b2bf85039f1..c1aae0fca5e9 100644 --- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c +++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c @@ -1098,7 +1098,6 @@ static int hix5hd2_dev_probe(struct platform_device *pdev) struct net_device *ndev; struct hix5hd2_priv *priv; struct mii_bus *bus; - const char *mac_addr; int ret; ndev = alloc_etherdev(sizeof(struct hix5hd2_priv)); @@ -1220,10 +1219,8 @@ static int hix5hd2_dev_probe(struct platform_device *pdev) goto out_phy_node; } - mac_addr = of_get_mac_address(node); - if (!IS_ERR(mac_addr)) - ether_addr_copy(ndev->dev_addr, mac_addr); - if (!is_valid_ether_addr(ndev->dev_addr)) { + ret = of_get_mac_address(node, ndev->dev_addr); + if (ret) { eth_hw_addr_random(ndev); netdev_warn(ndev, "using random MAC address %pM\n", ndev->dev_addr); diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h index 6ab9458302e1..2b7db1c22321 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.h +++ b/drivers/net/ethernet/hisilicon/hns/hnae.h @@ -414,10 +414,6 @@ enum hnae_media_type { * get ring bd number limit * get_pauseparam() * get tx and rx of pause frame use - * set_autoneg() - * set auto autonegotiation of pause frame use - * get_autoneg() - * get auto autonegotiation of pause frame use * set_pauseparam() * set tx and rx of pause frame use * get_coalesce_usecs() @@ -487,8 +483,6 @@ struct hnae_ae_ops { u32 *uplimit); void (*get_pauseparam)(struct hnae_handle *handle, u32 *auto_neg, u32 *rx_en, u32 *tx_en); - int (*set_autoneg)(struct hnae_handle *handle, u8 enable); - int (*get_autoneg)(struct hnae_handle *handle); int (*set_pauseparam)(struct hnae_handle *handle, u32 auto_neg, u32 rx_en, u32 tx_en); void (*get_coalesce_usecs)(struct hnae_handle *handle, diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c index b98244f75ab9..c615fbf9094e 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c @@ -487,13 +487,6 @@ static void hns_ae_get_pauseparam(struct hnae_handle *handle, hns_dsaf_get_rx_mac_pause_en(dsaf_dev, mac_cb->mac_id, rx_en); } 
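[Editorial sketch] The MAC-address conversions in the hunks above (fec, mpc52xx FEC, fman, fs_enet, gianfar, ucc_geth, hisi_femac, hix5hd2) all adopt the same new contract: of_get_mac_address() now fills a caller-provided buffer and returns 0 or a negative errno, instead of returning a pointer that had to be checked with IS_ERR(). A minimal sketch of the updated idiom, using a hypothetical probe helper:

#include <linux/etherdevice.h>
#include <linux/of_net.h>

/* Hypothetical helper showing the new of_get_mac_address() contract used
 * throughout this series: it writes the address into the buffer and
 * returns 0 on success or a negative errno; -EPROBE_DEFER means the NVMEM
 * cell backing the address is not available yet.
 */
static int example_probe_mac(struct device_node *np, struct net_device *ndev)
{
	int err = of_get_mac_address(np, ndev->dev_addr);

	if (err == -EPROBE_DEFER)
		return err;		/* retry once the NVMEM provider appears */
	if (err)
		eth_hw_addr_random(ndev); /* no usable DT address: randomize */

	return 0;
}

Drivers with additional fallbacks (bootloader-programmed registers, fuses) only randomize after those also fail, as in the fec and enetc hunks above.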
-static int hns_ae_set_autoneg(struct hnae_handle *handle, u8 enable) -{ - assert(handle); - - return hns_mac_set_autoneg(hns_get_mac_cb(handle), enable); -} - static void hns_ae_set_promisc_mode(struct hnae_handle *handle, u32 en) { struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle); @@ -502,17 +495,6 @@ static void hns_ae_set_promisc_mode(struct hnae_handle *handle, u32 en) hns_mac_set_promisc(mac_cb, (u8)!!en); } -static int hns_ae_get_autoneg(struct hnae_handle *handle) -{ - u32 auto_neg; - - assert(handle); - - hns_mac_get_autoneg(hns_get_mac_cb(handle), &auto_neg); - - return auto_neg; -} - static int hns_ae_set_pauseparam(struct hnae_handle *handle, u32 autoneg, u32 rx_en, u32 tx_en) { @@ -648,7 +630,7 @@ static void hns_ae_update_stats(struct hnae_handle *handle, struct hnae_vf_cb *vf_cb = hns_ae_get_vf_cb(handle); u64 tx_bytes = 0, rx_bytes = 0, tx_packets = 0, rx_packets = 0; u64 rx_errors = 0, tx_errors = 0, tx_dropped = 0; - u64 rx_missed_errors = 0; + u64 rx_missed_errors; dsaf_dev = hns_ae_get_dsaf_dev(handle->dev); if (!dsaf_dev) @@ -965,8 +947,6 @@ static struct hnae_ae_ops hns_dsaf_ops = { .set_loopback = hns_ae_config_loopback, .get_ring_bdnum_limit = hns_ae_get_ring_bdnum_limit, .get_pauseparam = hns_ae_get_pauseparam, - .set_autoneg = hns_ae_set_autoneg, - .get_autoneg = hns_ae_get_autoneg, .set_pauseparam = hns_ae_set_pauseparam, .get_coalesce_usecs = hns_ae_get_coalesce_usecs, .get_max_coalesced_frames = hns_ae_get_max_coalesced_frames, diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c index 7fb7a419607d..f387a859a201 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c @@ -130,14 +130,6 @@ static void hns_gmac_get_tx_auto_pause_frames(void *mac_drv, u16 *newval) GMAC_FC_TX_TIMER_M, GMAC_FC_TX_TIMER_S); } -static void hns_gmac_set_rx_auto_pause_frames(void *mac_drv, u32 newval) -{ - struct mac_driver *drv = (struct mac_driver *)mac_drv; - - dsaf_set_dev_bit(drv, GMAC_PAUSE_EN_REG, - GMAC_PAUSE_EN_RX_FDFC_B, !!newval); -} - static void hns_gmac_config_max_frame_length(void *mac_drv, u16 newval) { struct mac_driver *drv = (struct mac_driver *)mac_drv; @@ -179,14 +171,6 @@ static void hns_gmac_tx_loop_pkt_dis(void *mac_drv) dsaf_write_dev(drv, GMAC_TX_LOOP_PKT_PRI_REG, tx_loop_pkt_pri); } -static void hns_gmac_set_duplex_type(void *mac_drv, u8 newval) -{ - struct mac_driver *drv = (struct mac_driver *)mac_drv; - - dsaf_set_dev_bit(drv, GMAC_DUPLEX_TYPE_REG, - GMAC_DUPLEX_TYPE_B, !!newval); -} - static void hns_gmac_get_duplex_type(void *mac_drv, enum hns_gmac_duplex_mdoe *duplex_mode) { @@ -687,17 +671,14 @@ static void hns_gmac_get_stats(void *mac_drv, u64 *data) static void hns_gmac_get_strings(u32 stringset, u8 *data) { - char *buff = (char *)data; + u8 *buff = data; u32 i; if (stringset != ETH_SS_STATS) return; - for (i = 0; i < ARRAY_SIZE(g_gmac_stats_string); i++) { - snprintf(buff, ETH_GSTRING_LEN, "%s", - g_gmac_stats_string[i].desc); - buff = buff + ETH_GSTRING_LEN; - } + for (i = 0; i < ARRAY_SIZE(g_gmac_stats_string); i++) + ethtool_sprintf(&buff, g_gmac_stats_string[i].desc); } static int hns_gmac_get_sset_count(int stringset) @@ -741,8 +722,6 @@ void *hns_gmac_config(struct hns_mac_cb *mac_cb, struct mac_params *mac_param) mac_drv->set_an_mode = hns_gmac_config_an_mode; mac_drv->config_loopback = hns_gmac_config_loopback; mac_drv->config_pad_and_crc = hns_gmac_config_pad_and_crc; - mac_drv->config_half_duplex = 
hns_gmac_set_duplex_type; - mac_drv->set_rx_ignore_pause_frames = hns_gmac_set_rx_auto_pause_frames; mac_drv->get_info = hns_gmac_get_info; mac_drv->autoneg_stat = hns_gmac_autoneg_stat; mac_drv->get_pause_enable = hns_gmac_get_pausefrm_cfg; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index 4a448138b4ec..f4cf569a2599 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -1202,7 +1202,7 @@ void hns_mac_get_regs(struct hns_mac_cb *mac_cb, void *data) void hns_set_led_opt(struct hns_mac_cb *mac_cb) { - int nic_data = 0; + int nic_data; int txpkts, rxpkts; txpkts = mac_cb->txpkt_for_led - mac_cb->hw_stats.tx_good_pkts; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h index 3278bf471ddf..8943ffab4418 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h @@ -364,12 +364,8 @@ struct mac_driver { void (*config_max_frame_length)(void *mac_drv, u16 newval); /*config PAD and CRC enable */ void (*config_pad_and_crc)(void *mac_drv, u8 newval); - /* config duplex mode*/ - void (*config_half_duplex)(void *mac_drv, u8 newval); /*config tx pause time,if pause_time is zero,disable tx pause enable*/ void (*set_tx_auto_pause_frames)(void *mac_drv, u16 pause_time); - /*config rx pause enable*/ - void (*set_rx_ignore_pause_frames)(void *mac_drv, u32 enable); /* config rx mode for promiscuous*/ void (*set_promiscuous)(void *mac_drv, u8 enable); void (*mac_pausefrm_cfg)(void *mac_drv, u32 rx_en, u32 tx_en); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index 87d3db4666df..c2a60612f503 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -1613,7 +1613,7 @@ int hns_dsaf_set_mac_uc_entry( struct dsaf_device *dsaf_dev, struct dsaf_drv_mac_single_dest_entry *mac_entry) { - u16 entry_index = DSAF_INVALID_ENTRY_IDX; + u16 entry_index; struct dsaf_drv_tbl_tcam_key mac_key; struct dsaf_tbl_tcam_ucast_cfg mac_data; struct dsaf_drv_priv *priv = @@ -1679,7 +1679,7 @@ int hns_dsaf_rm_mac_addr( struct dsaf_device *dsaf_dev, struct dsaf_drv_mac_single_dest_entry *mac_entry) { - u16 entry_index = DSAF_INVALID_ENTRY_IDX; + u16 entry_index; struct dsaf_tbl_tcam_ucast_cfg mac_data; struct dsaf_drv_tbl_tcam_key mac_key; @@ -1751,7 +1751,7 @@ static void hns_dsaf_mc_mask_bit_clear(char *dst, const char *src) int hns_dsaf_add_mac_mc_port(struct dsaf_device *dsaf_dev, struct dsaf_drv_mac_single_dest_entry *mac_entry) { - u16 entry_index = DSAF_INVALID_ENTRY_IDX; + u16 entry_index; struct dsaf_drv_tbl_tcam_key mac_key; struct dsaf_drv_tbl_tcam_key mask_key; struct dsaf_tbl_tcam_data *pmask_key = NULL; @@ -1861,7 +1861,7 @@ int hns_dsaf_add_mac_mc_port(struct dsaf_device *dsaf_dev, int hns_dsaf_del_mac_entry(struct dsaf_device *dsaf_dev, u16 vlan_id, u8 in_port_num, u8 *addr) { - u16 entry_index = DSAF_INVALID_ENTRY_IDX; + u16 entry_index; struct dsaf_drv_tbl_tcam_key mac_key; struct dsaf_drv_priv *priv = (struct dsaf_drv_priv *)hns_dsaf_dev_priv(dsaf_dev); @@ -1910,7 +1910,7 @@ int hns_dsaf_del_mac_entry(struct dsaf_device *dsaf_dev, u16 vlan_id, int hns_dsaf_del_mac_mc_port(struct dsaf_device *dsaf_dev, struct dsaf_drv_mac_single_dest_entry *mac_entry) { - u16 entry_index = DSAF_INVALID_ENTRY_IDX; + u16 entry_index; struct 
dsaf_drv_tbl_tcam_key mac_key; struct dsaf_drv_priv *priv = hns_dsaf_dev_priv(dsaf_dev); struct dsaf_drv_soft_mac_tbl *soft_mac_entry = priv->soft_mac_tbl; @@ -2264,7 +2264,7 @@ void hns_dsaf_update_stats(struct dsaf_device *dsaf_dev, u32 node_num) */ void hns_dsaf_get_regs(struct dsaf_device *ddev, u32 port, void *data) { - u32 i = 0; + u32 i; u32 j; u32 *p = data; u32 reg_tmp; @@ -2768,7 +2768,7 @@ static void set_promisc_tcam_enable(struct dsaf_device *dsaf_dev, u32 port) struct dsaf_drv_mac_single_dest_entry mask_entry; struct dsaf_drv_tbl_tcam_key temp_key, mask_key; struct dsaf_drv_soft_mac_tbl *soft_mac_entry; - u16 entry_index = DSAF_INVALID_ENTRY_IDX; + u16 entry_index; struct dsaf_drv_tbl_tcam_key mac_key; struct hns_mac_cb *mac_cb; u8 addr[ETH_ALEN] = {0}; @@ -2870,7 +2870,7 @@ static void set_promisc_tcam_disable(struct dsaf_device *dsaf_dev, u32 port) struct dsaf_tbl_tcam_data tbl_tcam_data_uc = {0, 0}; struct dsaf_tbl_tcam_data tbl_tcam_mask = {0, 0}; struct dsaf_drv_soft_mac_tbl *soft_mac_entry; - u16 entry_index = DSAF_INVALID_ENTRY_IDX; + u16 entry_index; struct dsaf_drv_tbl_tcam_key mac_key; u8 addr[ETH_ALEN] = {0}; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c index 173d6966c1a3..325e81d30cfd 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c @@ -686,7 +686,7 @@ hns_mac_config_sds_loopback_acpi(struct hns_mac_cb *mac_cb, bool en) obj_args[0].integer.type = ACPI_TYPE_INTEGER; obj_args[0].integer.value = mac_cb->mac_id; obj_args[1].integer.type = ACPI_TYPE_INTEGER; - obj_args[1].integer.value = !!en; + obj_args[1].integer.value = en; argv4.type = ACPI_TYPE_PACKAGE; argv4.package.count = 2; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c index d0f8b1fff333..ff03cafccb66 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c @@ -462,33 +462,22 @@ int hns_ppe_get_regs_count(void) */ void hns_ppe_get_strings(struct hns_ppe_cb *ppe_cb, int stringset, u8 *data) { - char *buff = (char *)data; int index = ppe_cb->index; - - snprintf(buff, ETH_GSTRING_LEN, "ppe%d_rx_sw_pkt", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "ppe%d_rx_pkt_ok", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "ppe%d_rx_drop_pkt_no_bd", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "ppe%d_rx_alloc_buf_fail", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "ppe%d_rx_alloc_buf_wait", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "ppe%d_rx_pkt_drop_no_buf", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "ppe%d_rx_pkt_err_fifo_full", index); - buff = buff + ETH_GSTRING_LEN; - - snprintf(buff, ETH_GSTRING_LEN, "ppe%d_tx_bd", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "ppe%d_tx_pkt", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "ppe%d_tx_pkt_ok", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "ppe%d_tx_pkt_err_fifo_empty", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "ppe%d_tx_pkt_err_csum_fail", index); + u8 *buff = data; + + ethtool_sprintf(&buff, "ppe%d_rx_sw_pkt", index); + ethtool_sprintf(&buff, "ppe%d_rx_pkt_ok", index); + ethtool_sprintf(&buff, 
"ppe%d_rx_drop_pkt_no_bd", index); + ethtool_sprintf(&buff, "ppe%d_rx_alloc_buf_fail", index); + ethtool_sprintf(&buff, "ppe%d_rx_alloc_buf_wait", index); + ethtool_sprintf(&buff, "ppe%d_rx_pkt_drop_no_buf", index); + ethtool_sprintf(&buff, "ppe%d_rx_pkt_err_fifo_full", index); + + ethtool_sprintf(&buff, "ppe%d_tx_bd", index); + ethtool_sprintf(&buff, "ppe%d_tx_pkt", index); + ethtool_sprintf(&buff, "ppe%d_tx_pkt_ok", index); + ethtool_sprintf(&buff, "ppe%d_tx_pkt_err_fifo_empty", index); + ethtool_sprintf(&buff, "ppe%d_tx_pkt_err_csum_fail", index); } void hns_ppe_get_stats(struct hns_ppe_cb *ppe_cb, u64 *data) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c index b6c8910cf7ba..5d5dc6942232 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c @@ -929,69 +929,42 @@ int hns_rcb_get_ring_regs_count(void) */ void hns_rcb_get_strings(int stringset, u8 *data, int index) { - char *buff = (char *)data; + u8 *buff = data; if (stringset != ETH_SS_STATS) return; - snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_rcb_pkt_num", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_ppe_tx_pkt_num", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_ppe_drop_pkt_num", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_fbd_num", index); - buff = buff + ETH_GSTRING_LEN; - - snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_pkt_num", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_bytes", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_err_cnt", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_io_err", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_sw_err", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_seg_pkt", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_restart_queue", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "tx_ring%d_tx_busy", index); - buff = buff + ETH_GSTRING_LEN; - - snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_rcb_pkt_num", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_ppe_pkt_num", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_ppe_drop_pkt_num", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_fbd_num", index); - buff = buff + ETH_GSTRING_LEN; - - snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_pkt_num", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_bytes", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_err_cnt", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_io_err", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_sw_err", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_seg_pkt", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_reuse_pg", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_len_err", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_non_vld_desc_err", index); - buff = buff + ETH_GSTRING_LEN; - 
snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_bd_num_err", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_l2_err", index); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "rx_ring%d_l3l4csum_err", index); + ethtool_sprintf(&buff, "tx_ring%d_rcb_pkt_num", index); + ethtool_sprintf(&buff, "tx_ring%d_ppe_tx_pkt_num", index); + ethtool_sprintf(&buff, "tx_ring%d_ppe_drop_pkt_num", index); + ethtool_sprintf(&buff, "tx_ring%d_fbd_num", index); + + ethtool_sprintf(&buff, "tx_ring%d_pkt_num", index); + ethtool_sprintf(&buff, "tx_ring%d_bytes", index); + ethtool_sprintf(&buff, "tx_ring%d_err_cnt", index); + ethtool_sprintf(&buff, "tx_ring%d_io_err", index); + ethtool_sprintf(&buff, "tx_ring%d_sw_err", index); + ethtool_sprintf(&buff, "tx_ring%d_seg_pkt", index); + ethtool_sprintf(&buff, "tx_ring%d_restart_queue", index); + ethtool_sprintf(&buff, "tx_ring%d_tx_busy", index); + + ethtool_sprintf(&buff, "rx_ring%d_rcb_pkt_num", index); + ethtool_sprintf(&buff, "rx_ring%d_ppe_pkt_num", index); + ethtool_sprintf(&buff, "rx_ring%d_ppe_drop_pkt_num", index); + ethtool_sprintf(&buff, "rx_ring%d_fbd_num", index); + + ethtool_sprintf(&buff, "rx_ring%d_pkt_num", index); + ethtool_sprintf(&buff, "rx_ring%d_bytes", index); + ethtool_sprintf(&buff, "rx_ring%d_err_cnt", index); + ethtool_sprintf(&buff, "rx_ring%d_io_err", index); + ethtool_sprintf(&buff, "rx_ring%d_sw_err", index); + ethtool_sprintf(&buff, "rx_ring%d_seg_pkt", index); + ethtool_sprintf(&buff, "rx_ring%d_reuse_pg", index); + ethtool_sprintf(&buff, "rx_ring%d_len_err", index); + ethtool_sprintf(&buff, "rx_ring%d_non_vld_desc_err", index); + ethtool_sprintf(&buff, "rx_ring%d_bd_num_err", index); + ethtool_sprintf(&buff, "rx_ring%d_l2_err", index); + ethtool_sprintf(&buff, "rx_ring%d_l3l4csum_err", index); } void hns_rcb_get_common_regs(struct rcb_common_cb *rcb_com, void *data) @@ -1001,7 +974,7 @@ void hns_rcb_get_common_regs(struct rcb_common_cb *rcb_com, void *data) bool is_dbg = HNS_DSAF_IS_DEBUG(rcb_com->dsaf_dev); u32 reg_tmp; u32 reg_num_tmp; - u32 i = 0; + u32 i; /*rcb common registers */ regs[0] = dsaf_read_dev(rcb_com, RCB_COM_CFG_ENDIAN_REG); @@ -1072,7 +1045,7 @@ void hns_rcb_get_ring_regs(struct hnae_queue *queue, void *data) u32 *regs = data; struct ring_pair_cb *ring_pair = container_of(queue, struct ring_pair_cb, q); - u32 i = 0; + u32 i; /*rcb ring registers */ regs[0] = dsaf_read_dev(queue, RCB_RING_RX_RING_BASEADDR_L_REG); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c index 7e3609ce112a..be52acd448f9 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c @@ -267,19 +267,6 @@ static void hns_xgmac_set_pausefrm_mac_addr(void *mac_drv, char *mac_addr) } /** - *hns_xgmac_set_rx_ignore_pause_frames - set rx pause param about xgmac - *@mac_drv: mac driver - *@enable:enable rx pause param - */ -static void hns_xgmac_set_rx_ignore_pause_frames(void *mac_drv, u32 enable) -{ - struct mac_driver *drv = (struct mac_driver *)mac_drv; - - dsaf_set_dev_bit(drv, XGMAC_MAC_PAUSE_CTRL_REG, - XGMAC_PAUSE_CTL_RX_B, !!enable); -} - -/** *hns_xgmac_set_tx_auto_pause_frames - set tx pause param about xgmac *@mac_drv: mac driver *@enable:enable tx pause param @@ -495,7 +482,7 @@ static void hns_xgmac_get_link_status(void *mac_drv, u32 *link_stat) */ static void hns_xgmac_get_regs(void *mac_drv, void *data) { - u32 i = 0; + u32 i; struct mac_driver *drv = (struct 
mac_driver *)mac_drv; u32 *regs = data; u64 qtmp; @@ -758,16 +745,14 @@ static void hns_xgmac_get_stats(void *mac_drv, u64 *data) */ static void hns_xgmac_get_strings(u32 stringset, u8 *data) { - char *buff = (char *)data; + u8 *buff = data; u32 i; if (stringset != ETH_SS_STATS) return; - for (i = 0; i < ARRAY_SIZE(g_xgmac_stats_string); i++) { - snprintf(buff, ETH_GSTRING_LEN, g_xgmac_stats_string[i].desc); - buff = buff + ETH_GSTRING_LEN; - } + for (i = 0; i < ARRAY_SIZE(g_xgmac_stats_string); i++) + ethtool_sprintf(&buff, g_xgmac_stats_string[i].desc); } /** @@ -814,9 +799,6 @@ void *hns_xgmac_config(struct hns_mac_cb *mac_cb, struct mac_params *mac_param) mac_drv->set_an_mode = NULL; mac_drv->config_loopback = NULL; mac_drv->config_pad_and_crc = hns_xgmac_config_pad_and_crc; - mac_drv->config_half_duplex = NULL; - mac_drv->set_rx_ignore_pause_frames = - hns_xgmac_set_rx_ignore_pause_frames; mac_drv->mac_free = hns_xgmac_free; mac_drv->adjust_link = NULL; mac_drv->set_tx_auto_pause_frames = hns_xgmac_set_tx_auto_pause_frames; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index c66a7a51198e..5e349c0bdecc 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -872,7 +872,7 @@ out: static bool hns_nic_rx_fini_pro(struct hns_nic_ring_data *ring_data) { struct hnae_ring *ring = ring_data->ring; - int num = 0; + int num; bool rx_stopped; hns_update_rx_rate(ring); @@ -1235,7 +1235,7 @@ static int hns_nic_init_affinity_mask(int q_num, int ring_idx, { int cpu; - /* Diffrent irq banlance between 16core and 32core. + /* Different irq balance between 16core and 32core. * The cpu mask set by ring index according to the ring flag * which indicate the ring is tx or rx. */ @@ -1592,7 +1592,7 @@ static void hns_disable_serdes_lb(struct net_device *ndev) * which buffer size is 4096. * 2. we set the chip serdes loopback and set rss indirection to the ring. * 3. construct 64-bytes ip broadcast packages, wait the associated rx ring - * recieving all packages and it will fetch new descriptions. + * receiving all packages and it will fetch new descriptions. * 4. recover to the original state. * *@ndev: net device @@ -1621,7 +1621,7 @@ static int hns_nic_clear_all_rx_fetch(struct net_device *ndev) if (!org_indir) return -ENOMEM; - /* store the orginal indirection */ + /* store the original indirection */ ops->get_rss(h, org_indir, NULL, NULL); cur_indir = kzalloc(indir_size, GFP_KERNEL); @@ -1881,7 +1881,7 @@ static void hns_nic_set_rx_mode(struct net_device *ndev) static void hns_nic_get_stats64(struct net_device *ndev, struct rtnl_link_stats64 *stats) { - int idx = 0; + int idx; u64 tx_bytes = 0; u64 rx_bytes = 0; u64 tx_pkts = 0; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index a6e3f07caf99..da48c05435ea 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -17,7 +17,6 @@ #define HNS_PHY_CSC_REG 16 /* Copper Specific Control Register */ #define HNS_PHY_CSS_REG 17 /* Copper Specific Status Register */ #define HNS_LED_FC_REG 16 /* LED Function Control Reg. */ -#define HNS_LED_PC_REG 17 /* LED Polarity Control Reg. 
*/ #define HNS_LED_FORCE_ON 9 #define HNS_LED_FORCE_OFF 8 @@ -480,7 +479,7 @@ static int __lb_run_test(struct net_device *ndev, #define NIC_LB_TEST_NO_MEM_ERR 1 #define NIC_LB_TEST_TX_CNT_ERR 2 #define NIC_LB_TEST_RX_CNT_ERR 3 -#define NIC_LB_TEST_RX_PKG_ERR 4 + struct hns_nic_priv *priv = netdev_priv(ndev); struct hnae_handle *h = priv->ae_handle; int i, j, lc, good_cnt, ret_val = 0; @@ -895,7 +894,7 @@ static void hns_get_strings(struct net_device *netdev, u32 stringset, u8 *data) { struct hns_nic_priv *priv = netdev_priv(netdev); struct hnae_handle *h = priv->ae_handle; - char *buff = (char *)data; + u8 *buff = data; if (!h->dev->ops->get_strings) { netdev_err(netdev, "h->dev->ops->get_strings is null!\n"); @@ -903,74 +902,45 @@ static void hns_get_strings(struct net_device *netdev, u32 stringset, u8 *data) } if (stringset == ETH_SS_TEST) { - if (priv->ae_handle->phy_if != PHY_INTERFACE_MODE_XGMII) { - memcpy(buff, hns_nic_test_strs[MAC_INTERNALLOOP_MAC], - ETH_GSTRING_LEN); - buff += ETH_GSTRING_LEN; - } - memcpy(buff, hns_nic_test_strs[MAC_INTERNALLOOP_SERDES], - ETH_GSTRING_LEN); - buff += ETH_GSTRING_LEN; + if (priv->ae_handle->phy_if != PHY_INTERFACE_MODE_XGMII) + ethtool_sprintf(&buff, + hns_nic_test_strs[MAC_INTERNALLOOP_MAC]); + ethtool_sprintf(&buff, + hns_nic_test_strs[MAC_INTERNALLOOP_SERDES]); if ((netdev->phydev) && (!netdev->phydev->is_c45)) - memcpy(buff, hns_nic_test_strs[MAC_INTERNALLOOP_PHY], - ETH_GSTRING_LEN); + ethtool_sprintf(&buff, + hns_nic_test_strs[MAC_INTERNALLOOP_PHY]); } else { - snprintf(buff, ETH_GSTRING_LEN, "rx_packets"); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "tx_packets"); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "rx_bytes"); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "tx_bytes"); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "rx_errors"); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "tx_errors"); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "rx_dropped"); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "tx_dropped"); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "multicast"); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "collisions"); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "rx_over_errors"); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "rx_crc_errors"); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "rx_frame_errors"); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "rx_fifo_errors"); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "rx_missed_errors"); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "tx_aborted_errors"); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "tx_carrier_errors"); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "tx_fifo_errors"); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "tx_heartbeat_errors"); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "rx_length_errors"); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "tx_window_errors"); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "rx_compressed"); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "tx_compressed"); - buff = buff + ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "netdev_rx_dropped"); - buff = buff + 
ETH_GSTRING_LEN; - snprintf(buff, ETH_GSTRING_LEN, "netdev_tx_dropped"); - buff = buff + ETH_GSTRING_LEN; - - snprintf(buff, ETH_GSTRING_LEN, "netdev_tx_timeout"); - buff = buff + ETH_GSTRING_LEN; - - h->dev->ops->get_strings(h, stringset, (u8 *)buff); + ethtool_sprintf(&buff, "rx_packets"); + ethtool_sprintf(&buff, "tx_packets"); + ethtool_sprintf(&buff, "rx_bytes"); + ethtool_sprintf(&buff, "tx_bytes"); + ethtool_sprintf(&buff, "rx_errors"); + ethtool_sprintf(&buff, "tx_errors"); + ethtool_sprintf(&buff, "rx_dropped"); + ethtool_sprintf(&buff, "tx_dropped"); + ethtool_sprintf(&buff, "multicast"); + ethtool_sprintf(&buff, "collisions"); + ethtool_sprintf(&buff, "rx_over_errors"); + ethtool_sprintf(&buff, "rx_crc_errors"); + ethtool_sprintf(&buff, "rx_frame_errors"); + ethtool_sprintf(&buff, "rx_fifo_errors"); + ethtool_sprintf(&buff, "rx_missed_errors"); + ethtool_sprintf(&buff, "tx_aborted_errors"); + ethtool_sprintf(&buff, "tx_carrier_errors"); + ethtool_sprintf(&buff, "tx_fifo_errors"); + ethtool_sprintf(&buff, "tx_heartbeat_errors"); + ethtool_sprintf(&buff, "rx_length_errors"); + ethtool_sprintf(&buff, "tx_window_errors"); + ethtool_sprintf(&buff, "rx_compressed"); + ethtool_sprintf(&buff, "tx_compressed"); + ethtool_sprintf(&buff, "netdev_rx_dropped"); + ethtool_sprintf(&buff, "netdev_tx_dropped"); + + ethtool_sprintf(&buff, "netdev_tx_timeout"); + + h->dev->ops->get_strings(h, stringset, buff); } } diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h index 33defa4c180a..a2c17af57fde 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h +++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h @@ -172,4 +172,7 @@ struct hclgevf_mbx_arq_ring { (arq.tail = (arq.tail + 1) % HCLGE_MBX_MAX_ARQ_MSG_NUM) #define hclge_mbx_head_ptr_move_arq(arq) \ (arq.head = (arq.head + 1) % HCLGE_MBX_MAX_ARQ_MSG_NUM) + +/* PF immediately push link status to VFs when link status changed */ +#define HCLGE_MBX_PUSH_LINK_STATUS_EN BIT(0) #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index e9e60a935f40..1d2189047781 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -90,6 +90,7 @@ enum HNAE3_DEV_CAP_BITS { HNAE3_DEV_SUPPORT_HW_PAD_B, HNAE3_DEV_SUPPORT_STASH_B, HNAE3_DEV_SUPPORT_UDP_TUNNEL_CSUM_B, + HNAE3_DEV_SUPPORT_PAUSE_B, }; #define hnae3_dev_fd_supported(hdev) \ @@ -134,6 +135,9 @@ enum HNAE3_DEV_CAP_BITS { #define hnae3_dev_stash_supported(hdev) \ test_bit(HNAE3_DEV_SUPPORT_STASH_B, (hdev)->ae_dev->caps) +#define hnae3_dev_pause_supported(hdev) \ + test_bit(HNAE3_DEV_SUPPORT_PAUSE_B, (hdev)->ae_dev->caps) + #define hnae3_ae_dev_tqp_txrx_indep_supported(ae_dev) \ test_bit(HNAE3_DEV_SUPPORT_TQP_TXRX_INDEP_B, (ae_dev)->caps) @@ -470,8 +474,9 @@ struct hnae3_ae_dev { struct hnae3_ae_ops { int (*init_ae_dev)(struct hnae3_ae_dev *ae_dev); void (*uninit_ae_dev)(struct hnae3_ae_dev *ae_dev); - void (*flr_prepare)(struct hnae3_ae_dev *ae_dev); - void (*flr_done)(struct hnae3_ae_dev *ae_dev); + void (*reset_prepare)(struct hnae3_ae_dev *ae_dev, + enum hnae3_reset_type rst_type); + void (*reset_done)(struct hnae3_ae_dev *ae_dev); int (*init_client_instance)(struct hnae3_client *client, struct hnae3_ae_dev *ae_dev); void (*uninit_client_instance)(struct hnae3_client *client, @@ -575,7 +580,7 @@ struct hnae3_ae_ops { int vector_num, struct hnae3_ring_chain_node *vr_chain); - int (*reset_queue)(struct hnae3_handle *handle, u16 queue_id); 
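[Editorial sketch] The hns string-set rewrites above (gmac, ppe, rcb, xgmac, hns_ethtool) all switch to ethtool_sprintf(), which formats one ETH_GSTRING_LEN-sized entry and advances the caller's cursor in place, eliminating the repeated snprintf() plus "buff += ETH_GSTRING_LEN" bookkeeping. A minimal sketch with hypothetical stat names:

#include <linux/ethtool.h>

/* Hypothetical .get_strings() fragment showing the ethtool_sprintf()
 * idiom adopted above: each call fills one fixed ETH_GSTRING_LEN slot
 * (still truncating at that size) and bumps the cursor, so no manual
 * pointer arithmetic is needed.
 */
static void example_get_strings(u8 *data, int ring)
{
	u8 *buff = data;

	ethtool_sprintf(&buff, "ring%d_pkt_num", ring);
	ethtool_sprintf(&buff, "ring%d_bytes", ring);
	ethtool_sprintf(&buff, "ring%d_err_cnt", ring);
}

Truncation behaviour is unchanged relative to the snprintf() version; only the cursor handling moves into the helper.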
+ int (*reset_queue)(struct hnae3_handle *handle); u32 (*get_fw_version)(struct hnae3_handle *handle); void (*get_mdix_mode)(struct hnae3_handle *handle, u8 *tp_mdix_ctrl, u8 *tp_mdix); @@ -608,8 +613,6 @@ struct hnae3_ae_ops { struct ethtool_rxnfc *cmd); int (*del_fd_entry)(struct hnae3_handle *handle, struct ethtool_rxnfc *cmd); - void (*del_all_fd_entries)(struct hnae3_handle *handle, - bool clear_list); int (*get_fd_rule_cnt)(struct hnae3_handle *handle, struct ethtool_rxnfc *cmd); int (*get_fd_rule_info)(struct hnae3_handle *handle, @@ -649,6 +652,10 @@ struct hnae3_ae_ops { int (*del_cls_flower)(struct hnae3_handle *handle, struct flow_cls_offload *cls_flower); bool (*cls_flower_active)(struct hnae3_handle *handle); + int (*get_phy_link_ksettings)(struct hnae3_handle *handle, + struct ethtool_link_ksettings *cmd); + int (*set_phy_link_ksettings)(struct hnae3_handle *handle, + const struct ethtool_link_ksettings *cmd); }; struct hnae3_dcb_ops { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index dd11c57027bb..9d702bd0c7c1 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -362,6 +362,11 @@ static void hns3_dbg_dev_caps(struct hnae3_handle *h) dev_info(&h->pdev->dev, "support UDP tunnel csum: %s\n", test_bit(HNAE3_DEV_SUPPORT_UDP_TUNNEL_CSUM_B, caps) ? "yes" : "no"); + dev_info(&h->pdev->dev, "support PAUSE: %s\n", + test_bit(HNAE3_DEV_SUPPORT_PAUSE_B, ae_dev->caps) ? + "yes" : "no"); + dev_info(&h->pdev->dev, "support imp-controlled PHY: %s\n", + test_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, caps) ? "yes" : "no"); } static void hns3_dbg_dev_specs(struct hnae3_handle *h) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index bf4302a5cf95..c21dd11baed9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -210,7 +210,6 @@ void hns3_set_vector_coalesce_rl(struct hns3_enet_tqp_vector *tqp_vector, * Rl defines rate of interrupts i.e. 
number of interrupts-per-second * GL and RL(Rate Limiter) are 2 ways to achieve interrupt coalescing */ - if (rl_reg > 0 && !tqp_vector->tx_group.coal.adapt_enable && !tqp_vector->rx_group.coal.adapt_enable) /* According to the hardware, the range of rl_reg is @@ -695,7 +694,7 @@ void hns3_enable_vlan_filter(struct net_device *netdev, bool enable) } static int hns3_set_tso(struct sk_buff *skb, u32 *paylen_fdop_ol4cs, - u16 *mss, u32 *type_cs_vlan_tso) + u16 *mss, u32 *type_cs_vlan_tso, u32 *send_bytes) { u32 l4_offset, hdr_len; union l3_hdr_info l3; @@ -751,6 +750,8 @@ static int hns3_set_tso(struct sk_buff *skb, u32 *paylen_fdop_ol4cs, (__force __wsum)htonl(l4_paylen)); } + *send_bytes = (skb_shinfo(skb)->gso_segs - 1) * hdr_len + skb->len; + /* find the txbd field values */ *paylen_fdop_ol4cs = skb->len - hdr_len; hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_TSO_B, 1); @@ -883,7 +884,6 @@ static void hns3_set_outer_l2l3l4(struct sk_buff *skb, u8 ol4_proto, hns3_set_field(*ol_type_vlan_len_msec, HNS3_TXD_OL3T_S, HNS3_OL3T_IPV4_NO_CSUM); - } else if (skb->protocol == htons(ETH_P_IPV6)) { hns3_set_field(*ol_type_vlan_len_msec, HNS3_TXD_OL3T_S, HNS3_OL3T_IPV6); @@ -1078,7 +1078,8 @@ static bool hns3_check_hw_tx_csum(struct sk_buff *skb) } static int hns3_fill_skb_desc(struct hns3_enet_ring *ring, - struct sk_buff *skb, struct hns3_desc *desc) + struct sk_buff *skb, struct hns3_desc *desc, + struct hns3_desc_cb *desc_cb) { u32 ol_type_vlan_len_msec = 0; u32 paylen_ol4cs = skb->len; @@ -1107,6 +1108,8 @@ static int hns3_fill_skb_desc(struct hns3_enet_ring *ring, 1); } + desc_cb->send_bytes = skb->len; + if (skb->ip_summed == CHECKSUM_PARTIAL) { u8 ol4_proto, il4_proto; @@ -1142,7 +1145,7 @@ static int hns3_fill_skb_desc(struct hns3_enet_ring *ring, } ret = hns3_set_tso(skb, &paylen_ol4cs, &mss_hw_csum, - &type_cs_vlan_tso); + &type_cs_vlan_tso, &desc_cb->send_bytes); if (unlikely(ret < 0)) { u64_stats_update_begin(&ring->syncp); ring->stats.tx_tso_err++; @@ -1277,31 +1280,29 @@ static unsigned int hns3_skb_bd_num(struct sk_buff *skb, unsigned int *bd_size, } static unsigned int hns3_tx_bd_num(struct sk_buff *skb, unsigned int *bd_size, - u8 max_non_tso_bd_num) + u8 max_non_tso_bd_num, unsigned int bd_num, + unsigned int recursion_level) { +#define HNS3_MAX_RECURSION_LEVEL 24 + struct sk_buff *frag_skb; - unsigned int bd_num = 0; /* If the total len is within the max bd limit */ - if (likely(skb->len <= HNS3_MAX_BD_SIZE && !skb_has_frag_list(skb) && + if (likely(skb->len <= HNS3_MAX_BD_SIZE && !recursion_level && + !skb_has_frag_list(skb) && skb_shinfo(skb)->nr_frags < max_non_tso_bd_num)) return skb_shinfo(skb)->nr_frags + 1U; - /* The below case will always be linearized, return - * HNS3_MAX_BD_NUM_TSO + 1U to make sure it is linearized.
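[Editorial sketch, not part of the patch] The new send_bytes out-parameter filled by hns3_set_tso() above accounts for header replication: skb->len contains the protocol headers only once, but every one of the gso_segs segments emitted by the hardware carries its own header copy, so (gso_segs - 1) * hdr_len extra bytes go on the wire and must be included in byte-queue-limit accounting. For instance, gso_segs = 3, hdr_len = 66 and skb->len = 3066 gives 2 * 66 + 3066 = 3198 wire bytes. As a standalone helper (example_tso_wire_bytes() is hypothetical):

#include <linux/skbuff.h>

static unsigned int example_tso_wire_bytes(struct sk_buff *skb,
                                           unsigned int hdr_len)
{
        /* headers are counted once in skb->len; add the copies carried
         * by the remaining gso_segs - 1 segments
         */
        return (skb_shinfo(skb)->gso_segs - 1) * hdr_len + skb->len;
}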
- */ - if (unlikely(skb->len > HNS3_MAX_TSO_SIZE || - (!skb_is_gso(skb) && skb->len > - HNS3_MAX_NON_TSO_SIZE(max_non_tso_bd_num)))) - return HNS3_MAX_TSO_BD_NUM + 1U; + if (unlikely(recursion_level >= HNS3_MAX_RECURSION_LEVEL)) + return UINT_MAX; bd_num = hns3_skb_bd_num(skb, bd_size, bd_num); - if (!skb_has_frag_list(skb) || bd_num > HNS3_MAX_TSO_BD_NUM) return bd_num; skb_walk_frags(skb, frag_skb) { - bd_num = hns3_skb_bd_num(frag_skb, bd_size, bd_num); + bd_num = hns3_tx_bd_num(frag_skb, bd_size, max_non_tso_bd_num, + bd_num, recursion_level + 1); if (bd_num > HNS3_MAX_TSO_BD_NUM) return bd_num; } @@ -1361,6 +1362,43 @@ void hns3_shinfo_pack(struct skb_shared_info *shinfo, __u32 *size) size[i] = skb_frag_size(&shinfo->frags[i]); } +static int hns3_skb_linearize(struct hns3_enet_ring *ring, + struct sk_buff *skb, + u8 max_non_tso_bd_num, + unsigned int bd_num) +{ + /* 'bd_num == UINT_MAX' means the skb's fraglist has a + * recursion level of over HNS3_MAX_RECURSION_LEVEL. + */ + if (bd_num == UINT_MAX) { + u64_stats_update_begin(&ring->syncp); + ring->stats.over_max_recursion++; + u64_stats_update_end(&ring->syncp); + return -ENOMEM; + } + + /* The skb->len has exceeded the hw limitation, linearization + * will not help. + */ + if (skb->len > HNS3_MAX_TSO_SIZE || + (!skb_is_gso(skb) && skb->len > + HNS3_MAX_NON_TSO_SIZE(max_non_tso_bd_num))) { + u64_stats_update_begin(&ring->syncp); + ring->stats.hw_limitation++; + u64_stats_update_end(&ring->syncp); + return -ENOMEM; + } + + if (__skb_linearize(skb)) { + u64_stats_update_begin(&ring->syncp); + ring->stats.sw_err_cnt++; + u64_stats_update_end(&ring->syncp); + return -ENOMEM; + } + + return 0; +} + static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring, struct net_device *netdev, struct sk_buff *skb) @@ -1370,7 +1408,7 @@ static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring, unsigned int bd_size[HNS3_MAX_TSO_BD_NUM + 1U]; unsigned int bd_num; - bd_num = hns3_tx_bd_num(skb, bd_size, max_non_tso_bd_num); + bd_num = hns3_tx_bd_num(skb, bd_size, max_non_tso_bd_num, 0, 0); if (unlikely(bd_num > max_non_tso_bd_num)) { if (bd_num <= HNS3_MAX_TSO_BD_NUM && skb_is_gso(skb) && !hns3_skb_need_linearized(skb, bd_size, bd_num, @@ -1379,16 +1417,11 @@ static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring, goto out; } - if (__skb_linearize(skb)) + if (hns3_skb_linearize(ring, skb, max_non_tso_bd_num, + bd_num)) return -ENOMEM; bd_num = hns3_tx_bd_count(skb->len); - if ((skb_is_gso(skb) && bd_num > HNS3_MAX_TSO_BD_NUM) || - (!skb_is_gso(skb) && - bd_num > max_non_tso_bd_num)) { - trace_hns3_over_max_bd(skb); - return -ENOMEM; - } u64_stats_update_begin(&ring->syncp); ring->stats.tx_copy++; @@ -1412,6 +1445,10 @@ out: return bd_num; } + u64_stats_update_begin(&ring->syncp); + ring->stats.tx_busy++; + u64_stats_update_end(&ring->syncp); + return -EBUSY; } @@ -1459,6 +1496,7 @@ static int hns3_fill_skb_to_desc(struct hns3_enet_ring *ring, struct sk_buff *skb, enum hns_desc_type type) { unsigned int size = skb_headlen(skb); + struct sk_buff *frag_skb; int i, ret, bd_num = 0; if (size) { @@ -1483,6 +1521,15 @@ static int hns3_fill_skb_to_desc(struct hns3_enet_ring *ring, bd_num += ret; } + skb_walk_frags(skb, frag_skb) { + ret = hns3_fill_skb_to_desc(ring, frag_skb, + DESC_TYPE_FRAGLIST_SKB); + if (unlikely(ret < 0)) + return ret; + + bd_num += ret; + } + return bd_num; } @@ -1511,16 +1558,20 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) { struct hns3_nic_priv *priv = netdev_priv(netdev); struct
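[Editorial sketch, not part of the patch] The reworked hns3_tx_bd_num() above descends into nested fraglist skbs but refuses to recurse deeper than HNS3_MAX_RECURSION_LEVEL, returning UINT_MAX as a sentinel that hns3_skb_linearize() then maps onto the new over_max_recursion drop counter. The guard pattern in isolation (example_count_segs() is hypothetical):

#include <linux/kernel.h>
#include <linux/skbuff.h>

#define EXAMPLE_MAX_LEVEL 24 /* mirrors HNS3_MAX_RECURSION_LEVEL */

static unsigned int example_count_segs(struct sk_buff *skb,
                                       unsigned int level)
{
        struct sk_buff *frag;
        unsigned int n = 1; /* this skb itself */

        if (level >= EXAMPLE_MAX_LEVEL)
                return UINT_MAX; /* sentinel: caller must drop the skb */

        skb_walk_frags(skb, frag) {
                unsigned int sub = example_count_segs(frag, level + 1);

                if (sub == UINT_MAX)
                        return UINT_MAX; /* propagate the overflow */
                n += sub;
        }

        return n;
}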
hns3_enet_ring *ring = &priv->ring[skb->queue_mapping]; + struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use]; struct netdev_queue *dev_queue; int pre_ntu, next_to_use_head; - struct sk_buff *frag_skb; - int bd_num = 0; bool doorbell; int ret; /* Hardware can only handle short frames above 32 bytes */ if (skb_put_padto(skb, HNS3_MIN_TX_LEN)) { hns3_tx_doorbell(ring, 0, !netdev_xmit_more()); + + u64_stats_update_begin(&ring->syncp); + ring->stats.sw_err_cnt++; + u64_stats_update_end(&ring->syncp); + return NETDEV_TX_OK; } @@ -1530,15 +1581,8 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) ret = hns3_nic_maybe_stop_tx(ring, netdev, skb); if (unlikely(ret <= 0)) { if (ret == -EBUSY) { - u64_stats_update_begin(&ring->syncp); - ring->stats.tx_busy++; - u64_stats_update_end(&ring->syncp); hns3_tx_doorbell(ring, 0, true); return NETDEV_TX_BUSY; - } else if (ret == -ENOMEM) { - u64_stats_update_begin(&ring->syncp); - ring->stats.sw_err_cnt++; - u64_stats_update_end(&ring->syncp); } hns3_rl_err(netdev, "xmit error: %d!\n", ret); @@ -1547,25 +1591,19 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) next_to_use_head = ring->next_to_use; - ret = hns3_fill_skb_desc(ring, skb, &ring->desc[ring->next_to_use]); + ret = hns3_fill_skb_desc(ring, skb, &ring->desc[ring->next_to_use], + desc_cb); if (unlikely(ret < 0)) goto fill_err; + /* 'ret < 0' means filling error, 'ret == 0' means skb->len is + * zero, which is unlikely, and 'ret > 0' means how many tx desc + * need to be notified to the hw. + */ ret = hns3_fill_skb_to_desc(ring, skb, DESC_TYPE_SKB); - if (unlikely(ret < 0)) + if (unlikely(ret <= 0)) goto fill_err; - bd_num += ret; - - skb_walk_frags(skb, frag_skb) { - ret = hns3_fill_skb_to_desc(ring, frag_skb, - DESC_TYPE_FRAGLIST_SKB); - if (unlikely(ret < 0)) - goto fill_err; - - bd_num += ret; - } - pre_ntu = ring->next_to_use ? 
(ring->next_to_use - 1) : (ring->desc_num - 1); ring->desc[pre_ntu].tx.bdtp_fe_sc_vld_ra_ri |= @@ -1574,9 +1612,9 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) /* Complete translate all packets */ dev_queue = netdev_get_tx_queue(netdev, ring->queue_index); - doorbell = __netdev_tx_sent_queue(dev_queue, skb->len, + doorbell = __netdev_tx_sent_queue(dev_queue, desc_cb->send_bytes, netdev_xmit_more()); - hns3_tx_doorbell(ring, bd_num, doorbell); + hns3_tx_doorbell(ring, ret, doorbell); return NETDEV_TX_OK; @@ -1748,11 +1786,15 @@ static void hns3_nic_get_stats64(struct net_device *netdev, tx_drop += ring->stats.tx_l4_proto_err; tx_drop += ring->stats.tx_l2l3l4_err; tx_drop += ring->stats.tx_tso_err; + tx_drop += ring->stats.over_max_recursion; + tx_drop += ring->stats.hw_limitation; tx_errors += ring->stats.sw_err_cnt; tx_errors += ring->stats.tx_vlan_err; tx_errors += ring->stats.tx_l4_proto_err; tx_errors += ring->stats.tx_l2l3l4_err; tx_errors += ring->stats.tx_tso_err; + tx_errors += ring->stats.over_max_recursion; + tx_errors += ring->stats.hw_limitation; } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); /* fetch the rx stats */ @@ -2323,6 +2365,32 @@ static void hns3_shutdown(struct pci_dev *pdev) pci_set_power_state(pdev, PCI_D3hot); } +static int __maybe_unused hns3_suspend(struct device *dev) +{ + struct hnae3_ae_dev *ae_dev = dev_get_drvdata(dev); + + if (ae_dev && hns3_is_phys_func(ae_dev->pdev)) { + dev_info(dev, "Begin to suspend.\n"); + if (ae_dev->ops && ae_dev->ops->reset_prepare) + ae_dev->ops->reset_prepare(ae_dev, HNAE3_FUNC_RESET); + } + + return 0; +} + +static int __maybe_unused hns3_resume(struct device *dev) +{ + struct hnae3_ae_dev *ae_dev = dev_get_drvdata(dev); + + if (ae_dev && hns3_is_phys_func(ae_dev->pdev)) { + dev_info(dev, "Begin to resume.\n"); + if (ae_dev->ops && ae_dev->ops->reset_done) + ae_dev->ops->reset_done(ae_dev); + } + + return 0; +} + static pci_ers_result_t hns3_error_detected(struct pci_dev *pdev, pci_channel_state_t state) { @@ -2381,8 +2449,8 @@ static void hns3_reset_prepare(struct pci_dev *pdev) struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev); dev_info(&pdev->dev, "FLR prepare\n"); - if (ae_dev && ae_dev->ops && ae_dev->ops->flr_prepare) - ae_dev->ops->flr_prepare(ae_dev); + if (ae_dev && ae_dev->ops && ae_dev->ops->reset_prepare) + ae_dev->ops->reset_prepare(ae_dev, HNAE3_FLR_RESET); } static void hns3_reset_done(struct pci_dev *pdev) @@ -2390,8 +2458,8 @@ static void hns3_reset_done(struct pci_dev *pdev) struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev); dev_info(&pdev->dev, "FLR done\n"); - if (ae_dev && ae_dev->ops && ae_dev->ops->flr_done) - ae_dev->ops->flr_done(ae_dev); + if (ae_dev && ae_dev->ops && ae_dev->ops->reset_done) + ae_dev->ops->reset_done(ae_dev); } static const struct pci_error_handlers hns3_err_handler = { @@ -2401,12 +2469,15 @@ static const struct pci_error_handlers hns3_err_handler = { .reset_done = hns3_reset_done, }; +static SIMPLE_DEV_PM_OPS(hns3_pm_ops, hns3_suspend, hns3_resume); + static struct pci_driver hns3_driver = { .name = hns3_driver_name, .id_table = hns3_pci_tbl, .probe = hns3_probe, .remove = hns3_remove, .shutdown = hns3_shutdown, + .driver.pm = &hns3_pm_ops, .sriov_configure = hns3_pci_sriov_configure, .err_handler = &hns3_err_handler, }; @@ -2691,8 +2762,12 @@ static bool hns3_nic_reclaim_desc(struct hns3_enet_ring *ring, break; desc_cb = &ring->desc_cb[ntc]; - (*pkts) += (desc_cb->type == DESC_TYPE_SKB); - (*bytes) += desc_cb->length; + + if 
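[Editorial sketch, not part of the patch] In the doorbell hunk above, the byte count handed to __netdev_tx_sent_queue() is now desc_cb->send_bytes, and the helper's boolean result decides whether the hardware doorbell may be deferred: it returns false only when netdev_xmit_more() promises another skb and BQL has not just stopped the queue. The idiom reduced to its shape (example names are hypothetical):

#include <linux/netdevice.h>

static void example_tx_commit(struct netdev_queue *txq,
                              unsigned int send_bytes, int bd_num,
                              void (*ring_doorbell)(int bd_num))
{
        bool kick;

        /* account send_bytes for BQL; result: ring now or keep batching */
        kick = __netdev_tx_sent_queue(txq, send_bytes, netdev_xmit_more());
        if (kick)
                ring_doorbell(bd_num); /* flush all queued descriptors */
}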
(desc_cb->type == DESC_TYPE_SKB) { + (*pkts)++; + (*bytes) += desc_cb->send_bytes; + } + /* desc_cb will be cleaned, after hnae3_free_buffer_detach */ hns3_free_buffer_detach(ring, ntc, budget); @@ -2965,7 +3040,6 @@ static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb, HNS3_RXD_L3ID_S); l4_type = hnae3_get_field(l234info, HNS3_RXD_L4ID_M, HNS3_RXD_L4ID_S); - /* Can checksum ipv4 or ipv6 + UDP/TCP/SCTP packets */ if ((l3_type == HNS3_L3_TYPE_IPV4 || l3_type == HNS3_L3_TYPE_IPV6) && @@ -3295,7 +3369,6 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring) if (!skb) { bd_base_info = le32_to_cpu(desc->rx.bd_base_info); - /* Check valid BD */ if (unlikely(!(bd_base_info & BIT(HNS3_RXD_VLD_B)))) return -ENXIO; @@ -3557,7 +3630,6 @@ static int hns3_nic_common_poll(struct napi_struct *napi, int budget) hns3_for_each_ring(ring, tqp_vector->rx_group) { int rx_cleaned = hns3_clean_rx_ring(ring, rx_budget, hns3_rx_skb); - if (rx_cleaned >= rx_budget) clean_complete = false; @@ -3704,7 +3776,6 @@ static void hns3_nic_set_cpumask(struct hns3_nic_priv *priv) static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv) { - struct hnae3_ring_chain_node vector_ring_chain; struct hnae3_handle *h = priv->ae_handle; struct hns3_enet_tqp_vector *tqp_vector; int ret; @@ -3736,6 +3807,8 @@ static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv) } for (i = 0; i < priv->vector_num; i++) { + struct hnae3_ring_chain_node vector_ring_chain; + tqp_vector = &priv->tqp_vector[i]; tqp_vector->rx_group.total_bytes = 0; @@ -4024,7 +4097,6 @@ static void hns3_init_ring_hw(struct hns3_enet_ring *ring) hns3_buf_size2type(ring->buf_size)); hns3_write_dev(q, HNS3_RING_RX_RING_BD_NUM_REG, ring->desc_num / 8 - 1); - } else { hns3_write_dev(q, HNS3_RING_TX_RING_BASEADDR_L_REG, (u32)dma); @@ -4143,14 +4215,6 @@ static void hns3_uninit_phy(struct net_device *netdev) h->ae_algo->ops->mac_disconnect_phy(h); } -static void hns3_del_all_fd_rules(struct net_device *netdev, bool clear_list) -{ - struct hnae3_handle *h = hns3_get_handle(netdev); - - if (h->ae_algo->ops->del_all_fd_entries) - h->ae_algo->ops->del_all_fd_entries(h, clear_list); -} - static int hns3_client_start(struct hnae3_handle *handle) { if (!handle->ae_algo->ops->client_start) @@ -4337,8 +4401,6 @@ static void hns3_client_uninit(struct hnae3_handle *handle, bool reset) hns3_nic_uninit_irq(priv); - hns3_del_all_fd_rules(netdev, true); - hns3_clear_all_ring(handle, true); hns3_nic_uninit_vector_data(priv); @@ -4472,11 +4534,11 @@ int hns3_nic_reset_all_ring(struct hnae3_handle *h) int i, j; int ret; - for (i = 0; i < h->kinfo.num_tqps; i++) { - ret = h->ae_algo->ops->reset_queue(h, i); - if (ret) - return ret; + ret = h->ae_algo->ops->reset_queue(h); + if (ret) + return ret; + for (i = 0; i < h->kinfo.num_tqps; i++) { hns3_init_ring_hw(&priv->ring[i]); /* We need to clear tx ring here because self test will diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index d069b04ee587..daa04aeb0942 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -298,7 +298,12 @@ struct hns3_desc_cb { /* priv data for the desc, e.g. 
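[Editorial sketch, not part of the patch] The __maybe_unused suspend/resume pair and SIMPLE_DEV_PM_OPS() wiring added a little earlier in this diff follow the stock shape for PCI drivers: __maybe_unused keeps !CONFIG_PM builds free of unused-function warnings, and the generated dev_pm_ops table is hooked into pci_driver.driver.pm. Skeleton (all example_* names are hypothetical):

#include <linux/pci.h>
#include <linux/pm.h>

static int __maybe_unused example_suspend(struct device *dev)
{
        /* quiesce DMA and interrupts, e.g. via a function-level reset */
        return 0;
}

static int __maybe_unused example_resume(struct device *dev)
{
        /* rebuild whatever example_suspend() tore down */
        return 0;
}

static SIMPLE_DEV_PM_OPS(example_pm_ops, example_suspend, example_resume);

static struct pci_driver example_driver = {
        .name = "example",
        .driver.pm = &example_pm_ops,
};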
skb when use with ip stack */ void *priv; - u32 page_offset; + + union { + u32 page_offset; /* for rx */ + u32 send_bytes; /* for tx */ + }; + u32 length; /* length of the buffer */ u16 reuse_flag; @@ -376,6 +381,8 @@ struct ring_stats { u64 tx_l4_proto_err; u64 tx_l2l3l4_err; u64 tx_tso_err; + u64 over_max_recursion; + u64 hw_limitation; }; struct { u64 rx_pkts; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index adcec4ea7cb9..b48faf769b1c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -44,6 +44,8 @@ static const struct hns3_stats hns3_txq_stats[] = { HNS3_TQP_STAT("l4_proto_err", tx_l4_proto_err), HNS3_TQP_STAT("l2l3l4_err", tx_l2l3l4_err), HNS3_TQP_STAT("tso_err", tx_tso_err), + HNS3_TQP_STAT("over_max_recursion", over_max_recursion), + HNS3_TQP_STAT("hw_limitation", hw_limitation), }; #define HNS3_TXQ_STATS_COUNT ARRAY_SIZE(hns3_txq_stats) @@ -307,7 +309,7 @@ out: } /** - * hns3_nic_self_test - self test + * hns3_self_test - self test * @ndev: net device * @eth_test: test cmd * @data: test result @@ -642,6 +644,10 @@ static void hns3_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *param) { struct hnae3_handle *h = hns3_get_handle(netdev); + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev); + + if (!test_bit(HNAE3_DEV_SUPPORT_PAUSE_B, ae_dev->caps)) + return; if (h->ae_algo->ops->get_pauseparam) h->ae_algo->ops->get_pauseparam(h, ¶m->autoneg, @@ -652,6 +658,10 @@ static int hns3_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *param) { struct hnae3_handle *h = hns3_get_handle(netdev); + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev); + + if (!test_bit(HNAE3_DEV_SUPPORT_PAUSE_B, ae_dev->caps)) + return -EOPNOTSUPP; netif_dbg(h, drv, netdev, "set pauseparam: autoneg=%u, rx:%u, tx:%u\n", @@ -692,6 +702,7 @@ static int hns3_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *cmd) { struct hnae3_handle *h = hns3_get_handle(netdev); + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev); const struct hnae3_ae_ops *ops; u8 module_type; u8 media_type; @@ -722,7 +733,10 @@ static int hns3_get_link_ksettings(struct net_device *netdev, break; case HNAE3_MEDIA_TYPE_COPPER: cmd->base.port = PORT_TP; - if (!netdev->phydev) + if (test_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps) && + ops->get_phy_link_ksettings) + ops->get_phy_link_ksettings(h, cmd); + else if (!netdev->phydev) hns3_get_ksettings(h, cmd); else phy_ethtool_ksettings_get(netdev->phydev, cmd); @@ -815,6 +829,9 @@ static int hns3_set_link_ksettings(struct net_device *netdev, return -EINVAL; return phy_ethtool_ksettings_set(netdev->phydev, cmd); + } else if (test_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps) && + ops->set_phy_link_ksettings) { + return ops->set_phy_link_ksettings(handle, cmd); } if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c index 1bd0ddfaec4d..76a482456f1f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c @@ -353,7 +353,10 @@ static void hclge_set_default_capability(struct hclge_dev *hdev) set_bit(HNAE3_DEV_SUPPORT_FD_B, ae_dev->caps); set_bit(HNAE3_DEV_SUPPORT_GRO_B, ae_dev->caps); - set_bit(HNAE3_DEV_SUPPORT_FEC_B, ae_dev->caps); + if (hdev->ae_dev->dev_version == 
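[Editorial sketch, not part of the patch] The hns3_desc_cb hunk above overlays the RX-only page_offset with the new TX-only send_bytes, so the per-descriptor control block does not grow; each union member is valid only for the ring direction that wrote it. In isolation:

struct example_desc_cb {
        union {
                u32 page_offset; /* RX rings: offset into the reused page */
                u32 send_bytes;  /* TX rings: bytes reported to BQL at
                                  * completion (GSO header copies included)
                                  */
        };
        u32 length;
};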
HNAE3_DEVICE_VERSION_V2) { + set_bit(HNAE3_DEV_SUPPORT_FEC_B, ae_dev->caps); + set_bit(HNAE3_DEV_SUPPORT_PAUSE_B, ae_dev->caps); + } } static void hclge_parse_capability(struct hclge_dev *hdev, @@ -363,7 +366,6 @@ static void hclge_parse_capability(struct hclge_dev *hdev, u32 caps; caps = __le32_to_cpu(cmd->caps[0]); - if (hnae3_get_bit(caps, HCLGE_CAP_UDP_GSO_B)) set_bit(HNAE3_DEV_SUPPORT_UDP_GSO_B, ae_dev->caps); if (hnae3_get_bit(caps, HCLGE_CAP_PTP_B)) @@ -378,6 +380,12 @@ static void hclge_parse_capability(struct hclge_dev *hdev, set_bit(HNAE3_DEV_SUPPORT_UDP_TUNNEL_CSUM_B, ae_dev->caps); if (hnae3_get_bit(caps, HCLGE_CAP_FD_FORWARD_TC_B)) set_bit(HNAE3_DEV_SUPPORT_FD_FORWARD_TC_B, ae_dev->caps); + if (hnae3_get_bit(caps, HCLGE_CAP_FEC_B)) + set_bit(HNAE3_DEV_SUPPORT_FEC_B, ae_dev->caps); + if (hnae3_get_bit(caps, HCLGE_CAP_PAUSE_B)) + set_bit(HNAE3_DEV_SUPPORT_PAUSE_B, ae_dev->caps); + if (hnae3_get_bit(caps, HCLGE_CAP_PHY_IMP_B)) + set_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps); } static __le32 hclge_build_api_caps(void) @@ -467,6 +475,8 @@ static int hclge_firmware_compat_config(struct hclge_dev *hdev) hnae3_set_bit(compat, HCLGE_LINK_EVENT_REPORT_EN_B, 1); hnae3_set_bit(compat, HCLGE_NCSI_ERROR_REPORT_EN_B, 1); + if (hnae3_dev_phy_imp_supported(hdev)) + hnae3_set_bit(compat, HCLGE_PHY_IMP_EN_B, 1); req->compat = cpu_to_le32(compat); return hclge_cmd_send(&hdev->hw, &desc, 1); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 057dda735492..c6fc22e29581 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -127,7 +127,7 @@ enum hclge_opcode_type { HCLGE_OPC_QUERY_MAC_TNL_INT = 0x0310, HCLGE_OPC_MAC_TNL_INT_EN = 0x0311, HCLGE_OPC_CLEAR_MAC_TNL_INT = 0x0312, - HCLGE_OPC_SERDES_LOOPBACK = 0x0315, + HCLGE_OPC_COMMON_LOOPBACK = 0x0315, HCLGE_OPC_CONFIG_FEC_MODE = 0x031A, /* PFC/Pause commands */ @@ -243,6 +243,7 @@ enum hclge_opcode_type { HCLGE_OPC_FD_KEY_CONFIG = 0x1202, HCLGE_OPC_FD_TCAM_OP = 0x1203, HCLGE_OPC_FD_AD_OP = 0x1204, + HCLGE_OPC_FD_USER_DEF_OP = 0x1207, /* MDIO command */ HCLGE_OPC_MDIO_CONFIG = 0x1900, @@ -303,6 +304,10 @@ enum hclge_opcode_type { HCLGE_PPP_CMD1_INT_CMD = 0x2101, HCLGE_MAC_ETHERTYPE_IDX_RD = 0x2105, HCLGE_NCSI_INT_EN = 0x2401, + + /* PHY command */ + HCLGE_OPC_PHY_LINK_KSETTING = 0x7025, + HCLGE_OPC_PHY_REG = 0x7026, }; #define HCLGE_TQP_REG_OFFSET 0x80000 @@ -384,6 +389,8 @@ enum HCLGE_CAP_BITS { HCLGE_CAP_HW_PAD_B, HCLGE_CAP_STASH_B, HCLGE_CAP_UDP_TUNNEL_CSUM_B, + HCLGE_CAP_FEC_B = 13, + HCLGE_CAP_PAUSE_B = 14, }; enum HCLGE_API_CAP_BITS { @@ -499,8 +506,6 @@ struct hclge_pf_res_cmd { #define HCLGE_CFG_RD_LEN_BYTES 16 #define HCLGE_CFG_RD_LEN_UNIT 4 -#define HCLGE_CFG_VMDQ_S 0 -#define HCLGE_CFG_VMDQ_M GENMASK(7, 0) #define HCLGE_CFG_TC_NUM_S 8 #define HCLGE_CFG_TC_NUM_M GENMASK(15, 8) #define HCLGE_CFG_TQP_DESC_N_S 16 @@ -943,10 +948,16 @@ struct hclge_reset_tqp_queue_cmd { #define HCLGE_CFG_RESET_MAC_B 3 #define HCLGE_CFG_RESET_FUNC_B 7 +#define HCLGE_CFG_RESET_RCB_B 1 struct hclge_reset_cmd { u8 mac_func_reset; u8 fun_reset_vfid; - u8 rsv[22]; + u8 fun_reset_rcb; + u8 rsv; + __le16 fun_reset_rcb_vqid_start; + __le16 fun_reset_rcb_vqid_num; + u8 fun_reset_rcb_return_status; + u8 rsv1[15]; }; #define HCLGE_PF_RESET_DONE_BIT BIT(0) @@ -958,9 +969,10 @@ struct hclge_pf_rst_done_cmd { #define HCLGE_CMD_SERDES_SERIAL_INNER_LOOP_B BIT(0) #define HCLGE_CMD_SERDES_PARALLEL_INNER_LOOP_B BIT(2) 
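[Editorial sketch, not part of the patch] hclge_parse_capability() above translates firmware capability bits (HCLGE_CAP_*, carried in a little-endian word of the query response) into driver-level bits (HNAE3_DEV_SUPPORT_*) in ae_dev->caps, which the rest of the stack then tests via helpers such as hnae3_dev_pause_supported(). The translation step reduced to its essentials (example_parse_caps() is hypothetical):

static void example_parse_caps(unsigned long *caps, u32 fw_caps)
{
        if (fw_caps & BIT(HCLGE_CAP_FEC_B))     /* bit 13 */
                set_bit(HNAE3_DEV_SUPPORT_FEC_B, caps);
        if (fw_caps & BIT(HCLGE_CAP_PAUSE_B))   /* bit 14 */
                set_bit(HNAE3_DEV_SUPPORT_PAUSE_B, caps);
        if (fw_caps & BIT(HCLGE_CAP_PHY_IMP_B))
                set_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, caps);
}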
-#define HCLGE_CMD_SERDES_DONE_B BIT(0) -#define HCLGE_CMD_SERDES_SUCCESS_B BIT(1) -struct hclge_serdes_lb_cmd { +#define HCLGE_CMD_GE_PHY_INNER_LOOP_B BIT(3) +#define HCLGE_CMD_COMMON_LB_DONE_B BIT(0) +#define HCLGE_CMD_COMMON_LB_SUCCESS_B BIT(1) +struct hclge_common_lb_cmd { u8 mask; u8 enable; u8 result; @@ -1075,6 +1087,19 @@ struct hclge_fd_ad_config_cmd { u8 rsv2[8]; }; +#define HCLGE_FD_USER_DEF_OFT_S 0 +#define HCLGE_FD_USER_DEF_OFT_M GENMASK(14, 0) +#define HCLGE_FD_USER_DEF_EN_B 15 +struct hclge_fd_user_def_cfg_cmd { + __le16 ol2_cfg; + __le16 l2_cfg; + __le16 ol3_cfg; + __le16 l3_cfg; + __le16 ol4_cfg; + __le16 l4_cfg; + u8 rsv[12]; +}; + struct hclge_get_m7_bd_cmd { __le32 bd_num; u8 rsv[20]; @@ -1096,6 +1121,7 @@ struct hclge_query_ppu_pf_other_int_dfx_cmd { #define HCLGE_LINK_EVENT_REPORT_EN_B 0 #define HCLGE_NCSI_ERROR_REPORT_EN_B 1 +#define HCLGE_PHY_IMP_EN_B 2 struct hclge_firmware_compat_cmd { __le32 compat; u8 rsv[20]; @@ -1137,6 +1163,36 @@ struct hclge_dev_specs_1_cmd { u8 rsv1[18]; }; +#define HCLGE_PHY_LINK_SETTING_BD_NUM 2 + +struct hclge_phy_link_ksetting_0_cmd { + __le32 speed; + u8 duplex; + u8 autoneg; + u8 eth_tp_mdix; + u8 eth_tp_mdix_ctrl; + u8 port; + u8 transceiver; + u8 phy_address; + u8 rsv; + __le32 supported; + __le32 advertising; + __le32 lp_advertising; +}; + +struct hclge_phy_link_ksetting_1_cmd { + u8 master_slave_cfg; + u8 master_slave_state; + u8 rsv[22]; +}; + +struct hclge_phy_reg_cmd { + __le16 reg_addr; + u8 rsv0[2]; + __le16 reg_val; + u8 rsv1[18]; +}; + int hclge_cmd_init(struct hclge_dev *hdev); static inline void hclge_write_reg(void __iomem *base, u32 reg, u32 value) { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c index 6b1d197df881..85d306459e36 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c @@ -1541,18 +1541,17 @@ static void hclge_dbg_dump_ncl_config(struct hclge_dev *hdev, } } -static void hclge_dbg_dump_loopback(struct hclge_dev *hdev, - const char *cmd_buf) +static void hclge_dbg_dump_loopback(struct hclge_dev *hdev) { struct phy_device *phydev = hdev->hw.mac.phydev; struct hclge_config_mac_mode_cmd *req_app; - struct hclge_serdes_lb_cmd *req_serdes; + struct hclge_common_lb_cmd *req_common; struct hclge_desc desc; u8 loopback_en; int ret; req_app = (struct hclge_config_mac_mode_cmd *)desc.data; - req_serdes = (struct hclge_serdes_lb_cmd *)desc.data; + req_common = (struct hclge_common_lb_cmd *)desc.data; dev_info(&hdev->pdev->dev, "mac id: %u\n", hdev->hw.mac.mac_id); @@ -1569,27 +1568,33 @@ static void hclge_dbg_dump_loopback(struct hclge_dev *hdev, dev_info(&hdev->pdev->dev, "app loopback: %s\n", loopback_en ? "on" : "off"); - hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_SERDES_LOOPBACK, true); + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_COMMON_LOOPBACK, true); ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { dev_err(&hdev->pdev->dev, - "failed to dump serdes loopback status, ret = %d\n", + "failed to dump common loopback status, ret = %d\n", ret); return; } - loopback_en = req_serdes->enable & HCLGE_CMD_SERDES_SERIAL_INNER_LOOP_B; + loopback_en = req_common->enable & HCLGE_CMD_SERDES_SERIAL_INNER_LOOP_B; dev_info(&hdev->pdev->dev, "serdes serial loopback: %s\n", loopback_en ? 
"on" : "off"); - loopback_en = req_serdes->enable & + loopback_en = req_common->enable & HCLGE_CMD_SERDES_PARALLEL_INNER_LOOP_B; dev_info(&hdev->pdev->dev, "serdes parallel loopback: %s\n", loopback_en ? "on" : "off"); - if (phydev) + if (phydev) { dev_info(&hdev->pdev->dev, "phy loopback: %s\n", phydev->loopback_enabled ? "on" : "off"); + } else if (hnae3_dev_phy_imp_supported(hdev)) { + loopback_en = req_common->enable & + HCLGE_CMD_GE_PHY_INNER_LOOP_B; + dev_info(&hdev->pdev->dev, "phy loopback: %s\n", + loopback_en ? "on" : "off"); + } } /* hclge_dbg_dump_mac_tnl_status: print message about mac tnl interrupt @@ -1772,7 +1777,7 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf) hclge_dbg_dump_mac_tnl_status(hdev); } else if (strncmp(cmd_buf, DUMP_LOOPBACK, strlen(DUMP_LOOPBACK)) == 0) { - hclge_dbg_dump_loopback(hdev, &cmd_buf[sizeof(DUMP_LOOPBACK)]); + hclge_dbg_dump_loopback(hdev); } else if (strncmp(cmd_buf, "dump qs shaper", 14) == 0) { hclge_dbg_dump_qs_shaper(hdev, &cmd_buf[sizeof("dump qs shaper")]); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c index 0ca7f1b984bf..d25291916b31 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -865,13 +865,7 @@ static int hclge_config_tm_hw_err_int(struct hclge_dev *hdev, bool en) } /* configure TM QCN hw errors */ - ret = hclge_cmd_query_error(hdev, &desc, HCLGE_TM_QCN_MEM_INT_CFG, 0); - if (ret) { - dev_err(dev, "fail(%d) to read TM QCN CFG status\n", ret); - return ret; - } - - hclge_cmd_reuse_desc(&desc, false); + hclge_cmd_setup_basic_desc(&desc, HCLGE_TM_QCN_MEM_INT_CFG, false); if (en) desc.data[1] = cpu_to_le32(HCLGE_TM_QCN_MEM_ERR_INT_EN); @@ -1497,7 +1491,6 @@ hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev) } status = le32_to_cpu(desc[0].data[0]); - if (status & HCLGE_ROCEE_AXI_ERR_INT_MASK) { if (status & HCLGE_ROCEE_RERR_INT_MASK) dev_err(dev, "ROCEE RAS AXI rresp error\n"); @@ -1647,7 +1640,6 @@ pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev) } status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG); - if (status & HCLGE_RAS_REG_NFE_MASK || status & HCLGE_RAS_REG_ROCEE_ERR_MASK) ae_dev->hw_err_reset_req = 0; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index e3f81c7e0ce7..c296ab64fb0a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -62,7 +62,7 @@ static void hclge_sync_vlan_filter(struct hclge_dev *hdev); static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev); static bool hclge_get_hw_reset_stat(struct hnae3_handle *handle); static void hclge_rfs_filter_expire(struct hclge_dev *hdev); -static void hclge_clear_arfs_rules(struct hnae3_handle *handle); +static int hclge_clear_arfs_rules(struct hclge_dev *hdev); static enum hnae3_reset_type hclge_get_reset_level(struct hnae3_ae_dev *ae_dev, unsigned long *addr); static int hclge_set_default_loopback(struct hclge_dev *hdev); @@ -70,6 +70,7 @@ static int hclge_set_default_loopback(struct hclge_dev *hdev); static void hclge_sync_mac_table(struct hclge_dev *hdev); static void hclge_restore_hw_table(struct hclge_dev *hdev); static void hclge_sync_promisc_mode(struct hclge_dev *hdev); +static void hclge_sync_fd_table(struct hclge_dev *hdev); static struct hnae3_ae_algo ae_algo; @@ -384,36 +385,62 
@@ static const struct key_info meta_data_key_info[] = { }; static const struct key_info tuple_key_info[] = { - { OUTER_DST_MAC, 48}, - { OUTER_SRC_MAC, 48}, - { OUTER_VLAN_TAG_FST, 16}, - { OUTER_VLAN_TAG_SEC, 16}, - { OUTER_ETH_TYPE, 16}, - { OUTER_L2_RSV, 16}, - { OUTER_IP_TOS, 8}, - { OUTER_IP_PROTO, 8}, - { OUTER_SRC_IP, 32}, - { OUTER_DST_IP, 32}, - { OUTER_L3_RSV, 16}, - { OUTER_SRC_PORT, 16}, - { OUTER_DST_PORT, 16}, - { OUTER_L4_RSV, 32}, - { OUTER_TUN_VNI, 24}, - { OUTER_TUN_FLOW_ID, 8}, - { INNER_DST_MAC, 48}, - { INNER_SRC_MAC, 48}, - { INNER_VLAN_TAG_FST, 16}, - { INNER_VLAN_TAG_SEC, 16}, - { INNER_ETH_TYPE, 16}, - { INNER_L2_RSV, 16}, - { INNER_IP_TOS, 8}, - { INNER_IP_PROTO, 8}, - { INNER_SRC_IP, 32}, - { INNER_DST_IP, 32}, - { INNER_L3_RSV, 16}, - { INNER_SRC_PORT, 16}, - { INNER_DST_PORT, 16}, - { INNER_L4_RSV, 32}, + { OUTER_DST_MAC, 48, KEY_OPT_MAC, -1, -1 }, + { OUTER_SRC_MAC, 48, KEY_OPT_MAC, -1, -1 }, + { OUTER_VLAN_TAG_FST, 16, KEY_OPT_LE16, -1, -1 }, + { OUTER_VLAN_TAG_SEC, 16, KEY_OPT_LE16, -1, -1 }, + { OUTER_ETH_TYPE, 16, KEY_OPT_LE16, -1, -1 }, + { OUTER_L2_RSV, 16, KEY_OPT_LE16, -1, -1 }, + { OUTER_IP_TOS, 8, KEY_OPT_U8, -1, -1 }, + { OUTER_IP_PROTO, 8, KEY_OPT_U8, -1, -1 }, + { OUTER_SRC_IP, 32, KEY_OPT_IP, -1, -1 }, + { OUTER_DST_IP, 32, KEY_OPT_IP, -1, -1 }, + { OUTER_L3_RSV, 16, KEY_OPT_LE16, -1, -1 }, + { OUTER_SRC_PORT, 16, KEY_OPT_LE16, -1, -1 }, + { OUTER_DST_PORT, 16, KEY_OPT_LE16, -1, -1 }, + { OUTER_L4_RSV, 32, KEY_OPT_LE32, -1, -1 }, + { OUTER_TUN_VNI, 24, KEY_OPT_VNI, -1, -1 }, + { OUTER_TUN_FLOW_ID, 8, KEY_OPT_U8, -1, -1 }, + { INNER_DST_MAC, 48, KEY_OPT_MAC, + offsetof(struct hclge_fd_rule, tuples.dst_mac), + offsetof(struct hclge_fd_rule, tuples_mask.dst_mac) }, + { INNER_SRC_MAC, 48, KEY_OPT_MAC, + offsetof(struct hclge_fd_rule, tuples.src_mac), + offsetof(struct hclge_fd_rule, tuples_mask.src_mac) }, + { INNER_VLAN_TAG_FST, 16, KEY_OPT_LE16, + offsetof(struct hclge_fd_rule, tuples.vlan_tag1), + offsetof(struct hclge_fd_rule, tuples_mask.vlan_tag1) }, + { INNER_VLAN_TAG_SEC, 16, KEY_OPT_LE16, -1, -1 }, + { INNER_ETH_TYPE, 16, KEY_OPT_LE16, + offsetof(struct hclge_fd_rule, tuples.ether_proto), + offsetof(struct hclge_fd_rule, tuples_mask.ether_proto) }, + { INNER_L2_RSV, 16, KEY_OPT_LE16, + offsetof(struct hclge_fd_rule, tuples.l2_user_def), + offsetof(struct hclge_fd_rule, tuples_mask.l2_user_def) }, + { INNER_IP_TOS, 8, KEY_OPT_U8, + offsetof(struct hclge_fd_rule, tuples.ip_tos), + offsetof(struct hclge_fd_rule, tuples_mask.ip_tos) }, + { INNER_IP_PROTO, 8, KEY_OPT_U8, + offsetof(struct hclge_fd_rule, tuples.ip_proto), + offsetof(struct hclge_fd_rule, tuples_mask.ip_proto) }, + { INNER_SRC_IP, 32, KEY_OPT_IP, + offsetof(struct hclge_fd_rule, tuples.src_ip), + offsetof(struct hclge_fd_rule, tuples_mask.src_ip) }, + { INNER_DST_IP, 32, KEY_OPT_IP, + offsetof(struct hclge_fd_rule, tuples.dst_ip), + offsetof(struct hclge_fd_rule, tuples_mask.dst_ip) }, + { INNER_L3_RSV, 16, KEY_OPT_LE16, + offsetof(struct hclge_fd_rule, tuples.l3_user_def), + offsetof(struct hclge_fd_rule, tuples_mask.l3_user_def) }, + { INNER_SRC_PORT, 16, KEY_OPT_LE16, + offsetof(struct hclge_fd_rule, tuples.src_port), + offsetof(struct hclge_fd_rule, tuples_mask.src_port) }, + { INNER_DST_PORT, 16, KEY_OPT_LE16, + offsetof(struct hclge_fd_rule, tuples.dst_port), + offsetof(struct hclge_fd_rule, tuples_mask.dst_port) }, + { INNER_L4_RSV, 32, KEY_OPT_LE32, + offsetof(struct hclge_fd_rule, tuples.l4_user_def), + offsetof(struct hclge_fd_rule, tuples_mask.l4_user_def) }, }; static 
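[Editorial sketch, not part of the patch] Each expanded tuple_key_info[] entry above now carries a key layout (KEY_OPT_*) plus the offsetof() of the tuple value and of its mask inside struct hclge_fd_rule, with -1 marking tuples that have no backing rule field. That is what lets the reworked hclge_fd_convert_tuple() near the end of this diff fetch any tuple through a plain byte pointer instead of a per-tuple switch. A hypothetical helper showing the offset-driven access (callers must skip entries whose offsets are -1):

static void example_read_tuple(const struct hclge_fd_rule *rule,
                               int offset, int moffset,
                               u8 *value, u8 *mask)
{
        const u8 *p = (const u8 *)rule;

        *value = p[offset];  /* e.g. offsetof(..., tuples.ip_tos) */
        *mask = p[moffset];  /* e.g. offsetof(..., tuples_mask.ip_tos) */
}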
int hclge_mac_update_stats_defective(struct hclge_dev *hdev) @@ -526,7 +553,6 @@ static int hclge_mac_update_stats(struct hclge_dev *hdev) int ret; ret = hclge_mac_query_reg_num(hdev, &desc_num); - /* The firmware supports the new statistics acquisition method */ if (!ret) ret = hclge_mac_update_stats_complete(hdev, desc_num); @@ -751,12 +777,12 @@ static int hclge_get_sset_count(struct hnae3_handle *handle, int stringset) handle->flags |= HNAE3_SUPPORT_SERDES_SERIAL_LOOPBACK; handle->flags |= HNAE3_SUPPORT_SERDES_PARALLEL_LOOPBACK; - if (hdev->hw.mac.phydev && hdev->hw.mac.phydev->drv && - hdev->hw.mac.phydev->drv->set_loopback) { + if ((hdev->hw.mac.phydev && hdev->hw.mac.phydev->drv && + hdev->hw.mac.phydev->drv->set_loopback) || + hnae3_dev_phy_imp_supported(hdev)) { count += 1; handle->flags |= HNAE3_SUPPORT_PHY_LOOPBACK; } - } else if (stringset == ETH_SS_STATS) { count = ARRAY_SIZE(g_mac_stats_string) + hclge_tqps_get_sset_count(handle, stringset); @@ -1150,8 +1176,10 @@ static void hclge_parse_fiber_link_mode(struct hclge_dev *hdev, if (hnae3_dev_fec_supported(hdev)) hclge_convert_setting_fec(mac); + if (hnae3_dev_pause_supported(hdev)) + linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, mac->supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, mac->supported); - linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, mac->supported); linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT, mac->supported); } @@ -1163,8 +1191,11 @@ static void hclge_parse_backplane_link_mode(struct hclge_dev *hdev, hclge_convert_setting_kr(mac, speed_ability); if (hnae3_dev_fec_supported(hdev)) hclge_convert_setting_fec(mac); + + if (hnae3_dev_pause_supported(hdev)) + linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, mac->supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_Backplane_BIT, mac->supported); - linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, mac->supported); linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT, mac->supported); } @@ -1193,10 +1224,13 @@ static void hclge_parse_copper_link_mode(struct hclge_dev *hdev, linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, supported); } + if (hnae3_dev_pause_supported(hdev)) { + linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, supported); + } + linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, supported); linkmode_set_bit(ETHTOOL_LINK_MODE_TP_BIT, supported); - linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, supported); - linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, supported); } static void hclge_parse_link_mode(struct hclge_dev *hdev, u16 speed_ability) @@ -1256,9 +1290,6 @@ static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc) req = (struct hclge_cfg_param_cmd *)desc[0].data; /* get the configuration */ - cfg->vmdq_vport_num = hnae3_get_field(__le32_to_cpu(req->param[0]), - HCLGE_CFG_VMDQ_M, - HCLGE_CFG_VMDQ_S); cfg->tc_num = hnae3_get_field(__le32_to_cpu(req->param[0]), HCLGE_CFG_TC_NUM_M, HCLGE_CFG_TC_NUM_S); cfg->tqp_desc_num = hnae3_get_field(__le32_to_cpu(req->param[0]), @@ -1475,7 +1506,7 @@ static void hclge_init_kdump_kernel_config(struct hclge_dev *hdev) "Running kdump kernel. 
Using minimal resources\n"); /* minimal queue pairs equals to the number of vports */ - hdev->num_tqps = hdev->num_vmdq_vport + hdev->num_req_vfs + 1; + hdev->num_tqps = hdev->num_req_vfs + 1; hdev->num_tx_desc = HCLGE_MIN_TX_DESC; hdev->num_rx_desc = HCLGE_MIN_RX_DESC; } @@ -1490,7 +1521,6 @@ static int hclge_configure(struct hclge_dev *hdev) if (ret) return ret; - hdev->num_vmdq_vport = cfg.vmdq_vport_num; hdev->base_tqp_pid = 0; hdev->vf_rss_size_max = cfg.vf_rss_size_max; hdev->pf_rss_size_max = cfg.pf_rss_size_max; @@ -1741,7 +1771,7 @@ static int hclge_map_tqp(struct hclge_dev *hdev) struct hclge_vport *vport = hdev->vport; u16 i, num_vport; - num_vport = hdev->num_vmdq_vport + hdev->num_req_vfs + 1; + num_vport = hdev->num_req_vfs + 1; for (i = 0; i < num_vport; i++) { int ret; @@ -1783,7 +1813,7 @@ static int hclge_alloc_vport(struct hclge_dev *hdev) int ret; /* We need to alloc a vport for main NIC of PF */ - num_vport = hdev->num_vmdq_vport + hdev->num_req_vfs + 1; + num_vport = hdev->num_req_vfs + 1; if (hdev->num_tqps < num_vport) { dev_err(&hdev->pdev->dev, "tqps(%u) is less than vports(%d)", @@ -2159,7 +2189,6 @@ static int hclge_only_alloc_priv_buff(struct hclge_dev *hdev, COMPENSATE_HALF_MPS_NUM * half_mps; min_rx_priv = round_up(min_rx_priv, HCLGE_BUF_SIZE_UNIT); rx_priv = round_down(rx_priv, HCLGE_BUF_SIZE_UNIT); - if (rx_priv < min_rx_priv) return false; @@ -2188,7 +2217,7 @@ static int hclge_only_alloc_priv_buff(struct hclge_dev *hdev, /* hclge_rx_buffer_calc: calculate the rx private buffer size for all TCs * @hdev: pointer to struct hclge_dev * @buf_alloc: pointer to buffer calculation data - * @return: 0: calculate sucessful, negative: fail + * @return: 0: calculate successful, negative: fail */ static int hclge_rx_buffer_calc(struct hclge_dev *hdev, struct hclge_pkt_buf_alloc *buf_alloc) @@ -2851,15 +2880,36 @@ static int hclge_get_mac_phy_link(struct hclge_dev *hdev, int *link_status) return hclge_get_mac_link_status(hdev, link_status); } +static void hclge_push_link_status(struct hclge_dev *hdev) +{ + struct hclge_vport *vport; + int ret; + u16 i; + + for (i = 0; i < pci_num_vf(hdev->pdev); i++) { + vport = &hdev->vport[i + HCLGE_VF_VPORT_START_NUM]; + + if (!test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state) || + vport->vf_info.link_state != IFLA_VF_LINK_STATE_AUTO) + continue; + + ret = hclge_push_vf_link_status(vport); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to push link status to vf%u, ret = %d\n", + i, ret); + } + } +} + static void hclge_update_link_status(struct hclge_dev *hdev) { + struct hnae3_handle *rhandle = &hdev->vport[0].roce; + struct hnae3_handle *handle = &hdev->vport[0].nic; struct hnae3_client *rclient = hdev->roce_client; struct hnae3_client *client = hdev->nic_client; - struct hnae3_handle *rhandle; - struct hnae3_handle *handle; int state; int ret; - int i; if (!client) return; @@ -2874,25 +2924,24 @@ static void hclge_update_link_status(struct hclge_dev *hdev) } if (state != hdev->hw.mac.link) { - for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { - handle = &hdev->vport[i].nic; - client->ops->link_status_change(handle, state); - hclge_config_mac_tnl_int(hdev, state); - rhandle = &hdev->vport[i].roce; - if (rclient && rclient->ops->link_status_change) - rclient->ops->link_status_change(rhandle, - state); - } + client->ops->link_status_change(handle, state); + hclge_config_mac_tnl_int(hdev, state); + if (rclient && rclient->ops->link_status_change) + rclient->ops->link_status_change(rhandle, state); + hdev->hw.mac.link = state; + 
hclge_push_link_status(hdev); } clear_bit(HCLGE_STATE_LINK_UPDATING, &hdev->state); } -static void hclge_update_port_capability(struct hclge_mac *mac) +static void hclge_update_port_capability(struct hclge_dev *hdev, + struct hclge_mac *mac) { - /* update fec ability by speed */ - hclge_convert_setting_fec(mac); + if (hnae3_dev_fec_supported(hdev)) + /* update fec ability by speed */ + hclge_convert_setting_fec(mac); /* firmware can not identify back plane type, the media type * read from configuration can help deal it @@ -2984,6 +3033,141 @@ static int hclge_get_sfp_info(struct hclge_dev *hdev, struct hclge_mac *mac) return 0; } +static int hclge_get_phy_link_ksettings(struct hnae3_handle *handle, + struct ethtool_link_ksettings *cmd) +{ + struct hclge_desc desc[HCLGE_PHY_LINK_SETTING_BD_NUM]; + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_phy_link_ksetting_0_cmd *req0; + struct hclge_phy_link_ksetting_1_cmd *req1; + u32 supported, advertising, lp_advertising; + struct hclge_dev *hdev = vport->back; + int ret; + + hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_PHY_LINK_KSETTING, + true); + desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); + hclge_cmd_setup_basic_desc(&desc[1], HCLGE_OPC_PHY_LINK_KSETTING, + true); + + ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_PHY_LINK_SETTING_BD_NUM); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to get phy link ksetting, ret = %d.\n", ret); + return ret; + } + + req0 = (struct hclge_phy_link_ksetting_0_cmd *)desc[0].data; + cmd->base.autoneg = req0->autoneg; + cmd->base.speed = le32_to_cpu(req0->speed); + cmd->base.duplex = req0->duplex; + cmd->base.port = req0->port; + cmd->base.transceiver = req0->transceiver; + cmd->base.phy_address = req0->phy_address; + cmd->base.eth_tp_mdix = req0->eth_tp_mdix; + cmd->base.eth_tp_mdix_ctrl = req0->eth_tp_mdix_ctrl; + supported = le32_to_cpu(req0->supported); + advertising = le32_to_cpu(req0->advertising); + lp_advertising = le32_to_cpu(req0->lp_advertising); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.lp_advertising, + lp_advertising); + + req1 = (struct hclge_phy_link_ksetting_1_cmd *)desc[1].data; + cmd->base.master_slave_cfg = req1->master_slave_cfg; + cmd->base.master_slave_state = req1->master_slave_state; + + return 0; +} + +static int +hclge_set_phy_link_ksettings(struct hnae3_handle *handle, + const struct ethtool_link_ksettings *cmd) +{ + struct hclge_desc desc[HCLGE_PHY_LINK_SETTING_BD_NUM]; + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_phy_link_ksetting_0_cmd *req0; + struct hclge_phy_link_ksetting_1_cmd *req1; + struct hclge_dev *hdev = vport->back; + u32 advertising; + int ret; + + if (cmd->base.autoneg == AUTONEG_DISABLE && + ((cmd->base.speed != SPEED_100 && cmd->base.speed != SPEED_10) || + (cmd->base.duplex != DUPLEX_HALF && + cmd->base.duplex != DUPLEX_FULL))) + return -EINVAL; + + hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_PHY_LINK_KSETTING, + false); + desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); + hclge_cmd_setup_basic_desc(&desc[1], HCLGE_OPC_PHY_LINK_KSETTING, + false); + + req0 = (struct hclge_phy_link_ksetting_0_cmd *)desc[0].data; + req0->autoneg = cmd->base.autoneg; + req0->speed = cpu_to_le32(cmd->base.speed); + req0->duplex = cmd->base.duplex; + ethtool_convert_link_mode_to_legacy_u32(&advertising, + cmd->link_modes.advertising); + 
req0->advertising = cpu_to_le32(advertising); + req0->eth_tp_mdix_ctrl = cmd->base.eth_tp_mdix_ctrl; + + req1 = (struct hclge_phy_link_ksetting_1_cmd *)desc[1].data; + req1->master_slave_cfg = cmd->base.master_slave_cfg; + + ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_PHY_LINK_SETTING_BD_NUM); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to set phy link ksettings, ret = %d.\n", ret); + return ret; + } + + hdev->hw.mac.autoneg = cmd->base.autoneg; + hdev->hw.mac.speed = cmd->base.speed; + hdev->hw.mac.duplex = cmd->base.duplex; + linkmode_copy(hdev->hw.mac.advertising, cmd->link_modes.advertising); + + return 0; +} + +static int hclge_update_tp_port_info(struct hclge_dev *hdev) +{ + struct ethtool_link_ksettings cmd; + int ret; + + if (!hnae3_dev_phy_imp_supported(hdev)) + return 0; + + ret = hclge_get_phy_link_ksettings(&hdev->vport->nic, &cmd); + if (ret) + return ret; + + hdev->hw.mac.autoneg = cmd.base.autoneg; + hdev->hw.mac.speed = cmd.base.speed; + hdev->hw.mac.duplex = cmd.base.duplex; + + return 0; +} + +static int hclge_tp_port_init(struct hclge_dev *hdev) +{ + struct ethtool_link_ksettings cmd; + + if (!hnae3_dev_phy_imp_supported(hdev)) + return 0; + + cmd.base.autoneg = hdev->hw.mac.autoneg; + cmd.base.speed = hdev->hw.mac.speed; + cmd.base.duplex = hdev->hw.mac.duplex; + linkmode_copy(cmd.link_modes.advertising, hdev->hw.mac.advertising); + + return hclge_set_phy_link_ksettings(&hdev->vport->nic, &cmd); +} + static int hclge_update_port_info(struct hclge_dev *hdev) { struct hclge_mac *mac = &hdev->hw.mac; @@ -2992,7 +3176,7 @@ static int hclge_update_port_info(struct hclge_dev *hdev) /* get the port info from SFP cmd if not copper port */ if (mac->media_type == HNAE3_MEDIA_TYPE_COPPER) - return 0; + return hclge_update_tp_port_info(hdev); /* if IMP does not support get SFP/qSFP info, return directly */ if (!hdev->support_sfp_query) @@ -3012,7 +3196,7 @@ static int hclge_update_port_info(struct hclge_dev *hdev) if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) { if (mac->speed_type == QUERY_ACTIVE_SPEED) { - hclge_update_port_capability(mac); + hclge_update_port_capability(hdev, mac); return 0; } return hclge_cfg_mac_speed_dup(hdev, mac->speed, @@ -3085,14 +3269,24 @@ static int hclge_set_vf_link_state(struct hnae3_handle *handle, int vf, { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; + int link_state_old; + int ret; vport = hclge_get_vf_vport(hdev, vf); if (!vport) return -EINVAL; + link_state_old = vport->vf_info.link_state; vport->vf_info.link_state = link_state; - return 0; + ret = hclge_push_vf_link_status(vport); + if (ret) { + vport->vf_info.link_state = link_state_old; + dev_err(&hdev->pdev->dev, + "failed to push vf%d link status, ret = %d\n", vf, ret); + } + + return ret; } static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval) @@ -3197,7 +3391,7 @@ static irqreturn_t hclge_misc_irq_handle(int irq, void *data) * caused this event. Therefore, we will do below for now: * 1. Assert HNAE3_UNKNOWN_RESET type of reset. This means we * have defered type of reset to be used. - * 2. Schedule the reset serivce task. + * 2. Schedule the reset service task. * 3. When service task receives HNAE3_UNKNOWN_RESET type it * will fetch the correct type of reset. This would be done * by first decoding the types of errors. 
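[Editorial sketch, not part of the patch] The IMP PHY commands above exchange link modes as legacy u32 bitmaps, while struct ethtool_link_ksettings stores them as long bitmaps, hence the conversion helpers used in both the get and set paths. Both directions together (fw_advertising is a hypothetical value taken from the firmware response):

static void example_convert(struct ethtool_link_ksettings *cmd,
                            u32 fw_advertising)
{
        u32 legacy;

        /* firmware -> ethtool: expand the u32 into the long bitmap */
        ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
                                                fw_advertising);

        /* ethtool -> firmware: compress back; only the first 32 link
         * modes exist in the legacy format, anything above is dropped
         */
        ethtool_convert_link_mode_to_legacy_u32(&legacy,
                                                cmd->link_modes.advertising);
}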
@@ -3325,8 +3519,9 @@ static void hclge_misc_irq_uninit(struct hclge_dev *hdev) int hclge_notify_client(struct hclge_dev *hdev, enum hnae3_reset_notify_type type) { + struct hnae3_handle *handle = &hdev->vport[0].nic; struct hnae3_client *client = hdev->nic_client; - u16 i; + int ret; if (!test_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state) || !client) return 0; @@ -3334,27 +3529,20 @@ int hclge_notify_client(struct hclge_dev *hdev, if (!client->ops->reset_notify) return -EOPNOTSUPP; - for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { - struct hnae3_handle *handle = &hdev->vport[i].nic; - int ret; - - ret = client->ops->reset_notify(handle, type); - if (ret) { - dev_err(&hdev->pdev->dev, - "notify nic client failed %d(%d)\n", type, ret); - return ret; - } - } + ret = client->ops->reset_notify(handle, type); + if (ret) + dev_err(&hdev->pdev->dev, "notify nic client failed %d(%d)\n", + type, ret); - return 0; + return ret; } static int hclge_notify_roce_client(struct hclge_dev *hdev, enum hnae3_reset_notify_type type) { + struct hnae3_handle *handle = &hdev->vport[0].roce; struct hnae3_client *client = hdev->roce_client; int ret; - u16 i; if (!test_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state) || !client) return 0; @@ -3362,17 +3550,10 @@ static int hclge_notify_roce_client(struct hclge_dev *hdev, if (!client->ops->reset_notify) return -EOPNOTSUPP; - for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { - struct hnae3_handle *handle = &hdev->vport[i].roce; - - ret = client->ops->reset_notify(handle, type); - if (ret) { - dev_err(&hdev->pdev->dev, - "notify roce client failed %d(%d)", - type, ret); - return ret; - } - } + ret = client->ops->reset_notify(handle, type); + if (ret) + dev_err(&hdev->pdev->dev, "notify roce client failed %d(%d)", + type, ret); return ret; } @@ -3440,7 +3621,7 @@ static int hclge_set_all_vf_rst(struct hclge_dev *hdev, bool reset) { int i; - for (i = hdev->num_vmdq_vport + 1; i < hdev->num_alloc_vport; i++) { + for (i = HCLGE_VF_VPORT_START_NUM; i < hdev->num_alloc_vport; i++) { struct hclge_vport *vport = &hdev->vport[i]; int ret; @@ -3521,14 +3702,12 @@ void hclge_report_hw_error(struct hclge_dev *hdev, enum hnae3_hw_error_type type) { struct hnae3_client *client = hdev->nic_client; - u16 i; if (!client || !client->ops->process_hw_error || !test_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state)) return; - for (i = 0; i < hdev->num_vmdq_vport + 1; i++) - client->ops->process_hw_error(&hdev->vport[i].nic, type); + client->ops->process_hw_error(&hdev->vport[0].nic, type); } static void hclge_handle_imp_error(struct hclge_dev *hdev) @@ -3794,6 +3973,21 @@ static bool hclge_reset_err_handle(struct hclge_dev *hdev) return false; } +static void hclge_update_reset_level(struct hclge_dev *hdev) +{ + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); + enum hnae3_reset_type reset_level; + + /* if default_reset_request has a higher level reset request, + * it should be handled as soon as possible. since some errors + * need this kind of reset to fix. 
+ */ + reset_level = hclge_get_reset_level(ae_dev, + &hdev->default_reset_request); + if (reset_level != HNAE3_NONE_RESET) + set_bit(reset_level, &hdev->reset_request); +} + static int hclge_set_rst_done(struct hclge_dev *hdev) { struct hclge_pf_rst_done_cmd *req; @@ -3881,8 +4075,6 @@ static int hclge_reset_prepare(struct hclge_dev *hdev) static int hclge_reset_rebuild(struct hclge_dev *hdev) { - struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); - enum hnae3_reset_type reset_level; int ret; hdev->rst_stats.hw_reset_done_cnt++; @@ -3926,14 +4118,7 @@ static int hclge_reset_rebuild(struct hclge_dev *hdev) hdev->rst_stats.reset_done_cnt++; clear_bit(HCLGE_STATE_RST_FAIL, &hdev->state); - /* if default_reset_request has a higher level reset request, - * it should be handled as soon as possible. since some errors - * need this kind of reset to fix. - */ - reset_level = hclge_get_reset_level(ae_dev, - &hdev->default_reset_request); - if (reset_level != HNAE3_NONE_RESET) - set_bit(reset_level, &hdev->reset_request); + hclge_update_reset_level(hdev); return 0; } @@ -3966,7 +4151,6 @@ static void hclge_reset_event(struct pci_dev *pdev, struct hnae3_handle *handle) * normalcy is to reset. * 2. A new reset request from the stack due to timeout * - * For the first case,error event might not have ae handle available. * check if this is a new reset request and we are not here just because * last reset attempt did not succeed and watchdog hit us again. We will * know this if last reset request did not occur very recently (watchdog @@ -3976,14 +4160,14 @@ static void hclge_reset_event(struct pci_dev *pdev, struct hnae3_handle *handle) * want to make sure we throttle the reset request. Therefore, we will * not allow it again before 3*HZ times. */ - if (!handle) - handle = &hdev->vport[0].nic; if (time_before(jiffies, (hdev->last_reset_time + HCLGE_RESET_INTERVAL))) { mod_timer(&hdev->reset_timer, jiffies + HCLGE_RESET_INTERVAL); return; - } else if (hdev->default_reset_request) { + } + + if (hdev->default_reset_request) { hdev->reset_level = hclge_get_reset_level(ae_dev, &hdev->default_reset_request); @@ -4095,6 +4279,7 @@ static void hclge_periodic_service_task(struct hclge_dev *hdev) hclge_update_link_status(hdev); hclge_sync_mac_table(hdev); hclge_sync_promisc_mode(hdev); + hclge_sync_fd_table(hdev); if (time_is_after_jiffies(hdev->last_serv_processed + HZ)) { delta = jiffies - hdev->last_serv_processed; @@ -4739,58 +4924,44 @@ int hclge_rss_init_hw(struct hclge_dev *hdev) void hclge_rss_indir_init_cfg(struct hclge_dev *hdev) { - struct hclge_vport *vport = hdev->vport; - int i, j; + struct hclge_vport *vport = &hdev->vport[0]; + int i; - for (j = 0; j < hdev->num_vmdq_vport + 1; j++) { - for (i = 0; i < hdev->ae_dev->dev_specs.rss_ind_tbl_size; i++) - vport[j].rss_indirection_tbl[i] = - i % vport[j].alloc_rss_size; - } + for (i = 0; i < hdev->ae_dev->dev_specs.rss_ind_tbl_size; i++) + vport->rss_indirection_tbl[i] = i % vport->alloc_rss_size; } static int hclge_rss_init_cfg(struct hclge_dev *hdev) { u16 rss_ind_tbl_size = hdev->ae_dev->dev_specs.rss_ind_tbl_size; - int i, rss_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ; - struct hclge_vport *vport = hdev->vport; + int rss_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ; + struct hclge_vport *vport = &hdev->vport[0]; + u16 *rss_ind_tbl; if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) rss_algo = HCLGE_RSS_HASH_ALGO_SIMPLE; - for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { - u16 *rss_ind_tbl; - - vport[i].rss_tuple_sets.ipv4_tcp_en = - 
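[Editorial sketch, not part of the patch] With the VMDq vports gone, only vport[0] keeps RSS state, and hclge_rss_indir_init_cfg() above collapses to a single round-robin fill of the indirection table. The mapping by itself (example_rss_indir_fill() is hypothetical):

static void example_rss_indir_fill(u16 *tbl, u16 tbl_size, u16 nr_queues)
{
        u16 i;

        for (i = 0; i < tbl_size; i++)
                tbl[i] = i % nr_queues; /* slot i -> queue i mod nr_queues */
}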
HCLGE_RSS_INPUT_TUPLE_OTHER; - vport[i].rss_tuple_sets.ipv4_udp_en = - HCLGE_RSS_INPUT_TUPLE_OTHER; - vport[i].rss_tuple_sets.ipv4_sctp_en = - HCLGE_RSS_INPUT_TUPLE_SCTP; - vport[i].rss_tuple_sets.ipv4_fragment_en = - HCLGE_RSS_INPUT_TUPLE_OTHER; - vport[i].rss_tuple_sets.ipv6_tcp_en = - HCLGE_RSS_INPUT_TUPLE_OTHER; - vport[i].rss_tuple_sets.ipv6_udp_en = - HCLGE_RSS_INPUT_TUPLE_OTHER; - vport[i].rss_tuple_sets.ipv6_sctp_en = - hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2 ? - HCLGE_RSS_INPUT_TUPLE_SCTP_NO_PORT : - HCLGE_RSS_INPUT_TUPLE_SCTP; - vport[i].rss_tuple_sets.ipv6_fragment_en = - HCLGE_RSS_INPUT_TUPLE_OTHER; - - vport[i].rss_algo = rss_algo; - - rss_ind_tbl = devm_kcalloc(&hdev->pdev->dev, rss_ind_tbl_size, - sizeof(*rss_ind_tbl), GFP_KERNEL); - if (!rss_ind_tbl) - return -ENOMEM; + vport->rss_tuple_sets.ipv4_tcp_en = HCLGE_RSS_INPUT_TUPLE_OTHER; + vport->rss_tuple_sets.ipv4_udp_en = HCLGE_RSS_INPUT_TUPLE_OTHER; + vport->rss_tuple_sets.ipv4_sctp_en = HCLGE_RSS_INPUT_TUPLE_SCTP; + vport->rss_tuple_sets.ipv4_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER; + vport->rss_tuple_sets.ipv6_tcp_en = HCLGE_RSS_INPUT_TUPLE_OTHER; + vport->rss_tuple_sets.ipv6_udp_en = HCLGE_RSS_INPUT_TUPLE_OTHER; + vport->rss_tuple_sets.ipv6_sctp_en = + hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2 ? + HCLGE_RSS_INPUT_TUPLE_SCTP_NO_PORT : + HCLGE_RSS_INPUT_TUPLE_SCTP; + vport->rss_tuple_sets.ipv6_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER; + + vport->rss_algo = rss_algo; + + rss_ind_tbl = devm_kcalloc(&hdev->pdev->dev, rss_ind_tbl_size, + sizeof(*rss_ind_tbl), GFP_KERNEL); + if (!rss_ind_tbl) + return -ENOMEM; - vport[i].rss_indirection_tbl = rss_ind_tbl; - memcpy(vport[i].rss_hash_key, hclge_hash_key, - HCLGE_RSS_KEY_SIZE); - } + vport->rss_indirection_tbl = rss_ind_tbl; + memcpy(vport->rss_hash_key, hclge_hash_key, HCLGE_RSS_KEY_SIZE); hclge_rss_indir_init_cfg(hdev); @@ -4996,6 +5167,285 @@ static void hclge_request_update_promisc_mode(struct hnae3_handle *handle) set_bit(HCLGE_STATE_PROMISC_CHANGED, &hdev->state); } +static void hclge_sync_fd_state(struct hclge_dev *hdev) +{ + if (hlist_empty(&hdev->fd_rule_list)) + hdev->fd_active_type = HCLGE_FD_RULE_NONE; +} + +static void hclge_fd_inc_rule_cnt(struct hclge_dev *hdev, u16 location) +{ + if (!test_bit(location, hdev->fd_bmap)) { + set_bit(location, hdev->fd_bmap); + hdev->hclge_fd_rule_num++; + } +} + +static void hclge_fd_dec_rule_cnt(struct hclge_dev *hdev, u16 location) +{ + if (test_bit(location, hdev->fd_bmap)) { + clear_bit(location, hdev->fd_bmap); + hdev->hclge_fd_rule_num--; + } +} + +static void hclge_fd_free_node(struct hclge_dev *hdev, + struct hclge_fd_rule *rule) +{ + hlist_del(&rule->rule_node); + kfree(rule); + hclge_sync_fd_state(hdev); +} + +static void hclge_update_fd_rule_node(struct hclge_dev *hdev, + struct hclge_fd_rule *old_rule, + struct hclge_fd_rule *new_rule, + enum HCLGE_FD_NODE_STATE state) +{ + switch (state) { + case HCLGE_FD_TO_ADD: + case HCLGE_FD_ACTIVE: + /* 1) if the new state is TO_ADD, just replace the old rule + * with the same location, no matter its state, because the + * new rule will be configured to the hardware. + * 2) if the new state is ACTIVE, it means the new rule + * has been configured to the hardware, so just replace + * the old rule node with the same location. + * 3) neither case adds a new node to the list, so it's + * unnecessary to update the rule number and fd_bmap.
+ */ + new_rule->rule_node.next = old_rule->rule_node.next; + new_rule->rule_node.pprev = old_rule->rule_node.pprev; + memcpy(old_rule, new_rule, sizeof(*old_rule)); + kfree(new_rule); + break; + case HCLGE_FD_DELETED: + hclge_fd_dec_rule_cnt(hdev, old_rule->location); + hclge_fd_free_node(hdev, old_rule); + break; + case HCLGE_FD_TO_DEL: + /* if the new request is TO_DEL, and the old rule exists + * 1) the state of the old rule is TO_DEL, we need to do nothing, + * because we delete the rule by location, other rule content + * is unnecessary. + * 2) the state of the old rule is ACTIVE, we need to change its + * state to TO_DEL, so the rule will be deleted when the periodic + * task is scheduled. + * 3) the state of the old rule is TO_ADD, it means the rule hasn't + * been added to hardware, so we just delete the rule node from + * fd_rule_list directly. + */ + if (old_rule->state == HCLGE_FD_TO_ADD) { + hclge_fd_dec_rule_cnt(hdev, old_rule->location); + hclge_fd_free_node(hdev, old_rule); + return; + } + old_rule->state = HCLGE_FD_TO_DEL; + break; + } +} + +static struct hclge_fd_rule *hclge_find_fd_rule(struct hlist_head *hlist, + u16 location, + struct hclge_fd_rule **parent) +{ + struct hclge_fd_rule *rule; + struct hlist_node *node; + + hlist_for_each_entry_safe(rule, node, hlist, rule_node) { + if (rule->location == location) + return rule; + else if (rule->location > location) + return NULL; + /* record the parent node, used to keep the nodes in fd_rule_list + * in ascending order. + */ + *parent = rule; + } + + return NULL; +} + +/* insert fd rule node in ascending order according to rule->location */ +static void hclge_fd_insert_rule_node(struct hlist_head *hlist, + struct hclge_fd_rule *rule, + struct hclge_fd_rule *parent) +{ + INIT_HLIST_NODE(&rule->rule_node); + + if (parent) + hlist_add_behind(&rule->rule_node, &parent->rule_node); + else + hlist_add_head(&rule->rule_node, hlist); +} + +static int hclge_fd_set_user_def_cmd(struct hclge_dev *hdev, + struct hclge_fd_user_def_cfg *cfg) +{ + struct hclge_fd_user_def_cfg_cmd *req; + struct hclge_desc desc; + u16 data = 0; + int ret; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_FD_USER_DEF_OP, false); + + req = (struct hclge_fd_user_def_cfg_cmd *)desc.data; + + hnae3_set_bit(data, HCLGE_FD_USER_DEF_EN_B, cfg[0].ref_cnt > 0); + hnae3_set_field(data, HCLGE_FD_USER_DEF_OFT_M, + HCLGE_FD_USER_DEF_OFT_S, cfg[0].offset); + req->ol2_cfg = cpu_to_le16(data); + + data = 0; + hnae3_set_bit(data, HCLGE_FD_USER_DEF_EN_B, cfg[1].ref_cnt > 0); + hnae3_set_field(data, HCLGE_FD_USER_DEF_OFT_M, + HCLGE_FD_USER_DEF_OFT_S, cfg[1].offset); + req->ol3_cfg = cpu_to_le16(data); + + data = 0; + hnae3_set_bit(data, HCLGE_FD_USER_DEF_EN_B, cfg[2].ref_cnt > 0); + hnae3_set_field(data, HCLGE_FD_USER_DEF_OFT_M, + HCLGE_FD_USER_DEF_OFT_S, cfg[2].offset); + req->ol4_cfg = cpu_to_le16(data); + + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) + dev_err(&hdev->pdev->dev, + "failed to set fd user def data, ret= %d\n", ret); + return ret; +} + +static void hclge_sync_fd_user_def_cfg(struct hclge_dev *hdev, bool locked) +{ + int ret; + + if (!test_and_clear_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state)) + return; + + if (!locked) + spin_lock_bh(&hdev->fd_rule_lock); + + ret = hclge_fd_set_user_def_cmd(hdev, hdev->fd_cfg.user_def_cfg); + if (ret) + set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state); + + if (!locked) + spin_unlock_bh(&hdev->fd_rule_lock); +} + +static int hclge_fd_check_user_def_refcnt(struct hclge_dev *hdev, + struct hclge_fd_rule *rule) +{ + struct hlist_head
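[Editorial sketch, not part of the patch] hclge_find_fd_rule() above records the last node it walked past so that, on a miss, hclge_fd_insert_rule_node() can keep fd_rule_list sorted by location: hlist_add_behind() chains after that parent, and hlist_add_head() covers the empty-list / new-smallest-key case. The same idea with a generic node type (struct example_node and example_sorted_insert() are hypothetical):

#include <linux/list.h>

struct example_node {
        struct hlist_node node;
        u16 key;
};

static void example_sorted_insert(struct hlist_head *head,
                                  struct example_node *item)
{
        struct example_node *cur, *parent = NULL;

        hlist_for_each_entry(cur, head, node) {
                if (cur->key > item->key)
                        break;
                parent = cur; /* last node with key <= item->key */
        }

        if (parent)
                hlist_add_behind(&item->node, &parent->node);
        else
                hlist_add_head(&item->node, head);
}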
+ +static int hclge_fd_check_user_def_refcnt(struct hclge_dev *hdev, + struct hclge_fd_rule *rule) +{ + struct hlist_head *hlist = &hdev->fd_rule_list; + struct hclge_fd_rule *fd_rule, *parent = NULL; + struct hclge_fd_user_def_info *info, *old_info; + struct hclge_fd_user_def_cfg *cfg; + + if (!rule || rule->rule_type != HCLGE_FD_EP_ACTIVE || + rule->ep.user_def.layer == HCLGE_FD_USER_DEF_NONE) + return 0; + + /* valid layers start from 1, so subtract 1 to get the cfg index */ + cfg = &hdev->fd_cfg.user_def_cfg[rule->ep.user_def.layer - 1]; + info = &rule->ep.user_def; + + if (!cfg->ref_cnt || cfg->offset == info->offset) + return 0; + + if (cfg->ref_cnt > 1) + goto error; + + fd_rule = hclge_find_fd_rule(hlist, rule->location, &parent); + if (fd_rule) { + old_info = &fd_rule->ep.user_def; + if (info->layer == old_info->layer) + return 0; + } + +error: + dev_err(&hdev->pdev->dev, + "No available offset for layer%d fd rule, each layer only supports one user def offset.\n", + info->layer + 1); + return -ENOSPC; +} + +static void hclge_fd_inc_user_def_refcnt(struct hclge_dev *hdev, + struct hclge_fd_rule *rule) +{ + struct hclge_fd_user_def_cfg *cfg; + + if (!rule || rule->rule_type != HCLGE_FD_EP_ACTIVE || + rule->ep.user_def.layer == HCLGE_FD_USER_DEF_NONE) + return; + + cfg = &hdev->fd_cfg.user_def_cfg[rule->ep.user_def.layer - 1]; + if (!cfg->ref_cnt) { + cfg->offset = rule->ep.user_def.offset; + set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state); + } + cfg->ref_cnt++; +} + +static void hclge_fd_dec_user_def_refcnt(struct hclge_dev *hdev, + struct hclge_fd_rule *rule) +{ + struct hclge_fd_user_def_cfg *cfg; + + if (!rule || rule->rule_type != HCLGE_FD_EP_ACTIVE || + rule->ep.user_def.layer == HCLGE_FD_USER_DEF_NONE) + return; + + cfg = &hdev->fd_cfg.user_def_cfg[rule->ep.user_def.layer - 1]; + if (!cfg->ref_cnt) + return; + + cfg->ref_cnt--; + if (!cfg->ref_cnt) { + cfg->offset = 0; + set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state); + } +} + +static void hclge_update_fd_list(struct hclge_dev *hdev, + enum HCLGE_FD_NODE_STATE state, u16 location, + struct hclge_fd_rule *new_rule) +{ + struct hlist_head *hlist = &hdev->fd_rule_list; + struct hclge_fd_rule *fd_rule, *parent = NULL; + + fd_rule = hclge_find_fd_rule(hlist, location, &parent); + if (fd_rule) { + hclge_fd_dec_user_def_refcnt(hdev, fd_rule); + if (state == HCLGE_FD_ACTIVE) + hclge_fd_inc_user_def_refcnt(hdev, new_rule); + hclge_sync_fd_user_def_cfg(hdev, true); + + hclge_update_fd_rule_node(hdev, fd_rule, new_rule, state); + return; + } + + /* it's unlikely to fail here, because we have checked that the + * rule exists before.
+ */ + if (unlikely(state == HCLGE_FD_TO_DEL || state == HCLGE_FD_DELETED)) { + dev_warn(&hdev->pdev->dev, + "failed to delete fd rule %u, it's inexistent\n", + location); + return; + } + + hclge_fd_inc_user_def_refcnt(hdev, new_rule); + hclge_sync_fd_user_def_cfg(hdev, true); + + hclge_fd_insert_rule_node(hlist, new_rule, parent); + hclge_fd_inc_rule_cnt(hdev, new_rule->location); + + if (state == HCLGE_FD_TO_ADD) { + set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state); + hclge_task_schedule(hdev, 0); + } +} + static int hclge_get_fd_mode(struct hclge_dev *hdev, u8 *fd_mode) { struct hclge_get_fd_mode_cmd *req; @@ -5074,6 +5524,17 @@ static int hclge_set_fd_key_config(struct hclge_dev *hdev, return ret; } +static void hclge_fd_disable_user_def(struct hclge_dev *hdev) +{ + struct hclge_fd_user_def_cfg *cfg = hdev->fd_cfg.user_def_cfg; + + spin_lock_bh(&hdev->fd_rule_lock); + memset(cfg, 0, sizeof(hdev->fd_cfg.user_def_cfg)); + spin_unlock_bh(&hdev->fd_rule_lock); + + hclge_fd_set_user_def_cmd(hdev, cfg); +} + static int hclge_init_fd_config(struct hclge_dev *hdev) { #define LOW_2_WORDS 0x03 @@ -5114,9 +5575,12 @@ static int hclge_init_fd_config(struct hclge_dev *hdev) BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT); /* If use max 400bit key, we can support tuples for ether type */ - if (hdev->fd_cfg.fd_mode == HCLGE_FD_MODE_DEPTH_2K_WIDTH_400B_STAGE_1) + if (hdev->fd_cfg.fd_mode == HCLGE_FD_MODE_DEPTH_2K_WIDTH_400B_STAGE_1) { key_cfg->tuple_active |= BIT(INNER_DST_MAC) | BIT(INNER_SRC_MAC); + if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3) + key_cfg->tuple_active |= HCLGE_FD_TUPLE_USER_DEF_TUPLES; + } /* roce_type is used to filter roce frames * dst_vport is used to specify the rule @@ -5225,96 +5689,57 @@ static int hclge_fd_ad_config(struct hclge_dev *hdev, u8 stage, int loc, static bool hclge_fd_convert_tuple(u32 tuple_bit, u8 *key_x, u8 *key_y, struct hclge_fd_rule *rule) { + int offset, moffset, ip_offset; + enum HCLGE_FD_KEY_OPT key_opt; u16 tmp_x_s, tmp_y_s; u32 tmp_x_l, tmp_y_l; + u8 *p = (u8 *)rule; int i; - if (rule->unused_tuple & tuple_bit) + if (rule->unused_tuple & BIT(tuple_bit)) return true; - switch (tuple_bit) { - case BIT(INNER_DST_MAC): - for (i = 0; i < ETH_ALEN; i++) { - calc_x(key_x[ETH_ALEN - 1 - i], rule->tuples.dst_mac[i], - rule->tuples_mask.dst_mac[i]); - calc_y(key_y[ETH_ALEN - 1 - i], rule->tuples.dst_mac[i], - rule->tuples_mask.dst_mac[i]); - } + key_opt = tuple_key_info[tuple_bit].key_opt; + offset = tuple_key_info[tuple_bit].offset; + moffset = tuple_key_info[tuple_bit].moffset; - return true; - case BIT(INNER_SRC_MAC): - for (i = 0; i < ETH_ALEN; i++) { - calc_x(key_x[ETH_ALEN - 1 - i], rule->tuples.src_mac[i], - rule->tuples_mask.src_mac[i]); - calc_y(key_y[ETH_ALEN - 1 - i], rule->tuples.src_mac[i], - rule->tuples_mask.src_mac[i]); - } + switch (key_opt) { + case KEY_OPT_U8: + calc_x(*key_x, p[offset], p[moffset]); + calc_y(*key_y, p[offset], p[moffset]); return true; - case BIT(INNER_VLAN_TAG_FST): - calc_x(tmp_x_s, rule->tuples.vlan_tag1, - rule->tuples_mask.vlan_tag1); - calc_y(tmp_y_s, rule->tuples.vlan_tag1, - rule->tuples_mask.vlan_tag1); + case KEY_OPT_LE16: + calc_x(tmp_x_s, *(u16 *)(&p[offset]), *(u16 *)(&p[moffset])); + calc_y(tmp_y_s, *(u16 *)(&p[offset]), *(u16 *)(&p[moffset])); *(__le16 *)key_x = cpu_to_le16(tmp_x_s); *(__le16 *)key_y = cpu_to_le16(tmp_y_s); return true; - case BIT(INNER_ETH_TYPE): - calc_x(tmp_x_s, rule->tuples.ether_proto, - rule->tuples_mask.ether_proto); - calc_y(tmp_y_s, rule->tuples.ether_proto, - 
rule->tuples_mask.ether_proto); - *(__le16 *)key_x = cpu_to_le16(tmp_x_s); - *(__le16 *)key_y = cpu_to_le16(tmp_y_s); - - return true; - case BIT(INNER_IP_TOS): - calc_x(*key_x, rule->tuples.ip_tos, rule->tuples_mask.ip_tos); - calc_y(*key_y, rule->tuples.ip_tos, rule->tuples_mask.ip_tos); - - return true; - case BIT(INNER_IP_PROTO): - calc_x(*key_x, rule->tuples.ip_proto, - rule->tuples_mask.ip_proto); - calc_y(*key_y, rule->tuples.ip_proto, - rule->tuples_mask.ip_proto); - - return true; - case BIT(INNER_SRC_IP): - calc_x(tmp_x_l, rule->tuples.src_ip[IPV4_INDEX], - rule->tuples_mask.src_ip[IPV4_INDEX]); - calc_y(tmp_y_l, rule->tuples.src_ip[IPV4_INDEX], - rule->tuples_mask.src_ip[IPV4_INDEX]); - *(__le32 *)key_x = cpu_to_le32(tmp_x_l); - *(__le32 *)key_y = cpu_to_le32(tmp_y_l); - - return true; - case BIT(INNER_DST_IP): - calc_x(tmp_x_l, rule->tuples.dst_ip[IPV4_INDEX], - rule->tuples_mask.dst_ip[IPV4_INDEX]); - calc_y(tmp_y_l, rule->tuples.dst_ip[IPV4_INDEX], - rule->tuples_mask.dst_ip[IPV4_INDEX]); + case KEY_OPT_LE32: + calc_x(tmp_x_l, *(u32 *)(&p[offset]), *(u32 *)(&p[moffset])); + calc_y(tmp_y_l, *(u32 *)(&p[offset]), *(u32 *)(&p[moffset])); *(__le32 *)key_x = cpu_to_le32(tmp_x_l); *(__le32 *)key_y = cpu_to_le32(tmp_y_l); return true; - case BIT(INNER_SRC_PORT): - calc_x(tmp_x_s, rule->tuples.src_port, - rule->tuples_mask.src_port); - calc_y(tmp_y_s, rule->tuples.src_port, - rule->tuples_mask.src_port); - *(__le16 *)key_x = cpu_to_le16(tmp_x_s); - *(__le16 *)key_y = cpu_to_le16(tmp_y_s); + case KEY_OPT_MAC: + for (i = 0; i < ETH_ALEN; i++) { + calc_x(key_x[ETH_ALEN - 1 - i], p[offset + i], + p[moffset + i]); + calc_y(key_y[ETH_ALEN - 1 - i], p[offset + i], + p[moffset + i]); + } return true; - case BIT(INNER_DST_PORT): - calc_x(tmp_x_s, rule->tuples.dst_port, - rule->tuples_mask.dst_port); - calc_y(tmp_y_s, rule->tuples.dst_port, - rule->tuples_mask.dst_port); - *(__le16 *)key_x = cpu_to_le16(tmp_x_s); - *(__le16 *)key_y = cpu_to_le16(tmp_y_s); + case KEY_OPT_IP: + ip_offset = IPV4_INDEX * sizeof(u32); + calc_x(tmp_x_l, *(u32 *)(&p[offset + ip_offset]), + *(u32 *)(&p[moffset + ip_offset])); + calc_y(tmp_y_l, *(u32 *)(&p[offset + ip_offset]), + *(u32 *)(&p[moffset + ip_offset])); + *(__le32 *)key_x = cpu_to_le32(tmp_x_l); + *(__le32 *)key_y = cpu_to_le32(tmp_y_l); return true; default: @@ -5402,12 +5827,12 @@ static int hclge_config_key(struct hclge_dev *hdev, u8 stage, for (i = 0 ; i < MAX_TUPLE; i++) { bool tuple_valid; - u32 check_tuple; tuple_size = tuple_key_info[i].key_length / 8; - check_tuple = key_cfg->tuple_active & BIT(i); + if (!(key_cfg->tuple_active & BIT(i))) + continue; - tuple_valid = hclge_fd_convert_tuple(check_tuple, cur_key_x, + tuple_valid = hclge_fd_convert_tuple(i, cur_key_x, cur_key_y, rule); if (tuple_valid) { cur_key_x += tuple_size; @@ -5538,8 +5963,7 @@ static int hclge_fd_check_tcpip6_tuple(struct ethtool_tcpip6_spec *spec, if (!spec || !unused_tuple) return -EINVAL; - *unused_tuple |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC) | - BIT(INNER_IP_TOS); + *unused_tuple |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC); /* check whether src/dst ip address used */ if (ipv6_addr_any((struct in6_addr *)spec->ip6src)) @@ -5554,8 +5978,8 @@ static int hclge_fd_check_tcpip6_tuple(struct ethtool_tcpip6_spec *spec, if (!spec->pdst) *unused_tuple |= BIT(INNER_DST_PORT); - if (spec->tclass) - return -EOPNOTSUPP; + if (!spec->tclass) + *unused_tuple |= BIT(INNER_IP_TOS); return 0; } @@ -5567,7 +5991,7 @@ static int hclge_fd_check_ip6_tuple(struct ethtool_usrip6_spec *spec, 
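The rework above replaces one switch arm per tuple with a table lookup: tuple_key_info supplies a key type plus the byte offsets of a tuple's value and mask inside struct hclge_fd_rule, and the key bytes are derived from those fields. A self-contained sketch of the idea, under the assumption that calc_x()/calc_y() keep the classic TCAM meaning of x = value & mask and y = ~value & mask (the struct and table here are illustrative, not the driver's):

#include <stddef.h>
#include <stdint.h>

struct rule {
	uint8_t ip_tos, ip_proto;	/* values */
	uint8_t ip_tos_m, ip_proto_m;	/* masks  */
};

struct tuple_info {
	size_t offset;	/* value offset inside struct rule */
	size_t moffset;	/* mask offset inside struct rule  */
};

static const struct tuple_info tuple_tbl[] = {
	{ offsetof(struct rule, ip_tos),   offsetof(struct rule, ip_tos_m)   },
	{ offsetof(struct rule, ip_proto), offsetof(struct rule, ip_proto_m) },
};

static void convert_u8_tuple(const struct rule *r, int idx,
			     uint8_t *key_x, uint8_t *key_y)
{
	const uint8_t *p = (const uint8_t *)r;
	uint8_t val = p[tuple_tbl[idx].offset];
	uint8_t mask = p[tuple_tbl[idx].moffset];

	*key_x = val & mask;			/* calc_x() */
	*key_y = (uint8_t)(~val) & mask;	/* calc_y() */
}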
return -EINVAL; *unused_tuple |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC) | - BIT(INNER_IP_TOS) | BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT); + BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT); /* check whether src/dst ip address used */ if (ipv6_addr_any((struct in6_addr *)spec->ip6src)) @@ -5579,8 +6003,8 @@ static int hclge_fd_check_ip6_tuple(struct ethtool_usrip6_spec *spec, if (!spec->l4_proto) *unused_tuple |= BIT(INNER_IP_PROTO); - if (spec->tclass) - return -EOPNOTSUPP; + if (!spec->tclass) + *unused_tuple |= BIT(INNER_IP_TOS); if (spec->l4_4_bytes) return -EOPNOTSUPP; @@ -5650,9 +6074,98 @@ static int hclge_fd_check_ext_tuple(struct hclge_dev *hdev, return 0; } +static int hclge_fd_get_user_def_layer(u32 flow_type, u32 *unused_tuple, + struct hclge_fd_user_def_info *info) +{ + switch (flow_type) { + case ETHER_FLOW: + info->layer = HCLGE_FD_USER_DEF_L2; + *unused_tuple &= ~BIT(INNER_L2_RSV); + break; + case IP_USER_FLOW: + case IPV6_USER_FLOW: + info->layer = HCLGE_FD_USER_DEF_L3; + *unused_tuple &= ~BIT(INNER_L3_RSV); + break; + case TCP_V4_FLOW: + case UDP_V4_FLOW: + case TCP_V6_FLOW: + case UDP_V6_FLOW: + info->layer = HCLGE_FD_USER_DEF_L4; + *unused_tuple &= ~BIT(INNER_L4_RSV); + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static bool hclge_fd_is_user_def_all_masked(struct ethtool_rx_flow_spec *fs) +{ + return be32_to_cpu(fs->m_ext.data[1] | fs->m_ext.data[0]) == 0; +} + +static int hclge_fd_parse_user_def_field(struct hclge_dev *hdev, + struct ethtool_rx_flow_spec *fs, + u32 *unused_tuple, + struct hclge_fd_user_def_info *info) +{ + u32 tuple_active = hdev->fd_cfg.key_cfg[HCLGE_FD_STAGE_1].tuple_active; + u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT); + u16 data, offset, data_mask, offset_mask; + int ret; + + info->layer = HCLGE_FD_USER_DEF_NONE; + *unused_tuple |= HCLGE_FD_TUPLE_USER_DEF_TUPLES; + + if (!(fs->flow_type & FLOW_EXT) || hclge_fd_is_user_def_all_masked(fs)) + return 0; + + /* user-def data from ethtool is 64 bit value, the bit0~15 is used + * for data, and bit32~47 is used for offset. 
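+ * For example (an illustrative value, not from the patch): a user-def
+ * of 0x000000120000abcd carries offset 0x12 in bits 32~47, i.e. in the
+ * h_ext.data[0] word, and the 16-bit match pattern 0xabcd in bits 0~15,
+ * i.e. in the h_ext.data[1] word; m_ext.data masks both the same way.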
+ */ + data = be32_to_cpu(fs->h_ext.data[1]) & HCLGE_FD_USER_DEF_DATA; + data_mask = be32_to_cpu(fs->m_ext.data[1]) & HCLGE_FD_USER_DEF_DATA; + offset = be32_to_cpu(fs->h_ext.data[0]) & HCLGE_FD_USER_DEF_OFFSET; + offset_mask = be32_to_cpu(fs->m_ext.data[0]) & HCLGE_FD_USER_DEF_OFFSET; + + if (!(tuple_active & HCLGE_FD_TUPLE_USER_DEF_TUPLES)) { + dev_err(&hdev->pdev->dev, "user-def bytes are not supported\n"); + return -EOPNOTSUPP; + } + + if (offset > HCLGE_FD_MAX_USER_DEF_OFFSET) { + dev_err(&hdev->pdev->dev, + "user-def offset[%u] should be no more than %u\n", + offset, HCLGE_FD_MAX_USER_DEF_OFFSET); + return -EINVAL; + } + + if (offset_mask != HCLGE_FD_USER_DEF_OFFSET_UNMASK) { + dev_err(&hdev->pdev->dev, "user-def offset can't be masked\n"); + return -EINVAL; + } + + ret = hclge_fd_get_user_def_layer(flow_type, unused_tuple, info); + if (ret) { + dev_err(&hdev->pdev->dev, + "unsupported flow type for user-def bytes, ret = %d\n", + ret); + return ret; + } + + info->data = data; + info->data_mask = data_mask; + info->offset = offset; + + return 0; +} + static int hclge_fd_check_spec(struct hclge_dev *hdev, struct ethtool_rx_flow_spec *fs, - u32 *unused_tuple) + u32 *unused_tuple, + struct hclge_fd_user_def_info *info) { u32 flow_type; int ret; @@ -5665,11 +6178,9 @@ static int hclge_fd_check_spec(struct hclge_dev *hdev, return -EINVAL; } - if ((fs->flow_type & FLOW_EXT) && - (fs->h_ext.data[0] != 0 || fs->h_ext.data[1] != 0)) { - dev_err(&hdev->pdev->dev, "user-def bytes are not supported\n"); - return -EOPNOTSUPP; - } + ret = hclge_fd_parse_user_def_field(hdev, fs, unused_tuple, info); + if (ret) + return ret; flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT); switch (flow_type) { @@ -5721,217 +6232,194 @@ static int hclge_fd_check_spec(struct hclge_dev *hdev, return hclge_fd_check_ext_tuple(hdev, fs, unused_tuple); } -static bool hclge_fd_rule_exist(struct hclge_dev *hdev, u16 location) +static void hclge_fd_get_tcpip4_tuple(struct hclge_dev *hdev, + struct ethtool_rx_flow_spec *fs, + struct hclge_fd_rule *rule, u8 ip_proto) { - struct hclge_fd_rule *rule = NULL; - struct hlist_node *node2; + rule->tuples.src_ip[IPV4_INDEX] = + be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4src); + rule->tuples_mask.src_ip[IPV4_INDEX] = + be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4src); - spin_lock_bh(&hdev->fd_rule_lock); - hlist_for_each_entry_safe(rule, node2, &hdev->fd_rule_list, rule_node) { - if (rule->location >= location) - break; - } + rule->tuples.dst_ip[IPV4_INDEX] = + be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4dst); + rule->tuples_mask.dst_ip[IPV4_INDEX] = + be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4dst); - spin_unlock_bh(&hdev->fd_rule_lock); + rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip4_spec.psrc); + rule->tuples_mask.src_port = be16_to_cpu(fs->m_u.tcp_ip4_spec.psrc); - return rule && rule->location == location; -} + rule->tuples.dst_port = be16_to_cpu(fs->h_u.tcp_ip4_spec.pdst); + rule->tuples_mask.dst_port = be16_to_cpu(fs->m_u.tcp_ip4_spec.pdst); -/* make sure being called after lock up with fd_rule_lock */ -static int hclge_fd_update_rule_list(struct hclge_dev *hdev, - struct hclge_fd_rule *new_rule, - u16 location, - bool is_add) -{ - struct hclge_fd_rule *rule = NULL, *parent = NULL; - struct hlist_node *node2; - - if (is_add && !new_rule) - return -EINVAL; - - hlist_for_each_entry_safe(rule, node2, - &hdev->fd_rule_list, rule_node) { - if (rule->location >= location) - break; - parent = rule; - } + rule->tuples.ip_tos = fs->h_u.tcp_ip4_spec.tos; + rule->tuples_mask.ip_tos = 
fs->m_u.tcp_ip4_spec.tos; - if (rule && rule->location == location) { - hlist_del(&rule->rule_node); - kfree(rule); - hdev->hclge_fd_rule_num--; + rule->tuples.ether_proto = ETH_P_IP; + rule->tuples_mask.ether_proto = 0xFFFF; - if (!is_add) { - if (!hdev->hclge_fd_rule_num) - hdev->fd_active_type = HCLGE_FD_RULE_NONE; - clear_bit(location, hdev->fd_bmap); + rule->tuples.ip_proto = ip_proto; + rule->tuples_mask.ip_proto = 0xFF; +} - return 0; - } - } else if (!is_add) { - dev_err(&hdev->pdev->dev, - "delete fail, rule %u is inexistent\n", - location); - return -EINVAL; - } +static void hclge_fd_get_ip4_tuple(struct hclge_dev *hdev, + struct ethtool_rx_flow_spec *fs, + struct hclge_fd_rule *rule) +{ + rule->tuples.src_ip[IPV4_INDEX] = + be32_to_cpu(fs->h_u.usr_ip4_spec.ip4src); + rule->tuples_mask.src_ip[IPV4_INDEX] = + be32_to_cpu(fs->m_u.usr_ip4_spec.ip4src); - INIT_HLIST_NODE(&new_rule->rule_node); + rule->tuples.dst_ip[IPV4_INDEX] = + be32_to_cpu(fs->h_u.usr_ip4_spec.ip4dst); + rule->tuples_mask.dst_ip[IPV4_INDEX] = + be32_to_cpu(fs->m_u.usr_ip4_spec.ip4dst); - if (parent) - hlist_add_behind(&new_rule->rule_node, &parent->rule_node); - else - hlist_add_head(&new_rule->rule_node, &hdev->fd_rule_list); + rule->tuples.ip_tos = fs->h_u.usr_ip4_spec.tos; + rule->tuples_mask.ip_tos = fs->m_u.usr_ip4_spec.tos; - set_bit(location, hdev->fd_bmap); - hdev->hclge_fd_rule_num++; - hdev->fd_active_type = new_rule->rule_type; + rule->tuples.ip_proto = fs->h_u.usr_ip4_spec.proto; + rule->tuples_mask.ip_proto = fs->m_u.usr_ip4_spec.proto; - return 0; + rule->tuples.ether_proto = ETH_P_IP; + rule->tuples_mask.ether_proto = 0xFFFF; } -static int hclge_fd_get_tuple(struct hclge_dev *hdev, - struct ethtool_rx_flow_spec *fs, - struct hclge_fd_rule *rule) +static void hclge_fd_get_tcpip6_tuple(struct hclge_dev *hdev, + struct ethtool_rx_flow_spec *fs, + struct hclge_fd_rule *rule, u8 ip_proto) { - u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT); + be32_to_cpu_array(rule->tuples.src_ip, fs->h_u.tcp_ip6_spec.ip6src, + IPV6_SIZE); + be32_to_cpu_array(rule->tuples_mask.src_ip, fs->m_u.tcp_ip6_spec.ip6src, + IPV6_SIZE); - switch (flow_type) { - case SCTP_V4_FLOW: - case TCP_V4_FLOW: - case UDP_V4_FLOW: - rule->tuples.src_ip[IPV4_INDEX] = - be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4src); - rule->tuples_mask.src_ip[IPV4_INDEX] = - be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4src); - - rule->tuples.dst_ip[IPV4_INDEX] = - be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4dst); - rule->tuples_mask.dst_ip[IPV4_INDEX] = - be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4dst); + be32_to_cpu_array(rule->tuples.dst_ip, fs->h_u.tcp_ip6_spec.ip6dst, + IPV6_SIZE); + be32_to_cpu_array(rule->tuples_mask.dst_ip, fs->m_u.tcp_ip6_spec.ip6dst, + IPV6_SIZE); - rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip4_spec.psrc); - rule->tuples_mask.src_port = - be16_to_cpu(fs->m_u.tcp_ip4_spec.psrc); + rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.psrc); + rule->tuples_mask.src_port = be16_to_cpu(fs->m_u.tcp_ip6_spec.psrc); - rule->tuples.dst_port = be16_to_cpu(fs->h_u.tcp_ip4_spec.pdst); - rule->tuples_mask.dst_port = - be16_to_cpu(fs->m_u.tcp_ip4_spec.pdst); + rule->tuples.dst_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.pdst); + rule->tuples_mask.dst_port = be16_to_cpu(fs->m_u.tcp_ip6_spec.pdst); - rule->tuples.ip_tos = fs->h_u.tcp_ip4_spec.tos; - rule->tuples_mask.ip_tos = fs->m_u.tcp_ip4_spec.tos; + rule->tuples.ether_proto = ETH_P_IPV6; + rule->tuples_mask.ether_proto = 0xFFFF; - rule->tuples.ether_proto = ETH_P_IP; - rule->tuples_mask.ether_proto = 0xFFFF; 
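All of the tuple getters above normalize the ethtool flow spec to host byte order: be16_to_cpu() for ports, be32_to_cpu() for IPv4 addresses, and be32_to_cpu_array() for IPv6 addresses. A userspace equivalent of the array helper, built on POSIX ntohl() (illustrative, not the kernel's implementation):

#include <stdint.h>
#include <arpa/inet.h>

/* convert n big-endian 32-bit words (e.g. an IPv6 address) to host order */
static void be32_to_host_array(uint32_t *dst, const uint32_t *src, int n)
{
	int i;

	for (i = 0; i < n; i++)
		dst[i] = ntohl(src[i]);
}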
+ rule->tuples.ip_tos = fs->h_u.tcp_ip6_spec.tclass; + rule->tuples_mask.ip_tos = fs->m_u.tcp_ip6_spec.tclass; - break; - case IP_USER_FLOW: - rule->tuples.src_ip[IPV4_INDEX] = - be32_to_cpu(fs->h_u.usr_ip4_spec.ip4src); - rule->tuples_mask.src_ip[IPV4_INDEX] = - be32_to_cpu(fs->m_u.usr_ip4_spec.ip4src); - - rule->tuples.dst_ip[IPV4_INDEX] = - be32_to_cpu(fs->h_u.usr_ip4_spec.ip4dst); - rule->tuples_mask.dst_ip[IPV4_INDEX] = - be32_to_cpu(fs->m_u.usr_ip4_spec.ip4dst); + rule->tuples.ip_proto = ip_proto; + rule->tuples_mask.ip_proto = 0xFF; +} - rule->tuples.ip_tos = fs->h_u.usr_ip4_spec.tos; - rule->tuples_mask.ip_tos = fs->m_u.usr_ip4_spec.tos; +static void hclge_fd_get_ip6_tuple(struct hclge_dev *hdev, + struct ethtool_rx_flow_spec *fs, + struct hclge_fd_rule *rule) +{ + be32_to_cpu_array(rule->tuples.src_ip, fs->h_u.usr_ip6_spec.ip6src, + IPV6_SIZE); + be32_to_cpu_array(rule->tuples_mask.src_ip, fs->m_u.usr_ip6_spec.ip6src, + IPV6_SIZE); - rule->tuples.ip_proto = fs->h_u.usr_ip4_spec.proto; - rule->tuples_mask.ip_proto = fs->m_u.usr_ip4_spec.proto; + be32_to_cpu_array(rule->tuples.dst_ip, fs->h_u.usr_ip6_spec.ip6dst, + IPV6_SIZE); + be32_to_cpu_array(rule->tuples_mask.dst_ip, fs->m_u.usr_ip6_spec.ip6dst, + IPV6_SIZE); - rule->tuples.ether_proto = ETH_P_IP; - rule->tuples_mask.ether_proto = 0xFFFF; + rule->tuples.ip_proto = fs->h_u.usr_ip6_spec.l4_proto; + rule->tuples_mask.ip_proto = fs->m_u.usr_ip6_spec.l4_proto; - break; - case SCTP_V6_FLOW: - case TCP_V6_FLOW: - case UDP_V6_FLOW: - be32_to_cpu_array(rule->tuples.src_ip, - fs->h_u.tcp_ip6_spec.ip6src, IPV6_SIZE); - be32_to_cpu_array(rule->tuples_mask.src_ip, - fs->m_u.tcp_ip6_spec.ip6src, IPV6_SIZE); + rule->tuples.ip_tos = fs->h_u.tcp_ip6_spec.tclass; + rule->tuples_mask.ip_tos = fs->m_u.tcp_ip6_spec.tclass; - be32_to_cpu_array(rule->tuples.dst_ip, - fs->h_u.tcp_ip6_spec.ip6dst, IPV6_SIZE); - be32_to_cpu_array(rule->tuples_mask.dst_ip, - fs->m_u.tcp_ip6_spec.ip6dst, IPV6_SIZE); + rule->tuples.ether_proto = ETH_P_IPV6; + rule->tuples_mask.ether_proto = 0xFFFF; +} - rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.psrc); - rule->tuples_mask.src_port = - be16_to_cpu(fs->m_u.tcp_ip6_spec.psrc); +static void hclge_fd_get_ether_tuple(struct hclge_dev *hdev, + struct ethtool_rx_flow_spec *fs, + struct hclge_fd_rule *rule) +{ + ether_addr_copy(rule->tuples.src_mac, fs->h_u.ether_spec.h_source); + ether_addr_copy(rule->tuples_mask.src_mac, fs->m_u.ether_spec.h_source); - rule->tuples.dst_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.pdst); - rule->tuples_mask.dst_port = - be16_to_cpu(fs->m_u.tcp_ip6_spec.pdst); + ether_addr_copy(rule->tuples.dst_mac, fs->h_u.ether_spec.h_dest); + ether_addr_copy(rule->tuples_mask.dst_mac, fs->m_u.ether_spec.h_dest); - rule->tuples.ether_proto = ETH_P_IPV6; - rule->tuples_mask.ether_proto = 0xFFFF; + rule->tuples.ether_proto = be16_to_cpu(fs->h_u.ether_spec.h_proto); + rule->tuples_mask.ether_proto = be16_to_cpu(fs->m_u.ether_spec.h_proto); +} +static void hclge_fd_get_user_def_tuple(struct hclge_fd_user_def_info *info, + struct hclge_fd_rule *rule) +{ + switch (info->layer) { + case HCLGE_FD_USER_DEF_L2: + rule->tuples.l2_user_def = info->data; + rule->tuples_mask.l2_user_def = info->data_mask; break; - case IPV6_USER_FLOW: - be32_to_cpu_array(rule->tuples.src_ip, - fs->h_u.usr_ip6_spec.ip6src, IPV6_SIZE); - be32_to_cpu_array(rule->tuples_mask.src_ip, - fs->m_u.usr_ip6_spec.ip6src, IPV6_SIZE); - - be32_to_cpu_array(rule->tuples.dst_ip, - fs->h_u.usr_ip6_spec.ip6dst, IPV6_SIZE); - 
be32_to_cpu_array(rule->tuples_mask.dst_ip, - fs->m_u.usr_ip6_spec.ip6dst, IPV6_SIZE); - - rule->tuples.ip_proto = fs->h_u.usr_ip6_spec.l4_proto; - rule->tuples_mask.ip_proto = fs->m_u.usr_ip6_spec.l4_proto; - - rule->tuples.ether_proto = ETH_P_IPV6; - rule->tuples_mask.ether_proto = 0xFFFF; - + case HCLGE_FD_USER_DEF_L3: + rule->tuples.l3_user_def = info->data; + rule->tuples_mask.l3_user_def = info->data_mask; break; - case ETHER_FLOW: - ether_addr_copy(rule->tuples.src_mac, - fs->h_u.ether_spec.h_source); - ether_addr_copy(rule->tuples_mask.src_mac, - fs->m_u.ether_spec.h_source); - - ether_addr_copy(rule->tuples.dst_mac, - fs->h_u.ether_spec.h_dest); - ether_addr_copy(rule->tuples_mask.dst_mac, - fs->m_u.ether_spec.h_dest); - - rule->tuples.ether_proto = - be16_to_cpu(fs->h_u.ether_spec.h_proto); - rule->tuples_mask.ether_proto = - be16_to_cpu(fs->m_u.ether_spec.h_proto); - + case HCLGE_FD_USER_DEF_L4: + rule->tuples.l4_user_def = (u32)info->data << 16; + rule->tuples_mask.l4_user_def = (u32)info->data_mask << 16; break; default: - return -EOPNOTSUPP; + break; } + rule->ep.user_def = *info; +} + +static int hclge_fd_get_tuple(struct hclge_dev *hdev, + struct ethtool_rx_flow_spec *fs, + struct hclge_fd_rule *rule, + struct hclge_fd_user_def_info *info) +{ + u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT); + switch (flow_type) { case SCTP_V4_FLOW: - case SCTP_V6_FLOW: - rule->tuples.ip_proto = IPPROTO_SCTP; - rule->tuples_mask.ip_proto = 0xFF; + hclge_fd_get_tcpip4_tuple(hdev, fs, rule, IPPROTO_SCTP); break; case TCP_V4_FLOW: - case TCP_V6_FLOW: - rule->tuples.ip_proto = IPPROTO_TCP; - rule->tuples_mask.ip_proto = 0xFF; + hclge_fd_get_tcpip4_tuple(hdev, fs, rule, IPPROTO_TCP); break; case UDP_V4_FLOW: + hclge_fd_get_tcpip4_tuple(hdev, fs, rule, IPPROTO_UDP); + break; + case IP_USER_FLOW: + hclge_fd_get_ip4_tuple(hdev, fs, rule); + break; + case SCTP_V6_FLOW: + hclge_fd_get_tcpip6_tuple(hdev, fs, rule, IPPROTO_SCTP); + break; + case TCP_V6_FLOW: + hclge_fd_get_tcpip6_tuple(hdev, fs, rule, IPPROTO_TCP); + break; case UDP_V6_FLOW: - rule->tuples.ip_proto = IPPROTO_UDP; - rule->tuples_mask.ip_proto = 0xFF; + hclge_fd_get_tcpip6_tuple(hdev, fs, rule, IPPROTO_UDP); break; - default: + case IPV6_USER_FLOW: + hclge_fd_get_ip6_tuple(hdev, fs, rule); + break; + case ETHER_FLOW: + hclge_fd_get_ether_tuple(hdev, fs, rule); break; + default: + return -EOPNOTSUPP; } if (fs->flow_type & FLOW_EXT) { rule->tuples.vlan_tag1 = be16_to_cpu(fs->h_ext.vlan_tci); rule->tuples_mask.vlan_tag1 = be16_to_cpu(fs->m_ext.vlan_tci); + hclge_fd_get_user_def_tuple(info, rule); } if (fs->flow_type & FLOW_MAC_EXT) { @@ -5942,33 +6430,53 @@ static int hclge_fd_get_tuple(struct hclge_dev *hdev, return 0; } -/* make sure being called after lock up with fd_rule_lock */ static int hclge_fd_config_rule(struct hclge_dev *hdev, struct hclge_fd_rule *rule) { int ret; - if (!rule) { + ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule); + if (ret) + return ret; + + return hclge_config_key(hdev, HCLGE_FD_STAGE_1, rule); +} + +static int hclge_add_fd_entry_common(struct hclge_dev *hdev, + struct hclge_fd_rule *rule) +{ + int ret; + + spin_lock_bh(&hdev->fd_rule_lock); + + if (hdev->fd_active_type != rule->rule_type && + (hdev->fd_active_type == HCLGE_FD_TC_FLOWER_ACTIVE || + hdev->fd_active_type == HCLGE_FD_EP_ACTIVE)) { dev_err(&hdev->pdev->dev, - "The flow director rule is NULL\n"); + "mode conflict(new type %d, active type %d), please delete existent rules first\n", + rule->rule_type, hdev->fd_active_type); + 
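hclge_fd_get_user_def_tuple() above routes one 16-bit pattern into a layer-specific tuple field, shifting it into the high half of the 32-bit L4 field. A simplified userspace mirror of that dispatch (stand-in types and hypothetical names):

#include <stdint.h>

enum udef_layer { UDEF_L2 = 1, UDEF_L3, UDEF_L4 };

struct udef_tuples {
	uint16_t l2_user_def;
	uint16_t l3_user_def;
	uint32_t l4_user_def;
};

static void set_user_def(struct udef_tuples *t, enum udef_layer layer,
			 uint16_t data)
{
	switch (layer) {
	case UDEF_L2:
		t->l2_user_def = data;
		break;
	case UDEF_L3:
		t->l3_user_def = data;
		break;
	case UDEF_L4:
		/* the L4 user-def bytes occupy the upper 16 bits */
		t->l4_user_def = (uint32_t)data << 16;
		break;
	}
}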
spin_unlock_bh(&hdev->fd_rule_lock); return -EINVAL; } - /* it will never fail here, so needn't to check return value */ - hclge_fd_update_rule_list(hdev, rule, rule->location, true); + ret = hclge_fd_check_user_def_refcnt(hdev, rule); + if (ret) + goto out; - ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule); + ret = hclge_clear_arfs_rules(hdev); if (ret) - goto clear_rule; + goto out; - ret = hclge_config_key(hdev, HCLGE_FD_STAGE_1, rule); + ret = hclge_fd_config_rule(hdev, rule); if (ret) - goto clear_rule; + goto out; - return 0; + rule->state = HCLGE_FD_ACTIVE; + hdev->fd_active_type = rule->rule_type; + hclge_update_fd_list(hdev, rule->state, rule->location, rule); -clear_rule: - hclge_fd_update_rule_list(hdev, rule, rule->location, false); +out: + spin_unlock_bh(&hdev->fd_rule_lock); return ret; } @@ -5980,11 +6488,48 @@ static bool hclge_is_cls_flower_active(struct hnae3_handle *handle) return hdev->fd_active_type == HCLGE_FD_TC_FLOWER_ACTIVE; } +static int hclge_fd_parse_ring_cookie(struct hclge_dev *hdev, u64 ring_cookie, + u16 *vport_id, u8 *action, u16 *queue_id) +{ + struct hclge_vport *vport = hdev->vport; + + if (ring_cookie == RX_CLS_FLOW_DISC) { + *action = HCLGE_FD_ACTION_DROP_PACKET; + } else { + u32 ring = ethtool_get_flow_spec_ring(ring_cookie); + u8 vf = ethtool_get_flow_spec_ring_vf(ring_cookie); + u16 tqps; + + if (vf > hdev->num_req_vfs) { + dev_err(&hdev->pdev->dev, + "Error: vf id (%u) > max vf num (%u)\n", + vf, hdev->num_req_vfs); + return -EINVAL; + } + + *vport_id = vf ? hdev->vport[vf].vport_id : vport->vport_id; + tqps = hdev->vport[vf].nic.kinfo.num_tqps; + + if (ring >= tqps) { + dev_err(&hdev->pdev->dev, + "Error: queue id (%u) > max tqp num (%u)\n", + ring, tqps - 1); + return -EINVAL; + } + + *action = HCLGE_FD_ACTION_SELECT_QUEUE; + *queue_id = ring; + } + + return 0; +} + static int hclge_add_fd_entry(struct hnae3_handle *handle, struct ethtool_rxnfc *cmd) { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; + struct hclge_fd_user_def_info info; u16 dst_vport_id = 0, q_index = 0; struct ethtool_rx_flow_spec *fs; struct hclge_fd_rule *rule; @@ -6004,51 +6549,22 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle, return -EOPNOTSUPP; } - if (hclge_is_cls_flower_active(handle)) { - dev_err(&hdev->pdev->dev, - "please delete all exist cls flower rules first\n"); - return -EINVAL; - } - fs = (struct ethtool_rx_flow_spec *)&cmd->fs; - ret = hclge_fd_check_spec(hdev, fs, &unused); + ret = hclge_fd_check_spec(hdev, fs, &unused, &info); if (ret) return ret; - if (fs->ring_cookie == RX_CLS_FLOW_DISC) { - action = HCLGE_FD_ACTION_DROP_PACKET; - } else { - u32 ring = ethtool_get_flow_spec_ring(fs->ring_cookie); - u8 vf = ethtool_get_flow_spec_ring_vf(fs->ring_cookie); - u16 tqps; - - if (vf > hdev->num_req_vfs) { - dev_err(&hdev->pdev->dev, - "Error: vf id (%u) > max vf num (%u)\n", - vf, hdev->num_req_vfs); - return -EINVAL; - } - - dst_vport_id = vf ? hdev->vport[vf].vport_id : vport->vport_id; - tqps = vf ? 
hdev->vport[vf].alloc_tqps : vport->alloc_tqps; - - if (ring >= tqps) { - dev_err(&hdev->pdev->dev, - "Error: queue id (%u) > max tqp num (%u)\n", - ring, tqps - 1); - return -EINVAL; - } - - action = HCLGE_FD_ACTION_SELECT_QUEUE; - q_index = ring; - } + ret = hclge_fd_parse_ring_cookie(hdev, fs->ring_cookie, &dst_vport_id, + &action, &q_index); + if (ret) + return ret; rule = kzalloc(sizeof(*rule), GFP_KERNEL); if (!rule) return -ENOMEM; - ret = hclge_fd_get_tuple(hdev, fs, rule); + ret = hclge_fd_get_tuple(hdev, fs, rule, &info); if (ret) { kfree(rule); return ret; @@ -6062,15 +6578,9 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle, rule->action = action; rule->rule_type = HCLGE_FD_EP_ACTIVE; - /* to avoid rule conflict, when user configure rule by ethtool, - * we need to clear all arfs rules - */ - spin_lock_bh(&hdev->fd_rule_lock); - hclge_clear_arfs_rules(handle); - - ret = hclge_fd_config_rule(hdev, rule); - - spin_unlock_bh(&hdev->fd_rule_lock); + ret = hclge_add_fd_entry_common(hdev, rule); + if (ret) + kfree(rule); return ret; } @@ -6091,32 +6601,30 @@ static int hclge_del_fd_entry(struct hnae3_handle *handle, if (fs->location >= hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]) return -EINVAL; - if (hclge_is_cls_flower_active(handle) || !hdev->hclge_fd_rule_num || - !hclge_fd_rule_exist(hdev, fs->location)) { + spin_lock_bh(&hdev->fd_rule_lock); + if (hdev->fd_active_type == HCLGE_FD_TC_FLOWER_ACTIVE || + !test_bit(fs->location, hdev->fd_bmap)) { dev_err(&hdev->pdev->dev, "Delete fail, rule %u is inexistent\n", fs->location); + spin_unlock_bh(&hdev->fd_rule_lock); return -ENOENT; } ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, fs->location, NULL, false); if (ret) - return ret; + goto out; - spin_lock_bh(&hdev->fd_rule_lock); - ret = hclge_fd_update_rule_list(hdev, NULL, fs->location, false); + hclge_update_fd_list(hdev, HCLGE_FD_DELETED, fs->location, NULL); +out: spin_unlock_bh(&hdev->fd_rule_lock); - return ret; } -/* make sure being called after lock up with fd_rule_lock */ -static void hclge_del_all_fd_entries(struct hnae3_handle *handle, - bool clear_list) +static void hclge_clear_fd_rules_in_list(struct hclge_dev *hdev, + bool clear_list) { - struct hclge_vport *vport = hclge_get_vport(handle); - struct hclge_dev *hdev = vport->back; struct hclge_fd_rule *rule; struct hlist_node *node; u16 location; @@ -6124,6 +6632,8 @@ static void hclge_del_all_fd_entries(struct hnae3_handle *handle, if (!hnae3_dev_fd_supported(hdev)) return; + spin_lock_bh(&hdev->fd_rule_lock); + for_each_set_bit(location, hdev->fd_bmap, hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]) hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, location, @@ -6140,6 +6650,14 @@ static void hclge_del_all_fd_entries(struct hnae3_handle *handle, bitmap_zero(hdev->fd_bmap, hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]); } + + spin_unlock_bh(&hdev->fd_rule_lock); +} + +static void hclge_del_all_fd_entries(struct hclge_dev *hdev) +{ + hclge_clear_fd_rules_in_list(hdev, true); + hclge_fd_disable_user_def(hdev); } static int hclge_restore_fd_entries(struct hnae3_handle *handle) @@ -6148,7 +6666,6 @@ static int hclge_restore_fd_entries(struct hnae3_handle *handle) struct hclge_dev *hdev = vport->back; struct hclge_fd_rule *rule; struct hlist_node *node; - int ret; /* Return ok here, because reset error handling will check this * return value. 
If error is returned here, the reset process will @@ -6163,25 +6680,11 @@ static int hclge_restore_fd_entries(struct hnae3_handle *handle) spin_lock_bh(&hdev->fd_rule_lock); hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) { - ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule); - if (!ret) - ret = hclge_config_key(hdev, HCLGE_FD_STAGE_1, rule); - - if (ret) { - dev_warn(&hdev->pdev->dev, - "Restore rule %u failed, remove it\n", - rule->location); - clear_bit(rule->location, hdev->fd_bmap); - hlist_del(&rule->rule_node); - kfree(rule); - hdev->hclge_fd_rule_num--; - } + if (rule->state == HCLGE_FD_ACTIVE) + rule->state = HCLGE_FD_TO_ADD; } - - if (hdev->hclge_fd_rule_num) - hdev->fd_active_type = HCLGE_FD_EP_ACTIVE; - spin_unlock_bh(&hdev->fd_rule_lock); + set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state); return 0; } @@ -6269,6 +6772,10 @@ static void hclge_fd_get_tcpip6_info(struct hclge_fd_rule *rule, cpu_to_be32_array(spec_mask->ip6dst, rule->tuples_mask.dst_ip, IPV6_SIZE); + spec->tclass = rule->tuples.ip_tos; + spec_mask->tclass = rule->unused_tuple & BIT(INNER_IP_TOS) ? + 0 : rule->tuples_mask.ip_tos; + spec->psrc = cpu_to_be16(rule->tuples.src_port); spec_mask->psrc = rule->unused_tuple & BIT(INNER_SRC_PORT) ? 0 : cpu_to_be16(rule->tuples_mask.src_port); @@ -6296,6 +6803,10 @@ static void hclge_fd_get_ip6_info(struct hclge_fd_rule *rule, cpu_to_be32_array(spec_mask->ip6dst, rule->tuples_mask.dst_ip, IPV6_SIZE); + spec->tclass = rule->tuples.ip_tos; + spec_mask->tclass = rule->unused_tuple & BIT(INNER_IP_TOS) ? + 0 : rule->tuples_mask.ip_tos; + spec->l4_proto = rule->tuples.ip_proto; spec_mask->l4_proto = rule->unused_tuple & BIT(INNER_IP_PROTO) ? 0 : rule->tuples_mask.ip_proto; @@ -6323,6 +6834,24 @@ static void hclge_fd_get_ether_info(struct hclge_fd_rule *rule, 0 : cpu_to_be16(rule->tuples_mask.ether_proto); } +static void hclge_fd_get_user_def_info(struct ethtool_rx_flow_spec *fs, + struct hclge_fd_rule *rule) +{ + if ((rule->unused_tuple & HCLGE_FD_TUPLE_USER_DEF_TUPLES) == + HCLGE_FD_TUPLE_USER_DEF_TUPLES) { + fs->h_ext.data[0] = 0; + fs->h_ext.data[1] = 0; + fs->m_ext.data[0] = 0; + fs->m_ext.data[1] = 0; + } else { + fs->h_ext.data[0] = cpu_to_be32(rule->ep.user_def.offset); + fs->h_ext.data[1] = cpu_to_be32(rule->ep.user_def.data); + fs->m_ext.data[0] = + cpu_to_be32(HCLGE_FD_USER_DEF_OFFSET_UNMASK); + fs->m_ext.data[1] = cpu_to_be32(rule->ep.user_def.data_mask); + } +} + static void hclge_fd_get_ext_info(struct ethtool_rx_flow_spec *fs, struct hclge_fd_rule *rule) { @@ -6331,6 +6860,8 @@ static void hclge_fd_get_ext_info(struct ethtool_rx_flow_spec *fs, fs->m_ext.vlan_tci = rule->unused_tuple & BIT(INNER_VLAN_TAG_FST) ? 
0 : cpu_to_be16(rule->tuples_mask.vlan_tag1); + + hclge_fd_get_user_def_info(fs, rule); } if (fs->flow_type & FLOW_MAC_EXT) { @@ -6442,6 +6973,9 @@ static int hclge_get_all_rules(struct hnae3_handle *handle, return -EMSGSIZE; } + if (rule->state == HCLGE_FD_TO_DEL) + continue; + rule_locs[cnt] = rule->location; cnt++; } @@ -6501,6 +7035,7 @@ static void hclge_fd_build_arfs_rule(const struct hclge_fd_rule_tuples *tuples, rule->action = 0; rule->vf_id = 0; rule->rule_type = HCLGE_FD_ARFS_ACTIVE; + rule->state = HCLGE_FD_TO_ADD; if (tuples->ether_proto == ETH_P_IP) { if (tuples->ip_proto == IPPROTO_TCP) rule->flow_type = TCP_V4_FLOW; @@ -6523,9 +7058,7 @@ static int hclge_add_fd_entry_by_arfs(struct hnae3_handle *handle, u16 queue_id, struct hclge_fd_rule_tuples new_tuples = {}; struct hclge_dev *hdev = vport->back; struct hclge_fd_rule *rule; - u16 tmp_queue_id; u16 bit_id; - int ret; if (!hnae3_dev_fd_supported(hdev)) return -EOPNOTSUPP; @@ -6561,34 +7094,19 @@ static int hclge_add_fd_entry_by_arfs(struct hnae3_handle *handle, u16 queue_id, return -ENOMEM; } - set_bit(bit_id, hdev->fd_bmap); rule->location = bit_id; rule->arfs.flow_id = flow_id; rule->queue_id = queue_id; hclge_fd_build_arfs_rule(&new_tuples, rule); - ret = hclge_fd_config_rule(hdev, rule); - - spin_unlock_bh(&hdev->fd_rule_lock); - - if (ret) - return ret; - - return rule->location; + hclge_update_fd_list(hdev, rule->state, rule->location, rule); + hdev->fd_active_type = HCLGE_FD_ARFS_ACTIVE; + } else if (rule->queue_id != queue_id) { + rule->queue_id = queue_id; + rule->state = HCLGE_FD_TO_ADD; + set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state); + hclge_task_schedule(hdev, 0); } - spin_unlock_bh(&hdev->fd_rule_lock); - - if (rule->queue_id == queue_id) - return rule->location; - - tmp_queue_id = rule->queue_id; - rule->queue_id = queue_id; - ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule); - if (ret) { - rule->queue_id = tmp_queue_id; - return ret; - } - return rule->location; } @@ -6598,7 +7116,6 @@ static void hclge_rfs_filter_expire(struct hclge_dev *hdev) struct hnae3_handle *handle = &hdev->vport[0].nic; struct hclge_fd_rule *rule; struct hlist_node *node; - HLIST_HEAD(del_list); spin_lock_bh(&hdev->fd_rule_lock); if (hdev->fd_active_type != HCLGE_FD_ARFS_ACTIVE) { @@ -6606,34 +7123,51 @@ static void hclge_rfs_filter_expire(struct hclge_dev *hdev) return; } hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) { + if (rule->state != HCLGE_FD_ACTIVE) + continue; if (rps_may_expire_flow(handle->netdev, rule->queue_id, rule->arfs.flow_id, rule->location)) { - hlist_del_init(&rule->rule_node); - hlist_add_head(&rule->rule_node, &del_list); - hdev->hclge_fd_rule_num--; - clear_bit(rule->location, hdev->fd_bmap); + rule->state = HCLGE_FD_TO_DEL; + set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state); } } spin_unlock_bh(&hdev->fd_rule_lock); - - hlist_for_each_entry_safe(rule, node, &del_list, rule_node) { - hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, - rule->location, NULL, false); - kfree(rule); - } #endif } /* make sure being called after lock up with fd_rule_lock */ -static void hclge_clear_arfs_rules(struct hnae3_handle *handle) +static int hclge_clear_arfs_rules(struct hclge_dev *hdev) { #ifdef CONFIG_RFS_ACCEL - struct hclge_vport *vport = hclge_get_vport(handle); - struct hclge_dev *hdev = vport->back; + struct hclge_fd_rule *rule; + struct hlist_node *node; + int ret; + + if (hdev->fd_active_type != HCLGE_FD_ARFS_ACTIVE) + return 0; + + hlist_for_each_entry_safe(rule, node, 
&hdev->fd_rule_list, rule_node) { + switch (rule->state) { + case HCLGE_FD_TO_DEL: + case HCLGE_FD_ACTIVE: + ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, + rule->location, NULL, false); + if (ret) + return ret; + fallthrough; + case HCLGE_FD_TO_ADD: + hclge_fd_dec_rule_cnt(hdev, rule->location); + hlist_del(&rule->rule_node); + kfree(rule); + break; + default: + break; + } + } + hclge_sync_fd_state(hdev); - if (hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE) - hclge_del_all_fd_entries(handle, true); #endif + return 0; } static void hclge_get_cls_key_basic(const struct flow_rule *flow, @@ -6815,12 +7349,6 @@ static int hclge_add_cls_flower(struct hnae3_handle *handle, struct hclge_fd_rule *rule; int ret; - if (hdev->fd_active_type == HCLGE_FD_EP_ACTIVE) { - dev_err(&hdev->pdev->dev, - "please remove all exist fd rules via ethtool first\n"); - return -EINVAL; - } - ret = hclge_check_cls_flower(hdev, cls_flower, tc); if (ret) { dev_err(&hdev->pdev->dev, @@ -6833,8 +7361,10 @@ static int hclge_add_cls_flower(struct hnae3_handle *handle, return -ENOMEM; ret = hclge_parse_cls_flower(hdev, cls_flower, rule); - if (ret) - goto err; + if (ret) { + kfree(rule); + return ret; + } rule->action = HCLGE_FD_ACTION_SELECT_TC; rule->cls_flower.tc = tc; @@ -6843,22 +7373,10 @@ static int hclge_add_cls_flower(struct hnae3_handle *handle, rule->cls_flower.cookie = cls_flower->cookie; rule->rule_type = HCLGE_FD_TC_FLOWER_ACTIVE; - spin_lock_bh(&hdev->fd_rule_lock); - hclge_clear_arfs_rules(handle); - - ret = hclge_fd_config_rule(hdev, rule); - - spin_unlock_bh(&hdev->fd_rule_lock); - - if (ret) { - dev_err(&hdev->pdev->dev, - "failed to add cls flower rule, ret = %d\n", ret); - goto err; - } + ret = hclge_add_fd_entry_common(hdev, rule); + if (ret) + kfree(rule); - return 0; -err: - kfree(rule); return ret; } @@ -6895,25 +7413,66 @@ static int hclge_del_cls_flower(struct hnae3_handle *handle, ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, rule->location, NULL, false); if (ret) { - dev_err(&hdev->pdev->dev, - "failed to delete cls flower rule %u, ret = %d\n", - rule->location, ret); spin_unlock_bh(&hdev->fd_rule_lock); return ret; } - ret = hclge_fd_update_rule_list(hdev, NULL, rule->location, false); - if (ret) { - dev_err(&hdev->pdev->dev, - "failed to delete cls flower rule %u in list, ret = %d\n", - rule->location, ret); - spin_unlock_bh(&hdev->fd_rule_lock); - return ret; + hclge_update_fd_list(hdev, HCLGE_FD_DELETED, rule->location, NULL); + spin_unlock_bh(&hdev->fd_rule_lock); + + return 0; +} + +static void hclge_sync_fd_list(struct hclge_dev *hdev, struct hlist_head *hlist) +{ + struct hclge_fd_rule *rule; + struct hlist_node *node; + int ret = 0; + + if (!test_and_clear_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state)) + return; + + spin_lock_bh(&hdev->fd_rule_lock); + + hlist_for_each_entry_safe(rule, node, hlist, rule_node) { + switch (rule->state) { + case HCLGE_FD_TO_ADD: + ret = hclge_fd_config_rule(hdev, rule); + if (ret) + goto out; + rule->state = HCLGE_FD_ACTIVE; + break; + case HCLGE_FD_TO_DEL: + ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, + rule->location, NULL, false); + if (ret) + goto out; + hclge_fd_dec_rule_cnt(hdev, rule->location); + hclge_fd_free_node(hdev, rule); + break; + default: + break; + } } +out: + if (ret) + set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state); + spin_unlock_bh(&hdev->fd_rule_lock); +} - return 0; +static void hclge_sync_fd_table(struct hclge_dev *hdev) +{ + if (test_and_clear_bit(HCLGE_STATE_FD_CLEAR_ALL, &hdev->state)) { + bool 
clear_list = hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE; + + hclge_clear_fd_rules_in_list(hdev, clear_list); + } + + hclge_sync_fd_user_def_cfg(hdev, false); + + hclge_sync_fd_list(hdev, &hdev->fd_rule_list); } static bool hclge_get_hw_reset_stat(struct hnae3_handle *handle) @@ -6953,18 +7512,15 @@ static void hclge_enable_fd(struct hnae3_handle *handle, bool enable) { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; - bool clear; hdev->fd_en = enable; - clear = hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE; - if (!enable) { - spin_lock_bh(&hdev->fd_rule_lock); - hclge_del_all_fd_entries(handle, clear); - spin_unlock_bh(&hdev->fd_rule_lock); - } else { + if (!enable) + set_bit(HCLGE_STATE_FD_CLEAR_ALL, &hdev->state); + else hclge_restore_fd_entries(handle); - } + + hclge_task_schedule(hdev, 0); } static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable) @@ -7125,19 +7681,19 @@ static int hclge_set_app_loopback(struct hclge_dev *hdev, bool en) return ret; } -static int hclge_cfg_serdes_loopback(struct hclge_dev *hdev, bool en, +static int hclge_cfg_common_loopback(struct hclge_dev *hdev, bool en, enum hnae3_loop loop_mode) { -#define HCLGE_SERDES_RETRY_MS 10 -#define HCLGE_SERDES_RETRY_NUM 100 +#define HCLGE_COMMON_LB_RETRY_MS 10 +#define HCLGE_COMMON_LB_RETRY_NUM 100 - struct hclge_serdes_lb_cmd *req; + struct hclge_common_lb_cmd *req; struct hclge_desc desc; int ret, i = 0; u8 loop_mode_b; - req = (struct hclge_serdes_lb_cmd *)desc.data; - hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_SERDES_LOOPBACK, false); + req = (struct hclge_common_lb_cmd *)desc.data; + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_COMMON_LOOPBACK, false); switch (loop_mode) { case HNAE3_LOOP_SERIAL_SERDES: @@ -7146,9 +7702,12 @@ static int hclge_cfg_serdes_loopback(struct hclge_dev *hdev, bool en, case HNAE3_LOOP_PARALLEL_SERDES: loop_mode_b = HCLGE_CMD_SERDES_PARALLEL_INNER_LOOP_B; break; + case HNAE3_LOOP_PHY: + loop_mode_b = HCLGE_CMD_GE_PHY_INNER_LOOP_B; + break; default: dev_err(&hdev->pdev->dev, - "unsupported serdes loopback mode %d\n", loop_mode); + "unsupported common loopback mode %d\n", loop_mode); return -ENOTSUPP; } @@ -7162,39 +7721,39 @@ static int hclge_cfg_serdes_loopback(struct hclge_dev *hdev, bool en, ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { dev_err(&hdev->pdev->dev, - "serdes loopback set fail, ret = %d\n", ret); + "common loopback set fail, ret = %d\n", ret); return ret; } do { - msleep(HCLGE_SERDES_RETRY_MS); - hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_SERDES_LOOPBACK, + msleep(HCLGE_COMMON_LB_RETRY_MS); + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_COMMON_LOOPBACK, true); ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { dev_err(&hdev->pdev->dev, - "serdes loopback get, ret = %d\n", ret); + "common loopback get, ret = %d\n", ret); return ret; } - } while (++i < HCLGE_SERDES_RETRY_NUM && - !(req->result & HCLGE_CMD_SERDES_DONE_B)); + } while (++i < HCLGE_COMMON_LB_RETRY_NUM && + !(req->result & HCLGE_CMD_COMMON_LB_DONE_B)); - if (!(req->result & HCLGE_CMD_SERDES_DONE_B)) { - dev_err(&hdev->pdev->dev, "serdes loopback set timeout\n"); + if (!(req->result & HCLGE_CMD_COMMON_LB_DONE_B)) { + dev_err(&hdev->pdev->dev, "common loopback set timeout\n"); return -EBUSY; - } else if (!(req->result & HCLGE_CMD_SERDES_SUCCESS_B)) { - dev_err(&hdev->pdev->dev, "serdes loopback set failed in fw\n"); + } else if (!(req->result & HCLGE_CMD_COMMON_LB_SUCCESS_B)) { + dev_err(&hdev->pdev->dev, "common loopback set failed in fw\n"); return -EIO; 
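The loopback configuration above is asynchronous: the driver polls the result byte until the firmware sets a done bit, then checks a separate success bit. A generic userspace sketch of that poll loop (the bit values and the read_status() callback are assumptions for illustration):

#include <stdint.h>
#include <unistd.h>

#define LB_RETRY_MS	10
#define LB_RETRY_NUM	100
#define LB_DONE_B	0x1u	/* assumed completion bit */
#define LB_SUCCESS_B	0x2u	/* assumed success bit    */

static int wait_loopback_done(uint8_t (*read_status)(void))
{
	uint8_t result;
	int i = 0;

	do {
		usleep(LB_RETRY_MS * 1000);
		result = read_status();
	} while (++i < LB_RETRY_NUM && !(result & LB_DONE_B));

	if (!(result & LB_DONE_B))
		return -1;	/* timed out, like -EBUSY above     */
	if (!(result & LB_SUCCESS_B))
		return -2;	/* firmware reported failure (-EIO) */

	return 0;
}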
} return ret; } -static int hclge_set_serdes_loopback(struct hclge_dev *hdev, bool en, +static int hclge_set_common_loopback(struct hclge_dev *hdev, bool en, enum hnae3_loop loop_mode) { int ret; - ret = hclge_cfg_serdes_loopback(hdev, en, loop_mode); + ret = hclge_cfg_common_loopback(hdev, en, loop_mode); if (ret) return ret; @@ -7243,8 +7802,12 @@ static int hclge_set_phy_loopback(struct hclge_dev *hdev, bool en) struct phy_device *phydev = hdev->hw.mac.phydev; int ret; - if (!phydev) + if (!phydev) { + if (hnae3_dev_phy_imp_supported(hdev)) + return hclge_set_common_loopback(hdev, en, + HNAE3_LOOP_PHY); return -ENOTSUPP; + } if (en) ret = hclge_enable_phy_loopback(hdev, phydev); @@ -7266,13 +7829,12 @@ static int hclge_set_phy_loopback(struct hclge_dev *hdev, bool en) return ret; } -static int hclge_tqp_enable(struct hclge_dev *hdev, unsigned int tqp_id, - int stream_id, bool enable) +static int hclge_tqp_enable_cmd_send(struct hclge_dev *hdev, u16 tqp_id, + u16 stream_id, bool enable) { struct hclge_desc desc; struct hclge_cfg_com_tqp_queue_cmd *req = (struct hclge_cfg_com_tqp_queue_cmd *)desc.data; - int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_COM_TQP_QUEUE, false); req->tqp_id = cpu_to_le16(tqp_id); @@ -7280,20 +7842,30 @@ static int hclge_tqp_enable(struct hclge_dev *hdev, unsigned int tqp_id, if (enable) req->enable |= 1U << HCLGE_TQP_ENABLE_B; - ret = hclge_cmd_send(&hdev->hw, &desc, 1); - if (ret) - dev_err(&hdev->pdev->dev, - "Tqp enable fail, status =%d.\n", ret); - return ret; + return hclge_cmd_send(&hdev->hw, &desc, 1); +} + +static int hclge_tqp_enable(struct hnae3_handle *handle, bool enable) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + int ret; + u16 i; + + for (i = 0; i < handle->kinfo.num_tqps; i++) { + ret = hclge_tqp_enable_cmd_send(hdev, i, 0, enable); + if (ret) + return ret; + } + return 0; } static int hclge_set_loopback(struct hnae3_handle *handle, enum hnae3_loop loop_mode, bool en) { struct hclge_vport *vport = hclge_get_vport(handle); - struct hnae3_knic_private_info *kinfo; struct hclge_dev *hdev = vport->back; - int i, ret; + int ret; /* Loopback can be enabled in three places: SSU, MAC, and serdes. By * default, SSU loopback is enabled, so if the SMAC and the DMAC are @@ -7315,7 +7887,7 @@ static int hclge_set_loopback(struct hnae3_handle *handle, break; case HNAE3_LOOP_SERIAL_SERDES: case HNAE3_LOOP_PARALLEL_SERDES: - ret = hclge_set_serdes_loopback(hdev, en, loop_mode); + ret = hclge_set_common_loopback(hdev, en, loop_mode); break; case HNAE3_LOOP_PHY: ret = hclge_set_phy_loopback(hdev, en); @@ -7330,14 +7902,12 @@ static int hclge_set_loopback(struct hnae3_handle *handle, if (ret) return ret; - kinfo = &vport->nic.kinfo; - for (i = 0; i < kinfo->num_tqps; i++) { - ret = hclge_tqp_enable(hdev, i, 0, en); - if (ret) - return ret; - } + ret = hclge_tqp_enable(handle, en); + if (ret) + dev_err(&hdev->pdev->dev, "failed to %s tqp in loopback, ret = %d\n", + en ? 
"enable" : "disable", ret); - return 0; + return ret; } static int hclge_set_default_loopback(struct hclge_dev *hdev) @@ -7348,11 +7918,11 @@ static int hclge_set_default_loopback(struct hclge_dev *hdev) if (ret) return ret; - ret = hclge_cfg_serdes_loopback(hdev, false, HNAE3_LOOP_SERIAL_SERDES); + ret = hclge_cfg_common_loopback(hdev, false, HNAE3_LOOP_SERIAL_SERDES); if (ret) return ret; - return hclge_cfg_serdes_loopback(hdev, false, + return hclge_cfg_common_loopback(hdev, false, HNAE3_LOOP_PARALLEL_SERDES); } @@ -7424,11 +7994,10 @@ static void hclge_ae_stop(struct hnae3_handle *handle) { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; - int i; set_bit(HCLGE_STATE_DOWN, &hdev->state); spin_lock_bh(&hdev->fd_rule_lock); - hclge_clear_arfs_rules(handle); + hclge_clear_arfs_rules(hdev); spin_unlock_bh(&hdev->fd_rule_lock); /* If it is not PF reset, the firmware will disable the MAC, @@ -7441,8 +8010,7 @@ static void hclge_ae_stop(struct hnae3_handle *handle) return; } - for (i = 0; i < handle->kinfo.num_tqps; i++) - hclge_reset_tqp(handle, i); + hclge_reset_tqp(handle); hclge_config_mac_tnl_int(hdev, false); @@ -7892,7 +8460,7 @@ int hclge_update_mac_list(struct hclge_vport *vport, /* if the mac addr is already in the mac list, no need to add a new * one into it, just check the mac addr state, convert it to a new - * new state, or just remove it, or do nothing. + * state, or just remove it, or do nothing. */ mac_node = hclge_find_mac_node(list, addr); if (mac_node) { @@ -8081,7 +8649,6 @@ int hclge_add_mc_addr_common(struct hclge_vport *vport, if (status) return status; status = hclge_add_mac_vlan_tbl(vport, &req, desc); - /* if already overflow, not to print each time */ if (status == -ENOSPC && !(vport->overflow_promisc_flags & HNAE3_OVERFLOW_MPE)) @@ -8130,7 +8697,6 @@ int hclge_rm_mc_addr_common(struct hclge_vport *vport, else /* Not all the vfid is zero, update the vfid */ status = hclge_add_mac_vlan_tbl(vport, &req, desc); - } else if (status == -ENOENT) { status = 0; } @@ -8565,7 +9131,7 @@ static bool hclge_check_vf_mac_exist(struct hclge_vport *vport, int vf_idx, return true; vf_idx += HCLGE_VF_VPORT_START_NUM; - for (i = hdev->num_vmdq_vport + 1; i < hdev->num_alloc_vport; i++) + for (i = HCLGE_VF_VPORT_START_NUM; i < hdev->num_alloc_vport; i++) if (i != vf_idx && ether_addr_equal(mac_addr, hdev->vport[i].vf_info.mac)) return true; @@ -8759,6 +9325,29 @@ static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p, return 0; } +static int hclge_mii_ioctl(struct hclge_dev *hdev, struct ifreq *ifr, int cmd) +{ + struct mii_ioctl_data *data = if_mii(ifr); + + if (!hnae3_dev_phy_imp_supported(hdev)) + return -EOPNOTSUPP; + + switch (cmd) { + case SIOCGMIIPHY: + data->phy_id = hdev->hw.mac.phy_addr; + /* this command reads phy id and register at the same time */ + fallthrough; + case SIOCGMIIREG: + data->val_out = hclge_read_phy_reg(hdev, data->reg_num); + return 0; + + case SIOCSMIIREG: + return hclge_write_phy_reg(hdev, data->reg_num, data->val_in); + default: + return -EOPNOTSUPP; + } +} + static int hclge_do_ioctl(struct hnae3_handle *handle, struct ifreq *ifr, int cmd) { @@ -8766,7 +9355,7 @@ static int hclge_do_ioctl(struct hnae3_handle *handle, struct ifreq *ifr, struct hclge_dev *hdev = vport->back; if (!hdev->hw.mac.phydev) - return -EOPNOTSUPP; + return hclge_mii_ioctl(hdev, ifr, cmd); return phy_mii_ioctl(hdev->hw.mac.phydev, ifr, cmd); } @@ -8923,8 +9512,7 @@ static int hclge_check_vf_vlan_cmd_status(struct hclge_dev 
*hdev, u16 vfid, } static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, u16 vfid, - bool is_kill, u16 vlan, - __be16 proto) + bool is_kill, u16 vlan) { struct hclge_vport *vport = &hdev->vport[vfid]; struct hclge_desc desc[2]; @@ -8990,8 +9578,7 @@ static int hclge_set_vlan_filter_hw(struct hclge_dev *hdev, __be16 proto, if (is_kill && !vlan_id) return 0; - ret = hclge_set_vf_vlan_common(hdev, vport_id, is_kill, vlan_id, - proto); + ret = hclge_set_vf_vlan_common(hdev, vport_id, is_kill, vlan_id); if (ret) { dev_err(&hdev->pdev->dev, "Set %u vport vlan filter config fail, ret =%d.\n", @@ -9441,7 +10028,7 @@ static void hclge_restore_hw_table(struct hclge_dev *hdev) hclge_restore_mac_table_common(vport); hclge_restore_vport_vlan_table(vport); set_bit(HCLGE_STATE_PROMISC_CHANGED, &hdev->state); - + set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state); hclge_restore_fd_entries(handle); } @@ -9797,7 +10384,7 @@ out: return ret; } -static int hclge_send_reset_tqp_cmd(struct hclge_dev *hdev, u16 queue_id, +static int hclge_reset_tqp_cmd_send(struct hclge_dev *hdev, u16 queue_id, bool enable) { struct hclge_reset_tqp_queue_cmd *req; @@ -9853,94 +10440,114 @@ u16 hclge_covert_handle_qid_global(struct hnae3_handle *handle, u16 queue_id) return tqp->index; } -int hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id) +static int hclge_reset_tqp_cmd(struct hnae3_handle *handle) { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; - int reset_try_times = 0; + u16 reset_try_times = 0; int reset_status; u16 queue_gid; int ret; + u16 i; - queue_gid = hclge_covert_handle_qid_global(handle, queue_id); - - ret = hclge_tqp_enable(hdev, queue_id, 0, false); - if (ret) { - dev_err(&hdev->pdev->dev, "Disable tqp fail, ret = %d\n", ret); - return ret; - } + for (i = 0; i < handle->kinfo.num_tqps; i++) { + queue_gid = hclge_covert_handle_qid_global(handle, i); + ret = hclge_reset_tqp_cmd_send(hdev, queue_gid, true); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to send reset tqp cmd, ret = %d\n", + ret); + return ret; + } - ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, true); - if (ret) { - dev_err(&hdev->pdev->dev, - "Send reset tqp cmd fail, ret = %d\n", ret); - return ret; - } + while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) { + reset_status = hclge_get_reset_status(hdev, queue_gid); + if (reset_status) + break; - while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) { - reset_status = hclge_get_reset_status(hdev, queue_gid); - if (reset_status) - break; + /* Wait for tqp hw reset */ + usleep_range(1000, 1200); + } - /* Wait for tqp hw reset */ - usleep_range(1000, 1200); - } + if (reset_try_times >= HCLGE_TQP_RESET_TRY_TIMES) { + dev_err(&hdev->pdev->dev, + "wait for tqp hw reset timeout\n"); + return -ETIME; + } - if (reset_try_times >= HCLGE_TQP_RESET_TRY_TIMES) { - dev_err(&hdev->pdev->dev, "Reset TQP fail\n"); - return ret; + ret = hclge_reset_tqp_cmd_send(hdev, queue_gid, false); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to deassert soft reset, ret = %d\n", + ret); + return ret; + } + reset_try_times = 0; } - - ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, false); - if (ret) - dev_err(&hdev->pdev->dev, - "Deassert the soft reset fail, ret = %d\n", ret); - - return ret; + return 0; } -void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id) +static int hclge_reset_rcb(struct hnae3_handle *handle) { - struct hnae3_handle *handle = &vport->nic; +#define HCLGE_RESET_RCB_NOT_SUPPORT 0U +#define HCLGE_RESET_RCB_SUCCESS 1U + 
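hclge_reset_rcb() below issues one batched reset for a whole queue range and falls back to the old per-queue command when the firmware answers "not supported". The pattern in a generic, self-contained form (the two callbacks stand in for the firmware commands; names are hypothetical):

#include <errno.h>
#include <stdint.h>

enum { RCB_RESET_NOT_SUPPORT = 0, RCB_RESET_SUCCESS = 1 };

static int reset_queues(int (*try_batched)(uint16_t first, uint16_t num),
			int (*reset_one)(uint16_t qid),
			uint16_t first, uint16_t num)
{
	int status = try_batched(first, num);
	uint16_t i;

	if (status == RCB_RESET_SUCCESS)
		return 0;
	if (status != RCB_RESET_NOT_SUPPORT)
		return -EIO;

	/* old firmware: reset each queue individually instead */
	for (i = 0; i < num; i++) {
		int ret = reset_one(first + i);

		if (ret)
			return ret;
	}

	return 0;
}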
+ struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; - int reset_try_times = 0; - int reset_status; + struct hclge_reset_cmd *req; + struct hclge_desc desc; + u8 return_status; u16 queue_gid; int ret; - if (queue_id >= handle->kinfo.num_tqps) { - dev_warn(&hdev->pdev->dev, "Invalid vf queue id(%u)\n", - queue_id); - return; - } + queue_gid = hclge_covert_handle_qid_global(handle, 0); - queue_gid = hclge_covert_handle_qid_global(&vport->nic, queue_id); + req = (struct hclge_reset_cmd *)desc.data; + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_RST_TRIGGER, false); + hnae3_set_bit(req->fun_reset_rcb, HCLGE_CFG_RESET_RCB_B, 1); + req->fun_reset_rcb_vqid_start = cpu_to_le16(queue_gid); + req->fun_reset_rcb_vqid_num = cpu_to_le16(handle->kinfo.num_tqps); - ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, true); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { - dev_warn(&hdev->pdev->dev, - "Send reset tqp cmd fail, ret = %d\n", ret); - return; + dev_err(&hdev->pdev->dev, + "failed to send rcb reset cmd, ret = %d\n", ret); + return ret; } - while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) { - reset_status = hclge_get_reset_status(hdev, queue_gid); - if (reset_status) - break; + return_status = req->fun_reset_rcb_return_status; + if (return_status == HCLGE_RESET_RCB_SUCCESS) + return 0; - /* Wait for tqp hw reset */ - usleep_range(1000, 1200); + if (return_status != HCLGE_RESET_RCB_NOT_SUPPORT) { + dev_err(&hdev->pdev->dev, "failed to reset rcb, ret = %u\n", + return_status); + return -EIO; } - if (reset_try_times >= HCLGE_TQP_RESET_TRY_TIMES) { - dev_warn(&hdev->pdev->dev, "Reset TQP fail\n"); - return; + /* if reset rcb cmd is unsupported, we need to send reset tqp cmd + * again to reset all tqps + */ + return hclge_reset_tqp_cmd(handle); +} + +int hclge_reset_tqp(struct hnae3_handle *handle) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + int ret; + + /* only need to disable PF's tqp */ + if (!vport->vport_id) { + ret = hclge_tqp_enable(handle, false); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to disable tqp, ret = %d\n", ret); + return ret; + } } - ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, false); - if (ret) - dev_warn(&hdev->pdev->dev, - "Deassert the soft reset fail, ret = %d\n", ret); + return hclge_reset_rcb(handle); } static u32 hclge_get_fw_version(struct hnae3_handle *handle) @@ -10013,9 +10620,10 @@ static void hclge_get_pauseparam(struct hnae3_handle *handle, u32 *auto_neg, { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; - struct phy_device *phydev = hdev->hw.mac.phydev; + u8 media_type = hdev->hw.mac.media_type; - *auto_neg = phydev ? hclge_get_autoneg(handle) : 0; + *auto_neg = (media_type == HNAE3_MEDIA_TYPE_COPPER) ? 
+ hclge_get_autoneg(handle) : 0; if (hdev->tm_info.fc_mode == HCLGE_FC_PFC) { *rx_en = 0; @@ -10061,7 +10669,7 @@ static int hclge_set_pauseparam(struct hnae3_handle *handle, u32 auto_neg, struct phy_device *phydev = hdev->hw.mac.phydev; u32 fc_autoneg; - if (phydev) { + if (phydev || hnae3_dev_phy_imp_supported(hdev)) { fc_autoneg = hclge_get_autoneg(handle); if (auto_neg != fc_autoneg) { dev_info(&hdev->pdev->dev, @@ -10080,7 +10688,7 @@ static int hclge_set_pauseparam(struct hnae3_handle *handle, u32 auto_neg, hclge_record_user_pauseparam(hdev, rx_en, tx_en); - if (!auto_neg) + if (!auto_neg || hnae3_dev_phy_imp_supported(hdev)) return hclge_cfg_pauseparam(hdev, rx_en, tx_en); if (phydev) @@ -10182,7 +10790,6 @@ static void hclge_info_show(struct hclge_dev *hdev) dev_info(dev, "Desc num per TX queue: %u\n", hdev->num_tx_desc); dev_info(dev, "Desc num per RX queue: %u\n", hdev->num_rx_desc); dev_info(dev, "Numbers of vports: %u\n", hdev->num_alloc_vport); - dev_info(dev, "Numbers of vmdp vports: %u\n", hdev->num_vmdq_vport); dev_info(dev, "Numbers of VF for this PF: %u\n", hdev->num_req_vfs); dev_info(dev, "HW tc map: 0x%x\n", hdev->hw_tc_map); dev_info(dev, "Total buffer size for TX/RX: %u\n", hdev->pkt_buf_size); @@ -10297,39 +10904,35 @@ static int hclge_init_client_instance(struct hnae3_client *client, struct hnae3_ae_dev *ae_dev) { struct hclge_dev *hdev = ae_dev->priv; - struct hclge_vport *vport; - int i, ret; - - for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { - vport = &hdev->vport[i]; + struct hclge_vport *vport = &hdev->vport[0]; + int ret; - switch (client->type) { - case HNAE3_CLIENT_KNIC: - hdev->nic_client = client; - vport->nic.client = client; - ret = hclge_init_nic_client_instance(ae_dev, vport); - if (ret) - goto clear_nic; + switch (client->type) { + case HNAE3_CLIENT_KNIC: + hdev->nic_client = client; + vport->nic.client = client; + ret = hclge_init_nic_client_instance(ae_dev, vport); + if (ret) + goto clear_nic; - ret = hclge_init_roce_client_instance(ae_dev, vport); - if (ret) - goto clear_roce; + ret = hclge_init_roce_client_instance(ae_dev, vport); + if (ret) + goto clear_roce; - break; - case HNAE3_CLIENT_ROCE: - if (hnae3_dev_roce_supported(hdev)) { - hdev->roce_client = client; - vport->roce.client = client; - } + break; + case HNAE3_CLIENT_ROCE: + if (hnae3_dev_roce_supported(hdev)) { + hdev->roce_client = client; + vport->roce.client = client; + } - ret = hclge_init_roce_client_instance(ae_dev, vport); - if (ret) - goto clear_roce; + ret = hclge_init_roce_client_instance(ae_dev, vport); + if (ret) + goto clear_roce; - break; - default: - return -EINVAL; - } + break; + default: + return -EINVAL; } return 0; @@ -10348,32 +10951,27 @@ static void hclge_uninit_client_instance(struct hnae3_client *client, struct hnae3_ae_dev *ae_dev) { struct hclge_dev *hdev = ae_dev->priv; - struct hclge_vport *vport; - int i; + struct hclge_vport *vport = &hdev->vport[0]; - for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { - vport = &hdev->vport[i]; - if (hdev->roce_client) { - clear_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state); - while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) - msleep(HCLGE_WAIT_RESET_DONE); - - hdev->roce_client->ops->uninit_instance(&vport->roce, - 0); - hdev->roce_client = NULL; - vport->roce.client = NULL; - } - if (client->type == HNAE3_CLIENT_ROCE) - return; - if (hdev->nic_client && client->ops->uninit_instance) { - clear_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state); - while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) - 
msleep(HCLGE_WAIT_RESET_DONE); - - client->ops->uninit_instance(&vport->nic, 0); - hdev->nic_client = NULL; - vport->nic.client = NULL; - } + if (hdev->roce_client) { + clear_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state); + while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + msleep(HCLGE_WAIT_RESET_DONE); + + hdev->roce_client->ops->uninit_instance(&vport->roce, 0); + hdev->roce_client = NULL; + vport->roce.client = NULL; + } + if (client->type == HNAE3_CLIENT_ROCE) + return; + if (hdev->nic_client && client->ops->uninit_instance) { + clear_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state); + while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + msleep(HCLGE_WAIT_RESET_DONE); + + client->ops->uninit_instance(&vport->nic, 0); + hdev->nic_client = NULL; + vport->nic.client = NULL; } } @@ -10492,10 +11090,11 @@ static void hclge_state_uninit(struct hclge_dev *hdev) cancel_delayed_work_sync(&hdev->service_task); } -static void hclge_flr_prepare(struct hnae3_ae_dev *ae_dev) +static void hclge_reset_prepare_general(struct hnae3_ae_dev *ae_dev, + enum hnae3_reset_type rst_type) { -#define HCLGE_FLR_RETRY_WAIT_MS 500 -#define HCLGE_FLR_RETRY_CNT 5 +#define HCLGE_RESET_RETRY_WAIT_MS 500 +#define HCLGE_RESET_RETRY_CNT 5 struct hclge_dev *hdev = ae_dev->priv; int retry_cnt = 0; @@ -10504,30 +11103,32 @@ static void hclge_flr_prepare(struct hnae3_ae_dev *ae_dev) retry: down(&hdev->reset_sem); set_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); - hdev->reset_type = HNAE3_FLR_RESET; + hdev->reset_type = rst_type; ret = hclge_reset_prepare(hdev); if (ret || hdev->reset_pending) { - dev_err(&hdev->pdev->dev, "fail to prepare FLR, ret=%d\n", + dev_err(&hdev->pdev->dev, "fail to prepare to reset, ret=%d\n", ret); if (hdev->reset_pending || - retry_cnt++ < HCLGE_FLR_RETRY_CNT) { + retry_cnt++ < HCLGE_RESET_RETRY_CNT) { dev_err(&hdev->pdev->dev, "reset_pending:0x%lx, retry_cnt:%d\n", hdev->reset_pending, retry_cnt); clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); up(&hdev->reset_sem); - msleep(HCLGE_FLR_RETRY_WAIT_MS); + msleep(HCLGE_RESET_RETRY_WAIT_MS); goto retry; } } - /* disable misc vector before FLR done */ + /* disable misc vector before reset done */ hclge_enable_vector(&hdev->misc_vector, false); set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state); - hdev->rst_stats.flr_rst_cnt++; + + if (hdev->reset_type == HNAE3_FLR_RESET) + hdev->rst_stats.flr_rst_cnt++; } -static void hclge_flr_done(struct hnae3_ae_dev *ae_dev) +static void hclge_reset_done(struct hnae3_ae_dev *ae_dev) { struct hclge_dev *hdev = ae_dev->priv; int ret; @@ -10638,7 +11239,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) if (ret) goto err_msi_irq_uninit; - if (hdev->hw.mac.media_type == HNAE3_MEDIA_TYPE_COPPER) { + if (hdev->hw.mac.media_type == HNAE3_MEDIA_TYPE_COPPER && + !hnae3_dev_phy_imp_supported(hdev)) { ret = hclge_mac_mdio_config(hdev); if (ret) goto err_msi_irq_uninit; @@ -11031,6 +11633,13 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev) return ret; } + ret = hclge_tp_port_init(hdev); + if (ret) { + dev_err(&pdev->dev, "failed to init tp port, ret = %d\n", + ret); + return ret; + } + ret = hclge_config_tso(hdev, HCLGE_TSO_MSS_MIN, HCLGE_TSO_MSS_MAX); if (ret) { dev_err(&pdev->dev, "Enable tso fail, ret =%d\n", ret); @@ -11121,6 +11730,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) hclge_misc_affinity_teardown(hdev); hclge_state_uninit(hdev); hclge_uninit_mac_table(hdev); + hclge_del_all_fd_entries(hdev); if (mac->phydev) mdiobus_unregister(mac->mdio_bus); @@ -11211,7 
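The hclge_flr_prepare() to hclge_reset_prepare_general() conversion above keeps the existing retry shape: take hdev->reset_sem, attempt the prepare step, and on failure release the semaphore, sleep HCLGE_RESET_RETRY_WAIT_MS (500 ms), and retry up to HCLGE_RESET_RETRY_CNT (5) times so concurrent reset paths can make progress in between. A simplified sketch of that drop-the-lock-and-retry pattern, using a pthread mutex as a stand-in for the semaphore (prepare() is a hypothetical stub that fails twice):

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

#define RETRY_CNT	5	/* stand-in for HCLGE_RESET_RETRY_CNT */
#define RETRY_WAIT_MS	500	/* stand-in for HCLGE_RESET_RETRY_WAIT_MS */

static pthread_mutex_t reset_lock = PTHREAD_MUTEX_INITIALIZER;

static int prepare(void)
{
	static int calls;

	return ++calls < 3 ? -1 : 0;	/* fail twice, then succeed */
}

static void reset_prepare_general(void)
{
	int retry_cnt = 0;

retry:
	pthread_mutex_lock(&reset_lock);
	if (prepare() && retry_cnt++ < RETRY_CNT) {
		/* drop the lock while waiting so other reset paths can run */
		pthread_mutex_unlock(&reset_lock);
		usleep(RETRY_WAIT_MS * 1000);
		goto retry;
	}
	/* on exhausted retries the driver also falls through and proceeds;
	 * it keeps the semaphore held until the reset-done step, which is
	 * released here only to keep this demo self-contained
	 */
	pthread_mutex_unlock(&reset_lock);
}

int main(void)
{
	reset_prepare_general();
	puts("prepared");
	return 0;
}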
+11821,7 @@ static int hclge_set_channels(struct hnae3_handle *handle, u32 new_tqps_num, if (ret) return ret; - /* RSS indirection table has been configuared by user */ + /* RSS indirection table has been configured by user */ if (rxfh_configured) goto out; @@ -11380,7 +11990,6 @@ static int hclge_get_64_bit_regs(struct hclge_dev *hdev, u32 regs_num, #define REG_LEN_PER_LINE (REG_NUM_PER_LINE * sizeof(u32)) #define REG_SEPARATOR_LINE 1 #define REG_NUM_REMAIN_MASK 3 -#define BD_LIST_MAX_NUM 30 int hclge_query_bd_num_cmd_send(struct hclge_dev *hdev, struct hclge_desc *desc) { @@ -11474,15 +12083,19 @@ static int hclge_get_dfx_reg_len(struct hclge_dev *hdev, int *len) { u32 dfx_reg_type_num = ARRAY_SIZE(hclge_dfx_bd_offset_list); int data_len_per_desc, bd_num, i; - int bd_num_list[BD_LIST_MAX_NUM]; + int *bd_num_list; u32 data_len; int ret; + bd_num_list = kcalloc(dfx_reg_type_num, sizeof(int), GFP_KERNEL); + if (!bd_num_list) + return -ENOMEM; + ret = hclge_get_dfx_reg_bd_num(hdev, bd_num_list, dfx_reg_type_num); if (ret) { dev_err(&hdev->pdev->dev, "Get dfx reg bd num fail, status is %d.\n", ret); - return ret; + goto out; } data_len_per_desc = sizeof_field(struct hclge_desc, data); @@ -11493,6 +12106,8 @@ static int hclge_get_dfx_reg_len(struct hclge_dev *hdev, int *len) *len += (data_len / REG_LEN_PER_LINE + 1) * REG_LEN_PER_LINE; } +out: + kfree(bd_num_list); return ret; } @@ -11500,16 +12115,20 @@ static int hclge_get_dfx_reg(struct hclge_dev *hdev, void *data) { u32 dfx_reg_type_num = ARRAY_SIZE(hclge_dfx_bd_offset_list); int bd_num, bd_num_max, buf_len, i; - int bd_num_list[BD_LIST_MAX_NUM]; struct hclge_desc *desc_src; + int *bd_num_list; u32 *reg = data; int ret; + bd_num_list = kcalloc(dfx_reg_type_num, sizeof(int), GFP_KERNEL); + if (!bd_num_list) + return -ENOMEM; + ret = hclge_get_dfx_reg_bd_num(hdev, bd_num_list, dfx_reg_type_num); if (ret) { dev_err(&hdev->pdev->dev, "Get dfx reg bd num fail, status is %d.\n", ret); - return ret; + goto out; } bd_num_max = bd_num_list[0]; @@ -11518,8 +12137,10 @@ static int hclge_get_dfx_reg(struct hclge_dev *hdev, void *data) buf_len = sizeof(*desc_src) * bd_num_max; desc_src = kzalloc(buf_len, GFP_KERNEL); - if (!desc_src) - return -ENOMEM; + if (!desc_src) { + ret = -ENOMEM; + goto out; + } for (i = 0; i < dfx_reg_type_num; i++) { bd_num = bd_num_list[i]; @@ -11535,6 +12156,8 @@ static int hclge_get_dfx_reg(struct hclge_dev *hdev, void *data) } kfree(desc_src); +out: + kfree(bd_num_list); return ret; } @@ -11878,8 +12501,8 @@ static int hclge_get_module_eeprom(struct hnae3_handle *handle, u32 offset, static const struct hnae3_ae_ops hclge_ops = { .init_ae_dev = hclge_init_ae_dev, .uninit_ae_dev = hclge_uninit_ae_dev, - .flr_prepare = hclge_flr_prepare, - .flr_done = hclge_flr_done, + .reset_prepare = hclge_reset_prepare_general, + .reset_done = hclge_reset_done, .init_client_instance = hclge_init_client_instance, .uninit_client_instance = hclge_uninit_client_instance, .map_ring_to_vector = hclge_map_ring_to_vector, @@ -11944,7 +12567,6 @@ static const struct hnae3_ae_ops hclge_ops = { .get_link_mode = hclge_get_link_mode, .add_fd_entry = hclge_add_fd_entry, .del_fd_entry = hclge_del_fd_entry, - .del_all_fd_entries = hclge_del_all_fd_entries, .get_fd_rule_cnt = hclge_get_fd_rule_cnt, .get_fd_rule_info = hclge_get_fd_rule_info, .get_fd_all_rules = hclge_get_all_rules, @@ -11972,6 +12594,8 @@ static const struct hnae3_ae_ops hclge_ops = { .add_cls_flower = hclge_add_cls_flower, .del_cls_flower = hclge_del_cls_flower, .cls_flower_active = 
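The dfx-register hunks above replace the fixed BD_LIST_MAX_NUM stack array with a kcalloc() sized from ARRAY_SIZE(hclge_dfx_bd_offset_list), routing every exit through a single `out: kfree(bd_num_list)` label. A userspace model of the same allocate/goto-cleanup shape (query_bd_nums() is a hypothetical stand-in for hclge_get_dfx_reg_bd_num()):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for the firmware query. */
static int query_bd_nums(int *list, size_t n)
{
	for (size_t i = 0; i < n; i++)
		list[i] = 4;	/* pretend each reg type needs 4 descriptors */
	return 0;
}

static int get_reg_len(size_t type_num, int *len)
{
	int *bd_num_list;
	int ret;

	/* sized from the real type count, not a guessed compile-time max */
	bd_num_list = calloc(type_num, sizeof(*bd_num_list));
	if (!bd_num_list)
		return -ENOMEM;

	ret = query_bd_nums(bd_num_list, type_num);
	if (ret)
		goto out;	/* every failure path frees the list */

	*len = 0;
	for (size_t i = 0; i < type_num; i++)
		*len += bd_num_list[i];
out:
	free(bd_num_list);
	return ret;
}

int main(void)
{
	int len = 0;

	if (!get_reg_len(30, &len))
		printf("len=%d\n", len);
	return 0;
}

Heap allocation removes the silent overflow risk if the table ever grows past the old hard-coded 30, at the cost of one extra error path — which the single-exit label keeps leak-free.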
hclge_is_cls_flower_active, + .get_phy_link_ksettings = hclge_get_phy_link_ksettings, + .set_phy_link_ksettings = hclge_set_phy_link_ksettings, }; static struct hnae3_ae_algo ae_algo = { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 19d7f28773f3..ff1d47308c2d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -223,6 +223,9 @@ enum HCLGE_DEV_STATE { HCLGE_STATE_LINK_UPDATING, HCLGE_STATE_PROMISC_CHANGED, HCLGE_STATE_RST_FAIL, + HCLGE_STATE_FD_TBL_CHANGED, + HCLGE_STATE_FD_CLEAR_ALL, + HCLGE_STATE_FD_USER_DEF_CHANGED, HCLGE_STATE_MAX }; @@ -345,7 +348,6 @@ struct hclge_tc_info { }; struct hclge_cfg { - u8 vmdq_vport_num; u8 tc_num; u16 tqp_desc_num; u16 rx_buf_len; @@ -536,6 +538,9 @@ enum HCLGE_FD_TUPLE { MAX_TUPLE, }; +#define HCLGE_FD_TUPLE_USER_DEF_TUPLES \ + (BIT(INNER_L2_RSV) | BIT(INNER_L3_RSV) | BIT(INNER_L4_RSV)) + enum HCLGE_FD_META_DATA { PACKET_TYPE_ID, IP_FRAGEMENT, @@ -548,9 +553,21 @@ enum HCLGE_FD_META_DATA { MAX_META_DATA, }; +enum HCLGE_FD_KEY_OPT { + KEY_OPT_U8, + KEY_OPT_LE16, + KEY_OPT_LE32, + KEY_OPT_MAC, + KEY_OPT_IP, + KEY_OPT_VNI, +}; + struct key_info { u8 key_type; u8 key_length; /* use bit as unit */ + enum HCLGE_FD_KEY_OPT key_opt; + int offset; + int moffset; }; #define MAX_KEY_LENGTH 400 @@ -558,6 +575,11 @@ struct key_info { #define MAX_KEY_BYTES (MAX_KEY_DWORDS * 4) #define MAX_META_DATA_LENGTH 32 +#define HCLGE_FD_MAX_USER_DEF_OFFSET 9000 +#define HCLGE_FD_USER_DEF_DATA GENMASK(15, 0) +#define HCLGE_FD_USER_DEF_OFFSET GENMASK(15, 0) +#define HCLGE_FD_USER_DEF_OFFSET_UNMASK GENMASK(15, 0) + /* assigned by firmware, the real filter number for each pf may be less */ #define MAX_FD_FILTER_NUM 4096 #define HCLGE_ARFS_EXPIRE_INTERVAL 5UL @@ -580,6 +602,33 @@ enum HCLGE_FD_ACTION { HCLGE_FD_ACTION_SELECT_TC, }; +enum HCLGE_FD_NODE_STATE { + HCLGE_FD_TO_ADD, + HCLGE_FD_TO_DEL, + HCLGE_FD_ACTIVE, + HCLGE_FD_DELETED, +}; + +enum HCLGE_FD_USER_DEF_LAYER { + HCLGE_FD_USER_DEF_NONE, + HCLGE_FD_USER_DEF_L2, + HCLGE_FD_USER_DEF_L3, + HCLGE_FD_USER_DEF_L4, +}; + +#define HCLGE_FD_USER_DEF_LAYER_NUM 3 +struct hclge_fd_user_def_cfg { + u16 ref_cnt; + u16 offset; +}; + +struct hclge_fd_user_def_info { + enum HCLGE_FD_USER_DEF_LAYER layer; + u16 data; + u16 data_mask; + u16 offset; +}; + struct hclge_fd_key_cfg { u8 key_sel; u8 inner_sipv6_word_en; @@ -596,6 +645,7 @@ struct hclge_fd_cfg { u32 rule_num[MAX_STAGE_NUM]; /* rule entry number */ u16 cnt_num[MAX_STAGE_NUM]; /* rule hit counter number */ struct hclge_fd_key_cfg key_cfg[MAX_STAGE_NUM]; + struct hclge_fd_user_def_cfg user_def_cfg[HCLGE_FD_USER_DEF_LAYER_NUM]; }; #define IPV4_INDEX 3 @@ -612,6 +662,9 @@ struct hclge_fd_rule_tuples { u16 dst_port; u16 vlan_tag1; u16 ether_proto; + u16 l2_user_def; + u16 l3_user_def; + u32 l4_user_def; u8 ip_tos; u8 ip_proto; }; @@ -630,11 +683,15 @@ struct hclge_fd_rule { struct { u16 flow_id; /* only used for arfs */ } arfs; + struct { + struct hclge_fd_user_def_info user_def; + } ep; }; u16 queue_id; u16 vf_id; u16 location; enum HCLGE_FD_ACTIVE_RULE_TYPE rule_type; + enum HCLGE_FD_NODE_STATE state; u8 action; }; @@ -753,7 +810,6 @@ struct hclge_dev { struct hclge_rst_stats rst_stats; struct semaphore reset_sem; /* protect reset process */ u32 fw_version; - u16 num_vmdq_vport; /* Num vmdq vport this PF has set up */ u16 num_tqps; /* Num task queue pairs of this PF */ u16 num_req_vfs; /* Num VFs requested for this PF */ 
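The new hclge_read_phy_reg()/hclge_write_phy_reg() above access the PHY through HCLGE_OPC_PHY_REG command descriptors when firmware manages the PHY (the hnae3_dev_phy_imp_supported() case), so every 16-bit field crosses a little-endian boundary via cpu_to_le16()/le16_to_cpu(). Below is a userspace model of those conversions that behaves correctly on either host endianness; struct phy_reg_cmd is a simplified stand-in for hclge_phy_reg_cmd, not its real layout.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Portable models of cpu_to_le16()/le16_to_cpu(): the byte order in
 * memory is fixed, whatever the host endianness happens to be.
 */
static uint16_t to_le16(uint16_t v)
{
	uint8_t b[2] = { v & 0xff, v >> 8 };
	uint16_t out;

	memcpy(&out, b, sizeof(out));
	return out;
}

static uint16_t from_le16(uint16_t v)
{
	uint8_t b[2];

	memcpy(b, &v, sizeof(b));
	return (uint16_t)(b[0] | b[1] << 8);
}

/* Simplified stand-in for the command payload. */
struct phy_reg_cmd {
	uint16_t reg_addr;	/* little-endian on the wire */
	uint16_t reg_val;	/* little-endian on the wire */
};

int main(void)
{
	struct phy_reg_cmd req = { .reg_addr = to_le16(0x1f) };

	req.reg_val = to_le16(0xabcd);	/* firmware would fill this on read */
	printf("addr=%#x val=%#x\n", from_le16(req.reg_addr),
	       from_le16(req.reg_val));
	return 0;
}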
@@ -997,8 +1053,7 @@ int hclge_rss_init_hw(struct hclge_dev *hdev); void hclge_rss_indir_init_cfg(struct hclge_dev *hdev); void hclge_mbx_handler(struct hclge_dev *hdev); -int hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id); -void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id); +int hclge_reset_tqp(struct hnae3_handle *handle); int hclge_cfg_flowctrl(struct hclge_dev *hdev); int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id); int hclge_vport_start(struct hclge_vport *vport); @@ -1034,4 +1089,5 @@ void hclge_report_hw_error(struct hclge_dev *hdev, enum hnae3_hw_error_type type); void hclge_inform_vf_promisc_info(struct hclge_vport *vport); void hclge_dbg_dump_rst_info(struct hclge_dev *hdev); +int hclge_push_vf_link_status(struct hclge_vport *vport); #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index 51a36e74f088..5512ffe0a149 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -490,16 +490,14 @@ static void hclge_get_vf_media_type(struct hclge_vport *vport, resp_msg->len = HCLGE_VF_MEDIA_TYPE_LENGTH; } -static int hclge_get_link_info(struct hclge_vport *vport, - struct hclge_mbx_vf_to_pf_cmd *mbx_req) +int hclge_push_vf_link_status(struct hclge_vport *vport) { #define HCLGE_VF_LINK_STATE_UP 1U #define HCLGE_VF_LINK_STATE_DOWN 0U struct hclge_dev *hdev = vport->back; u16 link_status; - u8 msg_data[8]; - u8 dest_vfid; + u8 msg_data[9]; u16 duplex; /* mac.link can only be 0 or 1 */ @@ -520,11 +518,11 @@ static int hclge_get_link_info(struct hclge_vport *vport, memcpy(&msg_data[0], &link_status, sizeof(u16)); memcpy(&msg_data[2], &hdev->hw.mac.speed, sizeof(u32)); memcpy(&msg_data[6], &duplex, sizeof(u16)); - dest_vfid = mbx_req->mbx_src_vfid; + msg_data[8] = HCLGE_MBX_PUSH_LINK_STATUS_EN; /* send this requested info to VF */ return hclge_send_mbx_msg(vport, msg_data, sizeof(msg_data), - HCLGE_MBX_LINK_STAT_CHANGE, dest_vfid); + HCLGE_MBX_LINK_STAT_CHANGE, vport->vport_id); } static void hclge_get_link_mode(struct hclge_vport *vport, @@ -550,14 +548,32 @@ static void hclge_get_link_mode(struct hclge_vport *vport, HCLGE_MBX_LINK_STAT_MODE, dest_vfid); } -static void hclge_mbx_reset_vf_queue(struct hclge_vport *vport, - struct hclge_mbx_vf_to_pf_cmd *mbx_req) +static int hclge_mbx_reset_vf_queue(struct hclge_vport *vport, + struct hclge_mbx_vf_to_pf_cmd *mbx_req, + struct hclge_respond_to_vf_msg *resp_msg) { +#define HCLGE_RESET_ALL_QUEUE_DONE 1U + struct hnae3_handle *handle = &vport->nic; + struct hclge_dev *hdev = vport->back; u16 queue_id; + int ret; memcpy(&queue_id, mbx_req->msg.data, sizeof(queue_id)); + resp_msg->data[0] = HCLGE_RESET_ALL_QUEUE_DONE; + resp_msg->len = sizeof(u8); + + /* pf will reset vf's all queues at a time. So it is unnecessary + * to reset queues if queue_id > 0, just return success. 
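hclge_push_vf_link_status() in the mailbox hunk above hand-packs its 9-byte message: link_status (u16) at offset 0, speed (u32) at 2, duplex (u16) at 6, and the HCLGE_MBX_PUSH_LINK_STATUS_EN flag byte at 8, using memcpy() so no unaligned stores are generated. A runnable sketch of that pack/unpack convention (names hypothetical); the raw native-order layout is presumably acceptable only because PF and VF drivers run on the same host and so agree on byte order:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Message layout (9 bytes):
 * [0..1] link_status, [2..5] speed, [6..7] duplex, [8] flags
 */
static void pack_link_msg(uint8_t msg[9], uint16_t link, uint32_t speed,
			  uint16_t duplex, uint8_t flags)
{
	memcpy(&msg[0], &link, sizeof(link));	/* memcpy: no unaligned stores */
	memcpy(&msg[2], &speed, sizeof(speed));
	memcpy(&msg[6], &duplex, sizeof(duplex));
	msg[8] = flags;
}

int main(void)
{
	uint8_t msg[9];
	uint32_t speed;

	pack_link_msg(msg, 1, 10000, 1, 0x1 /* push-link-status flag */);
	memcpy(&speed, &msg[2], sizeof(speed));	/* receiver side unpack */
	printf("speed=%u\n", speed);
	return 0;
}

Growing msg_data from 8 to 9 bytes for the flag is what lets an updated VF learn, from the first pushed update, that it may stop polling for link state.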
+ */ + if (queue_id > 0) + return 0; - hclge_reset_vf_queue(vport, queue_id); + ret = hclge_reset_tqp(handle); + if (ret) + dev_err(&hdev->pdev->dev, "failed to reset vf %u queue, ret = %d\n", + vport->vport_id - HCLGE_VF_VPORT_START_NUM, ret); + + return ret; } static int hclge_reset_vf(struct hclge_vport *vport) @@ -776,14 +792,14 @@ void hclge_mbx_handler(struct hclge_dev *hdev) hclge_get_vf_tcinfo(vport, &resp_msg); break; case HCLGE_MBX_GET_LINK_STATUS: - ret = hclge_get_link_info(vport, req); + ret = hclge_push_vf_link_status(vport); if (ret) dev_err(&hdev->pdev->dev, "failed to inform link stat to VF, ret = %d\n", ret); break; case HCLGE_MBX_QUEUE_RESET: - hclge_mbx_reset_vf_queue(vport, req); + ret = hclge_mbx_reset_vf_queue(vport, req, &resp_msg); break; case HCLGE_MBX_RESET: ret = hclge_reset_vf(vport); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c index e89820702540..08e88d9422cd 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c @@ -268,3 +268,42 @@ void hclge_mac_stop_phy(struct hclge_dev *hdev) phy_stop(phydev); } + +u16 hclge_read_phy_reg(struct hclge_dev *hdev, u16 reg_addr) +{ + struct hclge_phy_reg_cmd *req; + struct hclge_desc desc; + int ret; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PHY_REG, true); + + req = (struct hclge_phy_reg_cmd *)desc.data; + req->reg_addr = cpu_to_le16(reg_addr); + + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) + dev_err(&hdev->pdev->dev, + "failed to read phy reg, ret = %d.\n", ret); + + return le16_to_cpu(req->reg_val); +} + +int hclge_write_phy_reg(struct hclge_dev *hdev, u16 reg_addr, u16 val) +{ + struct hclge_phy_reg_cmd *req; + struct hclge_desc desc; + int ret; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PHY_REG, false); + + req = (struct hclge_phy_reg_cmd *)desc.data; + req->reg_addr = cpu_to_le16(reg_addr); + req->reg_val = cpu_to_le16(val); + + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) + dev_err(&hdev->pdev->dev, + "failed to write phy reg, ret = %d.\n", ret); + + return ret; +} diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h index dd9a1218a7b0..fd0e20190b90 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h @@ -9,5 +9,7 @@ int hclge_mac_connect_phy(struct hnae3_handle *handle); void hclge_mac_disconnect_phy(struct hnae3_handle *handle); void hclge_mac_start_phy(struct hclge_dev *hdev); void hclge_mac_stop_phy(struct hclge_dev *hdev); +u16 hclge_read_phy_reg(struct hclge_dev *hdev, u16 reg_addr); +int hclge_write_phy_reg(struct hclge_dev *hdev, u16 reg_addr, u16 val); #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 151afd1f0688..ebb962bad451 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -631,13 +631,12 @@ static u16 hclge_vport_get_tqp_num(struct hclge_vport *vport) return sum; } -static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport) +static void hclge_tm_update_kinfo_rss_size(struct hclge_vport *vport) { struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo; struct hclge_dev *hdev = vport->back; u16 vport_max_rss_size; u16 max_rss_size; - u8 i; /* TC configuration is shared by PF/VF in one port, only 
allow * one tc for VF for simplicity. VF's vport_id is non zero. @@ -665,19 +664,18 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport) kinfo->rss_size = kinfo->req_rss_size; } else if (kinfo->rss_size > max_rss_size || (!kinfo->req_rss_size && kinfo->rss_size < max_rss_size)) { - /* if user not set rss, the rss_size should compare with the - * valid msi numbers to ensure one to one map between tqp and - * irq as default. - */ - if (!kinfo->req_rss_size) - max_rss_size = min_t(u16, max_rss_size, - (hdev->num_nic_msi - 1) / - kinfo->tc_info.num_tc); - /* Set to the maximum specification value (max_rss_size). */ kinfo->rss_size = max_rss_size; } +} + +static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport) +{ + struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo; + struct hclge_dev *hdev = vport->back; + u8 i; + hclge_tm_update_kinfo_rss_size(vport); kinfo->num_tqps = hclge_vport_get_tqp_num(vport); vport->dwrr = 100; /* 100 percent as init */ vport->alloc_rss_size = kinfo->rss_size; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c index 46700c427849..d8c5c5810b99 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c @@ -349,7 +349,6 @@ static void hclgevf_parse_capability(struct hclgevf_dev *hdev, u32 caps; caps = __le32_to_cpu(cmd->caps[0]); - if (hnae3_get_bit(caps, HCLGEVF_CAP_UDP_GSO_B)) set_bit(HNAE3_DEV_SUPPORT_UDP_GSO_B, ae_dev->caps); if (hnae3_get_bit(caps, HCLGEVF_CAP_INT_QL_B)) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h index 8a37a22a176b..c6dc11b32aa7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h @@ -223,11 +223,14 @@ struct hclgevf_rss_indirection_table_cmd { }; #define HCLGEVF_RSS_TC_OFFSET_S 0 -#define HCLGEVF_RSS_TC_OFFSET_M (0x3ff << HCLGEVF_RSS_TC_OFFSET_S) +#define HCLGEVF_RSS_TC_OFFSET_M GENMASK(10, 0) +#define HCLGEVF_RSS_TC_SIZE_MSB_B 11 #define HCLGEVF_RSS_TC_SIZE_S 12 -#define HCLGEVF_RSS_TC_SIZE_M (0x7 << HCLGEVF_RSS_TC_SIZE_S) +#define HCLGEVF_RSS_TC_SIZE_M GENMASK(14, 12) #define HCLGEVF_RSS_TC_VALID_B 15 #define HCLGEVF_MAX_TC_NUM 8 +#define HCLGEVF_RSS_TC_SIZE_MSB_OFFSET 3 + struct hclgevf_rss_tc_mode_cmd { __le16 rss_tc_mode[HCLGEVF_MAX_TC_NUM]; u8 rsv[8]; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 700e068764c8..0db51ef15ef6 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -497,7 +497,6 @@ void hclgevf_update_link_status(struct hclgevf_dev *hdev, int link_state) link_state = test_bit(HCLGEVF_STATE_DOWN, &hdev->state) ? 
0 : link_state; - if (link_state != hdev->hw.mac.link) { client->ops->link_status_change(handle, !!link_state); if (rclient && rclient->ops->link_status_change) @@ -707,6 +706,9 @@ static int hclgevf_set_rss_tc_mode(struct hclgevf_dev *hdev, u16 rss_size) (tc_valid[i] & 0x1)); hnae3_set_field(mode, HCLGEVF_RSS_TC_SIZE_M, HCLGEVF_RSS_TC_SIZE_S, tc_size[i]); + hnae3_set_bit(mode, HCLGEVF_RSS_TC_SIZE_MSB_B, + tc_size[i] >> HCLGEVF_RSS_TC_SIZE_MSB_OFFSET & + 0x1); hnae3_set_field(mode, HCLGEVF_RSS_TC_OFFSET_M, HCLGEVF_RSS_TC_OFFSET_S, tc_offset[i]); @@ -1241,12 +1243,11 @@ static void hclgevf_sync_promisc_mode(struct hclgevf_dev *hdev) } } -static int hclgevf_tqp_enable(struct hclgevf_dev *hdev, unsigned int tqp_id, - int stream_id, bool enable) +static int hclgevf_tqp_enable_cmd_send(struct hclgevf_dev *hdev, u16 tqp_id, + u16 stream_id, bool enable) { struct hclgevf_cfg_com_tqp_queue_cmd *req; struct hclgevf_desc desc; - int status; req = (struct hclgevf_cfg_com_tqp_queue_cmd *)desc.data; @@ -1257,12 +1258,22 @@ static int hclgevf_tqp_enable(struct hclgevf_dev *hdev, unsigned int tqp_id, if (enable) req->enable |= 1U << HCLGEVF_TQP_ENABLE_B; - status = hclgevf_cmd_send(&hdev->hw, &desc, 1); - if (status) - dev_err(&hdev->pdev->dev, - "TQP enable fail, status =%d.\n", status); + return hclgevf_cmd_send(&hdev->hw, &desc, 1); +} - return status; +static int hclgevf_tqp_enable(struct hnae3_handle *handle, bool enable) +{ + struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); + int ret; + u16 i; + + for (i = 0; i < handle->kinfo.num_tqps; i++) { + ret = hclgevf_tqp_enable_cmd_send(hdev, i, 0, enable); + if (ret) + return ret; + } + + return 0; } static void hclgevf_reset_tqp_stats(struct hnae3_handle *handle) @@ -1711,20 +1722,39 @@ static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable) return hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0); } -static int hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id) +static int hclgevf_reset_tqp(struct hnae3_handle *handle) { +#define HCLGEVF_RESET_ALL_QUEUE_DONE 1U struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); struct hclge_vf_to_pf_msg send_msg; + u8 return_status = 0; int ret; + u16 i; /* disable vf queue before send queue reset msg to PF */ - ret = hclgevf_tqp_enable(hdev, queue_id, 0, false); - if (ret) + ret = hclgevf_tqp_enable(handle, false); + if (ret) { + dev_err(&hdev->pdev->dev, "failed to disable tqp, ret = %d\n", + ret); return ret; + } hclgevf_build_send_msg(&send_msg, HCLGE_MBX_QUEUE_RESET, 0); - memcpy(send_msg.data, &queue_id, sizeof(queue_id)); - return hclgevf_send_mbx_msg(hdev, &send_msg, true, NULL, 0); + + ret = hclgevf_send_mbx_msg(hdev, &send_msg, true, &return_status, + sizeof(return_status)); + if (ret || return_status == HCLGEVF_RESET_ALL_QUEUE_DONE) + return ret; + + for (i = 1; i < handle->kinfo.num_tqps; i++) { + hclgevf_build_send_msg(&send_msg, HCLGE_MBX_QUEUE_RESET, 0); + memcpy(send_msg.data, &i, sizeof(i)); + ret = hclgevf_send_mbx_msg(hdev, &send_msg, true, NULL, 0); + if (ret) + return ret; + } + + return 0; } static int hclgevf_set_mtu(struct hnae3_handle *handle, int new_mtu) @@ -2084,10 +2114,11 @@ static void hclgevf_enable_vector(struct hclgevf_misc_vector *vector, bool en) writel(en ? 
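The hclgevf_set_rss_tc_mode() hunk above is a classic field-widening fix: tc_size no longer fits in the 3-bit HCLGEVF_RSS_TC_SIZE field (bits 14:12), so its fourth bit is carried in the separate HCLGEVF_RSS_TC_SIZE_MSB bit (bit 11), extracted with the >> 3 shift (HCLGEVF_RSS_TC_SIZE_MSB_OFFSET). A self-checking sketch of packing a 4-bit value across that split layout (macro names here are illustrative, not the driver's):

#include <assert.h>
#include <stdint.h>

#define TC_SIZE_SHIFT	12
#define TC_SIZE_MASK	(0x7u << TC_SIZE_SHIFT)	/* bits 14:12: low 3 bits */
#define TC_SIZE_MSB_BIT	11			/* carries bit 3 of tc_size */

/* Pack a 4-bit tc_size into a register that only reserved 3 bits for it. */
static uint16_t pack_tc_size(uint16_t reg, unsigned int tc_size)
{
	reg &= (uint16_t)~(TC_SIZE_MASK | 1u << TC_SIZE_MSB_BIT);
	reg |= (tc_size & 0x7u) << TC_SIZE_SHIFT;		/* bits 2:0 */
	reg |= ((tc_size >> 3) & 0x1u) << TC_SIZE_MSB_BIT;	/* bit 3 */
	return reg;
}

int main(void)
{
	/* tc_size = 9 (0b1001): 0b001 lands in 14:12, MSB bit 11 is set */
	assert(pack_tc_size(0, 9) == ((0x1u << 12) | (1u << 11)));
	return 0;
}

Reusing a stray reserved bit this way keeps the command format backward compatible: old firmware that ignores bit 11 still sees a valid (if truncated) 3-bit size.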
1 : 0, vector->addr); } -static void hclgevf_flr_prepare(struct hnae3_ae_dev *ae_dev) +static void hclgevf_reset_prepare_general(struct hnae3_ae_dev *ae_dev, + enum hnae3_reset_type rst_type) { -#define HCLGEVF_FLR_RETRY_WAIT_MS 500 -#define HCLGEVF_FLR_RETRY_CNT 5 +#define HCLGEVF_RESET_RETRY_WAIT_MS 500 +#define HCLGEVF_RESET_RETRY_CNT 5 struct hclgevf_dev *hdev = ae_dev->priv; int retry_cnt = 0; @@ -2096,29 +2127,31 @@ static void hclgevf_flr_prepare(struct hnae3_ae_dev *ae_dev) retry: down(&hdev->reset_sem); set_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state); - hdev->reset_type = HNAE3_FLR_RESET; + hdev->reset_type = rst_type; ret = hclgevf_reset_prepare(hdev); if (ret) { - dev_err(&hdev->pdev->dev, "fail to prepare FLR, ret=%d\n", + dev_err(&hdev->pdev->dev, "fail to prepare to reset, ret=%d\n", ret); if (hdev->reset_pending || - retry_cnt++ < HCLGEVF_FLR_RETRY_CNT) { + retry_cnt++ < HCLGEVF_RESET_RETRY_CNT) { dev_err(&hdev->pdev->dev, "reset_pending:0x%lx, retry_cnt:%d\n", hdev->reset_pending, retry_cnt); clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state); up(&hdev->reset_sem); - msleep(HCLGEVF_FLR_RETRY_WAIT_MS); + msleep(HCLGEVF_RESET_RETRY_WAIT_MS); goto retry; } } - /* disable misc vector before FLR done */ + /* disable misc vector before reset done */ hclgevf_enable_vector(&hdev->misc_vector, false); - hdev->rst_stats.flr_rst_cnt++; + + if (hdev->reset_type == HNAE3_FLR_RESET) + hdev->rst_stats.flr_rst_cnt++; } -static void hclgevf_flr_done(struct hnae3_ae_dev *ae_dev) +static void hclgevf_reset_done(struct hnae3_ae_dev *ae_dev) { struct hclgevf_dev *hdev = ae_dev->priv; int ret; @@ -2193,7 +2226,7 @@ static void hclgevf_reset_service_task(struct hclgevf_dev *hdev) if (test_and_clear_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state)) { - /* PF has initmated that it is about to reset the hardware. + /* PF has intimated that it is about to reset the hardware. * We now have to poll & check if hardware has actually * completed the reset sequence. On hardware reset completion, * VF needs to reset the client and ae device. @@ -2307,10 +2340,11 @@ static void hclgevf_periodic_service_task(struct hclgevf_dev *hdev) if (!(hdev->serv_processed_cnt % HCLGEVF_STATS_TIMER_INTERVAL)) hclgevf_tqps_update_stats(handle); - /* request the link status from the PF. PF would be able to tell VF - * about such updates in future so we might remove this later + /* VF does not need to request link status when this bit is set, because + * PF will push its link status to VFs when link status changed. 
*/ - hclgevf_request_link_info(hdev); + if (!test_bit(HCLGEVF_STATE_PF_PUSH_LINK_STATUS, &hdev->state)) + hclgevf_request_link_info(hdev); hclgevf_update_link_mode(hdev); @@ -2356,7 +2390,6 @@ static enum hclgevf_evt_cause hclgevf_check_evt_cause(struct hclgevf_dev *hdev, /* fetch the events from their corresponding regs */ cmdq_stat_reg = hclgevf_read_dev(&hdev->hw, HCLGEVF_VECTOR0_CMDQ_STATE_REG); - if (BIT(HCLGEVF_VECTOR0_RST_INT_B) & cmdq_stat_reg) { rst_ing_reg = hclgevf_read_dev(&hdev->hw, HCLGEVF_RST_ING); dev_info(&hdev->pdev->dev, @@ -2624,28 +2657,26 @@ static int hclgevf_ae_start(struct hnae3_handle *handle) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); + clear_bit(HCLGEVF_STATE_DOWN, &hdev->state); + clear_bit(HCLGEVF_STATE_PF_PUSH_LINK_STATUS, &hdev->state); + hclgevf_reset_tqp_stats(handle); hclgevf_request_link_info(hdev); hclgevf_update_link_mode(hdev); - clear_bit(HCLGEVF_STATE_DOWN, &hdev->state); - return 0; } static void hclgevf_ae_stop(struct hnae3_handle *handle) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); - int i; set_bit(HCLGEVF_STATE_DOWN, &hdev->state); if (hdev->reset_type != HNAE3_VF_RESET) - for (i = 0; i < handle->kinfo.num_tqps; i++) - if (hclgevf_reset_tqp(handle, i)) - break; + hclgevf_reset_tqp(handle); hclgevf_reset_tqp_stats(handle); hclgevf_update_link_status(hdev, 0); @@ -3497,7 +3528,7 @@ static int hclgevf_set_channels(struct hnae3_handle *handle, u32 new_tqps_num, if (ret) return ret; - /* RSS indirection table has been configuared by user */ + /* RSS indirection table has been configured by user */ if (rxfh_configured) goto out; @@ -3615,7 +3646,7 @@ static void hclgevf_get_link_mode(struct hnae3_handle *handle, } #define MAX_SEPARATE_NUM 4 -#define SEPARATOR_VALUE 0xFFFFFFFF +#define SEPARATOR_VALUE 0xFDFCFBFA #define REG_NUM_PER_LINE 4 #define REG_LEN_PER_LINE (REG_NUM_PER_LINE * sizeof(u32)) @@ -3722,8 +3753,8 @@ void hclgevf_update_port_base_vlan_info(struct hclgevf_dev *hdev, u16 state, static const struct hnae3_ae_ops hclgevf_ops = { .init_ae_dev = hclgevf_init_ae_dev, .uninit_ae_dev = hclgevf_uninit_ae_dev, - .flr_prepare = hclgevf_flr_prepare, - .flr_done = hclgevf_flr_done, + .reset_prepare = hclgevf_reset_prepare_general, + .reset_done = hclgevf_reset_done, .init_client_instance = hclgevf_init_client_instance, .uninit_client_instance = hclgevf_uninit_client_instance, .start = hclgevf_ae_start, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index 8c27ecd819af..265c9b0b4728 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -152,6 +152,7 @@ enum hclgevf_states { HCLGEVF_STATE_LINK_UPDATING, HCLGEVF_STATE_PROMISC_CHANGED, HCLGEVF_STATE_RST_FAIL, + HCLGEVF_STATE_PF_PUSH_LINK_STATUS, }; struct hclgevf_mac { @@ -176,9 +177,9 @@ struct hclgevf_hw { /* TQP stats */ struct hlcgevf_tqp_stats { - /* query_tqp_tx_queue_statistics ,opcode id: 0x0B03 */ + /* query_tqp_tx_queue_statistics, opcode id: 0x0B03 */ u64 rcb_tx_ring_pktnum_rcd; /* 32bit */ - /* query_tqp_rx_queue_statistics ,opcode id: 0x0B13 */ + /* query_tqp_rx_queue_statistics, opcode id: 0x0B13 */ u64 rcb_rx_ring_pktnum_rcd; /* 32bit */ }; @@ -192,7 +193,6 @@ struct hclgevf_tqp { }; struct hclgevf_cfg { - u8 vmdq_vport_num; u8 tc_num; u16 tqp_desc_num; u16 rx_buf_len; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c 
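The SEPARATOR_VALUE change from 0xFFFFFFFF to 0xFDFCFBFA in the VF "get regs" path is presumably about sentinel collision: all-ones is exactly what a faulted or freshly reset register often reads back, so it made a poor section delimiter in the dump. A tiny sketch of why the distinctive value matters when a consumer re-parses the dump (names hypothetical):

#include <stdint.h>
#include <stdio.h>

#define SEPARATOR_VALUE 0xFDFCFBFAu	/* unlikely to collide with real data */

/* Count separator-delimited sections in a register dump. */
static unsigned int count_sections(const uint32_t *dump, size_t n)
{
	unsigned int sections = 0;

	for (size_t i = 0; i < n; i++)
		if (dump[i] == SEPARATOR_VALUE)
			sections++;
	return sections;
}

int main(void)
{
	const uint32_t dump[] = {
		0x1, 0xFFFFFFFF,	/* all-ones is real register content */
		SEPARATOR_VALUE,
		0x2, SEPARATOR_VALUE,
	};

	printf("%u sections\n",
	       count_sections(dump, sizeof(dump) / sizeof(dump[0])));
	return 0;
}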
index 5b2dcd97c107..9b17735b9f4c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c @@ -276,6 +276,7 @@ void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev) u8 duplex; u32 speed; u32 tail; + u8 flag; u8 idx; /* we can safely clear it now as we are at start of the async message @@ -300,11 +301,16 @@ void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev) link_status = msg_q[1]; memcpy(&speed, &msg_q[2], sizeof(speed)); duplex = (u8)msg_q[4]; + flag = (u8)msg_q[5]; /* update upper layer with new link link status */ hclgevf_update_link_status(hdev, link_status); hclgevf_update_speed_duplex(hdev, speed, duplex); + if (flag & HCLGE_MBX_PUSH_LINK_STATUS_EN) + set_bit(HCLGEVF_STATE_PF_PUSH_LINK_STATUS, + &hdev->state); + break; case HCLGE_MBX_LINK_STAT_MODE: idx = (u8)msg_q[1]; diff --git a/drivers/net/ethernet/hisilicon/hns_mdio.c b/drivers/net/ethernet/hisilicon/hns_mdio.c index 883d0d7c6858..3e54017a2a5b 100644 --- a/drivers/net/ethernet/hisilicon/hns_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns_mdio.c @@ -279,7 +279,7 @@ static int hns_mdio_write(struct mii_bus *bus, static int hns_mdio_read(struct mii_bus *bus, int phy_id, int regnum) { int ret; - u16 reg_val = 0; + u16 reg_val; u8 devad = ((regnum >> 16) & 0x1f); u8 is_c45 = !!(regnum & MII_ADDR_C45); u16 reg = (u16)(regnum & 0xffff); @@ -420,7 +420,7 @@ static int hns_mdio_probe(struct platform_device *pdev) { struct hns_mdio_device *mdio_dev; struct mii_bus *new_bus; - int ret = -ENODEV; + int ret; if (!pdev) { dev_err(NULL, "pdev is NULL!\r\n"); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c index c340d9acba80..dc024ef521c0 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c @@ -34,7 +34,7 @@ #include "hinic_rx.h" #include "hinic_dev.h" -#define SET_LINK_STR_MAX_LEN 128 +#define SET_LINK_STR_MAX_LEN 16 #define GET_SUPPORTED_MODE 0 #define GET_ADVERTISED_MODE 1 @@ -462,24 +462,19 @@ static int hinic_set_settings_to_hw(struct hinic_dev *nic_dev, { struct hinic_link_ksettings_info settings = {0}; char set_link_str[SET_LINK_STR_MAX_LEN] = {0}; + const char *autoneg_str; struct net_device *netdev = nic_dev->netdev; enum nic_speed_level speed_level = 0; int err; - err = snprintf(set_link_str, SET_LINK_STR_MAX_LEN, "%s", - (set_settings & HILINK_LINK_SET_AUTONEG) ? - (autoneg ? "autong enable " : "autong disable ") : ""); - if (err < 0 || err >= SET_LINK_STR_MAX_LEN) { - netif_err(nic_dev, drv, netdev, "Failed to snprintf link state, function return(%d) and dest_len(%d)\n", - err, SET_LINK_STR_MAX_LEN); - return -EFAULT; - } + autoneg_str = (set_settings & HILINK_LINK_SET_AUTONEG) ? + (autoneg ? 
"autong enable " : "autong disable ") : ""; if (set_settings & HILINK_LINK_SET_SPEED) { speed_level = hinic_ethtool_to_hw_speed_level(speed); err = snprintf(set_link_str, SET_LINK_STR_MAX_LEN, - "%sspeed %d ", set_link_str, speed); - if (err <= 0 || err >= SET_LINK_STR_MAX_LEN) { + "speed %d ", speed); + if (err >= SET_LINK_STR_MAX_LEN) { netif_err(nic_dev, drv, netdev, "Failed to snprintf link speed, function return(%d) and dest_len(%d)\n", err, SET_LINK_STR_MAX_LEN); return -EFAULT; @@ -494,11 +489,11 @@ static int hinic_set_settings_to_hw(struct hinic_dev *nic_dev, err = hinic_set_link_settings(nic_dev->hwdev, &settings); if (err != HINIC_MGMT_CMD_UNSUPPORTED) { if (err) - netif_err(nic_dev, drv, netdev, "Set %s failed\n", - set_link_str); + netif_err(nic_dev, drv, netdev, "Set %s%sfailed\n", + autoneg_str, set_link_str); else - netif_info(nic_dev, drv, netdev, "Set %s successfully\n", - set_link_str); + netif_info(nic_dev, drv, netdev, "Set %s%ssuccessfully\n", + autoneg_str, set_link_str); return err; } @@ -543,8 +538,8 @@ static void hinic_get_drvinfo(struct net_device *netdev, struct hinic_hwif *hwif = hwdev->hwif; int err; - strlcpy(info->driver, HINIC_DRV_NAME, sizeof(info->driver)); - strlcpy(info->bus_info, pci_name(hwif->pdev), sizeof(info->bus_info)); + strscpy(info->driver, HINIC_DRV_NAME, sizeof(info->driver)); + strscpy(info->bus_info, pci_name(hwif->pdev), sizeof(info->bus_info)); err = hinic_get_mgmt_version(nic_dev, mgmt_ver); if (err) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c index 4e4029d5c8e1..06586173add7 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_api_cmd.c @@ -629,10 +629,8 @@ static int alloc_cmd_buf(struct hinic_api_cmd_chain *chain, cmd_vaddr = dma_alloc_coherent(&pdev->dev, API_CMD_BUF_SIZE, &cmd_paddr, GFP_KERNEL); - if (!cmd_vaddr) { - dev_err(&pdev->dev, "Failed to allocate API CMD DMA memory\n"); + if (!cmd_vaddr) return -ENOMEM; - } cell_ctxt = &chain->cell_ctxt[cell_idx]; @@ -679,10 +677,8 @@ static int api_cmd_create_cell(struct hinic_api_cmd_chain *chain, node = dma_alloc_coherent(&pdev->dev, chain->cell_size, &node_paddr, GFP_KERNEL); - if (!node) { - dev_err(&pdev->dev, "Failed to allocate dma API CMD cell\n"); + if (!node) return -ENOMEM; - } node->read.hw_wb_resp_paddr = 0; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c index efbaed389440..cab38ff0713c 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c @@ -334,7 +334,7 @@ static void set_dma_attr(struct hinic_hwif *hwif, u32 entry_idx, } /** - * dma_attr_table_init - initialize the the default dma attributes + * dma_attr_table_init - initialize the default dma attributes * @hwif: the HW interface of a pci function device **/ static void dma_attr_init(struct hinic_hwif *hwif) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c index 819fa13034c0..817173f1fbb7 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c @@ -440,18 +440,14 @@ static void mgmt_recv_msg_handler(struct hinic_pf_to_mgmt *pf_to_mgmt, struct hinic_recv_msg *recv_msg) { struct hinic_mgmt_msg_handle_work *mgmt_work = NULL; - struct pci_dev *pdev = pf_to_mgmt->hwif->pdev; mgmt_work = kzalloc(sizeof(*mgmt_work), GFP_KERNEL); - if 
(!mgmt_work) { - dev_err(&pdev->dev, "Allocate mgmt work memory failed\n"); + if (!mgmt_work) return; - } if (recv_msg->msg_len) { mgmt_work->msg = kzalloc(recv_msg->msg_len, GFP_KERNEL); if (!mgmt_work->msg) { - dev_err(&pdev->dev, "Allocate mgmt msg memory failed\n"); kfree(mgmt_work); return; } diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c index fcf7bfe4aa47..dcba4d009bad 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c @@ -414,7 +414,6 @@ int hinic_init_rq(struct hinic_rq *rq, struct hinic_hwif *hwif, rq->pi_virt_addr = dma_alloc_coherent(&pdev->dev, pi_size, &rq->pi_dma_addr, GFP_KERNEL); if (!rq->pi_virt_addr) { - dev_err(&pdev->dev, "Failed to allocate PI address\n"); err = -ENOMEM; goto err_pi_virt; } diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c index 070a7cc6392e..cce08647b9b2 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c @@ -137,10 +137,8 @@ static struct sk_buff *rx_alloc_skb(struct hinic_rxq *rxq, int err; skb = netdev_alloc_skb_ip_align(rxq->netdev, rxq->rq->buf_sz); - if (!skb) { - netdev_err(rxq->netdev, "Failed to allocate Rx SKB\n"); + if (!skb) return NULL; - } addr = dma_map_single(&pdev->dev, skb->data, rxq->rq->buf_sz, DMA_FROM_DEVICE); @@ -212,10 +210,8 @@ static int rx_alloc_pkts(struct hinic_rxq *rxq) for (i = 0; i < free_wqebbs; i++) { skb = rx_alloc_skb(rxq, &dma_addr); - if (!skb) { - netdev_err(rxq->netdev, "Failed to alloc Rx skb\n"); + if (!skb) goto skb_out; - } hinic_set_sge(&sge, dma_addr, skb->len); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c index 8da7d46363b2..710c4ff7bc0e 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c @@ -377,6 +377,7 @@ static int offload_csum(struct hinic_sq_task *task, u32 *queue_info, } else if (ip.v4->version == 6) { unsigned char *exthdr; __be16 frag_off; + l3_type = IPV6_PKT; tunnel_type = TUNNEL_UDP_CSUM; exthdr = ip.hdr + sizeof(*ip.v6); diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index c2e740475786..ea55314b209d 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -109,6 +109,7 @@ static const struct of_device_id ehea_device_table[] = { }, {}, }; +MODULE_DEVICE_TABLE(of, ehea_device_table); static struct platform_driver ehea_driver = { .driver = { diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 9c6438d3b3a5..5788bb956d73 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -827,6 +827,30 @@ static void release_napi(struct ibmvnic_adapter *adapter) adapter->napi_enabled = false; } +static const char *adapter_state_to_string(enum vnic_state state) +{ + switch (state) { + case VNIC_PROBING: + return "PROBING"; + case VNIC_PROBED: + return "PROBED"; + case VNIC_OPENING: + return "OPENING"; + case VNIC_OPEN: + return "OPEN"; + case VNIC_CLOSING: + return "CLOSING"; + case VNIC_CLOSED: + return "CLOSED"; + case VNIC_REMOVING: + return "REMOVING"; + case VNIC_REMOVED: + return "REMOVED"; + default: + return "UNKNOWN"; + } +} + static int ibmvnic_login(struct net_device *netdev) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); @@ -905,7 +929,7 @@ static int 
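The ibmvnic hunks nearby introduce adapter_state_to_string() and reset_reason_to_string() so that debug output reads "[S:OPEN]" rather than "[S:3]", which is meaningless without the header at hand. The pattern compresses to a switch-based lookup with an "UNKNOWN" guard; the sketch below uses an abridged enum, not the full driver list:

#include <stdio.h>

enum vnic_state { VNIC_PROBING, VNIC_OPEN, VNIC_CLOSED };	/* abridged */

/* Readable names for logs; default guards against values added later. */
static const char *state_str(enum vnic_state s)
{
	switch (s) {
	case VNIC_PROBING:	return "PROBING";
	case VNIC_OPEN:		return "OPEN";
	case VNIC_CLOSED:	return "CLOSED";
	default:		return "UNKNOWN";
	}
}

int main(void)
{
	printf("[S:%s FOP:%d]\n", state_str(VNIC_OPEN), 0);
	return 0;
}

The cost is one helper per enum; the payoff is that "Reset reason: FAILOVER, reset_state: OPEN" is greppable and stays correct even if enum values are renumbered.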
ibmvnic_login(struct net_device *netdev) __ibmvnic_set_mac(netdev, adapter->mac_addr); - netdev_dbg(netdev, "[S:%d] Login succeeded\n", adapter->state); + netdev_dbg(netdev, "[S:%s] Login succeeded\n", adapter_state_to_string(adapter->state)); return 0; } @@ -1149,19 +1173,13 @@ static int __ibmvnic_open(struct net_device *netdev) rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP); if (rc) { - for (i = 0; i < adapter->req_rx_queues; i++) - napi_disable(&adapter->napi[i]); + ibmvnic_napi_disable(adapter); release_resources(adapter); return rc; } netif_tx_start_all_queues(netdev); - if (prev_state == VNIC_CLOSED) { - for (i = 0; i < adapter->req_rx_queues; i++) - napi_schedule(&adapter->napi[i]); - } - adapter->state = VNIC_OPEN; return rc; } @@ -1185,8 +1203,9 @@ static int ibmvnic_open(struct net_device *netdev) * honor our setting below. */ if (adapter->failover_pending || (test_bit(0, &adapter->resetting))) { - netdev_dbg(netdev, "[S:%d FOP:%d] Resetting, deferring open\n", - adapter->state, adapter->failover_pending); + netdev_dbg(netdev, "[S:%s FOP:%d] Resetting, deferring open\n", + adapter_state_to_string(adapter->state), + adapter->failover_pending); adapter->state = VNIC_OPEN; rc = 0; goto out; @@ -1350,8 +1369,9 @@ static int ibmvnic_close(struct net_device *netdev) struct ibmvnic_adapter *adapter = netdev_priv(netdev); int rc; - netdev_dbg(netdev, "[S:%d FOP:%d FRR:%d] Closing\n", - adapter->state, adapter->failover_pending, + netdev_dbg(netdev, "[S:%s FOP:%d FRR:%d] Closing\n", + adapter_state_to_string(adapter->state), + adapter->failover_pending, adapter->force_reset_recovery); /* If device failover is pending, just set device state and return. @@ -1678,9 +1698,8 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - memcpy(dst + cur, - page_address(skb_frag_page(frag)) + - skb_frag_off(frag), skb_frag_size(frag)); + memcpy(dst + cur, skb_frag_address(frag), + skb_frag_size(frag)); cur += skb_frag_size(frag); } } else { @@ -1912,6 +1931,26 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p) return rc; } +static const char *reset_reason_to_string(enum ibmvnic_reset_reason reason) +{ + switch (reason) { + case VNIC_RESET_FAILOVER: + return "FAILOVER"; + case VNIC_RESET_MOBILITY: + return "MOBILITY"; + case VNIC_RESET_FATAL: + return "FATAL"; + case VNIC_RESET_NON_FATAL: + return "NON_FATAL"; + case VNIC_RESET_TIMEOUT: + return "TIMEOUT"; + case VNIC_RESET_CHANGE_PARAM: + return "CHANGE_PARAM"; + default: + return "UNKNOWN"; + } +} + /* * do_reset returns zero if we are able to keep processing reset events, or * non-zero if we hit a fatal error and must halt. 
@@ -1922,12 +1961,14 @@ static int do_reset(struct ibmvnic_adapter *adapter, u64 old_num_rx_queues, old_num_tx_queues; u64 old_num_rx_slots, old_num_tx_slots; struct net_device *netdev = adapter->netdev; - int i, rc; + int rc; netdev_dbg(adapter->netdev, - "[S:%d FOP:%d] Reset reason %d, reset_state %d\n", - adapter->state, adapter->failover_pending, - rwi->reset_reason, reset_state); + "[S:%s FOP:%d] Reset reason: %s, reset_state: %s\n", + adapter_state_to_string(adapter->state), + adapter->failover_pending, + reset_reason_to_string(rwi->reset_reason), + adapter_state_to_string(reset_state)); adapter->reset_reason = rwi->reset_reason; /* requestor of VNIC_RESET_CHANGE_PARAM already has the rtnl lock */ @@ -1987,8 +2028,8 @@ static int do_reset(struct ibmvnic_adapter *adapter, * from VNIC_CLOSING state. */ netdev_dbg(netdev, - "Open changed state from %d, updating.\n", - reset_state); + "Open changed state from %s, updating.\n", + adapter_state_to_string(reset_state)); reset_state = VNIC_OPEN; adapter->state = VNIC_CLOSING; } @@ -2111,10 +2152,6 @@ static int do_reset(struct ibmvnic_adapter *adapter, /* refresh device's multicast list */ ibmvnic_set_multi(netdev); - /* kick napi */ - for (i = 0; i < adapter->req_rx_queues; i++) - napi_schedule(&adapter->napi[i]); - if (adapter->reset_reason == VNIC_RESET_FAILOVER || adapter->reset_reason == VNIC_RESET_MOBILITY) __netdev_notify_peers(netdev); @@ -2129,8 +2166,9 @@ out: if (!(adapter->reset_reason == VNIC_RESET_CHANGE_PARAM)) rtnl_unlock(); - netdev_dbg(adapter->netdev, "[S:%d FOP:%d] Reset done, rc %d\n", - adapter->state, adapter->failover_pending, rc); + netdev_dbg(adapter->netdev, "[S:%s FOP:%d] Reset done, rc %d\n", + adapter_state_to_string(adapter->state), + adapter->failover_pending, rc); return rc; } @@ -2140,8 +2178,8 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter, struct net_device *netdev = adapter->netdev; int rc; - netdev_dbg(adapter->netdev, "Hard resetting driver (%d)\n", - rwi->reset_reason); + netdev_dbg(adapter->netdev, "Hard resetting driver (%s)\n", + reset_reason_to_string(rwi->reset_reason)); /* read the state and check (again) after getting rtnl */ reset_state = adapter->state; @@ -2207,8 +2245,9 @@ out: /* restore adapter state if reset failed */ if (rc) adapter->state = reset_state; - netdev_dbg(adapter->netdev, "[S:%d FOP:%d] Hard reset done, rc %d\n", - adapter->state, adapter->failover_pending, rc); + netdev_dbg(adapter->netdev, "[S:%s FOP:%d] Hard reset done, rc %d\n", + adapter_state_to_string(adapter->state), + adapter->failover_pending, rc); return rc; } @@ -2243,8 +2282,9 @@ static void __ibmvnic_reset(struct work_struct *work) adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset); if (test_and_set_bit_lock(0, &adapter->resetting)) { - schedule_delayed_work(&adapter->ibmvnic_delayed_reset, - IBMVNIC_RESET_DELAY); + queue_delayed_work(system_long_wq, + &adapter->ibmvnic_delayed_reset, + IBMVNIC_RESET_DELAY); return; } @@ -2287,8 +2327,8 @@ static void __ibmvnic_reset(struct work_struct *work) if (rc) { /* give backing device time to settle down */ netdev_dbg(adapter->netdev, - "[S:%d] Hard reset failed, waiting 60 secs\n", - adapter->state); + "[S:%s] Hard reset failed, waiting 60 secs\n", + adapter_state_to_string(adapter->state)); set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(60 * HZ); } @@ -2316,8 +2356,9 @@ static void __ibmvnic_reset(struct work_struct *work) clear_bit_unlock(0, &adapter->resetting); netdev_dbg(adapter->netdev, - "[S:%d FRR:%d WFR:%d] Done 
processing resets\n", - adapter->state, adapter->force_reset_recovery, + "[S:%s FRR:%d WFR:%d] Done processing resets\n", + adapter_state_to_string(adapter->state), + adapter->force_reset_recovery, adapter->wait_for_reset); } @@ -2364,8 +2405,8 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter, list_for_each(entry, &adapter->rwi_list) { tmp = list_entry(entry, struct ibmvnic_rwi, list); if (tmp->reset_reason == reason) { - netdev_dbg(netdev, "Skipping matching reset, reason=%d\n", - reason); + netdev_dbg(netdev, "Skipping matching reset, reason=%s\n", + reset_reason_to_string(reason)); ret = EBUSY; goto err; } @@ -2385,8 +2426,9 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter, } rwi->reset_reason = reason; list_add_tail(&rwi->list, &adapter->rwi_list); - netdev_dbg(adapter->netdev, "Scheduling reset (reason %d)\n", reason); - schedule_work(&adapter->ibmvnic_reset); + netdev_dbg(adapter->netdev, "Scheduling reset (reason %s)\n", + reset_reason_to_string(reason)); + queue_work(system_long_wq, &adapter->ibmvnic_reset); ret = 0; err: @@ -3204,9 +3246,6 @@ restart_loop: next = ibmvnic_next_scrq(adapter, scrq); for (i = 0; i < next->tx_comp.num_comps; i++) { - if (next->tx_comp.rcs[i]) - dev_err(dev, "tx error %x\n", - next->tx_comp.rcs[i]); index = be32_to_cpu(next->tx_comp.correlators[i]); if (index & IBMVNIC_TSO_POOL_MASK) { tx_pool = &adapter->tso_pool[pool]; @@ -3220,7 +3259,13 @@ restart_loop: num_entries += txbuff->num_entries; if (txbuff->skb) { total_bytes += txbuff->skb->len; - dev_consume_skb_irq(txbuff->skb); + if (next->tx_comp.rcs[i]) { + dev_err(dev, "tx error %x\n", + next->tx_comp.rcs[i]); + dev_kfree_skb_irq(txbuff->skb); + } else { + dev_consume_skb_irq(txbuff->skb); + } txbuff->skb = NULL; } else { netdev_warn(adapter->netdev, @@ -5452,7 +5497,7 @@ static ssize_t failover_store(struct device *dev, struct device_attribute *attr, if (rc) { netdev_err(netdev, "Couldn't retrieve session token, rc %ld\n", rc); - return -EINVAL; + goto last_resort; } session_token = (__be64)retbuf[0]; @@ -5460,15 +5505,17 @@ static ssize_t failover_store(struct device *dev, struct device_attribute *attr, be64_to_cpu(session_token)); rc = plpar_hcall_norets(H_VIOCTL, adapter->vdev->unit_address, H_SESSION_ERR_DETECTED, session_token, 0, 0); - if (rc) { - netdev_err(netdev, "Client initiated failover failed, rc %ld\n", + if (rc) + netdev_err(netdev, + "H_VIOCTL initiated failover failed, rc %ld\n", rc); - return -EINVAL; - } + +last_resort: + netdev_dbg(netdev, "Trying to send CRQ_CMD, the last resort\n"); + ibmvnic_reset(adapter, VNIC_RESET_FAILOVER); return count; } - static DEVICE_ATTR_WO(failover); static unsigned long ibmvnic_get_desired_dma(struct vio_dev *vdev) diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 806aa75a4e86..c1d39a748546 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -412,77 +412,6 @@ struct ibmvnic_control_ip_offload { struct ibmvnic_rc rc; } __packed __aligned(8); -struct ibmvnic_request_dump_size { - u8 first; - u8 cmd; - u8 reserved[6]; - __be32 len; - struct ibmvnic_rc rc; -} __packed __aligned(8); - -struct ibmvnic_request_dump { - u8 first; - u8 cmd; - u8 reserved1[2]; - __be32 ioba; - __be32 len; - u8 reserved2[4]; -} __packed __aligned(8); - -struct ibmvnic_request_dump_rsp { - u8 first; - u8 cmd; - u8 reserved[6]; - __be32 dumped_len; - struct ibmvnic_rc rc; -} __packed __aligned(8); - -struct ibmvnic_request_ras_comp_num { - u8 first; - u8 cmd; - u8 
reserved1[2]; - __be32 num_components; - u8 reserved2[4]; - struct ibmvnic_rc rc; -} __packed __aligned(8); - -struct ibmvnic_request_ras_comps { - u8 first; - u8 cmd; - u8 reserved[2]; - __be32 ioba; - __be32 len; - struct ibmvnic_rc rc; -} __packed __aligned(8); - -struct ibmvnic_control_ras { - u8 first; - u8 cmd; - u8 correlator; - u8 level; - u8 op; -#define IBMVNIC_TRACE_LEVEL 1 -#define IBMVNIC_ERROR_LEVEL 2 -#define IBMVNIC_TRACE_PAUSE 3 -#define IBMVNIC_TRACE_RESUME 4 -#define IBMVNIC_TRACE_ON 5 -#define IBMVNIC_TRACE_OFF 6 -#define IBMVNIC_CHG_TRACE_BUFF_SZ 7 - u8 trace_buff_sz[3]; - u8 reserved[4]; - struct ibmvnic_rc rc; -} __packed __aligned(8); - -struct ibmvnic_collect_fw_trace { - u8 first; - u8 cmd; - u8 correlator; - u8 reserved; - __be32 ioba; - __be32 len; - struct ibmvnic_rc rc; -} __packed __aligned(8); - struct ibmvnic_request_statistics { u8 first; u8 cmd; @@ -494,15 +423,6 @@ struct ibmvnic_request_statistics { u8 reserved[4]; } __packed __aligned(8); -struct ibmvnic_request_debug_stats { - u8 first; - u8 cmd; - u8 reserved[2]; - __be32 ioba; - __be32 len; - struct ibmvnic_rc rc; -} __packed __aligned(8); - struct ibmvnic_error_indication { u8 first; u8 cmd; @@ -677,22 +597,8 @@ union ibmvnic_crq { struct ibmvnic_query_ip_offload query_ip_offload_rsp; struct ibmvnic_control_ip_offload control_ip_offload; struct ibmvnic_control_ip_offload control_ip_offload_rsp; - struct ibmvnic_request_dump_size request_dump_size; - struct ibmvnic_request_dump_size request_dump_size_rsp; - struct ibmvnic_request_dump request_dump; - struct ibmvnic_request_dump_rsp request_dump_rsp; - struct ibmvnic_request_ras_comp_num request_ras_comp_num; - struct ibmvnic_request_ras_comp_num request_ras_comp_num_rsp; - struct ibmvnic_request_ras_comps request_ras_comps; - struct ibmvnic_request_ras_comps request_ras_comps_rsp; - struct ibmvnic_control_ras control_ras; - struct ibmvnic_control_ras control_ras_rsp; - struct ibmvnic_collect_fw_trace collect_fw_trace; - struct ibmvnic_collect_fw_trace collect_fw_trace_rsp; struct ibmvnic_request_statistics request_statistics; struct ibmvnic_generic_crq request_statistics_rsp; - struct ibmvnic_request_debug_stats request_debug_stats; - struct ibmvnic_request_debug_stats request_debug_stats_rsp; struct ibmvnic_error_indication error_indication; struct ibmvnic_link_state_indication link_state_indication; struct ibmvnic_change_mac_addr change_mac_addr; diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index 5aa86318ed3e..c1d155690341 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -294,6 +294,7 @@ config ICE tristate "Intel(R) Ethernet Connection E800 Series Support" default n depends on PCI_MSI + select DIMLIB select NET_DEVLINK select PLDMFW help diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.c b/drivers/net/ethernet/intel/e1000/e1000_hw.c index 4c0c9433bd60..19cf36360933 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_hw.c +++ b/drivers/net/ethernet/intel/e1000/e1000_hw.c @@ -1183,6 +1183,7 @@ static s32 e1000_copper_link_igp_setup(struct e1000_hw *hw) break; case e1000_ms_auto: phy_data &= ~CR_1000T_MS_ENABLE; + break; default: break; } diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c index 88faf05e23ba..0b1e890dd583 100644 --- a/drivers/net/ethernet/intel/e1000e/82571.c +++ b/drivers/net/ethernet/intel/e1000e/82571.c @@ -899,6 +899,8 @@ static s32 e1000_set_d0_lplu_state_82571(struct e1000_hw *hw, bool active) 
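This and several similar hunks in the series (the e1000_ms_auto case above, and the fm10k mailbox state machines later on) add a bare `break` before an empty `default:`. Behavior is unchanged; the explicit break documents that no fallthrough is intended and keeps -Wimplicit-fallthrough quiet. A minimal runnable illustration (enum and mask values are made up):

#include <stdio.h>

enum ms_mode { MS_FORCE_MASTER, MS_FORCE_SLAVE, MS_AUTO };

static unsigned int config_ms(enum ms_mode mode, unsigned int phy_data)
{
	switch (mode) {
	case MS_FORCE_MASTER:
		phy_data |= 0x1000u;
		break;
	case MS_AUTO:
		phy_data &= ~0x1000u;
		break;	/* explicit: no intended fallthrough into default */
	default:
		break;
	}
	return phy_data;
}

int main(void)
{
	printf("0x%x\n", config_ms(MS_AUTO, 0x1000u));
	return 0;
}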
} else { data &= ~IGP02E1000_PM_D0_LPLU; ret_val = e1e_wphy(hw, IGP02E1000_PHY_POWER_MGMT, data); + if (ret_val) + return ret_val; /* LPLU and SmartSpeed are mutually exclusive. LPLU is used * during Dx states where the power conservation is most * important. During driver activity we should enable diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h index 69a2329ea463..db79c4e6413e 100644 --- a/drivers/net/ethernet/intel/e1000e/hw.h +++ b/drivers/net/ethernet/intel/e1000e/hw.h @@ -1,8 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright(c) 1999 - 2018 Intel Corporation. */ -#ifndef _E1000_HW_H_ -#define _E1000_HW_H_ +#ifndef _E1000E_HW_H_ +#define _E1000E_HW_H_ #include "regs.h" #include "defines.h" @@ -714,4 +714,4 @@ struct e1000_hw { #include "80003es2lan.h" #include "ich8lan.h" -#endif +#endif /* _E1000E_HW_H_ */ diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 0ac8d79a7987..590ad110d383 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -2745,7 +2745,7 @@ release: } /** - * e1000_k1_gig_workaround_lv - K1 Si workaround + * e1000_k1_workaround_lv - K1 Si workaround * @hw: pointer to the HW structure * * Workaround to set the K1 beacon duration for 82579 parts in 10Mbps @@ -5220,7 +5220,7 @@ void e1000e_set_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw, } /** - * e1000_ipg3_phy_powerdown_workaround_ich8lan - Power down workaround on D3 + * e1000e_igp3_phy_powerdown_workaround_ich8lan - Power down workaround on D3 * @hw: pointer to the HW structure * * Workaround for 82566 power-down on D3 entry: diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index e9b82c209c2d..88e9035b75cf 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -25,6 +25,7 @@ #include <linux/pm_runtime.h> #include <linux/aer.h> #include <linux/prefetch.h> +#include <linux/suspend.h> #include "e1000.h" @@ -5974,19 +5975,23 @@ static void e1000_reset_task(struct work_struct *work) struct e1000_adapter *adapter; adapter = container_of(work, struct e1000_adapter, reset_task); + rtnl_lock(); /* don't run the task if already down */ - if (test_bit(__E1000_DOWN, &adapter->state)) + if (test_bit(__E1000_DOWN, &adapter->state)) { + rtnl_unlock(); return; + } if (!(adapter->flags & FLAG_RESTART_NOW)) { e1000e_dump(adapter); e_err("Reset adapter unexpectedly\n"); } e1000e_reinit_locked(adapter); + rtnl_unlock(); } /** - * e1000_get_stats64 - Get System Network Statistics + * e1000e_get_stats64 - Get System Network Statistics * @netdev: network interface device structure * @stats: rtnl_link_stats64 pointer * @@ -6159,7 +6164,7 @@ static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, } /** - * e1000e_hwtstamp_ioctl - control hardware time stamping + * e1000e_hwtstamp_set - control hardware time stamping * @netdev: network interface device structure * @ifr: interface request * @@ -6817,7 +6822,7 @@ static void e1000e_disable_aspm(struct pci_dev *pdev, u16 state) } /** - * e1000e_disable_aspm_locked Disable ASPM states. + * e1000e_disable_aspm_locked - Disable ASPM states. 
* @pdev: pointer to PCI device struct * @state: bit-mask of ASPM states to disable * @@ -6918,6 +6923,12 @@ static int __e1000_resume(struct pci_dev *pdev) return 0; } +static __maybe_unused int e1000e_pm_prepare(struct device *dev) +{ + return pm_runtime_suspended(dev) && + pm_suspend_via_firmware(); +} + static __maybe_unused int e1000e_pm_suspend(struct device *dev) { struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev)); @@ -7626,9 +7637,9 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) e1000_print_device_info(adapter); - dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE); + dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_SMART_PREPARE); - if (pci_dev_run_wake(pdev) && hw->mac.type < e1000_pch_cnp) + if (pci_dev_run_wake(pdev) && hw->mac.type != e1000_pch_cnp) pm_runtime_put_noidle(&pdev->dev); return 0; @@ -7851,6 +7862,7 @@ MODULE_DEVICE_TABLE(pci, e1000_pci_tbl); static const struct dev_pm_ops e1000_pm_ops = { #ifdef CONFIG_PM_SLEEP + .prepare = e1000e_pm_prepare, .suspend = e1000e_pm_suspend, .resume = e1000e_pm_resume, .freeze = e1000e_pm_freeze, diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c index bdd9dc163f15..1db35b2c7750 100644 --- a/drivers/net/ethernet/intel/e1000e/phy.c +++ b/drivers/net/ethernet/intel/e1000e/phy.c @@ -371,7 +371,7 @@ s32 e1000e_read_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 *data) } /** - * e1000e_write_phy_reg_igp - Write igp PHY register + * __e1000e_write_phy_reg_igp - Write igp PHY register * @hw: pointer to the HW structure * @offset: register offset to write to * @data: data to write at register offset diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c index f3f671311855..9e79d672f4f1 100644 --- a/drivers/net/ethernet/intel/e1000e/ptp.c +++ b/drivers/net/ethernet/intel/e1000e/ptp.c @@ -142,7 +142,7 @@ static int e1000e_phc_get_syncdevicetime(ktime_t *device, } /** - * e1000e_phc_getsynctime - Reads the current system/device cross timestamp + * e1000e_phc_getcrosststamp - Reads the current system/device cross timestamp * @ptp: ptp clock structure * @xtstamp: structure containing timestamp * diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c b/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c index c45315472245..86397c564dfc 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c @@ -105,7 +105,7 @@ static int fm10k_dcbnl_ieee_setpfc(struct net_device *dev, struct ieee_pfc *pfc) } /** - * fm10k_dcbnl_ieee_getdcbx - get the DCBX configuration for the device + * fm10k_dcbnl_getdcbx - get the DCBX configuration for the device * @dev: netdev interface for the device * * Returns that we support only IEEE DCB for this interface @@ -116,7 +116,7 @@ static u8 fm10k_dcbnl_getdcbx(struct net_device __always_unused *dev) } /** - * fm10k_dcbnl_ieee_setdcbx - get the DCBX configuration for the device + * fm10k_dcbnl_setdcbx - get the DCBX configuration for the device * @dev: netdev interface for the device * @mode: new mode for this device * diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c index 1d27b2fb23af..5c77054d67c6 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c @@ -185,7 +185,7 @@ void fm10k_dbg_q_vector_init(struct fm10k_q_vector *q_vector) } /** - * fm10k_dbg_free_q_vector_dir - setup debugfs for the 
q_vectors + * fm10k_dbg_q_vector_exit - setup debugfs for the q_vectors * @q_vector: q_vector to allocate directories for **/ void fm10k_dbg_q_vector_exit(struct fm10k_q_vector *q_vector) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 247f44f4cb30..3362f26d7f99 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -1774,7 +1774,7 @@ static void fm10k_free_q_vectors(struct fm10k_intfc *interface) } /** - * f10k_reset_msix_capability - reset MSI-X capability + * fm10k_reset_msix_capability - reset MSI-X capability * @interface: board private structure to initialize * * Reset the MSI-X capability back to its starting state @@ -1787,7 +1787,7 @@ static void fm10k_reset_msix_capability(struct fm10k_intfc *interface) } /** - * f10k_init_msix_capability - configure MSI-X capability + * fm10k_init_msix_capability - configure MSI-X capability * @interface: board private structure to initialize * * Attempt to configure the interrupts using the best available diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c index 8e2e92bf3cd4..30ca9ee1900b 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c @@ -692,7 +692,7 @@ static bool fm10k_mbx_tx_complete(struct fm10k_mbx_info *mbx) } /** - * fm10k_mbx_deqeueue_rx - Dequeues the message from the head in the Rx FIFO + * fm10k_mbx_dequeue_rx - Dequeues the message from the head in the Rx FIFO * @hw: pointer to hardware structure * @mbx: pointer to mailbox * @@ -1039,6 +1039,7 @@ static s32 fm10k_mbx_create_reply(struct fm10k_hw *hw, case FM10K_STATE_CLOSED: /* generate new header based on data */ fm10k_mbx_create_disconnect_hdr(mbx); + break; default: break; } @@ -2017,6 +2018,7 @@ static s32 fm10k_sm_mbx_process_reset(struct fm10k_hw *hw, case FM10K_STATE_CONNECT: /* Update remote value to match local value */ mbx->remote = mbx->local; + break; default: break; } diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c index c0780c3624c8..af1b0cde3670 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c @@ -1417,7 +1417,7 @@ s32 fm10k_iov_msg_lport_state_pf(struct fm10k_hw *hw, u32 **results, } /** - * fm10k_update_stats_hw_pf - Updates hardware related statistics of PF + * fm10k_update_hw_stats_pf - Updates hardware related statistics of PF * @hw: pointer to hardware structure * @stats: pointer to the stats structure to update * diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index cd53981fa5e0..9067cd3ce243 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -66,6 +66,8 @@ #define I40E_FDIR_RING_COUNT 32 #define I40E_MAX_AQ_BUF_SIZE 4096 #define I40E_AQ_LEN 256 +#define I40E_MIN_ARQ_LEN 1 +#define I40E_MIN_ASQ_LEN 2 #define I40E_AQ_WORK_LIMIT 66 /* max number of VFs + a little */ #define I40E_MAX_USER_PRIORITY 8 #define I40E_DEFAULT_TRAFFIC_CLASS BIT(0) @@ -142,6 +144,7 @@ enum i40e_state_t { __I40E_VIRTCHNL_OP_PENDING, __I40E_RECOVERY_MODE, __I40E_VF_RESETS_DISABLED, /* disable resets during i40e_remove */ + __I40E_VFS_RELEASING, /* This must be last as it determines the size of the BITMAP */ __I40E_STATE_SIZE__, }; diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 
ec19e18305ec..41b813fe07a5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -2332,7 +2332,7 @@ i40e_status i40e_aq_set_vsi_vlan_promisc(struct i40e_hw *hw, } /** - * i40e_get_vsi_params - get VSI configuration info + * i40e_aq_get_vsi_params - get VSI configuration info * @hw: pointer to the hw struct * @vsi_ctx: pointer to a vsi context struct * @cmd_details: pointer to command details structure or NULL @@ -2586,7 +2586,7 @@ i40e_status i40e_get_link_status(struct i40e_hw *hw, bool *link_up) } /** - * i40e_updatelink_status - update status of the HW network link + * i40e_update_link_info - update status of the HW network link * @hw: pointer to the hw struct **/ noinline_for_stack i40e_status i40e_update_link_info(struct i40e_hw *hw) @@ -5059,7 +5059,7 @@ u8 i40e_get_phy_address(struct i40e_hw *hw, u8 dev_num) } /** - * i40e_blink_phy_led + * i40e_blink_phy_link_led * @hw: pointer to the HW structure * @time: time how long led will blinks in secs * @interval: gap between LED on and off in msecs diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c index 243b0d2b7b72..673f341f4c0c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c +++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c @@ -234,7 +234,7 @@ static void i40e_parse_ieee_app_tlv(struct i40e_lldp_org_tlv *tlv, } /** - * i40e_parse_ieee_etsrec_tlv + * i40e_parse_ieee_tlv * @tlv: IEEE 802.1Qaz TLV * @dcbcfg: Local store to update ETS REC data * @@ -1588,7 +1588,7 @@ void i40e_dcb_hw_rx_ets_bw_config(struct i40e_hw *hw, u8 *bw_share, } /** - * i40e_dcb_hw_rx_ets_bw_config + * i40e_dcb_hw_rx_up2tc_config * @hw: pointer to the hw struct * @prio_tc: priority to tc assignment indexed by priority * diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c index 0345132a0ef5..e32c61909b31 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c +++ b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c @@ -392,7 +392,7 @@ static void i40e_dcbnl_set_pg_tc_cfg_tx(struct net_device *netdev, int tc, } /** - * i40e_dcbnl_set_pg_tc_cfg_tx - Set CEE PG Tx BW config + * i40e_dcbnl_set_pg_bwg_cfg_tx - Set CEE PG Tx BW config * @netdev: the corresponding netdev * @pgid: the corresponding traffic class * @bw_pct: the BW percentage for the specified traffic class diff --git a/drivers/net/ethernet/intel/i40e/i40e_ddp.c b/drivers/net/ethernet/intel/i40e/i40e_ddp.c index 5e08f100c413..e1069ae658ad 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ddp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ddp.c @@ -77,7 +77,7 @@ static bool i40e_ddp_profiles_overlap(struct i40e_profile_info *new, } /** - * i40e_ddp_does_profiles_ - checks if DDP overlaps with existing one. + * i40e_ddp_does_profile_overlap - checks if DDP overlaps with existing one. 
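[Editorial note: most hunks in these i40e files are the same one-line repair: the identifier on the first line of a kernel-doc block has to match the function it documents, otherwise scripts/kernel-doc warns along the lines of "expecting prototype for X(). Prototype was for Y() instead". The expected shape, shown on a hypothetical function:

/**
 * foo_update_link_info - update status of the HW network link
 * @hw: pointer to the hw struct
 *
 * Return: 0 on success, a negative error code otherwise.
 */
static int foo_update_link_info(struct foo_hw *hw);]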
* @hw: HW data structure * @pinfo: DDP profile information structure * diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index d7c13ca9be7d..291e61ac3e44 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -578,6 +578,9 @@ static void i40e_dbg_dump_desc(int cnt, int vsi_seid, int ring_id, int desc_n, case RING_TYPE_XDP: ring = kmemdup(vsi->xdp_rings[ring_id], sizeof(*ring), GFP_KERNEL); break; + default: + ring = NULL; + break; } if (!ring) return; @@ -651,7 +654,7 @@ static void i40e_dbg_dump_vsi_no_seid(struct i40e_pf *pf) } /** - * i40e_dbg_dump_stats - handles dump stats write into command datum + * i40e_dbg_dump_eth_stats - handles dump stats write into command datum * @pf: the i40e_pf created in command write * @estats: the eth stats structure to be dumped **/ @@ -1638,7 +1641,7 @@ static const struct file_operations i40e_dbg_command_fops = { static char i40e_dbg_netdev_ops_buf[256] = ""; /** - * i40e_dbg_netdev_ops - read for netdev_ops datum + * i40e_dbg_netdev_ops_read - read for netdev_ops datum * @filp: the opened file * @buffer: where to write the data for the user to read * @count: the size of the user's buffer diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index c70dec65a572..040a01400b85 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -212,7 +212,7 @@ static void __i40e_add_stat_strings(u8 **p, const struct i40e_stats stats[], } /** - * 40e_add_stat_strings - copy stat strings into ethtool buffer + * i40e_add_stat_strings - copy stat strings into ethtool buffer * @p: ethtool supplied buffer * @stats: stat definitions array * @@ -232,6 +232,8 @@ static void __i40e_add_stat_strings(u8 **p, const struct i40e_stats stats[], I40E_STAT(struct i40e_vsi, _name, _stat) #define I40E_VEB_STAT(_name, _stat) \ I40E_STAT(struct i40e_veb, _name, _stat) +#define I40E_VEB_TC_STAT(_name, _stat) \ + I40E_STAT(struct i40e_cp_veb_tc_stats, _name, _stat) #define I40E_PFC_STAT(_name, _stat) \ I40E_STAT(struct i40e_pfc_stats, _name, _stat) #define I40E_QUEUE_STAT(_name, _stat) \ @@ -266,11 +268,18 @@ static const struct i40e_stats i40e_gstrings_veb_stats[] = { I40E_VEB_STAT("veb.rx_unknown_protocol", stats.rx_unknown_protocol), }; +struct i40e_cp_veb_tc_stats { + u64 tc_rx_packets; + u64 tc_rx_bytes; + u64 tc_tx_packets; + u64 tc_tx_bytes; +}; + static const struct i40e_stats i40e_gstrings_veb_tc_stats[] = { - I40E_VEB_STAT("veb.tc_%u_tx_packets", tc_stats.tc_tx_packets), - I40E_VEB_STAT("veb.tc_%u_tx_bytes", tc_stats.tc_tx_bytes), - I40E_VEB_STAT("veb.tc_%u_rx_packets", tc_stats.tc_rx_packets), - I40E_VEB_STAT("veb.tc_%u_rx_bytes", tc_stats.tc_rx_bytes), + I40E_VEB_TC_STAT("veb.tc_%u_tx_packets", tc_tx_packets), + I40E_VEB_TC_STAT("veb.tc_%u_tx_bytes", tc_tx_bytes), + I40E_VEB_TC_STAT("veb.tc_%u_rx_packets", tc_rx_packets), + I40E_VEB_TC_STAT("veb.tc_%u_rx_bytes", tc_rx_bytes), }; static const struct i40e_stats i40e_gstrings_misc_stats[] = { @@ -1101,6 +1110,7 @@ static int i40e_get_link_ksettings(struct net_device *netdev, /* Set flow control settings */ ethtool_link_ksettings_add_link_mode(ks, supported, Pause); + ethtool_link_ksettings_add_link_mode(ks, supported, Asym_Pause); switch (hw->fc.requested_mode) { case I40E_FC_FULL: @@ -2217,6 +2227,29 @@ static int i40e_get_sset_count(struct net_device *netdev, int sset) } /** + * 
i40e_get_veb_tc_stats - copy VEB TC statistics to formatted structure + * @tc: the TC statistics in VEB structure (veb->tc_stats) + * @i: the index of traffic class in (veb->tc_stats) structure to copy + * + * Copy VEB TC statistics from structure of arrays (veb->tc_stats) to + * one dimensional structure i40e_cp_veb_tc_stats. + * Produce formatted i40e_cp_veb_tc_stats structure of the VEB TC + * statistics for the given TC. + **/ +static struct i40e_cp_veb_tc_stats +i40e_get_veb_tc_stats(struct i40e_veb_tc_stats *tc, unsigned int i) +{ + struct i40e_cp_veb_tc_stats veb_tc = { + .tc_rx_packets = tc->tc_rx_packets[i], + .tc_rx_bytes = tc->tc_rx_bytes[i], + .tc_tx_packets = tc->tc_tx_packets[i], + .tc_tx_bytes = tc->tc_tx_bytes[i], + }; + + return veb_tc; +} + +/** * i40e_get_pfc_stats - copy HW PFC statistics to formatted structure * @pf: the PF device structure * @i: the priority value to copy @@ -2300,8 +2333,16 @@ static void i40e_get_ethtool_stats(struct net_device *netdev, i40e_gstrings_veb_stats); for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) - i40e_add_ethtool_stats(&data, veb_stats ? veb : NULL, - i40e_gstrings_veb_tc_stats); + if (veb_stats) { + struct i40e_cp_veb_tc_stats veb_tc = + i40e_get_veb_tc_stats(&veb->tc_stats, i); + + i40e_add_ethtool_stats(&data, &veb_tc, + i40e_gstrings_veb_tc_stats); + } else { + i40e_add_ethtool_stats(&data, NULL, + i40e_gstrings_veb_tc_stats); + } i40e_add_ethtool_stats(&data, pf, i40e_gstrings_stats); @@ -2368,21 +2409,15 @@ static void i40e_get_priv_flag_strings(struct net_device *netdev, u8 *data) struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; - char *p = (char *)data; unsigned int i; + u8 *p = data; - for (i = 0; i < I40E_PRIV_FLAGS_STR_LEN; i++) { - snprintf(p, ETH_GSTRING_LEN, "%s", - i40e_gstrings_priv_flags[i].flag_string); - p += ETH_GSTRING_LEN; - } + for (i = 0; i < I40E_PRIV_FLAGS_STR_LEN; i++) + ethtool_sprintf(&p, i40e_gstrings_priv_flags[i].flag_string); if (pf->hw.pf_id != 0) return; - for (i = 0; i < I40E_GL_PRIV_FLAGS_STR_LEN; i++) { - snprintf(p, ETH_GSTRING_LEN, "%s", - i40e_gl_gstrings_priv_flags[i].flag_string); - p += ETH_GSTRING_LEN; - } + for (i = 0; i < I40E_GL_PRIV_FLAGS_STR_LEN; i++) + ethtool_sprintf(&p, i40e_gl_gstrings_priv_flags[i].flag_string); } static void i40e_get_strings(struct net_device *netdev, u32 stringset, @@ -5439,7 +5474,7 @@ static int i40e_get_module_eeprom(struct net_device *netdev, status = i40e_aq_get_phy_register(hw, I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE, - true, addr, offset, &value, NULL); + addr, true, offset, &value, NULL); if (status) return -EIO; data[i] = value; diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c index a3da422ab05b..d6e92ecddfbd 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c +++ b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c @@ -511,7 +511,7 @@ configure_lan_hmc_out: } /** - * i40e_delete_hmc_object - remove hmc objects + * i40e_delete_lan_hmc_object - remove hmc objects * @hw: pointer to the HW structure * @info: pointer to i40e_hmc_delete_obj_info struct * diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 353deae139f9..c2d145a56b5e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -6,6 +6,7 @@ #include <linux/pci.h> #include <linux/bpf.h> #include <generated/utsrelease.h> +#include <linux/crash_dump.h> /* Local includes 
*/ #include "i40e.h" @@ -2023,7 +2024,7 @@ static void i40e_undo_add_filter_entries(struct i40e_vsi *vsi, } /** - * i40e_next_entry - Get the next non-broadcast filter from a list + * i40e_next_filter - Get the next non-broadcast filter from a list * @next: pointer to filter in list * * Returns the next non-broadcast filter in the list. Required so that we @@ -2560,8 +2561,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) i40e_stat_str(hw, aq_ret), i40e_aq_str(hw, hw->aq.asq_last_status)); } else { - dev_info(&pf->pdev->dev, "%s is %s allmulti mode.\n", - vsi->netdev->name, + dev_info(&pf->pdev->dev, "%s allmulti mode.\n", cur_multipromisc ? "entering" : "leaving"); } } @@ -3259,6 +3259,17 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring) } /** + * i40e_rx_offset - Return expected offset into page to access data + * @rx_ring: Ring we are requesting offset of + * + * Returns the offset value for ring into the data buffer. + */ +static unsigned int i40e_rx_offset(struct i40e_ring *rx_ring) +{ + return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0; +} + +/** * i40e_configure_rx_ring - Configure a receive ring context * @ring: The Rx ring to configure * @@ -3369,6 +3380,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) else set_ring_build_skb_enabled(ring); + ring->rx_offset = i40e_rx_offset(ring); + /* cache tail for quicker writes, and clear the reg before use */ ring->tail = hw->hw_addr + I40E_QRX_TAIL(pf_q); writel(0, ring->tail); @@ -5191,7 +5204,7 @@ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf) } /** - * i40e_pf_get_pf_tc_map - Get bitmap for enabled traffic classes + * i40e_pf_get_tc_map - Get bitmap for enabled traffic classes * @pf: PF being queried * * Return a bitmap for enabled traffic classes for this PF. @@ -6725,9 +6738,9 @@ out: set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state); set_bit(__I40E_CLIENT_L2_CHANGE, pf->state); } - /* registers are set, lets apply */ - if (pf->hw_features & I40E_HW_USE_SET_LLDP_MIB) - ret = i40e_hw_set_dcb_config(pf, new_cfg); + /* registers are set, lets apply */ + if (pf->hw_features & I40E_HW_USE_SET_LLDP_MIB) + ret = i40e_hw_set_dcb_config(pf, new_cfg); } err: @@ -7326,7 +7339,7 @@ static void i40e_vsi_set_default_tc_config(struct i40e_vsi *vsi) qcount = min_t(int, vsi->alloc_queue_pairs, i40e_pf_get_max_q_per_tc(vsi->back)); for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { - /* For the TC that is not enabled set the offset to to default + /* For the TC that is not enabled set the offset to default * queue and allocate one queue for the given TC. 
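[Editorial note: the i40e_rx_offset() addition above moves the headroom decision to ring-configure time; the duplicate helper in i40e_txrx.c is deleted later in this patch, and the hot path then just reads the cached ring->rx_offset. A self-contained sketch of the pattern, with a placeholder struct and padding value:

#include <stdbool.h>

struct my_rx_ring {
	unsigned int rx_offset;
	bool build_skb;
};

#define MY_SKB_PAD 192u	/* stand-in value, not the real I40E_SKB_PAD */

/* Headroom is reserved only when the build_skb path is in use. */
static unsigned int my_rx_offset(const struct my_rx_ring *ring)
{
	return ring->build_skb ? MY_SKB_PAD : 0;
}

static void my_configure_rx_ring(struct my_rx_ring *ring)
{
	ring->rx_offset = my_rx_offset(ring);	/* computed once per (re)configure */
}]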
*/ vsi->tc_config.tc_info[i].qoffset = 0; @@ -9454,7 +9467,7 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf) } /** - * i40e_get_current_atr_count - Get the count of total FD ATR filters programmed + * i40e_get_current_atr_cnt - Get the count of total FD ATR filters programmed * @pf: board private structure **/ u32 i40e_get_current_atr_cnt(struct i40e_pf *pf) @@ -10560,12 +10573,6 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) goto end_core_reset; } - if (!lock_acquired) - rtnl_lock(); - ret = i40e_setup_pf_switch(pf, reinit); - if (ret) - goto end_unlock; - #ifdef CONFIG_I40E_DCB /* Enable FW to write a default DCB config on link-up * unless I40E_FLAG_TC_MQPRIO was enabled or DCB @@ -10580,7 +10587,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) i40e_aq_set_dcb_parameters(hw, false, NULL); dev_warn(&pf->pdev->dev, "DCB is not supported for X710-T*L 2.5/5G speeds\n"); - pf->flags &= ~I40E_FLAG_DCB_CAPABLE; + pf->flags &= ~I40E_FLAG_DCB_CAPABLE; } else { i40e_aq_set_dcb_parameters(hw, true, NULL); ret = i40e_init_pf_dcb(pf); @@ -10594,6 +10601,11 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) } #endif /* CONFIG_I40E_DCB */ + if (!lock_acquired) + rtnl_lock(); + ret = i40e_setup_pf_switch(pf, reinit); + if (ret) + goto end_unlock; /* The driver only wants link up/down and module qualification * reports from firmware. Note the negative logic. @@ -10612,7 +10624,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) * need to rebuild the switch model in the HW. * * If there were VEBs but the reconstitution failed, we'll try - * try to recover minimal use by getting the basic PF VSI working. + * to recover minimal use by getting the basic PF VSI working. 
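[Editorial note: the i40e_rebuild() hunks above reorder recovery so DCB is initialized before i40e_setup_pf_switch() runs, letting the rebuilt switch see the final DCB state, while the RTNL handling keeps the usual lock_acquired convention. A reduced sketch with placeholder names (my_pf, my_init_dcb(), my_setup_switch() are illustrative; the real code degrades gracefully on DCB init failure rather than returning):

#include <linux/rtnetlink.h>

struct my_pf { int dummy; };
static int my_init_dcb(struct my_pf *pf) { return 0; }
static int my_setup_switch(struct my_pf *pf) { return 0; }

static int my_rebuild(struct my_pf *pf, bool lock_acquired)
{
	int ret;

	ret = my_init_dcb(pf);		/* DCB first, so the switch setup sees it */
	if (ret)
		return ret;

	if (!lock_acquired)		/* caller may already hold RTNL */
		rtnl_lock();
	ret = my_setup_switch(pf);
	if (!lock_acquired)
		rtnl_unlock();
	return ret;
}]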
*/ if (vsi->uplink_seid != pf->mac_seid) { dev_dbg(&pf->pdev->dev, "attempting to rebuild switch\n"); @@ -11028,6 +11040,11 @@ static int i40e_set_num_rings_in_vsi(struct i40e_vsi *vsi) return -ENODATA; } + if (is_kdump_kernel()) { + vsi->num_tx_desc = I40E_MIN_NUM_DESCRIPTORS; + vsi->num_rx_desc = I40E_MIN_NUM_DESCRIPTORS; + } + return 0; } @@ -12346,6 +12363,7 @@ static int i40e_sw_init(struct i40e_pf *pf) { int err = 0; int size; + u16 pow; /* Set default capability flags */ pf->flags = I40E_FLAG_RX_CSUM_ENABLED | @@ -12364,6 +12382,11 @@ static int i40e_sw_init(struct i40e_pf *pf) pf->rss_table_size = pf->hw.func_caps.rss_table_size; pf->rss_size_max = min_t(int, pf->rss_size_max, pf->hw.func_caps.num_tx_qp); + + /* find the next higher power-of-2 of num cpus */ + pow = roundup_pow_of_two(num_online_cpus()); + pf->rss_size_max = min_t(int, pf->rss_size_max, pow); + if (pf->hw.func_caps.rss) { pf->flags |= I40E_FLAG_RSS_ENABLED; pf->alloc_rss_size = min_t(int, pf->rss_size_max, @@ -15127,12 +15150,16 @@ static int i40e_init_recovery_mode(struct i40e_pf *pf, struct i40e_hw *hw) * in order to register the netdev */ v_idx = i40e_vsi_mem_alloc(pf, I40E_VSI_MAIN); - if (v_idx < 0) + if (v_idx < 0) { + err = v_idx; goto err_switch_setup; + } pf->lan_vsi = v_idx; vsi = pf->vsi[v_idx]; - if (!vsi) + if (!vsi) { + err = -EFAULT; goto err_switch_setup; + } vsi->alloc_queue_pairs = 1; err = i40e_config_netdev(vsi); if (err) @@ -15321,8 +15348,13 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) i40e_check_recovery_mode(pf); - hw->aq.num_arq_entries = I40E_AQ_LEN; - hw->aq.num_asq_entries = I40E_AQ_LEN; + if (is_kdump_kernel()) { + hw->aq.num_arq_entries = I40E_MIN_ARQ_LEN; + hw->aq.num_asq_entries = I40E_MIN_ASQ_LEN; + } else { + hw->aq.num_arq_entries = I40E_AQ_LEN; + hw->aq.num_asq_entries = I40E_AQ_LEN; + } hw->aq.arq_buf_size = I40E_MAX_AQ_BUF_SIZE; hw->aq.asq_buf_size = I40E_MAX_AQ_BUF_SIZE; pf->adminq_work_limit = I40E_AQ_WORK_LIMIT; @@ -15485,6 +15517,14 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto err_switch_setup; + /* Reduce Tx and Rx pairs for kdump + * When MSI-X is enabled, it's not allowed to use more TC queue + * pairs than MSI-X vectors (pf->num_lan_msix) exist. Thus + * vsi->num_queue_pairs will be equal to pf->num_lan_msix, i.e., 1. + */ + if (is_kdump_kernel()) + pf->num_lan_msix = 1; + pf->udp_tunnel_nic.set_port = i40e_udp_tunnel_set_port; pf->udp_tunnel_nic.unset_port = i40e_udp_tunnel_unset_port; pf->udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP; diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c index 7164f4ad8120..fe6dca846028 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c +++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c @@ -4,7 +4,7 @@ #include "i40e_prototype.h" /** - * i40e_init_nvm_ops - Initialize NVM function pointers + * i40e_init_nvm - Initialize NVM function pointers * @hw: pointer to the HW structure * * Setup the function pointers and the NVM info structure. 
Should be called diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index 7a879614ca55..f1f6fc3744e9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -216,7 +216,7 @@ static int i40e_ptp_feature_enable(struct ptp_clock_info *ptp, } /** - * i40e_ptp_update_latch_events - Read I40E_PRTTSYN_STAT_1 and latch events + * i40e_ptp_get_rx_events - Read I40E_PRTTSYN_STAT_1 and latch events * @pf: the PF data structure * * This function reads I40E_PRTTSYN_STAT_1 and updates the corresponding timers diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 627794b31e33..121cd99fdeff 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1570,17 +1570,6 @@ void i40e_free_rx_resources(struct i40e_ring *rx_ring) } /** - * i40e_rx_offset - Return expected offset into page to access data - * @rx_ring: Ring we are requesting offset of - * - * Returns the offset value for ring into the data buffer. - */ -static unsigned int i40e_rx_offset(struct i40e_ring *rx_ring) -{ - return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0; -} - -/** * i40e_setup_rx_descriptors - Allocate Rx descriptors * @rx_ring: Rx descriptor ring (for a specific queue) to setup * @@ -1608,7 +1597,6 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring) rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; - rx_ring->rx_offset = i40e_rx_offset(rx_ring); /* XDP RX-queue info only needed for RX rings exposed to XDP */ if (rx_ring->vsi->type == I40E_VSI_MAIN) { @@ -2307,8 +2295,7 @@ int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp, struct i40e_ring *xdp_ring) * @rx_ring: Rx ring being processed * @xdp: XDP buffer containing the frame **/ -static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring, - struct xdp_buff *xdp) +static int i40e_run_xdp(struct i40e_ring *rx_ring, struct xdp_buff *xdp) { int err, result = I40E_XDP_PASS; struct i40e_ring *xdp_ring; @@ -2347,7 +2334,7 @@ static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring, } xdp_out: rcu_read_unlock(); - return ERR_PTR(-result); + return result; } /** @@ -2460,6 +2447,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) unsigned int xdp_xmit = 0; bool failure = false; struct xdp_buff xdp; + int xdp_res = 0; #if (PAGE_SIZE < 8192) frame_sz = i40e_rx_frame_truesize(rx_ring, 0); @@ -2525,12 +2513,10 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) /* At larger PAGE_SIZE, frame_sz depend on len size */ xdp.frame_sz = i40e_rx_frame_truesize(rx_ring, size); #endif - skb = i40e_run_xdp(rx_ring, &xdp); + xdp_res = i40e_run_xdp(rx_ring, &xdp); } - if (IS_ERR(skb)) { - unsigned int xdp_res = -PTR_ERR(skb); - + if (xdp_res) { if (xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR)) { xdp_xmit |= xdp_res; i40e_rx_buffer_flip(rx_ring, rx_buffer, size); @@ -3345,7 +3331,7 @@ static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, } /** - * i40e_create_tx_ctx Build the Tx context descriptor + * i40e_create_tx_ctx - Build the Tx context descriptor * @tx_ring: ring to create the descriptor on * @cd_type_cmd_tso_mss: Quad Word 1 * @cd_tunneling: Quad Word 0 - bits 0-31 @@ -3847,8 +3833,8 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev) * @frames: array of XDP buffer pointers * @flags: XDP extra info * - * Returns number of frames successfully sent. 
Frames that fail are - * free'ed via XDP return API. + * Returns number of frames successfully sent. Failed frames + * will be free'ed by XDP core. * * For error cases, a negative errno code is returned and no-frames * are transmitted (caller must handle freeing frames). @@ -3861,7 +3847,7 @@ int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; struct i40e_ring *xdp_ring; - int drops = 0; + int nxmit = 0; int i; if (test_bit(__I40E_VSI_DOWN, vsi->state)) @@ -3881,14 +3867,13 @@ int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, int err; err = i40e_xmit_xdp_ring(xdpf, xdp_ring); - if (err != I40E_XDP_TX) { - xdp_return_frame_rx_napi(xdpf); - drops++; - } + if (err != I40E_XDP_TX) + break; + nxmit++; } if (unlikely(flags & XDP_XMIT_FLUSH)) i40e_xdp_ring_update_tail(xdp_ring); - return n - drops; + return nxmit; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 1b6ec9be155a..eff0a30790dd 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -40,6 +40,66 @@ static void i40e_vc_vf_broadcast(struct i40e_pf *pf, } /** + * i40e_vc_link_speed2mbps + * converts i40e_aq_link_speed to integer value of Mbps + * @link_speed: the speed to convert + * + * return the speed as direct value of Mbps. + **/ +static u32 +i40e_vc_link_speed2mbps(enum i40e_aq_link_speed link_speed) +{ + switch (link_speed) { + case I40E_LINK_SPEED_100MB: + return SPEED_100; + case I40E_LINK_SPEED_1GB: + return SPEED_1000; + case I40E_LINK_SPEED_2_5GB: + return SPEED_2500; + case I40E_LINK_SPEED_5GB: + return SPEED_5000; + case I40E_LINK_SPEED_10GB: + return SPEED_10000; + case I40E_LINK_SPEED_20GB: + return SPEED_20000; + case I40E_LINK_SPEED_25GB: + return SPEED_25000; + case I40E_LINK_SPEED_40GB: + return SPEED_40000; + case I40E_LINK_SPEED_UNKNOWN: + return SPEED_UNKNOWN; + } + return SPEED_UNKNOWN; +} + +/** + * i40e_set_vf_link_state + * @vf: pointer to the VF structure + * @pfe: pointer to PF event structure + * @ls: pointer to link status structure + * + * set a link state on a single vf + **/ +static void i40e_set_vf_link_state(struct i40e_vf *vf, + struct virtchnl_pf_event *pfe, struct i40e_link_status *ls) +{ + u8 link_status = ls->link_info & I40E_AQ_LINK_UP; + + if (vf->link_forced) + link_status = vf->link_up; + + if (vf->driver_caps & VIRTCHNL_VF_CAP_ADV_LINK_SPEED) { + pfe->event_data.link_event_adv.link_speed = link_status ? + i40e_vc_link_speed2mbps(ls->link_speed) : 0; + pfe->event_data.link_event_adv.link_status = link_status; + } else { + pfe->event_data.link_event.link_speed = link_status ? + i40e_virtchnl_link_speed(ls->link_speed) : 0; + pfe->event_data.link_event.link_status = link_status; + } +} + +/** * i40e_vc_notify_vf_link_state * @vf: pointer to the VF structure * @@ -55,16 +115,9 @@ static void i40e_vc_notify_vf_link_state(struct i40e_vf *vf) pfe.event = VIRTCHNL_EVENT_LINK_CHANGE; pfe.severity = PF_EVENT_SEVERITY_INFO; - if (vf->link_forced) { - pfe.event_data.link_event.link_status = vf->link_up; - pfe.event_data.link_event.link_speed = - (vf->link_up ? 
i40e_virtchnl_link_speed(ls->link_speed) : 0); - } else { - pfe.event_data.link_event.link_status = - ls->link_info & I40E_AQ_LINK_UP; - pfe.event_data.link_event.link_speed = - i40e_virtchnl_link_speed(ls->link_speed); - } + + i40e_set_vf_link_state(vf, &pfe, ls); + i40e_aq_send_msg_to_vf(hw, abs_vf_id, VIRTCHNL_OP_EVENT, 0, (u8 *)&pfe, sizeof(pfe), NULL); } @@ -137,6 +190,7 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf) **/ static inline void i40e_vc_disable_vf(struct i40e_vf *vf) { + struct i40e_pf *pf = vf->pf; int i; i40e_vc_notify_vf_reset(vf); @@ -147,6 +201,11 @@ static inline void i40e_vc_disable_vf(struct i40e_vf *vf) * ensure a reset. */ for (i = 0; i < 20; i++) { + /* If PF is in VFs releasing state reset VF is impossible, + * so leave it. + */ + if (test_bit(__I40E_VFS_RELEASING, pf->state)) + return; if (i40e_reset_vf(vf, false)) return; usleep_range(10000, 20000); @@ -1574,6 +1633,8 @@ void i40e_free_vfs(struct i40e_pf *pf) if (!pf->vf) return; + + set_bit(__I40E_VFS_RELEASING, pf->state); while (test_and_set_bit(__I40E_VF_DISABLE, pf->state)) usleep_range(1000, 2000); @@ -1631,6 +1692,7 @@ void i40e_free_vfs(struct i40e_pf *pf) } } clear_bit(__I40E_VF_DISABLE, pf->state); + clear_bit(__I40E_VFS_RELEASING, pf->state); } #ifdef CONFIG_PCI_IOV @@ -1940,6 +2002,7 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) VIRTCHNL_VF_OFFLOAD_VLAN; vfres->vf_cap_flags = VIRTCHNL_VF_OFFLOAD_L2; + vfres->vf_cap_flags |= VIRTCHNL_VF_CAP_ADV_LINK_SPEED; vsi = pf->vsi[vf->lan_vsi_idx]; if (!vsi->info.pvid) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_VLAN; @@ -3687,26 +3750,8 @@ static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg) } /* get link speed in MB to validate rate limit */ - switch (ls->link_speed) { - case VIRTCHNL_LINK_SPEED_100MB: - speed = SPEED_100; - break; - case VIRTCHNL_LINK_SPEED_1GB: - speed = SPEED_1000; - break; - case VIRTCHNL_LINK_SPEED_10GB: - speed = SPEED_10000; - break; - case VIRTCHNL_LINK_SPEED_20GB: - speed = SPEED_20000; - break; - case VIRTCHNL_LINK_SPEED_25GB: - speed = SPEED_25000; - break; - case VIRTCHNL_LINK_SPEED_40GB: - speed = SPEED_40000; - break; - default: + speed = i40e_vc_link_speed2mbps(ls->link_speed); + if (speed == SPEED_UNKNOWN) { dev_err(&pf->pdev->dev, "Cannot detect link speed\n"); aq_ret = I40E_ERR_PARAM; @@ -4455,23 +4500,17 @@ int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link) switch (link) { case IFLA_VF_LINK_STATE_AUTO: vf->link_forced = false; - pfe.event_data.link_event.link_status = - pf->hw.phy.link_info.link_info & I40E_AQ_LINK_UP; - pfe.event_data.link_event.link_speed = - (enum virtchnl_link_speed) - pf->hw.phy.link_info.link_speed; + i40e_set_vf_link_state(vf, &pfe, ls); break; case IFLA_VF_LINK_STATE_ENABLE: vf->link_forced = true; vf->link_up = true; - pfe.event_data.link_event.link_status = true; - pfe.event_data.link_event.link_speed = i40e_virtchnl_link_speed(ls->link_speed); + i40e_set_vf_link_state(vf, &pfe, ls); break; case IFLA_VF_LINK_STATE_DISABLE: vf->link_forced = true; vf->link_up = false; - pfe.event_data.link_event.link_status = false; - pfe.event_data.link_event.link_speed = 0; + i40e_set_vf_link_state(vf, &pfe, ls); break; default: ret = -EINVAL; diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index fc32c5019b0f..46d884417c63 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -160,6 +160,13 @@ static int i40e_run_xdp_zc(struct i40e_ring 
*rx_ring, struct xdp_buff *xdp) xdp_prog = READ_ONCE(rx_ring->xdp_prog); act = bpf_prog_run_xdp(xdp_prog, xdp); + if (likely(act == XDP_REDIRECT)) { + err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); + result = !err ? I40E_XDP_REDIR : I40E_XDP_CONSUMED; + rcu_read_unlock(); + return result; + } + switch (act) { case XDP_PASS: break; @@ -167,10 +174,6 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp) xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index]; result = i40e_xmit_xdp_tx_ring(xdp, xdp_ring); break; - case XDP_REDIRECT: - err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); - result = !err ? I40E_XDP_REDIR : I40E_XDP_CONSUMED; - break; default: bpf_warn_invalid_xdp_action(act); fallthrough; @@ -471,7 +474,7 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, descs, budget); if (!nb_pkts) - return false; + return true; if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) { nb_processed = xdp_ring->count - xdp_ring->next_to_use; @@ -488,7 +491,7 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) i40e_update_tx_stats(xdp_ring, nb_pkts, total_bytes); - return true; + return nb_pkts < budget; } /** @@ -625,7 +628,7 @@ void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring) } /** - * i40e_xsk_clean_xdp_ring - Clean the XDP Tx ring on shutdown + * i40e_xsk_clean_tx_ring - Clean the XDP Tx ring on shutdown * @tx_ring: XDP Tx ring **/ void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring) diff --git a/drivers/net/ethernet/intel/iavf/Makefile b/drivers/net/ethernet/intel/iavf/Makefile index c997063ed728..9c3e45c54d01 100644 --- a/drivers/net/ethernet/intel/iavf/Makefile +++ b/drivers/net/ethernet/intel/iavf/Makefile @@ -11,5 +11,6 @@ subdir-ccflags-y += -I$(src) obj-$(CONFIG_IAVF) += iavf.o -iavf-objs := iavf_main.o iavf_ethtool.o iavf_virtchnl.o \ +iavf-objs := iavf_main.o iavf_ethtool.o iavf_virtchnl.o iavf_fdir.o \ + iavf_adv_rss.o \ iavf_txrx.o iavf_common.o iavf_adminq.o iavf_client.o diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 8a65525a7c0d..e8bd04100ecd 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -37,6 +37,8 @@ #include "iavf_type.h" #include <linux/avf/virtchnl.h> #include "iavf_txrx.h" +#include "iavf_fdir.h" +#include "iavf_adv_rss.h" #define DEFAULT_DEBUG_LEVEL_SHIFT 3 #define PFX "iavf: " @@ -300,6 +302,10 @@ struct iavf_adapter { #define IAVF_FLAG_AQ_DISABLE_CHANNELS BIT(22) #define IAVF_FLAG_AQ_ADD_CLOUD_FILTER BIT(23) #define IAVF_FLAG_AQ_DEL_CLOUD_FILTER BIT(24) +#define IAVF_FLAG_AQ_ADD_FDIR_FILTER BIT(25) +#define IAVF_FLAG_AQ_DEL_FDIR_FILTER BIT(26) +#define IAVF_FLAG_AQ_ADD_ADV_RSS_CFG BIT(27) +#define IAVF_FLAG_AQ_DEL_ADV_RSS_CFG BIT(28) /* OS defined structs */ struct net_device *netdev; @@ -340,6 +346,10 @@ struct iavf_adapter { VIRTCHNL_VF_OFFLOAD_VLAN) #define ADV_LINK_SUPPORT(_a) ((_a)->vf_res->vf_cap_flags & \ VIRTCHNL_VF_CAP_ADV_LINK_SPEED) +#define FDIR_FLTR_SUPPORT(_a) ((_a)->vf_res->vf_cap_flags & \ + VIRTCHNL_VF_OFFLOAD_FDIR_PF) +#define ADV_RSS_SUPPORT(_a) ((_a)->vf_res->vf_cap_flags & \ + VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF) struct virtchnl_vf_resource *vf_res; /* incl. 
all VSIs */ struct virtchnl_vsi_resource *vsi_res; /* our LAN VSI */ struct virtchnl_version_info pf_version; @@ -362,6 +372,14 @@ struct iavf_adapter { /* lock to protect access to the cloud filter list */ spinlock_t cloud_filter_list_lock; u16 num_cloud_filters; + +#define IAVF_MAX_FDIR_FILTERS 128 /* max allowed Flow Director filters */ + u16 fdir_active_fltr; + struct list_head fdir_list_head; + spinlock_t fdir_fltr_lock; /* protect the Flow Director filter list */ + + struct list_head adv_rss_list_head; + spinlock_t adv_rss_lock; /* protect the RSS management list */ }; @@ -432,6 +450,10 @@ void iavf_enable_channels(struct iavf_adapter *adapter); void iavf_disable_channels(struct iavf_adapter *adapter); void iavf_add_cloud_filter(struct iavf_adapter *adapter); void iavf_del_cloud_filter(struct iavf_adapter *adapter); +void iavf_add_fdir_filter(struct iavf_adapter *adapter); +void iavf_del_fdir_filter(struct iavf_adapter *adapter); +void iavf_add_adv_rss_cfg(struct iavf_adapter *adapter); +void iavf_del_adv_rss_cfg(struct iavf_adapter *adapter); struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter, const u8 *macaddr); #endif /* _IAVF_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_adv_rss.c b/drivers/net/ethernet/intel/iavf/iavf_adv_rss.c new file mode 100644 index 000000000000..6edbf134b73f --- /dev/null +++ b/drivers/net/ethernet/intel/iavf/iavf_adv_rss.c @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021, Intel Corporation. */ + +/* advanced RSS configuration ethtool support for iavf */ + +#include "iavf.h" + +/** + * iavf_fill_adv_rss_ip4_hdr - fill the IPv4 RSS protocol header + * @hdr: the virtchnl message protocol header data structure + * @hash_flds: the RSS configuration protocol hash fields + */ +static void +iavf_fill_adv_rss_ip4_hdr(struct virtchnl_proto_hdr *hdr, u64 hash_flds) +{ + VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, IPV4); + + if (hash_flds & IAVF_ADV_RSS_HASH_FLD_IPV4_SA) + VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV4, SRC); + + if (hash_flds & IAVF_ADV_RSS_HASH_FLD_IPV4_DA) + VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV4, DST); +} + +/** + * iavf_fill_adv_rss_ip6_hdr - fill the IPv6 RSS protocol header + * @hdr: the virtchnl message protocol header data structure + * @hash_flds: the RSS configuration protocol hash fields + */ +static void +iavf_fill_adv_rss_ip6_hdr(struct virtchnl_proto_hdr *hdr, u64 hash_flds) +{ + VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, IPV6); + + if (hash_flds & IAVF_ADV_RSS_HASH_FLD_IPV6_SA) + VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV6, SRC); + + if (hash_flds & IAVF_ADV_RSS_HASH_FLD_IPV6_DA) + VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV6, DST); +} + +/** + * iavf_fill_adv_rss_tcp_hdr - fill the TCP RSS protocol header + * @hdr: the virtchnl message protocol header data structure + * @hash_flds: the RSS configuration protocol hash fields + */ +static void +iavf_fill_adv_rss_tcp_hdr(struct virtchnl_proto_hdr *hdr, u64 hash_flds) +{ + VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, TCP); + + if (hash_flds & IAVF_ADV_RSS_HASH_FLD_TCP_SRC_PORT) + VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, TCP, SRC_PORT); + + if (hash_flds & IAVF_ADV_RSS_HASH_FLD_TCP_DST_PORT) + VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, TCP, DST_PORT); +} + +/** + * iavf_fill_adv_rss_udp_hdr - fill the UDP RSS protocol header + * @hdr: the virtchnl message protocol header data structure + * @hash_flds: the RSS configuration protocol hash fields + */ +static void +iavf_fill_adv_rss_udp_hdr(struct virtchnl_proto_hdr *hdr, u64 hash_flds) +{ + VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, 
UDP); + + if (hash_flds & IAVF_ADV_RSS_HASH_FLD_UDP_SRC_PORT) + VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, UDP, SRC_PORT); + + if (hash_flds & IAVF_ADV_RSS_HASH_FLD_UDP_DST_PORT) + VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, UDP, DST_PORT); +} + +/** + * iavf_fill_adv_rss_sctp_hdr - fill the SCTP RSS protocol header + * @hdr: the virtchnl message protocol header data structure + * @hash_flds: the RSS configuration protocol hash fields + */ +static void +iavf_fill_adv_rss_sctp_hdr(struct virtchnl_proto_hdr *hdr, u64 hash_flds) +{ + VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, SCTP); + + if (hash_flds & IAVF_ADV_RSS_HASH_FLD_SCTP_SRC_PORT) + VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, SCTP, SRC_PORT); + + if (hash_flds & IAVF_ADV_RSS_HASH_FLD_SCTP_DST_PORT) + VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, SCTP, DST_PORT); +} + +/** + * iavf_fill_adv_rss_cfg_msg - fill the RSS configuration into virtchnl message + * @rss_cfg: the virtchnl message to be filled with RSS configuration setting + * @packet_hdrs: the RSS configuration protocol header types + * @hash_flds: the RSS configuration protocol hash fields + * + * Returns 0 if the RSS configuration virtchnl message is filled successfully + */ +int +iavf_fill_adv_rss_cfg_msg(struct virtchnl_rss_cfg *rss_cfg, + u32 packet_hdrs, u64 hash_flds) +{ + struct virtchnl_proto_hdrs *proto_hdrs = &rss_cfg->proto_hdrs; + struct virtchnl_proto_hdr *hdr; + + rss_cfg->rss_algorithm = VIRTCHNL_RSS_ALG_TOEPLITZ_ASYMMETRIC; + + proto_hdrs->tunnel_level = 0; /* always outer layer */ + + hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++]; + switch (packet_hdrs & IAVF_ADV_RSS_FLOW_SEG_HDR_L3) { + case IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4: + iavf_fill_adv_rss_ip4_hdr(hdr, hash_flds); + break; + case IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6: + iavf_fill_adv_rss_ip6_hdr(hdr, hash_flds); + break; + default: + return -EINVAL; + } + + hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++]; + switch (packet_hdrs & IAVF_ADV_RSS_FLOW_SEG_HDR_L4) { + case IAVF_ADV_RSS_FLOW_SEG_HDR_TCP: + iavf_fill_adv_rss_tcp_hdr(hdr, hash_flds); + break; + case IAVF_ADV_RSS_FLOW_SEG_HDR_UDP: + iavf_fill_adv_rss_udp_hdr(hdr, hash_flds); + break; + case IAVF_ADV_RSS_FLOW_SEG_HDR_SCTP: + iavf_fill_adv_rss_sctp_hdr(hdr, hash_flds); + break; + default: + return -EINVAL; + } + + return 0; +} + +/** + * iavf_find_adv_rss_cfg_by_hdrs - find RSS configuration with header type + * @adapter: pointer to the VF adapter structure + * @packet_hdrs: protocol header type to find. 
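[Editorial note: iavf_fill_adv_rss_cfg_msg() above always emits exactly two protocol headers, one L3 entry followed by one L4 entry, so a caller must pass one header bit from each group. A hypothetical usage sketch requesting asymmetric-Toeplitz hashing of TCP/IPv4 flows on both L4 ports, with the constants coming from iavf_adv_rss.h added below:

static int my_request_tcp4_port_hash(void)
{
	struct virtchnl_rss_cfg cfg = {};
	u32 hdrs = IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4 |
		   IAVF_ADV_RSS_FLOW_SEG_HDR_TCP;
	u64 flds = IAVF_ADV_RSS_HASH_FLD_TCP_SRC_PORT |
		   IAVF_ADV_RSS_HASH_FLD_TCP_DST_PORT;

	/* Returns -EINVAL unless exactly one L3 and one L4 type is selected. */
	return iavf_fill_adv_rss_cfg_msg(&cfg, hdrs, flds);
}]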
+ * + * Returns pointer to advanced RSS configuration if found or NULL + */ +struct iavf_adv_rss * +iavf_find_adv_rss_cfg_by_hdrs(struct iavf_adapter *adapter, u32 packet_hdrs) +{ + struct iavf_adv_rss *rss; + + list_for_each_entry(rss, &adapter->adv_rss_list_head, list) + if (rss->packet_hdrs == packet_hdrs) + return rss; + + return NULL; +} + +/** + * iavf_print_adv_rss_cfg + * @adapter: pointer to the VF adapter structure + * @rss: pointer to the advanced RSS configuration to print + * @action: the string description about how to handle the RSS + * @result: the string description about the virtchnl result + * + * Print the advanced RSS configuration + **/ +void +iavf_print_adv_rss_cfg(struct iavf_adapter *adapter, struct iavf_adv_rss *rss, + const char *action, const char *result) +{ + u32 packet_hdrs = rss->packet_hdrs; + u64 hash_flds = rss->hash_flds; + static char hash_opt[300]; + const char *proto; + + if (packet_hdrs & IAVF_ADV_RSS_FLOW_SEG_HDR_TCP) + proto = "TCP"; + else if (packet_hdrs & IAVF_ADV_RSS_FLOW_SEG_HDR_UDP) + proto = "UDP"; + else if (packet_hdrs & IAVF_ADV_RSS_FLOW_SEG_HDR_SCTP) + proto = "SCTP"; + else + return; + + memset(hash_opt, 0, sizeof(hash_opt)); + + strcat(hash_opt, proto); + if (packet_hdrs & IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4) + strcat(hash_opt, "v4 "); + else + strcat(hash_opt, "v6 "); + + if (hash_flds & (IAVF_ADV_RSS_HASH_FLD_IPV4_SA | + IAVF_ADV_RSS_HASH_FLD_IPV6_SA)) + strcat(hash_opt, "IP SA,"); + if (hash_flds & (IAVF_ADV_RSS_HASH_FLD_IPV4_DA | + IAVF_ADV_RSS_HASH_FLD_IPV6_DA)) + strcat(hash_opt, "IP DA,"); + if (hash_flds & (IAVF_ADV_RSS_HASH_FLD_TCP_SRC_PORT | + IAVF_ADV_RSS_HASH_FLD_UDP_SRC_PORT | + IAVF_ADV_RSS_HASH_FLD_SCTP_SRC_PORT)) + strcat(hash_opt, "src port,"); + if (hash_flds & (IAVF_ADV_RSS_HASH_FLD_TCP_DST_PORT | + IAVF_ADV_RSS_HASH_FLD_UDP_DST_PORT | + IAVF_ADV_RSS_HASH_FLD_SCTP_DST_PORT)) + strcat(hash_opt, "dst port,"); + + if (!action) + action = ""; + + if (!result) + result = ""; + + dev_info(&adapter->pdev->dev, "%s %s %s\n", action, hash_opt, result); +} diff --git a/drivers/net/ethernet/intel/iavf/iavf_adv_rss.h b/drivers/net/ethernet/intel/iavf/iavf_adv_rss.h new file mode 100644 index 000000000000..4d3be11af7aa --- /dev/null +++ b/drivers/net/ethernet/intel/iavf/iavf_adv_rss.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2021, Intel Corporation. 
*/ + +#ifndef _IAVF_ADV_RSS_H_ +#define _IAVF_ADV_RSS_H_ + +struct iavf_adapter; + +/* State of advanced RSS configuration */ +enum iavf_adv_rss_state_t { + IAVF_ADV_RSS_ADD_REQUEST, /* User requests to add RSS */ + IAVF_ADV_RSS_ADD_PENDING, /* RSS pending add by the PF */ + IAVF_ADV_RSS_DEL_REQUEST, /* Driver requests to delete RSS */ + IAVF_ADV_RSS_DEL_PENDING, /* RSS pending delete by the PF */ + IAVF_ADV_RSS_ACTIVE, /* RSS configuration is active */ +}; + +enum iavf_adv_rss_flow_seg_hdr { + IAVF_ADV_RSS_FLOW_SEG_HDR_NONE = 0x00000000, + IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4 = 0x00000001, + IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6 = 0x00000002, + IAVF_ADV_RSS_FLOW_SEG_HDR_TCP = 0x00000004, + IAVF_ADV_RSS_FLOW_SEG_HDR_UDP = 0x00000008, + IAVF_ADV_RSS_FLOW_SEG_HDR_SCTP = 0x00000010, +}; + +#define IAVF_ADV_RSS_FLOW_SEG_HDR_L3 \ + (IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4 | \ + IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6) + +#define IAVF_ADV_RSS_FLOW_SEG_HDR_L4 \ + (IAVF_ADV_RSS_FLOW_SEG_HDR_TCP | \ + IAVF_ADV_RSS_FLOW_SEG_HDR_UDP | \ + IAVF_ADV_RSS_FLOW_SEG_HDR_SCTP) + +enum iavf_adv_rss_flow_field { + /* L3 */ + IAVF_ADV_RSS_FLOW_FIELD_IDX_IPV4_SA, + IAVF_ADV_RSS_FLOW_FIELD_IDX_IPV4_DA, + IAVF_ADV_RSS_FLOW_FIELD_IDX_IPV6_SA, + IAVF_ADV_RSS_FLOW_FIELD_IDX_IPV6_DA, + /* L4 */ + IAVF_ADV_RSS_FLOW_FIELD_IDX_TCP_SRC_PORT, + IAVF_ADV_RSS_FLOW_FIELD_IDX_TCP_DST_PORT, + IAVF_ADV_RSS_FLOW_FIELD_IDX_UDP_SRC_PORT, + IAVF_ADV_RSS_FLOW_FIELD_IDX_UDP_DST_PORT, + IAVF_ADV_RSS_FLOW_FIELD_IDX_SCTP_SRC_PORT, + IAVF_ADV_RSS_FLOW_FIELD_IDX_SCTP_DST_PORT, + + /* The total number of enums must not exceed 64 */ + IAVF_ADV_RSS_FLOW_FIELD_IDX_MAX +}; + +#define IAVF_ADV_RSS_HASH_INVALID 0 +#define IAVF_ADV_RSS_HASH_FLD_IPV4_SA \ + BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_IPV4_SA) +#define IAVF_ADV_RSS_HASH_FLD_IPV6_SA \ + BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_IPV6_SA) +#define IAVF_ADV_RSS_HASH_FLD_IPV4_DA \ + BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_IPV4_DA) +#define IAVF_ADV_RSS_HASH_FLD_IPV6_DA \ + BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_IPV6_DA) +#define IAVF_ADV_RSS_HASH_FLD_TCP_SRC_PORT \ + BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_TCP_SRC_PORT) +#define IAVF_ADV_RSS_HASH_FLD_TCP_DST_PORT \ + BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_TCP_DST_PORT) +#define IAVF_ADV_RSS_HASH_FLD_UDP_SRC_PORT \ + BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_UDP_SRC_PORT) +#define IAVF_ADV_RSS_HASH_FLD_UDP_DST_PORT \ + BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_UDP_DST_PORT) +#define IAVF_ADV_RSS_HASH_FLD_SCTP_SRC_PORT \ + BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_SCTP_SRC_PORT) +#define IAVF_ADV_RSS_HASH_FLD_SCTP_DST_PORT \ + BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_SCTP_DST_PORT) + +/* bookkeeping of advanced RSS configuration */ +struct iavf_adv_rss { + enum iavf_adv_rss_state_t state; + struct list_head list; + + u32 packet_hdrs; + u64 hash_flds; + + struct virtchnl_rss_cfg cfg_msg; +}; + +int +iavf_fill_adv_rss_cfg_msg(struct virtchnl_rss_cfg *rss_cfg, + u32 packet_hdrs, u64 hash_flds); +struct iavf_adv_rss * +iavf_find_adv_rss_cfg_by_hdrs(struct iavf_adapter *adapter, u32 packet_hdrs); +void +iavf_print_adv_rss_cfg(struct iavf_adapter *adapter, struct iavf_adv_rss *rss, + const char *action, const char *result); +#endif /* _IAVF_ADV_RSS_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index c93567f4d0f7..af43fbd8cb75 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -828,6 +828,872 @@ static int iavf_set_per_queue_coalesce(struct net_device *netdev, u32 queue, } /** + * 
iavf_fltr_to_ethtool_flow - convert filter type values to ethtool + * flow type values + * @flow: filter type to be converted + * + * Returns the corresponding ethtool flow type. + */ +static int iavf_fltr_to_ethtool_flow(enum iavf_fdir_flow_type flow) +{ + switch (flow) { + case IAVF_FDIR_FLOW_IPV4_TCP: + return TCP_V4_FLOW; + case IAVF_FDIR_FLOW_IPV4_UDP: + return UDP_V4_FLOW; + case IAVF_FDIR_FLOW_IPV4_SCTP: + return SCTP_V4_FLOW; + case IAVF_FDIR_FLOW_IPV4_AH: + return AH_V4_FLOW; + case IAVF_FDIR_FLOW_IPV4_ESP: + return ESP_V4_FLOW; + case IAVF_FDIR_FLOW_IPV4_OTHER: + return IPV4_USER_FLOW; + case IAVF_FDIR_FLOW_IPV6_TCP: + return TCP_V6_FLOW; + case IAVF_FDIR_FLOW_IPV6_UDP: + return UDP_V6_FLOW; + case IAVF_FDIR_FLOW_IPV6_SCTP: + return SCTP_V6_FLOW; + case IAVF_FDIR_FLOW_IPV6_AH: + return AH_V6_FLOW; + case IAVF_FDIR_FLOW_IPV6_ESP: + return ESP_V6_FLOW; + case IAVF_FDIR_FLOW_IPV6_OTHER: + return IPV6_USER_FLOW; + case IAVF_FDIR_FLOW_NON_IP_L2: + return ETHER_FLOW; + default: + /* 0 is undefined ethtool flow */ + return 0; + } +} + +/** + * iavf_ethtool_flow_to_fltr - convert ethtool flow type to filter enum + * @eth: Ethtool flow type to be converted + * + * Returns flow enum + */ +static enum iavf_fdir_flow_type iavf_ethtool_flow_to_fltr(int eth) +{ + switch (eth) { + case TCP_V4_FLOW: + return IAVF_FDIR_FLOW_IPV4_TCP; + case UDP_V4_FLOW: + return IAVF_FDIR_FLOW_IPV4_UDP; + case SCTP_V4_FLOW: + return IAVF_FDIR_FLOW_IPV4_SCTP; + case AH_V4_FLOW: + return IAVF_FDIR_FLOW_IPV4_AH; + case ESP_V4_FLOW: + return IAVF_FDIR_FLOW_IPV4_ESP; + case IPV4_USER_FLOW: + return IAVF_FDIR_FLOW_IPV4_OTHER; + case TCP_V6_FLOW: + return IAVF_FDIR_FLOW_IPV6_TCP; + case UDP_V6_FLOW: + return IAVF_FDIR_FLOW_IPV6_UDP; + case SCTP_V6_FLOW: + return IAVF_FDIR_FLOW_IPV6_SCTP; + case AH_V6_FLOW: + return IAVF_FDIR_FLOW_IPV6_AH; + case ESP_V6_FLOW: + return IAVF_FDIR_FLOW_IPV6_ESP; + case IPV6_USER_FLOW: + return IAVF_FDIR_FLOW_IPV6_OTHER; + case ETHER_FLOW: + return IAVF_FDIR_FLOW_NON_IP_L2; + default: + return IAVF_FDIR_FLOW_NONE; + } +} + +/** + * iavf_is_mask_valid - check mask field set + * @mask: full mask to check + * @field: field for which mask should be valid + * + * If the mask is fully set return true. If it is not valid for field return + * false. + */ +static bool iavf_is_mask_valid(u64 mask, u64 field) +{ + return (mask & field) == field; +} + +/** + * iavf_parse_rx_flow_user_data - deconstruct user-defined data + * @fsp: pointer to ethtool Rx flow specification + * @fltr: pointer to Flow Director filter for userdef data storage + * + * Returns 0 on success, negative error value on failure + */ +static int +iavf_parse_rx_flow_user_data(struct ethtool_rx_flow_spec *fsp, + struct iavf_fdir_fltr *fltr) +{ + struct iavf_flex_word *flex; + int i, cnt = 0; + + if (!(fsp->flow_type & FLOW_EXT)) + return 0; + + for (i = 0; i < IAVF_FLEX_WORD_NUM; i++) { +#define IAVF_USERDEF_FLEX_WORD_M GENMASK(15, 0) +#define IAVF_USERDEF_FLEX_OFFS_S 16 +#define IAVF_USERDEF_FLEX_OFFS_M GENMASK(31, IAVF_USERDEF_FLEX_OFFS_S) +#define IAVF_USERDEF_FLEX_FLTR_M GENMASK(31, 0) + u32 value = be32_to_cpu(fsp->h_ext.data[i]); + u32 mask = be32_to_cpu(fsp->m_ext.data[i]); + + if (!value || !mask) + continue; + + if (!iavf_is_mask_valid(mask, IAVF_USERDEF_FLEX_FLTR_M)) + return -EINVAL; + + /* 504 is the maximum value for offsets, and offset is measured + * from the start of the MAC address. 
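[Editorial note: iavf_parse_rx_flow_user_data() above decodes each 32-bit ethtool user-def value as a 16-bit match pattern in bits 15:0 and a byte offset (measured from the start of the MAC address, at most 504) in bits 31:16. A standalone sketch of the decoding in plain C, with hypothetical names:

#include <stdint.h>

struct my_flex_word {
	uint16_t pattern;	/* bits 15:0,  IAVF_USERDEF_FLEX_WORD_M */
	uint16_t offset;	/* bits 31:16, IAVF_USERDEF_FLEX_OFFS_M */
};

static int my_unpack_user_def(uint32_t value, struct my_flex_word *out)
{
	out->pattern = value & 0xffffu;
	out->offset = value >> 16;
	return out->offset > 504 ? -1 : 0;	/* IAVF_USERDEF_FLEX_MAX_OFFS_VAL */
}

For example, the value 0x000c8000 matches the pattern 0x8000 at byte offset 12.]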
+ */ +#define IAVF_USERDEF_FLEX_MAX_OFFS_VAL 504 + flex = &fltr->flex_words[cnt++]; + flex->word = value & IAVF_USERDEF_FLEX_WORD_M; + flex->offset = (value & IAVF_USERDEF_FLEX_OFFS_M) >> + IAVF_USERDEF_FLEX_OFFS_S; + if (flex->offset > IAVF_USERDEF_FLEX_MAX_OFFS_VAL) + return -EINVAL; + } + + fltr->flex_cnt = cnt; + + return 0; +} + +/** + * iavf_fill_rx_flow_ext_data - fill the additional data + * @fsp: pointer to ethtool Rx flow specification + * @fltr: pointer to Flow Director filter to get additional data + */ +static void +iavf_fill_rx_flow_ext_data(struct ethtool_rx_flow_spec *fsp, + struct iavf_fdir_fltr *fltr) +{ + if (!fltr->ext_mask.usr_def[0] && !fltr->ext_mask.usr_def[1]) + return; + + fsp->flow_type |= FLOW_EXT; + + memcpy(fsp->h_ext.data, fltr->ext_data.usr_def, sizeof(fsp->h_ext.data)); + memcpy(fsp->m_ext.data, fltr->ext_mask.usr_def, sizeof(fsp->m_ext.data)); +} + +/** + * iavf_get_ethtool_fdir_entry - fill ethtool structure with Flow Director filter data + * @adapter: the VF adapter structure that contains filter list + * @cmd: ethtool command data structure to receive the filter data + * + * Returns 0 as expected for success by ethtool + */ +static int +iavf_get_ethtool_fdir_entry(struct iavf_adapter *adapter, + struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fsp = (struct ethtool_rx_flow_spec *)&cmd->fs; + struct iavf_fdir_fltr *rule = NULL; + int ret = 0; + + if (!FDIR_FLTR_SUPPORT(adapter)) + return -EOPNOTSUPP; + + spin_lock_bh(&adapter->fdir_fltr_lock); + + rule = iavf_find_fdir_fltr_by_loc(adapter, fsp->location); + if (!rule) { + ret = -EINVAL; + goto release_lock; + } + + fsp->flow_type = iavf_fltr_to_ethtool_flow(rule->flow_type); + + memset(&fsp->m_u, 0, sizeof(fsp->m_u)); + memset(&fsp->m_ext, 0, sizeof(fsp->m_ext)); + + switch (fsp->flow_type) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + case SCTP_V4_FLOW: + fsp->h_u.tcp_ip4_spec.ip4src = rule->ip_data.v4_addrs.src_ip; + fsp->h_u.tcp_ip4_spec.ip4dst = rule->ip_data.v4_addrs.dst_ip; + fsp->h_u.tcp_ip4_spec.psrc = rule->ip_data.src_port; + fsp->h_u.tcp_ip4_spec.pdst = rule->ip_data.dst_port; + fsp->h_u.tcp_ip4_spec.tos = rule->ip_data.tos; + fsp->m_u.tcp_ip4_spec.ip4src = rule->ip_mask.v4_addrs.src_ip; + fsp->m_u.tcp_ip4_spec.ip4dst = rule->ip_mask.v4_addrs.dst_ip; + fsp->m_u.tcp_ip4_spec.psrc = rule->ip_mask.src_port; + fsp->m_u.tcp_ip4_spec.pdst = rule->ip_mask.dst_port; + fsp->m_u.tcp_ip4_spec.tos = rule->ip_mask.tos; + break; + case AH_V4_FLOW: + case ESP_V4_FLOW: + fsp->h_u.ah_ip4_spec.ip4src = rule->ip_data.v4_addrs.src_ip; + fsp->h_u.ah_ip4_spec.ip4dst = rule->ip_data.v4_addrs.dst_ip; + fsp->h_u.ah_ip4_spec.spi = rule->ip_data.spi; + fsp->h_u.ah_ip4_spec.tos = rule->ip_data.tos; + fsp->m_u.ah_ip4_spec.ip4src = rule->ip_mask.v4_addrs.src_ip; + fsp->m_u.ah_ip4_spec.ip4dst = rule->ip_mask.v4_addrs.dst_ip; + fsp->m_u.ah_ip4_spec.spi = rule->ip_mask.spi; + fsp->m_u.ah_ip4_spec.tos = rule->ip_mask.tos; + break; + case IPV4_USER_FLOW: + fsp->h_u.usr_ip4_spec.ip4src = rule->ip_data.v4_addrs.src_ip; + fsp->h_u.usr_ip4_spec.ip4dst = rule->ip_data.v4_addrs.dst_ip; + fsp->h_u.usr_ip4_spec.l4_4_bytes = rule->ip_data.l4_header; + fsp->h_u.usr_ip4_spec.tos = rule->ip_data.tos; + fsp->h_u.usr_ip4_spec.ip_ver = ETH_RX_NFC_IP4; + fsp->h_u.usr_ip4_spec.proto = rule->ip_data.proto; + fsp->m_u.usr_ip4_spec.ip4src = rule->ip_mask.v4_addrs.src_ip; + fsp->m_u.usr_ip4_spec.ip4dst = rule->ip_mask.v4_addrs.dst_ip; + fsp->m_u.usr_ip4_spec.l4_4_bytes = rule->ip_mask.l4_header; + fsp->m_u.usr_ip4_spec.tos = 
rule->ip_mask.tos; + fsp->m_u.usr_ip4_spec.ip_ver = 0xFF; + fsp->m_u.usr_ip4_spec.proto = rule->ip_mask.proto; + break; + case TCP_V6_FLOW: + case UDP_V6_FLOW: + case SCTP_V6_FLOW: + memcpy(fsp->h_u.usr_ip6_spec.ip6src, &rule->ip_data.v6_addrs.src_ip, + sizeof(struct in6_addr)); + memcpy(fsp->h_u.usr_ip6_spec.ip6dst, &rule->ip_data.v6_addrs.dst_ip, + sizeof(struct in6_addr)); + fsp->h_u.tcp_ip6_spec.psrc = rule->ip_data.src_port; + fsp->h_u.tcp_ip6_spec.pdst = rule->ip_data.dst_port; + fsp->h_u.tcp_ip6_spec.tclass = rule->ip_data.tclass; + memcpy(fsp->m_u.usr_ip6_spec.ip6src, &rule->ip_mask.v6_addrs.src_ip, + sizeof(struct in6_addr)); + memcpy(fsp->m_u.usr_ip6_spec.ip6dst, &rule->ip_mask.v6_addrs.dst_ip, + sizeof(struct in6_addr)); + fsp->m_u.tcp_ip6_spec.psrc = rule->ip_mask.src_port; + fsp->m_u.tcp_ip6_spec.pdst = rule->ip_mask.dst_port; + fsp->m_u.tcp_ip6_spec.tclass = rule->ip_mask.tclass; + break; + case AH_V6_FLOW: + case ESP_V6_FLOW: + memcpy(fsp->h_u.ah_ip6_spec.ip6src, &rule->ip_data.v6_addrs.src_ip, + sizeof(struct in6_addr)); + memcpy(fsp->h_u.ah_ip6_spec.ip6dst, &rule->ip_data.v6_addrs.dst_ip, + sizeof(struct in6_addr)); + fsp->h_u.ah_ip6_spec.spi = rule->ip_data.spi; + fsp->h_u.ah_ip6_spec.tclass = rule->ip_data.tclass; + memcpy(fsp->m_u.ah_ip6_spec.ip6src, &rule->ip_mask.v6_addrs.src_ip, + sizeof(struct in6_addr)); + memcpy(fsp->m_u.ah_ip6_spec.ip6dst, &rule->ip_mask.v6_addrs.dst_ip, + sizeof(struct in6_addr)); + fsp->m_u.ah_ip6_spec.spi = rule->ip_mask.spi; + fsp->m_u.ah_ip6_spec.tclass = rule->ip_mask.tclass; + break; + case IPV6_USER_FLOW: + memcpy(fsp->h_u.usr_ip6_spec.ip6src, &rule->ip_data.v6_addrs.src_ip, + sizeof(struct in6_addr)); + memcpy(fsp->h_u.usr_ip6_spec.ip6dst, &rule->ip_data.v6_addrs.dst_ip, + sizeof(struct in6_addr)); + fsp->h_u.usr_ip6_spec.l4_4_bytes = rule->ip_data.l4_header; + fsp->h_u.usr_ip6_spec.tclass = rule->ip_data.tclass; + fsp->h_u.usr_ip6_spec.l4_proto = rule->ip_data.proto; + memcpy(fsp->m_u.usr_ip6_spec.ip6src, &rule->ip_mask.v6_addrs.src_ip, + sizeof(struct in6_addr)); + memcpy(fsp->m_u.usr_ip6_spec.ip6dst, &rule->ip_mask.v6_addrs.dst_ip, + sizeof(struct in6_addr)); + fsp->m_u.usr_ip6_spec.l4_4_bytes = rule->ip_mask.l4_header; + fsp->m_u.usr_ip6_spec.tclass = rule->ip_mask.tclass; + fsp->m_u.usr_ip6_spec.l4_proto = rule->ip_mask.proto; + break; + case ETHER_FLOW: + fsp->h_u.ether_spec.h_proto = rule->eth_data.etype; + fsp->m_u.ether_spec.h_proto = rule->eth_mask.etype; + break; + default: + ret = -EINVAL; + break; + } + + iavf_fill_rx_flow_ext_data(fsp, rule); + + if (rule->action == VIRTCHNL_ACTION_DROP) + fsp->ring_cookie = RX_CLS_FLOW_DISC; + else + fsp->ring_cookie = rule->q_index; + +release_lock: + spin_unlock_bh(&adapter->fdir_fltr_lock); + return ret; +} + +/** + * iavf_get_fdir_fltr_ids - fill buffer with filter IDs of active filters + * @adapter: the VF adapter structure containing the filter list + * @cmd: ethtool command data structure + * @rule_locs: ethtool array passed in from OS to receive filter IDs + * + * Returns 0 as expected for success by ethtool + */ +static int +iavf_get_fdir_fltr_ids(struct iavf_adapter *adapter, struct ethtool_rxnfc *cmd, + u32 *rule_locs) +{ + struct iavf_fdir_fltr *fltr; + unsigned int cnt = 0; + int val = 0; + + if (!FDIR_FLTR_SUPPORT(adapter)) + return -EOPNOTSUPP; + + cmd->data = IAVF_MAX_FDIR_FILTERS; + + spin_lock_bh(&adapter->fdir_fltr_lock); + + list_for_each_entry(fltr, &adapter->fdir_list_head, list) { + if (cnt == cmd->rule_cnt) { + val = -EMSGSIZE; + goto release_lock; + } + 
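+ /* report each rule by its user-assigned location ('loc' in ethtool) */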
rule_locs[cnt] = fltr->loc; + cnt++; + } + +release_lock: + spin_unlock_bh(&adapter->fdir_fltr_lock); + if (!val) + cmd->rule_cnt = cnt; + + return val; +} + +/** + * iavf_add_fdir_fltr_info - Set the input set for Flow Director filter + * @adapter: pointer to the VF adapter structure + * @fsp: pointer to ethtool Rx flow specification + * @fltr: filter structure + */ +static int +iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spec *fsp, + struct iavf_fdir_fltr *fltr) +{ + u32 flow_type, q_index = 0; + enum virtchnl_action act; + int err; + + if (fsp->ring_cookie == RX_CLS_FLOW_DISC) { + act = VIRTCHNL_ACTION_DROP; + } else { + q_index = fsp->ring_cookie; + if (q_index >= adapter->num_active_queues) + return -EINVAL; + + act = VIRTCHNL_ACTION_QUEUE; + } + + fltr->action = act; + fltr->loc = fsp->location; + fltr->q_index = q_index; + + if (fsp->flow_type & FLOW_EXT) { + memcpy(fltr->ext_data.usr_def, fsp->h_ext.data, + sizeof(fltr->ext_data.usr_def)); + memcpy(fltr->ext_mask.usr_def, fsp->m_ext.data, + sizeof(fltr->ext_mask.usr_def)); + } + + flow_type = fsp->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS); + fltr->flow_type = iavf_ethtool_flow_to_fltr(flow_type); + + switch (flow_type) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + case SCTP_V4_FLOW: + fltr->ip_data.v4_addrs.src_ip = fsp->h_u.tcp_ip4_spec.ip4src; + fltr->ip_data.v4_addrs.dst_ip = fsp->h_u.tcp_ip4_spec.ip4dst; + fltr->ip_data.src_port = fsp->h_u.tcp_ip4_spec.psrc; + fltr->ip_data.dst_port = fsp->h_u.tcp_ip4_spec.pdst; + fltr->ip_data.tos = fsp->h_u.tcp_ip4_spec.tos; + fltr->ip_mask.v4_addrs.src_ip = fsp->m_u.tcp_ip4_spec.ip4src; + fltr->ip_mask.v4_addrs.dst_ip = fsp->m_u.tcp_ip4_spec.ip4dst; + fltr->ip_mask.src_port = fsp->m_u.tcp_ip4_spec.psrc; + fltr->ip_mask.dst_port = fsp->m_u.tcp_ip4_spec.pdst; + fltr->ip_mask.tos = fsp->m_u.tcp_ip4_spec.tos; + break; + case AH_V4_FLOW: + case ESP_V4_FLOW: + fltr->ip_data.v4_addrs.src_ip = fsp->h_u.ah_ip4_spec.ip4src; + fltr->ip_data.v4_addrs.dst_ip = fsp->h_u.ah_ip4_spec.ip4dst; + fltr->ip_data.spi = fsp->h_u.ah_ip4_spec.spi; + fltr->ip_data.tos = fsp->h_u.ah_ip4_spec.tos; + fltr->ip_mask.v4_addrs.src_ip = fsp->m_u.ah_ip4_spec.ip4src; + fltr->ip_mask.v4_addrs.dst_ip = fsp->m_u.ah_ip4_spec.ip4dst; + fltr->ip_mask.spi = fsp->m_u.ah_ip4_spec.spi; + fltr->ip_mask.tos = fsp->m_u.ah_ip4_spec.tos; + break; + case IPV4_USER_FLOW: + fltr->ip_data.v4_addrs.src_ip = fsp->h_u.usr_ip4_spec.ip4src; + fltr->ip_data.v4_addrs.dst_ip = fsp->h_u.usr_ip4_spec.ip4dst; + fltr->ip_data.l4_header = fsp->h_u.usr_ip4_spec.l4_4_bytes; + fltr->ip_data.tos = fsp->h_u.usr_ip4_spec.tos; + fltr->ip_data.proto = fsp->h_u.usr_ip4_spec.proto; + fltr->ip_mask.v4_addrs.src_ip = fsp->m_u.usr_ip4_spec.ip4src; + fltr->ip_mask.v4_addrs.dst_ip = fsp->m_u.usr_ip4_spec.ip4dst; + fltr->ip_mask.l4_header = fsp->m_u.usr_ip4_spec.l4_4_bytes; + fltr->ip_mask.tos = fsp->m_u.usr_ip4_spec.tos; + fltr->ip_mask.proto = fsp->m_u.usr_ip4_spec.proto; + break; + case TCP_V6_FLOW: + case UDP_V6_FLOW: + case SCTP_V6_FLOW: + memcpy(&fltr->ip_data.v6_addrs.src_ip, fsp->h_u.usr_ip6_spec.ip6src, + sizeof(struct in6_addr)); + memcpy(&fltr->ip_data.v6_addrs.dst_ip, fsp->h_u.usr_ip6_spec.ip6dst, + sizeof(struct in6_addr)); + fltr->ip_data.src_port = fsp->h_u.tcp_ip6_spec.psrc; + fltr->ip_data.dst_port = fsp->h_u.tcp_ip6_spec.pdst; + fltr->ip_data.tclass = fsp->h_u.tcp_ip6_spec.tclass; + memcpy(&fltr->ip_mask.v6_addrs.src_ip, fsp->m_u.usr_ip6_spec.ip6src, + sizeof(struct in6_addr)); + memcpy(&fltr->ip_mask.v6_addrs.dst_ip, 
fsp->m_u.usr_ip6_spec.ip6dst, + sizeof(struct in6_addr)); + fltr->ip_mask.src_port = fsp->m_u.tcp_ip6_spec.psrc; + fltr->ip_mask.dst_port = fsp->m_u.tcp_ip6_spec.pdst; + fltr->ip_mask.tclass = fsp->m_u.tcp_ip6_spec.tclass; + break; + case AH_V6_FLOW: + case ESP_V6_FLOW: + memcpy(&fltr->ip_data.v6_addrs.src_ip, fsp->h_u.ah_ip6_spec.ip6src, + sizeof(struct in6_addr)); + memcpy(&fltr->ip_data.v6_addrs.dst_ip, fsp->h_u.ah_ip6_spec.ip6dst, + sizeof(struct in6_addr)); + fltr->ip_data.spi = fsp->h_u.ah_ip6_spec.spi; + fltr->ip_data.tclass = fsp->h_u.ah_ip6_spec.tclass; + memcpy(&fltr->ip_mask.v6_addrs.src_ip, fsp->m_u.ah_ip6_spec.ip6src, + sizeof(struct in6_addr)); + memcpy(&fltr->ip_mask.v6_addrs.dst_ip, fsp->m_u.ah_ip6_spec.ip6dst, + sizeof(struct in6_addr)); + fltr->ip_mask.spi = fsp->m_u.ah_ip6_spec.spi; + fltr->ip_mask.tclass = fsp->m_u.ah_ip6_spec.tclass; + break; + case IPV6_USER_FLOW: + memcpy(&fltr->ip_data.v6_addrs.src_ip, fsp->h_u.usr_ip6_spec.ip6src, + sizeof(struct in6_addr)); + memcpy(&fltr->ip_data.v6_addrs.dst_ip, fsp->h_u.usr_ip6_spec.ip6dst, + sizeof(struct in6_addr)); + fltr->ip_data.l4_header = fsp->h_u.usr_ip6_spec.l4_4_bytes; + fltr->ip_data.tclass = fsp->h_u.usr_ip6_spec.tclass; + fltr->ip_data.proto = fsp->h_u.usr_ip6_spec.l4_proto; + memcpy(&fltr->ip_mask.v6_addrs.src_ip, fsp->m_u.usr_ip6_spec.ip6src, + sizeof(struct in6_addr)); + memcpy(&fltr->ip_mask.v6_addrs.dst_ip, fsp->m_u.usr_ip6_spec.ip6dst, + sizeof(struct in6_addr)); + fltr->ip_mask.l4_header = fsp->m_u.usr_ip6_spec.l4_4_bytes; + fltr->ip_mask.tclass = fsp->m_u.usr_ip6_spec.tclass; + fltr->ip_mask.proto = fsp->m_u.usr_ip6_spec.l4_proto; + break; + case ETHER_FLOW: + fltr->eth_data.etype = fsp->h_u.ether_spec.h_proto; + fltr->eth_mask.etype = fsp->m_u.ether_spec.h_proto; + break; + default: + /* not doing un-parsed flow types */ + return -EINVAL; + } + + if (iavf_fdir_is_dup_fltr(adapter, fltr)) + return -EEXIST; + + err = iavf_parse_rx_flow_user_data(fsp, fltr); + if (err) + return err; + + return iavf_fill_fdir_add_msg(adapter, fltr); +} + +/** + * iavf_add_fdir_ethtool - add Flow Director filter + * @adapter: pointer to the VF adapter structure + * @cmd: command to add Flow Director filter + * + * Returns 0 on success and negative values for failure + */ +static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fsp = &cmd->fs; + struct iavf_fdir_fltr *fltr; + int count = 50; + int err; + + if (!FDIR_FLTR_SUPPORT(adapter)) + return -EOPNOTSUPP; + + if (fsp->flow_type & FLOW_MAC_EXT) + return -EINVAL; + + if (adapter->fdir_active_fltr >= IAVF_MAX_FDIR_FILTERS) { + dev_err(&adapter->pdev->dev, + "Unable to add Flow Director filter because VF reached the limit of max allowed filters (%u)\n", + IAVF_MAX_FDIR_FILTERS); + return -ENOSPC; + } + + spin_lock_bh(&adapter->fdir_fltr_lock); + if (iavf_find_fdir_fltr_by_loc(adapter, fsp->location)) { + dev_err(&adapter->pdev->dev, "Failed to add Flow Director filter, it already exists\n"); + spin_unlock_bh(&adapter->fdir_fltr_lock); + return -EEXIST; + } + spin_unlock_bh(&adapter->fdir_fltr_lock); + + fltr = kzalloc(sizeof(*fltr), GFP_KERNEL); + if (!fltr) + return -ENOMEM; + + while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK, + &adapter->crit_section)) { + if (--count == 0) { + kfree(fltr); + return -EINVAL; + } + udelay(1); + } + + err = iavf_add_fdir_fltr_info(adapter, fsp, fltr); + if (err) + goto ret; + + spin_lock_bh(&adapter->fdir_fltr_lock); + iavf_fdir_list_add_fltr(adapter, fltr); + 
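+ /* the filter is only staged on the list at this point; the watchdog + * task scheduled below sends the actual add request to the PF + */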
adapter->fdir_active_fltr++; + fltr->state = IAVF_FDIR_FLTR_ADD_REQUEST; + adapter->aq_required |= IAVF_FLAG_AQ_ADD_FDIR_FILTER; + spin_unlock_bh(&adapter->fdir_fltr_lock); + + mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0); + +ret: + if (err && fltr) + kfree(fltr); + + clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); + return err; +} + +/** + * iavf_del_fdir_ethtool - delete Flow Director filter + * @adapter: pointer to the VF adapter structure + * @cmd: command to delete Flow Director filter + * + * Returns 0 on success and negative values for failure + */ +static int iavf_del_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fsp = (struct ethtool_rx_flow_spec *)&cmd->fs; + struct iavf_fdir_fltr *fltr = NULL; + int err = 0; + + if (!FDIR_FLTR_SUPPORT(adapter)) + return -EOPNOTSUPP; + + spin_lock_bh(&adapter->fdir_fltr_lock); + fltr = iavf_find_fdir_fltr_by_loc(adapter, fsp->location); + if (fltr) { + if (fltr->state == IAVF_FDIR_FLTR_ACTIVE) { + fltr->state = IAVF_FDIR_FLTR_DEL_REQUEST; + adapter->aq_required |= IAVF_FLAG_AQ_DEL_FDIR_FILTER; + } else { + err = -EBUSY; + } + } else if (adapter->fdir_active_fltr) { + err = -EINVAL; + } + spin_unlock_bh(&adapter->fdir_fltr_lock); + + if (fltr && fltr->state == IAVF_FDIR_FLTR_DEL_REQUEST) + mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0); + + return err; +} + +/** + * iavf_adv_rss_parse_hdrs - parses headers from RSS hash input + * @cmd: ethtool rxnfc command + * + * This function parses the rxnfc command and returns intended + * header types for RSS configuration + */ +static u32 iavf_adv_rss_parse_hdrs(struct ethtool_rxnfc *cmd) +{ + u32 hdrs = IAVF_ADV_RSS_FLOW_SEG_HDR_NONE; + + switch (cmd->flow_type) { + case TCP_V4_FLOW: + hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_TCP | + IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4; + break; + case UDP_V4_FLOW: + hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_UDP | + IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4; + break; + case SCTP_V4_FLOW: + hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_SCTP | + IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4; + break; + case TCP_V6_FLOW: + hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_TCP | + IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6; + break; + case UDP_V6_FLOW: + hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_UDP | + IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6; + break; + case SCTP_V6_FLOW: + hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_SCTP | + IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6; + break; + default: + break; + } + + return hdrs; +} + +/** + * iavf_adv_rss_parse_hash_flds - parses hash fields from RSS hash input + * @cmd: ethtool rxnfc command + * + * This function parses the rxnfc command and returns intended hash fields for + * RSS configuration + */ +static u64 iavf_adv_rss_parse_hash_flds(struct ethtool_rxnfc *cmd) +{ + u64 hfld = IAVF_ADV_RSS_HASH_INVALID; + + if (cmd->data & RXH_IP_SRC || cmd->data & RXH_IP_DST) { + switch (cmd->flow_type) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + case SCTP_V4_FLOW: + if (cmd->data & RXH_IP_SRC) + hfld |= IAVF_ADV_RSS_HASH_FLD_IPV4_SA; + if (cmd->data & RXH_IP_DST) + hfld |= IAVF_ADV_RSS_HASH_FLD_IPV4_DA; + break; + case TCP_V6_FLOW: + case UDP_V6_FLOW: + case SCTP_V6_FLOW: + if (cmd->data & RXH_IP_SRC) + hfld |= IAVF_ADV_RSS_HASH_FLD_IPV6_SA; + if (cmd->data & RXH_IP_DST) + hfld |= IAVF_ADV_RSS_HASH_FLD_IPV6_DA; + break; + default: + break; + } + } + + if (cmd->data & RXH_L4_B_0_1 || cmd->data & RXH_L4_B_2_3) { + switch (cmd->flow_type) { + case TCP_V4_FLOW: + case TCP_V6_FLOW: + if (cmd->data & RXH_L4_B_0_1) + hfld |= IAVF_ADV_RSS_HASH_FLD_TCP_SRC_PORT; + if (cmd->data & 
RXH_L4_B_2_3) + hfld |= IAVF_ADV_RSS_HASH_FLD_TCP_DST_PORT; + break; + case UDP_V4_FLOW: + case UDP_V6_FLOW: + if (cmd->data & RXH_L4_B_0_1) + hfld |= IAVF_ADV_RSS_HASH_FLD_UDP_SRC_PORT; + if (cmd->data & RXH_L4_B_2_3) + hfld |= IAVF_ADV_RSS_HASH_FLD_UDP_DST_PORT; + break; + case SCTP_V4_FLOW: + case SCTP_V6_FLOW: + if (cmd->data & RXH_L4_B_0_1) + hfld |= IAVF_ADV_RSS_HASH_FLD_SCTP_SRC_PORT; + if (cmd->data & RXH_L4_B_2_3) + hfld |= IAVF_ADV_RSS_HASH_FLD_SCTP_DST_PORT; + break; + default: + break; + } + } + + return hfld; +} + +/** + * iavf_set_adv_rss_hash_opt - Enable/Disable flow types for RSS hash + * @adapter: pointer to the VF adapter structure + * @cmd: ethtool rxnfc command + * + * Returns Success if the flow input set is supported. + */ +static int +iavf_set_adv_rss_hash_opt(struct iavf_adapter *adapter, + struct ethtool_rxnfc *cmd) +{ + struct iavf_adv_rss *rss_old, *rss_new; + bool rss_new_add = false; + int count = 50, err = 0; + u64 hash_flds; + u32 hdrs; + + if (!ADV_RSS_SUPPORT(adapter)) + return -EOPNOTSUPP; + + hdrs = iavf_adv_rss_parse_hdrs(cmd); + if (hdrs == IAVF_ADV_RSS_FLOW_SEG_HDR_NONE) + return -EINVAL; + + hash_flds = iavf_adv_rss_parse_hash_flds(cmd); + if (hash_flds == IAVF_ADV_RSS_HASH_INVALID) + return -EINVAL; + + rss_new = kzalloc(sizeof(*rss_new), GFP_KERNEL); + if (!rss_new) + return -ENOMEM; + + if (iavf_fill_adv_rss_cfg_msg(&rss_new->cfg_msg, hdrs, hash_flds)) { + kfree(rss_new); + return -EINVAL; + } + + while (test_and_set_bit(__IAVF_IN_CRITICAL_TASK, + &adapter->crit_section)) { + if (--count == 0) { + kfree(rss_new); + return -EINVAL; + } + + udelay(1); + } + + spin_lock_bh(&adapter->adv_rss_lock); + rss_old = iavf_find_adv_rss_cfg_by_hdrs(adapter, hdrs); + if (rss_old) { + if (rss_old->state != IAVF_ADV_RSS_ACTIVE) { + err = -EBUSY; + } else if (rss_old->hash_flds != hash_flds) { + rss_old->state = IAVF_ADV_RSS_ADD_REQUEST; + rss_old->hash_flds = hash_flds; + memcpy(&rss_old->cfg_msg, &rss_new->cfg_msg, + sizeof(rss_new->cfg_msg)); + adapter->aq_required |= IAVF_FLAG_AQ_ADD_ADV_RSS_CFG; + } else { + err = -EEXIST; + } + } else { + rss_new_add = true; + rss_new->state = IAVF_ADV_RSS_ADD_REQUEST; + rss_new->packet_hdrs = hdrs; + rss_new->hash_flds = hash_flds; + list_add_tail(&rss_new->list, &adapter->adv_rss_list_head); + adapter->aq_required |= IAVF_FLAG_AQ_ADD_ADV_RSS_CFG; + } + spin_unlock_bh(&adapter->adv_rss_lock); + + if (!err) + mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0); + + clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); + + if (!rss_new_add) + kfree(rss_new); + + return err; +} + +/** + * iavf_get_adv_rss_hash_opt - Retrieve hash fields for a given flow-type + * @adapter: pointer to the VF adapter structure + * @cmd: ethtool rxnfc command + * + * Returns Success if the flow input set is supported. 
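+ * + * This services the ETHTOOL_GRXFH request, e.g. a query such as + * 'ethtool -n <dev> rx-flow-hash tcp4'.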
+ */ +static int +iavf_get_adv_rss_hash_opt(struct iavf_adapter *adapter, + struct ethtool_rxnfc *cmd) +{ + struct iavf_adv_rss *rss; + u64 hash_flds; + u32 hdrs; + + if (!ADV_RSS_SUPPORT(adapter)) + return -EOPNOTSUPP; + + cmd->data = 0; + + hdrs = iavf_adv_rss_parse_hdrs(cmd); + if (hdrs == IAVF_ADV_RSS_FLOW_SEG_HDR_NONE) + return -EINVAL; + + spin_lock_bh(&adapter->adv_rss_lock); + rss = iavf_find_adv_rss_cfg_by_hdrs(adapter, hdrs); + if (rss) + hash_flds = rss->hash_flds; + else + hash_flds = IAVF_ADV_RSS_HASH_INVALID; + spin_unlock_bh(&adapter->adv_rss_lock); + + if (hash_flds == IAVF_ADV_RSS_HASH_INVALID) + return -EINVAL; + + if (hash_flds & (IAVF_ADV_RSS_HASH_FLD_IPV4_SA | + IAVF_ADV_RSS_HASH_FLD_IPV6_SA)) + cmd->data |= (u64)RXH_IP_SRC; + + if (hash_flds & (IAVF_ADV_RSS_HASH_FLD_IPV4_DA | + IAVF_ADV_RSS_HASH_FLD_IPV6_DA)) + cmd->data |= (u64)RXH_IP_DST; + + if (hash_flds & (IAVF_ADV_RSS_HASH_FLD_TCP_SRC_PORT | + IAVF_ADV_RSS_HASH_FLD_UDP_SRC_PORT | + IAVF_ADV_RSS_HASH_FLD_SCTP_SRC_PORT)) + cmd->data |= (u64)RXH_L4_B_0_1; + + if (hash_flds & (IAVF_ADV_RSS_HASH_FLD_TCP_DST_PORT | + IAVF_ADV_RSS_HASH_FLD_UDP_DST_PORT | + IAVF_ADV_RSS_HASH_FLD_SCTP_DST_PORT)) + cmd->data |= (u64)RXH_L4_B_2_3; + + return 0; +} + +/** + * iavf_set_rxnfc - command to set Rx flow rules. + * @netdev: network interface device structure + * @cmd: ethtool rxnfc command + * + * Returns 0 for success and negative values for errors + */ +static int iavf_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd) +{ + struct iavf_adapter *adapter = netdev_priv(netdev); + int ret = -EOPNOTSUPP; + + switch (cmd->cmd) { + case ETHTOOL_SRXCLSRLINS: + ret = iavf_add_fdir_ethtool(adapter, cmd); + break; + case ETHTOOL_SRXCLSRLDEL: + ret = iavf_del_fdir_ethtool(adapter, cmd); + break; + case ETHTOOL_SRXFH: + ret = iavf_set_adv_rss_hash_opt(adapter, cmd); + break; + default: + break; + } + + return ret; +} + +/** * iavf_get_rxnfc - command to get RX flow classification rules * @netdev: network interface device structure * @cmd: ethtool rxnfc command @@ -846,9 +1712,21 @@ static int iavf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, cmd->data = adapter->num_active_queues; ret = 0; break; + case ETHTOOL_GRXCLSRLCNT: + if (!FDIR_FLTR_SUPPORT(adapter)) + break; + cmd->rule_cnt = adapter->fdir_active_fltr; + cmd->data = IAVF_MAX_FDIR_FILTERS; + ret = 0; + break; + case ETHTOOL_GRXCLSRULE: + ret = iavf_get_ethtool_fdir_entry(adapter, cmd); + break; + case ETHTOOL_GRXCLSRLALL: + ret = iavf_get_fdir_fltr_ids(adapter, cmd, (u32 *)rule_locs); + break; case ETHTOOL_GRXFH: - netdev_info(netdev, - "RSS hash info is not available to vf, use pf.\n"); + ret = iavf_get_adv_rss_hash_opt(adapter, cmd); break; default: break; @@ -1025,6 +1903,7 @@ static const struct ethtool_ops iavf_ethtool_ops = { .set_coalesce = iavf_set_coalesce, .get_per_queue_coalesce = iavf_get_per_queue_coalesce, .set_per_queue_coalesce = iavf_set_per_queue_coalesce, + .set_rxnfc = iavf_set_rxnfc, .get_rxnfc = iavf_get_rxnfc, .get_rxfh_indir_size = iavf_get_rxfh_indir_size, .get_rxfh = iavf_get_rxfh, diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.c b/drivers/net/ethernet/intel/iavf/iavf_fdir.c new file mode 100644 index 000000000000..6146203efd84 --- /dev/null +++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.c @@ -0,0 +1,779 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020, Intel Corporation. 
*/ + +/* flow director ethtool support for iavf */ + +#include "iavf.h" + +#define GTPU_PORT 2152 +#define NAT_T_ESP_PORT 4500 +#define PFCP_PORT 8805 + +static const struct in6_addr ipv6_addr_full_mask = { + .in6_u = { + .u6_addr8 = { + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + } + } +}; + +/** + * iavf_pkt_udp_no_pay_len - the length of UDP packet without payload + * @fltr: Flow Director filter data structure + */ +static u16 iavf_pkt_udp_no_pay_len(struct iavf_fdir_fltr *fltr) +{ + return sizeof(struct ethhdr) + + (fltr->ip_ver == 4 ? sizeof(struct iphdr) : sizeof(struct ipv6hdr)) + + sizeof(struct udphdr); +} + +/** + * iavf_fill_fdir_gtpu_hdr - fill the GTP-U protocol header + * @fltr: Flow Director filter data structure + * @proto_hdrs: Flow Director protocol headers data structure + * + * Returns 0 if the GTP-U protocol header is set successfully + */ +static int +iavf_fill_fdir_gtpu_hdr(struct iavf_fdir_fltr *fltr, + struct virtchnl_proto_hdrs *proto_hdrs) +{ + struct virtchnl_proto_hdr *uhdr = &proto_hdrs->proto_hdr[proto_hdrs->count - 1]; + struct virtchnl_proto_hdr *ghdr = &proto_hdrs->proto_hdr[proto_hdrs->count++]; + struct virtchnl_proto_hdr *ehdr = NULL; /* Extension Header if it exists */ + u16 adj_offs, hdr_offs; + int i; + + VIRTCHNL_SET_PROTO_HDR_TYPE(ghdr, GTPU_IP); + + adj_offs = iavf_pkt_udp_no_pay_len(fltr); + + for (i = 0; i < fltr->flex_cnt; i++) { +#define IAVF_GTPU_HDR_TEID_OFFS0 4 +#define IAVF_GTPU_HDR_TEID_OFFS1 6 +#define IAVF_GTPU_HDR_N_PDU_AND_NEXT_EXTHDR_OFFS 10 +#define IAVF_GTPU_HDR_NEXT_EXTHDR_TYPE_MASK 0x00FF /* skip N_PDU */ +/* PDU Session Container Extension Header (PSC) */ +#define IAVF_GTPU_PSC_EXTHDR_TYPE 0x85 +#define IAVF_GTPU_HDR_PSC_PDU_TYPE_AND_QFI_OFFS 13 +#define IAVF_GTPU_HDR_PSC_PDU_QFI_MASK 0x3F /* skip Type */ +#define IAVF_GTPU_EH_QFI_IDX 1 + + if (fltr->flex_words[i].offset < adj_offs) + return -EINVAL; + + hdr_offs = fltr->flex_words[i].offset - adj_offs; + + switch (hdr_offs) { + case IAVF_GTPU_HDR_TEID_OFFS0: + case IAVF_GTPU_HDR_TEID_OFFS1: { + __be16 *pay_word = (__be16 *)ghdr->buffer; + + pay_word[hdr_offs >> 1] = htons(fltr->flex_words[i].word); + VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(ghdr, GTPU_IP, TEID); + } + break; + case IAVF_GTPU_HDR_N_PDU_AND_NEXT_EXTHDR_OFFS: + if ((fltr->flex_words[i].word & + IAVF_GTPU_HDR_NEXT_EXTHDR_TYPE_MASK) != + IAVF_GTPU_PSC_EXTHDR_TYPE) + return -EOPNOTSUPP; + if (!ehdr) + ehdr = &proto_hdrs->proto_hdr[proto_hdrs->count++]; + VIRTCHNL_SET_PROTO_HDR_TYPE(ehdr, GTPU_EH); + break; + case IAVF_GTPU_HDR_PSC_PDU_TYPE_AND_QFI_OFFS: + if (!ehdr) + return -EINVAL; + ehdr->buffer[IAVF_GTPU_EH_QFI_IDX] = + fltr->flex_words[i].word & + IAVF_GTPU_HDR_PSC_PDU_QFI_MASK; + VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(ehdr, GTPU_EH, QFI); + break; + default: + return -EINVAL; + } + } + + uhdr->field_selector = 0; /* The PF ignores the UDP header fields */ + + return 0; +} + +/** + * iavf_fill_fdir_pfcp_hdr - fill the PFCP protocol header + * @fltr: Flow Director filter data structure + * @proto_hdrs: Flow Director protocol headers data structure + * + * Returns 0 if the PFCP protocol header is set successfully + */ +static int +iavf_fill_fdir_pfcp_hdr(struct iavf_fdir_fltr *fltr, + struct virtchnl_proto_hdrs *proto_hdrs) +{ + struct virtchnl_proto_hdr *uhdr = &proto_hdrs->proto_hdr[proto_hdrs->count - 1]; + struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++]; + u16 adj_offs, hdr_offs; + int i; + + VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, PFCP); + + 
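+ /* user-def flex offsets are frame-relative; rebase them below to + * offsets within the PFCP header itself + */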
adj_offs = iavf_pkt_udp_no_pay_len(fltr); + + for (i = 0; i < fltr->flex_cnt; i++) { +#define IAVF_PFCP_HDR_SFIELD_AND_MSG_TYPE_OFFS 0 + if (fltr->flex_words[i].offset < adj_offs) + return -EINVAL; + + hdr_offs = fltr->flex_words[i].offset - adj_offs; + + switch (hdr_offs) { + case IAVF_PFCP_HDR_SFIELD_AND_MSG_TYPE_OFFS: + hdr->buffer[0] = (fltr->flex_words[i].word >> 8) & 0xff; + VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, PFCP, S_FIELD); + break; + default: + return -EINVAL; + } + } + + uhdr->field_selector = 0; /* The PF ignores the UDP header fields */ + + return 0; +} + +/** + * iavf_fill_fdir_nat_t_esp_hdr - fill the NAT-T-ESP protocol header + * @fltr: Flow Director filter data structure + * @proto_hdrs: Flow Director protocol headers data structure + * + * Returns 0 if the NAT-T-ESP protocol header is set successfully + */ +static int +iavf_fill_fdir_nat_t_esp_hdr(struct iavf_fdir_fltr *fltr, + struct virtchnl_proto_hdrs *proto_hdrs) +{ + struct virtchnl_proto_hdr *uhdr = &proto_hdrs->proto_hdr[proto_hdrs->count - 1]; + struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++]; + u16 adj_offs, hdr_offs; + u32 spi = 0; + int i; + + VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, ESP); + + adj_offs = iavf_pkt_udp_no_pay_len(fltr); + + for (i = 0; i < fltr->flex_cnt; i++) { +#define IAVF_NAT_T_ESP_SPI_OFFS0 0 +#define IAVF_NAT_T_ESP_SPI_OFFS1 2 + if (fltr->flex_words[i].offset < adj_offs) + return -EINVAL; + + hdr_offs = fltr->flex_words[i].offset - adj_offs; + + switch (hdr_offs) { + case IAVF_NAT_T_ESP_SPI_OFFS0: + spi |= fltr->flex_words[i].word << 16; + break; + case IAVF_NAT_T_ESP_SPI_OFFS1: + spi |= fltr->flex_words[i].word; + break; + default: + return -EINVAL; + } + } + + if (!spi) + return -EOPNOTSUPP; /* Not support IKE Header Format with SPI 0 */ + + *(__be32 *)hdr->buffer = htonl(spi); + VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, ESP, SPI); + + uhdr->field_selector = 0; /* The PF ignores the UDP header fields */ + + return 0; +} + +/** + * iavf_fill_fdir_udp_flex_pay_hdr - fill the UDP payload header + * @fltr: Flow Director filter data structure + * @proto_hdrs: Flow Director protocol headers data structure + * + * Returns 0 if the UDP payload defined protocol header is set successfully + */ +static int +iavf_fill_fdir_udp_flex_pay_hdr(struct iavf_fdir_fltr *fltr, + struct virtchnl_proto_hdrs *proto_hdrs) +{ + int err; + + switch (ntohs(fltr->ip_data.dst_port)) { + case GTPU_PORT: + err = iavf_fill_fdir_gtpu_hdr(fltr, proto_hdrs); + break; + case NAT_T_ESP_PORT: + err = iavf_fill_fdir_nat_t_esp_hdr(fltr, proto_hdrs); + break; + case PFCP_PORT: + err = iavf_fill_fdir_pfcp_hdr(fltr, proto_hdrs); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +/** + * iavf_fill_fdir_ip4_hdr - fill the IPv4 protocol header + * @fltr: Flow Director filter data structure + * @proto_hdrs: Flow Director protocol headers data structure + * + * Returns 0 if the IPv4 protocol header is set successfully + */ +static int +iavf_fill_fdir_ip4_hdr(struct iavf_fdir_fltr *fltr, + struct virtchnl_proto_hdrs *proto_hdrs) +{ + struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++]; + struct iphdr *iph = (struct iphdr *)hdr->buffer; + + VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, IPV4); + + if (fltr->ip_mask.tos == U8_MAX) { + iph->tos = fltr->ip_data.tos; + VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV4, DSCP); + } + + if (fltr->ip_mask.proto == U8_MAX) { + iph->protocol = fltr->ip_data.proto; + VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV4, PROT); + } + + if 
(fltr->ip_mask.v4_addrs.src_ip == htonl(U32_MAX)) { + iph->saddr = fltr->ip_data.v4_addrs.src_ip; + VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV4, SRC); + } + + if (fltr->ip_mask.v4_addrs.dst_ip == htonl(U32_MAX)) { + iph->daddr = fltr->ip_data.v4_addrs.dst_ip; + VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV4, DST); + } + + fltr->ip_ver = 4; + + return 0; +} + +/** + * iavf_fill_fdir_ip6_hdr - fill the IPv6 protocol header + * @fltr: Flow Director filter data structure + * @proto_hdrs: Flow Director protocol headers data structure + * + * Returns 0 if the IPv6 protocol header is set successfully + */ +static int +iavf_fill_fdir_ip6_hdr(struct iavf_fdir_fltr *fltr, + struct virtchnl_proto_hdrs *proto_hdrs) +{ + struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++]; + struct ipv6hdr *iph = (struct ipv6hdr *)hdr->buffer; + + VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, IPV6); + + if (fltr->ip_mask.tclass == U8_MAX) { + iph->priority = (fltr->ip_data.tclass >> 4) & 0xF; + iph->flow_lbl[0] = (fltr->ip_data.tclass << 4) & 0xF0; + VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV6, TC); + } + + if (fltr->ip_mask.proto == U8_MAX) { + iph->nexthdr = fltr->ip_data.proto; + VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV6, PROT); + } + + if (!memcmp(&fltr->ip_mask.v6_addrs.src_ip, &ipv6_addr_full_mask, + sizeof(struct in6_addr))) { + memcpy(&iph->saddr, &fltr->ip_data.v6_addrs.src_ip, + sizeof(struct in6_addr)); + VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV6, SRC); + } + + if (!memcmp(&fltr->ip_mask.v6_addrs.dst_ip, &ipv6_addr_full_mask, + sizeof(struct in6_addr))) { + memcpy(&iph->daddr, &fltr->ip_data.v6_addrs.dst_ip, + sizeof(struct in6_addr)); + VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV6, DST); + } + + fltr->ip_ver = 6; + + return 0; +} + +/** + * iavf_fill_fdir_tcp_hdr - fill the TCP protocol header + * @fltr: Flow Director filter data structure + * @proto_hdrs: Flow Director protocol headers data structure + * + * Returns 0 if the TCP protocol header is set successfully + */ +static int +iavf_fill_fdir_tcp_hdr(struct iavf_fdir_fltr *fltr, + struct virtchnl_proto_hdrs *proto_hdrs) +{ + struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++]; + struct tcphdr *tcph = (struct tcphdr *)hdr->buffer; + + VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, TCP); + + if (fltr->ip_mask.src_port == htons(U16_MAX)) { + tcph->source = fltr->ip_data.src_port; + VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, TCP, SRC_PORT); + } + + if (fltr->ip_mask.dst_port == htons(U16_MAX)) { + tcph->dest = fltr->ip_data.dst_port; + VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, TCP, DST_PORT); + } + + return 0; +} + +/** + * iavf_fill_fdir_udp_hdr - fill the UDP protocol header + * @fltr: Flow Director filter data structure + * @proto_hdrs: Flow Director protocol headers data structure + * + * Returns 0 if the UDP protocol header is set successfully + */ +static int +iavf_fill_fdir_udp_hdr(struct iavf_fdir_fltr *fltr, + struct virtchnl_proto_hdrs *proto_hdrs) +{ + struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++]; + struct udphdr *udph = (struct udphdr *)hdr->buffer; + + VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, UDP); + + if (fltr->ip_mask.src_port == htons(U16_MAX)) { + udph->source = fltr->ip_data.src_port; + VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, UDP, SRC_PORT); + } + + if (fltr->ip_mask.dst_port == htons(U16_MAX)) { + udph->dest = fltr->ip_data.dst_port; + VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, UDP, DST_PORT); + } + + if (!fltr->flex_cnt) + return 0; + + return iavf_fill_fdir_udp_flex_pay_hdr(fltr, proto_hdrs); +} + +/** + * 
iavf_fill_fdir_sctp_hdr - fill the SCTP protocol header + * @fltr: Flow Director filter data structure + * @proto_hdrs: Flow Director protocol headers data structure + * + * Returns 0 if the SCTP protocol header is set successfully + */ +static int +iavf_fill_fdir_sctp_hdr(struct iavf_fdir_fltr *fltr, + struct virtchnl_proto_hdrs *proto_hdrs) +{ + struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++]; + struct sctphdr *sctph = (struct sctphdr *)hdr->buffer; + + VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, SCTP); + + if (fltr->ip_mask.src_port == htons(U16_MAX)) { + sctph->source = fltr->ip_data.src_port; + VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, SCTP, SRC_PORT); + } + + if (fltr->ip_mask.dst_port == htons(U16_MAX)) { + sctph->dest = fltr->ip_data.dst_port; + VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, SCTP, DST_PORT); + } + + return 0; +} + +/** + * iavf_fill_fdir_ah_hdr - fill the AH protocol header + * @fltr: Flow Director filter data structure + * @proto_hdrs: Flow Director protocol headers data structure + * + * Returns 0 if the AH protocol header is set successfully + */ +static int +iavf_fill_fdir_ah_hdr(struct iavf_fdir_fltr *fltr, + struct virtchnl_proto_hdrs *proto_hdrs) +{ + struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++]; + struct ip_auth_hdr *ah = (struct ip_auth_hdr *)hdr->buffer; + + VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, AH); + + if (fltr->ip_mask.spi == htonl(U32_MAX)) { + ah->spi = fltr->ip_data.spi; + VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, AH, SPI); + } + + return 0; +} + +/** + * iavf_fill_fdir_esp_hdr - fill the ESP protocol header + * @fltr: Flow Director filter data structure + * @proto_hdrs: Flow Director protocol headers data structure + * + * Returns 0 if the ESP protocol header is set successfully + */ +static int +iavf_fill_fdir_esp_hdr(struct iavf_fdir_fltr *fltr, + struct virtchnl_proto_hdrs *proto_hdrs) +{ + struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++]; + struct ip_esp_hdr *esph = (struct ip_esp_hdr *)hdr->buffer; + + VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, ESP); + + if (fltr->ip_mask.spi == htonl(U32_MAX)) { + esph->spi = fltr->ip_data.spi; + VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, ESP, SPI); + } + + return 0; +} + +/** + * iavf_fill_fdir_l4_hdr - fill the L4 protocol header + * @fltr: Flow Director filter data structure + * @proto_hdrs: Flow Director protocol headers data structure + * + * Returns 0 if the L4 protocol header is set successfully + */ +static int +iavf_fill_fdir_l4_hdr(struct iavf_fdir_fltr *fltr, + struct virtchnl_proto_hdrs *proto_hdrs) +{ + struct virtchnl_proto_hdr *hdr; + __be32 *l4_4_data; + + if (!fltr->ip_mask.proto) /* IPv4/IPv6 header only */ + return 0; + + hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++]; + l4_4_data = (__be32 *)hdr->buffer; + + /* L2TPv3 over IP with 'Session ID' */ + if (fltr->ip_data.proto == 115 && fltr->ip_mask.l4_header == htonl(U32_MAX)) { + VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, L2TPV3); + VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, L2TPV3, SESS_ID); + + *l4_4_data = fltr->ip_data.l4_header; + } else { + return -EOPNOTSUPP; + } + + return 0; +} + +/** + * iavf_fill_fdir_eth_hdr - fill the Ethernet protocol header + * @fltr: Flow Director filter data structure + * @proto_hdrs: Flow Director protocol headers data structure + * + * Returns 0 if the Ethernet protocol header is set successfully + */ +static int +iavf_fill_fdir_eth_hdr(struct iavf_fdir_fltr *fltr, + struct virtchnl_proto_hdrs *proto_hdrs) +{ + struct virtchnl_proto_hdr *hdr = 
&proto_hdrs->proto_hdr[proto_hdrs->count++]; + struct ethhdr *ehdr = (struct ethhdr *)hdr->buffer; + + VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, ETH); + + if (fltr->eth_mask.etype == htons(U16_MAX)) { + if (fltr->eth_data.etype == htons(ETH_P_IP) || + fltr->eth_data.etype == htons(ETH_P_IPV6)) + return -EOPNOTSUPP; + + ehdr->h_proto = fltr->eth_data.etype; + VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, ETH, ETHERTYPE); + } + + return 0; +} + +/** + * iavf_fill_fdir_add_msg - fill the Flow Director filter into virtchnl message + * @adapter: pointer to the VF adapter structure + * @fltr: Flow Director filter data structure + * + * Returns 0 if the add Flow Director virtchnl message is filled successfully + */ +int iavf_fill_fdir_add_msg(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr) +{ + struct virtchnl_fdir_add *vc_msg = &fltr->vc_add_msg; + struct virtchnl_proto_hdrs *proto_hdrs; + int err; + + proto_hdrs = &vc_msg->rule_cfg.proto_hdrs; + + err = iavf_fill_fdir_eth_hdr(fltr, proto_hdrs); /* L2 always exists */ + if (err) + return err; + + switch (fltr->flow_type) { + case IAVF_FDIR_FLOW_IPV4_TCP: + err = iavf_fill_fdir_ip4_hdr(fltr, proto_hdrs) | + iavf_fill_fdir_tcp_hdr(fltr, proto_hdrs); + break; + case IAVF_FDIR_FLOW_IPV4_UDP: + err = iavf_fill_fdir_ip4_hdr(fltr, proto_hdrs) | + iavf_fill_fdir_udp_hdr(fltr, proto_hdrs); + break; + case IAVF_FDIR_FLOW_IPV4_SCTP: + err = iavf_fill_fdir_ip4_hdr(fltr, proto_hdrs) | + iavf_fill_fdir_sctp_hdr(fltr, proto_hdrs); + break; + case IAVF_FDIR_FLOW_IPV4_AH: + err = iavf_fill_fdir_ip4_hdr(fltr, proto_hdrs) | + iavf_fill_fdir_ah_hdr(fltr, proto_hdrs); + break; + case IAVF_FDIR_FLOW_IPV4_ESP: + err = iavf_fill_fdir_ip4_hdr(fltr, proto_hdrs) | + iavf_fill_fdir_esp_hdr(fltr, proto_hdrs); + break; + case IAVF_FDIR_FLOW_IPV4_OTHER: + err = iavf_fill_fdir_ip4_hdr(fltr, proto_hdrs) | + iavf_fill_fdir_l4_hdr(fltr, proto_hdrs); + break; + case IAVF_FDIR_FLOW_IPV6_TCP: + err = iavf_fill_fdir_ip6_hdr(fltr, proto_hdrs) | + iavf_fill_fdir_tcp_hdr(fltr, proto_hdrs); + break; + case IAVF_FDIR_FLOW_IPV6_UDP: + err = iavf_fill_fdir_ip6_hdr(fltr, proto_hdrs) | + iavf_fill_fdir_udp_hdr(fltr, proto_hdrs); + break; + case IAVF_FDIR_FLOW_IPV6_SCTP: + err = iavf_fill_fdir_ip6_hdr(fltr, proto_hdrs) | + iavf_fill_fdir_sctp_hdr(fltr, proto_hdrs); + break; + case IAVF_FDIR_FLOW_IPV6_AH: + err = iavf_fill_fdir_ip6_hdr(fltr, proto_hdrs) | + iavf_fill_fdir_ah_hdr(fltr, proto_hdrs); + break; + case IAVF_FDIR_FLOW_IPV6_ESP: + err = iavf_fill_fdir_ip6_hdr(fltr, proto_hdrs) | + iavf_fill_fdir_esp_hdr(fltr, proto_hdrs); + break; + case IAVF_FDIR_FLOW_IPV6_OTHER: + err = iavf_fill_fdir_ip6_hdr(fltr, proto_hdrs) | + iavf_fill_fdir_l4_hdr(fltr, proto_hdrs); + break; + case IAVF_FDIR_FLOW_NON_IP_L2: + break; + default: + err = -EINVAL; + break; + } + + if (err) + return err; + + vc_msg->vsi_id = adapter->vsi.id; + vc_msg->rule_cfg.action_set.count = 1; + vc_msg->rule_cfg.action_set.actions[0].type = fltr->action; + vc_msg->rule_cfg.action_set.actions[0].act_conf.queue.index = fltr->q_index; + + return 0; +} + +/** + * iavf_fdir_flow_proto_name - get the flow protocol name + * @flow_type: Flow Director filter flow type + **/ +static const char *iavf_fdir_flow_proto_name(enum iavf_fdir_flow_type flow_type) +{ + switch (flow_type) { + case IAVF_FDIR_FLOW_IPV4_TCP: + case IAVF_FDIR_FLOW_IPV6_TCP: + return "TCP"; + case IAVF_FDIR_FLOW_IPV4_UDP: + case IAVF_FDIR_FLOW_IPV6_UDP: + return "UDP"; + case IAVF_FDIR_FLOW_IPV4_SCTP: + case IAVF_FDIR_FLOW_IPV6_SCTP: + return "SCTP"; + case 
IAVF_FDIR_FLOW_IPV4_AH: + case IAVF_FDIR_FLOW_IPV6_AH: + return "AH"; + case IAVF_FDIR_FLOW_IPV4_ESP: + case IAVF_FDIR_FLOW_IPV6_ESP: + return "ESP"; + case IAVF_FDIR_FLOW_IPV4_OTHER: + case IAVF_FDIR_FLOW_IPV6_OTHER: + return "Other"; + case IAVF_FDIR_FLOW_NON_IP_L2: + return "Ethernet"; + default: + return NULL; + } +} + +/** + * iavf_print_fdir_fltr + * @adapter: adapter structure + * @fltr: Flow Director filter to print + * + * Print the Flow Director filter + **/ +void iavf_print_fdir_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr) +{ + const char *proto = iavf_fdir_flow_proto_name(fltr->flow_type); + + if (!proto) + return; + + switch (fltr->flow_type) { + case IAVF_FDIR_FLOW_IPV4_TCP: + case IAVF_FDIR_FLOW_IPV4_UDP: + case IAVF_FDIR_FLOW_IPV4_SCTP: + dev_info(&adapter->pdev->dev, "Rule ID: %u dst_ip: %pI4 src_ip %pI4 %s: dst_port %hu src_port %hu\n", + fltr->loc, + &fltr->ip_data.v4_addrs.dst_ip, + &fltr->ip_data.v4_addrs.src_ip, + proto, + ntohs(fltr->ip_data.dst_port), + ntohs(fltr->ip_data.src_port)); + break; + case IAVF_FDIR_FLOW_IPV4_AH: + case IAVF_FDIR_FLOW_IPV4_ESP: + dev_info(&adapter->pdev->dev, "Rule ID: %u dst_ip: %pI4 src_ip %pI4 %s: SPI %u\n", + fltr->loc, + &fltr->ip_data.v4_addrs.dst_ip, + &fltr->ip_data.v4_addrs.src_ip, + proto, + ntohl(fltr->ip_data.spi)); + break; + case IAVF_FDIR_FLOW_IPV4_OTHER: + dev_info(&adapter->pdev->dev, "Rule ID: %u dst_ip: %pI4 src_ip %pI4 proto: %u L4_bytes: 0x%x\n", + fltr->loc, + &fltr->ip_data.v4_addrs.dst_ip, + &fltr->ip_data.v4_addrs.src_ip, + fltr->ip_data.proto, + ntohl(fltr->ip_data.l4_header)); + break; + case IAVF_FDIR_FLOW_IPV6_TCP: + case IAVF_FDIR_FLOW_IPV6_UDP: + case IAVF_FDIR_FLOW_IPV6_SCTP: + dev_info(&adapter->pdev->dev, "Rule ID: %u dst_ip: %pI6 src_ip %pI6 %s: dst_port %hu src_port %hu\n", + fltr->loc, + &fltr->ip_data.v6_addrs.dst_ip, + &fltr->ip_data.v6_addrs.src_ip, + proto, + ntohs(fltr->ip_data.dst_port), + ntohs(fltr->ip_data.src_port)); + break; + case IAVF_FDIR_FLOW_IPV6_AH: + case IAVF_FDIR_FLOW_IPV6_ESP: + dev_info(&adapter->pdev->dev, "Rule ID: %u dst_ip: %pI6 src_ip %pI6 %s: SPI %u\n", + fltr->loc, + &fltr->ip_data.v6_addrs.dst_ip, + &fltr->ip_data.v6_addrs.src_ip, + proto, + ntohl(fltr->ip_data.spi)); + break; + case IAVF_FDIR_FLOW_IPV6_OTHER: + dev_info(&adapter->pdev->dev, "Rule ID: %u dst_ip: %pI6 src_ip %pI6 proto: %u L4_bytes: 0x%x\n", + fltr->loc, + &fltr->ip_data.v6_addrs.dst_ip, + &fltr->ip_data.v6_addrs.src_ip, + fltr->ip_data.proto, + ntohl(fltr->ip_data.l4_header)); + break; + case IAVF_FDIR_FLOW_NON_IP_L2: + dev_info(&adapter->pdev->dev, "Rule ID: %u eth_type: 0x%x\n", + fltr->loc, + ntohs(fltr->eth_data.etype)); + break; + default: + break; + } +} + +/** + * iavf_fdir_is_dup_fltr - test if filter is already in list + * @adapter: pointer to the VF adapter structure + * @fltr: Flow Director filter data structure + * + * Returns true if the filter is found in the list + */ +bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr) +{ + struct iavf_fdir_fltr *tmp; + + list_for_each_entry(tmp, &adapter->fdir_list_head, list) { + if (tmp->flow_type != fltr->flow_type) + continue; + + if (!memcmp(&tmp->eth_data, &fltr->eth_data, + sizeof(fltr->eth_data)) && + !memcmp(&tmp->ip_data, &fltr->ip_data, + sizeof(fltr->ip_data)) && + !memcmp(&tmp->ext_data, &fltr->ext_data, + sizeof(fltr->ext_data))) + return true; + } + + return false; +} + +/** + * iavf_find_fdir_fltr_by_loc - find filter with location + * @adapter: pointer to the VF adapter structure + * 
@loc: location to find. + * + * Returns pointer to Flow Director filter if found or null + */ +struct iavf_fdir_fltr *iavf_find_fdir_fltr_by_loc(struct iavf_adapter *adapter, u32 loc) +{ + struct iavf_fdir_fltr *rule; + + list_for_each_entry(rule, &adapter->fdir_list_head, list) + if (rule->loc == loc) + return rule; + + return NULL; +} + +/** + * iavf_fdir_list_add_fltr - add a new node to the flow director filter list + * @adapter: pointer to the VF adapter structure + * @fltr: filter node to add to structure + */ +void iavf_fdir_list_add_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr) +{ + struct iavf_fdir_fltr *rule, *parent = NULL; + + list_for_each_entry(rule, &adapter->fdir_list_head, list) { + if (rule->loc >= fltr->loc) + break; + parent = rule; + } + + if (parent) + list_add(&fltr->list, &parent->list); + else + list_add(&fltr->list, &adapter->fdir_list_head); +} diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.h b/drivers/net/ethernet/intel/iavf/iavf_fdir.h new file mode 100644 index 000000000000..33c55c366315 --- /dev/null +++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.h @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2021, Intel Corporation. */ + +#ifndef _IAVF_FDIR_H_ +#define _IAVF_FDIR_H_ + +struct iavf_adapter; + +/* State of Flow Director filter */ +enum iavf_fdir_fltr_state_t { + IAVF_FDIR_FLTR_ADD_REQUEST, /* User requests to add filter */ + IAVF_FDIR_FLTR_ADD_PENDING, /* Filter pending add by the PF */ + IAVF_FDIR_FLTR_DEL_REQUEST, /* User requests to delete filter */ + IAVF_FDIR_FLTR_DEL_PENDING, /* Filter pending delete by the PF */ + IAVF_FDIR_FLTR_ACTIVE, /* Filter is active */ +}; + +enum iavf_fdir_flow_type { + /* NONE - used for undef/error */ + IAVF_FDIR_FLOW_NONE = 0, + IAVF_FDIR_FLOW_IPV4_TCP, + IAVF_FDIR_FLOW_IPV4_UDP, + IAVF_FDIR_FLOW_IPV4_SCTP, + IAVF_FDIR_FLOW_IPV4_AH, + IAVF_FDIR_FLOW_IPV4_ESP, + IAVF_FDIR_FLOW_IPV4_OTHER, + IAVF_FDIR_FLOW_IPV6_TCP, + IAVF_FDIR_FLOW_IPV6_UDP, + IAVF_FDIR_FLOW_IPV6_SCTP, + IAVF_FDIR_FLOW_IPV6_AH, + IAVF_FDIR_FLOW_IPV6_ESP, + IAVF_FDIR_FLOW_IPV6_OTHER, + IAVF_FDIR_FLOW_NON_IP_L2, + /* MAX - this must be last and add anything new just above it */ + IAVF_FDIR_FLOW_PTYPE_MAX, +}; + +/* Must not exceed the array element number of '__be32 data[2]' in the ethtool + * 'struct ethtool_rx_flow_spec.m_ext.data[2]' to express the flex-byte (word). 
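+ * Each 32-bit user-def word encodes one (offset << 16 | word) pair, so + * a single filter can match at most two flex words.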
+ */ +#define IAVF_FLEX_WORD_NUM 2 + +struct iavf_flex_word { + u16 offset; + u16 word; +}; + +struct iavf_ipv4_addrs { + __be32 src_ip; + __be32 dst_ip; +}; + +struct iavf_ipv6_addrs { + struct in6_addr src_ip; + struct in6_addr dst_ip; +}; + +struct iavf_fdir_eth { + __be16 etype; +}; + +struct iavf_fdir_ip { + union { + struct iavf_ipv4_addrs v4_addrs; + struct iavf_ipv6_addrs v6_addrs; + }; + __be16 src_port; + __be16 dst_port; + __be32 l4_header; /* first 4 bytes of the layer 4 header */ + __be32 spi; /* security parameter index for AH/ESP */ + union { + u8 tos; + u8 tclass; + }; + u8 proto; +}; + +struct iavf_fdir_extra { + u32 usr_def[IAVF_FLEX_WORD_NUM]; +}; + +/* bookkeeping of Flow Director filters */ +struct iavf_fdir_fltr { + enum iavf_fdir_fltr_state_t state; + struct list_head list; + + enum iavf_fdir_flow_type flow_type; + + struct iavf_fdir_eth eth_data; + struct iavf_fdir_eth eth_mask; + + struct iavf_fdir_ip ip_data; + struct iavf_fdir_ip ip_mask; + + struct iavf_fdir_extra ext_data; + struct iavf_fdir_extra ext_mask; + + enum virtchnl_action action; + + /* flex byte filter data */ + u8 ip_ver; /* used to adjust the flex offset, 4 : IPv4, 6 : IPv6 */ + u8 flex_cnt; + struct iavf_flex_word flex_words[IAVF_FLEX_WORD_NUM]; + + u32 flow_id; + + u32 loc; /* Rule location inside the flow table */ + u32 q_index; + + struct virtchnl_fdir_add vc_add_msg; +}; + +int iavf_fill_fdir_add_msg(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr); +void iavf_print_fdir_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr); +bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr); +void iavf_fdir_list_add_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr); +struct iavf_fdir_fltr *iavf_find_fdir_fltr_by_loc(struct iavf_adapter *adapter, u32 loc); +#endif /* _IAVF_FDIR_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index dc5b3c06d1e0..e612c24fa384 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -959,8 +959,10 @@ void iavf_down(struct iavf_adapter *adapter) { struct net_device *netdev = adapter->netdev; struct iavf_vlan_filter *vlf; - struct iavf_mac_filter *f; struct iavf_cloud_filter *cf; + struct iavf_fdir_fltr *fdir; + struct iavf_mac_filter *f; + struct iavf_adv_rss *rss; if (adapter->state <= __IAVF_DOWN_PENDING) return; @@ -996,6 +998,19 @@ void iavf_down(struct iavf_adapter *adapter) } spin_unlock_bh(&adapter->cloud_filter_list_lock); + /* remove all Flow Director filters */ + spin_lock_bh(&adapter->fdir_fltr_lock); + list_for_each_entry(fdir, &adapter->fdir_list_head, list) + fdir->state = IAVF_FDIR_FLTR_DEL_REQUEST; + spin_unlock_bh(&adapter->fdir_fltr_lock); + + /* remove all advanced RSS configuration */ + spin_lock_bh(&adapter->adv_rss_lock); + list_for_each_entry(rss, &adapter->adv_rss_list_head, list) + rss->state = IAVF_ADV_RSS_DEL_REQUEST; + spin_unlock_bh(&adapter->adv_rss_lock); + if (!(adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) && adapter->state != __IAVF_RESETTING) { /* cancel any current operation */ @@ -1007,6 +1022,8 @@ void iavf_down(struct iavf_adapter *adapter) adapter->aq_required = IAVF_FLAG_AQ_DEL_MAC_FILTER; adapter->aq_required |= IAVF_FLAG_AQ_DEL_VLAN_FILTER; adapter->aq_required |= IAVF_FLAG_AQ_DEL_CLOUD_FILTER; + adapter->aq_required |= IAVF_FLAG_AQ_DEL_FDIR_FILTER; + adapter->aq_required |= IAVF_FLAG_AQ_DEL_ADV_RSS_CFG; adapter->aq_required |=
IAVF_FLAG_AQ_DISABLE_QUEUES; } @@ -1629,6 +1646,22 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter) iavf_add_cloud_filter(adapter); return 0; } + if (adapter->aq_required & IAVF_FLAG_AQ_ADD_FDIR_FILTER) { + iavf_add_fdir_filter(adapter); + return IAVF_SUCCESS; + } + if (adapter->aq_required & IAVF_FLAG_AQ_DEL_FDIR_FILTER) { + iavf_del_fdir_filter(adapter); + return IAVF_SUCCESS; + } + if (adapter->aq_required & IAVF_FLAG_AQ_ADD_ADV_RSS_CFG) { + iavf_add_adv_rss_cfg(adapter); + return 0; + } + if (adapter->aq_required & IAVF_FLAG_AQ_DEL_ADV_RSS_CFG) { + iavf_del_adv_rss_cfg(adapter); + return 0; + } return -EAGAIN; } @@ -2529,7 +2562,7 @@ validate_bw: } /** - * iavf_validate_channel_config - validate queue mapping info + * iavf_validate_ch_config - validate queue mapping info * @adapter: board private structure * @mqprio_qopt: queue parameters * @@ -3525,6 +3558,8 @@ int iavf_process_config(struct iavf_adapter *adapter) /* Enable cloud filter if ADQ is supported */ if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) hw_features |= NETIF_F_HW_TC; + if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_USO) + hw_features |= NETIF_F_GSO_UDP_L4; netdev->hw_features |= hw_features; @@ -3738,10 +3773,14 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) spin_lock_init(&adapter->mac_vlan_list_lock); spin_lock_init(&adapter->cloud_filter_list_lock); + spin_lock_init(&adapter->fdir_fltr_lock); + spin_lock_init(&adapter->adv_rss_lock); INIT_LIST_HEAD(&adapter->mac_filter_list); INIT_LIST_HEAD(&adapter->vlan_filter_list); INIT_LIST_HEAD(&adapter->cloud_filter_list); + INIT_LIST_HEAD(&adapter->fdir_list_head); + INIT_LIST_HEAD(&adapter->adv_rss_list_head); INIT_WORK(&adapter->reset_task, iavf_reset_task); INIT_WORK(&adapter->adminq_task, iavf_adminq_task); @@ -3845,7 +3884,9 @@ static void iavf_remove(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct iavf_adapter *adapter = netdev_priv(netdev); + struct iavf_fdir_fltr *fdir, *fdirtmp; struct iavf_vlan_filter *vlf, *vlftmp; + struct iavf_adv_rss *rss, *rsstmp; struct iavf_mac_filter *f, *ftmp; struct iavf_cloud_filter *cf, *cftmp; struct iavf_hw *hw = &adapter->hw; @@ -3899,8 +3940,6 @@ static void iavf_remove(struct pci_dev *pdev) iounmap(hw->hw_addr); pci_release_regions(pdev); - iavf_free_all_tx_resources(adapter); - iavf_free_all_rx_resources(adapter); iavf_free_queues(adapter); kfree(adapter->vf_res); spin_lock_bh(&adapter->mac_vlan_list_lock); @@ -3926,6 +3965,21 @@ static void iavf_remove(struct pci_dev *pdev) } spin_unlock_bh(&adapter->cloud_filter_list_lock); + spin_lock_bh(&adapter->fdir_fltr_lock); + list_for_each_entry_safe(fdir, fdirtmp, &adapter->fdir_list_head, list) { + list_del(&fdir->list); + kfree(fdir); + } + spin_unlock_bh(&adapter->fdir_fltr_lock); + + spin_lock_bh(&adapter->adv_rss_lock); + list_for_each_entry_safe(rss, rsstmp, &adapter->adv_rss_list_head, + list) { + list_del(&rss->list); + kfree(rss); + } + spin_unlock_bh(&adapter->adv_rss_lock); + free_netdev(netdev); pci_disable_pcie_error_reporting(pdev); diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index ffaf2742a2e0..3525eab8e9f9 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -1905,13 +1905,20 @@ static int iavf_tso(struct iavf_tx_buffer *first, u8 *hdr_len, /* determine offset of inner transport header */ l4_offset = l4.hdr - skb->data; - /* remove payload length from inner checksum 
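 * (the stack's pseudo-header checksum includes the payload length; the * hardware re-adds the correct per-segment length while segmenting)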
*/ paylen = skb->len - l4_offset; - csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen)); - /* compute length of segmentation header */ - *hdr_len = (l4.tcp->doff * 4) + l4_offset; + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) { + csum_replace_by_diff(&l4.udp->check, + (__force __wsum)htonl(paylen)); + /* compute length of UDP segmentation header */ + *hdr_len = (u8)sizeof(*l4.udp) + l4_offset; + } else { + csum_replace_by_diff(&l4.tcp->check, + (__force __wsum)htonl(paylen)); + /* compute length of TCP segmentation header */ + *hdr_len = (u8)((l4.tcp->doff * 4) + l4_offset); + } /* pull values out of skb_shinfo */ gso_size = skb_shinfo(skb)->gso_size; @@ -2098,7 +2105,7 @@ static int iavf_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, } /** - * iavf_create_tx_ctx Build the Tx context descriptor + * iavf_create_tx_ctx - Build the Tx context descriptor * @tx_ring: ring to create the descriptor on * @cd_type_cmd_tso_mss: Quad Word 1 * @cd_tunneling: Quad Word 0 - bits 0-31 diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 647e7fde11b4..0eab3c43bdc5 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -140,6 +140,9 @@ int iavf_send_vf_config_msg(struct iavf_adapter *adapter) VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM | VIRTCHNL_VF_OFFLOAD_REQ_QUEUES | VIRTCHNL_VF_OFFLOAD_ADQ | + VIRTCHNL_VF_OFFLOAD_USO | + VIRTCHNL_VF_OFFLOAD_FDIR_PF | + VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF | VIRTCHNL_VF_CAP_ADV_LINK_SPEED; adapter->current_op = VIRTCHNL_OP_GET_VF_RESOURCES; @@ -1005,7 +1008,7 @@ iavf_set_adapter_link_speed_from_vpe(struct iavf_adapter *adapter, } /** - * iavf_enable_channel + * iavf_enable_channels * @adapter: adapter structure * * Request that the PF enable channels as specified by @@ -1046,7 +1049,7 @@ void iavf_enable_channels(struct iavf_adapter *adapter) } /** - * iavf_disable_channel + * iavf_disable_channels * @adapter: adapter structure * * Request that the PF disable channels that are configured @@ -1198,6 +1201,200 @@ void iavf_del_cloud_filter(struct iavf_adapter *adapter) } /** + * iavf_add_fdir_filter + * @adapter: the VF adapter structure + * + * Request that the PF add Flow Director filters as specified + * by the user via ethtool.
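+ * + * Only one pending filter is sent per invocation; the request flag is + * left set, so the watchdog keeps calling back here until every filter + * in IAVF_FDIR_FLTR_ADD_REQUEST state has been handed to the PF.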
+ **/ +void iavf_add_fdir_filter(struct iavf_adapter *adapter) +{ + struct iavf_fdir_fltr *fdir; + struct virtchnl_fdir_add *f; + bool process_fltr = false; + int len; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot add Flow Director filter, command %d pending\n", + adapter->current_op); + return; + } + + len = sizeof(struct virtchnl_fdir_add); + f = kzalloc(len, GFP_KERNEL); + if (!f) + return; + + spin_lock_bh(&adapter->fdir_fltr_lock); + list_for_each_entry(fdir, &adapter->fdir_list_head, list) { + if (fdir->state == IAVF_FDIR_FLTR_ADD_REQUEST) { + process_fltr = true; + fdir->state = IAVF_FDIR_FLTR_ADD_PENDING; + memcpy(f, &fdir->vc_add_msg, len); + break; + } + } + spin_unlock_bh(&adapter->fdir_fltr_lock); + + if (!process_fltr) { + /* prevent iavf_add_fdir_filter() from being called when there + * are no filters to add + */ + adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_FDIR_FILTER; + kfree(f); + return; + } + adapter->current_op = VIRTCHNL_OP_ADD_FDIR_FILTER; + iavf_send_pf_msg(adapter, VIRTCHNL_OP_ADD_FDIR_FILTER, (u8 *)f, len); + kfree(f); +} + +/** + * iavf_del_fdir_filter + * @adapter: the VF adapter structure + * + * Request that the PF delete Flow Director filters as specified + * by the user via ethtool. + **/ +void iavf_del_fdir_filter(struct iavf_adapter *adapter) +{ + struct iavf_fdir_fltr *fdir; + struct virtchnl_fdir_del f; + bool process_fltr = false; + int len; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot remove Flow Director filter, command %d pending\n", + adapter->current_op); + return; + } + + len = sizeof(struct virtchnl_fdir_del); + + spin_lock_bh(&adapter->fdir_fltr_lock); + list_for_each_entry(fdir, &adapter->fdir_list_head, list) { + if (fdir->state == IAVF_FDIR_FLTR_DEL_REQUEST) { + process_fltr = true; + memset(&f, 0, len); + f.vsi_id = fdir->vc_add_msg.vsi_id; + f.flow_id = fdir->flow_id; + fdir->state = IAVF_FDIR_FLTR_DEL_PENDING; + break; + } + } + spin_unlock_bh(&adapter->fdir_fltr_lock); + + if (!process_fltr) { + adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_FDIR_FILTER; + return; + } + + adapter->current_op = VIRTCHNL_OP_DEL_FDIR_FILTER; + iavf_send_pf_msg(adapter, VIRTCHNL_OP_DEL_FDIR_FILTER, (u8 *)&f, len); +} + +/** + * iavf_add_adv_rss_cfg + * @adapter: the VF adapter structure + * + * Request that the PF add RSS configuration as specified + * by the user via ethtool. 
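+ * + * As with the FDIR path, at most one pending configuration is sent per + * call; IAVF_FLAG_AQ_ADD_ADV_RSS_CFG is cleared once the list is drained.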
+ **/ +void iavf_add_adv_rss_cfg(struct iavf_adapter *adapter) +{ + struct virtchnl_rss_cfg *rss_cfg; + struct iavf_adv_rss *rss; + bool process_rss = false; + int len; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot add RSS configuration, command %d pending\n", + adapter->current_op); + return; + } + + len = sizeof(struct virtchnl_rss_cfg); + rss_cfg = kzalloc(len, GFP_KERNEL); + if (!rss_cfg) + return; + + spin_lock_bh(&adapter->adv_rss_lock); + list_for_each_entry(rss, &adapter->adv_rss_list_head, list) { + if (rss->state == IAVF_ADV_RSS_ADD_REQUEST) { + process_rss = true; + rss->state = IAVF_ADV_RSS_ADD_PENDING; + memcpy(rss_cfg, &rss->cfg_msg, len); + iavf_print_adv_rss_cfg(adapter, rss, + "Input set change for", + "is pending"); + break; + } + } + spin_unlock_bh(&adapter->adv_rss_lock); + + if (process_rss) { + adapter->current_op = VIRTCHNL_OP_ADD_RSS_CFG; + iavf_send_pf_msg(adapter, VIRTCHNL_OP_ADD_RSS_CFG, + (u8 *)rss_cfg, len); + } else { + adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_ADV_RSS_CFG; + } + + kfree(rss_cfg); +} + +/** + * iavf_del_adv_rss_cfg + * @adapter: the VF adapter structure + * + * Request that the PF delete RSS configuration as specified + * by the user via ethtool. + **/ +void iavf_del_adv_rss_cfg(struct iavf_adapter *adapter) +{ + struct virtchnl_rss_cfg *rss_cfg; + struct iavf_adv_rss *rss; + bool process_rss = false; + int len; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot remove RSS configuration, command %d pending\n", + adapter->current_op); + return; + } + + len = sizeof(struct virtchnl_rss_cfg); + rss_cfg = kzalloc(len, GFP_KERNEL); + if (!rss_cfg) + return; + + spin_lock_bh(&adapter->adv_rss_lock); + list_for_each_entry(rss, &adapter->adv_rss_list_head, list) { + if (rss->state == IAVF_ADV_RSS_DEL_REQUEST) { + process_rss = true; + rss->state = IAVF_ADV_RSS_DEL_PENDING; + memcpy(rss_cfg, &rss->cfg_msg, len); + break; + } + } + spin_unlock_bh(&adapter->adv_rss_lock); + + if (process_rss) { + adapter->current_op = VIRTCHNL_OP_DEL_RSS_CFG; + iavf_send_pf_msg(adapter, VIRTCHNL_OP_DEL_RSS_CFG, + (u8 *)rss_cfg, len); + } else { + adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_ADV_RSS_CFG; + } + + kfree(rss_cfg); +} + +/** * iavf_request_reset * @adapter: adapter structure * @@ -1357,6 +1554,84 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, } } break; + case VIRTCHNL_OP_ADD_FDIR_FILTER: { + struct iavf_fdir_fltr *fdir, *fdir_tmp; + + spin_lock_bh(&adapter->fdir_fltr_lock); + list_for_each_entry_safe(fdir, fdir_tmp, + &adapter->fdir_list_head, + list) { + if (fdir->state == IAVF_FDIR_FLTR_ADD_PENDING) { + dev_info(&adapter->pdev->dev, "Failed to add Flow Director filter, error %s\n", + iavf_stat_str(&adapter->hw, + v_retval)); + iavf_print_fdir_fltr(adapter, fdir); + if (msglen) + dev_err(&adapter->pdev->dev, + "%s\n", msg); + list_del(&fdir->list); + kfree(fdir); + adapter->fdir_active_fltr--; + } + } + spin_unlock_bh(&adapter->fdir_fltr_lock); + } + break; + case VIRTCHNL_OP_DEL_FDIR_FILTER: { + struct iavf_fdir_fltr *fdir; + + spin_lock_bh(&adapter->fdir_fltr_lock); + list_for_each_entry(fdir, &adapter->fdir_list_head, + list) { + if (fdir->state == IAVF_FDIR_FLTR_DEL_PENDING) { + fdir->state = IAVF_FDIR_FLTR_ACTIVE; + dev_info(&adapter->pdev->dev, "Failed to del Flow Director filter, error %s\n", + iavf_stat_str(&adapter->hw, + v_retval)); + 
iavf_print_fdir_fltr(adapter, fdir); + } + } + spin_unlock_bh(&adapter->fdir_fltr_lock); + } + break; + case VIRTCHNL_OP_ADD_RSS_CFG: { + struct iavf_adv_rss *rss, *rss_tmp; + + spin_lock_bh(&adapter->adv_rss_lock); + list_for_each_entry_safe(rss, rss_tmp, + &adapter->adv_rss_list_head, + list) { + if (rss->state == IAVF_ADV_RSS_ADD_PENDING) { + iavf_print_adv_rss_cfg(adapter, rss, + "Failed to change the input set for", + NULL); + list_del(&rss->list); + kfree(rss); + } + } + spin_unlock_bh(&adapter->adv_rss_lock); + } + break; + case VIRTCHNL_OP_DEL_RSS_CFG: { + struct iavf_adv_rss *rss; + + spin_lock_bh(&adapter->adv_rss_lock); + list_for_each_entry(rss, &adapter->adv_rss_list_head, + list) { + if (rss->state == IAVF_ADV_RSS_DEL_PENDING) { + rss->state = IAVF_ADV_RSS_ACTIVE; + dev_err(&adapter->pdev->dev, "Failed to delete RSS configuration, error %s\n", + iavf_stat_str(&adapter->hw, + v_retval)); + } + } + spin_unlock_bh(&adapter->adv_rss_lock); + } + break; + case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING: + case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING: + dev_warn(&adapter->pdev->dev, "Changing VLAN Stripping is not allowed when Port VLAN is configured\n"); + break; default: dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n", v_retval, iavf_stat_str(&adapter->hw, v_retval), @@ -1490,6 +1765,87 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, } } break; + case VIRTCHNL_OP_ADD_FDIR_FILTER: { + struct virtchnl_fdir_add *add_fltr = (struct virtchnl_fdir_add *)msg; + struct iavf_fdir_fltr *fdir, *fdir_tmp; + + spin_lock_bh(&adapter->fdir_fltr_lock); + list_for_each_entry_safe(fdir, fdir_tmp, + &adapter->fdir_list_head, + list) { + if (fdir->state == IAVF_FDIR_FLTR_ADD_PENDING) { + if (add_fltr->status == VIRTCHNL_FDIR_SUCCESS) { + dev_info(&adapter->pdev->dev, "Flow Director filter with location %u is added\n", + fdir->loc); + fdir->state = IAVF_FDIR_FLTR_ACTIVE; + fdir->flow_id = add_fltr->flow_id; + } else { + dev_info(&adapter->pdev->dev, "Failed to add Flow Director filter with status: %d\n", + add_fltr->status); + iavf_print_fdir_fltr(adapter, fdir); + list_del(&fdir->list); + kfree(fdir); + adapter->fdir_active_fltr--; + } + } + } + spin_unlock_bh(&adapter->fdir_fltr_lock); + } + break; + case VIRTCHNL_OP_DEL_FDIR_FILTER: { + struct virtchnl_fdir_del *del_fltr = (struct virtchnl_fdir_del *)msg; + struct iavf_fdir_fltr *fdir, *fdir_tmp; + + spin_lock_bh(&adapter->fdir_fltr_lock); + list_for_each_entry_safe(fdir, fdir_tmp, &adapter->fdir_list_head, + list) { + if (fdir->state == IAVF_FDIR_FLTR_DEL_PENDING) { + if (del_fltr->status == VIRTCHNL_FDIR_SUCCESS) { + dev_info(&adapter->pdev->dev, "Flow Director filter with location %u is deleted\n", + fdir->loc); + list_del(&fdir->list); + kfree(fdir); + adapter->fdir_active_fltr--; + } else { + fdir->state = IAVF_FDIR_FLTR_ACTIVE; + dev_info(&adapter->pdev->dev, "Failed to delete Flow Director filter with status: %d\n", + del_fltr->status); + iavf_print_fdir_fltr(adapter, fdir); + } + } + } + spin_unlock_bh(&adapter->fdir_fltr_lock); + } + break; + case VIRTCHNL_OP_ADD_RSS_CFG: { + struct iavf_adv_rss *rss; + + spin_lock_bh(&adapter->adv_rss_lock); + list_for_each_entry(rss, &adapter->adv_rss_list_head, list) { + if (rss->state == IAVF_ADV_RSS_ADD_PENDING) { + iavf_print_adv_rss_cfg(adapter, rss, + "Input set change for", + "successful"); + rss->state = IAVF_ADV_RSS_ACTIVE; + } + } + spin_unlock_bh(&adapter->adv_rss_lock); + } + break; + case VIRTCHNL_OP_DEL_RSS_CFG: { + struct iavf_adv_rss *rss, *rss_tmp; + + 
spin_lock_bh(&adapter->adv_rss_lock); + list_for_each_entry_safe(rss, rss_tmp, + &adapter->adv_rss_list_head, list) { + if (rss->state == IAVF_ADV_RSS_DEL_PENDING) { + list_del(&rss->list); + kfree(rss); + } + } + spin_unlock_bh(&adapter->adv_rss_lock); + } + break; default: if (adapter->current_op && (v_opcode != adapter->current_op)) dev_warn(&adapter->pdev->dev, "Expected response %d from PF, received %d\n", diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile index 73da4f71f530..07fe857e9e3a 100644 --- a/drivers/net/ethernet/intel/ice/Makefile +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -26,7 +26,8 @@ ice-y := ice_main.o \ ice_fw_update.o \ ice_lag.o \ ice_ethtool.o -ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o +ice-$(CONFIG_PCI_IOV) += ice_virtchnl_allowlist.o +ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o ice_virtchnl_fdir.o ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_nl.o ice_dcb_lib.o ice-$(CONFIG_RFS_ACCEL) += ice_arfs.o ice-$(CONFIG_XDP_SOCKETS) += ice_xsk.o diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 357706444dd5..e35db3ff583b 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -36,6 +36,7 @@ #include <linux/bpf.h> #include <linux/avf/virtchnl.h> #include <linux/cpu_rmap.h> +#include <linux/dim.h> #include <net/devlink.h> #include <net/ipv6.h> #include <net/xdp_sock.h> @@ -44,6 +45,9 @@ #include <net/gre.h> #include <net/udp_tunnel.h> #include <net/vxlan.h> +#if IS_ENABLED(CONFIG_DCB) +#include <scsi/iscsi_proto.h> +#endif /* CONFIG_DCB */ #include "ice_devids.h" #include "ice_type.h" #include "ice_txrx.h" @@ -73,7 +77,7 @@ #define ICE_MIN_LAN_TXRX_MSIX 1 #define ICE_MIN_LAN_OICR_MSIX 1 #define ICE_MIN_MSIX (ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_LAN_OICR_MSIX) -#define ICE_FDIR_MSIX 1 +#define ICE_FDIR_MSIX 2 #define ICE_NO_VSI 0xffff #define ICE_VSI_MAP_CONTIG 0 #define ICE_VSI_MAP_SCATTER 1 @@ -84,9 +88,12 @@ #define ICE_MAX_LG_RSS_QS 256 #define ICE_RES_VALID_BIT 0x8000 #define ICE_RES_MISC_VEC_ID (ICE_RES_VALID_BIT - 1) +/* All VF control VSIs share the same IRQ, so assign a unique ID for them */ +#define ICE_RES_VF_CTRL_VEC_ID (ICE_RES_MISC_VEC_ID - 1) #define ICE_INVAL_Q_INDEX 0xffff #define ICE_INVAL_VFID 256 +#define ICE_MAX_RXQS_PER_TC 256 /* Used when setting VSI context per TC Rx queues */ #define ICE_MAX_RESET_WAIT 20 #define ICE_VSIQF_HKEY_ARRAY_SIZE ((VSIQF_HKEY_MAX_INDEX + 1) * 4) @@ -190,54 +197,58 @@ struct ice_sw { u8 dflt_vsi_ena:1; /* true if above dflt_vsi is enabled */ }; -enum ice_state { - __ICE_TESTING, - __ICE_DOWN, - __ICE_NEEDS_RESTART, - __ICE_PREPARED_FOR_RESET, /* set by driver when prepared */ - __ICE_RESET_OICR_RECV, /* set by driver after rcv reset OICR */ - __ICE_DCBNL_DEVRESET, /* set by dcbnl devreset */ - __ICE_PFR_REQ, /* set by driver and peers */ - __ICE_CORER_REQ, /* set by driver and peers */ - __ICE_GLOBR_REQ, /* set by driver and peers */ - __ICE_CORER_RECV, /* set by OICR handler */ - __ICE_GLOBR_RECV, /* set by OICR handler */ - __ICE_EMPR_RECV, /* set by OICR handler */ - __ICE_SUSPENDED, /* set on module remove path */ - __ICE_RESET_FAILED, /* set by reset/rebuild */ +enum ice_pf_state { + ICE_TESTING, + ICE_DOWN, + ICE_NEEDS_RESTART, + ICE_PREPARED_FOR_RESET, /* set by driver when prepared */ + ICE_RESET_OICR_RECV, /* set by driver after rcv reset OICR */ + ICE_PFR_REQ, /* set by driver and peers */ + ICE_CORER_REQ, /* set by driver and peers */ + ICE_GLOBR_REQ, /* set by driver and 
peers */ + ICE_CORER_RECV, /* set by OICR handler */ + ICE_GLOBR_RECV, /* set by OICR handler */ + ICE_EMPR_RECV, /* set by OICR handler */ + ICE_SUSPENDED, /* set on module remove path */ + ICE_RESET_FAILED, /* set by reset/rebuild */ /* When checking for the PF to be in a nominal operating state, the * bits that are grouped at the beginning of the list need to be - * checked. Bits occurring before __ICE_STATE_NOMINAL_CHECK_BITS will + * checked. Bits occurring before ICE_STATE_NOMINAL_CHECK_BITS will * be checked. If you need to add a bit into consideration for nominal * operating state, it must be added before - * __ICE_STATE_NOMINAL_CHECK_BITS. Do not move this entry's position + * ICE_STATE_NOMINAL_CHECK_BITS. Do not move this entry's position * without appropriate consideration. */ - __ICE_STATE_NOMINAL_CHECK_BITS, - __ICE_ADMINQ_EVENT_PENDING, - __ICE_MAILBOXQ_EVENT_PENDING, - __ICE_MDD_EVENT_PENDING, - __ICE_VFLR_EVENT_PENDING, - __ICE_FLTR_OVERFLOW_PROMISC, - __ICE_VF_DIS, - __ICE_CFG_BUSY, - __ICE_SERVICE_SCHED, - __ICE_SERVICE_DIS, - __ICE_FD_FLUSH_REQ, - __ICE_OICR_INTR_DIS, /* Global OICR interrupt disabled */ - __ICE_MDD_VF_PRINT_PENDING, /* set when MDD event handle */ - __ICE_VF_RESETS_DISABLED, /* disable resets during ice_remove */ - __ICE_LINK_DEFAULT_OVERRIDE_PENDING, - __ICE_PHY_INIT_COMPLETE, - __ICE_STATE_NBITS /* must be last */ + ICE_STATE_NOMINAL_CHECK_BITS, + ICE_ADMINQ_EVENT_PENDING, + ICE_MAILBOXQ_EVENT_PENDING, + ICE_MDD_EVENT_PENDING, + ICE_VFLR_EVENT_PENDING, + ICE_FLTR_OVERFLOW_PROMISC, + ICE_VF_DIS, + ICE_CFG_BUSY, + ICE_SERVICE_SCHED, + ICE_SERVICE_DIS, + ICE_FD_FLUSH_REQ, + ICE_OICR_INTR_DIS, /* Global OICR interrupt disabled */ + ICE_MDD_VF_PRINT_PENDING, /* set when MDD event handle */ + ICE_VF_RESETS_DISABLED, /* disable resets during ice_remove */ + ICE_LINK_DEFAULT_OVERRIDE_PENDING, + ICE_PHY_INIT_COMPLETE, + ICE_FD_VF_FLUSH_CTX, /* set at FD Rx IRQ or timeout */ + ICE_STATE_NBITS /* must be last */ }; -enum ice_vsi_flags { - ICE_VSI_FLAG_UMAC_FLTR_CHANGED, - ICE_VSI_FLAG_MMAC_FLTR_CHANGED, - ICE_VSI_FLAG_VLAN_FLTR_CHANGED, - ICE_VSI_FLAG_PROMISC_CHANGED, - ICE_VSI_FLAG_NBITS /* must be last */ +enum ice_vsi_state { + ICE_VSI_DOWN, + ICE_VSI_NEEDS_RESTART, + ICE_VSI_NETDEV_ALLOCD, + ICE_VSI_NETDEV_REGISTERED, + ICE_VSI_UMAC_FLTR_CHANGED, + ICE_VSI_MMAC_FLTR_CHANGED, + ICE_VSI_VLAN_FLTR_CHANGED, + ICE_VSI_PROMISC_CHANGED, + ICE_VSI_STATE_NBITS /* must be last */ }; /* struct that defines a VSI, associated with a dev */ @@ -253,14 +264,12 @@ struct ice_vsi { irqreturn_t (*irq_handler)(int irq, void *data); u64 tx_linearize; - DECLARE_BITMAP(state, __ICE_STATE_NBITS); - DECLARE_BITMAP(flags, ICE_VSI_FLAG_NBITS); + DECLARE_BITMAP(state, ICE_VSI_STATE_NBITS); unsigned int current_netdev_flags; u32 tx_restart; u32 tx_busy; u32 rx_buf_failed; u32 rx_page_failed; - u32 rx_gro_dropped; u16 num_q_vectors; u16 base_vector; /* IRQ base for OS reserved vectors */ enum ice_vsi_type type; @@ -343,7 +352,7 @@ struct ice_q_vector { u16 reg_idx; u8 num_ring_rx; /* total number of Rx rings in vector */ u8 num_ring_tx; /* total number of Tx rings in vector */ - u8 itr_countdown; /* when 0 should adjust adaptive ITR */ + u8 wb_on_itr:1; /* if true, WB on ITR is enabled */ /* in usecs, need to use ice_intrl_to_usecs_reg() before writing this * value to the device */ @@ -358,6 +367,8 @@ struct ice_q_vector { struct irq_affinity_notify affinity_notify; char name[ICE_INT_NAME_STR_LEN]; + + u16 total_events; /* net_dim(): number of interrupts processed */ } 
____cacheline_internodealigned_in_smp; enum ice_pf_flags { @@ -415,7 +426,8 @@ struct ice_pf { u16 num_msix_per_vf; /* used to ratelimit the MDD event logging */ unsigned long last_printed_mdd_jiffies; - DECLARE_BITMAP(state, __ICE_STATE_NBITS); + DECLARE_BITMAP(malvfs, ICE_MAX_VF_COUNT); + DECLARE_BITMAP(state, ICE_STATE_NBITS); DECLARE_BITMAP(flags, ICE_PF_FLAGS_NBITS); unsigned long *avail_txqs; /* bitmap to track PF Tx queue usage */ unsigned long *avail_rxqs; /* bitmap to track PF Rx queue usage */ @@ -500,7 +512,7 @@ ice_irq_dynamic_ena(struct ice_hw *hw, struct ice_vsi *vsi, val = GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M | (itr << GLINT_DYN_CTL_ITR_INDX_S); if (vsi) - if (test_bit(__ICE_DOWN, vsi->state)) + if (test_bit(ICE_VSI_DOWN, vsi->state)) return; wr32(hw, GLINT_DYN_CTL(vector), val); } @@ -617,14 +629,16 @@ int ice_destroy_xdp_rings(struct ice_vsi *vsi); int ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, u32 flags); -int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); -int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); +int ice_set_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size); +int ice_get_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size); +int ice_set_rss_key(struct ice_vsi *vsi, u8 *seed); +int ice_get_rss_key(struct ice_vsi *vsi, u8 *seed); void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size); int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset); void ice_print_link_msg(struct ice_vsi *vsi, bool isup); const char *ice_stat_str(enum ice_status stat_err); const char *ice_aq_str(enum ice_aq_err aq_err); -bool ice_is_wol_supported(struct ice_pf *pf); +bool ice_is_wol_supported(struct ice_hw *hw); int ice_fdir_write_fltr(struct ice_pf *pf, struct ice_fdir_fltr *input, bool add, bool is_tun); @@ -642,6 +656,7 @@ int ice_fdir_create_dflt_rules(struct ice_pf *pf); int ice_aq_wait_for_event(struct ice_pf *pf, u16 opcode, unsigned long timeout, struct ice_rq_event_info *event); int ice_open(struct net_device *netdev); +int ice_open_internal(struct net_device *netdev); int ice_stop(struct net_device *netdev); void ice_service_task_schedule(struct ice_pf *pf); diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 80186589153b..5cdfe406af84 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -877,16 +877,18 @@ struct ice_aqc_get_phy_caps { __le16 param0; /* 18.0 - Report qualified modules */ #define ICE_AQC_GET_PHY_RQM BIT(0) - /* 18.1 - 18.2 : Report mode - * 00b - Report NVM capabilities - * 01b - Report topology capabilities - * 10b - Report SW configured + /* 18.1 - 18.3 : Report mode + * 000b - Report NVM capabilities + * 001b - Report topology capabilities + * 010b - Report SW configured + * 100b - Report default capabilities */ -#define ICE_AQC_REPORT_MODE_S 1 -#define ICE_AQC_REPORT_MODE_M (3 << ICE_AQC_REPORT_MODE_S) -#define ICE_AQC_REPORT_NVM_CAP 0 -#define ICE_AQC_REPORT_TOPO_CAP BIT(1) -#define ICE_AQC_REPORT_SW_CFG BIT(2) +#define ICE_AQC_REPORT_MODE_S 1 +#define ICE_AQC_REPORT_MODE_M (7 << ICE_AQC_REPORT_MODE_S) +#define ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA 0 +#define ICE_AQC_REPORT_TOPO_CAP_MEDIA BIT(1) +#define ICE_AQC_REPORT_ACTIVE_CFG BIT(2) +#define ICE_AQC_REPORT_DFLT_CFG BIT(3) __le32 reserved1; __le32 addr_high; __le32 addr_low; @@ -1407,8 +1409,7 @@ struct ice_aqc_nvm_comp_tbl { u8 cvs[]; /* Component Version String */ } __packed; 
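For reference, the report-mode field widened in the ice_aqc_get_phy_caps hunk above (bits 18.1-18.3 of param0) now encodes four modes instead of three. The following is a minimal standalone sketch, not kernel code: the macro values are copied verbatim from the hunk, while report_mode_str() is a hypothetical helper added only to show how the mask and the mode constants relate.

/* Standalone illustration of the 3-bit report-mode field. Compile with
 * any C99 compiler; nothing here touches real hardware.
 */
#include <stdio.h>

#define BIT(n)				(1u << (n))
#define ICE_AQC_REPORT_MODE_S		1
#define ICE_AQC_REPORT_MODE_M		(7 << ICE_AQC_REPORT_MODE_S)
#define ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA	0
#define ICE_AQC_REPORT_TOPO_CAP_MEDIA	BIT(1)
#define ICE_AQC_REPORT_ACTIVE_CFG	BIT(2)
#define ICE_AQC_REPORT_DFLT_CFG		BIT(3)

static const char *report_mode_str(unsigned int param0)
{
	/* Keep bits 18.1-18.3; bit 18.0 (report qualified modules)
	 * is a separate flag and is masked out here.
	 */
	switch (param0 & ICE_AQC_REPORT_MODE_M) {
	case ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA:
		return "000b - NVM capabilities (no media)";
	case ICE_AQC_REPORT_TOPO_CAP_MEDIA:
		return "001b - topology capabilities (media)";
	case ICE_AQC_REPORT_ACTIVE_CFG:
		return "010b - active (SW configured)";
	case ICE_AQC_REPORT_DFLT_CFG:
		return "100b - default configuration";
	default:
		return "reserved";
	}
}

int main(void)
{
	unsigned int modes[] = {
		ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA,
		ICE_AQC_REPORT_TOPO_CAP_MEDIA,
		ICE_AQC_REPORT_ACTIVE_CFG,
		ICE_AQC_REPORT_DFLT_CFG,
	};
	unsigned int i;

	for (i = 0; i < 4; i++)
		printf("param0=0x%02x -> %s\n", modes[i],
		       report_mode_str(modes[i]));
	return 0;
}

This is why callers in the hunks that follow pass exactly one of the four constants (e.g. ICE_AQC_REPORT_DFLT_CFG only when ice_fw_supports_report_dflt_cfg() says the firmware understands it).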
-/* - * Send to PF command (indirect 0x0801) ID is only used by PF +/* Send to PF command (indirect 0x0801) ID is only used by PF * * Send to VF command (indirect 0x0802) ID is only used by PF * @@ -1790,6 +1791,7 @@ struct ice_pkg_ver { }; #define ICE_PKG_NAME_SIZE 32 +#define ICE_SEG_ID_SIZE 28 #define ICE_SEG_NAME_SIZE 28 struct ice_aqc_get_pkg_info { diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c index 6560acd76c94..88d98c9e5f91 100644 --- a/drivers/net/ethernet/intel/ice/ice_arfs.c +++ b/drivers/net/ethernet/intel/ice/ice_arfs.c @@ -581,8 +581,7 @@ void ice_free_cpu_rx_rmap(struct ice_vsi *vsi) return; netdev = vsi->netdev; - if (!netdev || !netdev->rx_cpu_rmap || - netdev->reg_state != NETREG_REGISTERED) + if (!netdev || !netdev->rx_cpu_rmap) return; free_irq_cpu_rmap(netdev->rx_cpu_rmap); @@ -604,8 +603,7 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi) pf = vsi->back; netdev = vsi->netdev; - if (!pf || !netdev || !vsi->num_q_vectors || - vsi->netdev->reg_state != NETREG_REGISTERED) + if (!pf || !netdev || !vsi->num_q_vectors) return -EINVAL; netdev_dbg(netdev, "Setup CPU RMAP: vsi type 0x%x, ifname %s, q_vectors %d\n", diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 3124a3bf519a..5985a7e5ca8a 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -113,6 +113,9 @@ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, u16 v_idx) q_vector->v_idx = v_idx; q_vector->tx.itr_setting = ICE_DFLT_TX_ITR; q_vector->rx.itr_setting = ICE_DFLT_RX_ITR; + q_vector->tx.itr_mode = ITR_DYNAMIC; + q_vector->rx.itr_mode = ITR_DYNAMIC; + if (vsi->type == ICE_VSI_VF) goto out; /* only set affinity_mask if the CPU is online */ @@ -215,6 +218,26 @@ static u16 ice_calc_q_handle(struct ice_vsi *vsi, struct ice_ring *ring, u8 tc) } /** + * ice_cfg_xps_tx_ring - Configure XPS for a Tx ring + * @ring: The Tx ring to configure + * + * This enables/disables XPS for a given Tx descriptor ring + * based on the TCs enabled for the VSI that ring belongs to. + */ +static void ice_cfg_xps_tx_ring(struct ice_ring *ring) +{ + if (!ring->q_vector || !ring->netdev) + return; + + /* We only initialize XPS once, so as not to overwrite user settings */ + if (test_and_set_bit(ICE_TX_XPS_INIT_DONE, ring->xps_state)) + return; + + netif_set_xps_queue(ring->netdev, &ring->q_vector->affinity_mask, + ring->q_index); +} + +/** * ice_setup_tx_ctx - setup a struct ice_tlan_ctx instance * @ring: The Tx ring to configure * @tlan_ctx: Pointer to the Tx LAN queue context structure to be initialized @@ -275,6 +298,22 @@ ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q) } /** + * ice_rx_offset - Return expected offset into page to access data + * @rx_ring: Ring we are requesting offset of + * + * Returns the offset value for ring into the data buffer. 
+ */ +static unsigned int ice_rx_offset(struct ice_ring *rx_ring) +{ + if (ice_ring_uses_build_skb(rx_ring)) + return ICE_SKB_PAD; + else if (ice_is_xdp_ena_vsi(rx_ring->vsi)) + return XDP_PACKET_HEADROOM; + + return 0; +} + +/** * ice_setup_rx_ctx - Configure a receive ring context * @ring: The Rx ring to configure * @@ -413,11 +452,15 @@ int ice_setup_rx_ctx(struct ice_ring *ring) else ice_set_ring_build_skb_ena(ring); + ring->rx_offset = ice_rx_offset(ring); + /* init queue specific tail register */ ring->tail = hw->hw_addr + QRX_TAIL(pf_q); writel(0, ring->tail); if (ring->xsk_pool) { + bool ok; + if (!xsk_buff_can_alloc(ring->xsk_pool, num_bufs)) { dev_warn(dev, "XSK buffer pool does not provide enough addresses to fill %d buffers on Rx ring %d\n", num_bufs, ring->q_index); @@ -426,8 +469,8 @@ int ice_setup_rx_ctx(struct ice_ring *ring) return 0; } - err = ice_alloc_rx_bufs_zc(ring, num_bufs); - if (err) + ok = ice_alloc_rx_bufs_zc(ring, num_bufs); + if (!ok) dev_info(dev, "Failed to allocate some buffers on XSK buffer pool enabled Rx ring %d (pf_q %d)\n", ring->q_index, pf_q); return 0; @@ -644,6 +687,9 @@ ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring, u16 pf_q; u8 tc; + /* Configure XPS */ + ice_cfg_xps_tx_ring(ring); + pf_q = ring->reg_idx; ice_setup_tx_ctx(ring, &tlan_ctx, pf_q); /* copy context contents into the qg_buf */ @@ -697,25 +743,13 @@ void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector) { ice_cfg_itr_gran(hw); - if (q_vector->num_ring_rx) { - struct ice_ring_container *rc = &q_vector->rx; + if (q_vector->num_ring_rx) + ice_write_itr(&q_vector->rx, q_vector->rx.itr_setting); - rc->target_itr = ITR_TO_REG(rc->itr_setting); - rc->next_update = jiffies + 1; - rc->current_itr = rc->target_itr; - wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx), - ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S); - } - - if (q_vector->num_ring_tx) { - struct ice_ring_container *rc = &q_vector->tx; + if (q_vector->num_ring_tx) + ice_write_itr(&q_vector->tx, q_vector->tx.itr_setting); - rc->target_itr = ITR_TO_REG(rc->itr_setting); - rc->next_update = jiffies + 1; - rc->current_itr = rc->target_itr; - wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx), - ITR_REG_ALIGN(rc->current_itr) >> ICE_ITR_GRAN_S); - } + ice_write_intrl(q_vector, q_vector->intrl); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 3d9475e222cd..e93b1e40f627 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -158,6 +158,10 @@ ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode, return ICE_ERR_PARAM; hw = pi->hw; + if (report_mode == ICE_AQC_REPORT_DFLT_CFG && + !ice_fw_supports_report_dflt_cfg(hw)) + return ICE_ERR_PARAM; + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_phy_caps); if (qual_mods) @@ -191,7 +195,7 @@ ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode, ice_debug(hw, ICE_DBG_LINK, " module_type[2] = 0x%x\n", pcaps->module_type[2]); - if (!status && report_mode == ICE_AQC_REPORT_TOPO_CAP) { + if (!status && report_mode == ICE_AQC_REPORT_TOPO_CAP_MEDIA) { pi->phy.phy_type_low = le64_to_cpu(pcaps->phy_type_low); pi->phy.phy_type_high = le64_to_cpu(pcaps->phy_type_high); memcpy(pi->phy.link_info.module_type, &pcaps->module_type, @@ -717,8 +721,8 @@ static enum ice_status ice_cfg_fw_log(struct ice_hw *hw, bool enable) if (!data) { data = devm_kcalloc(ice_hw_to_dev(hw), - sizeof(*data), ICE_AQC_FW_LOG_ID_MAX, + 
sizeof(*data), GFP_KERNEL); if (!data) return ICE_ERR_NO_MEMORY; @@ -922,7 +926,8 @@ enum ice_status ice_init_hw(struct ice_hw *hw) /* Initialize port_info struct with PHY capabilities */ status = ice_aq_get_phy_caps(hw->port_info, false, - ICE_AQC_REPORT_TOPO_CAP, pcaps, NULL); + ICE_AQC_REPORT_TOPO_CAP_MEDIA, pcaps, + NULL); devm_kfree(ice_hw_to_dev(hw), pcaps); if (status) dev_warn(ice_hw_to_dev(hw), "Get PHY capabilities failed status = %d, continuing anyway\n", @@ -1293,6 +1298,85 @@ const struct ice_ctx_ele ice_tlan_ctx_info[] = { DEFINE_MUTEX(ice_global_cfg_lock_sw); /** + * ice_should_retry_sq_send_cmd + * @opcode: AQ opcode + * + * Decide if we should retry the send command routine for the ATQ, depending + * on the opcode. + */ +static bool ice_should_retry_sq_send_cmd(u16 opcode) +{ + switch (opcode) { + case ice_aqc_opc_get_link_topo: + case ice_aqc_opc_lldp_stop: + case ice_aqc_opc_lldp_start: + case ice_aqc_opc_lldp_filter_ctrl: + return true; + } + + return false; +} + +/** + * ice_sq_send_cmd_retry - send command to Control Queue (ATQ) + * @hw: pointer to the HW struct + * @cq: pointer to the specific Control queue + * @desc: prefilled descriptor describing the command + * @buf: buffer to use for indirect commands (or NULL for direct commands) + * @buf_size: size of buffer for indirect commands (or 0 for direct commands) + * @cd: pointer to command details structure + * + * Retry sending the FW Admin Queue command, multiple times, to the FW Admin + * Queue if the EBUSY AQ error is returned. + */ +static enum ice_status +ice_sq_send_cmd_retry(struct ice_hw *hw, struct ice_ctl_q_info *cq, + struct ice_aq_desc *desc, void *buf, u16 buf_size, + struct ice_sq_cd *cd) +{ + struct ice_aq_desc desc_cpy; + enum ice_status status; + bool is_cmd_for_retry; + u8 *buf_cpy = NULL; + u8 idx = 0; + u16 opcode; + + opcode = le16_to_cpu(desc->opcode); + is_cmd_for_retry = ice_should_retry_sq_send_cmd(opcode); + memset(&desc_cpy, 0, sizeof(desc_cpy)); + + if (is_cmd_for_retry) { + if (buf) { + buf_cpy = kzalloc(buf_size, GFP_KERNEL); + if (!buf_cpy) + return ICE_ERR_NO_MEMORY; + } + + memcpy(&desc_cpy, desc, sizeof(desc_cpy)); + } + + do { + status = ice_sq_send_cmd(hw, cq, desc, buf, buf_size, cd); + + if (!is_cmd_for_retry || !status || + hw->adminq.sq_last_status != ICE_AQ_RC_EBUSY) + break; + + if (buf_cpy) + memcpy(buf, buf_cpy, buf_size); + + memcpy(desc, &desc_cpy, sizeof(desc_cpy)); + + mdelay(ICE_SQ_SEND_DELAY_TIME_MS); + + } while (++idx < ICE_SQ_SEND_MAX_EXECUTE); + + kfree(buf_cpy); + + return status; +} + +/** * ice_aq_send_cmd - send FW Admin Queue command to FW Admin Queue * @hw: pointer to the HW struct * @desc: descriptor describing the command @@ -1333,7 +1417,7 @@ ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf, break; } - status = ice_sq_send_cmd(hw, &hw->adminq, desc, buf, buf_size, cd); + status = ice_sq_send_cmd_retry(hw, &hw->adminq, desc, buf, buf_size, cd); if (lock_acquired) mutex_unlock(&ice_global_cfg_lock_sw); @@ -2655,7 +2739,7 @@ enum ice_status ice_update_link_info(struct ice_port_info *pi) if (!pcaps) return ICE_ERR_NO_MEMORY; - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP, + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, pcaps, NULL); devm_kfree(ice_hw_to_dev(hw), pcaps); @@ -2815,8 +2899,8 @@ ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool ena_auto_link_update) return ICE_ERR_NO_MEMORY; /* Get the current PHY config */ - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, 
pcaps, - NULL); + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, + pcaps, NULL); if (status) { *aq_failures = ICE_SET_FC_AQ_FAIL_GET; goto out; @@ -2929,17 +3013,6 @@ ice_copy_phy_caps_to_cfg(struct ice_port_info *pi, cfg->link_fec_opt = caps->link_fec_options; cfg->module_compliance_enforcement = caps->module_compliance_enforcement; - - if (ice_fw_supports_link_override(pi->hw)) { - struct ice_link_default_override_tlv tlv; - - if (ice_get_link_default_override(&tlv, pi)) - return; - - if (tlv.options & ICE_LINK_OVERRIDE_STRICT_MODE) - cfg->module_compliance_enforcement |= - ICE_LINK_OVERRIDE_STRICT_MODE; - } } /** @@ -2954,16 +3027,21 @@ ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg, { struct ice_aqc_get_phy_caps_data *pcaps; enum ice_status status; + struct ice_hw *hw; if (!pi || !cfg) return ICE_ERR_BAD_PTR; + hw = pi->hw; + pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL); if (!pcaps) return ICE_ERR_NO_MEMORY; - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP, pcaps, - NULL); + status = ice_aq_get_phy_caps(pi, false, + (ice_fw_supports_report_dflt_cfg(hw) ? + ICE_AQC_REPORT_DFLT_CFG : + ICE_AQC_REPORT_TOPO_CAP_MEDIA), pcaps, NULL); if (status) goto out; @@ -3002,7 +3080,8 @@ ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg, break; } - if (fec == ICE_FEC_AUTO && ice_fw_supports_link_override(pi->hw)) { + if (fec == ICE_FEC_AUTO && ice_fw_supports_link_override(hw) && + !ice_fw_supports_report_dflt_cfg(hw)) { struct ice_link_default_override_tlv tlv; if (ice_get_link_default_override(&tlv, pi)) @@ -3186,7 +3265,7 @@ ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr, ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_sff_eeprom); cmd = &desc.params.read_write_sff_param; - desc.flags = cpu_to_le16(ICE_AQ_FLAG_RD | ICE_AQ_FLAG_BUF); + desc.flags = cpu_to_le16(ICE_AQ_FLAG_RD); cmd->lport_num = (u8)(lport & 0xff); cmd->lport_num_valid = (u8)((lport >> 8) & 0x01); cmd->i2c_bus_addr = cpu_to_le16(((bus_addr >> 1) & @@ -3206,23 +3285,33 @@ ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr, /** * __ice_aq_get_set_rss_lut * @hw: pointer to the hardware structure - * @vsi_id: VSI FW index - * @lut_type: LUT table type - * @lut: pointer to the LUT buffer provided by the caller - * @lut_size: size of the LUT buffer - * @glob_lut_idx: global LUT index + * @params: RSS LUT parameters * @set: set true to set the table, false to get the table * * Internal function to get (0x0B05) or set (0x0B03) RSS look up table */ static enum ice_status -__ice_aq_get_set_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut, - u16 lut_size, u8 glob_lut_idx, bool set) +__ice_aq_get_set_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *params, bool set) { + u16 flags = 0, vsi_id, lut_type, lut_size, glob_lut_idx, vsi_handle; struct ice_aqc_get_set_rss_lut *cmd_resp; struct ice_aq_desc desc; enum ice_status status; - u16 flags = 0; + u8 *lut; + + if (!params) + return ICE_ERR_PARAM; + + vsi_handle = params->vsi_handle; + lut = params->lut; + + if (!ice_is_vsi_valid(hw, vsi_handle) || !lut) + return ICE_ERR_PARAM; + + lut_size = params->lut_size; + lut_type = params->lut_type; + glob_lut_idx = params->global_lut_id; + vsi_id = ice_get_hw_vsi_num(hw, vsi_handle); cmd_resp = &desc.params.get_set_rss_lut; @@ -3296,43 +3385,27 @@ ice_aq_get_set_rss_lut_exit: /** * ice_aq_get_rss_lut * @hw: pointer to the hardware structure - * @vsi_handle: software VSI handle - * @lut_type: LUT table type - * 
@lut: pointer to the LUT buffer provided by the caller - * @lut_size: size of the LUT buffer + * @get_params: RSS LUT parameters used to specify which RSS LUT to get * * get the RSS lookup table, PF or VSI type */ enum ice_status -ice_aq_get_rss_lut(struct ice_hw *hw, u16 vsi_handle, u8 lut_type, - u8 *lut, u16 lut_size) +ice_aq_get_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *get_params) { - if (!ice_is_vsi_valid(hw, vsi_handle) || !lut) - return ICE_ERR_PARAM; - - return __ice_aq_get_set_rss_lut(hw, ice_get_hw_vsi_num(hw, vsi_handle), - lut_type, lut, lut_size, 0, false); + return __ice_aq_get_set_rss_lut(hw, get_params, false); } /** * ice_aq_set_rss_lut * @hw: pointer to the hardware structure - * @vsi_handle: software VSI handle - * @lut_type: LUT table type - * @lut: pointer to the LUT buffer provided by the caller - * @lut_size: size of the LUT buffer + * @set_params: RSS LUT parameters used to specify how to set the RSS LUT * * set the RSS lookup table, PF or VSI type */ enum ice_status -ice_aq_set_rss_lut(struct ice_hw *hw, u16 vsi_handle, u8 lut_type, - u8 *lut, u16 lut_size) +ice_aq_set_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *set_params) { - if (!ice_is_vsi_valid(hw, vsi_handle) || !lut) - return ICE_ERR_PARAM; - - return __ice_aq_get_set_rss_lut(hw, ice_get_hw_vsi_num(hw, vsi_handle), - lut_type, lut, lut_size, 0, true); + return __ice_aq_get_set_rss_lut(hw, set_params, true); } /** @@ -4373,7 +4446,7 @@ ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size, } /** - * ice_fw_supports_lldp_fltr - check NVM version supports lldp_fltr_ctrl + * ice_fw_supports_lldp_fltr_ctrl - check NVM version supports lldp_fltr_ctrl * @hw: pointer to HW struct */ bool ice_fw_supports_lldp_fltr_ctrl(struct ice_hw *hw) @@ -4418,3 +4491,23 @@ ice_lldp_fltr_add_remove(struct ice_hw *hw, u16 vsi_num, bool add) return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); } + +/** + * ice_fw_supports_report_dflt_cfg + * @hw: pointer to the hardware structure + * + * Checks if the firmware supports report default configuration + */ +bool ice_fw_supports_report_dflt_cfg(struct ice_hw *hw) +{ + if (hw->api_maj_ver == ICE_FW_API_REPORT_DFLT_CFG_MAJ) { + if (hw->api_min_ver > ICE_FW_API_REPORT_DFLT_CFG_MIN) + return true; + if (hw->api_min_ver == ICE_FW_API_REPORT_DFLT_CFG_MIN && + hw->api_patch >= ICE_FW_API_REPORT_DFLT_CFG_PATCH) + return true; + } else if (hw->api_maj_ver > ICE_FW_API_REPORT_DFLT_CFG_MAJ) { + return true; + } + return false; +} diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index baf4064fcbfe..7a9d2dfb21a2 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -11,6 +11,9 @@ #include "ice_switch.h" #include <linux/avf/virtchnl.h> +#define ICE_SQ_SEND_DELAY_TIME_MS 10 +#define ICE_SQ_SEND_MAX_EXECUTE 3 + enum ice_status ice_init_hw(struct ice_hw *hw); void ice_deinit_hw(struct ice_hw *hw); enum ice_status ice_check_reset(struct ice_hw *hw); @@ -51,11 +54,9 @@ ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx, u32 rxq_index); enum ice_status -ice_aq_get_rss_lut(struct ice_hw *hw, u16 vsi_handle, u8 lut_type, u8 *lut, - u16 lut_size); +ice_aq_get_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *get_params); enum ice_status -ice_aq_set_rss_lut(struct ice_hw *hw, u16 vsi_handle, u8 lut_type, u8 *lut, - u16 lut_size); +ice_aq_set_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params 
*set_params); enum ice_status ice_aq_get_rss_key(struct ice_hw *hw, u16 vsi_handle, struct ice_aqc_get_set_rss_keys *keys); @@ -178,4 +179,5 @@ ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size, bool ice_fw_supports_lldp_fltr_ctrl(struct ice_hw *hw); enum ice_status ice_lldp_fltr_add_remove(struct ice_hw *hw, u16 vsi_num, bool add); +bool ice_fw_supports_report_dflt_cfg(struct ice_hw *hw); #endif /* _ICE_COMMON_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c index b2d8a5932b1d..87b33bdd4960 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.c +++ b/drivers/net/ethernet/intel/ice/ice_controlq.c @@ -892,7 +892,7 @@ static bool ice_sq_done(struct ice_hw *hw, struct ice_ctl_q_info *cq) * ice_sq_send_cmd - send command to Control Queue (ATQ) * @hw: pointer to the HW struct * @cq: pointer to the specific Control queue - * @desc: prefilled descriptor describing the command (non DMA mem) + * @desc: prefilled descriptor describing the command * @buf: buffer to use for indirect commands (or NULL for direct commands) * @buf_size: size of buffer for indirect commands (or 0 for direct commands) * @cd: pointer to command details structure @@ -1097,6 +1097,7 @@ ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq, struct ice_rq_event_info *e, u16 *pending) { u16 ntc = cq->rq.next_to_clean; + enum ice_aq_err rq_last_status; enum ice_status ret_code = 0; struct ice_aq_desc *desc; struct ice_dma_mem *bi; @@ -1130,13 +1131,12 @@ ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq, desc = ICE_CTL_Q_DESC(cq->rq, ntc); desc_idx = ntc; - cq->rq_last_status = (enum ice_aq_err)le16_to_cpu(desc->retval); + rq_last_status = (enum ice_aq_err)le16_to_cpu(desc->retval); flags = le16_to_cpu(desc->flags); if (flags & ICE_AQ_FLAG_ERR) { ret_code = ICE_ERR_AQ_ERROR; ice_debug(hw, ICE_DBG_AQ_MSG, "Control Receive Queue Event 0x%04X received with error 0x%X\n", - le16_to_cpu(desc->opcode), - cq->rq_last_status); + le16_to_cpu(desc->opcode), rq_last_status); } memcpy(&e->desc, desc, sizeof(e->desc)); datalen = le16_to_cpu(desc->datalen); diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h index faaa08e8171b..fe75871e48ca 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.h +++ b/drivers/net/ethernet/intel/ice/ice_controlq.h @@ -14,8 +14,8 @@ (&(((struct ice_aq_desc *)((R).desc_buf.va))[i])) #define ICE_CTL_Q_DESC_UNUSED(R) \ - (u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \ - (R)->next_to_clean - (R)->next_to_use - 1) + ((u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \ + (R)->next_to_clean - (R)->next_to_use - 1)) /* Defines that help manage the driver vs FW API checks. * Take a look at ice_aq_ver_check in ice_controlq.c for actual usage. 
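The ICE_CTL_Q_DESC_UNUSED change above is purely parenthesization for macro hygiene; the ring arithmetic is unchanged. A minimal standalone sketch, assuming a stripped-down stand-in for struct ice_ctl_q_ring with only the three fields the macro reads, shows what it computes: the number of free descriptors in the circular control-queue ring, with one slot always held back to distinguish full from empty.

#include <stdio.h>

struct ring {			/* stand-in for struct ice_ctl_q_ring */
	unsigned short next_to_clean;	/* first descriptor HW has finished */
	unsigned short next_to_use;	/* next descriptor SW will fill */
	unsigned short count;		/* total descriptors in the ring */
};

/* Copied from the hunk above, with the stand-in struct. */
#define ICE_CTL_Q_DESC_UNUSED(R) \
	((unsigned short)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
			  (R)->next_to_clean - (R)->next_to_use - 1))

int main(void)
{
	/* Unwrapped case: slots 3..6 are in flight, so
	 * 16 + 3 - 7 - 1 = 11 descriptors are free.
	 */
	struct ring r = { .next_to_clean = 3, .next_to_use = 7, .count = 16 };

	printf("free descriptors: %u\n", ICE_CTL_Q_DESC_UNUSED(&r));

	/* Wrapped case: use pointer has lapped past the end, slots
	 * 12..15 and 0..1 are in flight -> 12 - 2 - 1 = 9 free.
	 */
	r.next_to_clean = 12;
	r.next_to_use = 2;
	printf("free descriptors: %u\n", ICE_CTL_Q_DESC_UNUSED(&r));
	return 0;
}

The same file's timeout change reads naturally against this: with ICE_CTL_Q_SQ_CMD_USEC fixed at 100 usec per poll, raising the count from 2500 to 10000 moves the maximum send-queue command wait from 250 ms to 1 s.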
@@ -31,8 +31,8 @@ enum ice_ctl_q { ICE_CTL_Q_MAILBOX, }; -/* Control Queue timeout settings - max delay 250ms */ -#define ICE_CTL_Q_SQ_CMD_TIMEOUT 2500 /* Count 2500 times */ +/* Control Queue timeout settings - max delay 1s */ +#define ICE_CTL_Q_SQ_CMD_TIMEOUT 10000 /* Count 10000 times */ #define ICE_CTL_Q_SQ_CMD_USEC 100 /* Check every 100usec */ #define ICE_CTL_Q_ADMIN_INIT_TIMEOUT 10 /* Count 10 times */ #define ICE_CTL_Q_ADMIN_INIT_MSEC 100 /* Check every 100msec */ @@ -83,7 +83,6 @@ struct ice_rq_event_info { /* Control Queue information */ struct ice_ctl_q_info { enum ice_ctl_q qtype; - enum ice_aq_err rq_last_status; /* last status on receive queue */ struct ice_ctl_q_ring rq; /* receive queue */ struct ice_ctl_q_ring sq; /* send queue */ u32 sq_cmd_timeout; /* send queue cmd write back timeout */ diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c index e42727941ef5..849fcf605479 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb.c @@ -738,22 +738,27 @@ ice_aq_get_cee_dcb_cfg(struct ice_hw *hw, /** * ice_cee_to_dcb_cfg * @cee_cfg: pointer to CEE configuration struct - * @dcbcfg: DCB configuration struct + * @pi: port information structure * * Convert CEE configuration from firmware to DCB configuration */ static void ice_cee_to_dcb_cfg(struct ice_aqc_get_cee_dcb_cfg_resp *cee_cfg, - struct ice_dcbx_cfg *dcbcfg) + struct ice_port_info *pi) { u32 status, tlv_status = le32_to_cpu(cee_cfg->tlv_status); - u32 ice_aqc_cee_status_mask, ice_aqc_cee_status_shift; - u16 app_prio = le16_to_cpu(cee_cfg->oper_app_prio); + u32 ice_aqc_cee_status_mask, ice_aqc_cee_status_shift, j; u8 i, err, sync, oper, app_index, ice_app_sel_type; + u16 app_prio = le16_to_cpu(cee_cfg->oper_app_prio); u16 ice_aqc_cee_app_mask, ice_aqc_cee_app_shift; + struct ice_dcbx_cfg *cmp_dcbcfg, *dcbcfg; u16 ice_app_prot_id_type; - /* CEE PG data to ETS config */ + dcbcfg = &pi->qos_cfg.local_dcbx_cfg; + dcbcfg->dcbx_mode = ICE_DCBX_MODE_CEE; + dcbcfg->tlv_status = tlv_status; + + /* CEE PG data */ dcbcfg->etscfg.maxtcs = cee_cfg->oper_num_tc; /* Note that the FW creates the oper_prio_tc nibbles reversed @@ -780,10 +785,16 @@ ice_cee_to_dcb_cfg(struct ice_aqc_get_cee_dcb_cfg_resp *cee_cfg, } } - /* CEE PFC data to ETS config */ + /* CEE PFC data */ dcbcfg->pfc.pfcena = cee_cfg->oper_pfc_en; dcbcfg->pfc.pfccap = ICE_MAX_TRAFFIC_CLASS; + /* CEE APP TLV data */ + if (dcbcfg->app_mode == ICE_DCBX_APPS_NON_WILLING) + cmp_dcbcfg = &pi->qos_cfg.desired_dcbx_cfg; + else + cmp_dcbcfg = &pi->qos_cfg.remote_dcbx_cfg; + app_index = 0; for (i = 0; i < 3; i++) { if (i == 0) { @@ -793,7 +804,7 @@ ice_cee_to_dcb_cfg(struct ice_aqc_get_cee_dcb_cfg_resp *cee_cfg, ice_aqc_cee_app_mask = ICE_AQC_CEE_APP_FCOE_M; ice_aqc_cee_app_shift = ICE_AQC_CEE_APP_FCOE_S; ice_app_sel_type = ICE_APP_SEL_ETHTYPE; - ice_app_prot_id_type = ICE_APP_PROT_ID_FCOE; + ice_app_prot_id_type = ETH_P_FCOE; } else if (i == 1) { /* iSCSI APP */ ice_aqc_cee_status_mask = ICE_AQC_CEE_ISCSI_STATUS_M; @@ -801,7 +812,19 @@ ice_cee_to_dcb_cfg(struct ice_aqc_get_cee_dcb_cfg_resp *cee_cfg, ice_aqc_cee_app_mask = ICE_AQC_CEE_APP_ISCSI_M; ice_aqc_cee_app_shift = ICE_AQC_CEE_APP_ISCSI_S; ice_app_sel_type = ICE_APP_SEL_TCPIP; - ice_app_prot_id_type = ICE_APP_PROT_ID_ISCSI; + ice_app_prot_id_type = ISCSI_LISTEN_PORT; + + for (j = 0; j < cmp_dcbcfg->numapps; j++) { + u16 prot_id = cmp_dcbcfg->app[j].prot_id; + u8 sel = cmp_dcbcfg->app[j].selector; + + if (sel == ICE_APP_SEL_TCPIP && + (prot_id 
== ISCSI_LISTEN_PORT || + prot_id == ICE_APP_PROT_ID_ISCSI_860)) { + ice_app_prot_id_type = prot_id; + break; + } + } } else { /* FIP APP */ ice_aqc_cee_status_mask = ICE_AQC_CEE_FIP_STATUS_M; @@ -809,7 +832,7 @@ ice_cee_to_dcb_cfg(struct ice_aqc_get_cee_dcb_cfg_resp *cee_cfg, ice_aqc_cee_app_mask = ICE_AQC_CEE_APP_FIP_M; ice_aqc_cee_app_shift = ICE_AQC_CEE_APP_FIP_S; ice_app_sel_type = ICE_APP_SEL_ETHTYPE; - ice_app_prot_id_type = ICE_APP_PROT_ID_FIP; + ice_app_prot_id_type = ETH_P_FIP; } status = (tlv_status & ice_aqc_cee_status_mask) >> @@ -834,7 +857,7 @@ ice_cee_to_dcb_cfg(struct ice_aqc_get_cee_dcb_cfg_resp *cee_cfg, } /** - * ice_get_ieee_dcb_cfg + * ice_get_ieee_or_cee_dcb_cfg * @pi: port information structure * @dcbx_mode: mode of DCBX (IEEE or CEE) * @@ -892,11 +915,8 @@ enum ice_status ice_get_dcb_cfg(struct ice_port_info *pi) ret = ice_aq_get_cee_dcb_cfg(pi->hw, &cee_cfg, NULL); if (!ret) { /* CEE mode */ - dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg; - dcbx_cfg->dcbx_mode = ICE_DCBX_MODE_CEE; - dcbx_cfg->tlv_status = le32_to_cpu(cee_cfg.tlv_status); - ice_cee_to_dcb_cfg(&cee_cfg, dcbx_cfg); ret = ice_get_ieee_or_cee_dcb_cfg(pi, ICE_DCBX_MODE_CEE); + ice_cee_to_dcb_cfg(&cee_cfg, pi); } else if (pi->hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT) { /* CEE mode not enabled try querying IEEE data */ dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg; diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c index 1e8f71ffc8ce..df02cffdf209 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c @@ -563,7 +563,7 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool ets_willing, bool locked) dcbcfg->numapps = 1; dcbcfg->app[0].selector = ICE_APP_SEL_ETHTYPE; dcbcfg->app[0].priority = 3; - dcbcfg->app[0].prot_id = ICE_APP_PROT_ID_FCOE; + dcbcfg->app[0].prot_id = ETH_P_FCOE; ret = ice_pf_dcb_cfg(pf, dcbcfg, locked); kfree(dcbcfg); diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c index 468a63f7eff9..4180f1f35fb8 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c @@ -18,12 +18,10 @@ static void ice_dcbnl_devreset(struct net_device *netdev) while (ice_is_reset_in_progress(pf->state)) usleep_range(1000, 2000); - set_bit(__ICE_DCBNL_DEVRESET, pf->state); dev_close(netdev); netdev_state_change(netdev); dev_open(netdev, NULL); netdev_state_change(netdev); - clear_bit(__ICE_DCBNL_DEVRESET, pf->state); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 2dcfa685b763..d9ddd0bcf65f 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -60,7 +60,6 @@ static const struct ice_stats ice_gstrings_vsi_stats[] = { ICE_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol), ICE_VSI_STAT("rx_alloc_fail", rx_buf_failed), ICE_VSI_STAT("rx_pg_alloc_fail", rx_page_failed), - ICE_VSI_STAT("rx_gro_dropped", rx_gro_dropped), ICE_VSI_STAT("tx_errors", eth_stats.tx_errors), ICE_VSI_STAT("tx_linearize", tx_linearize), ICE_VSI_STAT("tx_busy", tx_busy), @@ -807,7 +806,7 @@ ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test, if (eth_test->flags == ETH_TEST_FL_OFFLINE) { netdev_info(netdev, "offline testing starting\n"); - set_bit(__ICE_TESTING, pf->state); + set_bit(ICE_TESTING, pf->state); if (ice_active_vfs(pf)) { dev_warn(dev, "Please take active VFs and Netqueues offline 
and restart the adapter before running NIC diagnostics\n"); @@ -817,7 +816,7 @@ ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test, data[ICE_ETH_TEST_LOOP] = 1; data[ICE_ETH_TEST_LINK] = 1; eth_test->flags |= ETH_TEST_FL_FAILED; - clear_bit(__ICE_TESTING, pf->state); + clear_bit(ICE_TESTING, pf->state); goto skip_ol_tests; } /* If the device is online then take it offline */ @@ -838,7 +837,7 @@ ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test, data[ICE_ETH_TEST_REG]) eth_test->flags |= ETH_TEST_FL_FAILED; - clear_bit(__ICE_TESTING, pf->state); + clear_bit(ICE_TESTING, pf->state); if (if_running) { int status = ice_open(netdev); @@ -871,68 +870,47 @@ static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; - char *p = (char *)data; unsigned int i; + u8 *p = data; switch (stringset) { case ETH_SS_STATS: - for (i = 0; i < ICE_VSI_STATS_LEN; i++) { - snprintf(p, ETH_GSTRING_LEN, "%s", - ice_gstrings_vsi_stats[i].stat_string); - p += ETH_GSTRING_LEN; - } + for (i = 0; i < ICE_VSI_STATS_LEN; i++) + ethtool_sprintf(&p, + ice_gstrings_vsi_stats[i].stat_string); ice_for_each_alloc_txq(vsi, i) { - snprintf(p, ETH_GSTRING_LEN, - "tx_queue_%u_packets", i); - p += ETH_GSTRING_LEN; - snprintf(p, ETH_GSTRING_LEN, "tx_queue_%u_bytes", i); - p += ETH_GSTRING_LEN; + ethtool_sprintf(&p, "tx_queue_%u_packets", i); + ethtool_sprintf(&p, "tx_queue_%u_bytes", i); } ice_for_each_alloc_rxq(vsi, i) { - snprintf(p, ETH_GSTRING_LEN, - "rx_queue_%u_packets", i); - p += ETH_GSTRING_LEN; - snprintf(p, ETH_GSTRING_LEN, "rx_queue_%u_bytes", i); - p += ETH_GSTRING_LEN; + ethtool_sprintf(&p, "rx_queue_%u_packets", i); + ethtool_sprintf(&p, "rx_queue_%u_bytes", i); } if (vsi->type != ICE_VSI_PF) return; - for (i = 0; i < ICE_PF_STATS_LEN; i++) { - snprintf(p, ETH_GSTRING_LEN, "%s", - ice_gstrings_pf_stats[i].stat_string); - p += ETH_GSTRING_LEN; - } + for (i = 0; i < ICE_PF_STATS_LEN; i++) + ethtool_sprintf(&p, + ice_gstrings_pf_stats[i].stat_string); for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) { - snprintf(p, ETH_GSTRING_LEN, - "tx_priority_%u_xon.nic", i); - p += ETH_GSTRING_LEN; - snprintf(p, ETH_GSTRING_LEN, - "tx_priority_%u_xoff.nic", i); - p += ETH_GSTRING_LEN; + ethtool_sprintf(&p, "tx_priority_%u_xon.nic", i); + ethtool_sprintf(&p, "tx_priority_%u_xoff.nic", i); } for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) { - snprintf(p, ETH_GSTRING_LEN, - "rx_priority_%u_xon.nic", i); - p += ETH_GSTRING_LEN; - snprintf(p, ETH_GSTRING_LEN, - "rx_priority_%u_xoff.nic", i); - p += ETH_GSTRING_LEN; + ethtool_sprintf(&p, "rx_priority_%u_xon.nic", i); + ethtool_sprintf(&p, "rx_priority_%u_xoff.nic", i); } break; case ETH_SS_TEST: memcpy(data, ice_gstrings_test, ICE_TEST_LEN * ETH_GSTRING_LEN); break; case ETH_SS_PRIV_FLAGS: - for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) { - snprintf(p, ETH_GSTRING_LEN, "%s", - ice_gstrings_priv_flags[i].name); - p += ETH_GSTRING_LEN; - } + for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) + ethtool_sprintf(&p, ice_gstrings_priv_flags[i].name); break; default: break; @@ -1081,7 +1059,7 @@ ice_get_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam) if (!caps) return -ENOMEM; - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP, + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, caps, NULL); if (status) { err = -EAGAIN; @@ -1116,24 +1094,15 @@ static int ice_nway_reset(struct net_device *netdev) { struct 
ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; - struct ice_port_info *pi; - enum ice_status status; + int err; - pi = vsi->port_info; /* If VSI state is up, then restart autoneg with link up */ - if (!test_bit(__ICE_DOWN, vsi->back->state)) - status = ice_aq_set_link_restart_an(pi, true, NULL); + if (!test_bit(ICE_DOWN, vsi->back->state)) + err = ice_set_link(vsi, true); else - status = ice_aq_set_link_restart_an(pi, false, NULL); + err = ice_set_link(vsi, false); - if (status) { - netdev_info(netdev, "link restart failed, err %s aq_err %s\n", - ice_stat_str(status), - ice_aq_str(pi->hw->adminq.sq_last_status)); - return -EIO; - } - - return 0; + return err; } /** @@ -1475,8 +1444,8 @@ void ice_mask_min_supported_speeds(u64 phy_types_high, u64 *phy_types_low) do { \ if (req_speeds & (aq_link_speed) || \ (!req_speeds && \ - (adv_phy_type_lo & phy_type_mask_lo || \ - adv_phy_type_hi & phy_type_mask_hi))) \ + (advert_phy_type_lo & phy_type_mask_lo || \ + advert_phy_type_hi & phy_type_mask_hi))) \ ethtool_link_ksettings_add_link_mode(ks, advertising,\ ethtool_link_mode); \ } while (0) @@ -1493,10 +1462,10 @@ ice_phy_type_to_ethtool(struct net_device *netdev, struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; + u64 advert_phy_type_lo = 0; + u64 advert_phy_type_hi = 0; u64 phy_type_mask_lo = 0; u64 phy_type_mask_hi = 0; - u64 adv_phy_type_lo = 0; - u64 adv_phy_type_hi = 0; u64 phy_types_high = 0; u64 phy_types_low = 0; u16 req_speeds; @@ -1514,28 +1483,35 @@ ice_phy_type_to_ethtool(struct net_device *netdev, * requested by user. */ if (test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags)) { - struct ice_link_default_override_tlv *ldo; - - ldo = &pf->link_dflt_override; phy_types_low = le64_to_cpu(pf->nvm_phy_type_lo); phy_types_high = le64_to_cpu(pf->nvm_phy_type_hi); ice_mask_min_supported_speeds(phy_types_high, &phy_types_low); - - /* If override enabled and PHY mask set, then - * Advertising link mode is the intersection of the PHY - * types without media and the override PHY mask. + /* determine advertised modes based on link override only + * if it's supported and if the FW doesn't abstract the + * driver from having to account for link overrides */ - if (ldo->options & ICE_LINK_OVERRIDE_EN && - (ldo->phy_type_low || ldo->phy_type_high)) { - adv_phy_type_lo = - le64_to_cpu(pf->nvm_phy_type_lo) & - ldo->phy_type_low; - adv_phy_type_hi = - le64_to_cpu(pf->nvm_phy_type_hi) & - ldo->phy_type_high; + if (ice_fw_supports_link_override(&pf->hw) && + !ice_fw_supports_report_dflt_cfg(&pf->hw)) { + struct ice_link_default_override_tlv *ldo; + + ldo = &pf->link_dflt_override; + /* If override enabled and PHY mask set, then + * Advertising link mode is the intersection of the PHY + * types without media and the override PHY mask. + */ + if (ldo->options & ICE_LINK_OVERRIDE_EN && + (ldo->phy_type_low || ldo->phy_type_high)) { + advert_phy_type_lo = + le64_to_cpu(pf->nvm_phy_type_lo) & + ldo->phy_type_low; + advert_phy_type_hi = + le64_to_cpu(pf->nvm_phy_type_hi) & + ldo->phy_type_high; + } } } else { + /* strict mode */ phy_types_low = vsi->port_info->phy.phy_type_low; phy_types_high = vsi->port_info->phy.phy_type_high; } @@ -1543,9 +1519,9 @@ ice_phy_type_to_ethtool(struct net_device *netdev, /* If Advertising link mode PHY type is not using override PHY type, * then use PHY type with media. 
*/ - if (!adv_phy_type_lo && !adv_phy_type_hi) { - adv_phy_type_lo = vsi->port_info->phy.phy_type_low; - adv_phy_type_hi = vsi->port_info->phy.phy_type_high; + if (!advert_phy_type_lo && !advert_phy_type_hi) { + advert_phy_type_lo = vsi->port_info->phy.phy_type_low; + advert_phy_type_hi = vsi->port_info->phy.phy_type_high; } ethtool_link_ksettings_zero_link_mode(ks, supported); @@ -2021,7 +1997,7 @@ ice_get_link_ksettings(struct net_device *netdev, return -ENOMEM; status = ice_aq_get_phy_caps(vsi->port_info, false, - ICE_AQC_REPORT_SW_CFG, caps, NULL); + ICE_AQC_REPORT_ACTIVE_CFG, caps, NULL); if (status) { err = -EIO; goto done; @@ -2058,7 +2034,7 @@ ice_get_link_ksettings(struct net_device *netdev, ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS); status = ice_aq_get_phy_caps(vsi->port_info, false, - ICE_AQC_REPORT_TOPO_CAP, caps, NULL); + ICE_AQC_REPORT_TOPO_CAP_MEDIA, caps, NULL); if (status) { err = -EIO; goto done; @@ -2225,13 +2201,14 @@ ice_set_link_ksettings(struct net_device *netdev, const struct ethtool_link_ksettings *ks) { struct ice_netdev_priv *np = netdev_priv(netdev); - struct ethtool_link_ksettings safe_ks, copy_ks; - struct ice_aqc_get_phy_caps_data *abilities; u8 autoneg, timeout = TEST_SET_BITS_TIMEOUT; - u16 adv_link_speed, curr_link_speed, idx; + struct ethtool_link_ksettings copy_ks = *ks; + struct ethtool_link_ksettings safe_ks = {}; + struct ice_aqc_get_phy_caps_data *phy_caps; struct ice_aqc_set_phy_cfg_data config; + u16 adv_link_speed, curr_link_speed; struct ice_pf *pf = np->vsi->back; - struct ice_port_info *p; + struct ice_port_info *pi; u8 autoneg_changed = 0; enum ice_status status; u64 phy_type_high = 0; @@ -2239,46 +2216,37 @@ ice_set_link_ksettings(struct net_device *netdev, int err = 0; bool linkup; - p = np->vsi->port_info; - - if (!p) - return -EOPNOTSUPP; + pi = np->vsi->port_info; - /* Check if this is LAN VSI */ - ice_for_each_vsi(pf, idx) - if (pf->vsi[idx]->type == ICE_VSI_PF) { - if (np->vsi != pf->vsi[idx]) - return -EOPNOTSUPP; - break; - } + if (!pi) + return -EIO; - if (p->phy.media_type != ICE_MEDIA_BASET && - p->phy.media_type != ICE_MEDIA_FIBER && - p->phy.media_type != ICE_MEDIA_BACKPLANE && - p->phy.media_type != ICE_MEDIA_DA && - p->phy.link_info.link_info & ICE_AQ_LINK_UP) + if (pi->phy.media_type != ICE_MEDIA_BASET && + pi->phy.media_type != ICE_MEDIA_FIBER && + pi->phy.media_type != ICE_MEDIA_BACKPLANE && + pi->phy.media_type != ICE_MEDIA_DA && + pi->phy.link_info.link_info & ICE_AQ_LINK_UP) return -EOPNOTSUPP; - abilities = kzalloc(sizeof(*abilities), GFP_KERNEL); - if (!abilities) + phy_caps = kzalloc(sizeof(*phy_caps), GFP_KERNEL); + if (!phy_caps) return -ENOMEM; /* Get the PHY capabilities based on media */ - status = ice_aq_get_phy_caps(p, false, ICE_AQC_REPORT_TOPO_CAP, - abilities, NULL); + if (ice_fw_supports_report_dflt_cfg(pi->hw)) + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG, + phy_caps, NULL); + else + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, + phy_caps, NULL); if (status) { - err = -EAGAIN; + err = -EIO; goto done; } - /* copy the ksettings to copy_ks to avoid modifying the original */ - memcpy(©_ks, ks, sizeof(copy_ks)); - /* save autoneg out of ksettings */ autoneg = copy_ks.base.autoneg; - memset(&safe_ks, 0, sizeof(safe_ks)); - /* Get link modes supported by hardware.*/ ice_phy_type_to_ethtool(netdev, &safe_ks); @@ -2290,7 +2258,7 @@ ice_set_link_ksettings(struct net_device *netdev, __ETHTOOL_LINK_MODE_MASK_NBITS)) { if 
(!test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags)) netdev_info(netdev, "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n"); - err = -EINVAL; + err = -EOPNOTSUPP; goto done; } @@ -2314,7 +2282,7 @@ ice_set_link_ksettings(struct net_device *netdev, goto done; } - while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) { + while (test_and_set_bit(ICE_CFG_BUSY, pf->state)) { timeout--; if (!timeout) { err = -EBUSY; @@ -2327,26 +2295,26 @@ ice_set_link_ksettings(struct net_device *netdev, * configuration is initialized during probe from PHY capabilities * software mode, and updated on set PHY configuration. */ - memcpy(&config, &p->phy.curr_user_phy_cfg, sizeof(config)); + config = pi->phy.curr_user_phy_cfg; config.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT; /* Check autoneg */ - err = ice_setup_autoneg(p, &safe_ks, &config, autoneg, &autoneg_changed, + err = ice_setup_autoneg(pi, &safe_ks, &config, autoneg, &autoneg_changed, netdev); if (err) goto done; /* Call to get the current link speed */ - p->phy.get_link_info = true; - status = ice_get_link_status(p, &linkup); + pi->phy.get_link_info = true; + status = ice_get_link_status(pi, &linkup); if (status) { - err = -EAGAIN; + err = -EIO; goto done; } - curr_link_speed = p->phy.link_info.link_speed; + curr_link_speed = pi->phy.link_info.link_speed; adv_link_speed = ice_ksettings_find_adv_link_speed(ks); /* If speed didn't get set, set it to what it currently is. @@ -2365,7 +2333,7 @@ ice_set_link_ksettings(struct net_device *netdev, } /* save the requested speeds */ - p->phy.link_info.req_speeds = adv_link_speed; + pi->phy.link_info.req_speeds = adv_link_speed; /* set link and auto negotiation so changes take effect */ config.caps |= ICE_AQ_PHY_ENA_LINK; @@ -2373,7 +2341,7 @@ ice_set_link_ksettings(struct net_device *netdev, /* check if there is a PHY type for the requested advertised speed */ if (!(phy_type_low || phy_type_high)) { netdev_info(netdev, "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n"); - err = -EAGAIN; + err = -EOPNOTSUPP; goto done; } @@ -2381,9 +2349,9 @@ ice_set_link_ksettings(struct net_device *netdev, * for set PHY configuration */ config.phy_type_high = cpu_to_le64(phy_type_high) & - abilities->phy_type_high; + phy_caps->phy_type_high; config.phy_type_low = cpu_to_le64(phy_type_low) & - abilities->phy_type_low; + phy_caps->phy_type_low; if (!(config.phy_type_high || config.phy_type_low)) { /* If there is no intersection and lenient mode is enabled, then @@ -2397,13 +2365,13 @@ ice_set_link_ksettings(struct net_device *netdev, pf->nvm_phy_type_lo; } else { netdev_info(netdev, "The selected speed is not supported by the current media. 
Please select a link speed that is supported by the current media.\n"); - err = -EAGAIN; + err = -EOPNOTSUPP; goto done; } } /* If link is up put link down */ - if (p->phy.link_info.link_info & ICE_AQ_LINK_UP) { + if (pi->phy.link_info.link_info & ICE_AQ_LINK_UP) { /* Tell the OS link is going down, the link will go * back up when fw says it is ready asynchronously */ @@ -2413,18 +2381,18 @@ ice_set_link_ksettings(struct net_device *netdev, } /* make the aq call */ - status = ice_aq_set_phy_cfg(&pf->hw, p, &config, NULL); + status = ice_aq_set_phy_cfg(&pf->hw, pi, &config, NULL); if (status) { netdev_info(netdev, "Set phy config failed,\n"); - err = -EAGAIN; + err = -EIO; goto done; } /* Save speed request */ - p->phy.curr_user_speed_req = adv_link_speed; + pi->phy.curr_user_speed_req = adv_link_speed; done: - kfree(abilities); - clear_bit(__ICE_CFG_BUSY, pf->state); + kfree(phy_caps); + clear_bit(ICE_CFG_BUSY, pf->state); return err; } @@ -2780,7 +2748,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) if (ice_xsk_any_rx_ring_ena(vsi)) return -EBUSY; - while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) { + while (test_and_set_bit(ICE_CFG_BUSY, pf->state)) { timeout--; if (!timeout) return -EBUSY; @@ -2907,7 +2875,7 @@ process_link: /* Bring interface down, copy in the new ring info, then restore the * interface. if VSI is up, bring it down and then back up */ - if (!test_and_set_bit(__ICE_DOWN, vsi->state)) { + if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) { ice_down(vsi); if (tx_rings) { @@ -2959,7 +2927,7 @@ free_tx: } done: - clear_bit(__ICE_CFG_BUSY, pf->state); + clear_bit(ICE_CFG_BUSY, pf->state); return err; } @@ -2993,7 +2961,7 @@ ice_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) return; /* Get current PHY config */ - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps, + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps, NULL); if (status) goto out; @@ -3060,7 +3028,7 @@ ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) return -ENOMEM; /* Get current PHY config */ - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps, + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps, NULL); if (status) { kfree(pcaps); @@ -3078,7 +3046,7 @@ ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) } /* If we have link and don't have autoneg */ - if (!test_bit(__ICE_DOWN, pf->state) && + if (!test_bit(ICE_DOWN, pf->state) && !(hw_link_info->an_info & ICE_AQ_AN_COMPLETED)) { /* Send message that it might not necessarily work*/ netdev_info(netdev, "Autoneg did not complete so changing settings may not result in an actual change.\n"); @@ -3161,7 +3129,7 @@ ice_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; - int ret = 0, i; + int err, i; u8 *lut; if (hfunc) @@ -3180,17 +3148,20 @@ ice_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) if (!lut) return -ENOMEM; - if (ice_get_rss(vsi, key, lut, vsi->rss_table_size)) { - ret = -EIO; + err = ice_get_rss_key(vsi, key); + if (err) + goto out; + + err = ice_get_rss_lut(vsi, lut, vsi->rss_table_size); + if (err) goto out; - } for (i = 0; i < vsi->rss_table_size; i++) indir[i] = (u32)(lut[i]); out: kfree(lut); - return ret; + return err; } /** @@ -3211,7 +3182,7 @@ ice_set_rxfh(struct net_device *netdev, const u32 
*indir, const u8 *key, struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; struct device *dev; - u8 *seed = NULL; + int err; dev = ice_pf_to_dev(pf); if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) @@ -3232,7 +3203,10 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key, return -ENOMEM; } memcpy(vsi->rss_hkey_user, key, ICE_VSIQF_HKEY_ARRAY_SIZE); - seed = vsi->rss_hkey_user; + + err = ice_set_rss_key(vsi, vsi->rss_hkey_user); + if (err) + return err; } if (!vsi->rss_lut_user) { @@ -3253,8 +3227,9 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key, vsi->rss_size); } - if (ice_set_rss(vsi, seed, vsi->rss_lut_user, vsi->rss_table_size)) - return -EIO; + err = ice_set_rss_lut(vsi, vsi->rss_lut_user, vsi->rss_table_size); + if (err) + return err; return 0; } @@ -3350,10 +3325,9 @@ static int ice_get_valid_rss_size(struct ice_hw *hw, int new_size) static int ice_vsi_set_dflt_rss_lut(struct ice_vsi *vsi, int req_rss_size) { struct ice_pf *pf = vsi->back; - enum ice_status status; struct device *dev; struct ice_hw *hw; - int err = 0; + int err; u8 *lut; dev = ice_pf_to_dev(pf); @@ -3374,14 +3348,10 @@ static int ice_vsi_set_dflt_rss_lut(struct ice_vsi *vsi, int req_rss_size) /* create/set RSS LUT */ ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size); - status = ice_aq_set_rss_lut(hw, vsi->idx, vsi->rss_lut_type, lut, - vsi->rss_table_size); - if (status) { - dev_err(dev, "Cannot set RSS lut, err %s aq_err %s\n", - ice_stat_str(status), + err = ice_set_rss_lut(vsi, lut, vsi->rss_table_size); + if (err) + dev_err(dev, "Cannot set RSS lut, err %d aq_err %s\n", err, ice_aq_str(hw->adminq.sq_last_status)); - err = -EIO; - } kfree(lut); return err; @@ -3472,7 +3442,7 @@ static void ice_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) netdev_warn(netdev, "Wake on LAN is not supported on this interface!\n"); /* Get WoL settings based on the HW capability */ - if (ice_is_wol_supported(pf)) { + if (ice_is_wol_supported(&pf->hw)) { wol->supported = WAKE_MAGIC; wol->wolopts = pf->wol_ena ? 
WAKE_MAGIC : 0; } else { @@ -3492,7 +3462,7 @@ static int ice_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) { struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; - if (vsi->type != ICE_VSI_PF || !ice_is_wol_supported(pf)) + if (vsi->type != ICE_VSI_PF || !ice_is_wol_supported(&pf->hw)) return -EOPNOTSUPP; /* only magic packet is supported */ @@ -3540,13 +3510,13 @@ ice_get_rc_coalesce(struct ethtool_coalesce *ec, enum ice_container_type c_type, switch (c_type) { case ICE_RX_CONTAINER: - ec->use_adaptive_rx_coalesce = ITR_IS_DYNAMIC(rc->itr_setting); - ec->rx_coalesce_usecs = rc->itr_setting & ~ICE_ITR_DYNAMIC; + ec->use_adaptive_rx_coalesce = ITR_IS_DYNAMIC(rc); + ec->rx_coalesce_usecs = rc->itr_setting; ec->rx_coalesce_usecs_high = rc->ring->q_vector->intrl; break; case ICE_TX_CONTAINER: - ec->use_adaptive_tx_coalesce = ITR_IS_DYNAMIC(rc->itr_setting); - ec->tx_coalesce_usecs = rc->itr_setting & ~ICE_ITR_DYNAMIC; + ec->use_adaptive_tx_coalesce = ITR_IS_DYNAMIC(rc); + ec->tx_coalesce_usecs = rc->itr_setting; break; default: dev_dbg(ice_pf_to_dev(pf), "Invalid c_type %d\n", c_type); @@ -3664,11 +3634,16 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, ICE_MAX_INTRL); return -EINVAL; } + if (ec->rx_coalesce_usecs_high != rc->ring->q_vector->intrl && + (ec->use_adaptive_rx_coalesce || ec->use_adaptive_tx_coalesce)) { + netdev_info(vsi->netdev, "Invalid value, %s-usecs-high cannot be changed if adaptive-tx or adaptive-rx is enabled\n", + c_type_str); + return -EINVAL; + } if (ec->rx_coalesce_usecs_high != rc->ring->q_vector->intrl) { rc->ring->q_vector->intrl = ec->rx_coalesce_usecs_high; - wr32(&pf->hw, GLINT_RATE(rc->ring->q_vector->reg_idx), - ice_intrl_usec_to_reg(ec->rx_coalesce_usecs_high, - pf->hw.intrl_gran)); + ice_write_intrl(rc->ring->q_vector, + ec->rx_coalesce_usecs_high); } use_adaptive_coalesce = ec->use_adaptive_rx_coalesce; @@ -3686,7 +3661,7 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, return -EINVAL; } - itr_setting = rc->itr_setting & ~ICE_ITR_DYNAMIC; + itr_setting = rc->itr_setting; if (coalesce_usecs != itr_setting && use_adaptive_coalesce) { netdev_info(vsi->netdev, "%s interrupt throttling cannot be changed if adaptive-%s is enabled\n", c_type_str, c_type_str); @@ -3700,12 +3675,18 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, } if (use_adaptive_coalesce) { - rc->itr_setting |= ICE_ITR_DYNAMIC; + rc->itr_mode = ITR_DYNAMIC; } else { - /* save the user set usecs */ + rc->itr_mode = ITR_STATIC; + /* store the user-facing value the way it was set */ rc->itr_setting = coalesce_usecs; - /* device ITR granularity is in 2 usec increments */ - rc->target_itr = ITR_REG_ALIGN(rc->itr_setting); + /* write the change to the register */ + ice_write_itr(rc, coalesce_usecs); + /* force writes to take effect immediately; the flush shouldn't + * be done in the functions above because the intent is for + * them to do lazy writes. + */ + ice_flush(&pf->hw); } return 0; @@ -3767,8 +3748,6 @@ ice_print_if_odd_usecs(struct net_device *netdev, u16 itr_setting, if (use_adaptive_coalesce) return; - itr_setting = ITR_TO_REG(itr_setting); - if (itr_setting != coalesce_usecs && (coalesce_usecs % 2)) netdev_info(netdev, "User set %s-usecs to %d, device only supports even values. 
Rounding down and attempting to set %s-usecs to %d\n", c_type_str, coalesce_usecs, c_type_str, @@ -3823,7 +3802,6 @@ __ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, return -EINVAL; set_complete: - return 0; } @@ -3936,30 +3914,33 @@ ice_get_module_eeprom(struct net_device *netdev, struct ethtool_eeprom *ee, u8 *data) { struct ice_netdev_priv *np = netdev_priv(netdev); +#define SFF_READ_BLOCK_SIZE 8 + u8 value[SFF_READ_BLOCK_SIZE] = { 0 }; u8 addr = ICE_I2C_EEPROM_DEV_ADDR; struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; enum ice_status status; bool is_sfp = false; - unsigned int i; + unsigned int i, j; u16 offset = 0; - u8 value = 0; u8 page = 0; - status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, 0, - &value, 1, 0, NULL); - if (status) - return -EIO; - if (!ee || !ee->len || !data) return -EINVAL; - if (value == ICE_MODULE_TYPE_SFP) + status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, 0, value, 1, 0, + NULL); + if (status) + return -EIO; + + if (value[0] == ICE_MODULE_TYPE_SFP) is_sfp = true; - for (i = 0; i < ee->len; i++) { + memset(data, 0, ee->len); + for (i = 0; i < ee->len; i += SFF_READ_BLOCK_SIZE) { offset = i + ee->offset; + page = 0; /* Check if we need to access the other memory page */ if (is_sfp) { @@ -3975,11 +3956,37 @@ ice_get_module_eeprom(struct net_device *netdev, } } - status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, !is_sfp, - &value, 1, 0, NULL); - if (status) - value = 0; - data[i] = value; + /* Bit 2 of EEPROM address 0x02 declares upper + * pages are disabled on QSFP modules. + * SFP modules only ever use page 0. + */ + if (page == 0 || !(data[0x2] & 0x4)) { + /* If the i2c bus is busy due to a slow page change or + * link management access, the call can fail. This is normal. + * So we retry this a few times. 
+ */ + for (j = 0; j < 4; j++) { + status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, + !is_sfp, value, + SFF_READ_BLOCK_SIZE, + 0, NULL); + netdev_dbg(netdev, "SFF %02X %02X %02X %X = %02X%02X%02X%02X.%02X%02X%02X%02X (%X)\n", + addr, offset, page, is_sfp, + value[0], value[1], value[2], value[3], + value[4], value[5], value[6], value[7], + status); + if (status) { + usleep_range(1500, 2500); + memset(value, 0, SFF_READ_BLOCK_SIZE); + continue; + } + break; + } + + /* Make sure we have enough room for the new block */ + if ((i + SFF_READ_BLOCK_SIZE) < ee->len) + memcpy(data + i, value, SFF_READ_BLOCK_SIZE); + } } return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c index 192729546bbf..16de603b280c 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c @@ -1452,7 +1452,7 @@ int ice_del_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd) return -EBUSY; } - if (test_bit(__ICE_FD_FLUSH_REQ, pf->state)) + if (test_bit(ICE_FD_FLUSH_REQ, pf->state)) return -EBUSY; mutex_lock(&hw->fdir_fltr_lock); @@ -1679,6 +1679,10 @@ int ice_add_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd) input->flex_offset = userdata.flex_offset; } + input->cnt_ena = ICE_FXD_FLTR_QW0_STAT_ENA_PKTS; + input->fdid_prio = ICE_FXD_FLTR_QW1_FDID_PRI_THREE; + input->comp_report = ICE_FXD_FLTR_QW0_COMP_REPORT_SW_FAIL; + /* input struct is added to the HW filter list */ ice_fdir_update_list_entry(pf, input, fsp->location); diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.c b/drivers/net/ethernet/intel/ice/ice_fdir.c index 59c0c6a0f8c5..59ef68f072c0 100644 --- a/drivers/net/ethernet/intel/ice/ice_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_fdir.c @@ -40,6 +40,204 @@ static const u8 ice_fdir_ipv4_pkt[] = { 0x00, 0x00 }; +static const u8 ice_fdir_udp4_gtpu4_pkt[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00, + 0x00, 0x4c, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x08, 0x68, 0x08, 0x68, 0x00, 0x00, + 0x00, 0x00, 0x34, 0xff, 0x00, 0x28, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0x02, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x45, 0x00, + 0x00, 0x1c, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, +}; + +static const u8 ice_fdir_tcp4_gtpu4_pkt[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00, + 0x00, 0x58, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x08, 0x68, 0x08, 0x68, 0x00, 0x00, + 0x00, 0x00, 0x34, 0xff, 0x00, 0x28, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0x02, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x45, 0x00, + 0x00, 0x28, 0x00, 0x00, 0x40, 0x00, 0x40, 0x06, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +}; + +static const u8 ice_fdir_icmp4_gtpu4_pkt[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00, + 0x00, 0x4c, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x08, 0x68, 0x08, 0x68, 0x00, 0x00, + 0x00, 0x00, 0x34, 0xff, 0x00, 0x28, 0x00, 0x00, + 0x00, 0x00, 0x00, 
0x00, 0x00, 0x85, 0x02, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x45, 0x00, + 0x00, 0x1c, 0x00, 0x00, 0x40, 0x00, 0x40, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, +}; + +static const u8 ice_fdir_ipv4_gtpu4_pkt[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00, + 0x00, 0x44, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x08, 0x68, 0x08, 0x68, 0x00, 0x00, + 0x00, 0x00, 0x34, 0xff, 0x00, 0x28, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0x02, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x45, 0x00, + 0x00, 0x14, 0x00, 0x00, 0x40, 0x00, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, +}; + +static const u8 ice_fdir_ipv4_l2tpv3_pkt[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00, + 0x00, 0x14, 0x00, 0x00, 0x40, 0x00, 0x40, 0x73, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +}; + +static const u8 ice_fdir_ipv6_l2tpv3_pkt[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x73, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, +}; + +static const u8 ice_fdir_ipv4_esp_pkt[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00, + 0x00, 0x14, 0x00, 0x00, 0x40, 0x00, 0x40, 0x32, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00 +}; + +static const u8 ice_fdir_ipv6_esp_pkt[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x32, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +}; + +static const u8 ice_fdir_ipv4_ah_pkt[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00, + 0x00, 0x14, 0x00, 0x00, 0x40, 0x00, 0x40, 0x33, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00 +}; + +static const u8 ice_fdir_ipv6_ah_pkt[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x33, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +}; + +static const u8 ice_fdir_ipv4_nat_t_esp_pkt[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00, + 0x00, 0x1C, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x94, 0x00, 0x00, + 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, +}; + +static const u8 ice_fdir_ipv6_nat_t_esp_pkt[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00, + 0x00, 0x00, 0x00, 0x08, 0x11, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x11, 0x94, 0x00, 0x00, 0x00, 0x08, +}; + +static const u8 ice_fdir_ipv4_pfcp_node_pkt[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00, + 0x00, 0x2C, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x22, 0x65, 0x22, 0x65, 0x00, 0x00, + 0x00, 0x00, 0x20, 0x00, 0x00, 0x10, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, +}; + +static const u8 ice_fdir_ipv4_pfcp_session_pkt[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00, + 0x00, 0x2C, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x22, 0x65, 0x22, 0x65, 0x00, 0x00, + 0x00, 0x00, 0x21, 0x00, 0x00, 0x10, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, +}; + +static const u8 ice_fdir_ipv6_pfcp_node_pkt[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00, + 0x00, 0x00, 0x00, 0x18, 0x11, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x22, 0x65, + 0x22, 0x65, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, + 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +}; + +static const u8 ice_fdir_ipv6_pfcp_session_pkt[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00, + 0x00, 0x00, 0x00, 0x18, 0x11, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x22, 0x65, + 0x22, 0x65, 0x00, 0x00, 0x00, 0x00, 0x21, 0x00, + 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +}; + +static const u8 ice_fdir_non_ip_l2_pkt[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +}; + static const u8 ice_fdir_tcpv6_pkt[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00, @@ -239,6 +437,111 @@ static const struct ice_fdir_base_pkt ice_fdir_pkt[] = { sizeof(ice_fdir_ip4_tun_pkt), ice_fdir_ip4_tun_pkt, }, { + ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_UDP, + sizeof(ice_fdir_udp4_gtpu4_pkt), + ice_fdir_udp4_gtpu4_pkt, + sizeof(ice_fdir_udp4_gtpu4_pkt), + ice_fdir_udp4_gtpu4_pkt, + }, + { + ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_TCP, + sizeof(ice_fdir_tcp4_gtpu4_pkt), + ice_fdir_tcp4_gtpu4_pkt, + sizeof(ice_fdir_tcp4_gtpu4_pkt), + ice_fdir_tcp4_gtpu4_pkt, + }, + { + ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_ICMP, + sizeof(ice_fdir_icmp4_gtpu4_pkt), + ice_fdir_icmp4_gtpu4_pkt, + sizeof(ice_fdir_icmp4_gtpu4_pkt), + ice_fdir_icmp4_gtpu4_pkt, + }, + { + ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER, + sizeof(ice_fdir_ipv4_gtpu4_pkt), + ice_fdir_ipv4_gtpu4_pkt, + 
sizeof(ice_fdir_ipv4_gtpu4_pkt), + ice_fdir_ipv4_gtpu4_pkt, + }, + { + ICE_FLTR_PTYPE_NONF_IPV4_L2TPV3, + sizeof(ice_fdir_ipv4_l2tpv3_pkt), ice_fdir_ipv4_l2tpv3_pkt, + sizeof(ice_fdir_ipv4_l2tpv3_pkt), ice_fdir_ipv4_l2tpv3_pkt, + }, + { + ICE_FLTR_PTYPE_NONF_IPV6_L2TPV3, + sizeof(ice_fdir_ipv6_l2tpv3_pkt), ice_fdir_ipv6_l2tpv3_pkt, + sizeof(ice_fdir_ipv6_l2tpv3_pkt), ice_fdir_ipv6_l2tpv3_pkt, + }, + { + ICE_FLTR_PTYPE_NONF_IPV4_ESP, + sizeof(ice_fdir_ipv4_esp_pkt), ice_fdir_ipv4_esp_pkt, + sizeof(ice_fdir_ipv4_esp_pkt), ice_fdir_ipv4_esp_pkt, + }, + { + ICE_FLTR_PTYPE_NONF_IPV6_ESP, + sizeof(ice_fdir_ipv6_esp_pkt), ice_fdir_ipv6_esp_pkt, + sizeof(ice_fdir_ipv6_esp_pkt), ice_fdir_ipv6_esp_pkt, + }, + { + ICE_FLTR_PTYPE_NONF_IPV4_AH, + sizeof(ice_fdir_ipv4_ah_pkt), ice_fdir_ipv4_ah_pkt, + sizeof(ice_fdir_ipv4_ah_pkt), ice_fdir_ipv4_ah_pkt, + }, + { + ICE_FLTR_PTYPE_NONF_IPV6_AH, + sizeof(ice_fdir_ipv6_ah_pkt), ice_fdir_ipv6_ah_pkt, + sizeof(ice_fdir_ipv6_ah_pkt), ice_fdir_ipv6_ah_pkt, + }, + { + ICE_FLTR_PTYPE_NONF_IPV4_NAT_T_ESP, + sizeof(ice_fdir_ipv4_nat_t_esp_pkt), + ice_fdir_ipv4_nat_t_esp_pkt, + sizeof(ice_fdir_ipv4_nat_t_esp_pkt), + ice_fdir_ipv4_nat_t_esp_pkt, + }, + { + ICE_FLTR_PTYPE_NONF_IPV6_NAT_T_ESP, + sizeof(ice_fdir_ipv6_nat_t_esp_pkt), + ice_fdir_ipv6_nat_t_esp_pkt, + sizeof(ice_fdir_ipv6_nat_t_esp_pkt), + ice_fdir_ipv6_nat_t_esp_pkt, + }, + { + ICE_FLTR_PTYPE_NONF_IPV4_PFCP_NODE, + sizeof(ice_fdir_ipv4_pfcp_node_pkt), + ice_fdir_ipv4_pfcp_node_pkt, + sizeof(ice_fdir_ipv4_pfcp_node_pkt), + ice_fdir_ipv4_pfcp_node_pkt, + }, + { + ICE_FLTR_PTYPE_NONF_IPV4_PFCP_SESSION, + sizeof(ice_fdir_ipv4_pfcp_session_pkt), + ice_fdir_ipv4_pfcp_session_pkt, + sizeof(ice_fdir_ipv4_pfcp_session_pkt), + ice_fdir_ipv4_pfcp_session_pkt, + }, + { + ICE_FLTR_PTYPE_NONF_IPV6_PFCP_NODE, + sizeof(ice_fdir_ipv6_pfcp_node_pkt), + ice_fdir_ipv6_pfcp_node_pkt, + sizeof(ice_fdir_ipv6_pfcp_node_pkt), + ice_fdir_ipv6_pfcp_node_pkt, + }, + { + ICE_FLTR_PTYPE_NONF_IPV6_PFCP_SESSION, + sizeof(ice_fdir_ipv6_pfcp_session_pkt), + ice_fdir_ipv6_pfcp_session_pkt, + sizeof(ice_fdir_ipv6_pfcp_session_pkt), + ice_fdir_ipv6_pfcp_session_pkt, + }, + { + ICE_FLTR_PTYPE_NON_IP_L2, + sizeof(ice_fdir_non_ip_l2_pkt), ice_fdir_non_ip_l2_pkt, + sizeof(ice_fdir_non_ip_l2_pkt), ice_fdir_non_ip_l2_pkt, + }, + { ICE_FLTR_PTYPE_NONF_IPV6_TCP, sizeof(ice_fdir_tcpv6_pkt), ice_fdir_tcpv6_pkt, sizeof(ice_fdir_tcp6_tun_pkt), ice_fdir_tcp6_tun_pkt, @@ -374,21 +677,31 @@ ice_fdir_get_prgm_desc(struct ice_hw *hw, struct ice_fdir_fltr *input, if (input->dest_ctl == ICE_FLTR_PRGM_DESC_DEST_DROP_PKT) { fdir_fltr_ctx.drop = ICE_FXD_FLTR_QW0_DROP_YES; fdir_fltr_ctx.qindex = 0; + } else if (input->dest_ctl == + ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_OTHER) { + fdir_fltr_ctx.drop = ICE_FXD_FLTR_QW0_DROP_NO; + fdir_fltr_ctx.qindex = 0; } else { + if (input->dest_ctl == + ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QGROUP) + fdir_fltr_ctx.toq = input->q_region; fdir_fltr_ctx.drop = ICE_FXD_FLTR_QW0_DROP_NO; fdir_fltr_ctx.qindex = input->q_index; } - fdir_fltr_ctx.cnt_ena = ICE_FXD_FLTR_QW0_STAT_ENA_PKTS; + fdir_fltr_ctx.cnt_ena = input->cnt_ena; fdir_fltr_ctx.cnt_index = input->cnt_index; fdir_fltr_ctx.fd_vsi = ice_get_hw_vsi_num(hw, input->dest_vsi); fdir_fltr_ctx.evict_ena = ICE_FXD_FLTR_QW0_EVICT_ENA_FALSE; - fdir_fltr_ctx.toq_prio = 3; + if (input->dest_ctl == ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_OTHER) + fdir_fltr_ctx.toq_prio = 0; + else + fdir_fltr_ctx.toq_prio = 3; fdir_fltr_ctx.pcmd = add ? 
ICE_FXD_FLTR_QW1_PCMD_ADD : ICE_FXD_FLTR_QW1_PCMD_REMOVE; fdir_fltr_ctx.swap = ICE_FXD_FLTR_QW1_SWAP_NOT_SET; fdir_fltr_ctx.comp_q = ICE_FXD_FLTR_QW0_COMP_Q_ZERO; - fdir_fltr_ctx.comp_report = ICE_FXD_FLTR_QW0_COMP_REPORT_SW_FAIL; - fdir_fltr_ctx.fdid_prio = 3; + fdir_fltr_ctx.comp_report = input->comp_report; + fdir_fltr_ctx.fdid_prio = input->fdid_prio; fdir_fltr_ctx.desc_prof = 1; fdir_fltr_ctx.desc_prof_prio = 3; ice_set_fd_desc_val(&fdir_fltr_ctx, fdesc); @@ -471,6 +784,55 @@ static void ice_pkt_insert_ipv6_addr(u8 *pkt, int offset, __be32 *addr) } /** + * ice_pkt_insert_u6_qfi - insert a 6-bit QFI value into a memory buffer for GTPU + * @pkt: packet buffer + * @offset: offset into buffer + * @data: 8-bit value to convert and insert into pkt at offset + * + * This function is designed for inserting the QFI (6 bits) for GTPU. + */ +static void ice_pkt_insert_u6_qfi(u8 *pkt, int offset, u8 data) +{ + u8 ret; + + ret = (data & 0x3F) + (*(pkt + offset) & 0xC0); + memcpy(pkt + offset, &ret, sizeof(ret)); +} + +/** + * ice_pkt_insert_u8 - insert a u8 value into a memory buffer. + * @pkt: packet buffer + * @offset: offset into buffer + * @data: 8-bit value to convert and insert into pkt at offset + */ +static void ice_pkt_insert_u8(u8 *pkt, int offset, u8 data) +{ + memcpy(pkt + offset, &data, sizeof(data)); +} + +/** + * ice_pkt_insert_u8_tc - insert a u8 value into a memory buffer for IPv6 TC. + * @pkt: packet buffer + * @offset: offset into buffer + * @data: 8-bit value to convert and insert into pkt at offset + * + * This function is designed for inserting the Traffic Class (TC) for IPv6, + * since the TC field is not byte aligned. Here we split it into two parts, + * merge each half with the existing bits copied from pkt, then write the + * two bytes back one by one. + */ +static void ice_pkt_insert_u8_tc(u8 *pkt, int offset, u8 data) +{ + u8 high, low; + + high = (data >> 4) + (*(pkt + offset) & 0xF0); + memcpy(pkt + offset, &high, sizeof(high)); + + low = (*(pkt + offset + 1) & 0x0F) + ((data & 0x0F) << 4); + memcpy(pkt + offset + 1, &low, sizeof(low)); +} + +/** * ice_pkt_insert_u16 - insert a be16 value into a memory buffer * @pkt: packet buffer * @offset: offset into buffer @@ -493,6 +855,16 @@ static void ice_pkt_insert_u32(u8 *pkt, int offset, __be32 data) } /** + * ice_pkt_insert_mac_addr - insert a MAC addr into a memory buffer. 
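
Backing up to the two sub-byte helpers above: ice_pkt_insert_u6_qfi() and ice_pkt_insert_u8_tc() both merge a small field around bits already present in the template packet. A minimal standalone sketch of the same arithmetic (plain C; the buffer values are made up for illustration, and | replaces the driver's + over disjoint masks, which is equivalent):

#include <stdint.h>
#include <stdio.h>

/* Same math as ice_pkt_insert_u6_qfi: keep the buffer's two high bits,
 * merge in the 6-bit QFI value.
 */
static uint8_t pack_u6_qfi(uint8_t buf_byte, uint8_t qfi)
{
        return (qfi & 0x3F) | (buf_byte & 0xC0);
}

/* Same math as ice_pkt_insert_u8_tc: the IPv6 Traffic Class straddles two
 * bytes, the low nibble of the first and the high nibble of the second.
 */
static void pack_u8_tc(uint8_t buf[2], uint8_t tc)
{
        buf[0] = (buf[0] & 0xF0) | (tc >> 4);
        buf[1] = (buf[1] & 0x0F) | ((tc & 0x0F) << 4);
}

int main(void)
{
        uint8_t gtpu = 0x80;                  /* high bits already set */
        uint8_t v6[2] = { 0x60, 0x00 };       /* version nibble = 6 */

        printf("QFI byte: 0x%02X\n", pack_u6_qfi(gtpu, 0x2A));  /* 0xAA */
        pack_u8_tc(v6, 0xB8);
        printf("IPv6 TC bytes: 0x%02X 0x%02X\n", v6[0], v6[1]); /* 0x6B 0x80 */
        return 0;
}

Running this prints 0xAA and 0x6B 0x80, a cheap way to convince yourself the nibble split lands where the header layout says it should.
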
+ * @pkt: packet buffer + * @addr: MAC address to convert and insert into pkt at offset + */ +static void ice_pkt_insert_mac_addr(u8 *pkt, u8 *addr) +{ + ether_addr_copy(pkt, addr); +} + +/** * ice_fdir_get_gen_prgm_pkt - generate a training packet * @hw: pointer to the hardware structure * @input: flow director filter data structure @@ -520,11 +892,9 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input, case IPPROTO_SCTP: flow = ICE_FLTR_PTYPE_NONF_IPV4_SCTP; break; - case IPPROTO_IP: + default: flow = ICE_FLTR_PTYPE_NONF_IPV4_OTHER; break; - default: - return ICE_ERR_PARAM; } } else if (input->flow_type == ICE_FLTR_PTYPE_NONF_IPV6_OTHER) { switch (input->ip.v6.proto) { @@ -537,11 +907,9 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input, case IPPROTO_SCTP: flow = ICE_FLTR_PTYPE_NONF_IPV6_SCTP; break; - case IPPROTO_IP: + default: flow = ICE_FLTR_PTYPE_NONF_IPV6_OTHER; break; - default: - return ICE_ERR_PARAM; } } else { flow = input->flow_type; @@ -580,6 +948,9 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input, input->ip.v4.dst_ip); ice_pkt_insert_u16(loc, ICE_IPV4_TCP_SRC_PORT_OFFSET, input->ip.v4.dst_port); + ice_pkt_insert_u8(loc, ICE_IPV4_TOS_OFFSET, input->ip.v4.tos); + ice_pkt_insert_u8(loc, ICE_IPV4_TTL_OFFSET, input->ip.v4.ttl); + ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac); if (frag) loc[20] = ICE_FDIR_IPV4_PKT_FLAG_DF; break; @@ -592,6 +963,11 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input, input->ip.v4.dst_ip); ice_pkt_insert_u16(loc, ICE_IPV4_UDP_SRC_PORT_OFFSET, input->ip.v4.dst_port); + ice_pkt_insert_u8(loc, ICE_IPV4_TOS_OFFSET, input->ip.v4.tos); + ice_pkt_insert_u8(loc, ICE_IPV4_TTL_OFFSET, input->ip.v4.ttl); + ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac); + ice_pkt_insert_mac_addr(loc + ETH_ALEN, + input->ext_data.src_mac); break; case ICE_FLTR_PTYPE_NONF_IPV4_SCTP: ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET, @@ -602,13 +978,87 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input, input->ip.v4.dst_ip); ice_pkt_insert_u16(loc, ICE_IPV4_SCTP_SRC_PORT_OFFSET, input->ip.v4.dst_port); + ice_pkt_insert_u8(loc, ICE_IPV4_TOS_OFFSET, input->ip.v4.tos); + ice_pkt_insert_u8(loc, ICE_IPV4_TTL_OFFSET, input->ip.v4.ttl); + ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac); break; case ICE_FLTR_PTYPE_NONF_IPV4_OTHER: ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET, input->ip.v4.src_ip); ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET, input->ip.v4.dst_ip); - ice_pkt_insert_u16(loc, ICE_IPV4_PROTO_OFFSET, 0); + ice_pkt_insert_u8(loc, ICE_IPV4_TOS_OFFSET, input->ip.v4.tos); + ice_pkt_insert_u8(loc, ICE_IPV4_TTL_OFFSET, input->ip.v4.ttl); + ice_pkt_insert_u8(loc, ICE_IPV4_PROTO_OFFSET, + input->ip.v4.proto); + ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac); + break; + case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_UDP: + case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_TCP: + case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_ICMP: + case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER: + ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET, + input->ip.v4.src_ip); + ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET, + input->ip.v4.dst_ip); + ice_pkt_insert_u32(loc, ICE_IPV4_GTPU_TEID_OFFSET, + input->gtpu_data.teid); + ice_pkt_insert_u6_qfi(loc, ICE_IPV4_GTPU_QFI_OFFSET, + input->gtpu_data.qfi); + break; + case ICE_FLTR_PTYPE_NONF_IPV4_L2TPV3: + ice_pkt_insert_u32(loc, ICE_IPV4_L2TPV3_SESS_ID_OFFSET, + input->l2tpv3_data.session_id); + break; + case 
ICE_FLTR_PTYPE_NONF_IPV6_L2TPV3: + ice_pkt_insert_u32(loc, ICE_IPV6_L2TPV3_SESS_ID_OFFSET, + input->l2tpv3_data.session_id); + break; + case ICE_FLTR_PTYPE_NONF_IPV4_ESP: + ice_pkt_insert_u32(loc, ICE_IPV4_ESP_SPI_OFFSET, + input->ip.v4.sec_parm_idx); + break; + case ICE_FLTR_PTYPE_NONF_IPV6_ESP: + ice_pkt_insert_u32(loc, ICE_IPV6_ESP_SPI_OFFSET, + input->ip.v6.sec_parm_idx); + break; + case ICE_FLTR_PTYPE_NONF_IPV4_AH: + ice_pkt_insert_u32(loc, ICE_IPV4_AH_SPI_OFFSET, + input->ip.v4.sec_parm_idx); + break; + case ICE_FLTR_PTYPE_NONF_IPV6_AH: + ice_pkt_insert_u32(loc, ICE_IPV6_AH_SPI_OFFSET, + input->ip.v6.sec_parm_idx); + break; + case ICE_FLTR_PTYPE_NONF_IPV4_NAT_T_ESP: + ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET, + input->ip.v4.src_ip); + ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET, + input->ip.v4.dst_ip); + ice_pkt_insert_u32(loc, ICE_IPV4_NAT_T_ESP_SPI_OFFSET, + input->ip.v4.sec_parm_idx); + break; + case ICE_FLTR_PTYPE_NONF_IPV6_NAT_T_ESP: + ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET, + input->ip.v6.src_ip); + ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_SRC_ADDR_OFFSET, + input->ip.v6.dst_ip); + ice_pkt_insert_u32(loc, ICE_IPV6_NAT_T_ESP_SPI_OFFSET, + input->ip.v6.sec_parm_idx); + break; + case ICE_FLTR_PTYPE_NONF_IPV4_PFCP_NODE: + case ICE_FLTR_PTYPE_NONF_IPV4_PFCP_SESSION: + ice_pkt_insert_u16(loc, ICE_IPV4_UDP_SRC_PORT_OFFSET, + input->ip.v4.dst_port); + break; + case ICE_FLTR_PTYPE_NONF_IPV6_PFCP_NODE: + case ICE_FLTR_PTYPE_NONF_IPV6_PFCP_SESSION: + ice_pkt_insert_u16(loc, ICE_IPV6_UDP_SRC_PORT_OFFSET, + input->ip.v6.dst_port); + break; + case ICE_FLTR_PTYPE_NON_IP_L2: + ice_pkt_insert_u16(loc, ICE_MAC_ETHTYPE_OFFSET, + input->ext_data.ether_type); break; case ICE_FLTR_PTYPE_NONF_IPV6_TCP: ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET, @@ -619,6 +1069,9 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input, input->ip.v6.src_port); ice_pkt_insert_u16(loc, ICE_IPV6_TCP_SRC_PORT_OFFSET, input->ip.v6.dst_port); + ice_pkt_insert_u8_tc(loc, ICE_IPV6_TC_OFFSET, input->ip.v6.tc); + ice_pkt_insert_u8(loc, ICE_IPV6_HLIM_OFFSET, input->ip.v6.hlim); + ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac); break; case ICE_FLTR_PTYPE_NONF_IPV6_UDP: ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET, @@ -629,6 +1082,9 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input, input->ip.v6.src_port); ice_pkt_insert_u16(loc, ICE_IPV6_UDP_SRC_PORT_OFFSET, input->ip.v6.dst_port); + ice_pkt_insert_u8_tc(loc, ICE_IPV6_TC_OFFSET, input->ip.v6.tc); + ice_pkt_insert_u8(loc, ICE_IPV6_HLIM_OFFSET, input->ip.v6.hlim); + ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac); break; case ICE_FLTR_PTYPE_NONF_IPV6_SCTP: ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET, @@ -639,12 +1095,20 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input, input->ip.v6.src_port); ice_pkt_insert_u16(loc, ICE_IPV6_SCTP_SRC_PORT_OFFSET, input->ip.v6.dst_port); + ice_pkt_insert_u8_tc(loc, ICE_IPV6_TC_OFFSET, input->ip.v6.tc); + ice_pkt_insert_u8(loc, ICE_IPV6_HLIM_OFFSET, input->ip.v6.hlim); + ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac); break; case ICE_FLTR_PTYPE_NONF_IPV6_OTHER: ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET, input->ip.v6.src_ip); ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_SRC_ADDR_OFFSET, input->ip.v6.dst_ip); + ice_pkt_insert_u8_tc(loc, ICE_IPV6_TC_OFFSET, input->ip.v6.tc); + ice_pkt_insert_u8(loc, ICE_IPV6_HLIM_OFFSET, input->ip.v6.hlim); + ice_pkt_insert_u8(loc, ICE_IPV6_PROTO_OFFSET, + 
input->ip.v6.proto); + ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac); break; default: return ICE_ERR_PARAM; @@ -671,7 +1135,7 @@ bool ice_fdir_has_frag(enum ice_fltr_ptype flow) } /** - * ice_fdir_find_by_idx - find filter with idx + * ice_fdir_find_fltr_by_idx - find filter with idx * @hw: pointer to hardware structure * @fltr_idx: index to find. * diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.h b/drivers/net/ethernet/intel/ice/ice_fdir.h index 1c587766daab..d2d40e18ae8a 100644 --- a/drivers/net/ethernet/intel/ice/ice_fdir.h +++ b/drivers/net/ethernet/intel/ice/ice_fdir.h @@ -25,6 +25,25 @@ #define ICE_IPV6_UDP_DST_PORT_OFFSET 56 #define ICE_IPV6_SCTP_SRC_PORT_OFFSET 54 #define ICE_IPV6_SCTP_DST_PORT_OFFSET 56 +#define ICE_MAC_ETHTYPE_OFFSET 12 +#define ICE_IPV4_TOS_OFFSET 15 +#define ICE_IPV4_TTL_OFFSET 22 +#define ICE_IPV6_TC_OFFSET 14 +#define ICE_IPV6_HLIM_OFFSET 21 +#define ICE_IPV6_PROTO_OFFSET 20 +#define ICE_IPV4_GTPU_TEID_OFFSET 46 +#define ICE_IPV4_GTPU_QFI_OFFSET 56 +#define ICE_IPV4_L2TPV3_SESS_ID_OFFSET 34 +#define ICE_IPV6_L2TPV3_SESS_ID_OFFSET 54 +#define ICE_IPV4_ESP_SPI_OFFSET 34 +#define ICE_IPV6_ESP_SPI_OFFSET 54 +#define ICE_IPV4_AH_SPI_OFFSET 38 +#define ICE_IPV6_AH_SPI_OFFSET 58 +#define ICE_IPV4_NAT_T_ESP_SPI_OFFSET 42 +#define ICE_IPV6_NAT_T_ESP_SPI_OFFSET 62 + +#define ICE_FDIR_MAX_FLTRS 16384 + /* IP v4 has 2 flag bits that enable fragment processing: DF and MF. DF * requests that the packet not be fragmented. MF indicates that a packet has * been fragmented. @@ -34,6 +53,8 @@ enum ice_fltr_prgm_desc_dest { ICE_FLTR_PRGM_DESC_DEST_DROP_PKT, ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QINDEX, + ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QGROUP, + ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_OTHER, }; enum ice_fltr_prgm_desc_fd_status { @@ -86,6 +107,7 @@ struct ice_fdir_v4 { u8 tos; u8 ip_ver; u8 proto; + u8 ttl; }; #define ICE_IPV6_ADDR_LEN_AS_U32 4 @@ -99,10 +121,35 @@ struct ice_fdir_v6 { __be32 sec_parm_idx; /* security parameter index */ u8 tc; u8 proto; + u8 hlim; +}; + +struct ice_fdir_udp_gtp { + u8 flags; + u8 msg_type; + __be16 rsrvd_len; + __be32 teid; + __be16 rsrvd_seq_nbr; + u8 rsrvd_n_pdu_nbr; + u8 rsrvd_next_ext_type; + u8 rsvrd_ext_len; + u8 pdu_type:4, + spare:4; + u8 ppp:1, + rqi:1, + qfi:6; + u32 rsvrd; + u8 next_ext; +}; + +struct ice_fdir_l2tpv3 { + __be32 session_id; }; struct ice_fdir_extra { u8 dst_mac[ETH_ALEN]; /* dest MAC address */ + u8 src_mac[ETH_ALEN]; /* src MAC address */ + __be16 ether_type; /* for NON_IP_L2 */ u32 usr_def[2]; /* user data */ __be16 vlan_type; /* VLAN ethertype */ __be16 vlan_tag; /* VLAN tag info */ @@ -117,11 +164,19 @@ struct ice_fdir_fltr { struct ice_fdir_v6 v6; } ip, mask; + struct ice_fdir_udp_gtp gtpu_data; + struct ice_fdir_udp_gtp gtpu_mask; + + struct ice_fdir_l2tpv3 l2tpv3_data; + struct ice_fdir_l2tpv3 l2tpv3_mask; + struct ice_fdir_extra ext_data; struct ice_fdir_extra ext_mask; /* flex byte filter data */ __be16 flex_word; + /* queue region size (=2^q_region) */ + u8 q_region; u16 flex_offset; u16 flex_fltr; @@ -129,9 +184,12 @@ struct ice_fdir_fltr { u16 q_index; u16 dest_vsi; u8 dest_ctl; + u8 cnt_ena; u8 fltr_status; u16 cnt_index; u32 fltr_id; + u8 fdid_prio; + u8 comp_report; }; /* Dummy packet filter definition structure */ diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c index 5e1fd30c0a0f..06ac9badee77 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c @@ -334,6 +334,7 @@ 
ice_boost_tcam_handler(u32 sect_type, void *section, u32 index, u32 *offset) if (sect_type != ICE_SID_RXPARSER_BOOST_TCAM) return NULL; + /* cppcheck-suppress nullPointer */ if (index > ICE_MAX_BST_TCAMS_IN_BUF) return NULL; @@ -404,6 +405,7 @@ ice_label_enum_handler(u32 __always_unused sect_type, void *section, u32 index, if (!section) return NULL; + /* cppcheck-suppress nullPointer */ if (index > ICE_MAX_LABELS_IN_BUF) return NULL; @@ -1063,32 +1065,36 @@ ice_download_pkg(struct ice_hw *hw, struct ice_seg *ice_seg) static enum ice_status ice_init_pkg_info(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr) { - struct ice_global_metadata_seg *meta_seg; struct ice_generic_seg_hdr *seg_hdr; if (!pkg_hdr) return ICE_ERR_PARAM; - meta_seg = (struct ice_global_metadata_seg *) - ice_find_seg_in_pkg(hw, SEGMENT_TYPE_METADATA, pkg_hdr); - if (meta_seg) { - hw->pkg_ver = meta_seg->pkg_ver; - memcpy(hw->pkg_name, meta_seg->pkg_name, sizeof(hw->pkg_name)); + seg_hdr = ice_find_seg_in_pkg(hw, SEGMENT_TYPE_ICE, pkg_hdr); + if (seg_hdr) { + struct ice_meta_sect *meta; + struct ice_pkg_enum state; + + memset(&state, 0, sizeof(state)); + + /* Get package information from the Metadata Section */ + meta = ice_pkg_enum_section((struct ice_seg *)seg_hdr, &state, + ICE_SID_METADATA); + if (!meta) { + ice_debug(hw, ICE_DBG_INIT, "Did not find ice metadata section in package\n"); + return ICE_ERR_CFG; + } + + hw->pkg_ver = meta->ver; + memcpy(hw->pkg_name, meta->name, sizeof(meta->name)); ice_debug(hw, ICE_DBG_PKG, "Pkg: %d.%d.%d.%d, %s\n", - meta_seg->pkg_ver.major, meta_seg->pkg_ver.minor, - meta_seg->pkg_ver.update, meta_seg->pkg_ver.draft, - meta_seg->pkg_name); - } else { - ice_debug(hw, ICE_DBG_INIT, "Did not find metadata segment in driver package\n"); - return ICE_ERR_CFG; - } + meta->ver.major, meta->ver.minor, meta->ver.update, + meta->ver.draft, meta->name); - seg_hdr = ice_find_seg_in_pkg(hw, SEGMENT_TYPE_ICE, pkg_hdr); - if (seg_hdr) { - hw->ice_pkg_ver = seg_hdr->seg_format_ver; - memcpy(hw->ice_pkg_name, seg_hdr->seg_id, - sizeof(hw->ice_pkg_name)); + hw->ice_seg_fmt_ver = seg_hdr->seg_format_ver; + memcpy(hw->ice_seg_id, seg_hdr->seg_id, + sizeof(hw->ice_seg_id)); ice_debug(hw, ICE_DBG_PKG, "Ice Seg: %d.%d.%d.%d, %s\n", seg_hdr->seg_format_ver.major, @@ -2063,6 +2069,7 @@ ice_match_prop_lst(struct list_head *list1, struct list_head *list2) count++; list_for_each_entry(tmp2, list2, list) chk_count++; + /* cppcheck-suppress knownConditionTrueFalse */ if (!count || count != chk_count) return false; @@ -2361,18 +2368,82 @@ ice_vsig_add_mv_vsi(struct ice_hw *hw, enum ice_block blk, u16 vsi, u16 vsig) } /** - * ice_find_prof_id - find profile ID for a given field vector + * ice_prof_has_mask_idx - determine if profile index masking is identical + * @hw: pointer to the hardware structure + * @blk: HW block + * @prof: profile to check + * @idx: profile index to check + * @mask: mask to match + */ +static bool +ice_prof_has_mask_idx(struct ice_hw *hw, enum ice_block blk, u8 prof, u16 idx, + u16 mask) +{ + bool expect_no_mask = false; + bool found = false; + bool match = false; + u16 i; + + /* If mask is 0x0000 or 0xffff, then there is no masking */ + if (mask == 0 || mask == 0xffff) + expect_no_mask = true; + + /* Scan the enabled masks on this profile, for the specified idx */ + for (i = hw->blk[blk].masks.first; i < hw->blk[blk].masks.first + + hw->blk[blk].masks.count; i++) + if (hw->blk[blk].es.mask_ena[prof] & BIT(i)) + if (hw->blk[blk].masks.masks[i].in_use && + hw->blk[blk].masks.masks[i].idx == idx) 
{ + found = true; + if (hw->blk[blk].masks.masks[i].mask == mask) + match = true; + break; + } + + if (expect_no_mask) { + if (found) + return false; + } else { + if (!match) + return false; + } + + return true; +} + +/** + * ice_prof_has_mask - determine if profile masking is identical + * @hw: pointer to the hardware structure + * @blk: HW block + * @prof: profile to check + * @masks: masks to match + */ +static bool +ice_prof_has_mask(struct ice_hw *hw, enum ice_block blk, u8 prof, u16 *masks) +{ + u16 i; + + /* es->mask_ena[prof] will have the mask */ + for (i = 0; i < hw->blk[blk].es.fvw; i++) + if (!ice_prof_has_mask_idx(hw, blk, prof, i, masks[i])) + return false; + + return true; +} + +/** + * ice_find_prof_id_with_mask - find profile ID for a given field vector * @hw: pointer to the hardware structure * @blk: HW block * @fv: field vector to search for + * @masks: masks for FV * @prof_id: receives the profile ID */ static enum ice_status -ice_find_prof_id(struct ice_hw *hw, enum ice_block blk, - struct ice_fv_word *fv, u8 *prof_id) +ice_find_prof_id_with_mask(struct ice_hw *hw, enum ice_block blk, + struct ice_fv_word *fv, u16 *masks, u8 *prof_id) { struct ice_es *es = &hw->blk[blk].es; - u16 off; u8 i; /* For FD, we don't want to re-use a existed profile with the same @@ -2382,11 +2453,15 @@ ice_find_prof_id(struct ice_hw *hw, enum ice_block blk, return ICE_ERR_DOES_NOT_EXIST; for (i = 0; i < (u8)es->count; i++) { - off = i * es->fvw; + u16 off = i * es->fvw; if (memcmp(&es->t[off], fv, es->fvw * sizeof(*fv))) continue; + /* check if masks settings are the same for this profile */ + if (masks && !ice_prof_has_mask(hw, blk, i, masks)) + continue; + *prof_id = i; return 0; } @@ -2438,20 +2513,22 @@ static bool ice_tcam_ent_rsrc_type(enum ice_block blk, u16 *rsrc_type) * ice_alloc_tcam_ent - allocate hardware TCAM entry * @hw: pointer to the HW struct * @blk: the block to allocate the TCAM for + * @btm: true to allocate from bottom of table, false to allocate from top * @tcam_idx: pointer to variable to receive the TCAM entry * * This function allocates a new entry in a Profile ID TCAM for a specific * block. 
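
Stepping back to the mask-matching helpers a little further up: the rule ice_prof_has_mask_idx() implements is that a per-word mask of 0x0000 or 0xFFFF means no hardware masking is expected for that word, while any other value must be matched exactly by an in-use mask entry. A compressed standalone model of that rule (the per-profile mask_ena bitmap check is omitted for brevity, so this is a simplification rather than the driver logic):

#include <stdbool.h>
#include <stdint.h>

struct mask_ent {
        uint16_t mask;
        uint16_t idx;
        bool in_use;
};

/* "found" means some in-use entry covers this FV word index;
 * "match" means it also carries the same mask value.
 */
bool prof_mask_idx_matches(const struct mask_ent *tbl, int count,
                           uint16_t idx, uint16_t mask)
{
        bool expect_no_mask = (mask == 0 || mask == 0xFFFF);
        bool found = false, match = false;

        for (int i = 0; i < count; i++) {
                if (tbl[i].in_use && tbl[i].idx == idx) {
                        found = true;
                        match = (tbl[i].mask == mask);
                        break;
                }
        }

        /* No masking wanted: reject if an entry exists for this index.
         * Masking wanted: reject unless the exact mask is present.
         */
        return expect_no_mask ? !found : match;
}
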
*/ static enum ice_status -ice_alloc_tcam_ent(struct ice_hw *hw, enum ice_block blk, u16 *tcam_idx) +ice_alloc_tcam_ent(struct ice_hw *hw, enum ice_block blk, bool btm, + u16 *tcam_idx) { u16 res_type; if (!ice_tcam_ent_rsrc_type(blk, &res_type)) return ICE_ERR_PARAM; - return ice_alloc_hw_res(hw, res_type, 1, true, tcam_idx); + return ice_alloc_hw_res(hw, res_type, 1, btm, tcam_idx); } /** @@ -2537,6 +2614,330 @@ ice_prof_inc_ref(struct ice_hw *hw, enum ice_block blk, u8 prof_id) } /** + * ice_write_prof_mask_reg - write profile mask register + * @hw: pointer to the HW struct + * @blk: hardware block + * @mask_idx: mask index + * @idx: index of the FV which will use the mask + * @mask: the 16-bit mask + */ +static void +ice_write_prof_mask_reg(struct ice_hw *hw, enum ice_block blk, u16 mask_idx, + u16 idx, u16 mask) +{ + u32 offset; + u32 val; + + switch (blk) { + case ICE_BLK_RSS: + offset = GLQF_HMASK(mask_idx); + val = (idx << GLQF_HMASK_MSK_INDEX_S) & GLQF_HMASK_MSK_INDEX_M; + val |= (mask << GLQF_HMASK_MASK_S) & GLQF_HMASK_MASK_M; + break; + case ICE_BLK_FD: + offset = GLQF_FDMASK(mask_idx); + val = (idx << GLQF_FDMASK_MSK_INDEX_S) & GLQF_FDMASK_MSK_INDEX_M; + val |= (mask << GLQF_FDMASK_MASK_S) & GLQF_FDMASK_MASK_M; + break; + default: + ice_debug(hw, ICE_DBG_PKG, "No profile masks for block %d\n", + blk); + return; + } + + wr32(hw, offset, val); + ice_debug(hw, ICE_DBG_PKG, "write mask, blk %d (%d): %x = %x\n", + blk, idx, offset, val); +} + +/** + * ice_write_prof_mask_enable_res - write profile mask enable register + * @hw: pointer to the HW struct + * @blk: hardware block + * @prof_id: profile ID + * @enable_mask: enable mask + */ +static void +ice_write_prof_mask_enable_res(struct ice_hw *hw, enum ice_block blk, + u16 prof_id, u32 enable_mask) +{ + u32 offset; + + switch (blk) { + case ICE_BLK_RSS: + offset = GLQF_HMASK_SEL(prof_id); + break; + case ICE_BLK_FD: + offset = GLQF_FDMASK_SEL(prof_id); + break; + default: + ice_debug(hw, ICE_DBG_PKG, "No profile masks for block %d\n", + blk); + return; + } + + wr32(hw, offset, enable_mask); + ice_debug(hw, ICE_DBG_PKG, "write mask enable, blk %d (%d): %x = %x\n", + blk, prof_id, offset, enable_mask); +} + +/** + * ice_init_prof_masks - initial prof masks + * @hw: pointer to the HW struct + * @blk: hardware block + */ +static void ice_init_prof_masks(struct ice_hw *hw, enum ice_block blk) +{ + u16 per_pf; + u16 i; + + mutex_init(&hw->blk[blk].masks.lock); + + per_pf = ICE_PROF_MASK_COUNT / hw->dev_caps.num_funcs; + + hw->blk[blk].masks.count = per_pf; + hw->blk[blk].masks.first = hw->pf_id * per_pf; + + memset(hw->blk[blk].masks.masks, 0, sizeof(hw->blk[blk].masks.masks)); + + for (i = hw->blk[blk].masks.first; + i < hw->blk[blk].masks.first + hw->blk[blk].masks.count; i++) + ice_write_prof_mask_reg(hw, blk, i, 0, 0); +} + +/** + * ice_init_all_prof_masks - initialize all prof masks + * @hw: pointer to the HW struct + */ +static void ice_init_all_prof_masks(struct ice_hw *hw) +{ + ice_init_prof_masks(hw, ICE_BLK_RSS); + ice_init_prof_masks(hw, ICE_BLK_FD); +} + +/** + * ice_alloc_prof_mask - allocate profile mask + * @hw: pointer to the HW struct + * @blk: hardware block + * @idx: index of FV which will use the mask + * @mask: the 16-bit mask + * @mask_idx: variable to receive the mask index + */ +static enum ice_status +ice_alloc_prof_mask(struct ice_hw *hw, enum ice_block blk, u16 idx, u16 mask, + u16 *mask_idx) +{ + bool found_unused = false, found_copy = false; + enum ice_status status = ICE_ERR_MAX_LIMIT; + u16 unused_idx = 0, 
copy_idx = 0; + u16 i; + + if (blk != ICE_BLK_RSS && blk != ICE_BLK_FD) + return ICE_ERR_PARAM; + + mutex_lock(&hw->blk[blk].masks.lock); + + for (i = hw->blk[blk].masks.first; + i < hw->blk[blk].masks.first + hw->blk[blk].masks.count; i++) + if (hw->blk[blk].masks.masks[i].in_use) { + /* if mask is in use and it exactly duplicates the + * desired mask and index, then it can be reused + */ + if (hw->blk[blk].masks.masks[i].mask == mask && + hw->blk[blk].masks.masks[i].idx == idx) { + found_copy = true; + copy_idx = i; + break; + } + } else { + /* save off unused index, but keep searching in case + * there is an exact match later on + */ + if (!found_unused) { + found_unused = true; + unused_idx = i; + } + } + + if (found_copy) + i = copy_idx; + else if (found_unused) + i = unused_idx; + else + goto err_ice_alloc_prof_mask; + + /* update mask for a new entry */ + if (found_unused) { + hw->blk[blk].masks.masks[i].in_use = true; + hw->blk[blk].masks.masks[i].mask = mask; + hw->blk[blk].masks.masks[i].idx = idx; + hw->blk[blk].masks.masks[i].ref = 0; + ice_write_prof_mask_reg(hw, blk, i, idx, mask); + } + + hw->blk[blk].masks.masks[i].ref++; + *mask_idx = i; + status = 0; + +err_ice_alloc_prof_mask: + mutex_unlock(&hw->blk[blk].masks.lock); + + return status; +} + +/** + * ice_free_prof_mask - free profile mask + * @hw: pointer to the HW struct + * @blk: hardware block + * @mask_idx: index of mask + */ +static enum ice_status +ice_free_prof_mask(struct ice_hw *hw, enum ice_block blk, u16 mask_idx) +{ + if (blk != ICE_BLK_RSS && blk != ICE_BLK_FD) + return ICE_ERR_PARAM; + + if (!(mask_idx >= hw->blk[blk].masks.first && + mask_idx < hw->blk[blk].masks.first + hw->blk[blk].masks.count)) + return ICE_ERR_DOES_NOT_EXIST; + + mutex_lock(&hw->blk[blk].masks.lock); + + if (!hw->blk[blk].masks.masks[mask_idx].in_use) + goto exit_ice_free_prof_mask; + + if (hw->blk[blk].masks.masks[mask_idx].ref > 1) { + hw->blk[blk].masks.masks[mask_idx].ref--; + goto exit_ice_free_prof_mask; + } + + /* remove mask */ + hw->blk[blk].masks.masks[mask_idx].in_use = false; + hw->blk[blk].masks.masks[mask_idx].mask = 0; + hw->blk[blk].masks.masks[mask_idx].idx = 0; + + /* update mask as unused entry */ + ice_debug(hw, ICE_DBG_PKG, "Free mask, blk %d, mask %d\n", blk, + mask_idx); + ice_write_prof_mask_reg(hw, blk, mask_idx, 0, 0); + +exit_ice_free_prof_mask: + mutex_unlock(&hw->blk[blk].masks.lock); + + return 0; +} + +/** + * ice_free_prof_masks - free all profile masks for a profile + * @hw: pointer to the HW struct + * @blk: hardware block + * @prof_id: profile ID + */ +static enum ice_status +ice_free_prof_masks(struct ice_hw *hw, enum ice_block blk, u16 prof_id) +{ + u32 mask_bm; + u16 i; + + if (blk != ICE_BLK_RSS && blk != ICE_BLK_FD) + return ICE_ERR_PARAM; + + mask_bm = hw->blk[blk].es.mask_ena[prof_id]; + for (i = 0; i < BITS_PER_BYTE * sizeof(mask_bm); i++) + if (mask_bm & BIT(i)) + ice_free_prof_mask(hw, blk, i); + + return 0; +} + +/** + * ice_shutdown_prof_masks - releases lock for masking + * @hw: pointer to the HW struct + * @blk: hardware block + * + * This should be called before unloading the driver + */ +static void ice_shutdown_prof_masks(struct ice_hw *hw, enum ice_block blk) +{ + u16 i; + + mutex_lock(&hw->blk[blk].masks.lock); + + for (i = hw->blk[blk].masks.first; + i < hw->blk[blk].masks.first + hw->blk[blk].masks.count; i++) { + ice_write_prof_mask_reg(hw, blk, i, 0, 0); + + hw->blk[blk].masks.masks[i].in_use = false; + hw->blk[blk].masks.masks[i].idx = 0; + hw->blk[blk].masks.masks[i].mask = 0; 
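
ice_alloc_prof_mask() earlier in this hunk prefers sharing an exact (index, mask) duplicate over consuming a fresh slot, and only remembers the first unused slot as a fallback while it keeps scanning for a duplicate. The shape of that pattern, reduced to standalone C (locking, register writes, and the PF-relative slot window are left out):

#include <stdbool.h>
#include <stdint.h>

struct mask_slot {
        uint16_t mask;
        uint16_t idx;
        uint16_t ref;
        bool in_use;
};

/* Reuse-or-allocate: an exact (idx, mask) duplicate is shared by bumping
 * its refcount; otherwise the first unused slot is claimed. Returns the
 * slot index, or -1 when the table is full.
 */
int alloc_mask_slot(struct mask_slot *tbl, int count,
                    uint16_t idx, uint16_t mask)
{
        int unused = -1;

        for (int i = 0; i < count; i++) {
                if (tbl[i].in_use) {
                        if (tbl[i].idx == idx && tbl[i].mask == mask) {
                                tbl[i].ref++;   /* share the entry */
                                return i;
                        }
                } else if (unused < 0) {
                        unused = i;     /* remember, but keep scanning */
                }
        }

        if (unused >= 0)
                tbl[unused] = (struct mask_slot){
                        .mask = mask, .idx = idx, .ref = 1, .in_use = true,
                };
        return unused;
}
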
+ } + + mutex_unlock(&hw->blk[blk].masks.lock); + mutex_destroy(&hw->blk[blk].masks.lock); +} + +/** + * ice_shutdown_all_prof_masks - releases all locks for masking + * @hw: pointer to the HW struct + * + * This should be called before unloading the driver + */ +static void ice_shutdown_all_prof_masks(struct ice_hw *hw) +{ + ice_shutdown_prof_masks(hw, ICE_BLK_RSS); + ice_shutdown_prof_masks(hw, ICE_BLK_FD); +} + +/** + * ice_update_prof_masking - set registers according to masking + * @hw: pointer to the HW struct + * @blk: hardware block + * @prof_id: profile ID + * @masks: masks + */ +static enum ice_status +ice_update_prof_masking(struct ice_hw *hw, enum ice_block blk, u16 prof_id, + u16 *masks) +{ + bool err = false; + u32 ena_mask = 0; + u16 idx; + u16 i; + + /* Only support FD and RSS masking, otherwise nothing to be done */ + if (blk != ICE_BLK_RSS && blk != ICE_BLK_FD) + return 0; + + for (i = 0; i < hw->blk[blk].es.fvw; i++) + if (masks[i] && masks[i] != 0xFFFF) { + if (!ice_alloc_prof_mask(hw, blk, i, masks[i], &idx)) { + ena_mask |= BIT(idx); + } else { + /* not enough bitmaps */ + err = true; + break; + } + } + + if (err) { + /* free any bitmaps we have allocated */ + for (i = 0; i < BITS_PER_BYTE * sizeof(ena_mask); i++) + if (ena_mask & BIT(i)) + ice_free_prof_mask(hw, blk, i); + + return ICE_ERR_OUT_OF_RANGE; + } + + /* enable the masks for this profile */ + ice_write_prof_mask_enable_res(hw, blk, prof_id, ena_mask); + + /* store enabled masks with profile so that they can be freed later */ + hw->blk[blk].es.mask_ena[prof_id] = ena_mask; + + return 0; +} + +/** * ice_write_es - write an extraction sequence to hardware * @hw: pointer to the HW struct * @blk: the block in which to write the extraction sequence @@ -2575,6 +2976,7 @@ ice_prof_dec_ref(struct ice_hw *hw, enum ice_block blk, u8 prof_id) if (hw->blk[blk].es.ref_count[prof_id] > 0) { if (!--hw->blk[blk].es.ref_count[prof_id]) { ice_write_es(hw, blk, prof_id, NULL); + ice_free_prof_masks(hw, blk, prof_id); return ice_free_prof_id(hw, blk, prof_id); } } @@ -2937,6 +3339,7 @@ void ice_free_hw_tbls(struct ice_hw *hw) devm_kfree(ice_hw_to_dev(hw), hw->blk[i].es.t); devm_kfree(ice_hw_to_dev(hw), hw->blk[i].es.ref_count); devm_kfree(ice_hw_to_dev(hw), hw->blk[i].es.written); + devm_kfree(ice_hw_to_dev(hw), hw->blk[i].es.mask_ena); } list_for_each_entry_safe(r, rt, &hw->rss_list_head, l_entry) { @@ -2944,6 +3347,7 @@ void ice_free_hw_tbls(struct ice_hw *hw) devm_kfree(ice_hw_to_dev(hw), r); } mutex_destroy(&hw->rss_locks); + ice_shutdown_all_prof_masks(hw); memset(hw->blk, 0, sizeof(hw->blk)); } @@ -2997,6 +3401,7 @@ void ice_clear_hw_tbls(struct ice_hw *hw) memset(es->t, 0, es->count * sizeof(*es->t) * es->fvw); memset(es->ref_count, 0, es->count * sizeof(*es->ref_count)); memset(es->written, 0, es->count * sizeof(*es->written)); + memset(es->mask_ena, 0, es->count * sizeof(*es->mask_ena)); } } @@ -3010,6 +3415,7 @@ enum ice_status ice_init_hw_tbls(struct ice_hw *hw) mutex_init(&hw->rss_locks); INIT_LIST_HEAD(&hw->rss_list_head); + ice_init_all_prof_masks(hw); for (i = 0; i < ICE_BLK_COUNT; i++) { struct ice_prof_redir *prof_redir = &hw->blk[i].prof_redir; struct ice_prof_tcam *prof = &hw->blk[i].prof; @@ -3112,6 +3518,11 @@ enum ice_status ice_init_hw_tbls(struct ice_hw *hw) sizeof(*es->written), GFP_KERNEL); if (!es->written) goto err; + + es->mask_ena = devm_kcalloc(ice_hw_to_dev(hw), es->count, + sizeof(*es->mask_ena), GFP_KERNEL); + if (!es->mask_ena) + goto err; } return 0; @@ -3711,22 +4122,79 @@ 
ice_update_fd_swap(struct ice_hw *hw, u16 prof_id, struct ice_fv_word *es) return 0; } +/* The entries here need to match the order of enum ice_ptype_attrib */ +static const struct ice_ptype_attrib_info ice_ptype_attributes[] = { + { ICE_GTP_PDU_EH, ICE_GTP_PDU_FLAG_MASK }, + { ICE_GTP_SESSION, ICE_GTP_FLAGS_MASK }, + { ICE_GTP_DOWNLINK, ICE_GTP_FLAGS_MASK }, + { ICE_GTP_UPLINK, ICE_GTP_FLAGS_MASK }, +}; + +/** + * ice_get_ptype_attrib_info - get PTYPE attribute information + * @type: attribute type + * @info: pointer to the variable that receives the attribute information + */ +static void +ice_get_ptype_attrib_info(enum ice_ptype_attrib_type type, + struct ice_ptype_attrib_info *info) +{ + *info = ice_ptype_attributes[type]; +} + +/** + * ice_add_prof_attrib - add any PTG with attributes to profile + * @prof: pointer to the profile to which PTG entries will be added + * @ptg: PTG to be added + * @ptype: PTYPE that needs to be looked up + * @attr: array of attributes that will be considered + * @attr_cnt: number of elements in the attribute array + */ +static enum ice_status +ice_add_prof_attrib(struct ice_prof_map *prof, u8 ptg, u16 ptype, + const struct ice_ptype_attributes *attr, u16 attr_cnt) +{ + bool found = false; + u16 i; + + for (i = 0; i < attr_cnt; i++) + if (attr[i].ptype == ptype) { + found = true; + + prof->ptg[prof->ptg_cnt] = ptg; + ice_get_ptype_attrib_info(attr[i].attrib, + &prof->attr[prof->ptg_cnt]); + + if (++prof->ptg_cnt >= ICE_MAX_PTG_PER_PROFILE) + return ICE_ERR_MAX_LIMIT; + } + + if (!found) + return ICE_ERR_DOES_NOT_EXIST; + + return 0; +} + /** * ice_add_prof - add profile * @hw: pointer to the HW struct * @blk: hardware block * @id: profile tracking ID * @ptypes: array of bitmaps indicating ptypes (ICE_FLOW_PTYPE_MAX bits) + * @attr: array of attributes + * @attr_cnt: number of elements in attr array * @es: extraction sequence (length of array is determined by the block) + * @masks: mask for extraction sequence * - * This function registers a profile, which matches a set of PTGs with a + * This function registers a profile, which matches a set of PTYPES with a * particular extraction sequence. While the hardware profile is allocated * it will not be written until the first call to ice_add_flow that specifies * the ID value used here. 
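
The "entries here need to match the order of enum ice_ptype_attrib" requirement near the top of this hunk exists because ice_get_ptype_attrib_info() indexes the table directly with the enum value. Using the GTP flag encodings defined further down in ice_flex_type.h (BIT(14) for the PDU flag, BIT(13) for the link direction), a small sketch with hypothetical names shows the coupling; designated initializers are one way to make it explicit:

#include <stdint.h>

#define BIT(n) (1U << (n))

enum gtp_attrib { GTP_PDU_EH, GTP_SESSION, GTP_DOWNLINK, GTP_UPLINK };

struct attrib_info {
        uint16_t flags;
        uint16_t mask;
};

/* Designated initializers keep each row pinned to its enum value even if
 * the enum is ever reordered; positional init (as in the driver) relies
 * on the comment instead.
 */
static const struct attrib_info gtp_attribs[] = {
        [GTP_PDU_EH]   = { BIT(14), BIT(14) },
        [GTP_SESSION]  = { 0, BIT(14) | BIT(13) },
        [GTP_DOWNLINK] = { BIT(14), BIT(14) | BIT(13) },
        [GTP_UPLINK]   = { BIT(14) | BIT(13), BIT(14) | BIT(13) },
};

struct attrib_info get_attrib(enum gtp_attrib type)
{
        return gtp_attribs[type];       /* direct index, like the driver */
}
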
*/ enum ice_status ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[], - struct ice_fv_word *es) + const struct ice_ptype_attributes *attr, u16 attr_cnt, + struct ice_fv_word *es, u16 *masks) { u32 bytes = DIV_ROUND_UP(ICE_FLOW_PTYPE_MAX, BITS_PER_BYTE); DECLARE_BITMAP(ptgs_used, ICE_XLT1_CNT); @@ -3740,7 +4208,7 @@ ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[], mutex_lock(&hw->blk[blk].es.prof_map_lock); /* search for existing profile */ - status = ice_find_prof_id(hw, blk, es, &prof_id); + status = ice_find_prof_id_with_mask(hw, blk, es, masks, &prof_id); if (status) { /* allocate profile ID */ status = ice_alloc_prof_id(hw, blk, &prof_id); @@ -3758,6 +4226,9 @@ ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[], if (status) goto err_ice_add_prof; } + status = ice_update_prof_masking(hw, blk, prof_id, masks); + if (status) + goto err_ice_add_prof; /* and write new es */ ice_write_es(hw, blk, prof_id, es); @@ -3792,7 +4263,6 @@ ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[], BITS_PER_BYTE) { u16 ptype; u8 ptg; - u8 m; ptype = byte * BITS_PER_BYTE + bit; @@ -3807,15 +4277,25 @@ ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[], continue; set_bit(ptg, ptgs_used); - prof->ptg[prof->ptg_cnt] = ptg; - - if (++prof->ptg_cnt >= ICE_MAX_PTG_PER_PROFILE) + /* Check to see if there are any attributes for + * this PTYPE, and add them if found. + */ + status = ice_add_prof_attrib(prof, ptg, ptype, + attr, attr_cnt); + if (status == ICE_ERR_MAX_LIMIT) break; + if (status) { + /* This is simply a PTYPE/PTG with no + * attribute + */ + prof->ptg[prof->ptg_cnt] = ptg; + prof->attr[prof->ptg_cnt].flags = 0; + prof->attr[prof->ptg_cnt].mask = 0; - /* nothing left in byte, then exit */ - m = ~(u8)((1 << (bit + 1)) - 1); - if (!(ptypes[byte] & m)) - break; + if (++prof->ptg_cnt >= + ICE_MAX_PTG_PER_PROFILE) + break; + } } bytes--; @@ -4326,7 +4806,12 @@ ice_prof_tcam_ena_dis(struct ice_hw *hw, enum ice_block blk, bool enable, } /* for re-enabling, reallocate a TCAM */ - status = ice_alloc_tcam_ent(hw, blk, &tcam->tcam_idx); + /* for entries with empty attribute masks, allocate entry from + * the bottom of the TCAM table; otherwise, allocate from the + * top of the table in order to give it higher priority + */ + status = ice_alloc_tcam_ent(hw, blk, tcam->attr.mask == 0, + &tcam->tcam_idx); if (status) return status; @@ -4336,8 +4821,8 @@ ice_prof_tcam_ena_dis(struct ice_hw *hw, enum ice_block blk, bool enable, return ICE_ERR_NO_MEMORY; status = ice_tcam_write_entry(hw, blk, tcam->tcam_idx, tcam->prof_id, - tcam->ptg, vsig, 0, 0, vl_msk, dc_msk, - nm_msk); + tcam->ptg, vsig, 0, tcam->attr.flags, + vl_msk, dc_msk, nm_msk); if (status) goto err_ice_prof_tcam_ena_dis; @@ -4485,7 +4970,12 @@ ice_add_prof_id_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig, u64 hdl, } /* allocate the TCAM entry index */ - status = ice_alloc_tcam_ent(hw, blk, &tcam_idx); + /* for entries with empty attribute masks, allocate entry from + * the bottom of the TCAM table; otherwise, allocate from the + * top of the table in order to give it higher priority + */ + status = ice_alloc_tcam_ent(hw, blk, map->attr[i].mask == 0, + &tcam_idx); if (status) { devm_kfree(ice_hw_to_dev(hw), p); goto err_ice_add_prof_id_vsig; @@ -4494,6 +4984,7 @@ ice_add_prof_id_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig, u64 hdl, t->tcam[i].ptg = map->ptg[i]; t->tcam[i].prof_id = map->prof_id; t->tcam[i].tcam_idx = tcam_idx; + 
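
The bottom-versus-top TCAM allocation in the two hunks above is a classic first-match priority trick: if lookups effectively scan from one end of the table, entries allocated nearer that end win whenever two entries could both match. A toy model of the idea (it assumes the lowest index wins, which is the common TCAM convention; the actual hardware ordering is not spelled out in this patch):

#include <stdbool.h>
#include <stdint.h>

struct tcam_ent {
        uint16_t value;
        uint16_t mask;
        bool in_use;
};

/* First matching entry wins, so an entry placed closer to index 0
 * effectively has higher priority than any later, overlapping one.
 */
int tcam_lookup(const struct tcam_ent *t, int n, uint16_t key)
{
        for (int i = 0; i < n; i++)
                if (t[i].in_use && (key & t[i].mask) == t[i].value)
                        return i;
        return -1;      /* no match */
}
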
t->tcam[i].attr = map->attr[i]; t->tcam[i].in_use = true; p->type = ICE_TCAM_ADD; diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h index 20deddb807c5..8a58e79729b9 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h @@ -27,7 +27,8 @@ int ice_udp_tunnel_unset_port(struct net_device *netdev, unsigned int table, enum ice_status ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[], - struct ice_fv_word *es); + const struct ice_ptype_attributes *attr, u16 attr_cnt, + struct ice_fv_word *es, u16 *masks); enum ice_status ice_add_prof_id_flow(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl); enum ice_status diff --git a/drivers/net/ethernet/intel/ice/ice_flex_type.h b/drivers/net/ethernet/intel/ice/ice_flex_type.h index 24063c1351b2..7d8b517a63c9 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_type.h +++ b/drivers/net/ethernet/intel/ice/ice_flex_type.h @@ -109,6 +109,7 @@ struct ice_buf_hdr { (ent_sz)) /* ice package section IDs */ +#define ICE_SID_METADATA 1 #define ICE_SID_XLT0_SW 10 #define ICE_SID_XLT_KEY_BUILDER_SW 11 #define ICE_SID_XLT1_SW 12 @@ -117,6 +118,14 @@ struct ice_buf_hdr { #define ICE_SID_PROFID_REDIR_SW 15 #define ICE_SID_FLD_VEC_SW 16 #define ICE_SID_CDID_KEY_BUILDER_SW 17 + +struct ice_meta_sect { + struct ice_pkg_ver ver; +#define ICE_META_SECT_NAME_SIZE 28 + char name[ICE_META_SECT_NAME_SIZE]; + __le32 track_id; +}; + #define ICE_SID_CDID_REDIR_SW 18 #define ICE_SID_XLT0_ACL 20 @@ -190,6 +199,64 @@ enum ice_sect { ICE_SECT_COUNT }; +#define ICE_MAC_IPV4_GTPU_IPV4_FRAG 331 +#define ICE_MAC_IPV4_GTPU_IPV4_PAY 332 +#define ICE_MAC_IPV4_GTPU_IPV4_UDP_PAY 333 +#define ICE_MAC_IPV4_GTPU_IPV4_TCP 334 +#define ICE_MAC_IPV4_GTPU_IPV4_ICMP 335 +#define ICE_MAC_IPV6_GTPU_IPV4_FRAG 336 +#define ICE_MAC_IPV6_GTPU_IPV4_PAY 337 +#define ICE_MAC_IPV6_GTPU_IPV4_UDP_PAY 338 +#define ICE_MAC_IPV6_GTPU_IPV4_TCP 339 +#define ICE_MAC_IPV6_GTPU_IPV4_ICMP 340 +#define ICE_MAC_IPV4_GTPU_IPV6_FRAG 341 +#define ICE_MAC_IPV4_GTPU_IPV6_PAY 342 +#define ICE_MAC_IPV4_GTPU_IPV6_UDP_PAY 343 +#define ICE_MAC_IPV4_GTPU_IPV6_TCP 344 +#define ICE_MAC_IPV4_GTPU_IPV6_ICMPV6 345 +#define ICE_MAC_IPV6_GTPU_IPV6_FRAG 346 +#define ICE_MAC_IPV6_GTPU_IPV6_PAY 347 +#define ICE_MAC_IPV6_GTPU_IPV6_UDP_PAY 348 +#define ICE_MAC_IPV6_GTPU_IPV6_TCP 349 +#define ICE_MAC_IPV6_GTPU_IPV6_ICMPV6 350 + +/* Attributes that can modify PTYPE definitions. + * + * These values will represent special attributes for PTYPEs, which will + * resolve into metadata packet flags definitions that can be used in the TCAM + * for identifying a PTYPE with specific characteristics. 
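
As an aside on struct ice_meta_sect defined just above in this header diff: this is the section the reworked ice_init_pkg_info() now pulls the package version and name from. A userspace-flavored sketch of those fields (the struct ice_pkg_ver layout is inferred from the major/minor/update/draft accesses in that function, so treat it as an assumption, and the sample values are made up):

#include <stdint.h>
#include <stdio.h>

struct pkg_ver {        /* assumed layout, matching the accesses above */
        uint8_t major, minor, update, draft;
};

struct meta_sect {
        struct pkg_ver ver;
        char name[28];          /* ICE_META_SECT_NAME_SIZE */
        uint32_t track_id;      /* __le32 in the driver */
};

static void print_pkg_info(const struct meta_sect *meta)
{
        /* mirrors the ice_debug() line in ice_init_pkg_info() */
        printf("Pkg: %u.%u.%u.%u, %.28s\n",
               meta->ver.major, meta->ver.minor,
               meta->ver.update, meta->ver.draft, meta->name);
}

int main(void)
{
        struct meta_sect meta = {
                .ver = { 1, 3, 26, 0 },         /* sample values only */
                .name = "ICE OS Default Package",
        };

        print_pkg_info(&meta);
        return 0;
}
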
+ */ +enum ice_ptype_attrib_type { + /* GTP PTYPEs */ + ICE_PTYPE_ATTR_GTP_PDU_EH, + ICE_PTYPE_ATTR_GTP_SESSION, + ICE_PTYPE_ATTR_GTP_DOWNLINK, + ICE_PTYPE_ATTR_GTP_UPLINK, +}; + +struct ice_ptype_attrib_info { + u16 flags; + u16 mask; +}; + +/* TCAM flag definitions */ +#define ICE_GTP_PDU BIT(14) +#define ICE_GTP_PDU_LINK BIT(13) + +/* GTP attributes */ +#define ICE_GTP_PDU_FLAG_MASK (ICE_GTP_PDU) +#define ICE_GTP_PDU_EH ICE_GTP_PDU + +#define ICE_GTP_FLAGS_MASK (ICE_GTP_PDU | ICE_GTP_PDU_LINK) +#define ICE_GTP_SESSION 0 +#define ICE_GTP_DOWNLINK ICE_GTP_PDU +#define ICE_GTP_UPLINK (ICE_GTP_PDU | ICE_GTP_PDU_LINK) + +struct ice_ptype_attributes { + u16 ptype; + enum ice_ptype_attrib_type attrib; +}; + /* package labels */ struct ice_label { __le16 value; @@ -335,6 +402,7 @@ struct ice_es { u16 count; u16 fvw; u16 *ref_count; + u32 *mask_ena; struct list_head prof_map; struct ice_fv_word *t; struct mutex prof_map_lock; /* protect access to profiles list */ @@ -372,12 +440,14 @@ struct ice_prof_map { u8 prof_id; u8 ptg_cnt; u8 ptg[ICE_MAX_PTG_PER_PROFILE]; + struct ice_ptype_attrib_info attr[ICE_MAX_PTG_PER_PROFILE]; }; #define ICE_INVALID_TCAM 0xFFFF struct ice_tcam_inf { u16 tcam_idx; + struct ice_ptype_attrib_info attr; u8 ptg; u8 prof_id; u8 in_use; @@ -427,8 +497,8 @@ struct ice_xlt1 { #define ICE_PF_NUM_S 13 #define ICE_PF_NUM_M (0x07 << ICE_PF_NUM_S) #define ICE_VSIG_VALUE(vsig, pf_id) \ - (u16)((((u16)(vsig)) & ICE_VSIG_IDX_M) | \ - (((u16)(pf_id) << ICE_PF_NUM_S) & ICE_PF_NUM_M)) + ((u16)((((u16)(vsig)) & ICE_VSIG_IDX_M) | \ + (((u16)(pf_id) << ICE_PF_NUM_S) & ICE_PF_NUM_M))) #define ICE_DEFAULT_VSIG 0 /* XLT2 Table */ @@ -478,6 +548,21 @@ struct ice_prof_redir { u16 count; }; +struct ice_mask { + u16 mask; /* 16-bit mask */ + u16 idx; /* index */ + u16 ref; /* reference count */ + u8 in_use; /* non-zero if used */ +}; + +struct ice_masks { + struct mutex lock; /* lock to protect this structure */ + u16 first; /* first mask owned by the PF */ + u16 count; /* number of masks owned by the PF */ +#define ICE_PROF_MASK_COUNT 32 + struct ice_mask masks[ICE_PROF_MASK_COUNT]; +}; + /* Tables per block */ struct ice_blk_info { struct ice_xlt1 xlt1; @@ -485,6 +570,7 @@ struct ice_blk_info { struct ice_prof_tcam prof; struct ice_prof_redir prof_redir; struct ice_es es; + struct ice_masks masks; u8 overwrite; /* set to true to allow overwrite of table entries */ u8 is_list_init; }; @@ -513,6 +599,7 @@ struct ice_chs_chg { u16 vsig; u16 orig_vsig; u16 tcam_idx; + struct ice_ptype_attrib_info attr; }; #define ICE_FLOW_PTYPE_MAX ICE_XLT1_CNT diff --git a/drivers/net/ethernet/intel/ice/ice_flow.c b/drivers/net/ethernet/intel/ice/ice_flow.c index 89a0cef20506..f160672448a0 100644 --- a/drivers/net/ethernet/intel/ice/ice_flow.c +++ b/drivers/net/ethernet/intel/ice/ice_flow.c @@ -9,18 +9,50 @@ struct ice_flow_field_info { enum ice_flow_seg_hdr hdr; s16 off; /* Offset from start of a protocol header, in bits */ u16 size; /* Size of fields in bits */ + u16 mask; /* 16-bit mask for field */ }; #define ICE_FLOW_FLD_INFO(_hdr, _offset_bytes, _size_bytes) { \ .hdr = _hdr, \ .off = (_offset_bytes) * BITS_PER_BYTE, \ .size = (_size_bytes) * BITS_PER_BYTE, \ + .mask = 0, \ +} + +#define ICE_FLOW_FLD_INFO_MSK(_hdr, _offset_bytes, _size_bytes, _mask) { \ + .hdr = _hdr, \ + .off = (_offset_bytes) * BITS_PER_BYTE, \ + .size = (_size_bytes) * BITS_PER_BYTE, \ + .mask = _mask, \ } /* Table containing properties of supported protocol header fields */ static const struct ice_flow_field_info 
ice_flds_info[ICE_FLOW_FIELD_IDX_MAX] = { + /* Ether */ + /* ICE_FLOW_FIELD_IDX_ETH_DA */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ETH, 0, ETH_ALEN), + /* ICE_FLOW_FIELD_IDX_ETH_SA */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ETH, ETH_ALEN, ETH_ALEN), + /* ICE_FLOW_FIELD_IDX_S_VLAN */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_VLAN, 12, sizeof(__be16)), + /* ICE_FLOW_FIELD_IDX_C_VLAN */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_VLAN, 14, sizeof(__be16)), + /* ICE_FLOW_FIELD_IDX_ETH_TYPE */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ETH, 0, sizeof(__be16)), /* IPv4 / IPv6 */ + /* ICE_FLOW_FIELD_IDX_IPV4_DSCP */ + ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_IPV4, 0, 1, 0x00fc), + /* ICE_FLOW_FIELD_IDX_IPV6_DSCP */ + ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_IPV6, 0, 1, 0x0ff0), + /* ICE_FLOW_FIELD_IDX_IPV4_TTL */ + ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_NONE, 8, 1, 0xff00), + /* ICE_FLOW_FIELD_IDX_IPV4_PROT */ + ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_NONE, 8, 1, 0x00ff), + /* ICE_FLOW_FIELD_IDX_IPV6_TTL */ + ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_NONE, 6, 1, 0x00ff), + /* ICE_FLOW_FIELD_IDX_IPV6_PROT */ + ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_NONE, 6, 1, 0xff00), /* ICE_FLOW_FIELD_IDX_IPV4_SA */ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV4, 12, sizeof(struct in_addr)), /* ICE_FLOW_FIELD_IDX_IPV4_DA */ @@ -42,22 +74,112 @@ struct ice_flow_field_info ice_flds_info[ICE_FLOW_FIELD_IDX_MAX] = { ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_SCTP, 0, sizeof(__be16)), /* ICE_FLOW_FIELD_IDX_SCTP_DST_PORT */ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_SCTP, 2, sizeof(__be16)), + /* ICE_FLOW_FIELD_IDX_TCP_FLAGS */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_TCP, 13, 1), + /* ARP */ + /* ICE_FLOW_FIELD_IDX_ARP_SIP */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ARP, 14, sizeof(struct in_addr)), + /* ICE_FLOW_FIELD_IDX_ARP_DIP */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ARP, 24, sizeof(struct in_addr)), + /* ICE_FLOW_FIELD_IDX_ARP_SHA */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ARP, 8, ETH_ALEN), + /* ICE_FLOW_FIELD_IDX_ARP_DHA */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ARP, 18, ETH_ALEN), + /* ICE_FLOW_FIELD_IDX_ARP_OP */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ARP, 6, sizeof(__be16)), + /* ICMP */ + /* ICE_FLOW_FIELD_IDX_ICMP_TYPE */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ICMP, 0, 1), + /* ICE_FLOW_FIELD_IDX_ICMP_CODE */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ICMP, 1, 1), /* GRE */ /* ICE_FLOW_FIELD_IDX_GRE_KEYID */ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GRE, 12, sizeof_field(struct gre_full_hdr, key)), + /* GTP */ + /* ICE_FLOW_FIELD_IDX_GTPC_TEID */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPC_TEID, 12, sizeof(__be32)), + /* ICE_FLOW_FIELD_IDX_GTPU_IP_TEID */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPU_IP, 12, sizeof(__be32)), + /* ICE_FLOW_FIELD_IDX_GTPU_EH_TEID */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPU_EH, 12, sizeof(__be32)), + /* ICE_FLOW_FIELD_IDX_GTPU_EH_QFI */ + ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_GTPU_EH, 22, sizeof(__be16), + 0x3f00), + /* ICE_FLOW_FIELD_IDX_GTPU_UP_TEID */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPU_UP, 12, sizeof(__be32)), + /* ICE_FLOW_FIELD_IDX_GTPU_DWN_TEID */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPU_DWN, 12, sizeof(__be32)), + /* PPPoE */ + /* ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_PPPOE, 2, sizeof(__be16)), + /* PFCP */ + /* ICE_FLOW_FIELD_IDX_PFCP_SEID */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_PFCP_SESSION, 12, sizeof(__be64)), + /* L2TPv3 */ + /* ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_L2TPV3, 0, sizeof(__be32)), + /* ESP */ + /* ICE_FLOW_FIELD_IDX_ESP_SPI */ + 
ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ESP, 0, sizeof(__be32)), + /* AH */ + /* ICE_FLOW_FIELD_IDX_AH_SPI */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_AH, 4, sizeof(__be32)), + /* NAT_T_ESP */ + /* ICE_FLOW_FIELD_IDX_NAT_T_ESP_SPI */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_NAT_T_ESP, 8, sizeof(__be32)), }; /* Bitmaps indicating relevant packet types for a particular protocol header * - * Packet types for packets with an Outer/First/Single IPv4 header + * Packet types for packets with an Outer/First/Single MAC header + */ +static const u32 ice_ptypes_mac_ofos[] = { + 0xFDC00846, 0xBFBF7F7E, 0xF70001DF, 0xFEFDFDFB, + 0x0000077E, 0x00000000, 0x00000000, 0x00000000, + 0x00400000, 0x03FFF000, 0x7FFFFFE0, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/* Packet types for packets with an Innermost/Last MAC VLAN header */ +static const u32 ice_ptypes_macvlan_il[] = { + 0x00000000, 0xBC000000, 0x000001DF, 0xF0000000, + 0x0000077E, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/* Packet types for packets with an Outer/First/Single IPv4 header, does NOT + * include IPv4 other PTYPEs */ static const u32 ice_ptypes_ipv4_ofos[] = { 0x1DC00000, 0x04000800, 0x00000000, 0x00000000, + 0x00000000, 0x00000155, 0x00000000, 0x00000000, + 0x00000000, 0x000FC000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/* Packet types for packets with an Outer/First/Single IPv4 header, includes + * IPv4 other PTYPEs + */ +static const u32 ice_ptypes_ipv4_ofos_all[] = { + 0x1DC00000, 0x04000800, 0x00000000, 0x00000000, + 0x00000000, 0x00000155, 0x00000000, 0x00000000, + 0x00000000, 0x000FC000, 0x83E0F800, 0x00000101, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -67,7 +189,7 @@ static const u32 ice_ptypes_ipv4_ofos[] = { static const u32 ice_ptypes_ipv4_il[] = { 0xE0000000, 0xB807700E, 0x80000003, 0xE01DC03B, 0x0000000E, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x001FF800, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -75,11 +197,27 @@ static const u32 ice_ptypes_ipv4_il[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, }; -/* Packet types for packets with an Outer/First/Single IPv6 header */ +/* Packet types for packets with an Outer/First/Single IPv6 header, does NOT + * include IPv6 other PTYPEs + */ static const u32 ice_ptypes_ipv6_ofos[] = { 0x00000000, 0x00000000, 0x77000000, 0x10002000, + 0x00000000, 0x000002AA, 0x00000000, 0x00000000, + 0x00000000, 0x03F00000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 
0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/* Packet types for packets with an Outer/First/Single IPv6 header, includes + * IPv6 other PTYPEs + */ +static const u32 ice_ptypes_ipv6_ofos_all[] = { + 0x00000000, 0x00000000, 0x77000000, 0x10002000, + 0x00000000, 0x000002AA, 0x00000000, 0x00000000, + 0x00080F00, 0x03F00000, 0x7C1F0000, 0x00000206, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -91,7 +229,7 @@ static const u32 ice_ptypes_ipv6_ofos[] = { static const u32 ice_ptypes_ipv6_il[] = { 0x00000000, 0x03B80770, 0x000001DC, 0x0EE00000, 0x00000770, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x7FE00000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -100,7 +238,7 @@ static const u32 ice_ptypes_ipv6_il[] = { }; /* Packet types for packets with an Outer/First/Single IPv4 header - no L4 */ -static const u32 ice_ipv4_ofos_no_l4[] = { +static const u32 ice_ptypes_ipv4_ofos_no_l4[] = { 0x10C00000, 0x04000800, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -111,8 +249,20 @@ static const u32 ice_ipv4_ofos_no_l4[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, }; +/* Packet types for packets with an Outermost/First ARP header */ +static const u32 ice_ptypes_arp_of[] = { + 0x00000800, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + /* Packet types for packets with an Innermost/Last IPv4 header - no L4 */ -static const u32 ice_ipv4_il_no_l4[] = { +static const u32 ice_ptypes_ipv4_il_no_l4[] = { 0x60000000, 0x18043008, 0x80000002, 0x6010c021, 0x00000008, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -124,7 +274,7 @@ static const u32 ice_ipv4_il_no_l4[] = { }; /* Packet types for packets with an Outer/First/Single IPv6 header - no L4 */ -static const u32 ice_ipv6_ofos_no_l4[] = { +static const u32 ice_ptypes_ipv6_ofos_no_l4[] = { 0x00000000, 0x00000000, 0x43000000, 0x10002000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -136,7 +286,7 @@ static const u32 ice_ipv6_ofos_no_l4[] = { }; /* Packet types for packets with an Innermost/Last IPv6 header - no L4 */ -static const u32 ice_ipv6_il_no_l4[] = { +static const u32 ice_ptypes_ipv6_il_no_l4[] = { 0x00000000, 0x02180430, 0x0000010c, 0x086010c0, 0x00000430, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -153,7 +303,7 @@ static const u32 ice_ipv6_il_no_l4[] = { static const u32 ice_ptypes_udp_il[] = { 0x81000000, 0x20204040, 0x04000010, 0x80810102, 0x00000040, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00410000, 0x90842000, 0x00000007, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -165,7 +315,7 @@ static const u32 ice_ptypes_udp_il[] = { static const u32 ice_ptypes_tcp_il[] = { 0x04000000, 0x80810102, 0x10000040, 0x02040408, 0x00000102, 0x00000000, 0x00000000, 0x00000000, - 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00820000, 0x21084000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -177,6 +327,18 @@ static const u32 ice_ptypes_tcp_il[] = { static const u32 ice_ptypes_sctp_il[] = { 0x08000000, 0x01020204, 0x20000081, 0x04080810, 0x00000204, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x01040000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/* Packet types for packets with an Outermost/First ICMP header */ +static const u32 ice_ptypes_icmp_of[] = { + 0x10000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -185,6 +347,18 @@ static const u32 ice_ptypes_sctp_il[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, }; +/* Packet types for packets with an Innermost/Last ICMP header */ +static const u32 ice_ptypes_icmp_il[] = { + 0x00000000, 0x02040408, 0x40000102, 0x08101020, + 0x00000408, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x42108000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + /* Packet types for packets with an Outermost/First GRE header */ static const u32 ice_ptypes_gre_of[] = { 0x00000000, 0xBFBF7800, 0x000001DF, 0xFEFDE000, @@ -197,6 +371,218 @@ static const u32 ice_ptypes_gre_of[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, }; +/* Packet types for packets with an Innermost/Last MAC header */ +static const u32 ice_ptypes_mac_il[] = { + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/* Packet types for GTPC */ +static const u32 ice_ptypes_gtpc[] = { + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000180, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/* Packet types for GTPC with TEID */ +static const u32 ice_ptypes_gtpc_tid[] = { + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000060, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 
0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/* Packet types for GTPU */ +static const struct ice_ptype_attributes ice_attr_gtpu_eh[] = { + { ICE_MAC_IPV4_GTPU_IPV4_FRAG, ICE_PTYPE_ATTR_GTP_PDU_EH }, + { ICE_MAC_IPV4_GTPU_IPV4_PAY, ICE_PTYPE_ATTR_GTP_PDU_EH }, + { ICE_MAC_IPV4_GTPU_IPV4_UDP_PAY, ICE_PTYPE_ATTR_GTP_PDU_EH }, + { ICE_MAC_IPV4_GTPU_IPV4_TCP, ICE_PTYPE_ATTR_GTP_PDU_EH }, + { ICE_MAC_IPV4_GTPU_IPV4_ICMP, ICE_PTYPE_ATTR_GTP_PDU_EH }, + { ICE_MAC_IPV6_GTPU_IPV4_FRAG, ICE_PTYPE_ATTR_GTP_PDU_EH }, + { ICE_MAC_IPV6_GTPU_IPV4_PAY, ICE_PTYPE_ATTR_GTP_PDU_EH }, + { ICE_MAC_IPV6_GTPU_IPV4_UDP_PAY, ICE_PTYPE_ATTR_GTP_PDU_EH }, + { ICE_MAC_IPV6_GTPU_IPV4_TCP, ICE_PTYPE_ATTR_GTP_PDU_EH }, + { ICE_MAC_IPV6_GTPU_IPV4_ICMP, ICE_PTYPE_ATTR_GTP_PDU_EH }, + { ICE_MAC_IPV4_GTPU_IPV6_FRAG, ICE_PTYPE_ATTR_GTP_PDU_EH }, + { ICE_MAC_IPV4_GTPU_IPV6_PAY, ICE_PTYPE_ATTR_GTP_PDU_EH }, + { ICE_MAC_IPV4_GTPU_IPV6_UDP_PAY, ICE_PTYPE_ATTR_GTP_PDU_EH }, + { ICE_MAC_IPV4_GTPU_IPV6_TCP, ICE_PTYPE_ATTR_GTP_PDU_EH }, + { ICE_MAC_IPV4_GTPU_IPV6_ICMPV6, ICE_PTYPE_ATTR_GTP_PDU_EH }, + { ICE_MAC_IPV6_GTPU_IPV6_FRAG, ICE_PTYPE_ATTR_GTP_PDU_EH }, + { ICE_MAC_IPV6_GTPU_IPV6_PAY, ICE_PTYPE_ATTR_GTP_PDU_EH }, + { ICE_MAC_IPV6_GTPU_IPV6_UDP_PAY, ICE_PTYPE_ATTR_GTP_PDU_EH }, + { ICE_MAC_IPV6_GTPU_IPV6_TCP, ICE_PTYPE_ATTR_GTP_PDU_EH }, + { ICE_MAC_IPV6_GTPU_IPV6_ICMPV6, ICE_PTYPE_ATTR_GTP_PDU_EH }, +}; + +static const struct ice_ptype_attributes ice_attr_gtpu_down[] = { + { ICE_MAC_IPV4_GTPU_IPV4_FRAG, ICE_PTYPE_ATTR_GTP_DOWNLINK }, + { ICE_MAC_IPV4_GTPU_IPV4_PAY, ICE_PTYPE_ATTR_GTP_DOWNLINK }, + { ICE_MAC_IPV4_GTPU_IPV4_UDP_PAY, ICE_PTYPE_ATTR_GTP_DOWNLINK }, + { ICE_MAC_IPV4_GTPU_IPV4_TCP, ICE_PTYPE_ATTR_GTP_DOWNLINK }, + { ICE_MAC_IPV4_GTPU_IPV4_ICMP, ICE_PTYPE_ATTR_GTP_DOWNLINK }, + { ICE_MAC_IPV6_GTPU_IPV4_FRAG, ICE_PTYPE_ATTR_GTP_DOWNLINK }, + { ICE_MAC_IPV6_GTPU_IPV4_PAY, ICE_PTYPE_ATTR_GTP_DOWNLINK }, + { ICE_MAC_IPV6_GTPU_IPV4_UDP_PAY, ICE_PTYPE_ATTR_GTP_DOWNLINK }, + { ICE_MAC_IPV6_GTPU_IPV4_TCP, ICE_PTYPE_ATTR_GTP_DOWNLINK }, + { ICE_MAC_IPV6_GTPU_IPV4_ICMP, ICE_PTYPE_ATTR_GTP_DOWNLINK }, + { ICE_MAC_IPV4_GTPU_IPV6_FRAG, ICE_PTYPE_ATTR_GTP_DOWNLINK }, + { ICE_MAC_IPV4_GTPU_IPV6_PAY, ICE_PTYPE_ATTR_GTP_DOWNLINK }, + { ICE_MAC_IPV4_GTPU_IPV6_UDP_PAY, ICE_PTYPE_ATTR_GTP_DOWNLINK }, + { ICE_MAC_IPV4_GTPU_IPV6_TCP, ICE_PTYPE_ATTR_GTP_DOWNLINK }, + { ICE_MAC_IPV4_GTPU_IPV6_ICMPV6, ICE_PTYPE_ATTR_GTP_DOWNLINK }, + { ICE_MAC_IPV6_GTPU_IPV6_FRAG, ICE_PTYPE_ATTR_GTP_DOWNLINK }, + { ICE_MAC_IPV6_GTPU_IPV6_PAY, ICE_PTYPE_ATTR_GTP_DOWNLINK }, + { ICE_MAC_IPV6_GTPU_IPV6_UDP_PAY, ICE_PTYPE_ATTR_GTP_DOWNLINK }, + { ICE_MAC_IPV6_GTPU_IPV6_TCP, ICE_PTYPE_ATTR_GTP_DOWNLINK }, + { ICE_MAC_IPV6_GTPU_IPV6_ICMPV6, ICE_PTYPE_ATTR_GTP_DOWNLINK }, +}; + +static const struct ice_ptype_attributes ice_attr_gtpu_up[] = { + { ICE_MAC_IPV4_GTPU_IPV4_FRAG, ICE_PTYPE_ATTR_GTP_UPLINK }, + { ICE_MAC_IPV4_GTPU_IPV4_PAY, ICE_PTYPE_ATTR_GTP_UPLINK }, + { ICE_MAC_IPV4_GTPU_IPV4_UDP_PAY, ICE_PTYPE_ATTR_GTP_UPLINK }, + { ICE_MAC_IPV4_GTPU_IPV4_TCP, ICE_PTYPE_ATTR_GTP_UPLINK }, + { ICE_MAC_IPV4_GTPU_IPV4_ICMP, ICE_PTYPE_ATTR_GTP_UPLINK }, + { ICE_MAC_IPV6_GTPU_IPV4_FRAG, ICE_PTYPE_ATTR_GTP_UPLINK }, + { ICE_MAC_IPV6_GTPU_IPV4_PAY, ICE_PTYPE_ATTR_GTP_UPLINK }, + { ICE_MAC_IPV6_GTPU_IPV4_UDP_PAY, ICE_PTYPE_ATTR_GTP_UPLINK }, + { ICE_MAC_IPV6_GTPU_IPV4_TCP, ICE_PTYPE_ATTR_GTP_UPLINK }, + 
{ ICE_MAC_IPV6_GTPU_IPV4_ICMP, ICE_PTYPE_ATTR_GTP_UPLINK }, + { ICE_MAC_IPV4_GTPU_IPV6_FRAG, ICE_PTYPE_ATTR_GTP_UPLINK }, + { ICE_MAC_IPV4_GTPU_IPV6_PAY, ICE_PTYPE_ATTR_GTP_UPLINK }, + { ICE_MAC_IPV4_GTPU_IPV6_UDP_PAY, ICE_PTYPE_ATTR_GTP_UPLINK }, + { ICE_MAC_IPV4_GTPU_IPV6_TCP, ICE_PTYPE_ATTR_GTP_UPLINK }, + { ICE_MAC_IPV4_GTPU_IPV6_ICMPV6, ICE_PTYPE_ATTR_GTP_UPLINK }, + { ICE_MAC_IPV6_GTPU_IPV6_FRAG, ICE_PTYPE_ATTR_GTP_UPLINK }, + { ICE_MAC_IPV6_GTPU_IPV6_PAY, ICE_PTYPE_ATTR_GTP_UPLINK }, + { ICE_MAC_IPV6_GTPU_IPV6_UDP_PAY, ICE_PTYPE_ATTR_GTP_UPLINK }, + { ICE_MAC_IPV6_GTPU_IPV6_TCP, ICE_PTYPE_ATTR_GTP_UPLINK }, + { ICE_MAC_IPV6_GTPU_IPV6_ICMPV6, ICE_PTYPE_ATTR_GTP_UPLINK }, +}; + +static const u32 ice_ptypes_gtpu[] = { + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x7FFFFE00, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/* Packet types for PPPoE */ +static const u32 ice_ptypes_pppoe[] = { + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x03ffe000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/* Packet types for packets with PFCP NODE header */ +static const u32 ice_ptypes_pfcp_node[] = { + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x80000000, 0x00000002, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/* Packet types for packets with PFCP SESSION header */ +static const u32 ice_ptypes_pfcp_session[] = { + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000005, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/* Packet types for L2TPv3 */ +static const u32 ice_ptypes_l2tpv3[] = { + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000300, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/* Packet types for ESP */ +static const u32 ice_ptypes_esp[] = { + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000003, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 
0x00000000, 0x00000000, +}; + +/* Packet types for AH */ +static const u32 ice_ptypes_ah[] = { + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x0000000C, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +/* Packet types for packets with NAT_T ESP header */ +static const u32 ice_ptypes_nat_t_esp[] = { + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000030, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + +static const u32 ice_ptypes_mac_non_ip_ofos[] = { + 0x00000846, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00400000, 0x03FFF000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + /* Manage parameters and info. used during the creation of a flow profile */ struct ice_flow_prof_params { enum ice_block blk; @@ -208,12 +594,30 @@ struct ice_flow_prof_params { * This will give us the direction flags. */ struct ice_fv_word es[ICE_MAX_FV_WORDS]; + /* attributes can be used to add attributes to a particular PTYPE */ + const struct ice_ptype_attributes *attr; + u16 attr_cnt; + + u16 mask[ICE_MAX_FV_WORDS]; DECLARE_BITMAP(ptypes, ICE_FLOW_PTYPE_MAX); }; +#define ICE_FLOW_RSS_HDRS_INNER_MASK \ + (ICE_FLOW_SEG_HDR_PPPOE | ICE_FLOW_SEG_HDR_GTPC | \ + ICE_FLOW_SEG_HDR_GTPC_TEID | ICE_FLOW_SEG_HDR_GTPU | \ + ICE_FLOW_SEG_HDR_PFCP_SESSION | ICE_FLOW_SEG_HDR_L2TPV3 | \ + ICE_FLOW_SEG_HDR_ESP | ICE_FLOW_SEG_HDR_AH | \ + ICE_FLOW_SEG_HDR_NAT_T_ESP) + +#define ICE_FLOW_SEG_HDRS_L2_MASK \ + (ICE_FLOW_SEG_HDR_ETH | ICE_FLOW_SEG_HDR_VLAN) #define ICE_FLOW_SEG_HDRS_L3_MASK \ - (ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV6) + (ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_ARP) #define ICE_FLOW_SEG_HDRS_L4_MASK \ + (ICE_FLOW_SEG_HDR_ICMP | ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_UDP | \ + ICE_FLOW_SEG_HDR_SCTP) +/* mask for L4 protocols that are NOT part of IPv4/6 OTHER PTYPE groups */ +#define ICE_FLOW_SEG_HDRS_L4_MASK_NO_OTHER \ (ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_SCTP) /** @@ -243,8 +647,11 @@ ice_flow_val_hdrs(struct ice_flow_seg_info *segs, u8 segs_cnt) /* Sizes of fixed known protocol headers without header options */ #define ICE_FLOW_PROT_HDR_SZ_MAC 14 +#define ICE_FLOW_PROT_HDR_SZ_MAC_VLAN (ICE_FLOW_PROT_HDR_SZ_MAC + 2) #define ICE_FLOW_PROT_HDR_SZ_IPV4 20 #define ICE_FLOW_PROT_HDR_SZ_IPV6 40 +#define ICE_FLOW_PROT_HDR_SZ_ARP 28 +#define ICE_FLOW_PROT_HDR_SZ_ICMP 8 #define ICE_FLOW_PROT_HDR_SZ_TCP 20 #define ICE_FLOW_PROT_HDR_SZ_UDP 8 #define ICE_FLOW_PROT_HDR_SZ_SCTP 12 @@ -256,16 +663,27 @@ ice_flow_val_hdrs(struct ice_flow_seg_info *segs, u8 segs_cnt) */ static u16 ice_flow_calc_seg_sz(struct ice_flow_prof_params *params, u8 seg) { - u16 sz = ICE_FLOW_PROT_HDR_SZ_MAC; + u16 sz; + + /* L2 headers */ 
+ sz = (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_VLAN) ? + ICE_FLOW_PROT_HDR_SZ_MAC_VLAN : ICE_FLOW_PROT_HDR_SZ_MAC; /* L3 headers */ if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_IPV4) sz += ICE_FLOW_PROT_HDR_SZ_IPV4; else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_IPV6) sz += ICE_FLOW_PROT_HDR_SZ_IPV6; + else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_ARP) + sz += ICE_FLOW_PROT_HDR_SZ_ARP; + else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDRS_L4_MASK) + /* An L3 header is required if L4 is specified */ + return 0; /* L4 headers */ - if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_TCP) + if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_ICMP) + sz += ICE_FLOW_PROT_HDR_SZ_ICMP; + else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_TCP) sz += ICE_FLOW_PROT_HDR_SZ_TCP; else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_UDP) sz += ICE_FLOW_PROT_HDR_SZ_UDP; @@ -298,10 +716,41 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params) hdrs = prof->segs[i].hdrs; + if (hdrs & ICE_FLOW_SEG_HDR_ETH) { + src = !i ? (const unsigned long *)ice_ptypes_mac_ofos : + (const unsigned long *)ice_ptypes_mac_il; + bitmap_and(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); + } + + if (i && hdrs & ICE_FLOW_SEG_HDR_VLAN) { + src = (const unsigned long *)ice_ptypes_macvlan_il; + bitmap_and(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); + } + + if (!i && hdrs & ICE_FLOW_SEG_HDR_ARP) { + bitmap_and(params->ptypes, params->ptypes, + (const unsigned long *)ice_ptypes_arp_of, + ICE_FLOW_PTYPE_MAX); + } + if ((hdrs & ICE_FLOW_SEG_HDR_IPV4) && - !(hdrs & ICE_FLOW_SEG_HDRS_L4_MASK)) { - src = !i ? (const unsigned long *)ice_ipv4_ofos_no_l4 : - (const unsigned long *)ice_ipv4_il_no_l4; + (hdrs & ICE_FLOW_SEG_HDR_IPV_OTHER)) { + src = i ? (const unsigned long *)ice_ptypes_ipv4_il : + (const unsigned long *)ice_ptypes_ipv4_ofos_all; + bitmap_and(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); + } else if ((hdrs & ICE_FLOW_SEG_HDR_IPV6) && + (hdrs & ICE_FLOW_SEG_HDR_IPV_OTHER)) { + src = i ? (const unsigned long *)ice_ptypes_ipv6_il : + (const unsigned long *)ice_ptypes_ipv6_ofos_all; + bitmap_and(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); + } else if ((hdrs & ICE_FLOW_SEG_HDR_IPV4) && + !(hdrs & ICE_FLOW_SEG_HDRS_L4_MASK_NO_OTHER)) { + src = !i ? (const unsigned long *)ice_ptypes_ipv4_ofos_no_l4 : + (const unsigned long *)ice_ptypes_ipv4_il_no_l4; bitmap_and(params->ptypes, params->ptypes, src, ICE_FLOW_PTYPE_MAX); } else if (hdrs & ICE_FLOW_SEG_HDR_IPV4) { @@ -310,9 +759,9 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params) bitmap_and(params->ptypes, params->ptypes, src, ICE_FLOW_PTYPE_MAX); } else if ((hdrs & ICE_FLOW_SEG_HDR_IPV6) && - !(hdrs & ICE_FLOW_SEG_HDRS_L4_MASK)) { - src = !i ? (const unsigned long *)ice_ipv6_ofos_no_l4 : - (const unsigned long *)ice_ipv6_il_no_l4; + !(hdrs & ICE_FLOW_SEG_HDRS_L4_MASK_NO_OTHER)) { + src = !i ? 
(const unsigned long *)ice_ptypes_ipv6_ofos_no_l4 : + (const unsigned long *)ice_ptypes_ipv6_il_no_l4; bitmap_and(params->ptypes, params->ptypes, src, ICE_FLOW_PTYPE_MAX); } else if (hdrs & ICE_FLOW_SEG_HDR_IPV6) { @@ -322,6 +771,20 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params) ICE_FLOW_PTYPE_MAX); } + if (hdrs & ICE_FLOW_SEG_HDR_ETH_NON_IP) { + src = (const unsigned long *)ice_ptypes_mac_non_ip_ofos; + bitmap_and(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); + } else if (hdrs & ICE_FLOW_SEG_HDR_PPPOE) { + src = (const unsigned long *)ice_ptypes_pppoe; + bitmap_and(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); + } else { + src = (const unsigned long *)ice_ptypes_pppoe; + bitmap_andnot(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); + } + if (hdrs & ICE_FLOW_SEG_HDR_UDP) { src = (const unsigned long *)ice_ptypes_udp_il; bitmap_and(params->ptypes, params->ptypes, src, @@ -334,12 +797,89 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params) src = (const unsigned long *)ice_ptypes_sctp_il; bitmap_and(params->ptypes, params->ptypes, src, ICE_FLOW_PTYPE_MAX); + } + + if (hdrs & ICE_FLOW_SEG_HDR_ICMP) { + src = !i ? (const unsigned long *)ice_ptypes_icmp_of : + (const unsigned long *)ice_ptypes_icmp_il; + bitmap_and(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); } else if (hdrs & ICE_FLOW_SEG_HDR_GRE) { if (!i) { src = (const unsigned long *)ice_ptypes_gre_of; bitmap_and(params->ptypes, params->ptypes, src, ICE_FLOW_PTYPE_MAX); } + } else if (hdrs & ICE_FLOW_SEG_HDR_GTPC) { + src = (const unsigned long *)ice_ptypes_gtpc; + bitmap_and(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); + } else if (hdrs & ICE_FLOW_SEG_HDR_GTPC_TEID) { + src = (const unsigned long *)ice_ptypes_gtpc_tid; + bitmap_and(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); + } else if (hdrs & ICE_FLOW_SEG_HDR_GTPU_DWN) { + src = (const unsigned long *)ice_ptypes_gtpu; + bitmap_and(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); + + /* Attributes for GTP packet with downlink */ + params->attr = ice_attr_gtpu_down; + params->attr_cnt = ARRAY_SIZE(ice_attr_gtpu_down); + } else if (hdrs & ICE_FLOW_SEG_HDR_GTPU_UP) { + src = (const unsigned long *)ice_ptypes_gtpu; + bitmap_and(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); + + /* Attributes for GTP packet with uplink */ + params->attr = ice_attr_gtpu_up; + params->attr_cnt = ARRAY_SIZE(ice_attr_gtpu_up); + } else if (hdrs & ICE_FLOW_SEG_HDR_GTPU_EH) { + src = (const unsigned long *)ice_ptypes_gtpu; + bitmap_and(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); + + /* Attributes for GTP packet with Extension Header */ + params->attr = ice_attr_gtpu_eh; + params->attr_cnt = ARRAY_SIZE(ice_attr_gtpu_eh); + } else if (hdrs & ICE_FLOW_SEG_HDR_GTPU_IP) { + src = (const unsigned long *)ice_ptypes_gtpu; + bitmap_and(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); + } else if (hdrs & ICE_FLOW_SEG_HDR_L2TPV3) { + src = (const unsigned long *)ice_ptypes_l2tpv3; + bitmap_and(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); + } else if (hdrs & ICE_FLOW_SEG_HDR_ESP) { + src = (const unsigned long *)ice_ptypes_esp; + bitmap_and(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); + } else if (hdrs & ICE_FLOW_SEG_HDR_AH) { + src = (const unsigned long *)ice_ptypes_ah; + bitmap_and(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); + } else if (hdrs & ICE_FLOW_SEG_HDR_NAT_T_ESP) { + src = (const unsigned long 
*)ice_ptypes_nat_t_esp; + bitmap_and(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); + } + + if (hdrs & ICE_FLOW_SEG_HDR_PFCP) { + if (hdrs & ICE_FLOW_SEG_HDR_PFCP_NODE) + src = (const unsigned long *)ice_ptypes_pfcp_node; + else + src = (const unsigned long *)ice_ptypes_pfcp_session; + + bitmap_and(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); + } else { + src = (const unsigned long *)ice_ptypes_pfcp_node; + bitmap_andnot(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); + + src = (const unsigned long *)ice_ptypes_pfcp_session; + bitmap_andnot(params->ptypes, params->ptypes, src, + ICE_FLOW_PTYPE_MAX); } } @@ -352,6 +892,7 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params) * @params: information about the flow to be processed * @seg: packet segment index of the field to be extracted * @fld: ID of field to be extracted + * @match: bit field of all fields * * This function determines the protocol ID, offset, and size of the given * field. It then allocates one or more extraction sequence entries for the @@ -359,17 +900,73 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params) */ static enum ice_status ice_flow_xtract_fld(struct ice_hw *hw, struct ice_flow_prof_params *params, - u8 seg, enum ice_flow_field fld) + u8 seg, enum ice_flow_field fld, u64 match) { + enum ice_flow_field sib = ICE_FLOW_FIELD_IDX_MAX; enum ice_prot_id prot_id = ICE_PROT_ID_INVAL; u8 fv_words = hw->blk[params->blk].es.fvw; struct ice_flow_fld_info *flds; u16 cnt, ese_bits, i; + u16 sib_mask = 0; + u16 mask; u16 off; flds = params->prof->segs[seg].fields; switch (fld) { + case ICE_FLOW_FIELD_IDX_ETH_DA: + case ICE_FLOW_FIELD_IDX_ETH_SA: + case ICE_FLOW_FIELD_IDX_S_VLAN: + case ICE_FLOW_FIELD_IDX_C_VLAN: + prot_id = seg == 0 ? ICE_PROT_MAC_OF_OR_S : ICE_PROT_MAC_IL; + break; + case ICE_FLOW_FIELD_IDX_ETH_TYPE: + prot_id = seg == 0 ? ICE_PROT_ETYPE_OL : ICE_PROT_ETYPE_IL; + break; + case ICE_FLOW_FIELD_IDX_IPV4_DSCP: + prot_id = seg == 0 ? ICE_PROT_IPV4_OF_OR_S : ICE_PROT_IPV4_IL; + break; + case ICE_FLOW_FIELD_IDX_IPV6_DSCP: + prot_id = seg == 0 ? ICE_PROT_IPV6_OF_OR_S : ICE_PROT_IPV6_IL; + break; + case ICE_FLOW_FIELD_IDX_IPV4_TTL: + case ICE_FLOW_FIELD_IDX_IPV4_PROT: + prot_id = seg == 0 ? ICE_PROT_IPV4_OF_OR_S : ICE_PROT_IPV4_IL; + + /* TTL and PROT share the same extraction seq. entry. + * Each is considered a sibling to the other in terms of sharing + * the same extraction sequence entry. + */ + if (fld == ICE_FLOW_FIELD_IDX_IPV4_TTL) + sib = ICE_FLOW_FIELD_IDX_IPV4_PROT; + else if (fld == ICE_FLOW_FIELD_IDX_IPV4_PROT) + sib = ICE_FLOW_FIELD_IDX_IPV4_TTL; + + /* If the sibling field is also included, that field's + * mask needs to be included. + */ + if (match & BIT(sib)) + sib_mask = ice_flds_info[sib].mask; + break; + case ICE_FLOW_FIELD_IDX_IPV6_TTL: + case ICE_FLOW_FIELD_IDX_IPV6_PROT: + prot_id = seg == 0 ? ICE_PROT_IPV6_OF_OR_S : ICE_PROT_IPV6_IL; + + /* TTL and PROT share the same extraction seq. entry. + * Each is considered a sibling to the other in terms of sharing + * the same extraction sequence entry. + */ + if (fld == ICE_FLOW_FIELD_IDX_IPV6_TTL) + sib = ICE_FLOW_FIELD_IDX_IPV6_PROT; + else if (fld == ICE_FLOW_FIELD_IDX_IPV6_PROT) + sib = ICE_FLOW_FIELD_IDX_IPV6_TTL; + + /* If the sibling field is also included, that field's + * mask needs to be included. + */ + if (match & BIT(sib)) + sib_mask = ice_flds_info[sib].mask; + break; case ICE_FLOW_FIELD_IDX_IPV4_SA: case ICE_FLOW_FIELD_IDX_IPV4_DA: prot_id = seg == 0 ? 
ICE_PROT_IPV4_OF_OR_S : ICE_PROT_IPV4_IL; @@ -380,6 +977,7 @@ ice_flow_xtract_fld(struct ice_hw *hw, struct ice_flow_prof_params *params, break; case ICE_FLOW_FIELD_IDX_TCP_SRC_PORT: case ICE_FLOW_FIELD_IDX_TCP_DST_PORT: + case ICE_FLOW_FIELD_IDX_TCP_FLAGS: prot_id = ICE_PROT_TCP_IL; break; case ICE_FLOW_FIELD_IDX_UDP_SRC_PORT: @@ -390,6 +988,49 @@ ice_flow_xtract_fld(struct ice_hw *hw, struct ice_flow_prof_params *params, case ICE_FLOW_FIELD_IDX_SCTP_DST_PORT: prot_id = ICE_PROT_SCTP_IL; break; + case ICE_FLOW_FIELD_IDX_GTPC_TEID: + case ICE_FLOW_FIELD_IDX_GTPU_IP_TEID: + case ICE_FLOW_FIELD_IDX_GTPU_UP_TEID: + case ICE_FLOW_FIELD_IDX_GTPU_DWN_TEID: + case ICE_FLOW_FIELD_IDX_GTPU_EH_TEID: + case ICE_FLOW_FIELD_IDX_GTPU_EH_QFI: + /* GTP is accessed through UDP OF protocol */ + prot_id = ICE_PROT_UDP_OF; + break; + case ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID: + prot_id = ICE_PROT_PPPOE; + break; + case ICE_FLOW_FIELD_IDX_PFCP_SEID: + prot_id = ICE_PROT_UDP_IL_OR_S; + break; + case ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID: + prot_id = ICE_PROT_L2TPV3; + break; + case ICE_FLOW_FIELD_IDX_ESP_SPI: + prot_id = ICE_PROT_ESP_F; + break; + case ICE_FLOW_FIELD_IDX_AH_SPI: + prot_id = ICE_PROT_ESP_2; + break; + case ICE_FLOW_FIELD_IDX_NAT_T_ESP_SPI: + prot_id = ICE_PROT_UDP_IL_OR_S; + break; + case ICE_FLOW_FIELD_IDX_ARP_SIP: + case ICE_FLOW_FIELD_IDX_ARP_DIP: + case ICE_FLOW_FIELD_IDX_ARP_SHA: + case ICE_FLOW_FIELD_IDX_ARP_DHA: + case ICE_FLOW_FIELD_IDX_ARP_OP: + prot_id = ICE_PROT_ARP_OF; + break; + case ICE_FLOW_FIELD_IDX_ICMP_TYPE: + case ICE_FLOW_FIELD_IDX_ICMP_CODE: + /* ICMP type and code share the same extraction seq. entry */ + prot_id = (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_IPV4) ? + ICE_PROT_ICMP_IL : ICE_PROT_ICMPV6_IL; + sib = fld == ICE_FLOW_FIELD_IDX_ICMP_TYPE ? + ICE_FLOW_FIELD_IDX_ICMP_CODE : + ICE_FLOW_FIELD_IDX_ICMP_TYPE; + break; case ICE_FLOW_FIELD_IDX_GRE_KEYID: prot_id = ICE_PROT_GRE_OF; break; @@ -407,6 +1048,7 @@ ice_flow_xtract_fld(struct ice_hw *hw, struct ice_flow_prof_params *params, ICE_FLOW_FV_EXTRACT_SZ; flds[fld].xtrct.disp = (u8)(ice_flds_info[fld].off % ese_bits); flds[fld].xtrct.idx = params->es_cnt; + flds[fld].xtrct.mask = ice_flds_info[fld].mask; /* Adjust the next field-entry index after accommodating the number of * entries this field consumes @@ -416,24 +1058,34 @@ ice_flow_xtract_fld(struct ice_hw *hw, struct ice_flow_prof_params *params, /* Fill in the extraction sequence entries needed for this field */ off = flds[fld].xtrct.off; + mask = flds[fld].xtrct.mask; for (i = 0; i < cnt; i++) { - u8 idx; - - /* Make sure the number of extraction sequence required - * does not exceed the block's capability + /* Only consume an extraction sequence entry if there is no + * sibling field associated with this field or the sibling entry + * already extracts the word shared with this field. 
*/ - if (params->es_cnt >= fv_words) - return ICE_ERR_MAX_LIMIT; + if (sib == ICE_FLOW_FIELD_IDX_MAX || + flds[sib].xtrct.prot_id == ICE_PROT_ID_INVAL || + flds[sib].xtrct.off != off) { + u8 idx; - /* some blocks require a reversed field vector layout */ - if (hw->blk[params->blk].es.reverse) - idx = fv_words - params->es_cnt - 1; - else - idx = params->es_cnt; + /* Make sure the number of extraction sequence required + * does not exceed the block's capability + */ + if (params->es_cnt >= fv_words) + return ICE_ERR_MAX_LIMIT; - params->es[idx].prot_id = prot_id; - params->es[idx].off = off; - params->es_cnt++; + /* some blocks require a reversed field vector layout */ + if (hw->blk[params->blk].es.reverse) + idx = fv_words - params->es_cnt - 1; + else + idx = params->es_cnt; + + params->es[idx].prot_id = prot_id; + params->es[idx].off = off; + params->mask[idx] = mask | sib_mask; + params->es_cnt++; + } off += ICE_FLOW_FV_EXTRACT_SZ; } @@ -533,14 +1185,15 @@ ice_flow_create_xtrct_seq(struct ice_hw *hw, u8 i; for (i = 0; i < prof->segs_cnt; i++) { - u8 j; + u64 match = params->prof->segs[i].match; + enum ice_flow_field j; - for_each_set_bit(j, (unsigned long *)&prof->segs[i].match, + for_each_set_bit(j, (unsigned long *)&match, ICE_FLOW_FIELD_IDX_MAX) { - status = ice_flow_xtract_fld(hw, params, i, - (enum ice_flow_field)j); + status = ice_flow_xtract_fld(hw, params, i, j, match); if (status) return status; + clear_bit(j, (unsigned long *)&match); } /* Process raw matching bytes */ @@ -751,7 +1404,8 @@ ice_flow_add_prof_sync(struct ice_hw *hw, enum ice_block blk, /* Add a HW profile for this flow profile */ status = ice_add_prof(hw, blk, prof_id, (u8 *)params->ptypes, - params->es); + params->attr, params->attr_cnt, params->es, + params->mask); if (status) { ice_debug(hw, ICE_DBG_FLOW, "Error adding a HW flow profile\n"); goto out; @@ -1158,6 +1812,9 @@ ice_flow_add_fld_raw(struct ice_flow_seg_info *seg, u16 off, u8 len, seg->raws_cnt++; } +#define ICE_FLOW_RSS_SEG_HDR_L2_MASKS \ + (ICE_FLOW_SEG_HDR_ETH | ICE_FLOW_SEG_HDR_VLAN) + #define ICE_FLOW_RSS_SEG_HDR_L3_MASKS \ (ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV6) @@ -1165,7 +1822,8 @@ ice_flow_add_fld_raw(struct ice_flow_seg_info *seg, u16 off, u8 len, (ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_SCTP) #define ICE_FLOW_RSS_SEG_HDR_VAL_MASKS \ - (ICE_FLOW_RSS_SEG_HDR_L3_MASKS | \ + (ICE_FLOW_RSS_SEG_HDR_L2_MASKS | \ + ICE_FLOW_RSS_SEG_HDR_L3_MASKS | \ ICE_FLOW_RSS_SEG_HDR_L4_MASKS) /** @@ -1193,7 +1851,8 @@ ice_flow_set_rss_seg_info(struct ice_flow_seg_info *segs, u64 hash_fields, ICE_FLOW_SET_HDRS(segs, flow_hdr); - if (segs->hdrs & ~ICE_FLOW_RSS_SEG_HDR_VAL_MASKS) + if (segs->hdrs & ~ICE_FLOW_RSS_SEG_HDR_VAL_MASKS & + ~ICE_FLOW_RSS_HDRS_INNER_MASK & ~ICE_FLOW_SEG_HDR_IPV_OTHER) return ICE_ERR_PARAM; val = (u64)(segs->hdrs & ICE_FLOW_RSS_SEG_HDR_L3_MASKS); @@ -1349,9 +2008,9 @@ ice_add_rss_list(struct ice_hw *hw, u16 vsi_handle, struct ice_flow_prof *prof) * [63] - Encapsulation flag, 0 if non-tunneled, 1 if tunneled */ #define ICE_FLOW_GEN_PROFID(hash, hdr, segs_cnt) \ - (u64)(((u64)(hash) & ICE_FLOW_PROF_HASH_M) | \ - (((u64)(hdr) << ICE_FLOW_PROF_HDR_S) & ICE_FLOW_PROF_HDR_M) | \ - ((u8)((segs_cnt) - 1) ? ICE_FLOW_PROF_ENCAP_M : 0)) + ((u64)(((u64)(hash) & ICE_FLOW_PROF_HASH_M) | \ + (((u64)(hdr) << ICE_FLOW_PROF_HDR_S) & ICE_FLOW_PROF_HDR_M) | \ + ((u8)((segs_cnt) - 1) ? 
ICE_FLOW_PROF_ENCAP_M : 0))) /** * ice_add_rss_cfg_sync - add an RSS configuration @@ -1490,6 +2149,94 @@ ice_add_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds, return status; } +/** + * ice_rem_rss_cfg_sync - remove an existing RSS configuration + * @hw: pointer to the hardware structure + * @vsi_handle: software VSI handle + * @hashed_flds: Packet hash types (ICE_FLOW_HASH_*) to remove + * @addl_hdrs: Protocol header fields within a packet segment + * @segs_cnt: packet segment count + * + * Assumption: lock has already been acquired for RSS list + */ +static enum ice_status +ice_rem_rss_cfg_sync(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds, + u32 addl_hdrs, u8 segs_cnt) +{ + const enum ice_block blk = ICE_BLK_RSS; + struct ice_flow_seg_info *segs; + struct ice_flow_prof *prof; + enum ice_status status; + + segs = kcalloc(segs_cnt, sizeof(*segs), GFP_KERNEL); + if (!segs) + return ICE_ERR_NO_MEMORY; + + /* Construct the packet segment info from the hashed fields */ + status = ice_flow_set_rss_seg_info(&segs[segs_cnt - 1], hashed_flds, + addl_hdrs); + if (status) + goto out; + + prof = ice_flow_find_prof_conds(hw, blk, ICE_FLOW_RX, segs, segs_cnt, + vsi_handle, + ICE_FLOW_FIND_PROF_CHK_FLDS); + if (!prof) { + status = ICE_ERR_DOES_NOT_EXIST; + goto out; + } + + status = ice_flow_disassoc_prof(hw, blk, prof, vsi_handle); + if (status) + goto out; + + /* Remove RSS configuration from VSI context before deleting + * the flow profile. + */ + ice_rem_rss_list(hw, vsi_handle, prof); + + if (bitmap_empty(prof->vsis, ICE_MAX_VSI)) + status = ice_flow_rem_prof(hw, blk, prof->id); + +out: + kfree(segs); + return status; +} + +/** + * ice_rem_rss_cfg - remove an existing RSS config with matching hashed fields + * @hw: pointer to the hardware structure + * @vsi_handle: software VSI handle + * @hashed_flds: Packet hash types (ICE_FLOW_HASH_*) to remove + * @addl_hdrs: Protocol header fields within a packet segment + * + * This function will look up the flow profile based on the input + * hash field bitmap, iterate through the profile entry list of + * that profile and find the entry associated with the input VSI to + * be removed. Calls are made to underlying flow APIs, which in turn + * build or update buffers for the RSS XLT1 section. + */ +enum ice_status __maybe_unused +ice_rem_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds, + u32 addl_hdrs) +{ + enum ice_status status; + + if (hashed_flds == ICE_HASH_INVALID || + !ice_is_vsi_valid(hw, vsi_handle)) + return ICE_ERR_PARAM; + + mutex_lock(&hw->rss_locks); + status = ice_rem_rss_cfg_sync(hw, vsi_handle, hashed_flds, addl_hdrs, + ICE_RSS_OUTER_HEADERS); + if (!status) + status = ice_rem_rss_cfg_sync(hw, vsi_handle, hashed_flds, + addl_hdrs, ICE_RSS_INNER_HEADERS); + mutex_unlock(&hw->rss_locks); + + return status; +} + /* Mapping of AVF hash bit fields to an L3-L4 hash combination. * As the ice_flow_avf_hdr_field values represent individual bit shifts in a hash, * convert its values to their appropriate flow L3, L4 values.
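The GTPU plumbing in the ice_flow.c hunks above is easiest to see end to end: all four ICE_FLOW_SEG_HDR_GTPU_* variants narrow the profile to the same ice_ptypes_gtpu bitmap, and only the attribute table hung off params->attr (ice_attr_gtpu_eh, ice_attr_gtpu_down or ice_attr_gtpu_up) differs; each attribute later resolves into the metadata flags/mask pair that ends up in tcam->attr and is written by ice_tcam_write_entry(). The following is a minimal standalone sketch of that resolution, reusing the flag values from the ice_flex_type.h hunk; resolve_attrib() is a hypothetical stand-in for illustration, not a function added by this patch.

#include <stdint.h>
#include <stdio.h>

#define BIT(n)                (1U << (n))

/* TCAM flag definitions mirrored from the ice_flex_type.h hunk above */
#define ICE_GTP_PDU           BIT(14)
#define ICE_GTP_PDU_LINK      BIT(13)
#define ICE_GTP_PDU_FLAG_MASK (ICE_GTP_PDU)
#define ICE_GTP_PDU_EH        ICE_GTP_PDU
#define ICE_GTP_FLAGS_MASK    (ICE_GTP_PDU | ICE_GTP_PDU_LINK)
#define ICE_GTP_SESSION       0
#define ICE_GTP_DOWNLINK      ICE_GTP_PDU
#define ICE_GTP_UPLINK        (ICE_GTP_PDU | ICE_GTP_PDU_LINK)

enum ice_ptype_attrib_type {
	ICE_PTYPE_ATTR_GTP_PDU_EH,
	ICE_PTYPE_ATTR_GTP_SESSION,
	ICE_PTYPE_ATTR_GTP_DOWNLINK,
	ICE_PTYPE_ATTR_GTP_UPLINK,
};

struct ice_ptype_attrib_info {
	uint16_t flags;
	uint16_t mask;
};

/* Hypothetical resolver: maps the attribute attached to a PTYPE to the
 * flags/mask pair that the patch carries in tcam->attr and hands to
 * ice_tcam_write_entry() in the ice_flex_pipe.c hunks above.
 */
static void resolve_attrib(enum ice_ptype_attrib_type type,
			   struct ice_ptype_attrib_info *info)
{
	switch (type) {
	case ICE_PTYPE_ATTR_GTP_PDU_EH:
		/* require only the PDU container bit; the link-direction
		 * bit stays out of the mask, so both directions match
		 */
		info->flags = ICE_GTP_PDU_EH;
		info->mask = ICE_GTP_PDU_FLAG_MASK;
		break;
	case ICE_PTYPE_ATTR_GTP_SESSION:
		info->flags = ICE_GTP_SESSION;
		info->mask = ICE_GTP_FLAGS_MASK;
		break;
	case ICE_PTYPE_ATTR_GTP_DOWNLINK:
		info->flags = ICE_GTP_DOWNLINK;
		info->mask = ICE_GTP_FLAGS_MASK;
		break;
	case ICE_PTYPE_ATTR_GTP_UPLINK:
		info->flags = ICE_GTP_UPLINK;
		info->mask = ICE_GTP_FLAGS_MASK;
		break;
	}
}

int main(void)
{
	struct ice_ptype_attrib_info info;

	resolve_attrib(ICE_PTYPE_ATTR_GTP_UPLINK, &info);
	/* prints flags=0x6000 mask=0x6000: PDU bit 14 and link bit 13 set */
	printf("flags=0x%04x mask=0x%04x\n",
	       (unsigned int)info.flags, (unsigned int)info.mask);
	return 0;
}

Note the deliberate asymmetry: ICE_PTYPE_ATTR_GTP_PDU_EH masks only the PDU bit, so one TCAM entry matches either link direction, while the session/downlink/uplink attributes use the full two-bit mask and occupy distinct entries. This also ties back to the ice_alloc_tcam_ent() change earlier in the patch, where entries with a non-empty attribute mask are allocated from the top of the TCAM to give them higher match priority.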
diff --git a/drivers/net/ethernet/intel/ice/ice_flow.h b/drivers/net/ethernet/intel/ice/ice_flow.h index 829f90b1e998..2a2d8c1536cb 100644 --- a/drivers/net/ethernet/intel/ice/ice_flow.h +++ b/drivers/net/ethernet/intel/ice/ice_flow.h @@ -8,6 +8,9 @@ #define ICE_FLOW_FLD_OFF_INVAL 0xffff /* Generate flow hash field from flow field type(s) */ +#define ICE_FLOW_HASH_ETH \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_DA) | \ + BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_SA)) #define ICE_FLOW_HASH_IPV4 \ (BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) | \ BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA)) @@ -30,6 +33,80 @@ #define ICE_HASH_UDP_IPV4 (ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_UDP_PORT) #define ICE_HASH_UDP_IPV6 (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_UDP_PORT) +#define ICE_FLOW_HASH_GTP_TEID \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_GTPC_TEID)) + +#define ICE_FLOW_HASH_GTP_IPV4_TEID \ + (ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_GTP_TEID) +#define ICE_FLOW_HASH_GTP_IPV6_TEID \ + (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_GTP_TEID) + +#define ICE_FLOW_HASH_GTP_U_TEID \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_GTPU_IP_TEID)) + +#define ICE_FLOW_HASH_GTP_U_IPV4_TEID \ + (ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_GTP_U_TEID) +#define ICE_FLOW_HASH_GTP_U_IPV6_TEID \ + (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_GTP_U_TEID) + +#define ICE_FLOW_HASH_GTP_U_EH_TEID \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_GTPU_EH_TEID)) + +#define ICE_FLOW_HASH_GTP_U_EH_QFI \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_GTPU_EH_QFI)) + +#define ICE_FLOW_HASH_GTP_U_IPV4_EH \ + (ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_GTP_U_EH_TEID | \ + ICE_FLOW_HASH_GTP_U_EH_QFI) +#define ICE_FLOW_HASH_GTP_U_IPV6_EH \ + (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_GTP_U_EH_TEID | \ + ICE_FLOW_HASH_GTP_U_EH_QFI) + +#define ICE_FLOW_HASH_PPPOE_SESS_ID \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID)) + +#define ICE_FLOW_HASH_PPPOE_SESS_ID_ETH \ + (ICE_FLOW_HASH_ETH | ICE_FLOW_HASH_PPPOE_SESS_ID) +#define ICE_FLOW_HASH_PPPOE_TCP_ID \ + (ICE_FLOW_HASH_TCP_PORT | ICE_FLOW_HASH_PPPOE_SESS_ID) +#define ICE_FLOW_HASH_PPPOE_UDP_ID \ + (ICE_FLOW_HASH_UDP_PORT | ICE_FLOW_HASH_PPPOE_SESS_ID) + +#define ICE_FLOW_HASH_PFCP_SEID \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_PFCP_SEID)) +#define ICE_FLOW_HASH_PFCP_IPV4_SEID \ + (ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_PFCP_SEID) +#define ICE_FLOW_HASH_PFCP_IPV6_SEID \ + (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_PFCP_SEID) + +#define ICE_FLOW_HASH_L2TPV3_SESS_ID \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID)) +#define ICE_FLOW_HASH_L2TPV3_IPV4_SESS_ID \ + (ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_L2TPV3_SESS_ID) +#define ICE_FLOW_HASH_L2TPV3_IPV6_SESS_ID \ + (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_L2TPV3_SESS_ID) + +#define ICE_FLOW_HASH_ESP_SPI \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_ESP_SPI)) +#define ICE_FLOW_HASH_ESP_IPV4_SPI \ + (ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_ESP_SPI) +#define ICE_FLOW_HASH_ESP_IPV6_SPI \ + (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_ESP_SPI) + +#define ICE_FLOW_HASH_AH_SPI \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_AH_SPI)) +#define ICE_FLOW_HASH_AH_IPV4_SPI \ + (ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_AH_SPI) +#define ICE_FLOW_HASH_AH_IPV6_SPI \ + (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_AH_SPI) + +#define ICE_FLOW_HASH_NAT_T_ESP_SPI \ + (BIT_ULL(ICE_FLOW_FIELD_IDX_NAT_T_ESP_SPI)) +#define ICE_FLOW_HASH_NAT_T_ESP_IPV4_SPI \ + (ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_NAT_T_ESP_SPI) +#define ICE_FLOW_HASH_NAT_T_ESP_IPV6_SPI \ + (ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_NAT_T_ESP_SPI) + /* Protocol header fields within a packet segment. A segment consists of one or * more protocol headers that make up a logical group of protocol headers. 
Each * logical group of protocol headers encapsulates or is encapsulated using/by @@ -38,16 +115,66 @@ */ enum ice_flow_seg_hdr { ICE_FLOW_SEG_HDR_NONE = 0x00000000, + ICE_FLOW_SEG_HDR_ETH = 0x00000001, + ICE_FLOW_SEG_HDR_VLAN = 0x00000002, ICE_FLOW_SEG_HDR_IPV4 = 0x00000004, ICE_FLOW_SEG_HDR_IPV6 = 0x00000008, + ICE_FLOW_SEG_HDR_ARP = 0x00000010, + ICE_FLOW_SEG_HDR_ICMP = 0x00000020, ICE_FLOW_SEG_HDR_TCP = 0x00000040, ICE_FLOW_SEG_HDR_UDP = 0x00000080, ICE_FLOW_SEG_HDR_SCTP = 0x00000100, ICE_FLOW_SEG_HDR_GRE = 0x00000200, + ICE_FLOW_SEG_HDR_GTPC = 0x00000400, + ICE_FLOW_SEG_HDR_GTPC_TEID = 0x00000800, + ICE_FLOW_SEG_HDR_GTPU_IP = 0x00001000, + ICE_FLOW_SEG_HDR_GTPU_EH = 0x00002000, + ICE_FLOW_SEG_HDR_GTPU_DWN = 0x00004000, + ICE_FLOW_SEG_HDR_GTPU_UP = 0x00008000, + ICE_FLOW_SEG_HDR_PPPOE = 0x00010000, + ICE_FLOW_SEG_HDR_PFCP_NODE = 0x00020000, + ICE_FLOW_SEG_HDR_PFCP_SESSION = 0x00040000, + ICE_FLOW_SEG_HDR_L2TPV3 = 0x00080000, + ICE_FLOW_SEG_HDR_ESP = 0x00100000, + ICE_FLOW_SEG_HDR_AH = 0x00200000, + ICE_FLOW_SEG_HDR_NAT_T_ESP = 0x00400000, + ICE_FLOW_SEG_HDR_ETH_NON_IP = 0x00800000, + /* The following is an additive bit for ICE_FLOW_SEG_HDR_IPV4 and + * ICE_FLOW_SEG_HDR_IPV6 which include the IPV4 other PTYPEs + */ + ICE_FLOW_SEG_HDR_IPV_OTHER = 0x20000000, }; +/* These segments all have the same PTYPES, but are otherwise distinguished by + * the value of the gtp_eh_pdu and gtp_eh_pdu_link flags: + * + * gtp_eh_pdu gtp_eh_pdu_link + * ICE_FLOW_SEG_HDR_GTPU_IP 0 0 + * ICE_FLOW_SEG_HDR_GTPU_EH 1 don't care + * ICE_FLOW_SEG_HDR_GTPU_DWN 1 0 + * ICE_FLOW_SEG_HDR_GTPU_UP 1 1 + */ +#define ICE_FLOW_SEG_HDR_GTPU (ICE_FLOW_SEG_HDR_GTPU_IP | \ + ICE_FLOW_SEG_HDR_GTPU_EH | \ + ICE_FLOW_SEG_HDR_GTPU_DWN | \ + ICE_FLOW_SEG_HDR_GTPU_UP) +#define ICE_FLOW_SEG_HDR_PFCP (ICE_FLOW_SEG_HDR_PFCP_NODE | \ + ICE_FLOW_SEG_HDR_PFCP_SESSION) + enum ice_flow_field { + /* L2 */ + ICE_FLOW_FIELD_IDX_ETH_DA, + ICE_FLOW_FIELD_IDX_ETH_SA, + ICE_FLOW_FIELD_IDX_S_VLAN, + ICE_FLOW_FIELD_IDX_C_VLAN, + ICE_FLOW_FIELD_IDX_ETH_TYPE, /* L3 */ + ICE_FLOW_FIELD_IDX_IPV4_DSCP, + ICE_FLOW_FIELD_IDX_IPV6_DSCP, + ICE_FLOW_FIELD_IDX_IPV4_TTL, + ICE_FLOW_FIELD_IDX_IPV4_PROT, + ICE_FLOW_FIELD_IDX_IPV6_TTL, + ICE_FLOW_FIELD_IDX_IPV6_PROT, ICE_FLOW_FIELD_IDX_IPV4_SA, ICE_FLOW_FIELD_IDX_IPV4_DA, ICE_FLOW_FIELD_IDX_IPV6_SA, @@ -59,9 +186,42 @@ enum ice_flow_field { ICE_FLOW_FIELD_IDX_UDP_DST_PORT, ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT, ICE_FLOW_FIELD_IDX_SCTP_DST_PORT, + ICE_FLOW_FIELD_IDX_TCP_FLAGS, + /* ARP */ + ICE_FLOW_FIELD_IDX_ARP_SIP, + ICE_FLOW_FIELD_IDX_ARP_DIP, + ICE_FLOW_FIELD_IDX_ARP_SHA, + ICE_FLOW_FIELD_IDX_ARP_DHA, + ICE_FLOW_FIELD_IDX_ARP_OP, + /* ICMP */ + ICE_FLOW_FIELD_IDX_ICMP_TYPE, + ICE_FLOW_FIELD_IDX_ICMP_CODE, /* GRE */ ICE_FLOW_FIELD_IDX_GRE_KEYID, - /* The total number of enums must not exceed 64 */ + /* GTPC_TEID */ + ICE_FLOW_FIELD_IDX_GTPC_TEID, + /* GTPU_IP */ + ICE_FLOW_FIELD_IDX_GTPU_IP_TEID, + /* GTPU_EH */ + ICE_FLOW_FIELD_IDX_GTPU_EH_TEID, + ICE_FLOW_FIELD_IDX_GTPU_EH_QFI, + /* GTPU_UP */ + ICE_FLOW_FIELD_IDX_GTPU_UP_TEID, + /* GTPU_DWN */ + ICE_FLOW_FIELD_IDX_GTPU_DWN_TEID, + /* PPPoE */ + ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID, + /* PFCP */ + ICE_FLOW_FIELD_IDX_PFCP_SEID, + /* L2TPv3 */ + ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID, + /* ESP */ + ICE_FLOW_FIELD_IDX_ESP_SPI, + /* AH */ + ICE_FLOW_FIELD_IDX_AH_SPI, + /* NAT_T ESP */ + ICE_FLOW_FIELD_IDX_NAT_T_ESP_SPI, + /* The total number of enums must not exceed 64 */ ICE_FLOW_FIELD_IDX_MAX }; @@ -138,6 +298,7 @@ struct ice_flow_seg_xtrct { u16 off; /* Starting offset of 
the field in header in bytes */ u8 idx; /* Index of FV entry used */ u8 disp; /* Displacement of field in bits fr. FV entry's start */ + u16 mask; /* Mask for field */ }; enum ice_flow_fld_match_type { @@ -248,5 +409,8 @@ enum ice_status ice_rem_vsi_rss_cfg(struct ice_hw *hw, u16 vsi_handle); enum ice_status ice_add_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds, u32 addl_hdrs); +enum ice_status +ice_rem_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds, + u32 addl_hdrs); u64 ice_get_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u32 hdrs); #endif /* _ICE_FLOW_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index 093a1818a392..de38a0fc9665 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -130,6 +130,7 @@ #define GLINT_DYN_CTL_ITR_INDX_M ICE_M(0x3, 3) #define GLINT_DYN_CTL_INTERVAL_S 5 #define GLINT_DYN_CTL_INTERVAL_M ICE_M(0xFFF, 5) +#define GLINT_DYN_CTL_SW_ITR_INDX_ENA_M BIT(24) #define GLINT_DYN_CTL_SW_ITR_INDX_M ICE_M(0x3, 25) #define GLINT_DYN_CTL_WB_ON_ITR_M BIT(30) #define GLINT_DYN_CTL_INTENA_MSK_M BIT(31) @@ -306,8 +307,23 @@ #define GLQF_FD_SIZE_FD_BSIZE_S 16 #define GLQF_FD_SIZE_FD_BSIZE_M ICE_M(0x7FFF, 16) #define GLQF_FDINSET(_i, _j) (0x00412000 + ((_i) * 4 + (_j) * 512)) +#define GLQF_FDMASK(_i) (0x00410800 + ((_i) * 4)) +#define GLQF_FDMASK_MAX_INDEX 31 +#define GLQF_FDMASK_MSK_INDEX_S 0 +#define GLQF_FDMASK_MSK_INDEX_M ICE_M(0x1F, 0) +#define GLQF_FDMASK_MASK_S 16 +#define GLQF_FDMASK_MASK_M ICE_M(0xFFFF, 16) #define GLQF_FDMASK_SEL(_i) (0x00410400 + ((_i) * 4)) #define GLQF_FDSWAP(_i, _j) (0x00413000 + ((_i) * 4 + (_j) * 512)) +#define GLQF_HMASK(_i) (0x0040FC00 + ((_i) * 4)) +#define GLQF_HMASK_MAX_INDEX 31 +#define GLQF_HMASK_MSK_INDEX_S 0 +#define GLQF_HMASK_MSK_INDEX_M ICE_M(0x1F, 0) +#define GLQF_HMASK_MASK_S 16 +#define GLQF_HMASK_MASK_M ICE_M(0xFFFF, 16) +#define GLQF_HMASK_SEL(_i) (0x00410000 + ((_i) * 4)) +#define GLQF_HMASK_SEL_MAX_INDEX 127 +#define GLQF_HMASK_SEL_MASK_SEL_S 0 #define PFQF_FD_ENA 0x0043A000 #define PFQF_FD_ENA_FD_ENA_M BIT(0) #define PFQF_FD_SIZE 0x00460100 @@ -369,6 +385,9 @@ #define VSIQF_FD_CNT(_VSI) (0x00464000 + ((_VSI) * 4)) #define VSIQF_FD_CNT_FD_GCNT_S 0 #define VSIQF_FD_CNT_FD_GCNT_M ICE_M(0x3FFF, 0) +#define VSIQF_FD_CNT_FD_BCNT_S 16 +#define VSIQF_FD_CNT_FD_BCNT_M ICE_M(0x3FFF, 16) +#define VSIQF_FD_SIZE(_VSI) (0x00462000 + ((_VSI) * 4)) #define VSIQF_HKEY_MAX_INDEX 12 #define VSIQF_HLUT_MAX_INDEX 15 #define PFPM_APM 0x000B8080 diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index 4ec24c3e813f..21329ed3087e 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -55,6 +55,7 @@ struct ice_fltr_desc { #define ICE_FXD_FLTR_QW0_COMP_REPORT_M \ (0x3ULL << ICE_FXD_FLTR_QW0_COMP_REPORT_S) #define ICE_FXD_FLTR_QW0_COMP_REPORT_SW_FAIL 0x1ULL +#define ICE_FXD_FLTR_QW0_COMP_REPORT_SW 0x2ULL #define ICE_FXD_FLTR_QW0_FD_SPACE_S 14 #define ICE_FXD_FLTR_QW0_FD_SPACE_M (0x3ULL << ICE_FXD_FLTR_QW0_FD_SPACE_S) @@ -128,6 +129,7 @@ struct ice_fltr_desc { #define ICE_FXD_FLTR_QW1_FDID_PRI_S 25 #define ICE_FXD_FLTR_QW1_FDID_PRI_M (0x7ULL << ICE_FXD_FLTR_QW1_FDID_PRI_S) #define ICE_FXD_FLTR_QW1_FDID_PRI_ONE 0x1ULL +#define ICE_FXD_FLTR_QW1_FDID_PRI_THREE 0x3ULL #define ICE_FXD_FLTR_QW1_FDID_MDID_S 28 #define ICE_FXD_FLTR_QW1_FDID_MDID_M (0xFULL << ICE_FXD_FLTR_QW1_FDID_MDID_S) @@ 
-138,6 +140,26 @@ struct ice_fltr_desc { (0xFFFFFFFFULL << ICE_FXD_FLTR_QW1_FDID_S) #define ICE_FXD_FLTR_QW1_FDID_ZERO 0x0ULL +/* definition for FD filter programming status descriptor WB format */ +#define ICE_FXD_FLTR_WB_QW1_DD_S 0 +#define ICE_FXD_FLTR_WB_QW1_DD_M (0x1ULL << ICE_FXD_FLTR_WB_QW1_DD_S) +#define ICE_FXD_FLTR_WB_QW1_DD_YES 0x1ULL + +#define ICE_FXD_FLTR_WB_QW1_PROG_ID_S 1 +#define ICE_FXD_FLTR_WB_QW1_PROG_ID_M \ + (0x3ULL << ICE_FXD_FLTR_WB_QW1_PROG_ID_S) +#define ICE_FXD_FLTR_WB_QW1_PROG_ADD 0x0ULL +#define ICE_FXD_FLTR_WB_QW1_PROG_DEL 0x1ULL + +#define ICE_FXD_FLTR_WB_QW1_FAIL_S 4 +#define ICE_FXD_FLTR_WB_QW1_FAIL_M (0x1ULL << ICE_FXD_FLTR_WB_QW1_FAIL_S) +#define ICE_FXD_FLTR_WB_QW1_FAIL_YES 0x1ULL + +#define ICE_FXD_FLTR_WB_QW1_FAIL_PROF_S 5 +#define ICE_FXD_FLTR_WB_QW1_FAIL_PROF_M \ + (0x1ULL << ICE_FXD_FLTR_WB_QW1_FAIL_PROF_S) +#define ICE_FXD_FLTR_WB_QW1_FAIL_PROF_YES 0x1ULL + struct ice_rx_ptype_decoded { u32 ptype:10; u32 known:1; diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 8d4e2ad4328d..82e2ce23df3d 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -158,6 +158,8 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id) if (vsi->type == ICE_VSI_VF) vsi->vf_id = vf_id; + else + vsi->vf_id = ICE_INVAL_VFID; switch (vsi->type) { case ICE_VSI_PF: @@ -343,6 +345,9 @@ static int ice_vsi_clear(struct ice_vsi *vsi) pf->vsi[vsi->idx] = NULL; if (vsi->idx < pf->next_vsi && vsi->type != ICE_VSI_CTRL) pf->next_vsi = vsi->idx; + if (vsi->idx < pf->next_vsi && vsi->type == ICE_VSI_CTRL && + vsi->vf_id != ICE_INVAL_VFID) + pf->next_vsi = vsi->idx; ice_vsi_free_arrays(vsi); mutex_unlock(&pf->sw_mutex); @@ -382,6 +387,8 @@ static irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data) if (!q_vector->tx.ring && !q_vector->rx.ring) return IRQ_HANDLED; + q_vector->total_events++; + napi_schedule(&q_vector->napi); return IRQ_HANDLED; @@ -419,7 +426,7 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, u16 vf_id) vsi->type = vsi_type; vsi->back = pf; - set_bit(__ICE_DOWN, vsi->state); + set_bit(ICE_VSI_DOWN, vsi->state); if (vsi_type == ICE_VSI_VF) ice_vsi_set_num_qs(vsi, vf_id); @@ -454,8 +461,8 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, u16 vf_id) goto unlock_pf; } - if (vsi->type == ICE_VSI_CTRL) { - /* Use the last VSI slot as the index for the control VSI */ + if (vsi->type == ICE_VSI_CTRL && vf_id == ICE_INVAL_VFID) { + /* Use the last VSI slot as the index for PF control VSI */ vsi->idx = pf->num_alloc_vsi - 1; pf->ctrl_vsi_idx = vsi->idx; pf->vsi[vsi->idx] = vsi; @@ -468,6 +475,9 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, u16 vf_id) pf->next_vsi = ice_get_free_slot(pf->vsi, pf->num_alloc_vsi, pf->next_vsi); } + + if (vsi->type == ICE_VSI_CTRL && vf_id != ICE_INVAL_VFID) + pf->vf[vf_id].ctrl_vsi_idx = vsi->idx; goto unlock_pf; err_rings: @@ -506,7 +516,7 @@ static int ice_alloc_fd_res(struct ice_vsi *vsi) if (!b_val) return -EPERM; - if (vsi->type != ICE_VSI_PF) + if (!(vsi->type == ICE_VSI_PF || vsi->type == ICE_VSI_VF)) return -EPERM; if (!test_bit(ICE_FLAG_FD_ENA, pf->flags)) @@ -517,6 +527,13 @@ static int ice_alloc_fd_res(struct ice_vsi *vsi) /* each VSI gets same "best_effort" quota */ vsi->num_bfltr = b_val; + if (vsi->type == ICE_VSI_VF) { + vsi->num_gfltr = 0; + + /* each VSI gets same "best_effort" quota */ + vsi->num_bfltr = b_val; + } + return 0; } @@ -729,11 +746,10 @@ static void 
ice_set_dflt_vsi_ctx(struct ice_vsi_ctx *ctxt) */ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) { - u16 offset = 0, qmap = 0, tx_count = 0; + u16 offset = 0, qmap = 0, tx_count = 0, pow = 0; + u16 num_txq_per_tc, num_rxq_per_tc; u16 qcount_tx = vsi->alloc_txq; u16 qcount_rx = vsi->alloc_rxq; - u16 tx_numq_tc, rx_numq_tc; - u16 pow = 0, max_rss = 0; bool ena_tc0 = false; u8 netdev_tc = 0; int i; @@ -751,12 +767,15 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) vsi->tc_cfg.ena_tc |= 1; } - rx_numq_tc = qcount_rx / vsi->tc_cfg.numtc; - if (!rx_numq_tc) - rx_numq_tc = 1; - tx_numq_tc = qcount_tx / vsi->tc_cfg.numtc; - if (!tx_numq_tc) - tx_numq_tc = 1; + num_rxq_per_tc = min_t(u16, qcount_rx / vsi->tc_cfg.numtc, ICE_MAX_RXQS_PER_TC); + if (!num_rxq_per_tc) + num_rxq_per_tc = 1; + num_txq_per_tc = qcount_tx / vsi->tc_cfg.numtc; + if (!num_txq_per_tc) + num_txq_per_tc = 1; + + /* find the (rounded up) power-of-2 of qcount */ + pow = (u16)order_base_2(num_rxq_per_tc); /* TC mapping is a function of the number of Rx queues assigned to the * VSI for each traffic class and the offset of these queues. @@ -769,26 +788,6 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) * * Setup number and offset of Rx queues for all TCs for the VSI */ - - qcount_rx = rx_numq_tc; - - /* qcount will change if RSS is enabled */ - if (test_bit(ICE_FLAG_RSS_ENA, vsi->back->flags)) { - if (vsi->type == ICE_VSI_PF || vsi->type == ICE_VSI_VF) { - if (vsi->type == ICE_VSI_PF) - max_rss = ICE_MAX_LG_RSS_QS; - else - max_rss = ICE_MAX_RSS_QS_PER_VF; - qcount_rx = min_t(u16, rx_numq_tc, max_rss); - if (!vsi->req_rxq) - qcount_rx = min_t(u16, qcount_rx, - vsi->rss_size); - } - } - - /* find the (rounded up) power-of-2 of qcount */ - pow = (u16)order_base_2(qcount_rx); - ice_for_each_traffic_class(i) { if (!(vsi->tc_cfg.ena_tc & BIT(i))) { /* TC is not enabled */ @@ -802,16 +801,16 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) /* TC is enabled */ vsi->tc_cfg.tc_info[i].qoffset = offset; - vsi->tc_cfg.tc_info[i].qcount_rx = qcount_rx; - vsi->tc_cfg.tc_info[i].qcount_tx = tx_numq_tc; + vsi->tc_cfg.tc_info[i].qcount_rx = num_rxq_per_tc; + vsi->tc_cfg.tc_info[i].qcount_tx = num_txq_per_tc; vsi->tc_cfg.tc_info[i].netdev_tc = netdev_tc++; qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) & ICE_AQ_VSI_TC_Q_OFFSET_M) | ((pow << ICE_AQ_VSI_TC_Q_NUM_S) & ICE_AQ_VSI_TC_Q_NUM_M); - offset += qcount_rx; - tx_count += tx_numq_tc; + offset += num_rxq_per_tc; + tx_count += num_txq_per_tc; ctxt->info.tc_mapping[i] = cpu_to_le16(qmap); } @@ -824,7 +823,7 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) if (offset) vsi->num_rxq = offset; else - vsi->num_rxq = qcount_rx; + vsi->num_rxq = num_rxq_per_tc; vsi->num_txq = tx_count; @@ -856,7 +855,8 @@ static void ice_set_fd_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi) u8 dflt_q_group, dflt_q_prio; u16 dflt_q, report_q, val; - if (vsi->type != ICE_VSI_PF && vsi->type != ICE_VSI_CTRL) + if (vsi->type != ICE_VSI_PF && vsi->type != ICE_VSI_CTRL && + vsi->type != ICE_VSI_VF) return; val = ICE_AQ_VSI_PROP_FLOW_DIR_VALID; @@ -1179,7 +1179,24 @@ static int ice_vsi_setup_vector_base(struct ice_vsi *vsi) num_q_vectors = vsi->num_q_vectors; /* reserve slots from OS requested IRQs */ - base = ice_get_res(pf, pf->irq_tracker, num_q_vectors, vsi->idx); + if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID) { + struct ice_vf *vf; + int i; 
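/* Editor's note: a worked example of the qmap encoding used by
 * ice_vsi_setup_q_map() above (a sketch, not driver code). With
 * qcount_rx = 16 and numtc = 4, num_rxq_per_tc = 4 and
 * pow = order_base_2(4) = 2, so TC i starts at offset i * 4 and each
 * per-TC entry packs the queue offset plus the power-of-2 queue count:
 *
 *	u16 qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) &
 *		    ICE_AQ_VSI_TC_Q_OFFSET_M) |
 *		   ((pow << ICE_AQ_VSI_TC_Q_NUM_S) &
 *		    ICE_AQ_VSI_TC_Q_NUM_M);
 *
 * i.e. the hardware derives each TC's queue count as 2^pow rather than
 * storing the count directly, which is why the code rounds up to a
 * power of two.
 */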
+ + ice_for_each_vf(pf, i) { + vf = &pf->vf[i]; + if (i != vsi->vf_id && vf->ctrl_vsi_idx != ICE_NO_VSI) { + base = pf->vsi[vf->ctrl_vsi_idx]->base_vector; + break; + } + } + if (i == pf->num_alloc_vfs) + base = ice_get_res(pf, pf->irq_tracker, num_q_vectors, + ICE_RES_VF_CTRL_VEC_ID); + } else { + base = ice_get_res(pf, pf->irq_tracker, num_q_vectors, + vsi->idx); + } if (base < 0) { dev_err(dev, "%d MSI-X interrupts available. %s %d failed to get %d MSI-X vectors\n", @@ -1296,14 +1313,13 @@ err_out: * LUT, while in the event of enable request for RSS, it will reconfigure RSS * LUT. */ -int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena) +void ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena) { - int err = 0; u8 *lut; lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); if (!lut) - return -ENOMEM; + return; if (ena) { if (vsi->rss_lut_user) @@ -1313,9 +1329,8 @@ int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena) vsi->rss_size); } - err = ice_set_rss(vsi, NULL, lut, vsi->rss_table_size); + ice_set_rss_lut(vsi, lut, vsi->rss_table_size); kfree(lut); - return err; } /** @@ -1324,12 +1339,10 @@ int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena) */ static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi) { - struct ice_aqc_get_set_rss_keys *key; struct ice_pf *pf = vsi->back; - enum ice_status status; struct device *dev; - int err = 0; - u8 *lut; + u8 *lut, *key; + int err; dev = ice_pf_to_dev(pf); vsi->rss_size = min_t(u16, vsi->rss_size, vsi->num_rxq); @@ -1343,37 +1356,26 @@ static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi) else ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size); - status = ice_aq_set_rss_lut(&pf->hw, vsi->idx, vsi->rss_lut_type, lut, - vsi->rss_table_size); - - if (status) { - dev_err(dev, "set_rss_lut failed, error %s\n", - ice_stat_str(status)); - err = -EIO; + err = ice_set_rss_lut(vsi, lut, vsi->rss_table_size); + if (err) { + dev_err(dev, "set_rss_lut failed, error %d\n", err); goto ice_vsi_cfg_rss_exit; } - key = kzalloc(sizeof(*key), GFP_KERNEL); + key = kzalloc(ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE, GFP_KERNEL); if (!key) { err = -ENOMEM; goto ice_vsi_cfg_rss_exit; } if (vsi->rss_hkey_user) - memcpy(key, - (struct ice_aqc_get_set_rss_keys *)vsi->rss_hkey_user, - ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE); + memcpy(key, vsi->rss_hkey_user, ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE); else - netdev_rss_key_fill((void *)key, - ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE); + netdev_rss_key_fill((void *)key, ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE); - status = ice_aq_set_rss_key(&pf->hw, vsi->idx, key); - - if (status) { - dev_err(dev, "set_rss_key failed, error %s\n", - ice_stat_str(status)); - err = -EIO; - } + err = ice_set_rss_key(vsi, key); + if (err) + dev_err(dev, "set_rss_key failed, error %d\n", err); kfree(key); ice_vsi_cfg_rss_exit: @@ -1502,13 +1504,13 @@ static void ice_vsi_set_rss_flow_fld(struct ice_vsi *vsi) */ bool ice_pf_state_is_nominal(struct ice_pf *pf) { - DECLARE_BITMAP(check_bits, __ICE_STATE_NBITS) = { 0 }; + DECLARE_BITMAP(check_bits, ICE_STATE_NBITS) = { 0 }; if (!pf) return false; - bitmap_set(check_bits, 0, __ICE_STATE_NOMINAL_CHECK_BITS); - if (bitmap_intersects(pf->state, check_bits, __ICE_STATE_NBITS)) + bitmap_set(check_bits, 0, ICE_STATE_NOMINAL_CHECK_BITS); + if (bitmap_intersects(pf->state, check_bits, ICE_STATE_NBITS)) return false; return true; @@ -1773,7 +1775,7 @@ int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi) * This function converts a decimal interrupt rate limit in usecs to the format * expected by firmware. 
*/ -u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran) +static u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran) { u32 val = intrl / gran; @@ -1783,6 +1785,51 @@ u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran) } /** + * ice_write_intrl - write throttle rate limit to interrupt specific register + * @q_vector: pointer to interrupt specific structure + * @intrl: throttle rate limit in microseconds to write + */ +void ice_write_intrl(struct ice_q_vector *q_vector, u8 intrl) +{ + struct ice_hw *hw = &q_vector->vsi->back->hw; + + wr32(hw, GLINT_RATE(q_vector->reg_idx), + ice_intrl_usec_to_reg(intrl, ICE_INTRL_GRAN_ABOVE_25)); +} + +/** + * __ice_write_itr - write throttle rate to register + * @q_vector: pointer to interrupt data structure + * @rc: pointer to ring container + * @itr: throttle rate in microseconds to write + */ +static void __ice_write_itr(struct ice_q_vector *q_vector, + struct ice_ring_container *rc, u16 itr) +{ + struct ice_hw *hw = &q_vector->vsi->back->hw; + + wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx), + ITR_REG_ALIGN(itr) >> ICE_ITR_GRAN_S); +} + +/** + * ice_write_itr - write throttle rate to queue specific register + * @rc: pointer to ring container + * @itr: throttle rate in microseconds to write + */ +void ice_write_itr(struct ice_ring_container *rc, u16 itr) +{ + struct ice_q_vector *q_vector; + + if (!rc->ring) + return; + + q_vector = rc->ring->q_vector; + + __ice_write_itr(q_vector, rc, itr); +} + +/** * ice_vsi_cfg_msix - MSIX mode Interrupt Config in the HW * @vsi: the VSI being configured * @@ -1802,9 +1849,6 @@ void ice_vsi_cfg_msix(struct ice_vsi *vsi) ice_cfg_itr(hw, q_vector); - wr32(hw, GLINT_RATE(reg_idx), - ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran)); - /* Both Transmit Queue Interrupt Cause Control register * and Receive Queue Interrupt Cause control register * expects MSIX_INDX field to be the vector index @@ -2308,7 +2352,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, struct ice_vsi *vsi; int ret, i; - if (vsi_type == ICE_VSI_VF) + if (vsi_type == ICE_VSI_VF || vsi_type == ICE_VSI_CTRL) vsi = ice_vsi_alloc(pf, vsi_type, vf_id); else vsi = ice_vsi_alloc(pf, vsi_type, ICE_INVAL_VFID); @@ -2323,7 +2367,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, if (vsi->type == ICE_VSI_PF) vsi->ethtype = ETH_P_PAUSE; - if (vsi->type == ICE_VSI_VF) + if (vsi->type == ICE_VSI_VF || vsi->type == ICE_VSI_CTRL) vsi->vf_id = vf_id; ice_alloc_fd_res(vsi); @@ -2492,11 +2536,10 @@ static void ice_vsi_release_msix(struct ice_vsi *vsi) for (i = 0; i < vsi->num_q_vectors; i++) { struct ice_q_vector *q_vector = vsi->q_vectors[i]; - u16 reg_idx = q_vector->reg_idx; - wr32(hw, GLINT_ITR(ICE_IDX_ITR0, reg_idx), 0); - wr32(hw, GLINT_ITR(ICE_IDX_ITR1, reg_idx), 0); + ice_write_intrl(q_vector, 0); for (q = 0; q < q_vector->num_ring_tx; q++) { + ice_write_itr(&q_vector->tx, 0); wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), 0); if (ice_is_xdp_ena_vsi(vsi)) { u32 xdp_txq = txq + vsi->num_xdp_txq; @@ -2507,6 +2550,7 @@ static void ice_vsi_release_msix(struct ice_vsi *vsi) } for (q = 0; q < q_vector->num_ring_rx; q++) { + ice_write_itr(&q_vector->rx, 0); wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), 0); rxq++; } @@ -2593,7 +2637,7 @@ void ice_vsi_free_rx_rings(struct ice_vsi *vsi) */ void ice_vsi_close(struct ice_vsi *vsi) { - if (!test_and_set_bit(__ICE_DOWN, vsi->state)) + if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) ice_down(vsi); ice_vsi_free_irq(vsi); @@ -2610,17 +2654,17 @@ int ice_ena_vsi(struct ice_vsi *vsi, bool locked) { int err = 0; - if 
(!test_bit(__ICE_NEEDS_RESTART, vsi->state)) + if (!test_bit(ICE_VSI_NEEDS_RESTART, vsi->state)) return 0; - clear_bit(__ICE_NEEDS_RESTART, vsi->state); + clear_bit(ICE_VSI_NEEDS_RESTART, vsi->state); if (vsi->netdev && vsi->type == ICE_VSI_PF) { if (netif_running(vsi->netdev)) { if (!locked) rtnl_lock(); - err = ice_open(vsi->netdev); + err = ice_open_internal(vsi->netdev); if (!locked) rtnl_unlock(); @@ -2639,17 +2683,17 @@ int ice_ena_vsi(struct ice_vsi *vsi, bool locked) */ void ice_dis_vsi(struct ice_vsi *vsi, bool locked) { - if (test_bit(__ICE_DOWN, vsi->state)) + if (test_bit(ICE_VSI_DOWN, vsi->state)) return; - set_bit(__ICE_NEEDS_RESTART, vsi->state); + set_bit(ICE_VSI_NEEDS_RESTART, vsi->state); if (vsi->type == ICE_VSI_PF && vsi->netdev) { if (netif_running(vsi->netdev)) { if (!locked) rtnl_lock(); - ice_stop(vsi->netdev); + ice_vsi_close(vsi); if (!locked) rtnl_unlock(); @@ -2752,11 +2796,14 @@ int ice_vsi_release(struct ice_vsi *vsi) * PF that is running the work queue items currently. This is done to * avoid check_flush_dependency() warning on this wq */ - if (vsi->netdev && !ice_is_reset_in_progress(pf->state)) { + if (vsi->netdev && !ice_is_reset_in_progress(pf->state) && + (test_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state))) { unregister_netdev(vsi->netdev); - ice_devlink_destroy_port(vsi); + clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state); } + ice_devlink_destroy_port(vsi); + if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) ice_rss_clean(vsi); @@ -2770,7 +2817,24 @@ int ice_vsi_release(struct ice_vsi *vsi) * many interrupts each VF needs. SR-IOV MSIX resources are also * cleared in the same manner. */ - if (vsi->type != ICE_VSI_VF) { + if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID) { + struct ice_vf *vf; + int i; + + ice_for_each_vf(pf, i) { + vf = &pf->vf[i]; + if (i != vsi->vf_id && vf->ctrl_vsi_idx != ICE_NO_VSI) + break; + } + if (i == pf->num_alloc_vfs) { + /* No other VFs left that have control VSI, reclaim SW + * interrupts back to the common pool + */ + ice_free_res(pf->irq_tracker, vsi->base_vector, + ICE_RES_VF_CTRL_VEC_ID); + pf->num_avail_sw_msix += vsi->num_q_vectors; + } + } else if (vsi->type != ICE_VSI_VF) { /* reclaim SW interrupts back to the common pool */ ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx); pf->num_avail_sw_msix += vsi->num_q_vectors; @@ -2794,10 +2858,16 @@ int ice_vsi_release(struct ice_vsi *vsi) ice_vsi_delete(vsi); ice_vsi_free_q_vectors(vsi); - /* make sure unregister_netdev() was called by checking __ICE_DOWN */ - if (vsi->netdev && test_bit(__ICE_DOWN, vsi->state)) { - free_netdev(vsi->netdev); - vsi->netdev = NULL; + if (vsi->netdev) { + if (test_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state)) { + unregister_netdev(vsi->netdev); + clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state); + } + if (test_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state)) { + free_netdev(vsi->netdev); + vsi->netdev = NULL; + clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state); + } } if (vsi->type == ICE_VSI_VF && @@ -2818,39 +2888,6 @@ int ice_vsi_release(struct ice_vsi *vsi) } /** - * ice_vsi_rebuild_update_coalesce - set coalesce for a q_vector - * @q_vector: pointer to q_vector which is being updated - * @coalesce: pointer to array of struct with stored coalesce - * - * Set coalesce param in q_vector and update these parameters in HW. 
- */ -static void -ice_vsi_rebuild_update_coalesce(struct ice_q_vector *q_vector, - struct ice_coalesce_stored *coalesce) -{ - struct ice_ring_container *rx_rc = &q_vector->rx; - struct ice_ring_container *tx_rc = &q_vector->tx; - struct ice_hw *hw = &q_vector->vsi->back->hw; - - tx_rc->itr_setting = coalesce->itr_tx; - rx_rc->itr_setting = coalesce->itr_rx; - - /* dynamic ITR values will be updated during Tx/Rx */ - if (!ITR_IS_DYNAMIC(tx_rc->itr_setting)) - wr32(hw, GLINT_ITR(tx_rc->itr_idx, q_vector->reg_idx), - ITR_REG_ALIGN(tx_rc->itr_setting) >> - ICE_ITR_GRAN_S); - if (!ITR_IS_DYNAMIC(rx_rc->itr_setting)) - wr32(hw, GLINT_ITR(rx_rc->itr_idx, q_vector->reg_idx), - ITR_REG_ALIGN(rx_rc->itr_setting) >> - ICE_ITR_GRAN_S); - - q_vector->intrl = coalesce->intrl; - wr32(hw, GLINT_RATE(q_vector->reg_idx), - ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran)); -} - -/** * ice_vsi_rebuild_get_coalesce - get coalesce from all q_vectors * @vsi: VSI connected with q_vectors * @coalesce: array of struct with stored coalesce @@ -2869,6 +2906,11 @@ ice_vsi_rebuild_get_coalesce(struct ice_vsi *vsi, coalesce[i].itr_tx = q_vector->tx.itr_setting; coalesce[i].itr_rx = q_vector->rx.itr_setting; coalesce[i].intrl = q_vector->intrl; + + if (i < vsi->num_txq) + coalesce[i].tx_valid = true; + if (i < vsi->num_rxq) + coalesce[i].rx_valid = true; } return vsi->num_q_vectors; @@ -2888,22 +2930,75 @@ static void ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi, struct ice_coalesce_stored *coalesce, int size) { + struct ice_ring_container *rc; int i; if ((size && !coalesce) || !vsi) return; - for (i = 0; i < size && i < vsi->num_q_vectors; i++) - ice_vsi_rebuild_update_coalesce(vsi->q_vectors[i], - &coalesce[i]); + /* There are a couple of cases that have to be handled here: + * 1. The case where the number of queue vectors stays the same, but + * the number of Tx or Rx rings changes (the first for loop) + * 2. The case where the number of queue vectors increased (the + * second for loop) + */ + for (i = 0; i < size && i < vsi->num_q_vectors; i++) { + /* There are 2 cases to handle here and they are the same for + * both Tx and Rx: + * if the entry was valid previously (coalesce[i].[tr]x_valid + * and the loop variable is less than the number of rings + * allocated, then write the previous values + * + * if the entry was not valid previously, but the number of + * rings is less than are allocated (this means the number of + * rings increased from previously), then write out the + * values in the first element + * + * Also, always write the ITR, even if in ITR_IS_DYNAMIC + * as there is no harm because the dynamic algorithm + * will just overwrite. + */ + if (i < vsi->alloc_rxq && coalesce[i].rx_valid) { + rc = &vsi->q_vectors[i]->rx; + rc->itr_setting = coalesce[i].itr_rx; + ice_write_itr(rc, rc->itr_setting); + } else if (i < vsi->alloc_rxq) { + rc = &vsi->q_vectors[i]->rx; + rc->itr_setting = coalesce[0].itr_rx; + ice_write_itr(rc, rc->itr_setting); + } + + if (i < vsi->alloc_txq && coalesce[i].tx_valid) { + rc = &vsi->q_vectors[i]->tx; + rc->itr_setting = coalesce[i].itr_tx; + ice_write_itr(rc, rc->itr_setting); + } else if (i < vsi->alloc_txq) { + rc = &vsi->q_vectors[i]->tx; + rc->itr_setting = coalesce[0].itr_tx; + ice_write_itr(rc, rc->itr_setting); + } - /* number of q_vectors increased, so assume coalesce settings were - * changed globally (i.e. 
ethtool -C eth0 instead of per-queue) and use - * the previous settings from q_vector 0 for all of the new q_vectors + vsi->q_vectors[i]->intrl = coalesce[i].intrl; + ice_write_intrl(vsi->q_vectors[i], coalesce[i].intrl); + } + + /* the number of queue vectors increased so write whatever is in + * the first element */ - for (; i < vsi->num_q_vectors; i++) - ice_vsi_rebuild_update_coalesce(vsi->q_vectors[i], - &coalesce[0]); + for (; i < vsi->num_q_vectors; i++) { + /* transmit */ + rc = &vsi->q_vectors[i]->tx; + rc->itr_setting = coalesce[0].itr_tx; + ice_write_itr(rc, rc->itr_setting); + + /* receive */ + rc = &vsi->q_vectors[i]->rx; + rc->itr_setting = coalesce[0].itr_rx; + ice_write_itr(rc, rc->itr_setting); + + vsi->q_vectors[i]->intrl = coalesce[0].intrl; + ice_write_intrl(vsi->q_vectors[i], coalesce[0].intrl); + } } /** @@ -2919,6 +3014,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) struct ice_coalesce_stored *coalesce; int prev_num_q_vectors = 0; struct ice_vf *vf = NULL; + enum ice_vsi_type vtype; enum ice_status status; struct ice_pf *pf; int ret, i; @@ -2927,14 +3023,17 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) return -EINVAL; pf = vsi->back; - if (vsi->type == ICE_VSI_VF) + vtype = vsi->type; + if (vtype == ICE_VSI_VF) vf = &pf->vf[vsi->vf_id]; coalesce = kcalloc(vsi->num_q_vectors, sizeof(struct ice_coalesce_stored), GFP_KERNEL); - if (coalesce) - prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, - coalesce); + if (!coalesce) + return -ENOMEM; + + prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce); + ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx); ice_vsi_free_q_vectors(vsi); @@ -2943,7 +3042,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) * many interrupts each VF needs. SR-IOV MSIX resources are also * cleared in the same manner. */ - if (vsi->type != ICE_VSI_VF) { + if (vtype != ICE_VSI_VF) { /* reclaim SW interrupts back to the common pool */ ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx); pf->num_avail_sw_msix += vsi->num_q_vectors; @@ -2958,7 +3057,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) ice_vsi_put_qs(vsi); ice_vsi_clear_rings(vsi); ice_vsi_free_arrays(vsi); - if (vsi->type == ICE_VSI_VF) + if (vtype == ICE_VSI_VF) ice_vsi_set_num_qs(vsi, vf->vf_id); else ice_vsi_set_num_qs(vsi, ICE_INVAL_VFID); @@ -2977,7 +3076,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) if (ret < 0) goto err_vsi; - switch (vsi->type) { + switch (vtype) { case ICE_VSI_CTRL: case ICE_VSI_PF: ret = ice_vsi_alloc_q_vectors(vsi); @@ -3004,7 +3103,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) goto err_vectors; } /* ICE_VSI_CTRL does not need RSS so skip RSS processing */ - if (vsi->type != ICE_VSI_CTRL) + if (vtype != ICE_VSI_CTRL) /* Do not exit if configuring RSS had an issue, at * least receive traffic on first queue. 
Hence no * need to capture return value @@ -3066,7 +3165,7 @@ err_rings: } err_vsi: ice_vsi_clear(vsi); - set_bit(__ICE_RESET_FAILED, pf->state); + set_bit(ICE_RESET_FAILED, pf->state); kfree(coalesce); return ret; } @@ -3077,11 +3176,10 @@ err_vsi: */ bool ice_is_reset_in_progress(unsigned long *state) { - return test_bit(__ICE_RESET_OICR_RECV, state) || - test_bit(__ICE_DCBNL_DEVRESET, state) || - test_bit(__ICE_PFR_REQ, state) || - test_bit(__ICE_CORER_REQ, state) || - test_bit(__ICE_GLOBR_REQ, state); + return test_bit(ICE_RESET_OICR_RECV, state) || + test_bit(ICE_PFR_REQ, state) || + test_bit(ICE_CORER_REQ, state) || + test_bit(ICE_GLOBR_REQ, state); } #ifdef CONFIG_DCB @@ -3169,20 +3267,15 @@ out: /** * ice_update_ring_stats - Update ring statistics * @ring: ring to update - * @cont: used to increment per-vector counters * @pkts: number of processed packets * @bytes: number of processed bytes * * This function assumes that caller has acquired a u64_stats_sync lock. */ -static void -ice_update_ring_stats(struct ice_ring *ring, struct ice_ring_container *cont, - u64 pkts, u64 bytes) +static void ice_update_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes) { ring->stats.bytes += bytes; ring->stats.pkts += pkts; - cont->total_bytes += bytes; - cont->total_pkts += pkts; } /** @@ -3194,7 +3287,7 @@ ice_update_ring_stats(struct ice_ring *ring, struct ice_ring_container *cont, void ice_update_tx_ring_stats(struct ice_ring *tx_ring, u64 pkts, u64 bytes) { u64_stats_update_begin(&tx_ring->syncp); - ice_update_ring_stats(tx_ring, &tx_ring->q_vector->tx, pkts, bytes); + ice_update_ring_stats(tx_ring, pkts, bytes); u64_stats_update_end(&tx_ring->syncp); } @@ -3207,7 +3300,7 @@ void ice_update_tx_ring_stats(struct ice_ring *tx_ring, u64 pkts, u64 bytes) void ice_update_rx_ring_stats(struct ice_ring *rx_ring, u64 pkts, u64 bytes) { u64_stats_update_begin(&rx_ring->syncp); - ice_update_ring_stats(rx_ring, &rx_ring->q_vector->rx, pkts, bytes); + ice_update_ring_stats(rx_ring, pkts, bytes); u64_stats_update_end(&rx_ring->syncp); } @@ -3349,3 +3442,40 @@ int ice_clear_dflt_vsi(struct ice_sw *sw) return 0; } + +/** + * ice_set_link - turn on/off physical link + * @vsi: VSI to modify physical link on + * @ena: turn on/off physical link + */ +int ice_set_link(struct ice_vsi *vsi, bool ena) +{ + struct device *dev = ice_pf_to_dev(vsi->back); + struct ice_port_info *pi = vsi->port_info; + struct ice_hw *hw = pi->hw; + enum ice_status status; + + if (vsi->type != ICE_VSI_PF) + return -EINVAL; + + status = ice_aq_set_link_restart_an(pi, ena, NULL); + + /* if link is owned by manageability, FW will return ICE_AQ_RC_EMODE. + * this is not a fatal error, so print a warning message and return + * a success code. Return an error if FW returns an error code other + * than ICE_AQ_RC_EMODE + */ + if (status == ICE_ERR_AQ_ERROR) { + if (hw->adminq.sq_last_status == ICE_AQ_RC_EMODE) + dev_warn(dev, "can't set link to %s, err %s aq_err %s. not fatal, continuing\n", + (ena ? "ON" : "OFF"), ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); + } else if (status) { + dev_err(dev, "can't set link to %s, err %s aq_err %s\n", + (ena ? 
"ON" : "OFF"), ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); + return -EIO; + } + + return 0; +} diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 3da17895a2b1..511c2316c40c 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -45,6 +45,8 @@ int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc); void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create); +int ice_set_link(struct ice_vsi *vsi, bool ena); + #ifdef CONFIG_DCB int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc); #endif /* CONFIG_DCB */ @@ -83,7 +85,7 @@ void ice_vsi_free_rx_rings(struct ice_vsi *vsi); void ice_vsi_free_tx_rings(struct ice_vsi *vsi); -int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena); +void ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena); void ice_update_tx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes); @@ -93,7 +95,8 @@ void ice_vsi_cfg_frame_size(struct ice_vsi *vsi); int ice_status_to_errno(enum ice_status err); -u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran); +void ice_write_intrl(struct ice_q_vector *q_vector, u8 intrl); +void ice_write_itr(struct ice_ring_container *rc, u16 itr); enum ice_status ice_vsi_cfg_mac_fltr(struct ice_vsi *vsi, const u8 *macaddr, bool set); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 2c23c8f468a5..4ee85a217c6f 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -84,7 +84,7 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf) break; } - if (!vsi || test_bit(__ICE_DOWN, vsi->state)) + if (!vsi || test_bit(ICE_VSI_DOWN, vsi->state)) return; if (!(vsi->netdev && netif_carrier_ok(vsi->netdev))) @@ -140,21 +140,10 @@ static int ice_init_mac_fltr(struct ice_pf *pf) perm_addr = vsi->port_info->mac.perm_addr; status = ice_fltr_add_mac_and_broadcast(vsi, perm_addr, ICE_FWD_TO_VSI); - if (!status) - return 0; - - /* We aren't useful with no MAC filters, so unregister if we - * had an error - */ - if (vsi->netdev->reg_state == NETREG_REGISTERED) { - dev_err(ice_pf_to_dev(pf), "Could not add MAC filters error %s. 
Unregistering device\n", - ice_stat_str(status)); - unregister_netdev(vsi->netdev); - free_netdev(vsi->netdev); - vsi->netdev = NULL; - } + if (status) + return -EIO; - return -EIO; + return 0; } /** @@ -209,9 +198,9 @@ static int ice_add_mac_to_unsync_list(struct net_device *netdev, const u8 *addr) */ static bool ice_vsi_fltr_changed(struct ice_vsi *vsi) { - return test_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags) || - test_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags) || - test_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags); + return test_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state) || + test_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state) || + test_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state); } /** @@ -268,7 +257,7 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) if (!vsi->netdev) return -EINVAL; - while (test_and_set_bit(__ICE_CFG_BUSY, vsi->state)) + while (test_and_set_bit(ICE_CFG_BUSY, vsi->state)) usleep_range(1000, 2000); changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags; @@ -278,9 +267,9 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) INIT_LIST_HEAD(&vsi->tmp_unsync_list); if (ice_vsi_fltr_changed(vsi)) { - clear_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags); - clear_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags); - clear_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags); + clear_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state); + clear_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state); + clear_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state); /* grab the netdev's addr_list_lock */ netif_addr_lock_bh(netdev); @@ -318,7 +307,7 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) * space reserved for promiscuous filters. */ if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOSPC && - !test_and_set_bit(__ICE_FLTR_OVERFLOW_PROMISC, + !test_and_set_bit(ICE_FLTR_OVERFLOW_PROMISC, vsi->state)) { promisc_forced_on = true; netdev_warn(netdev, "Reached MAC filter limit, forcing promisc mode on VSI %d\n", @@ -361,8 +350,8 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) } if (((changed_flags & IFF_PROMISC) || promisc_forced_on) || - test_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags)) { - clear_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags); + test_bit(ICE_VSI_PROMISC_CHANGED, vsi->state)) { + clear_bit(ICE_VSI_PROMISC_CHANGED, vsi->state); if (vsi->current_netdev_flags & IFF_PROMISC) { /* Apply Rx filter rule to get traffic from wire */ if (!ice_is_dflt_vsi_in_use(pf->first_sw)) { @@ -395,14 +384,14 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) goto exit; out_promisc: - set_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags); + set_bit(ICE_VSI_PROMISC_CHANGED, vsi->state); goto exit; out: /* if something went wrong then set the changed flag so we try again */ - set_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags); - set_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags); + set_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state); + set_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state); exit: - clear_bit(__ICE_CFG_BUSY, vsi->state); + clear_bit(ICE_CFG_BUSY, vsi->state); return err; } @@ -447,7 +436,6 @@ static void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked) for (node = 0; node < ICE_MAX_VF_AGG_NODES; node++) pf->vf_agg_node[node].num_vsis = 0; - } /** @@ -463,7 +451,7 @@ ice_prepare_for_reset(struct ice_pf *pf) unsigned int i; /* already prepared for reset */ - if (test_bit(__ICE_PREPARED_FOR_RESET, pf->state)) + if (test_bit(ICE_PREPARED_FOR_RESET, pf->state)) return; /* Notify VFs of impending reset */ @@ -484,7 +472,7 @@ ice_prepare_for_reset(struct ice_pf *pf) ice_shutdown_all_ctrlq(hw); - 
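/* Editor's note: ice_vsi_sync_fltr() above serializes against other
 * configuration paths with a cooperative busy bit rather than a mutex.
 * A minimal sketch of the pattern, assuming a state bitmap such as
 * vsi->state:
 *
 *	// spin (politely) until we own the bit
 *	while (test_and_set_bit(ICE_CFG_BUSY, vsi->state))
 *		usleep_range(1000, 2000);
 *
 *	// ... reconcile unicast/multicast/VLAN filters ...
 *
 *	clear_bit(ICE_CFG_BUSY, vsi->state);
 *
 * test_and_set_bit() is atomic, so exactly one caller wins the bit;
 * everyone else sleeps in 1-2 ms slices until it is released.
 */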
set_bit(__ICE_PREPARED_FOR_RESET, pf->state); + set_bit(ICE_PREPARED_FOR_RESET, pf->state); } /** @@ -505,12 +493,12 @@ static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type) /* trigger the reset */ if (ice_reset(hw, reset_type)) { dev_err(dev, "reset %d failed\n", reset_type); - set_bit(__ICE_RESET_FAILED, pf->state); - clear_bit(__ICE_RESET_OICR_RECV, pf->state); - clear_bit(__ICE_PREPARED_FOR_RESET, pf->state); - clear_bit(__ICE_PFR_REQ, pf->state); - clear_bit(__ICE_CORER_REQ, pf->state); - clear_bit(__ICE_GLOBR_REQ, pf->state); + set_bit(ICE_RESET_FAILED, pf->state); + clear_bit(ICE_RESET_OICR_RECV, pf->state); + clear_bit(ICE_PREPARED_FOR_RESET, pf->state); + clear_bit(ICE_PFR_REQ, pf->state); + clear_bit(ICE_CORER_REQ, pf->state); + clear_bit(ICE_GLOBR_REQ, pf->state); return; } @@ -521,8 +509,8 @@ static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type) if (reset_type == ICE_RESET_PFR) { pf->pfr_count++; ice_rebuild(pf, reset_type); - clear_bit(__ICE_PREPARED_FOR_RESET, pf->state); - clear_bit(__ICE_PFR_REQ, pf->state); + clear_bit(ICE_PREPARED_FOR_RESET, pf->state); + clear_bit(ICE_PFR_REQ, pf->state); ice_reset_all_vfs(pf, true); } } @@ -538,20 +526,20 @@ static void ice_reset_subtask(struct ice_pf *pf) /* When a CORER/GLOBR/EMPR is about to happen, the hardware triggers an * OICR interrupt. The OICR handler (ice_misc_intr) determines what type * of reset is pending and sets bits in pf->state indicating the reset - * type and __ICE_RESET_OICR_RECV. So, if the latter bit is set + * type and ICE_RESET_OICR_RECV. So, if the latter bit is set * prepare for pending reset if not already (for PF software-initiated * global resets the software should already be prepared for it as - * indicated by __ICE_PREPARED_FOR_RESET; for global resets initiated + * indicated by ICE_PREPARED_FOR_RESET; for global resets initiated * by firmware or software on other PFs, that bit is not set so prepare * for the reset now), poll for reset done, rebuild and return. */ - if (test_bit(__ICE_RESET_OICR_RECV, pf->state)) { + if (test_bit(ICE_RESET_OICR_RECV, pf->state)) { /* Perform the largest reset requested */ - if (test_and_clear_bit(__ICE_CORER_RECV, pf->state)) + if (test_and_clear_bit(ICE_CORER_RECV, pf->state)) reset_type = ICE_RESET_CORER; - if (test_and_clear_bit(__ICE_GLOBR_RECV, pf->state)) + if (test_and_clear_bit(ICE_GLOBR_RECV, pf->state)) reset_type = ICE_RESET_GLOBR; - if (test_and_clear_bit(__ICE_EMPR_RECV, pf->state)) + if (test_and_clear_bit(ICE_EMPR_RECV, pf->state)) reset_type = ICE_RESET_EMPR; /* return if no valid reset type requested */ if (reset_type == ICE_RESET_INVAL) @@ -560,7 +548,7 @@ static void ice_reset_subtask(struct ice_pf *pf) /* make sure we are ready to rebuild */ if (ice_check_reset(&pf->hw)) { - set_bit(__ICE_RESET_FAILED, pf->state); + set_bit(ICE_RESET_FAILED, pf->state); } else { /* done with reset. 
start rebuild */ pf->hw.reset_ongoing = false; @@ -568,11 +556,11 @@ static void ice_reset_subtask(struct ice_pf *pf) /* clear bit to resume normal operations, but * ICE_NEEDS_RESTART bit is set in case rebuild failed */ - clear_bit(__ICE_RESET_OICR_RECV, pf->state); - clear_bit(__ICE_PREPARED_FOR_RESET, pf->state); - clear_bit(__ICE_PFR_REQ, pf->state); - clear_bit(__ICE_CORER_REQ, pf->state); - clear_bit(__ICE_GLOBR_REQ, pf->state); + clear_bit(ICE_RESET_OICR_RECV, pf->state); + clear_bit(ICE_PREPARED_FOR_RESET, pf->state); + clear_bit(ICE_PFR_REQ, pf->state); + clear_bit(ICE_CORER_REQ, pf->state); + clear_bit(ICE_GLOBR_REQ, pf->state); ice_reset_all_vfs(pf, true); } @@ -580,19 +568,19 @@ static void ice_reset_subtask(struct ice_pf *pf) } /* No pending resets to finish processing. Check for new resets */ - if (test_bit(__ICE_PFR_REQ, pf->state)) + if (test_bit(ICE_PFR_REQ, pf->state)) reset_type = ICE_RESET_PFR; - if (test_bit(__ICE_CORER_REQ, pf->state)) + if (test_bit(ICE_CORER_REQ, pf->state)) reset_type = ICE_RESET_CORER; - if (test_bit(__ICE_GLOBR_REQ, pf->state)) + if (test_bit(ICE_GLOBR_REQ, pf->state)) reset_type = ICE_RESET_GLOBR; /* If no valid reset type requested just return */ if (reset_type == ICE_RESET_INVAL) return; /* reset if not already down or busy */ - if (!test_bit(__ICE_DOWN, pf->state) && - !test_bit(__ICE_CFG_BUSY, pf->state)) { + if (!test_bit(ICE_DOWN, pf->state) && + !test_bit(ICE_CFG_BUSY, pf->state)) { ice_do_reset(pf, reset_type); } } @@ -609,7 +597,7 @@ static void ice_print_topo_conflict(struct ice_vsi *vsi) case ICE_AQ_LINK_TOPO_UNREACH_PRT: case ICE_AQ_LINK_TOPO_UNDRUTIL_PRT: case ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA: - netdev_info(vsi->netdev, "Possible mis-configuration of the Ethernet port detected, please use the Intel(R) Ethernet Port Configuration Tool application to address the issue.\n"); + netdev_info(vsi->netdev, "Potential misconfiguration of the Ethernet port detected. If it was not intended, please use the Intel (R) Ethernet Port Configuration Tool to address the issue.\n"); break; case ICE_AQ_LINK_TOPO_UNSUPP_MEDIA: netdev_info(vsi->netdev, "Rx/Tx is disabled on this device because an unsupported module type was detected. 
Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n"); @@ -731,7 +719,7 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup) } status = ice_aq_get_phy_caps(vsi->port_info, false, - ICE_AQC_REPORT_SW_CFG, caps, NULL); + ICE_AQC_REPORT_ACTIVE_CFG, caps, NULL); if (status) netdev_info(vsi->netdev, "Get phy capability failed.\n"); @@ -764,7 +752,7 @@ static void ice_vsi_link_event(struct ice_vsi *vsi, bool link_up) if (!vsi) return; - if (test_bit(__ICE_DOWN, vsi->state) || !vsi->netdev) + if (test_bit(ICE_VSI_DOWN, vsi->state) || !vsi->netdev) return; if (vsi->type == ICE_VSI_PF) { @@ -884,10 +872,10 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up, { struct device *dev = ice_pf_to_dev(pf); struct ice_phy_info *phy_info; + enum ice_status status; struct ice_vsi *vsi; u16 old_link_speed; bool old_link; - int result; phy_info = &pi->phy; phy_info->link_info_old = phy_info->link_info; @@ -898,10 +886,11 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up, /* update the link info structures and re-enable link events, * don't bail on failure due to other book keeping needed */ - result = ice_update_link_info(pi); - if (result) - dev_dbg(dev, "Failed to update link status and re-enable link events for port %d\n", - pi->lport); + status = ice_update_link_info(pi); + if (status) + dev_dbg(dev, "Failed to update link status on port %d, err %s aq_err %s\n", + pi->lport, ice_stat_str(status), + ice_aq_str(pi->hw->adminq.sq_last_status)); /* Check if the link state is up after updating link info, and treat * this event as an UP event since the link is actually UP now. @@ -917,18 +906,12 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up, if (!test_bit(ICE_FLAG_NO_MEDIA, pf->flags) && !(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) { set_bit(ICE_FLAG_NO_MEDIA, pf->flags); - - result = ice_aq_set_link_restart_an(pi, false, NULL); - if (result) { - dev_dbg(dev, "Failed to set link down, VSI %d error %d\n", - vsi->vsi_num, result); - return result; - } + ice_set_link(vsi, false); } /* if the old link up/down and speed is the same as the new */ if (link_up == old_link && link_speed == old_link_speed) - return result; + return 0; if (ice_is_dcb_active(pf)) { if (test_bit(ICE_FLAG_DCB_ENA, pf->flags)) @@ -942,7 +925,7 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up, ice_vc_notify_link_state(pf); - return result; + return 0; } /** @@ -954,8 +937,8 @@ static void ice_watchdog_subtask(struct ice_pf *pf) int i; /* if interface is down do nothing */ - if (test_bit(__ICE_DOWN, pf->state) || - test_bit(__ICE_CFG_BUSY, pf->state)) + if (test_bit(ICE_DOWN, pf->state) || + test_bit(ICE_CFG_BUSY, pf->state)) return; /* make sure we don't do these things too often */ @@ -1044,7 +1027,7 @@ struct ice_aq_task { }; /** - * ice_wait_for_aq_event - Wait for an AdminQ event from firmware + * ice_aq_wait_for_event - Wait for an AdminQ event from firmware * @pf: pointer to the PF private structure * @opcode: the opcode to wait for * @timeout: how long to wait, in jiffies @@ -1199,7 +1182,7 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) u32 oldval, val; /* Do not clean control queue if/when PF reset fails */ - if (test_bit(__ICE_RESET_FAILED, pf->state)) + if (test_bit(ICE_RESET_FAILED, pf->state)) return 0; switch (q_type) { @@ -1210,6 +1193,10 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) case ICE_CTL_Q_MAILBOX: 
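/* Editor's note: for context, __ice_clean_ctrlq() drains events in a
 * clean-element/dispatch loop roughly shaped like the sketch below
 * (simplified; the real function also tracks queue overflow and
 * critical errors):
 *
 *	do {
 *		u16 opcode;
 *
 *		ret = ice_clean_rq_elem(hw, cq, &event, &pending);
 *		if (ret == ICE_ERR_AQ_NO_WORK)
 *			break;
 *
 *		opcode = le16_to_cpu(event.desc.opcode);
 *		switch (opcode) {
 *		case ice_mbx_opc_send_msg_to_pf:
 *			// screen VF messages before processing them
 *			if (!ice_is_malicious_vf(pf, &event, i, pending))
 *				ice_vc_process_vf_msg(pf, &event);
 *			break;
 *		// ... other opcodes ...
 *		}
 *	} while (pending);
 *
 * which is why the mailbox snapshot state is primed just below, before
 * the first element is cleaned.
 */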
cq = &hw->mailboxq; qtype = "Mailbox"; + /* we are going to try to detect a malicious VF, so set the + * state to begin detection + */ + hw->mbx_snapshot.mbx_buf.state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT; break; default: dev_warn(dev, "Unknown control queue type 0x%x\n", q_type); @@ -1291,7 +1278,8 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) ice_vf_lan_overflow_event(pf, &event); break; case ice_mbx_opc_send_msg_to_pf: - ice_vc_process_vf_msg(pf, &event); + if (!ice_is_malicious_vf(pf, &event, i, pending)) + ice_vc_process_vf_msg(pf, &event); break; case ice_aqc_opc_fw_logging: ice_output_fw_log(hw, &event.desc, event.msg_buf); @@ -1334,13 +1322,13 @@ static void ice_clean_adminq_subtask(struct ice_pf *pf) { struct ice_hw *hw = &pf->hw; - if (!test_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state)) + if (!test_bit(ICE_ADMINQ_EVENT_PENDING, pf->state)) return; if (__ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN)) return; - clear_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state); + clear_bit(ICE_ADMINQ_EVENT_PENDING, pf->state); /* There might be a situation where new messages arrive to a control * queue between processing the last message and clearing the @@ -1361,13 +1349,13 @@ static void ice_clean_mailboxq_subtask(struct ice_pf *pf) { struct ice_hw *hw = &pf->hw; - if (!test_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state)) + if (!test_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state)) return; if (__ice_clean_ctrlq(pf, ICE_CTL_Q_MAILBOX)) return; - clear_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state); + clear_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state); if (ice_ctrlq_pending(hw, &hw->mailboxq)) __ice_clean_ctrlq(pf, ICE_CTL_Q_MAILBOX); @@ -1383,9 +1371,9 @@ static void ice_clean_mailboxq_subtask(struct ice_pf *pf) */ void ice_service_task_schedule(struct ice_pf *pf) { - if (!test_bit(__ICE_SERVICE_DIS, pf->state) && - !test_and_set_bit(__ICE_SERVICE_SCHED, pf->state) && - !test_bit(__ICE_NEEDS_RESTART, pf->state)) + if (!test_bit(ICE_SERVICE_DIS, pf->state) && + !test_and_set_bit(ICE_SERVICE_SCHED, pf->state) && + !test_bit(ICE_NEEDS_RESTART, pf->state)) queue_work(ice_wq, &pf->serv_task); } @@ -1395,32 +1383,32 @@ void ice_service_task_schedule(struct ice_pf *pf) */ static void ice_service_task_complete(struct ice_pf *pf) { - WARN_ON(!test_bit(__ICE_SERVICE_SCHED, pf->state)); + WARN_ON(!test_bit(ICE_SERVICE_SCHED, pf->state)); /* force memory (pf->state) to sync before next service task */ smp_mb__before_atomic(); - clear_bit(__ICE_SERVICE_SCHED, pf->state); + clear_bit(ICE_SERVICE_SCHED, pf->state); } /** * ice_service_task_stop - stop service task and cancel works * @pf: board private structure * - * Return 0 if the __ICE_SERVICE_DIS bit was not already set, + * Return 0 if the ICE_SERVICE_DIS bit was not already set, * 1 otherwise. 
*/ static int ice_service_task_stop(struct ice_pf *pf) { int ret; - ret = test_and_set_bit(__ICE_SERVICE_DIS, pf->state); + ret = test_and_set_bit(ICE_SERVICE_DIS, pf->state); if (pf->serv_tmr.function) del_timer_sync(&pf->serv_tmr); if (pf->serv_task.func) cancel_work_sync(&pf->serv_task); - clear_bit(__ICE_SERVICE_SCHED, pf->state); + clear_bit(ICE_SERVICE_SCHED, pf->state); return ret; } @@ -1432,7 +1420,7 @@ static int ice_service_task_stop(struct ice_pf *pf) */ static void ice_service_task_restart(struct ice_pf *pf) { - clear_bit(__ICE_SERVICE_DIS, pf->state); + clear_bit(ICE_SERVICE_DIS, pf->state); ice_service_task_schedule(pf); } @@ -1465,7 +1453,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) unsigned int i; u32 reg; - if (!test_and_clear_bit(__ICE_MDD_EVENT_PENDING, pf->state)) { + if (!test_and_clear_bit(ICE_MDD_EVENT_PENDING, pf->state)) { /* Since the VF MDD event logging is rate limited, check if * there are pending MDD events. */ @@ -1557,7 +1545,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (reg & VP_MDET_TX_PQM_VALID_M) { wr32(hw, VP_MDET_TX_PQM(i), 0xFFFF); vf->mdd_tx_events.count++; - set_bit(__ICE_MDD_VF_PRINT_PENDING, pf->state); + set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state); if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event TX_PQM detected on VF %d\n", i); @@ -1567,7 +1555,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (reg & VP_MDET_TX_TCLAN_VALID_M) { wr32(hw, VP_MDET_TX_TCLAN(i), 0xFFFF); vf->mdd_tx_events.count++; - set_bit(__ICE_MDD_VF_PRINT_PENDING, pf->state); + set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state); if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on VF %d\n", i); @@ -1577,7 +1565,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (reg & VP_MDET_TX_TDPU_VALID_M) { wr32(hw, VP_MDET_TX_TDPU(i), 0xFFFF); vf->mdd_tx_events.count++; - set_bit(__ICE_MDD_VF_PRINT_PENDING, pf->state); + set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state); if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event TX_TDPU detected on VF %d\n", i); @@ -1587,7 +1575,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (reg & VP_MDET_RX_VALID_M) { wr32(hw, VP_MDET_RX(i), 0xFFFF); vf->mdd_rx_events.count++; - set_bit(__ICE_MDD_VF_PRINT_PENDING, pf->state); + set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state); if (netif_msg_rx_err(pf)) dev_info(dev, "Malicious Driver Detection event RX detected on VF %d\n", i); @@ -1642,7 +1630,7 @@ static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up) if (!pcaps) return -ENOMEM; - retcode = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps, + retcode = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps, NULL); if (retcode) { dev_err(dev, "Failed to get phy capabilities, VSI %d error %d\n", @@ -1702,7 +1690,7 @@ static int ice_init_nvm_phy_type(struct ice_port_info *pi) if (!pcaps) return -ENOMEM; - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_NVM_CAP, pcaps, + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA, pcaps, NULL); if (status) { @@ -1748,15 +1736,18 @@ static void ice_init_link_dflt_override(struct ice_port_info *pi) * ice_init_phy_cfg_dflt_override - Initialize PHY cfg default override settings * @pi: port info structure * - * If default override is enabled, initialized the user PHY cfg speed and FEC + * If default override is enabled, initialize the user PHY cfg speed and FEC * settings using the default override mask from the NVM. 
* * The PHY should only be configured with the default override settings the - * first time media is available. The __ICE_LINK_DEFAULT_OVERRIDE_PENDING state + * first time media is available. The ICE_LINK_DEFAULT_OVERRIDE_PENDING state * is used to indicate that the user PHY cfg default override is initialized * and the PHY has not been configured with the default override settings. The * state is set here, and cleared in ice_configure_phy the first time the PHY is * configured. + * + * This function should be called only if the FW doesn't support default + * configuration mode, as reported by ice_fw_supports_report_dflt_cfg. */ static void ice_init_phy_cfg_dflt_override(struct ice_port_info *pi) { @@ -1781,7 +1772,7 @@ static void ice_init_phy_cfg_dflt_override(struct ice_port_info *pi) cfg->link_fec_opt = ldo->fec_options; phy->curr_user_fec_req = ICE_FEC_AUTO; - set_bit(__ICE_LINK_DEFAULT_OVERRIDE_PENDING, pf->state); + set_bit(ICE_LINK_DEFAULT_OVERRIDE_PENDING, pf->state); } /** @@ -1804,22 +1795,21 @@ static int ice_init_phy_user_cfg(struct ice_port_info *pi) struct ice_phy_info *phy = &pi->phy; struct ice_pf *pf = pi->hw->back; enum ice_status status; - struct ice_vsi *vsi; int err = 0; if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) return -EIO; - vsi = ice_get_main_vsi(pf); - if (!vsi) - return -EINVAL; - pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL); if (!pcaps) return -ENOMEM; - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP, pcaps, - NULL); + if (ice_fw_supports_report_dflt_cfg(pi->hw)) + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG, + pcaps, NULL); + else + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, + pcaps, NULL); if (status) { dev_err(ice_pf_to_dev(pf), "Get PHY capability failed.\n"); err = -EIO; @@ -1829,22 +1819,24 @@ static int ice_init_phy_user_cfg(struct ice_port_info *pi) ice_copy_phy_caps_to_cfg(pi, pcaps, &pi->phy.curr_user_phy_cfg); /* check if lenient mode is supported and enabled */ - if (ice_fw_supports_link_override(&vsi->back->hw) && + if (ice_fw_supports_link_override(pi->hw) && !(pcaps->module_compliance_enforcement & ICE_AQC_MOD_ENFORCE_STRICT_MODE)) { set_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags); - /* if link default override is enabled, initialize user PHY - * configuration with link default override values + /* if the FW supports default PHY configuration mode, then the driver + * does not have to apply link override settings. 
If not, + * initialize user PHY configuration with link override values */ - if (pf->link_dflt_override.options & ICE_LINK_OVERRIDE_EN) { + if (!ice_fw_supports_report_dflt_cfg(pi->hw) && + (pf->link_dflt_override.options & ICE_LINK_OVERRIDE_EN)) { ice_init_phy_cfg_dflt_override(pi); goto out; } } - /* if link default override is not enabled, initialize PHY using - * topology with media + /* if link default override is not enabled, set user flow control and + * FEC settings based on what get_phy_caps returned */ phy->curr_user_fec_req = ice_caps_to_fec_mode(pcaps->caps, pcaps->link_fec_options); @@ -1852,7 +1844,7 @@ static int ice_init_phy_user_cfg(struct ice_port_info *pi) out: phy->curr_user_speed_req = ICE_AQ_LINK_SPEED_M; - set_bit(__ICE_PHY_INIT_COMPLETE, pf->state); + set_bit(ICE_PHY_INIT_COMPLETE, pf->state); err_out: kfree(pcaps); return err; @@ -1869,27 +1861,24 @@ err_out: static int ice_configure_phy(struct ice_vsi *vsi) { struct device *dev = ice_pf_to_dev(vsi->back); + struct ice_port_info *pi = vsi->port_info; struct ice_aqc_get_phy_caps_data *pcaps; struct ice_aqc_set_phy_cfg_data *cfg; - struct ice_port_info *pi; + struct ice_phy_info *phy = &pi->phy; + struct ice_pf *pf = vsi->back; enum ice_status status; int err = 0; - pi = vsi->port_info; - if (!pi) - return -EINVAL; - /* Ensure we have media as we cannot configure a medialess port */ - if (!(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) + if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) return -EPERM; ice_print_topo_conflict(vsi); - if (vsi->port_info->phy.link_info.topo_media_conflict == - ICE_AQ_LINK_TOPO_UNSUPP_MEDIA) + if (phy->link_info.topo_media_conflict == ICE_AQ_LINK_TOPO_UNSUPP_MEDIA) return -EPERM; - if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags)) + if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags)) return ice_force_phys_link_state(vsi, true); pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL); @@ -1897,7 +1886,7 @@ static int ice_configure_phy(struct ice_vsi *vsi) return -ENOMEM; /* Get current PHY config */ - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps, + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps, NULL); if (status) { dev_err(dev, "Failed to get PHY configuration, VSI %d error %s\n", @@ -1910,15 +1899,19 @@ static int ice_configure_phy(struct ice_vsi *vsi) * there's nothing to do */ if (pcaps->caps & ICE_AQC_PHY_EN_LINK && - ice_phy_caps_equals_cfg(pcaps, &pi->phy.curr_user_phy_cfg)) + ice_phy_caps_equals_cfg(pcaps, &phy->curr_user_phy_cfg)) goto done; /* Use PHY topology as baseline for configuration */ memset(pcaps, 0, sizeof(*pcaps)); - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP, pcaps, - NULL); + if (ice_fw_supports_report_dflt_cfg(pi->hw)) + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG, + pcaps, NULL); + else + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, + pcaps, NULL); if (status) { - dev_err(dev, "Failed to get PHY topology, VSI %d error %s\n", + dev_err(dev, "Failed to get PHY caps, VSI %d error %s\n", vsi->vsi_num, ice_stat_str(status)); err = -EIO; goto done; @@ -1935,10 +1928,10 @@ static int ice_configure_phy(struct ice_vsi *vsi) /* Speed - If default override pending, use curr_user_phy_cfg set in * ice_init_phy_user_cfg_ldo. 
*/ - if (test_and_clear_bit(__ICE_LINK_DEFAULT_OVERRIDE_PENDING, + if (test_and_clear_bit(ICE_LINK_DEFAULT_OVERRIDE_PENDING, vsi->back->state)) { - cfg->phy_type_low = pi->phy.curr_user_phy_cfg.phy_type_low; - cfg->phy_type_high = pi->phy.curr_user_phy_cfg.phy_type_high; + cfg->phy_type_low = phy->curr_user_phy_cfg.phy_type_low; + cfg->phy_type_high = phy->curr_user_phy_cfg.phy_type_high; } else { u64 phy_low = 0, phy_high = 0; @@ -1956,7 +1949,7 @@ static int ice_configure_phy(struct ice_vsi *vsi) } /* FEC */ - ice_cfg_phy_fec(pi, cfg, pi->phy.curr_user_fec_req); + ice_cfg_phy_fec(pi, cfg, phy->curr_user_fec_req); /* Can't provide what was requested; use PHY capabilities */ if (cfg->link_fec_opt != @@ -1968,12 +1961,12 @@ static int ice_configure_phy(struct ice_vsi *vsi) /* Flow Control - always supported; no need to check against * capabilities */ - ice_cfg_phy_fc(pi, cfg, pi->phy.curr_user_fc_req); + ice_cfg_phy_fc(pi, cfg, phy->curr_user_fc_req); /* Enable link and link update */ cfg->caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT | ICE_AQ_PHY_ENA_LINK; - status = ice_aq_set_phy_cfg(&vsi->back->hw, pi, cfg, NULL); + status = ice_aq_set_phy_cfg(&pf->hw, pi, cfg, NULL); if (status) { dev_err(dev, "Failed to set phy config, VSI %d error %s\n", vsi->vsi_num, ice_stat_str(status)); @@ -2014,13 +2007,13 @@ static void ice_check_media_subtask(struct ice_pf *pf) return; if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { - if (!test_bit(__ICE_PHY_INIT_COMPLETE, pf->state)) + if (!test_bit(ICE_PHY_INIT_COMPLETE, pf->state)) ice_init_phy_user_cfg(pi); /* PHY settings are reset on media insertion, reconfigure * PHY to preserve settings. */ - if (test_bit(__ICE_DOWN, vsi->state) && + if (test_bit(ICE_VSI_DOWN, vsi->state) && test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags)) return; @@ -2050,8 +2043,8 @@ static void ice_service_task(struct work_struct *work) /* bail if a reset/recovery cycle is pending or rebuild failed */ if (ice_is_reset_in_progress(pf->state) || - test_bit(__ICE_SUSPENDED, pf->state) || - test_bit(__ICE_NEEDS_RESTART, pf->state)) { + test_bit(ICE_SUSPENDED, pf->state) || + test_bit(ICE_NEEDS_RESTART, pf->state)) { ice_service_task_complete(pf); return; } @@ -2071,7 +2064,9 @@ static void ice_service_task(struct work_struct *work) ice_process_vflr_event(pf); ice_clean_mailboxq_subtask(pf); ice_sync_arfs_fltrs(pf); - /* Clear __ICE_SERVICE_SCHED flag to allow scheduling next event */ + ice_flush_fdir_ctx(pf); + + /* Clear ICE_SERVICE_SCHED flag to allow scheduling next event */ ice_service_task_complete(pf); /* If the tasks have taken longer than one service timer period @@ -2079,10 +2074,11 @@ static void ice_service_task(struct work_struct *work) * schedule the service task now. 
*/ if (time_after(jiffies, (start_time + pf->serv_tmr_period)) || - test_bit(__ICE_MDD_EVENT_PENDING, pf->state) || - test_bit(__ICE_VFLR_EVENT_PENDING, pf->state) || - test_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state) || - test_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state)) + test_bit(ICE_MDD_EVENT_PENDING, pf->state) || + test_bit(ICE_VFLR_EVENT_PENDING, pf->state) || + test_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state) || + test_bit(ICE_FD_VF_FLUSH_CTX, pf->state) || + test_bit(ICE_ADMINQ_EVENT_PENDING, pf->state)) mod_timer(&pf->serv_tmr, jiffies); } @@ -2112,7 +2108,7 @@ int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset) struct device *dev = ice_pf_to_dev(pf); /* bail out if earlier reset has failed */ - if (test_bit(__ICE_RESET_FAILED, pf->state)) { + if (test_bit(ICE_RESET_FAILED, pf->state)) { dev_dbg(dev, "earlier reset has failed\n"); return -EIO; } @@ -2124,13 +2120,13 @@ int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset) switch (reset) { case ICE_RESET_PFR: - set_bit(__ICE_PFR_REQ, pf->state); + set_bit(ICE_PFR_REQ, pf->state); break; case ICE_RESET_CORER: - set_bit(__ICE_CORER_REQ, pf->state); + set_bit(ICE_CORER_REQ, pf->state); break; case ICE_RESET_GLOBR: - set_bit(__ICE_GLOBR_REQ, pf->state); + set_bit(ICE_GLOBR_REQ, pf->state); break; default: return -EINVAL; @@ -2220,8 +2216,13 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) /* skip this unused q_vector */ continue; } - err = devm_request_irq(dev, irq_num, vsi->irq_handler, 0, - q_vector->name, q_vector); + if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID) + err = devm_request_irq(dev, irq_num, vsi->irq_handler, + IRQF_SHARED, q_vector->name, + q_vector); + else + err = devm_request_irq(dev, irq_num, vsi->irq_handler, + 0, q_vector->name, q_vector); if (err) { netdev_err(vsi->netdev, "MSIX request_irq failed, error: %d\n", err); @@ -2524,7 +2525,7 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, } /* need to stop netdev while setting up the program for Rx rings */ - if (if_running && !test_and_set_bit(__ICE_DOWN, vsi->state)) { + if (if_running && !test_and_set_bit(ICE_VSI_DOWN, vsi->state)) { ret = ice_down(vsi); if (ret) { NL_SET_ERR_MSG_MOD(extack, "Preparing device for XDP attach failed"); @@ -2630,8 +2631,8 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) u32 oicr, ena_mask; dev = ice_pf_to_dev(pf); - set_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state); - set_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state); + set_bit(ICE_ADMINQ_EVENT_PENDING, pf->state); + set_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state); oicr = rd32(hw, PFINT_OICR); ena_mask = rd32(hw, PFINT_OICR_ENA); @@ -2643,18 +2644,18 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) if (oicr & PFINT_OICR_MAL_DETECT_M) { ena_mask &= ~PFINT_OICR_MAL_DETECT_M; - set_bit(__ICE_MDD_EVENT_PENDING, pf->state); + set_bit(ICE_MDD_EVENT_PENDING, pf->state); } if (oicr & PFINT_OICR_VFLR_M) { /* disable any further VFLR event notifications */ - if (test_bit(__ICE_VF_RESETS_DISABLED, pf->state)) { + if (test_bit(ICE_VF_RESETS_DISABLED, pf->state)) { u32 reg = rd32(hw, PFINT_OICR_ENA); reg &= ~PFINT_OICR_VFLR_M; wr32(hw, PFINT_OICR_ENA, reg); } else { ena_mask &= ~PFINT_OICR_VFLR_M; - set_bit(__ICE_VFLR_EVENT_PENDING, pf->state); + set_bit(ICE_VFLR_EVENT_PENDING, pf->state); } } @@ -2680,13 +2681,13 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) * We also make note of which reset happened so that peer * devices/drivers can be 
informed. */ - if (!test_and_set_bit(__ICE_RESET_OICR_RECV, pf->state)) { + if (!test_and_set_bit(ICE_RESET_OICR_RECV, pf->state)) { if (reset == ICE_RESET_CORER) - set_bit(__ICE_CORER_RECV, pf->state); + set_bit(ICE_CORER_RECV, pf->state); else if (reset == ICE_RESET_GLOBR) - set_bit(__ICE_GLOBR_RECV, pf->state); + set_bit(ICE_GLOBR_RECV, pf->state); else - set_bit(__ICE_EMPR_RECV, pf->state); + set_bit(ICE_EMPR_RECV, pf->state); /* There are couple of different bits at play here. * hw->reset_ongoing indicates whether the hardware is @@ -2694,7 +2695,7 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) * is received and set back to false after the driver * has determined that the hardware is out of reset. * - * __ICE_RESET_OICR_RECV in pf->state indicates + * ICE_RESET_OICR_RECV in pf->state indicates * that a post reset rebuild is required before the * driver is operational again. This is set above. * @@ -2722,7 +2723,7 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) if (oicr & (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_PCI_EXCEPTION_M | PFINT_OICR_ECC_ERR_M)) { - set_bit(__ICE_PFR_REQ, pf->state); + set_bit(ICE_PFR_REQ, pf->state); ice_service_task_schedule(pf); } } @@ -2975,19 +2976,13 @@ static int ice_cfg_netdev(struct ice_vsi *vsi) struct ice_netdev_priv *np; struct net_device *netdev; u8 mac_addr[ETH_ALEN]; - int err; - - err = ice_devlink_create_port(vsi); - if (err) - return err; netdev = alloc_etherdev_mqs(sizeof(*np), vsi->alloc_txq, vsi->alloc_rxq); - if (!netdev) { - err = -ENOMEM; - goto err_destroy_devlink_port; - } + if (!netdev) + return -ENOMEM; + set_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state); vsi->netdev = netdev; np = netdev_priv(netdev); np->vsi = vsi; @@ -3014,25 +3009,7 @@ static int ice_cfg_netdev(struct ice_vsi *vsi) netdev->min_mtu = ETH_MIN_MTU; netdev->max_mtu = ICE_MAX_MTU; - err = register_netdev(vsi->netdev); - if (err) - goto err_free_netdev; - - devlink_port_type_eth_set(&vsi->devlink_port, vsi->netdev); - - netif_carrier_off(vsi->netdev); - - /* make sure transmit queues start off as stopped */ - netif_tx_stop_all_queues(vsi->netdev); - return 0; - -err_free_netdev: - free_netdev(vsi->netdev); - vsi->netdev = NULL; -err_destroy_devlink_port: - ice_devlink_destroy_port(vsi); - return err; } /** @@ -3107,15 +3084,6 @@ ice_vlan_rx_add_vid(struct net_device *netdev, __always_unused __be16 proto, struct ice_vsi *vsi = np->vsi; int ret; - if (vid >= VLAN_N_VID) { - netdev_err(netdev, "VLAN id requested %d is out of range %d\n", - vid, VLAN_N_VID); - return -EINVAL; - } - - if (vsi->info.pvid) - return -EINVAL; - /* VLAN 0 is added by default during load/reset */ if (!vid) return 0; @@ -3132,7 +3100,7 @@ ice_vlan_rx_add_vid(struct net_device *netdev, __always_unused __be16 proto, */ ret = ice_vsi_add_vlan(vsi, vid, ICE_FWD_TO_VSI); if (!ret) - set_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags); + set_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state); return ret; } @@ -3153,9 +3121,6 @@ ice_vlan_rx_kill_vid(struct net_device *netdev, __always_unused __be16 proto, struct ice_vsi *vsi = np->vsi; int ret; - if (vsi->info.pvid) - return -EINVAL; - /* don't allow removal of VLAN 0 */ if (!vid) return 0; @@ -3171,7 +3136,7 @@ ice_vlan_rx_kill_vid(struct net_device *netdev, __always_unused __be16 proto, if (vsi->num_vlan == 1 && ice_vsi_is_vlan_pruning_ena(vsi)) ret = ice_cfg_vlan_pruning(vsi, false, false); - set_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags); + set_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state); return ret; } @@ -3230,8 
+3195,7 @@ unroll_napi_add: if (vsi) { ice_napi_del(vsi); if (vsi->netdev) { - if (vsi->netdev->reg_state == NETREG_REGISTERED) - unregister_netdev(vsi->netdev); + clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state); free_netdev(vsi->netdev); vsi->netdev = NULL; } @@ -3365,7 +3329,7 @@ static int ice_init_pf(struct ice_pf *pf) timer_setup(&pf->serv_tmr, ice_service_timer, 0); pf->serv_tmr_period = HZ; INIT_WORK(&pf->serv_task, ice_service_task); - clear_bit(__ICE_SERVICE_SCHED, pf->state); + clear_bit(ICE_SERVICE_SCHED, pf->state); mutex_init(&pf->avail_q_mutex); pf->avail_txqs = bitmap_zalloc(pf->max_pf_txqs, GFP_KERNEL); @@ -3537,15 +3501,14 @@ static int ice_init_interrupt_scheme(struct ice_pf *pf) } /** - * ice_is_wol_supported - get NVM state of WoL - * @pf: board private structure + * ice_is_wol_supported - check if WoL is supported + * @hw: pointer to hardware info * * Check if WoL is supported based on the HW configuration. * Returns true if NVM supports and enables WoL for this port, false otherwise */ -bool ice_is_wol_supported(struct ice_pf *pf) +bool ice_is_wol_supported(struct ice_hw *hw) { - struct ice_hw *hw = &pf->hw; u16 wol_ctrl; /* A bit set to 1 in the NVM Software Reserved Word 2 (WoL control @@ -3554,7 +3517,7 @@ bool ice_is_wol_supported(struct ice_pf *pf) if (ice_read_sr_word(hw, ICE_SR_NVM_WOL_CFG, &wol_ctrl)) return false; - return !(BIT(hw->pf_id) & wol_ctrl); + return !(BIT(hw->port_info->lport) & wol_ctrl); } /** @@ -3575,7 +3538,7 @@ int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx) if (!new_rx && !new_tx) return -EINVAL; - while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) { + while (test_and_set_bit(ICE_CFG_BUSY, pf->state)) { timeout--; if (!timeout) return -EBUSY; @@ -3599,7 +3562,7 @@ int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx) ice_pf_dcb_recfg(pf); ice_vsi_open(vsi); done: - clear_bit(__ICE_CFG_BUSY, pf->state); + clear_bit(ICE_CFG_BUSY, pf->state); return err; } @@ -3986,6 +3949,43 @@ static void ice_print_wake_reason(struct ice_pf *pf) } /** + * ice_register_netdev - register netdev and devlink port + * @pf: pointer to the PF struct + */ +static int ice_register_netdev(struct ice_pf *pf) +{ + struct ice_vsi *vsi; + int err = 0; + + vsi = ice_get_main_vsi(pf); + if (!vsi || !vsi->netdev) + return -EIO; + + err = register_netdev(vsi->netdev); + if (err) + goto err_register_netdev; + + set_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state); + netif_carrier_off(vsi->netdev); + netif_tx_stop_all_queues(vsi->netdev); + err = ice_devlink_create_port(vsi); + if (err) + goto err_devlink_create; + + devlink_port_type_eth_set(&vsi->devlink_port, vsi->netdev); + + return 0; +err_devlink_create: + unregister_netdev(vsi->netdev); + clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state); +err_register_netdev: + free_netdev(vsi->netdev); + vsi->netdev = NULL; + clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state); + return err; +} + +/** * ice_probe - Device initialization routine * @pdev: PCI device information struct * @ent: entry in ice_pci_tbl @@ -4007,7 +4007,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) if (err) return err; - err = pcim_iomap_regions(pdev, BIT(ICE_BAR0), pci_name(pdev)); + err = pcim_iomap_regions(pdev, BIT(ICE_BAR0), dev_driver_string(dev)); if (err) { dev_err(dev, "BAR0 I/O map error %d\n", err); return err; @@ -4031,9 +4031,9 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) pf->pdev = pdev; pci_set_drvdata(pdev, pf); - set_bit(__ICE_DOWN, pf->state); + 
set_bit(ICE_DOWN, pf->state); /* Disable service task until DOWN bit is cleared */ - set_bit(__ICE_SERVICE_DIS, pf->state); + set_bit(ICE_SERVICE_DIS, pf->state); hw = &pf->hw; hw->hw_addr = pcim_iomap_table(pdev)[ICE_BAR0]; @@ -4173,7 +4173,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) goto err_alloc_sw_unroll; } - clear_bit(__ICE_SERVICE_DIS, pf->state); + clear_bit(ICE_SERVICE_DIS, pf->state); /* tell the firmware we are up */ err = ice_send_version(pf); @@ -4192,28 +4192,25 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) goto err_send_version_unroll; } + /* not a fatal error if this fails */ err = ice_init_nvm_phy_type(pf->hw.port_info); - if (err) { + if (err) dev_err(dev, "ice_init_nvm_phy_type failed: %d\n", err); - goto err_send_version_unroll; - } + /* not a fatal error if this fails */ err = ice_update_link_info(pf->hw.port_info); - if (err) { + if (err) dev_err(dev, "ice_update_link_info failed: %d\n", err); - goto err_send_version_unroll; - } ice_init_link_dflt_override(pf->hw.port_info); /* if media available, initialize PHY settings */ if (pf->hw.port_info->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { + /* not a fatal error if this fails */ err = ice_init_phy_user_cfg(pf->hw.port_info); - if (err) { + if (err) dev_err(dev, "ice_init_phy_user_cfg failed: %d\n", err); - goto err_send_version_unroll; - } if (!test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags)) { struct ice_vsi *vsi = ice_get_main_vsi(pf); @@ -4265,15 +4262,20 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) pcie_print_link_status(pf->pdev); probe_done: + err = ice_register_netdev(pf); + if (err) + goto err_netdev_reg; + /* ready to go, so clear down state bit */ - clear_bit(__ICE_DOWN, pf->state); + clear_bit(ICE_DOWN, pf->state); return 0; +err_netdev_reg: err_send_version_unroll: ice_vsi_release_all(pf); err_alloc_sw_unroll: - set_bit(__ICE_SERVICE_DIS, pf->state); - set_bit(__ICE_DOWN, pf->state); + set_bit(ICE_SERVICE_DIS, pf->state); + set_bit(ICE_DOWN, pf->state); devm_kfree(dev, pf->first_sw); err_msix_misc_unroll: ice_free_irq_msix_misc(pf); @@ -4314,7 +4316,7 @@ static void ice_set_wake(struct ice_pf *pf) } /** - * ice_setup_magic_mc_wake - setup device to wake on multicast magic packet + * ice_setup_mc_magic_wake - setup device to wake on multicast magic packet * @pf: pointer to the PF struct * * Issue firmware command to enable multicast magic wake, making @@ -4373,11 +4375,11 @@ static void ice_remove(struct pci_dev *pdev) } if (test_bit(ICE_FLAG_SRIOV_ENA, pf->flags)) { - set_bit(__ICE_VF_RESETS_DISABLED, pf->state); + set_bit(ICE_VF_RESETS_DISABLED, pf->state); ice_free_vfs(pf); } - set_bit(__ICE_DOWN, pf->state); + set_bit(ICE_DOWN, pf->state); ice_service_task_stop(pf); ice_aq_cancel_waiting_tasks(pf); @@ -4537,13 +4539,13 @@ static int __maybe_unused ice_suspend(struct device *dev) disabled = ice_service_task_stop(pf); /* Already suspended?, then there is nothing to do */ - if (test_and_set_bit(__ICE_SUSPENDED, pf->state)) { + if (test_and_set_bit(ICE_SUSPENDED, pf->state)) { if (!disabled) ice_service_task_restart(pf); return 0; } - if (test_bit(__ICE_DOWN, pf->state) || + if (test_bit(ICE_DOWN, pf->state) || ice_is_reset_in_progress(pf->state)) { dev_err(dev, "can't suspend device in reset or already down\n"); if (!disabled) @@ -4568,6 +4570,7 @@ static int __maybe_unused ice_suspend(struct device *dev) continue; ice_vsi_free_q_vectors(pf->vsi[v]); } + 
ice_free_cpu_rx_rmap(ice_get_main_vsi(pf)); ice_clear_interrupt_scheme(pf); pci_save_state(pdev); @@ -4614,16 +4617,16 @@ static int __maybe_unused ice_resume(struct device *dev) if (ret) dev_err(dev, "Cannot restore interrupt scheme: %d\n", ret); - clear_bit(__ICE_DOWN, pf->state); + clear_bit(ICE_DOWN, pf->state); /* Now perform PF reset and rebuild */ reset_type = ICE_RESET_PFR; /* re-enable service task for reset, but allow reset to schedule it */ - clear_bit(__ICE_SERVICE_DIS, pf->state); + clear_bit(ICE_SERVICE_DIS, pf->state); if (ice_schedule_reset(pf, reset_type)) dev_err(dev, "Reset during resume failed.\n"); - clear_bit(__ICE_SUSPENDED, pf->state); + clear_bit(ICE_SUSPENDED, pf->state); ice_service_task_restart(pf); /* Restart the service task */ @@ -4652,11 +4655,11 @@ ice_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t err) return PCI_ERS_RESULT_DISCONNECT; } - if (!test_bit(__ICE_SUSPENDED, pf->state)) { + if (!test_bit(ICE_SUSPENDED, pf->state)) { ice_service_task_stop(pf); - if (!test_bit(__ICE_PREPARED_FOR_RESET, pf->state)) { - set_bit(__ICE_PFR_REQ, pf->state); + if (!test_bit(ICE_PREPARED_FOR_RESET, pf->state)) { + set_bit(ICE_PFR_REQ, pf->state); ice_prepare_for_reset(pf); } } @@ -4723,7 +4726,7 @@ static void ice_pci_err_resume(struct pci_dev *pdev) return; } - if (test_bit(__ICE_SUSPENDED, pf->state)) { + if (test_bit(ICE_SUSPENDED, pf->state)) { dev_dbg(&pdev->dev, "%s failed to resume normal operations!\n", __func__); return; @@ -4744,11 +4747,11 @@ static void ice_pci_err_reset_prepare(struct pci_dev *pdev) { struct ice_pf *pf = pci_get_drvdata(pdev); - if (!test_bit(__ICE_SUSPENDED, pf->state)) { + if (!test_bit(ICE_SUSPENDED, pf->state)) { ice_service_task_stop(pf); - if (!test_bit(__ICE_PREPARED_FOR_RESET, pf->state)) { - set_bit(__ICE_PFR_REQ, pf->state); + if (!test_bit(ICE_PREPARED_FOR_RESET, pf->state)) { + set_bit(ICE_PFR_REQ, pf->state); ice_prepare_for_reset(pf); } } @@ -4895,7 +4898,7 @@ static int ice_set_mac_address(struct net_device *netdev, void *pi) return 0; } - if (test_bit(__ICE_DOWN, pf->state) || + if (test_bit(ICE_DOWN, pf->state) || ice_is_reset_in_progress(pf->state)) { netdev_err(netdev, "can't set mac %pM. device not ready\n", mac); @@ -4964,8 +4967,8 @@ static void ice_set_rx_mode(struct net_device *netdev) * ndo_set_rx_mode may be triggered even without a change in netdev * flags */ - set_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags); - set_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags); + set_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state); + set_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state); set_bit(ICE_FLAG_FLTR_SYNC, vsi->back->flags); /* schedule our worker thread which will take care of @@ -5114,10 +5117,10 @@ ice_set_features(struct net_device *netdev, netdev_features_t features) * separate if/else statements to guarantee each feature is checked */ if (features & NETIF_F_RXHASH && !(netdev->features & NETIF_F_RXHASH)) - ret = ice_vsi_manage_rss_lut(vsi, true); + ice_vsi_manage_rss_lut(vsi, true); else if (!(features & NETIF_F_RXHASH) && netdev->features & NETIF_F_RXHASH) - ret = ice_vsi_manage_rss_lut(vsi, false); + ice_vsi_manage_rss_lut(vsi, false); if ((features & NETIF_F_HW_VLAN_CTAG_RX) && !(netdev->features & NETIF_F_HW_VLAN_CTAG_RX)) @@ -5198,6 +5201,105 @@ int ice_vsi_cfg(struct ice_vsi *vsi) return err; } +/* THEORY OF MODERATION: + * The below code creates custom DIM profiles for use by this driver, because + * the ice driver hardware works differently than the hardware that DIMLIB was + * originally made for. 
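 *
 * A worked example of the rate math, assuming the ice_dim profiles defined
 * below (both fields are in microseconds, so the longer of the two delays
 * sets the interrupt ceiling):
 *
 *	max interrupts/sec = 1,000,000 / max(itr, intrl)
 *
 *	{ .itr = 2,   .intrl = 10 } -> 100,000 ints/s: the 10us rate limit
 *	                               caps what a 2us ITR alone would allow
 *	{ .itr = 128, .intrl = 0  } ->  ~7,800 ints/s: a large ITR needs no
 *	                               extra rate limiting
 *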
ice hardware doesn't have packet count limits that + * can trigger an interrupt, but it *does* have interrupt rate limit support, + * and this code adds that capability to be used by the driver when it's using + * DIMLIB. The DIMLIB code was always designed to be a suggestion to the driver + * for how to "respond" to traffic and interrupts, so this driver uses a + * slightly different set of moderation parameters to get best performance. + */ +struct ice_dim { + /* the throttle rate for interrupts, basically worst case delay before + * an initial interrupt fires, value is stored in microseconds. + */ + u16 itr; + /* the rate limit for interrupts, which can cap a delay from a small + * ITR at a certain amount of interrupts per second. e.g. a 2us ITR + * could yield as much as 500,000 interrupts per second, but with a + * 10us rate limit, it limits to 100,000 interrupts per second. Value + * is stored in microseconds. + */ + u16 intrl; +}; + +/* Make a different profile for Rx that doesn't allow quite so aggressive + * moderation at the high end (it maxes out at 128us or about 8k interrupts a + * second). The INTRL/rate parameters here are only useful to cap small ITR + * values, which is why for larger ITRs, like 128, which can only generate + * 8k interrupts per second, there is no point in rate limiting and the values + * are set to zero. The rate limit values do affect latency, and so must + * be reasonably small so as not to impact latency-sensitive tests. + */ +static const struct ice_dim rx_profile[] = { + {2, 10}, + {8, 16}, + {32, 0}, + {96, 0}, + {128, 0} +}; + +/* The transmit profile, which has the same sorts of values + * as the previous struct. + */ +static const struct ice_dim tx_profile[] = { + {2, 10}, + {8, 16}, + {64, 0}, + {128, 0}, + {256, 0} +}; + +static void ice_tx_dim_work(struct work_struct *work) +{ + struct ice_ring_container *rc; + struct ice_q_vector *q_vector; + struct dim *dim; + u16 itr, intrl; + + dim = container_of(work, struct dim, work); + rc = container_of(dim, struct ice_ring_container, dim); + q_vector = container_of(rc, struct ice_q_vector, tx); + + if (dim->profile_ix >= ARRAY_SIZE(tx_profile)) + dim->profile_ix = ARRAY_SIZE(tx_profile) - 1; + + /* look up the values in our local table */ + itr = tx_profile[dim->profile_ix].itr; + intrl = tx_profile[dim->profile_ix].intrl; + + ice_write_itr(rc, itr); + ice_write_intrl(q_vector, intrl); + + dim->state = DIM_START_MEASURE; +} + +static void ice_rx_dim_work(struct work_struct *work) +{ + struct ice_ring_container *rc; + struct ice_q_vector *q_vector; + struct dim *dim; + u16 itr, intrl; + + dim = container_of(work, struct dim, work); + rc = container_of(dim, struct ice_ring_container, dim); + q_vector = container_of(rc, struct ice_q_vector, rx); + + if (dim->profile_ix >= ARRAY_SIZE(rx_profile)) + dim->profile_ix = ARRAY_SIZE(rx_profile) - 1; + + /* look up the values in our local table */ + itr = rx_profile[dim->profile_ix].itr; + intrl = rx_profile[dim->profile_ix].intrl; + + ice_write_itr(rc, itr); + ice_write_intrl(q_vector, intrl); + + dim->state = DIM_START_MEASURE; +} + /** * ice_napi_enable_all - Enable NAPI for all q_vectors in the VSI * @vsi: the VSI being configured @@ -5212,6 +5314,12 @@ static void ice_napi_enable_all(struct ice_vsi *vsi) ice_for_each_q_vector(vsi, q_idx) { struct ice_q_vector *q_vector = vsi->q_vectors[q_idx]; + INIT_WORK(&q_vector->tx.dim.work, ice_tx_dim_work); + q_vector->tx.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + + INIT_WORK(&q_vector->rx.dim.work, ice_rx_dim_work);
+ q_vector->rx.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + if (q_vector->rx.ring || q_vector->tx.ring) napi_enable(&q_vector->napi); } @@ -5238,7 +5346,7 @@ static int ice_up_complete(struct ice_vsi *vsi) if (err) return err; - clear_bit(__ICE_DOWN, vsi->state); + clear_bit(ICE_VSI_DOWN, vsi->state); ice_napi_enable_all(vsi); ice_vsi_ena_irq(vsi); @@ -5345,7 +5453,6 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) vsi->tx_linearize = 0; vsi->rx_buf_failed = 0; vsi->rx_page_failed = 0; - vsi->rx_gro_dropped = 0; rcu_read_lock(); @@ -5360,7 +5467,6 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) vsi_stats->rx_bytes += bytes; vsi->rx_buf_failed += ring->rx_stats.alloc_buf_failed; vsi->rx_page_failed += ring->rx_stats.alloc_page_failed; - vsi->rx_gro_dropped += ring->rx_stats.gro_dropped; } /* update XDP Tx rings counters */ @@ -5381,8 +5487,8 @@ void ice_update_vsi_stats(struct ice_vsi *vsi) struct ice_eth_stats *cur_es = &vsi->eth_stats; struct ice_pf *pf = vsi->back; - if (test_bit(__ICE_DOWN, vsi->state) || - test_bit(__ICE_CFG_BUSY, pf->state)) + if (test_bit(ICE_VSI_DOWN, vsi->state) || + test_bit(ICE_CFG_BUSY, pf->state)) return; /* get stats as recorded by Tx/Rx rings */ @@ -5392,7 +5498,7 @@ void ice_update_vsi_stats(struct ice_vsi *vsi) ice_update_eth_stats(vsi); cur_ns->tx_errors = cur_es->tx_errors; - cur_ns->rx_dropped = cur_es->rx_discards + vsi->rx_gro_dropped; + cur_ns->rx_dropped = cur_es->rx_discards; cur_ns->tx_dropped = cur_es->tx_discards; cur_ns->multicast = cur_es->rx_multicast; @@ -5586,7 +5692,7 @@ void ice_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) * But, only call the update routine and read the registers if VSI is * not down. */ - if (!test_bit(__ICE_DOWN, vsi->state)) + if (!test_bit(ICE_VSI_DOWN, vsi->state)) ice_update_vsi_ring_stats(vsi); stats->tx_packets = vsi_stats->tx_packets; stats->tx_bytes = vsi_stats->tx_bytes; @@ -5622,6 +5728,9 @@ static void ice_napi_disable_all(struct ice_vsi *vsi) if (q_vector->rx.ring || q_vector->tx.ring) napi_disable(&q_vector->napi); + + cancel_work_sync(&q_vector->tx.dim.work); + cancel_work_sync(&q_vector->rx.dim.work); } } @@ -5634,7 +5743,7 @@ int ice_down(struct ice_vsi *vsi) int i, tx_err, rx_err, link_err = 0; /* Caller of this function is expected to set the - * vsi->state __ICE_DOWN bit + * vsi->state ICE_DOWN bit */ if (vsi->netdev) { netif_carrier_off(vsi->netdev); @@ -5786,7 +5895,7 @@ int ice_vsi_open_ctrl(struct ice_vsi *vsi) if (err) goto err_up_complete; - clear_bit(__ICE_DOWN, vsi->state); + clear_bit(ICE_VSI_DOWN, vsi->state); ice_vsi_ena_irq(vsi); return 0; @@ -5982,7 +6091,7 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) enum ice_status ret; int err; - if (test_bit(__ICE_DOWN, pf->state)) + if (test_bit(ICE_DOWN, pf->state)) goto clear_recovery; dev_dbg(dev, "rebuilding PF after reset_type=%d\n", reset_type); @@ -6098,7 +6207,7 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) ice_replay_post(hw); /* if we get here, reset flow is successful */ - clear_bit(__ICE_RESET_FAILED, pf->state); + clear_bit(ICE_RESET_FAILED, pf->state); return; err_vsi_rebuild: @@ -6106,10 +6215,10 @@ err_sched_init_port: ice_sched_cleanup_all(hw); err_init_ctrlq: ice_shutdown_all_ctrlq(hw); - set_bit(__ICE_RESET_FAILED, pf->state); + set_bit(ICE_RESET_FAILED, pf->state); clear_recovery: /* set this bit in PF state to control service task scheduling */ - set_bit(__ICE_NEEDS_RESTART, pf->state); + 
set_bit(ICE_NEEDS_RESTART, pf->state); dev_err(dev, "Rebuild failed, unload and reload driver\n"); } @@ -6173,7 +6282,7 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) netdev->mtu = (unsigned int)new_mtu; /* if VSI is up, bring it down and then back up */ - if (!test_and_set_bit(__ICE_DOWN, vsi->state)) { + if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) { int err; err = ice_down(vsi); @@ -6308,89 +6417,118 @@ const char *ice_stat_str(enum ice_status stat_err) } /** - * ice_set_rss - Set RSS keys and lut + * ice_set_rss_lut - Set RSS LUT * @vsi: Pointer to VSI structure - * @seed: RSS hash seed * @lut: Lookup table * @lut_size: Lookup table size * * Returns 0 on success, negative on failure */ -int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) +int ice_set_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size) { - struct ice_pf *pf = vsi->back; - struct ice_hw *hw = &pf->hw; + struct ice_aq_get_set_rss_lut_params params = {}; + struct ice_hw *hw = &vsi->back->hw; enum ice_status status; - struct device *dev; - dev = ice_pf_to_dev(pf); - if (seed) { - struct ice_aqc_get_set_rss_keys *buf = - (struct ice_aqc_get_set_rss_keys *)seed; + if (!lut) + return -EINVAL; - status = ice_aq_set_rss_key(hw, vsi->idx, buf); + params.vsi_handle = vsi->idx; + params.lut_size = lut_size; + params.lut_type = vsi->rss_lut_type; + params.lut = lut; - if (status) { - dev_err(dev, "Cannot set RSS key, err %s aq_err %s\n", - ice_stat_str(status), - ice_aq_str(hw->adminq.sq_last_status)); - return -EIO; - } + status = ice_aq_set_rss_lut(hw, ¶ms); + if (status) { + dev_err(ice_pf_to_dev(vsi->back), "Cannot set RSS lut, err %s aq_err %s\n", + ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); + return -EIO; } - if (lut) { - status = ice_aq_set_rss_lut(hw, vsi->idx, vsi->rss_lut_type, - lut, lut_size); - if (status) { - dev_err(dev, "Cannot set RSS lut, err %s aq_err %s\n", - ice_stat_str(status), - ice_aq_str(hw->adminq.sq_last_status)); - return -EIO; - } + return 0; +} + +/** + * ice_set_rss_key - Set RSS key + * @vsi: Pointer to the VSI structure + * @seed: RSS hash seed + * + * Returns 0 on success, negative on failure + */ +int ice_set_rss_key(struct ice_vsi *vsi, u8 *seed) +{ + struct ice_hw *hw = &vsi->back->hw; + enum ice_status status; + + if (!seed) + return -EINVAL; + + status = ice_aq_set_rss_key(hw, vsi->idx, (struct ice_aqc_get_set_rss_keys *)seed); + if (status) { + dev_err(ice_pf_to_dev(vsi->back), "Cannot set RSS key, err %s aq_err %s\n", + ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); + return -EIO; } return 0; } /** - * ice_get_rss - Get RSS keys and lut + * ice_get_rss_lut - Get RSS LUT * @vsi: Pointer to VSI structure - * @seed: Buffer to store the keys * @lut: Buffer to store the lookup table entries * @lut_size: Size of buffer to store the lookup table entries * * Returns 0 on success, negative on failure */ -int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) +int ice_get_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size) { - struct ice_pf *pf = vsi->back; - struct ice_hw *hw = &pf->hw; + struct ice_aq_get_set_rss_lut_params params = {}; + struct ice_hw *hw = &vsi->back->hw; enum ice_status status; - struct device *dev; - dev = ice_pf_to_dev(pf); - if (seed) { - struct ice_aqc_get_set_rss_keys *buf = - (struct ice_aqc_get_set_rss_keys *)seed; + if (!lut) + return -EINVAL; - status = ice_aq_get_rss_key(hw, vsi->idx, buf); - if (status) { - dev_err(dev, "Cannot get RSS key, err %s aq_err %s\n", - 
ice_stat_str(status), - ice_aq_str(hw->adminq.sq_last_status)); - return -EIO; - } + params.vsi_handle = vsi->idx; + params.lut_size = lut_size; + params.lut_type = vsi->rss_lut_type; + params.lut = lut; + + status = ice_aq_get_rss_lut(hw, ¶ms); + if (status) { + dev_err(ice_pf_to_dev(vsi->back), "Cannot get RSS lut, err %s aq_err %s\n", + ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); + return -EIO; } - if (lut) { - status = ice_aq_get_rss_lut(hw, vsi->idx, vsi->rss_lut_type, - lut, lut_size); - if (status) { - dev_err(dev, "Cannot get RSS lut, err %s aq_err %s\n", - ice_stat_str(status), - ice_aq_str(hw->adminq.sq_last_status)); - return -EIO; - } + return 0; +} + +/** + * ice_get_rss_key - Get RSS key + * @vsi: Pointer to VSI structure + * @seed: Buffer to store the key in + * + * Returns 0 on success, negative on failure + */ +int ice_get_rss_key(struct ice_vsi *vsi, u8 *seed) +{ + struct ice_hw *hw = &vsi->back->hw; + enum ice_status status; + + if (!seed) + return -EINVAL; + + status = ice_aq_get_rss_key(hw, vsi->idx, (struct ice_aqc_get_set_rss_keys *)seed); + if (status) { + dev_err(ice_pf_to_dev(vsi->back), "Cannot get RSS key, err %s aq_err %s\n", + ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); + return -EIO; } return 0; @@ -6602,19 +6740,19 @@ static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) switch (pf->tx_timeout_recovery_level) { case 1: - set_bit(__ICE_PFR_REQ, pf->state); + set_bit(ICE_PFR_REQ, pf->state); break; case 2: - set_bit(__ICE_CORER_REQ, pf->state); + set_bit(ICE_CORER_REQ, pf->state); break; case 3: - set_bit(__ICE_GLOBR_REQ, pf->state); + set_bit(ICE_GLOBR_REQ, pf->state); break; default: netdev_err(netdev, "tx_timeout recovery unsuccessful, device is in unrecoverable state.\n"); - set_bit(__ICE_DOWN, pf->state); - set_bit(__ICE_NEEDS_RESTART, vsi->state); - set_bit(__ICE_SERVICE_DIS, pf->state); + set_bit(ICE_DOWN, pf->state); + set_bit(ICE_VSI_NEEDS_RESTART, vsi->state); + set_bit(ICE_SERVICE_DIS, pf->state); break; } @@ -6637,35 +6775,53 @@ static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) int ice_open(struct net_device *netdev) { struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_pf *pf = np->vsi->back; + + if (ice_is_reset_in_progress(pf->state)) { + netdev_err(netdev, "can't open net device while reset is in progress"); + return -EBUSY; + } + + return ice_open_internal(netdev); +} + +/** + * ice_open_internal - Called when a network interface becomes active + * @netdev: network interface device structure + * + * Internal ice_open implementation. 
Should not be used directly except for ice_open and reset + * handling routine + * + * Returns 0 on success, negative value on failure + */ +int ice_open_internal(struct net_device *netdev) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; struct ice_port_info *pi; + enum ice_status status; int err; - if (test_bit(__ICE_NEEDS_RESTART, pf->state)) { + if (test_bit(ICE_NEEDS_RESTART, pf->state)) { netdev_err(netdev, "driver needs to be unloaded and reloaded\n"); return -EIO; } - if (test_bit(__ICE_DOWN, pf->state)) { - netdev_err(netdev, "device is not ready yet\n"); - return -EBUSY; - } - netif_carrier_off(netdev); pi = vsi->port_info; - err = ice_update_link_info(pi); - if (err) { - netdev_err(netdev, "Failed to get link info, error %d\n", - err); - return err; + status = ice_update_link_info(pi); + if (status) { + netdev_err(netdev, "Failed to get link info, error %s\n", + ice_stat_str(status)); + return -EIO; } /* Set PHY if there is media, otherwise, turn off PHY */ if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { clear_bit(ICE_FLAG_NO_MEDIA, pf->flags); - if (!test_bit(__ICE_PHY_INIT_COMPLETE, pf->state)) { + if (!test_bit(ICE_PHY_INIT_COMPLETE, pf->state)) { err = ice_init_phy_user_cfg(pi); if (err) { netdev_err(netdev, "Failed to initialize PHY settings, error %d\n", @@ -6682,12 +6838,7 @@ int ice_open(struct net_device *netdev) } } else { set_bit(ICE_FLAG_NO_MEDIA, pf->flags); - err = ice_aq_set_link_restart_an(pi, false, NULL); - if (err) { - netdev_err(netdev, "Failed to set PHY state, VSI %d error %d\n", - vsi->vsi_num, err); - return err; - } + ice_set_link(vsi, false); } err = ice_vsi_open(vsi); @@ -6715,6 +6866,12 @@ int ice_stop(struct net_device *netdev) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + + if (ice_is_reset_in_progress(pf->state)) { + netdev_err(netdev, "can't stop net device while reset is in progress"); + return -EBUSY; + } ice_vsi_close(vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c index 75ccbfc07f99..fee37a5844cf 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.c +++ b/drivers/net/ethernet/intel/ice/ice_nvm.c @@ -644,6 +644,7 @@ ice_get_orom_civd_data(struct ice_hw *hw, enum ice_bank_select bank, /* Verify that the simple checksum is zero */ for (i = 0; i < sizeof(tmp); i++) + /* cppcheck-suppress objectIndex */ sum += ((u8 *)&tmp)[i]; if (sum) { diff --git a/drivers/net/ethernet/intel/ice/ice_protocol_type.h b/drivers/net/ethernet/intel/ice/ice_protocol_type.h index 7f4c1ec1eff2..199aa5b71540 100644 --- a/drivers/net/ethernet/intel/ice/ice_protocol_type.h +++ b/drivers/net/ethernet/intel/ice/ice_protocol_type.h @@ -13,6 +13,9 @@ enum ice_prot_id { ICE_PROT_ID_INVAL = 0, ICE_PROT_MAC_OF_OR_S = 1, + ICE_PROT_MAC_IL = 4, + ICE_PROT_ETYPE_OL = 9, + ICE_PROT_ETYPE_IL = 10, ICE_PROT_IPV4_OF_OR_S = 32, ICE_PROT_IPV4_IL = 33, ICE_PROT_IPV6_OF_OR_S = 40, @@ -21,7 +24,14 @@ enum ice_prot_id { ICE_PROT_UDP_OF = 52, ICE_PROT_UDP_IL_OR_S = 53, ICE_PROT_GRE_OF = 64, + ICE_PROT_ESP_F = 88, + ICE_PROT_ESP_2 = 89, ICE_PROT_SCTP_IL = 96, + ICE_PROT_ICMP_IL = 98, + ICE_PROT_ICMPV6_IL = 100, + ICE_PROT_PPPOE = 103, + ICE_PROT_L2TPV3 = 104, + ICE_PROT_ARP_OF = 118, ICE_PROT_META_ID = 255, /* when offset == metadata */ ICE_PROT_INVALID = 255 /* when offset == ICE_FV_OFFSET_INVAL */ }; diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c 
b/drivers/net/ethernet/intel/ice/ice_sched.c index 2403cb38b93c..2f097637e405 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -919,7 +919,7 @@ ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node, } /** - * ice_sched_add_nodes_to_layer - Add nodes to a given layer + * ice_sched_add_nodes_to_hw_layer - Add nodes to HW layer * @pi: port information structure * @tc_node: pointer to TC node * @parent: pointer to parent node @@ -928,82 +928,107 @@ ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node, * @first_node_teid: pointer to the first node TEID * @num_nodes_added: pointer to number of nodes added * - * This function add nodes to a given layer. + * Add nodes into specific HW layer. */ static enum ice_status -ice_sched_add_nodes_to_layer(struct ice_port_info *pi, - struct ice_sched_node *tc_node, - struct ice_sched_node *parent, u8 layer, - u16 num_nodes, u32 *first_node_teid, - u16 *num_nodes_added) +ice_sched_add_nodes_to_hw_layer(struct ice_port_info *pi, + struct ice_sched_node *tc_node, + struct ice_sched_node *parent, u8 layer, + u16 num_nodes, u32 *first_node_teid, + u16 *num_nodes_added) { - u32 *first_teid_ptr = first_node_teid; - u16 new_num_nodes, max_child_nodes; - enum ice_status status = 0; - struct ice_hw *hw = pi->hw; - u16 num_added = 0; - u32 temp; + u16 max_child_nodes; *num_nodes_added = 0; if (!num_nodes) - return status; + return 0; - if (!parent || layer < hw->sw_entry_point_layer) + if (!parent || layer < pi->hw->sw_entry_point_layer) return ICE_ERR_PARAM; /* max children per node per layer */ - max_child_nodes = hw->max_children[parent->tx_sched_layer]; + max_child_nodes = pi->hw->max_children[parent->tx_sched_layer]; - /* current number of children + required nodes exceed max children ? */ + /* current number of children + required nodes exceed max children */ if ((parent->num_children + num_nodes) > max_child_nodes) { /* Fail if the parent is a TC node */ if (parent == tc_node) return ICE_ERR_CFG; + return ICE_ERR_MAX_LIMIT; + } + + return ice_sched_add_elems(pi, tc_node, parent, layer, num_nodes, + num_nodes_added, first_node_teid); +} + +/** + * ice_sched_add_nodes_to_layer - Add nodes to a given layer + * @pi: port information structure + * @tc_node: pointer to TC node + * @parent: pointer to parent node + * @layer: layer number to add nodes + * @num_nodes: number of nodes to be added + * @first_node_teid: pointer to the first node TEID + * @num_nodes_added: pointer to number of nodes added + * + * This function add nodes to a given layer. + */ +static enum ice_status +ice_sched_add_nodes_to_layer(struct ice_port_info *pi, + struct ice_sched_node *tc_node, + struct ice_sched_node *parent, u8 layer, + u16 num_nodes, u32 *first_node_teid, + u16 *num_nodes_added) +{ + u32 *first_teid_ptr = first_node_teid; + u16 new_num_nodes = num_nodes; + enum ice_status status = 0; + *num_nodes_added = 0; + while (*num_nodes_added < num_nodes) { + u16 max_child_nodes, num_added = 0; + /* cppcheck-suppress unusedVariable */ + u32 temp; + + status = ice_sched_add_nodes_to_hw_layer(pi, tc_node, parent, + layer, new_num_nodes, + first_teid_ptr, + &num_added); + if (!status) + *num_nodes_added += num_added; + /* added more nodes than requested ? 
*/ + if (*num_nodes_added > num_nodes) { + ice_debug(pi->hw, ICE_DBG_SCHED, "added extra nodes %d %d\n", num_nodes, + *num_nodes_added); + status = ICE_ERR_CFG; + break; + } + /* break if all the nodes are added successfully */ + if (!status && (*num_nodes_added == num_nodes)) + break; + /* break if the error is not max limit */ + if (status && status != ICE_ERR_MAX_LIMIT) + break; + /* Exceeded the max children */ + max_child_nodes = pi->hw->max_children[parent->tx_sched_layer]; /* utilize all the spaces if the parent is not full */ if (parent->num_children < max_child_nodes) { new_num_nodes = max_child_nodes - parent->num_children; - /* this recursion is intentional, and wouldn't - * go more than 2 calls + } else { + /* This parent is full, try the next sibling */ + parent = parent->sibling; + /* Don't modify the first node TEID memory if the + * first node was added already in the above call. + * Instead send some temp memory for all other + * recursive calls. */ - status = ice_sched_add_nodes_to_layer(pi, tc_node, - parent, layer, - new_num_nodes, - first_node_teid, - &num_added); - if (status) - return status; + if (num_added) + first_teid_ptr = &temp; - *num_nodes_added += num_added; + new_num_nodes = num_nodes - *num_nodes_added; } - /* Don't modify the first node TEID memory if the first node was - * added already in the above call. Instead send some temp - * memory for all other recursive calls. - */ - if (num_added) - first_teid_ptr = &temp; - - new_num_nodes = num_nodes - num_added; - - /* This parent is full, try the next sibling */ - parent = parent->sibling; - - /* this recursion is intentional, for 1024 queues - * per VSI, it goes max of 16 iterations. - * 1024 / 8 = 128 layer 8 nodes - * 128 /8 = 16 (add 8 nodes per iteration) - */ - status = ice_sched_add_nodes_to_layer(pi, tc_node, parent, - layer, new_num_nodes, - first_teid_ptr, - &num_added); - *num_nodes_added += num_added; - return status; } - - status = ice_sched_add_elems(pi, tc_node, parent, layer, num_nodes, - num_nodes_added, first_node_teid); return status; } @@ -1857,7 +1882,7 @@ ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs, } /** - * ice_sched_rm_agg_vsi_entry - remove aggregator related VSI info entry + * ice_sched_rm_agg_vsi_info - remove aggregator related VSI info entry * @pi: port information structure * @vsi_handle: software VSI handle * diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 554f567476f3..aa11d07793d4 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -2,7 +2,6 @@ /* Copyright (c) 2018, Intel Corporation. */ #include "ice_common.h" -#include "ice_adminq_cmd.h" #include "ice_sriov.h" /** @@ -132,3 +131,402 @@ u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed) return speed; } + +/* The mailbox overflow detection algorithm helps to check if there + * is a possibility of a malicious VF transmitting too many MBX messages to the + * PF. + * 1. The mailbox snapshot structure, ice_mbx_snapshot, is initialized during + * driver initialization in ice_init_hw() using ice_mbx_init_snapshot(). + * The struct ice_mbx_snapshot helps to track and traverse a static window of + * messages within the mailbox queue while looking for a malicious VF. + * + * 2. 
When the caller starts processing its mailbox queue in response to an + * interrupt, the structure ice_mbx_snapshot is expected to be cleared before + * the algorithm can be run for the first time for that interrupt. This can be + * done via ice_mbx_reset_snapshot(). + * + * 3. For every message read by the caller from the MBX Queue, the caller must + * call the detection algorithm's entry function ice_mbx_vf_state_handler(). + * Before every call to ice_mbx_vf_state_handler() the struct ice_mbx_data is + * filled as it is required to be passed to the algorithm. + * + * 4. Every time a message is read from the MBX queue, a VF ID is received, which + * is passed to the state handler. The boolean output is_malvf of the state + * handler ice_mbx_vf_state_handler() serves as an indicator to the caller + * whether this VF is malicious or not. + * + * 5. When a VF is identified to be malicious, the caller can send a message + * to the system administrator. The caller can invoke ice_mbx_report_malvf() + * to help determine if a malicious VF is to be reported or not. This function + * requires the caller to maintain a global bitmap to track all malicious VFs + * and pass that to ice_mbx_report_malvf() along with the VF ID which was identified + * to be malicious by ice_mbx_vf_state_handler(). A call-pattern sketch follows + * in the ice_mbx_report_malvf() description below. + * + * 6. The global bitmap maintained by PF can be cleared completely if PF is in + * reset, or the bit corresponding to a VF can be cleared if that VF is in reset. + * When a VF is shut down and brought back up, we assume that the new VF + * brought up is not malicious, and hence it will be reported again if found + * malicious. + * + * 7. The function ice_mbx_reset_snapshot() is called to reset the information + * in ice_mbx_snapshot for every new mailbox interrupt handled. + * + * 8. The memory allocated for variables in ice_mbx_snapshot is de-allocated + * when the driver is unloaded. + */ +#define ICE_RQ_DATA_MASK(rq_data) ((rq_data) & PF_MBX_ARQH_ARQH_M) +/* Using the highest value for an unsigned 16-bit value 0xFFFF to indicate that + * the max messages check must be ignored in the algorithm + */ +#define ICE_IGNORE_MAX_MSG_CNT 0xFFFF + +/** + * ice_mbx_traverse - Pass through mailbox snapshot + * @hw: pointer to the HW struct + * @new_state: new algorithm state + * + * Traverse the mailbox static snapshot without checking + * for malicious VFs. + */ +static void +ice_mbx_traverse(struct ice_hw *hw, + enum ice_mbx_snapshot_state *new_state) +{ + struct ice_mbx_snap_buffer_data *snap_buf; + u32 num_iterations; + + snap_buf = &hw->mbx_snapshot.mbx_buf; + + /* As the mailbox buffer is circular, apply a mask + * to the incremented iteration count. + */ + num_iterations = ICE_RQ_DATA_MASK(++snap_buf->num_iterations); + + /* Check either of the below conditions to exit snapshot traversal: + * Condition-1: If the number of iterations in the mailbox is equal to + * the mailbox head, which would indicate that we have reached the end + * of the static snapshot. + * Condition-2: If the maximum messages serviced in the mailbox for a + * given interrupt is the highest possible value, then there is no need + * to check if the number of messages processed is equal to it. If not, + * check if the number of messages processed is greater than or equal + * to the maximum number of mailbox entries serviced in the current work item.
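 *
 * Worked numbers for the conditions above, assuming next_to_clean is 30
 * with ten messages pending when the snapshot is captured: head is logged
 * as 40 and tail as 29, num_iterations starts at the tail value 29, and
 * the masked increment walks it toward 40 (condition 1). If instead
 * max_num_msgs_mbx were, say, 8 rather than ICE_IGNORE_MAX_MSG_CNT, the
 * traversal would stop after the eighth processed message (condition 2).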
+ */ + if (num_iterations == snap_buf->head || + (snap_buf->max_num_msgs_mbx < ICE_IGNORE_MAX_MSG_CNT && + ++snap_buf->num_msg_proc >= snap_buf->max_num_msgs_mbx)) + *new_state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT; +} + +/** + * ice_mbx_detect_malvf - Detect malicious VF in snapshot + * @hw: pointer to the HW struct + * @vf_id: relative virtual function ID + * @new_state: new algorithm state + * @is_malvf: boolean output to indicate if VF is malicious + * + * This function tracks the number of asynchronous messages + * sent per VF and marks the VF as malicious if it exceeds + * the permissible number of messages to send. + */ +static enum ice_status +ice_mbx_detect_malvf(struct ice_hw *hw, u16 vf_id, + enum ice_mbx_snapshot_state *new_state, + bool *is_malvf) +{ + struct ice_mbx_snapshot *snap = &hw->mbx_snapshot; + + if (vf_id >= snap->mbx_vf.vfcntr_len) + return ICE_ERR_OUT_OF_RANGE; + + /* increment the message count in the VF array */ + snap->mbx_vf.vf_cntr[vf_id]++; + + if (snap->mbx_vf.vf_cntr[vf_id] >= ICE_ASYNC_VF_MSG_THRESHOLD) + *is_malvf = true; + + /* continue to iterate through the mailbox snapshot */ + ice_mbx_traverse(hw, new_state); + + return 0; +} + +/** + * ice_mbx_reset_snapshot - Reset mailbox snapshot structure + * @snap: pointer to mailbox snapshot structure in the ice_hw struct + * + * Reset the mailbox snapshot structure and clear VF counter array. + */ +static void ice_mbx_reset_snapshot(struct ice_mbx_snapshot *snap) +{ + u32 vfcntr_len; + + if (!snap || !snap->mbx_vf.vf_cntr) + return; + + /* Clear VF counters. */ + vfcntr_len = snap->mbx_vf.vfcntr_len; + if (vfcntr_len) + memset(snap->mbx_vf.vf_cntr, 0, + (vfcntr_len * sizeof(*snap->mbx_vf.vf_cntr))); + + /* Reset mailbox snapshot for a new capture. */ + memset(&snap->mbx_buf, 0, sizeof(snap->mbx_buf)); + snap->mbx_buf.state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT; +} + +/** + * ice_mbx_vf_state_handler - Handle states of the overflow algorithm + * @hw: pointer to the HW struct + * @mbx_data: pointer to structure containing mailbox data + * @vf_id: relative virtual function (VF) ID + * @is_malvf: boolean output to indicate if VF is malicious + * + * The function serves as an entry point for the malicious VF + * detection algorithm by handling the different states and state + * transitions of the algorithm: + * New snapshot: This state is entered when creating a new static + * snapshot. The data from any previous mailbox snapshot is + * cleared and a new capture of the mailbox head and tail is + * logged. This will be the new static snapshot to detect + * asynchronous messages sent by VFs. On capturing the snapshot + * and depending on whether the number of pending messages in that + * snapshot exceed the watermark value, the state machine enters + * traverse or detect states. + * Traverse: If pending message count is below watermark then iterate + * through the snapshot without any action on VF. + * Detect: If pending message count exceeds watermark traverse + * the static snapshot and look for a malicious VF. + */ +enum ice_status +ice_mbx_vf_state_handler(struct ice_hw *hw, + struct ice_mbx_data *mbx_data, u16 vf_id, + bool *is_malvf) +{ + struct ice_mbx_snapshot *snap = &hw->mbx_snapshot; + struct ice_mbx_snap_buffer_data *snap_buf; + struct ice_ctl_q_info *cq = &hw->mailboxq; + enum ice_mbx_snapshot_state new_state; + enum ice_status status = 0; + + if (!is_malvf || !mbx_data) + return ICE_ERR_BAD_PTR; + + /* When entering the mailbox state machine assume that the VF + * is not malicious until detected. 
+ */ + *is_malvf = false; + + /* Check that the maximum number of messages allowed to be processed while + * servicing the current interrupt exceeds the defined AVF message threshold. + */ + if (mbx_data->max_num_msgs_mbx <= ICE_ASYNC_VF_MSG_THRESHOLD) + return ICE_ERR_INVAL_SIZE; + + /* The watermark value should not be less than the threshold limit + * set for the number of asynchronous messages a VF can send to the mailbox, + * nor should it be greater than the maximum number of messages in the + * mailbox serviced in the current interrupt. + */ + if (mbx_data->async_watermark_val < ICE_ASYNC_VF_MSG_THRESHOLD || + mbx_data->async_watermark_val > mbx_data->max_num_msgs_mbx) + return ICE_ERR_PARAM; + + new_state = ICE_MAL_VF_DETECT_STATE_INVALID; + snap_buf = &snap->mbx_buf; + + switch (snap_buf->state) { + case ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT: + /* Clear any previously held data in the mailbox snapshot structure. */ + ice_mbx_reset_snapshot(snap); + + /* Collect the pending ARQ count, number of messages processed and + * the maximum number of messages allowed to be processed from the + * Mailbox for the current interrupt. + */ + snap_buf->num_pending_arq = mbx_data->num_pending_arq; + snap_buf->num_msg_proc = mbx_data->num_msg_proc; + snap_buf->max_num_msgs_mbx = mbx_data->max_num_msgs_mbx; + + /* Capture a new static snapshot of the mailbox by logging the + * head and tail of the snapshot, and set num_iterations to the tail + * value to mark the start of the iteration through the snapshot. + */ + snap_buf->head = ICE_RQ_DATA_MASK(cq->rq.next_to_clean + + mbx_data->num_pending_arq); + snap_buf->tail = ICE_RQ_DATA_MASK(cq->rq.next_to_clean - 1); + snap_buf->num_iterations = snap_buf->tail; + + /* The number of pending ARQ messages returned by ice_clean_rq_elem + * is the difference between the head and tail of the + * mailbox queue. Comparing this value against the watermark + * helps to check if we potentially have malicious VFs. + */ + if (snap_buf->num_pending_arq >= + mbx_data->async_watermark_val) { + new_state = ICE_MAL_VF_DETECT_STATE_DETECT; + status = ice_mbx_detect_malvf(hw, vf_id, &new_state, is_malvf); + } else { + new_state = ICE_MAL_VF_DETECT_STATE_TRAVERSE; + ice_mbx_traverse(hw, &new_state); + } + break; + + case ICE_MAL_VF_DETECT_STATE_TRAVERSE: + new_state = ICE_MAL_VF_DETECT_STATE_TRAVERSE; + ice_mbx_traverse(hw, &new_state); + break; + + case ICE_MAL_VF_DETECT_STATE_DETECT: + new_state = ICE_MAL_VF_DETECT_STATE_DETECT; + status = ice_mbx_detect_malvf(hw, vf_id, &new_state, is_malvf); + break; + + default: + new_state = ICE_MAL_VF_DETECT_STATE_INVALID; + status = ICE_ERR_CFG; + } + + snap_buf->state = new_state; + + return status; +} + +/** + * ice_mbx_report_malvf - Track and note malicious VF + * @hw: pointer to the HW struct + * @all_malvfs: all malicious VFs tracked by PF + * @bitmap_len: length of bitmap in bits + * @vf_id: relative virtual function ID of the malicious VF + * @report_malvf: boolean to indicate if malicious VF must be reported + * + * This function will update a bitmap that keeps track of the malicious + * VFs attached to the PF. A malicious VF must be reported only once if + * discovered between VF resets or loading, so the function checks + * the input vf_id against the bitmap to verify if the VF has been + * detected in any previous mailbox iterations.
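 *
 * A minimal call-pattern sketch (illustrative; the caller-owned pf->malvfs
 * bitmap, its bitmap_len, and the dev pointer are assumptions, while the
 * function signatures are the ones added by this patch):
 *
 *	if (!ice_mbx_vf_state_handler(hw, &mbxdata, vf_id, &malvf) &&
 *	    malvf &&
 *	    !ice_mbx_report_malvf(hw, pf->malvfs, bitmap_len, vf_id,
 *				  &report) && report)
 *		dev_warn(dev, "Malicious VF %u detected\n", vf_id);
 *
 * Because the bitmap is updated with test_and_set_bit(), the warning can
 * fire only once per VF until ice_mbx_clear_malvf() re-arms it on VF reset.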
+ */ +enum ice_status +ice_mbx_report_malvf(struct ice_hw *hw, unsigned long *all_malvfs, + u16 bitmap_len, u16 vf_id, bool *report_malvf) +{ + if (!all_malvfs || !report_malvf) + return ICE_ERR_PARAM; + + *report_malvf = false; + + if (bitmap_len < hw->mbx_snapshot.mbx_vf.vfcntr_len) + return ICE_ERR_INVAL_SIZE; + + if (vf_id >= bitmap_len) + return ICE_ERR_OUT_OF_RANGE; + + /* If the vf_id bit was not already set in the bitmap, set it and report */ + if (!test_and_set_bit(vf_id, all_malvfs)) + *report_malvf = true; + + return 0; +} + +/** + * ice_mbx_clear_malvf - Clear VF bitmap and counter for VF ID + * @snap: pointer to the mailbox snapshot structure + * @all_malvfs: all malicious VFs tracked by PF + * @bitmap_len: length of bitmap in bits + * @vf_id: relative virtual function ID of the malicious VF + * + * In case of a VF reset, this function can be called to clear + * the bit corresponding to the VF ID in the bitmap tracking all + * malicious VFs attached to the PF. The function also clears the + * VF counter array at the index of the VF ID. This is to ensure + * that the new VF loaded is not considered malicious before going + * through the overflow detection algorithm. + */ +enum ice_status +ice_mbx_clear_malvf(struct ice_mbx_snapshot *snap, unsigned long *all_malvfs, + u16 bitmap_len, u16 vf_id) +{ + if (!snap || !all_malvfs) + return ICE_ERR_PARAM; + + if (bitmap_len < snap->mbx_vf.vfcntr_len) + return ICE_ERR_INVAL_SIZE; + + /* Ensure VF ID value is not larger than bitmap or VF counter length */ + if (vf_id >= bitmap_len || vf_id >= snap->mbx_vf.vfcntr_len) + return ICE_ERR_OUT_OF_RANGE; + + /* Clear VF ID bit in the bitmap tracking malicious VFs attached to PF */ + clear_bit(vf_id, all_malvfs); + + /* Clear the VF counter in the mailbox snapshot structure for that VF ID. + * This is to ensure that if a VF is unloaded and a new one brought back + * up with the same VF ID for a snapshot currently in traversal or detect + * state, the counter for that VF ID does not increment on top of existing + * values in the mailbox overflow detection algorithm. + */ + snap->mbx_vf.vf_cntr[vf_id] = 0; + + return 0; +} + +/** + * ice_mbx_init_snapshot - Initialize mailbox snapshot structure + * @hw: pointer to the hardware structure + * @vf_count: number of VFs allocated on a PF + * + * Clear the mailbox snapshot structure and allocate memory + * for the VF counter array based on the number of VFs allocated + * on that PF. + * + * Assumption: This function will assume ice_get_caps() has already been + * called to ensure that the vf_count can be compared against the number + * of VFs supported as defined in the functional capabilities of the device. + */ +enum ice_status ice_mbx_init_snapshot(struct ice_hw *hw, u16 vf_count) +{ + struct ice_mbx_snapshot *snap = &hw->mbx_snapshot; + + /* Ensure that the number of VFs allocated is non-zero and + * is not greater than the number of supported VFs defined in + * the functional capabilities of the PF. + */ + if (!vf_count || vf_count > hw->func_caps.num_allocd_vfs) + return ICE_ERR_INVAL_SIZE; + + snap->mbx_vf.vf_cntr = devm_kcalloc(ice_hw_to_dev(hw), vf_count, + sizeof(*snap->mbx_vf.vf_cntr), + GFP_KERNEL); + if (!snap->mbx_vf.vf_cntr) + return ICE_ERR_NO_MEMORY; + + /* Setting the VF counter length to the number of allocated + * VFs for the given PF's functional capabilities. + */ + snap->mbx_vf.vfcntr_len = vf_count; + + /* Clear mbx_buf in the mailbox snapshot structure and set the + * mailbox snapshot state to a new capture.
+ */ + memset(&snap->mbx_buf, 0, sizeof(snap->mbx_buf)); + snap->mbx_buf.state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT; + + return 0; +} + +/** + * ice_mbx_deinit_snapshot - Free mailbox snapshot structure + * @hw: pointer to the hardware structure + * + * Clear the mailbox snapshot structure and free the VF counter array. + */ +void ice_mbx_deinit_snapshot(struct ice_hw *hw) +{ + struct ice_mbx_snapshot *snap = &hw->mbx_snapshot; + + /* Free VF counter array and reset VF counter length */ + devm_kfree(ice_hw_to_dev(hw), snap->mbx_vf.vf_cntr); + snap->mbx_vf.vfcntr_len = 0; + + /* Clear mbx_buf in the mailbox snapshot structure */ + memset(&snap->mbx_buf, 0, sizeof(snap->mbx_buf)); +} diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.h b/drivers/net/ethernet/intel/ice/ice_sriov.h index 3d78a0795138..161dc55d9e9c 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.h +++ b/drivers/net/ethernet/intel/ice/ice_sriov.h @@ -4,7 +4,14 @@ #ifndef _ICE_SRIOV_H_ #define _ICE_SRIOV_H_ -#include "ice_common.h" +#include "ice_type.h" +#include "ice_controlq.h" + +/* Defining the mailbox message threshold as 63 asynchronous + * pending messages. Normal VF functionality does not require + * sending more than 63 asynchronous pending messages. + */ +#define ICE_ASYNC_VF_MSG_THRESHOLD 63 #ifdef CONFIG_PCI_IOV enum ice_status @@ -12,6 +19,17 @@ ice_aq_send_msg_to_vf(struct ice_hw *hw, u16 vfid, u32 v_opcode, u32 v_retval, u8 *msg, u16 msglen, struct ice_sq_cd *cd); u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed); +enum ice_status +ice_mbx_vf_state_handler(struct ice_hw *hw, struct ice_mbx_data *mbx_data, + u16 vf_id, bool *is_mal_vf); +enum ice_status +ice_mbx_clear_malvf(struct ice_mbx_snapshot *snap, unsigned long *all_malvfs, + u16 bitmap_len, u16 vf_id); +enum ice_status ice_mbx_init_snapshot(struct ice_hw *hw, u16 vf_count); +void ice_mbx_deinit_snapshot(struct ice_hw *hw); +enum ice_status +ice_mbx_report_malvf(struct ice_hw *hw, unsigned long *all_malvfs, + u16 bitmap_len, u16 vf_id, bool *report_malvf); #else /* CONFIG_PCI_IOV */ static inline enum ice_status ice_aq_send_msg_to_vf(struct ice_hw __always_unused *hw, diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 67c965a3f5d2..357d3073d814 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -920,7 +920,7 @@ ice_create_vsi_list_map(struct ice_hw *hw, u16 *vsi_handle_arr, u16 num_vsi, struct ice_vsi_list_map_info *v_map; int i; - v_map = devm_kcalloc(ice_hw_to_dev(hw), 1, sizeof(*v_map), GFP_KERNEL); + v_map = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*v_map), GFP_KERNEL); if (!v_map) return NULL; @@ -1238,6 +1238,9 @@ ice_add_update_vsi_list(struct ice_hw *hw, ice_create_vsi_list_map(hw, &vsi_handle_arr[0], 2, vsi_list_id); + if (!m_entry->vsi_list_info) + return ICE_ERR_NO_MEMORY; + /* If this entry was large action then the large action needs * to be updated to point to FWD to VSI list */ @@ -2220,6 +2223,7 @@ ice_vsi_uses_fltr(struct ice_fltr_mgmt_list_entry *fm_entry, u16 vsi_handle) return ((fm_entry->fltr_info.fltr_act == ICE_FWD_TO_VSI && fm_entry->fltr_info.vsi_handle == vsi_handle) || (fm_entry->fltr_info.fltr_act == ICE_FWD_TO_VSI_LIST && + fm_entry->vsi_list_info && (test_bit(vsi_handle, fm_entry->vsi_list_info->vsi_map)))); } @@ -2292,14 +2296,12 @@ ice_add_to_vsi_fltr_list(struct ice_hw *hw, u16 vsi_handle, return ICE_ERR_PARAM; list_for_each_entry(fm_entry, lkup_list_head, list_entry) { - struct 
ice_fltr_info *fi; - - fi = &fm_entry->fltr_info; - if (!fi || !ice_vsi_uses_fltr(fm_entry, vsi_handle)) + if (!ice_vsi_uses_fltr(fm_entry, vsi_handle)) continue; status = ice_add_entry_to_vsi_fltr_list(hw, vsi_handle, - vsi_list_head, fi); + vsi_list_head, + &fm_entry->fltr_info); if (status) return status; } @@ -2622,7 +2624,7 @@ ice_remove_vsi_lkup_fltr(struct ice_hw *hw, u16 vsi_handle, &remove_list_head); mutex_unlock(rule_lock); if (status) - return; + goto free_fltr_list; switch (lkup) { case ICE_SW_LKUP_MAC: @@ -2645,6 +2647,7 @@ ice_remove_vsi_lkup_fltr(struct ice_hw *hw, u16 vsi_handle, break; } +free_fltr_list: list_for_each_entry_safe(fm_entry, tmp, &remove_list_head, list_entry) { list_del(&fm_entry->list_entry); devm_kfree(ice_hw_to_dev(hw), fm_entry); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index b7dc25da1202..e2b4b29ea207 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -309,7 +309,7 @@ static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget) smp_mb(); if (__netif_subqueue_stopped(tx_ring->netdev, tx_ring->q_index) && - !test_bit(__ICE_DOWN, vsi->state)) { + !test_bit(ICE_VSI_DOWN, vsi->state)) { netif_wake_subqueue(tx_ring->netdev, tx_ring->q_index); ++tx_ring->tx_stats.restart_q; @@ -444,22 +444,6 @@ void ice_free_rx_ring(struct ice_ring *rx_ring) } /** - * ice_rx_offset - Return expected offset into page to access data - * @rx_ring: Ring we are requesting offset of - * - * Returns the offset value for ring into the data buffer. - */ -static unsigned int ice_rx_offset(struct ice_ring *rx_ring) -{ - if (ice_ring_uses_build_skb(rx_ring)) - return ICE_SKB_PAD; - else if (ice_is_xdp_ena_vsi(rx_ring->vsi)) - return XDP_PACKET_HEADROOM; - - return 0; -} - -/** * ice_setup_rx_ring - Allocate the Rx descriptors * @rx_ring: the Rx ring to set up * @@ -493,7 +477,6 @@ int ice_setup_rx_ring(struct ice_ring *rx_ring) rx_ring->next_to_use = 0; rx_ring->next_to_clean = 0; - rx_ring->rx_offset = ice_rx_offset(rx_ring); if (ice_is_xdp_ena_vsi(rx_ring->vsi)) WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog); @@ -571,8 +554,8 @@ ice_run_xdp(struct ice_ring *rx_ring, struct xdp_buff *xdp, * @frames: XDP frames to be transmitted * @flags: transmit flags * - * Returns number of frames successfully sent. Frames that fail are - * free'ed via XDP return API. + * Returns number of frames successfully sent. Failed frames + * will be freed by the XDP core. * For error cases, a negative errno code is returned and no frames * are transmitted (caller must handle freeing frames). 
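 *
 * A caller-side sketch of that contract (illustrative; modeled on the
 * devmap-style cleanup with xdp_return_frame(), not code from this patch):
 *
 *	nxmit = ice_xdp_xmit(dev, n, frames, flags);
 *	if (nxmit < 0)
 *		nxmit = 0;	(error: no frames were consumed)
 *	for (i = nxmit; i < n; i++)
 *		xdp_return_frame(frames[i]);	(the unsent tail is freed
 *						 by the caller)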
*/ @@ -584,9 +567,9 @@ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, unsigned int queue_index = smp_processor_id(); struct ice_vsi *vsi = np->vsi; struct ice_ring *xdp_ring; - int drops = 0, i; + int nxmit = 0, i; - if (test_bit(__ICE_DOWN, vsi->state)) + if (test_bit(ICE_VSI_DOWN, vsi->state)) return -ENETDOWN; if (!ice_is_xdp_ena_vsi(vsi) || queue_index >= vsi->num_xdp_txq) @@ -601,16 +584,15 @@ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, int err; err = ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring); - if (err != ICE_XDP_TX) { - xdp_return_frame_rx_napi(xdpf); - drops++; - } + if (err != ICE_XDP_TX) + break; + nxmit++; } if (unlikely(flags & XDP_XMIT_FLUSH)) ice_xdp_ring_update_tail(xdp_ring); - return n - drops; + return nxmit; } /** @@ -1115,6 +1097,11 @@ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) dma_rmb(); if (rx_desc->wb.rxdid == FDIR_DESC_RXDID || !rx_ring->netdev) { + struct ice_vsi *ctrl_vsi = rx_ring->vsi; + + if (rx_desc->wb.rxdid == FDIR_DESC_RXDID && + ctrl_vsi->vf_id != ICE_INVAL_VFID) + ice_vc_fdir_irq_handler(ctrl_vsi, rx_desc); ice_put_rx_buf(rx_ring, NULL, 0); cleaned_count++; continue; @@ -1236,216 +1223,50 @@ construct_skb: } /** - * ice_adjust_itr_by_size_and_speed - Adjust ITR based on current traffic - * @port_info: port_info structure containing the current link speed - * @avg_pkt_size: average size of Tx or Rx packets based on clean routine - * @itr: ITR value to update - * - * Calculate how big of an increment should be applied to the ITR value passed - * in based on wmem_default, SKB overhead, ethernet overhead, and the current - * link speed. - * - * The following is a calculation derived from: - * wmem_default / (size + overhead) = desired_pkts_per_int - * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate - * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value + * ice_net_dim - Update net DIM algorithm + * @q_vector: the vector associated with the interrupt * - * Assuming wmem_default is 212992 and overhead is 640 bytes per - * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the - * formula down to: + * Create a DIM sample and notify net_dim() so that it can possibly decide + * a new ITR value based on incoming packets, bytes, and interrupts. * - * wmem_default * bits_per_byte * usecs_per_sec pkt_size + 24 - * ITR = -------------------------------------------- * -------------- - * rate pkt_size + 640 + * This function is a no-op if the ring is not configured to dynamic ITR. 
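For context on the net_dim() handoff: the library consumes the accumulated sample and, when it decides the moderation profile should change, schedules dim.work; the driver is expected to program the hardware from that callback and re-arm the measurement. A sketch of such a callback is below; the ice driver registers its own callback elsewhere in this series, so the function name and the register-write step are assumed.

/* Illustrative DIM completion callback; names are assumed, the real
 * callback is registered elsewhere in this series.
 */
static void example_tx_dim_work(struct work_struct *work)
{
	struct dim *dim = container_of(work, struct dim, work);
	struct ice_ring_container *rc =
		container_of(dim, struct ice_ring_container, dim);
	struct dim_cq_moder moder;

	/* Translate the profile net_dim selected into microseconds */
	moder = net_dim_get_tx_moderation(dim->mode, dim->profile_ix);

	/* ... program moder.usec into GLINT_ITR for rc->itr_idx ... */

	dim->state = DIM_START_MEASURE;
}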
*/ -static unsigned int -ice_adjust_itr_by_size_and_speed(struct ice_port_info *port_info, - unsigned int avg_pkt_size, - unsigned int itr) +static void ice_net_dim(struct ice_q_vector *q_vector) { - switch (port_info->phy.link_info.link_speed) { - case ICE_AQ_LINK_SPEED_100GB: - itr += DIV_ROUND_UP(17 * (avg_pkt_size + 24), - avg_pkt_size + 640); - break; - case ICE_AQ_LINK_SPEED_50GB: - itr += DIV_ROUND_UP(34 * (avg_pkt_size + 24), - avg_pkt_size + 640); - break; - case ICE_AQ_LINK_SPEED_40GB: - itr += DIV_ROUND_UP(43 * (avg_pkt_size + 24), - avg_pkt_size + 640); - break; - case ICE_AQ_LINK_SPEED_25GB: - itr += DIV_ROUND_UP(68 * (avg_pkt_size + 24), - avg_pkt_size + 640); - break; - case ICE_AQ_LINK_SPEED_20GB: - itr += DIV_ROUND_UP(85 * (avg_pkt_size + 24), - avg_pkt_size + 640); - break; - case ICE_AQ_LINK_SPEED_10GB: - default: - itr += DIV_ROUND_UP(170 * (avg_pkt_size + 24), - avg_pkt_size + 640); - break; - } - - if ((itr & ICE_ITR_MASK) > ICE_ITR_ADAPTIVE_MAX_USECS) { - itr &= ICE_ITR_ADAPTIVE_LATENCY; - itr += ICE_ITR_ADAPTIVE_MAX_USECS; - } + struct ice_ring_container *tx = &q_vector->tx; + struct ice_ring_container *rx = &q_vector->rx; - return itr; -} + if (ITR_IS_DYNAMIC(tx)) { + struct dim_sample dim_sample = {}; + u64 packets = 0, bytes = 0; + struct ice_ring *ring; -/** - * ice_update_itr - update the adaptive ITR value based on statistics - * @q_vector: structure containing interrupt and ring information - * @rc: structure containing ring performance data - * - * Stores a new ITR value based on packets and byte - * counts during the last interrupt. The advantage of per interrupt - * computation is faster updates and more accurate ITR for the current - * traffic pattern. Constants in this function were computed - * based on theoretical maximum wire speed and thresholds were set based - * on testing data as well as attempting to minimize response time - * while increasing bulk throughput. - */ -static void -ice_update_itr(struct ice_q_vector *q_vector, struct ice_ring_container *rc) -{ - unsigned long next_update = jiffies; - unsigned int packets, bytes, itr; - bool container_is_rx; + ice_for_each_ring(ring, q_vector->tx) { + packets += ring->stats.pkts; + bytes += ring->stats.bytes; + } - if (!rc->ring || !ITR_IS_DYNAMIC(rc->itr_setting)) - return; + dim_update_sample(q_vector->total_events, packets, bytes, + &dim_sample); - /* If itr_countdown is set it means we programmed an ITR within - * the last 4 interrupt cycles. This has a side effect of us - * potentially firing an early interrupt. In order to work around - * this we need to throw out any data received for a few - * interrupts following the update. - */ - if (q_vector->itr_countdown) { - itr = rc->target_itr; - goto clear_counts; + net_dim(&tx->dim, dim_sample); } - container_is_rx = (&q_vector->rx == rc); - /* For Rx we want to push the delay up and default to low latency. - * for Tx we want to pull the delay down and default to high latency. - */ - itr = container_is_rx ? - ICE_ITR_ADAPTIVE_MIN_USECS | ICE_ITR_ADAPTIVE_LATENCY : - ICE_ITR_ADAPTIVE_MAX_USECS | ICE_ITR_ADAPTIVE_LATENCY; - - /* If we didn't update within up to 1 - 2 jiffies we can assume - * that either packets are coming in so slow there hasn't been - * any work, or that there is so much work that NAPI is dealing - * with interrupt moderation and we don't need to do anything. 
- */ - if (time_after(next_update, rc->next_update)) - goto clear_counts; + if (ITR_IS_DYNAMIC(rx)) { + struct dim_sample dim_sample = {}; + u64 packets = 0, bytes = 0; + struct ice_ring *ring; - prefetch(q_vector->vsi->port_info); - - packets = rc->total_pkts; - bytes = rc->total_bytes; - - if (container_is_rx) { - /* If Rx there are 1 to 4 packets and bytes are less than - * 9000 assume insufficient data to use bulk rate limiting - * approach unless Tx is already in bulk rate limiting. We - * are likely latency driven. - */ - if (packets && packets < 4 && bytes < 9000 && - (q_vector->tx.target_itr & ICE_ITR_ADAPTIVE_LATENCY)) { - itr = ICE_ITR_ADAPTIVE_LATENCY; - goto adjust_by_size_and_speed; - } - } else if (packets < 4) { - /* If we have Tx and Rx ITR maxed and Tx ITR is running in - * bulk mode and we are receiving 4 or fewer packets just - * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so - * that the Rx can relax. - */ - if (rc->target_itr == ICE_ITR_ADAPTIVE_MAX_USECS && - (q_vector->rx.target_itr & ICE_ITR_MASK) == - ICE_ITR_ADAPTIVE_MAX_USECS) - goto clear_counts; - } else if (packets > 32) { - /* If we have processed over 32 packets in a single interrupt - * for Tx assume we need to switch over to "bulk" mode. - */ - rc->target_itr &= ~ICE_ITR_ADAPTIVE_LATENCY; - } - - /* We have no packets to actually measure against. This means - * either one of the other queues on this vector is active or - * we are a Tx queue doing TSO with too high of an interrupt rate. - * - * Between 4 and 56 we can assume that our current interrupt delay - * is only slightly too low. As such we should increase it by a small - * fixed amount. - */ - if (packets < 56) { - itr = rc->target_itr + ICE_ITR_ADAPTIVE_MIN_INC; - if ((itr & ICE_ITR_MASK) > ICE_ITR_ADAPTIVE_MAX_USECS) { - itr &= ICE_ITR_ADAPTIVE_LATENCY; - itr += ICE_ITR_ADAPTIVE_MAX_USECS; + ice_for_each_ring(ring, q_vector->rx) { + packets += ring->stats.pkts; + bytes += ring->stats.bytes; } - goto clear_counts; - } - if (packets <= 256) { - itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr); - itr &= ICE_ITR_MASK; + dim_update_sample(q_vector->total_events, packets, bytes, + &dim_sample); - /* Between 56 and 112 is our "goldilocks" zone where we are - * working out "just right". Just report that our current - * ITR is good for us. - */ - if (packets <= 112) - goto clear_counts; - - /* If packet count is 128 or greater we are likely looking - * at a slight overrun of the delay we want. Try halving - * our delay to see if that will cut the number of packets - * in half per interrupt. - */ - itr >>= 1; - itr &= ICE_ITR_MASK; - if (itr < ICE_ITR_ADAPTIVE_MIN_USECS) - itr = ICE_ITR_ADAPTIVE_MIN_USECS; - - goto clear_counts; + net_dim(&rx->dim, dim_sample); } - - /* The paths below assume we are dealing with a bulk ITR since - * number of packets is greater than 256. We are just going to have - * to compute a value and try to bring the count under control, - * though for smaller packet sizes there isn't much we can do as - * NAPI polling will likely be kicking in sooner rather than later. 
- */ - itr = ICE_ITR_ADAPTIVE_BULK; - -adjust_by_size_and_speed: - - /* based on checks above packets cannot be 0 so division is safe */ - itr = ice_adjust_itr_by_size_and_speed(q_vector->vsi->port_info, - bytes / packets, itr); - -clear_counts: - /* write back value */ - rc->target_itr = itr; - - /* next update should occur within next jiffy */ - rc->next_update = next_update + 1; - - rc->total_bytes = 0; - rc->total_pkts = 0; } /** @@ -1469,72 +1290,46 @@ static u32 ice_buildreg_itr(u16 itr_idx, u16 itr) (itr << (GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S)); } -/* The act of updating the ITR will cause it to immediately trigger. In order - * to prevent this from throwing off adaptive update statistics we defer the - * update so that it can only happen so often. So after either Tx or Rx are - * updated we make the adaptive scheme wait until either the ITR completely - * expires via the next_update expiration or we have been through at least - * 3 interrupts. - */ -#define ITR_COUNTDOWN_START 3 - /** - * ice_update_ena_itr - Update ITR and re-enable MSIX interrupt - * @q_vector: q_vector for which ITR is being updated and interrupt enabled + * ice_update_ena_itr - Update ITR moderation and re-enable MSI-X interrupt + * @q_vector: the vector associated with the interrupt to enable + * + * Update the net_dim() algorithm and re-enable the interrupt associated with + * this vector. + * + * If the VSI is down, the interrupt will not be re-enabled. */ static void ice_update_ena_itr(struct ice_q_vector *q_vector) { - struct ice_ring_container *tx = &q_vector->tx; - struct ice_ring_container *rx = &q_vector->rx; struct ice_vsi *vsi = q_vector->vsi; + bool wb_en = q_vector->wb_on_itr; u32 itr_val; - /* when exiting WB_ON_ITR just reset the countdown and let ITR - * resume it's normal "interrupts-enabled" path - */ - if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE) - q_vector->itr_countdown = 0; - - /* This will do nothing if dynamic updates are not enabled */ - ice_update_itr(q_vector, tx); - ice_update_itr(q_vector, rx); + if (test_bit(ICE_DOWN, vsi->state)) + return; - /* This block of logic allows us to get away with only updating - * one ITR value with each interrupt. The idea is to perform a - * pseudo-lazy update with the following criteria. - * - * 1. Rx is given higher priority than Tx if both are in same state - * 2. If we must reduce an ITR that is given highest priority. - * 3. We then give priority to increasing ITR based on amount. + /* When exiting WB_ON_ITR, let ITR resume its normal + * interrupts-enabled path. 
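As a worked example of the register encoding used by ice_buildreg_itr() above: with a 2 us ITR granularity (ICE_ITR_GRAN_S == 1), shifting a microsecond value left by GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S places usecs/2 in the interval field, with bit zero of the usec value ignored. A sketch mirroring the helper, assuming the usual INTENA/CLEARPBA enable bits:

/* Sketch mirroring ice_buildreg_itr(); the INTENA/CLEARPBA bits are
 * assumed, only the interval shift is visible in the hunk above.
 */
static u32 example_buildreg_itr(u16 itr_idx, u16 itr_usecs)
{
	return GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M |
	       (itr_idx << GLINT_DYN_CTL_ITR_INDX_S) |
	       /* 2 us granularity: encodes usecs / 2 in the interval field */
	       (itr_usecs << (GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S));
}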
*/ - if (rx->target_itr < rx->current_itr) { - /* Rx ITR needs to be reduced, this is highest priority */ - itr_val = ice_buildreg_itr(rx->itr_idx, rx->target_itr); - rx->current_itr = rx->target_itr; - q_vector->itr_countdown = ITR_COUNTDOWN_START; - } else if ((tx->target_itr < tx->current_itr) || - ((rx->target_itr - rx->current_itr) < - (tx->target_itr - tx->current_itr))) { - /* Tx ITR needs to be reduced, this is second priority - * Tx ITR needs to be increased more than Rx, fourth priority - */ - itr_val = ice_buildreg_itr(tx->itr_idx, tx->target_itr); - tx->current_itr = tx->target_itr; - q_vector->itr_countdown = ITR_COUNTDOWN_START; - } else if (rx->current_itr != rx->target_itr) { - /* Rx ITR needs to be increased, third priority */ - itr_val = ice_buildreg_itr(rx->itr_idx, rx->target_itr); - rx->current_itr = rx->target_itr; - q_vector->itr_countdown = ITR_COUNTDOWN_START; - } else { - /* Still have to re-enable the interrupts */ - itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0); - if (q_vector->itr_countdown) - q_vector->itr_countdown--; + if (wb_en) + q_vector->wb_on_itr = false; + + /* This will do nothing if dynamic updates are not enabled. */ + ice_net_dim(q_vector); + + /* net_dim() updates ITR out-of-band using a work item */ + itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0); + /* trigger an immediate software interrupt when exiting + * busy poll, to make sure to catch any pending cleanups + * that might have been missed due to interrupt state + * transition. + */ + if (wb_en) { + itr_val |= GLINT_DYN_CTL_SWINT_TRIG_M | + GLINT_DYN_CTL_SW_ITR_INDX_M | + GLINT_DYN_CTL_SW_ITR_INDX_ENA_M; } - - if (!test_bit(__ICE_DOWN, vsi->state)) - wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val); + wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val); } /** @@ -1556,7 +1351,7 @@ static void ice_set_wb_on_itr(struct ice_q_vector *q_vector) struct ice_vsi *vsi = q_vector->vsi; /* already in wb_on_itr mode no need to change it */ - if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE) + if (q_vector->wb_on_itr) return; /* use previously set ITR values for all of the ITR indices by @@ -1568,7 +1363,7 @@ static void ice_set_wb_on_itr(struct ice_q_vector *q_vector) GLINT_DYN_CTL_ITR_INDX_M) | GLINT_DYN_CTL_INTENA_MSK_M | GLINT_DYN_CTL_WB_ON_ITR_M); - q_vector->itr_countdown = ICE_IN_WB_ON_ITR_MODE; + q_vector->wb_on_itr = true; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 5dab77504fa5..c5a92ac787d6 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -192,7 +192,11 @@ struct ice_rxq_stats { u64 non_eop_descs; u64 alloc_page_failed; u64 alloc_buf_failed; - u64 gro_dropped; /* GRO returned dropped */ +}; + +enum ice_ring_state_t { + ICE_TX_XPS_INIT_DONE, + ICE_TX_NBITS, }; /* this enum matches hardware bits and is meant to be used by DYN_CTLN @@ -219,23 +223,20 @@ enum ice_rx_dtype { #define ICE_TX_ITR ICE_IDX_ITR1 #define ICE_ITR_8K 124 #define ICE_ITR_20K 50 -#define ICE_ITR_MAX 8160 -#define ICE_DFLT_TX_ITR (ICE_ITR_20K | ICE_ITR_DYNAMIC) -#define ICE_DFLT_RX_ITR (ICE_ITR_20K | ICE_ITR_DYNAMIC) -#define ICE_ITR_DYNAMIC 0x8000 /* used as flag for itr_setting */ -#define ITR_IS_DYNAMIC(setting) (!!((setting) & ICE_ITR_DYNAMIC)) -#define ITR_TO_REG(setting) ((setting) & ~ICE_ITR_DYNAMIC) +#define ICE_ITR_MAX 8160 /* 0x1FE0 */ +#define ICE_DFLT_TX_ITR ICE_ITR_20K +#define ICE_DFLT_RX_ITR ICE_ITR_20K +enum ice_dynamic_itr { + ITR_STATIC = 0, + ITR_DYNAMIC = 1 
+}; + +#define ITR_IS_DYNAMIC(rc) ((rc)->itr_mode == ITR_DYNAMIC) #define ICE_ITR_GRAN_S 1 /* ITR granularity is always 2us */ #define ICE_ITR_GRAN_US BIT(ICE_ITR_GRAN_S) #define ICE_ITR_MASK 0x1FFE /* ITR register value alignment mask */ #define ITR_REG_ALIGN(setting) ((setting) & ICE_ITR_MASK) -#define ICE_ITR_ADAPTIVE_MIN_INC 0x0002 -#define ICE_ITR_ADAPTIVE_MIN_USECS 0x0002 -#define ICE_ITR_ADAPTIVE_MAX_USECS 0x00FA -#define ICE_ITR_ADAPTIVE_LATENCY 0x8000 -#define ICE_ITR_ADAPTIVE_BULK 0x0000 - #define ICE_DFLT_INTRL 0 #define ICE_MAX_INTRL 236 @@ -292,6 +293,7 @@ struct ice_ring { }; struct rcu_head rcu; /* to avoid race on free */ + DECLARE_BITMAP(xps_state, ICE_TX_NBITS); /* XPS Config State */ struct bpf_prog *xdp_prog; struct xsk_buff_pool *xsk_pool; u16 rx_offset; @@ -334,23 +336,22 @@ static inline bool ice_ring_is_xdp(struct ice_ring *ring) struct ice_ring_container { /* head of linked-list of rings */ struct ice_ring *ring; - unsigned long next_update; /* jiffies value of next queue update */ - unsigned int total_bytes; /* total bytes processed this int */ - unsigned int total_pkts; /* total packets processed this int */ + struct dim dim; /* data for net_dim algorithm */ u16 itr_idx; /* index in the interrupt vector */ - u16 target_itr; /* value in usecs divided by the hw->itr_gran */ - u16 current_itr; /* value in usecs divided by the hw->itr_gran */ - /* high bit set means dynamic ITR, rest is used to store user - * readable ITR value in usecs and must be converted before programming - * to a register. + /* this matches the maximum number of ITR bits, but in usec + * values, so it is shifted left one bit (bit zero is ignored) */ - u16 itr_setting; + u16 itr_setting:13; + u16 itr_reserved:2; + u16 itr_mode:1; }; struct ice_coalesce_stored { u16 itr_tx; u16 itr_rx; u8 intrl; + u8 tx_valid; + u8 rx_valid; }; /* iterator for handling rings in ring container */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index 02b12736ea80..207f6ee3a7f6 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -143,6 +143,7 @@ ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb, case ICE_RX_PTYPE_INNER_PROT_UDP: case ICE_RX_PTYPE_INNER_PROT_SCTP: skb->ip_summed = CHECKSUM_UNNECESSARY; + break; default: break; } diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index a6cb0c35748c..4474dd6a7ba1 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -192,6 +192,24 @@ enum ice_fltr_ptype { ICE_FLTR_PTYPE_NONF_IPV4_TCP, ICE_FLTR_PTYPE_NONF_IPV4_SCTP, ICE_FLTR_PTYPE_NONF_IPV4_OTHER, + ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_UDP, + ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_TCP, + ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_ICMP, + ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER, + ICE_FLTR_PTYPE_NONF_IPV6_GTPU_IPV6_OTHER, + ICE_FLTR_PTYPE_NONF_IPV4_L2TPV3, + ICE_FLTR_PTYPE_NONF_IPV6_L2TPV3, + ICE_FLTR_PTYPE_NONF_IPV4_ESP, + ICE_FLTR_PTYPE_NONF_IPV6_ESP, + ICE_FLTR_PTYPE_NONF_IPV4_AH, + ICE_FLTR_PTYPE_NONF_IPV6_AH, + ICE_FLTR_PTYPE_NONF_IPV4_NAT_T_ESP, + ICE_FLTR_PTYPE_NONF_IPV6_NAT_T_ESP, + ICE_FLTR_PTYPE_NONF_IPV4_PFCP_NODE, + ICE_FLTR_PTYPE_NONF_IPV4_PFCP_SESSION, + ICE_FLTR_PTYPE_NONF_IPV6_PFCP_NODE, + ICE_FLTR_PTYPE_NONF_IPV6_PFCP_SESSION, + ICE_FLTR_PTYPE_NON_IP_L2, ICE_FLTR_PTYPE_FRAG_IPV4, ICE_FLTR_PTYPE_NONF_IPV6_UDP, ICE_FLTR_PTYPE_NONF_IPV6_TCP, @@ -533,9 +551,7 @@ struct ice_dcb_app_priority_table { 
#define ICE_TLV_STATUS_OPER 0x1 #define ICE_TLV_STATUS_SYNC 0x2 #define ICE_TLV_STATUS_ERR 0x4 -#define ICE_APP_PROT_ID_FCOE 0x8906 -#define ICE_APP_PROT_ID_ISCSI 0x0cbc -#define ICE_APP_PROT_ID_FIP 0x8914 +#define ICE_APP_PROT_ID_ISCSI_860 0x035c #define ICE_APP_SEL_ETHTYPE 0x1 #define ICE_APP_SEL_TCPIP 0x2 #define ICE_CEE_APP_SEL_ETHTYPE 0x0 @@ -614,6 +630,80 @@ struct ice_fw_log_cfg { struct ice_fw_log_evnt evnts[ICE_AQC_FW_LOG_ID_MAX]; }; +/* Enum defining the different states of the mailbox snapshot in the + * PF-VF mailbox overflow detection algorithm. The snapshot can be in + * states: + * 1. ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT - generate a new static snapshot + * within the mailbox buffer. + * 2. ICE_MAL_VF_DETECT_STATE_TRAVERSE - iterate through the mailbox snapshot. + * 3. ICE_MAL_VF_DETECT_STATE_DETECT - track the messages sent per VF via the + * mailbox and mark any VFs sending more messages than the threshold limit set. + * 4. ICE_MAL_VF_DETECT_STATE_INVALID - Invalid mailbox state set to 0xFFFFFFFF. + */ +enum ice_mbx_snapshot_state { + ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT = 0, + ICE_MAL_VF_DETECT_STATE_TRAVERSE, + ICE_MAL_VF_DETECT_STATE_DETECT, + ICE_MAL_VF_DETECT_STATE_INVALID = 0xFFFFFFFF, +}; + +/* Structure to hold information of the static snapshot and the mailbox + * buffer data used to generate and track the snapshot. + * 1. state: the state of the mailbox snapshot in the malicious VF + * detection state handler ice_mbx_vf_state_handler() + * 2. head: head of the mailbox snapshot in a circular mailbox buffer + * 3. tail: tail of the mailbox snapshot in a circular mailbox buffer + * 4. num_iterations: number of messages traversed in circular mailbox buffer + * 5. num_msg_proc: number of messages processed in mailbox + * 6. num_pending_arq: number of pending asynchronous messages + * 7. max_num_msgs_mbx: maximum messages in mailbox for currently + * serviced work item or interrupt. + */ +struct ice_mbx_snap_buffer_data { + enum ice_mbx_snapshot_state state; + u32 head; + u32 tail; + u32 num_iterations; + u16 num_msg_proc; + u16 num_pending_arq; + u16 max_num_msgs_mbx; +}; + +/* Structure to track messages sent by VFs on mailbox: + * 1. vf_cntr: a counter array of VFs to track the number of + * asynchronous messages sent by each VF + * 2. vfcntr_len: number of entries in VF counter array + */ +struct ice_mbx_vf_counter { + u32 *vf_cntr; + u32 vfcntr_len; +}; + +/* Structure to hold data relevant to the captured static snapshot + * of the PF-VF mailbox. + */ +struct ice_mbx_snapshot { + struct ice_mbx_snap_buffer_data mbx_buf; + struct ice_mbx_vf_counter mbx_vf; +}; + +/* Structure to hold data to be used for capturing or updating a + * static snapshot. + * 1. num_msg_proc: number of messages processed in mailbox + * 2. num_pending_arq: number of pending asynchronous messages + * 3. max_num_msgs_mbx: maximum messages in mailbox for currently + * serviced work item or interrupt. + * 4. async_watermark_val: an upper threshold set by the caller to determine + * if the pending arq count is large enough to assume that there is + * the possibility of a malicious VF.
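Putting the pieces together, a PF-side consumer fills an ice_mbx_data descriptor from its mailbox queue state and lets the state handler flag offenders against the watermark. A minimal sketch using the prototypes from ice_sriov.h above; the counter values are illustrative placeholders and error handling is trimmed.

/* Minimal caller sketch; queue counters here are placeholder values
 * that would come from the mailbox receive path.
 */
static void example_check_mbx_overflow(struct ice_hw *hw, u16 vf_id,
				       unsigned long *malvfs, u16 bitmap_len)
{
	struct ice_mbx_data mbx_data = {
		.num_msg_proc = 1,		/* messages handled this pass */
		.num_pending_arq = 10,		/* still queued in the ARQ */
		.max_num_msgs_mbx = 1024,	/* mailbox queue depth */
		.async_watermark_val = ICE_ASYNC_VF_MSG_THRESHOLD,
	};
	bool is_mal_vf = false;
	bool report = false;

	if (ice_mbx_vf_state_handler(hw, &mbx_data, vf_id, &is_mal_vf))
		return;

	if (is_mal_vf)
		ice_mbx_report_malvf(hw, malvfs, bitmap_len, vf_id, &report);
}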
+ */ +struct ice_mbx_data { + u16 num_msg_proc; + u16 num_pending_arq; + u16 max_num_msgs_mbx; + u16 async_watermark_val; +}; + /* Port hardware description */ struct ice_hw { u8 __iomem *hw_addr; @@ -702,13 +792,13 @@ struct ice_hw { enum ice_aq_err pkg_dwnld_status; - /* Driver's package ver - (from the Metadata seg) */ + /* Driver's package ver - (from the Ice Metadata section) */ struct ice_pkg_ver pkg_ver; u8 pkg_name[ICE_PKG_NAME_SIZE]; - /* Driver's Ice package version (from the Ice seg) */ - struct ice_pkg_ver ice_pkg_ver; - u8 ice_pkg_name[ICE_PKG_NAME_SIZE]; + /* Driver's Ice segment format version and ID (from the Ice seg) */ + struct ice_pkg_ver ice_seg_fmt_ver; + u8 ice_seg_id[ICE_SEG_ID_SIZE]; /* Pointer to the ice segment */ struct ice_seg *seg; @@ -745,6 +835,7 @@ struct ice_hw { DECLARE_BITMAP(fdir_perfect_fltr, ICE_FLTR_PTYPE_MAX); struct mutex rss_locks; /* protect RSS configuration */ struct list_head rss_list_head; + struct ice_mbx_snapshot mbx_snapshot; }; /* Statistics collected by each port, VSI, VEB, and S-channel */ @@ -809,6 +900,14 @@ struct ice_hw_port_stats { u64 fd_sb_match; }; +struct ice_aq_get_set_rss_lut_params { + u16 vsi_handle; /* software VSI handle */ + u16 lut_size; /* size of the LUT buffer */ + u8 lut_type; /* type of the LUT (i.e. VSI, PF, Global) */ + u8 *lut; /* input RSS LUT for set and output RSS LUT for get */ + u8 global_lut_id; /* only valid when lut_type is global */ +}; + /* Checksum and Shadow RAM pointers */ #define ICE_SR_NVM_CTRL_WORD 0x00 #define ICE_SR_BOOT_CFG_PTR 0x132 @@ -915,4 +1014,9 @@ struct ice_hw_port_stats { #define ICE_FW_API_LLDP_FLTR_MIN 7 #define ICE_FW_API_LLDP_FLTR_PATCH 1 +/* AQ API version for report default configuration */ +#define ICE_FW_API_REPORT_DFLT_CFG_MAJ 1 +#define ICE_FW_API_REPORT_DFLT_CFG_MIN 7 +#define ICE_FW_API_REPORT_DFLT_CFG_PATCH 3 + #endif /* _ICE_TYPE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c new file mode 100644 index 000000000000..9feebe5f556c --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2021, Intel Corporation. */ + +#include "ice_virtchnl_allowlist.h" + +/* Purpose of this file is to share functionality to allowlist or denylist + * opcodes used in PF <-> VF communication. 
Group of opcodes: + * - default -> should always be allowed after creating VF, + * default_allowlist_opcodes + * - opcodes needed by VF to work correctly, but not associated with caps -> + * should be allowed after successful VF resource allocation, + * working_allowlist_opcodes + * - opcodes needed by VF when caps are activated + * + * Caps that don't use new opcodes (no opcodes should be allowed): + * - VIRTCHNL_VF_OFFLOAD_RSS_AQ + * - VIRTCHNL_VF_OFFLOAD_RSS_REG + * - VIRTCHNL_VF_OFFLOAD_WB_ON_ITR + * - VIRTCHNL_VF_OFFLOAD_CRC + * - VIRTCHNL_VF_OFFLOAD_RX_POLLING + * - VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 + * - VIRTCHNL_VF_OFFLOAD_ENCAP + * - VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM + * - VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM + * - VIRTCHNL_VF_OFFLOAD_USO + */ + +/* default opcodes to communicate with VF */ +static const u32 default_allowlist_opcodes[] = { + VIRTCHNL_OP_GET_VF_RESOURCES, VIRTCHNL_OP_VERSION, VIRTCHNL_OP_RESET_VF, +}; + +/* opcodes supported after successful VIRTCHNL_OP_GET_VF_RESOURCES */ +static const u32 working_allowlist_opcodes[] = { + VIRTCHNL_OP_CONFIG_TX_QUEUE, VIRTCHNL_OP_CONFIG_RX_QUEUE, + VIRTCHNL_OP_CONFIG_VSI_QUEUES, VIRTCHNL_OP_CONFIG_IRQ_MAP, + VIRTCHNL_OP_ENABLE_QUEUES, VIRTCHNL_OP_DISABLE_QUEUES, + VIRTCHNL_OP_GET_STATS, VIRTCHNL_OP_EVENT, +}; + +/* VIRTCHNL_VF_OFFLOAD_L2 */ +static const u32 l2_allowlist_opcodes[] = { + VIRTCHNL_OP_ADD_ETH_ADDR, VIRTCHNL_OP_DEL_ETH_ADDR, + VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE, +}; + +/* VIRTCHNL_VF_OFFLOAD_REQ_QUEUES */ +static const u32 req_queues_allowlist_opcodes[] = { + VIRTCHNL_OP_REQUEST_QUEUES, +}; + +/* VIRTCHNL_VF_OFFLOAD_VLAN */ +static const u32 vlan_allowlist_opcodes[] = { + VIRTCHNL_OP_ADD_VLAN, VIRTCHNL_OP_DEL_VLAN, + VIRTCHNL_OP_ENABLE_VLAN_STRIPPING, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING, +}; + +/* VIRTCHNL_VF_OFFLOAD_RSS_PF */ +static const u32 rss_pf_allowlist_opcodes[] = { + VIRTCHNL_OP_CONFIG_RSS_KEY, VIRTCHNL_OP_CONFIG_RSS_LUT, + VIRTCHNL_OP_GET_RSS_HENA_CAPS, VIRTCHNL_OP_SET_RSS_HENA, +}; + +/* VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF */ +static const u32 adv_rss_pf_allowlist_opcodes[] = { + VIRTCHNL_OP_ADD_RSS_CFG, VIRTCHNL_OP_DEL_RSS_CFG, +}; + +/* VIRTCHNL_VF_OFFLOAD_FDIR_PF */ +static const u32 fdir_pf_allowlist_opcodes[] = { + VIRTCHNL_OP_ADD_FDIR_FILTER, VIRTCHNL_OP_DEL_FDIR_FILTER, +}; + +struct allowlist_opcode_info { + const u32 *opcodes; + size_t size; +}; + +#define BIT_INDEX(caps) (HWEIGHT((caps) - 1)) +#define ALLOW_ITEM(caps, list) \ + [BIT_INDEX(caps)] = { \ + .opcodes = list, \ + .size = ARRAY_SIZE(list) \ + } +static const struct allowlist_opcode_info allowlist_opcodes[] = { + ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_L2, l2_allowlist_opcodes), + ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_REQ_QUEUES, req_queues_allowlist_opcodes), + ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_VLAN, vlan_allowlist_opcodes), + ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_RSS_PF, rss_pf_allowlist_opcodes), + ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF, adv_rss_pf_allowlist_opcodes), + ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_FDIR_PF, fdir_pf_allowlist_opcodes), +}; + +/** + * ice_vc_is_opcode_allowed - check if this opcode is allowed on this VF + * @vf: pointer to VF structure + * @opcode: virtchnl opcode + * + * Return true if message is allowed on this VF + */ +bool ice_vc_is_opcode_allowed(struct ice_vf *vf, u32 opcode) +{ + if (opcode >= VIRTCHNL_OP_MAX) + return false; + + return test_bit(opcode, vf->opcodes_allowlist); +} + +/** + * ice_vc_allowlist_opcodes - allowlist selected opcodes + * @vf: pointer to VF structure + * @opcodes: array of opcodes to allowlist + * @size: size of
opcodes array + * + * Function should be called to allowlist opcodes on a VF. + */ +static void +ice_vc_allowlist_opcodes(struct ice_vf *vf, const u32 *opcodes, size_t size) +{ + unsigned int i; + + for (i = 0; i < size; i++) + set_bit(opcodes[i], vf->opcodes_allowlist); +} + +/** + * ice_vc_clear_allowlist - clear all allowlist opcodes + * @vf: pointer to VF structure + */ +static void ice_vc_clear_allowlist(struct ice_vf *vf) +{ + bitmap_zero(vf->opcodes_allowlist, VIRTCHNL_OP_MAX); +} + +/** + * ice_vc_set_default_allowlist - allowlist default opcodes for VF + * @vf: pointer to VF structure + */ +void ice_vc_set_default_allowlist(struct ice_vf *vf) +{ + ice_vc_clear_allowlist(vf); + ice_vc_allowlist_opcodes(vf, default_allowlist_opcodes, + ARRAY_SIZE(default_allowlist_opcodes)); +} + +/** + * ice_vc_set_working_allowlist - allowlist opcodes needed by the VF to work + * @vf: pointer to VF structure + * + * allowlist opcodes that aren't associated with specific caps, but + * are needed by the VF to work. + */ +void ice_vc_set_working_allowlist(struct ice_vf *vf) +{ + ice_vc_allowlist_opcodes(vf, working_allowlist_opcodes, + ARRAY_SIZE(working_allowlist_opcodes)); +} + +/** + * ice_vc_set_caps_allowlist - allowlist VF opcodes according to caps + * @vf: pointer to VF structure + */ +void ice_vc_set_caps_allowlist(struct ice_vf *vf) +{ + unsigned long caps = vf->driver_caps; + unsigned int i; + + for_each_set_bit(i, &caps, ARRAY_SIZE(allowlist_opcodes)) + ice_vc_allowlist_opcodes(vf, allowlist_opcodes[i].opcodes, + allowlist_opcodes[i].size); +} diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.h new file mode 100644 index 000000000000..d3ae86ded219 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2021, Intel Corporation. */ + +#ifndef _ICE_VIRTCHNL_ALLOWLIST_H_ +#define _ICE_VIRTCHNL_ALLOWLIST_H_ +#include "ice.h" + +bool ice_vc_is_opcode_allowed(struct ice_vf *vf, u32 opcode); + +void ice_vc_set_default_allowlist(struct ice_vf *vf); +void ice_vc_set_working_allowlist(struct ice_vf *vf); +void ice_vc_set_caps_allowlist(struct ice_vf *vf); +#endif /* _ICE_VIRTCHNL_ALLOWLIST_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c new file mode 100644 index 000000000000..eee180d8c024 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c @@ -0,0 +1,2204 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2021, Intel Corporation.
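The ALLOW_ITEM() indexing above works because every VIRTCHNL_VF_OFFLOAD_* cap is a single bit: for caps == BIT(k), HWEIGHT(caps - 1) == k, which is exactly the index for_each_set_bit() later visits in ice_vc_set_caps_allowlist(). For instance, VIRTCHNL_VF_OFFLOAD_VLAN is BIT(16), so its table entry lands at index 16. A sketch of the dispatch-side check is below; the enclosing virtchnl message handler is assumed for illustration.

/* Dispatch-side sketch; the enclosing message handler and its reply
 * path are assumed, only the allowlist check is shown.
 */
static int example_dispatch_vc_msg(struct ice_vf *vf, u32 v_opcode)
{
	if (!ice_vc_is_opcode_allowed(vf, v_opcode))
		return -EPERM;	/* opcode not allowlisted for this VF */

	/* ... hand off to the per-opcode handler ... */
	return 0;
}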
*/ + +#include "ice.h" +#include "ice_base.h" +#include "ice_lib.h" +#include "ice_flow.h" + +#define to_fltr_conf_from_desc(p) \ + container_of(p, struct virtchnl_fdir_fltr_conf, input) + +#define ICE_FLOW_PROF_TYPE_S 0 +#define ICE_FLOW_PROF_TYPE_M (0xFFFFFFFFULL << ICE_FLOW_PROF_TYPE_S) +#define ICE_FLOW_PROF_VSI_S 32 +#define ICE_FLOW_PROF_VSI_M (0xFFFFFFFFULL << ICE_FLOW_PROF_VSI_S) + +/* Flow profile ID format: + * [0:31] - flow type, flow + tun_offs + * [32:63] - VSI index + */ +#define ICE_FLOW_PROF_FD(vsi, flow, tun_offs) \ + ((u64)(((((flow) + (tun_offs)) & ICE_FLOW_PROF_TYPE_M)) | \ + (((u64)(vsi) << ICE_FLOW_PROF_VSI_S) & ICE_FLOW_PROF_VSI_M))) + +#define GTPU_TEID_OFFSET 4 +#define GTPU_EH_QFI_OFFSET 1 +#define GTPU_EH_QFI_MASK 0x3F +#define PFCP_S_OFFSET 0 +#define PFCP_S_MASK 0x1 +#define PFCP_PORT_NR 8805 + +#define FDIR_INSET_FLAG_ESP_S 0 +#define FDIR_INSET_FLAG_ESP_M BIT_ULL(FDIR_INSET_FLAG_ESP_S) +#define FDIR_INSET_FLAG_ESP_UDP BIT_ULL(FDIR_INSET_FLAG_ESP_S) +#define FDIR_INSET_FLAG_ESP_IPSEC (0ULL << FDIR_INSET_FLAG_ESP_S) + +enum ice_fdir_tunnel_type { + ICE_FDIR_TUNNEL_TYPE_NONE = 0, + ICE_FDIR_TUNNEL_TYPE_GTPU, + ICE_FDIR_TUNNEL_TYPE_GTPU_EH, +}; + +struct virtchnl_fdir_fltr_conf { + struct ice_fdir_fltr input; + enum ice_fdir_tunnel_type ttype; + u64 inset_flag; + u32 flow_id; +}; + +static enum virtchnl_proto_hdr_type vc_pattern_ether[] = { + VIRTCHNL_PROTO_HDR_ETH, + VIRTCHNL_PROTO_HDR_NONE, +}; + +static enum virtchnl_proto_hdr_type vc_pattern_ipv4[] = { + VIRTCHNL_PROTO_HDR_ETH, + VIRTCHNL_PROTO_HDR_IPV4, + VIRTCHNL_PROTO_HDR_NONE, +}; + +static enum virtchnl_proto_hdr_type vc_pattern_ipv4_tcp[] = { + VIRTCHNL_PROTO_HDR_ETH, + VIRTCHNL_PROTO_HDR_IPV4, + VIRTCHNL_PROTO_HDR_TCP, + VIRTCHNL_PROTO_HDR_NONE, +}; + +static enum virtchnl_proto_hdr_type vc_pattern_ipv4_udp[] = { + VIRTCHNL_PROTO_HDR_ETH, + VIRTCHNL_PROTO_HDR_IPV4, + VIRTCHNL_PROTO_HDR_UDP, + VIRTCHNL_PROTO_HDR_NONE, +}; + +static enum virtchnl_proto_hdr_type vc_pattern_ipv4_sctp[] = { + VIRTCHNL_PROTO_HDR_ETH, + VIRTCHNL_PROTO_HDR_IPV4, + VIRTCHNL_PROTO_HDR_SCTP, + VIRTCHNL_PROTO_HDR_NONE, +}; + +static enum virtchnl_proto_hdr_type vc_pattern_ipv6[] = { + VIRTCHNL_PROTO_HDR_ETH, + VIRTCHNL_PROTO_HDR_IPV6, + VIRTCHNL_PROTO_HDR_NONE, +}; + +static enum virtchnl_proto_hdr_type vc_pattern_ipv6_tcp[] = { + VIRTCHNL_PROTO_HDR_ETH, + VIRTCHNL_PROTO_HDR_IPV6, + VIRTCHNL_PROTO_HDR_TCP, + VIRTCHNL_PROTO_HDR_NONE, +}; + +static enum virtchnl_proto_hdr_type vc_pattern_ipv6_udp[] = { + VIRTCHNL_PROTO_HDR_ETH, + VIRTCHNL_PROTO_HDR_IPV6, + VIRTCHNL_PROTO_HDR_UDP, + VIRTCHNL_PROTO_HDR_NONE, +}; + +static enum virtchnl_proto_hdr_type vc_pattern_ipv6_sctp[] = { + VIRTCHNL_PROTO_HDR_ETH, + VIRTCHNL_PROTO_HDR_IPV6, + VIRTCHNL_PROTO_HDR_SCTP, + VIRTCHNL_PROTO_HDR_NONE, +}; + +static enum virtchnl_proto_hdr_type vc_pattern_ipv4_gtpu[] = { + VIRTCHNL_PROTO_HDR_ETH, + VIRTCHNL_PROTO_HDR_IPV4, + VIRTCHNL_PROTO_HDR_UDP, + VIRTCHNL_PROTO_HDR_GTPU_IP, + VIRTCHNL_PROTO_HDR_NONE, +}; + +static enum virtchnl_proto_hdr_type vc_pattern_ipv4_gtpu_eh[] = { + VIRTCHNL_PROTO_HDR_ETH, + VIRTCHNL_PROTO_HDR_IPV4, + VIRTCHNL_PROTO_HDR_UDP, + VIRTCHNL_PROTO_HDR_GTPU_IP, + VIRTCHNL_PROTO_HDR_GTPU_EH, + VIRTCHNL_PROTO_HDR_NONE, +}; + +static enum virtchnl_proto_hdr_type vc_pattern_ipv4_l2tpv3[] = { + VIRTCHNL_PROTO_HDR_ETH, + VIRTCHNL_PROTO_HDR_IPV4, + VIRTCHNL_PROTO_HDR_L2TPV3, + VIRTCHNL_PROTO_HDR_NONE, +}; + +static enum virtchnl_proto_hdr_type vc_pattern_ipv6_l2tpv3[] = { + VIRTCHNL_PROTO_HDR_ETH, + VIRTCHNL_PROTO_HDR_IPV6, + 
VIRTCHNL_PROTO_HDR_L2TPV3, + VIRTCHNL_PROTO_HDR_NONE, +}; + +static enum virtchnl_proto_hdr_type vc_pattern_ipv4_esp[] = { + VIRTCHNL_PROTO_HDR_ETH, + VIRTCHNL_PROTO_HDR_IPV4, + VIRTCHNL_PROTO_HDR_ESP, + VIRTCHNL_PROTO_HDR_NONE, +}; + +static enum virtchnl_proto_hdr_type vc_pattern_ipv6_esp[] = { + VIRTCHNL_PROTO_HDR_ETH, + VIRTCHNL_PROTO_HDR_IPV6, + VIRTCHNL_PROTO_HDR_ESP, + VIRTCHNL_PROTO_HDR_NONE, +}; + +static enum virtchnl_proto_hdr_type vc_pattern_ipv4_ah[] = { + VIRTCHNL_PROTO_HDR_ETH, + VIRTCHNL_PROTO_HDR_IPV4, + VIRTCHNL_PROTO_HDR_AH, + VIRTCHNL_PROTO_HDR_NONE, +}; + +static enum virtchnl_proto_hdr_type vc_pattern_ipv6_ah[] = { + VIRTCHNL_PROTO_HDR_ETH, + VIRTCHNL_PROTO_HDR_IPV6, + VIRTCHNL_PROTO_HDR_AH, + VIRTCHNL_PROTO_HDR_NONE, +}; + +static enum virtchnl_proto_hdr_type vc_pattern_ipv4_nat_t_esp[] = { + VIRTCHNL_PROTO_HDR_ETH, + VIRTCHNL_PROTO_HDR_IPV4, + VIRTCHNL_PROTO_HDR_UDP, + VIRTCHNL_PROTO_HDR_ESP, + VIRTCHNL_PROTO_HDR_NONE, +}; + +static enum virtchnl_proto_hdr_type vc_pattern_ipv6_nat_t_esp[] = { + VIRTCHNL_PROTO_HDR_ETH, + VIRTCHNL_PROTO_HDR_IPV6, + VIRTCHNL_PROTO_HDR_UDP, + VIRTCHNL_PROTO_HDR_ESP, + VIRTCHNL_PROTO_HDR_NONE, +}; + +static enum virtchnl_proto_hdr_type vc_pattern_ipv4_pfcp[] = { + VIRTCHNL_PROTO_HDR_ETH, + VIRTCHNL_PROTO_HDR_IPV4, + VIRTCHNL_PROTO_HDR_UDP, + VIRTCHNL_PROTO_HDR_PFCP, + VIRTCHNL_PROTO_HDR_NONE, +}; + +static enum virtchnl_proto_hdr_type vc_pattern_ipv6_pfcp[] = { + VIRTCHNL_PROTO_HDR_ETH, + VIRTCHNL_PROTO_HDR_IPV6, + VIRTCHNL_PROTO_HDR_UDP, + VIRTCHNL_PROTO_HDR_PFCP, + VIRTCHNL_PROTO_HDR_NONE, +}; + +struct virtchnl_fdir_pattern_match_item { + enum virtchnl_proto_hdr_type *list; + u64 input_set; + u64 *meta; +}; + +static const struct virtchnl_fdir_pattern_match_item vc_fdir_pattern_os[] = { + {vc_pattern_ipv4, 0, NULL}, + {vc_pattern_ipv4_tcp, 0, NULL}, + {vc_pattern_ipv4_udp, 0, NULL}, + {vc_pattern_ipv4_sctp, 0, NULL}, + {vc_pattern_ipv6, 0, NULL}, + {vc_pattern_ipv6_tcp, 0, NULL}, + {vc_pattern_ipv6_udp, 0, NULL}, + {vc_pattern_ipv6_sctp, 0, NULL}, +}; + +static const struct virtchnl_fdir_pattern_match_item vc_fdir_pattern_comms[] = { + {vc_pattern_ipv4, 0, NULL}, + {vc_pattern_ipv4_tcp, 0, NULL}, + {vc_pattern_ipv4_udp, 0, NULL}, + {vc_pattern_ipv4_sctp, 0, NULL}, + {vc_pattern_ipv6, 0, NULL}, + {vc_pattern_ipv6_tcp, 0, NULL}, + {vc_pattern_ipv6_udp, 0, NULL}, + {vc_pattern_ipv6_sctp, 0, NULL}, + {vc_pattern_ether, 0, NULL}, + {vc_pattern_ipv4_gtpu, 0, NULL}, + {vc_pattern_ipv4_gtpu_eh, 0, NULL}, + {vc_pattern_ipv4_l2tpv3, 0, NULL}, + {vc_pattern_ipv6_l2tpv3, 0, NULL}, + {vc_pattern_ipv4_esp, 0, NULL}, + {vc_pattern_ipv6_esp, 0, NULL}, + {vc_pattern_ipv4_ah, 0, NULL}, + {vc_pattern_ipv6_ah, 0, NULL}, + {vc_pattern_ipv4_nat_t_esp, 0, NULL}, + {vc_pattern_ipv6_nat_t_esp, 0, NULL}, + {vc_pattern_ipv4_pfcp, 0, NULL}, + {vc_pattern_ipv6_pfcp, 0, NULL}, +}; + +struct virtchnl_fdir_inset_map { + enum virtchnl_proto_hdr_field field; + enum ice_flow_field fld; + u64 flag; + u64 mask; +}; + +static const struct virtchnl_fdir_inset_map fdir_inset_map[] = { + {VIRTCHNL_PROTO_HDR_ETH_ETHERTYPE, ICE_FLOW_FIELD_IDX_ETH_TYPE, 0, 0}, + {VIRTCHNL_PROTO_HDR_IPV4_SRC, ICE_FLOW_FIELD_IDX_IPV4_SA, 0, 0}, + {VIRTCHNL_PROTO_HDR_IPV4_DST, ICE_FLOW_FIELD_IDX_IPV4_DA, 0, 0}, + {VIRTCHNL_PROTO_HDR_IPV4_DSCP, ICE_FLOW_FIELD_IDX_IPV4_DSCP, 0, 0}, + {VIRTCHNL_PROTO_HDR_IPV4_TTL, ICE_FLOW_FIELD_IDX_IPV4_TTL, 0, 0}, + {VIRTCHNL_PROTO_HDR_IPV4_PROT, ICE_FLOW_FIELD_IDX_IPV4_PROT, 0, 0}, + {VIRTCHNL_PROTO_HDR_IPV6_SRC, ICE_FLOW_FIELD_IDX_IPV6_SA, 0, 0}, + 
{VIRTCHNL_PROTO_HDR_IPV6_DST, ICE_FLOW_FIELD_IDX_IPV6_DA, 0, 0}, + {VIRTCHNL_PROTO_HDR_IPV6_TC, ICE_FLOW_FIELD_IDX_IPV6_DSCP, 0, 0}, + {VIRTCHNL_PROTO_HDR_IPV6_HOP_LIMIT, ICE_FLOW_FIELD_IDX_IPV6_TTL, 0, 0}, + {VIRTCHNL_PROTO_HDR_IPV6_PROT, ICE_FLOW_FIELD_IDX_IPV6_PROT, 0, 0}, + {VIRTCHNL_PROTO_HDR_UDP_SRC_PORT, ICE_FLOW_FIELD_IDX_UDP_SRC_PORT, 0, 0}, + {VIRTCHNL_PROTO_HDR_UDP_DST_PORT, ICE_FLOW_FIELD_IDX_UDP_DST_PORT, 0, 0}, + {VIRTCHNL_PROTO_HDR_TCP_SRC_PORT, ICE_FLOW_FIELD_IDX_TCP_SRC_PORT, 0, 0}, + {VIRTCHNL_PROTO_HDR_TCP_DST_PORT, ICE_FLOW_FIELD_IDX_TCP_DST_PORT, 0, 0}, + {VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT, ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT, 0, 0}, + {VIRTCHNL_PROTO_HDR_SCTP_DST_PORT, ICE_FLOW_FIELD_IDX_SCTP_DST_PORT, 0, 0}, + {VIRTCHNL_PROTO_HDR_GTPU_IP_TEID, ICE_FLOW_FIELD_IDX_GTPU_IP_TEID, 0, 0}, + {VIRTCHNL_PROTO_HDR_GTPU_EH_QFI, ICE_FLOW_FIELD_IDX_GTPU_EH_QFI, 0, 0}, + {VIRTCHNL_PROTO_HDR_ESP_SPI, ICE_FLOW_FIELD_IDX_ESP_SPI, + FDIR_INSET_FLAG_ESP_IPSEC, FDIR_INSET_FLAG_ESP_M}, + {VIRTCHNL_PROTO_HDR_ESP_SPI, ICE_FLOW_FIELD_IDX_NAT_T_ESP_SPI, + FDIR_INSET_FLAG_ESP_UDP, FDIR_INSET_FLAG_ESP_M}, + {VIRTCHNL_PROTO_HDR_AH_SPI, ICE_FLOW_FIELD_IDX_AH_SPI, 0, 0}, + {VIRTCHNL_PROTO_HDR_L2TPV3_SESS_ID, ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID, 0, 0}, + {VIRTCHNL_PROTO_HDR_PFCP_S_FIELD, ICE_FLOW_FIELD_IDX_UDP_DST_PORT, 0, 0}, +}; + +/** + * ice_vc_fdir_param_check + * @vf: pointer to the VF structure + * @vsi_id: VF relative VSI ID + * + * Check for the valid VSI ID, PF's state and VF's state + * + * Return: 0 on success, and -EINVAL on error. + */ +static int +ice_vc_fdir_param_check(struct ice_vf *vf, u16 vsi_id) +{ + struct ice_pf *pf = vf->pf; + + if (!test_bit(ICE_FLAG_FD_ENA, pf->flags)) + return -EINVAL; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) + return -EINVAL; + + if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_FDIR_PF)) + return -EINVAL; + + if (vsi_id != vf->lan_vsi_num) + return -EINVAL; + + if (!ice_vc_isvalid_vsi_id(vf, vsi_id)) + return -EINVAL; + + if (!pf->vsi[vf->lan_vsi_idx]) + return -EINVAL; + + return 0; +} + +/** + * ice_vf_start_ctrl_vsi + * @vf: pointer to the VF structure + * + * Allocate ctrl_vsi for the first time and open the ctrl_vsi port for VF + * + * Return: 0 on success, and other on error. + */ +static int ice_vf_start_ctrl_vsi(struct ice_vf *vf) +{ + struct ice_pf *pf = vf->pf; + struct ice_vsi *ctrl_vsi; + struct device *dev; + int err; + + dev = ice_pf_to_dev(pf); + if (vf->ctrl_vsi_idx != ICE_NO_VSI) + return -EEXIST; + + ctrl_vsi = ice_vf_ctrl_vsi_setup(vf); + if (!ctrl_vsi) { + dev_dbg(dev, "Could not setup control VSI for VF %d\n", + vf->vf_id); + return -ENOMEM; + } + + err = ice_vsi_open_ctrl(ctrl_vsi); + if (err) { + dev_dbg(dev, "Could not open control VSI for VF %d\n", + vf->vf_id); + goto err_vsi_open; + } + + return 0; + +err_vsi_open: + ice_vsi_release(ctrl_vsi); + if (vf->ctrl_vsi_idx != ICE_NO_VSI) { + pf->vsi[vf->ctrl_vsi_idx] = NULL; + vf->ctrl_vsi_idx = ICE_NO_VSI; + } + return err; +} + +/** + * ice_vc_fdir_alloc_prof - allocate profile for this filter flow type + * @vf: pointer to the VF structure + * @flow: filter flow type + * + * Return: 0 on success, and other on error. 
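The ICE_FLOW_PROF_FD() layout defined near the top of this file can be sanity-checked with a small round-trip, sketched below under the assumption that flow + tun_offs fits in the low 32 bits:

/* Round-trip sketch for the 64-bit profile ID layout: low 32 bits are
 * flow + tun_offs, high 32 bits are the VSI index.
 */
static bool example_prof_id_roundtrip(u16 vsi, enum ice_fltr_ptype flow)
{
	u64 prof_id = ICE_FLOW_PROF_FD(vsi, flow, 0);

	return (u32)(prof_id & ICE_FLOW_PROF_TYPE_M) == (u32)flow &&
	       (u32)(prof_id >> ICE_FLOW_PROF_VSI_S) == vsi;
}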
+ */ +static int +ice_vc_fdir_alloc_prof(struct ice_vf *vf, enum ice_fltr_ptype flow) +{ + struct ice_vf_fdir *fdir = &vf->fdir; + + if (!fdir->fdir_prof) { + fdir->fdir_prof = devm_kcalloc(ice_pf_to_dev(vf->pf), + ICE_FLTR_PTYPE_MAX, + sizeof(*fdir->fdir_prof), + GFP_KERNEL); + if (!fdir->fdir_prof) + return -ENOMEM; + } + + if (!fdir->fdir_prof[flow]) { + fdir->fdir_prof[flow] = devm_kzalloc(ice_pf_to_dev(vf->pf), + sizeof(**fdir->fdir_prof), + GFP_KERNEL); + if (!fdir->fdir_prof[flow]) + return -ENOMEM; + } + + return 0; +} + +/** + * ice_vc_fdir_free_prof - free profile for this filter flow type + * @vf: pointer to the VF structure + * @flow: filter flow type + */ +static void +ice_vc_fdir_free_prof(struct ice_vf *vf, enum ice_fltr_ptype flow) +{ + struct ice_vf_fdir *fdir = &vf->fdir; + + if (!fdir->fdir_prof) + return; + + if (!fdir->fdir_prof[flow]) + return; + + devm_kfree(ice_pf_to_dev(vf->pf), fdir->fdir_prof[flow]); + fdir->fdir_prof[flow] = NULL; +} + +/** + * ice_vc_fdir_free_prof_all - free all the profiles for this VF + * @vf: pointer to the VF structure + */ +static void ice_vc_fdir_free_prof_all(struct ice_vf *vf) +{ + struct ice_vf_fdir *fdir = &vf->fdir; + enum ice_fltr_ptype flow; + + if (!fdir->fdir_prof) + return; + + for (flow = ICE_FLTR_PTYPE_NONF_NONE; flow < ICE_FLTR_PTYPE_MAX; flow++) + ice_vc_fdir_free_prof(vf, flow); + + devm_kfree(ice_pf_to_dev(vf->pf), fdir->fdir_prof); + fdir->fdir_prof = NULL; +} + +/** + * ice_vc_fdir_parse_flow_fld + * @proto_hdr: virtual channel protocol filter header + * @conf: FDIR configuration for each filter + * @fld: field type array + * @fld_cnt: field counter + * + * Parse the virtual channel filter header and store the fields into the field type array + * + * Return: 0 on success, and other on error.
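Before the parser below, note the ice_flow_set_fld() convention it feeds into: passing ICE_FLOW_FLD_OFF_INVAL for the value/mask/last offsets, as the code later in this file does, selects each field at its standard header offset rather than at raw packet offsets. A minimal sketch marking the IPv4 address pair as a segment's input set:

/* Minimal sketch: mark IPv4 src/dst as the input set of a segment,
 * letting the flow engine use the fields' standard offsets.
 */
static void example_seg_ipv4_addrs(struct ice_flow_seg_info *seg)
{
	ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV4_SA,
			 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
			 ICE_FLOW_FLD_OFF_INVAL, false);
	ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV4_DA,
			 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
			 ICE_FLOW_FLD_OFF_INVAL, false);
}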
+ */ +static int +ice_vc_fdir_set_flow_fld(struct ice_vf *vf, struct virtchnl_fdir_add *fltr, + struct virtchnl_fdir_fltr_conf *conf, + struct ice_flow_seg_info *seg) +{ + struct virtchnl_fdir_rule *rule = &fltr->rule_cfg; + enum ice_flow_field fld[ICE_FLOW_FIELD_IDX_MAX]; + struct device *dev = ice_pf_to_dev(vf->pf); + struct virtchnl_proto_hdrs *proto; + int fld_cnt = 0; + int i; + + proto = &rule->proto_hdrs; + for (i = 0; i < proto->count; i++) { + struct virtchnl_proto_hdr *hdr = &proto->proto_hdr[i]; + int ret; + + ret = ice_vc_fdir_parse_flow_fld(hdr, conf, fld, &fld_cnt); + if (ret) + return ret; + } + + if (fld_cnt == 0) { + dev_dbg(dev, "Empty input set for VF %d\n", vf->vf_id); + return -EINVAL; + } + + for (i = 0; i < fld_cnt; i++) + ice_flow_set_fld(seg, fld[i], + ICE_FLOW_FLD_OFF_INVAL, + ICE_FLOW_FLD_OFF_INVAL, + ICE_FLOW_FLD_OFF_INVAL, false); + + return 0; +} + +/** + * ice_vc_fdir_set_flow_hdr - config the flow's packet segment header + * @vf: pointer to the VF structure + * @conf: FDIR configuration for each filter + * @seg: array of one or more packet segments that describe the flow + * + * Return: 0 on success, and other on error. + */ +static int +ice_vc_fdir_set_flow_hdr(struct ice_vf *vf, + struct virtchnl_fdir_fltr_conf *conf, + struct ice_flow_seg_info *seg) +{ + enum ice_fltr_ptype flow = conf->input.flow_type; + enum ice_fdir_tunnel_type ttype = conf->ttype; + struct device *dev = ice_pf_to_dev(vf->pf); + + switch (flow) { + case ICE_FLTR_PTYPE_NON_IP_L2: + ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_ETH_NON_IP); + break; + case ICE_FLTR_PTYPE_NONF_IPV4_L2TPV3: + ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_L2TPV3 | + ICE_FLOW_SEG_HDR_IPV4 | + ICE_FLOW_SEG_HDR_IPV_OTHER); + break; + case ICE_FLTR_PTYPE_NONF_IPV4_ESP: + ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_ESP | + ICE_FLOW_SEG_HDR_IPV4 | + ICE_FLOW_SEG_HDR_IPV_OTHER); + break; + case ICE_FLTR_PTYPE_NONF_IPV4_AH: + ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_AH | + ICE_FLOW_SEG_HDR_IPV4 | + ICE_FLOW_SEG_HDR_IPV_OTHER); + break; + case ICE_FLTR_PTYPE_NONF_IPV4_NAT_T_ESP: + ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_NAT_T_ESP | + ICE_FLOW_SEG_HDR_IPV4 | + ICE_FLOW_SEG_HDR_IPV_OTHER); + break; + case ICE_FLTR_PTYPE_NONF_IPV4_PFCP_NODE: + ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_PFCP_NODE | + ICE_FLOW_SEG_HDR_IPV4 | + ICE_FLOW_SEG_HDR_IPV_OTHER); + break; + case ICE_FLTR_PTYPE_NONF_IPV4_PFCP_SESSION: + ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_PFCP_SESSION | + ICE_FLOW_SEG_HDR_IPV4 | + ICE_FLOW_SEG_HDR_IPV_OTHER); + break; + case ICE_FLTR_PTYPE_NONF_IPV4_OTHER: + ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_IPV4 | + ICE_FLOW_SEG_HDR_IPV_OTHER); + break; + case ICE_FLTR_PTYPE_NONF_IPV4_TCP: + ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_TCP | + ICE_FLOW_SEG_HDR_IPV4 | + ICE_FLOW_SEG_HDR_IPV_OTHER); + break; + case ICE_FLTR_PTYPE_NONF_IPV4_UDP: + ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_UDP | + ICE_FLOW_SEG_HDR_IPV4 | + ICE_FLOW_SEG_HDR_IPV_OTHER); + break; + case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_UDP: + case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_TCP: + case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_ICMP: + case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER: + if (ttype == ICE_FDIR_TUNNEL_TYPE_GTPU) { + ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_GTPU_IP | + ICE_FLOW_SEG_HDR_IPV4 | + ICE_FLOW_SEG_HDR_IPV_OTHER); + } else if (ttype == ICE_FDIR_TUNNEL_TYPE_GTPU_EH) { + ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_GTPU_EH | + ICE_FLOW_SEG_HDR_GTPU_IP | + ICE_FLOW_SEG_HDR_IPV4 | + ICE_FLOW_SEG_HDR_IPV_OTHER); + } else { + dev_dbg(dev, "Invalid tunnel type 0x%x for VF 
%d\n", + flow, vf->vf_id); + return -EINVAL; + } + break; + case ICE_FLTR_PTYPE_NONF_IPV4_SCTP: + ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_SCTP | + ICE_FLOW_SEG_HDR_IPV4 | + ICE_FLOW_SEG_HDR_IPV_OTHER); + break; + case ICE_FLTR_PTYPE_NONF_IPV6_L2TPV3: + ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_L2TPV3 | + ICE_FLOW_SEG_HDR_IPV6 | + ICE_FLOW_SEG_HDR_IPV_OTHER); + break; + case ICE_FLTR_PTYPE_NONF_IPV6_ESP: + ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_ESP | + ICE_FLOW_SEG_HDR_IPV6 | + ICE_FLOW_SEG_HDR_IPV_OTHER); + break; + case ICE_FLTR_PTYPE_NONF_IPV6_AH: + ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_AH | + ICE_FLOW_SEG_HDR_IPV6 | + ICE_FLOW_SEG_HDR_IPV_OTHER); + break; + case ICE_FLTR_PTYPE_NONF_IPV6_NAT_T_ESP: + ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_NAT_T_ESP | + ICE_FLOW_SEG_HDR_IPV6 | + ICE_FLOW_SEG_HDR_IPV_OTHER); + break; + case ICE_FLTR_PTYPE_NONF_IPV6_PFCP_NODE: + ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_PFCP_NODE | + ICE_FLOW_SEG_HDR_IPV6 | + ICE_FLOW_SEG_HDR_IPV_OTHER); + break; + case ICE_FLTR_PTYPE_NONF_IPV6_PFCP_SESSION: + ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_PFCP_SESSION | + ICE_FLOW_SEG_HDR_IPV6 | + ICE_FLOW_SEG_HDR_IPV_OTHER); + break; + case ICE_FLTR_PTYPE_NONF_IPV6_OTHER: + ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_IPV6 | + ICE_FLOW_SEG_HDR_IPV_OTHER); + break; + case ICE_FLTR_PTYPE_NONF_IPV6_TCP: + ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_TCP | + ICE_FLOW_SEG_HDR_IPV6 | + ICE_FLOW_SEG_HDR_IPV_OTHER); + break; + case ICE_FLTR_PTYPE_NONF_IPV6_UDP: + ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_UDP | + ICE_FLOW_SEG_HDR_IPV6 | + ICE_FLOW_SEG_HDR_IPV_OTHER); + break; + case ICE_FLTR_PTYPE_NONF_IPV6_SCTP: + ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_SCTP | + ICE_FLOW_SEG_HDR_IPV6 | + ICE_FLOW_SEG_HDR_IPV_OTHER); + break; + default: + dev_dbg(dev, "Invalid flow type 0x%x for VF %d failed\n", + flow, vf->vf_id); + return -EINVAL; + } + + return 0; +} + +/** + * ice_vc_fdir_rem_prof - remove profile for this filter flow type + * @vf: pointer to the VF structure + * @flow: filter flow type + * @tun: 0 implies non-tunnel type filter, 1 implies tunnel type filter + */ +static void +ice_vc_fdir_rem_prof(struct ice_vf *vf, enum ice_fltr_ptype flow, int tun) +{ + struct ice_vf_fdir *fdir = &vf->fdir; + struct ice_fd_hw_prof *vf_prof; + struct ice_pf *pf = vf->pf; + struct ice_vsi *vf_vsi; + struct device *dev; + struct ice_hw *hw; + u64 prof_id; + int i; + + dev = ice_pf_to_dev(pf); + hw = &pf->hw; + if (!fdir->fdir_prof || !fdir->fdir_prof[flow]) + return; + + vf_prof = fdir->fdir_prof[flow]; + + vf_vsi = pf->vsi[vf->lan_vsi_idx]; + if (!vf_vsi) { + dev_dbg(dev, "NULL vf %d vsi pointer\n", vf->vf_id); + return; + } + + if (!fdir->prof_entry_cnt[flow][tun]) + return; + + prof_id = ICE_FLOW_PROF_FD(vf_vsi->vsi_num, + flow, tun ? 
ICE_FLTR_PTYPE_MAX : 0); + + for (i = 0; i < fdir->prof_entry_cnt[flow][tun]; i++) + if (vf_prof->entry_h[i][tun]) { + u16 vsi_num = ice_get_hw_vsi_num(hw, vf_prof->vsi_h[i]); + + ice_rem_prof_id_flow(hw, ICE_BLK_FD, vsi_num, prof_id); + ice_flow_rem_entry(hw, ICE_BLK_FD, + vf_prof->entry_h[i][tun]); + vf_prof->entry_h[i][tun] = 0; + } + + ice_flow_rem_prof(hw, ICE_BLK_FD, prof_id); + devm_kfree(dev, vf_prof->fdir_seg[tun]); + vf_prof->fdir_seg[tun] = NULL; + + for (i = 0; i < vf_prof->cnt; i++) + vf_prof->vsi_h[i] = 0; + + fdir->prof_entry_cnt[flow][tun] = 0; +} + +/** + * ice_vc_fdir_rem_prof_all - remove profile for this VF + * @vf: pointer to the VF structure + */ +static void ice_vc_fdir_rem_prof_all(struct ice_vf *vf) +{ + enum ice_fltr_ptype flow; + + for (flow = ICE_FLTR_PTYPE_NONF_NONE; + flow < ICE_FLTR_PTYPE_MAX; flow++) { + ice_vc_fdir_rem_prof(vf, flow, 0); + ice_vc_fdir_rem_prof(vf, flow, 1); + } +} + +/** + * ice_vc_fdir_write_flow_prof + * @vf: pointer to the VF structure + * @flow: filter flow type + * @seg: array of one or more packet segments that describe the flow + * @tun: 0 implies non-tunnel type filter, 1 implies tunnel type filter + * + * Write the flow's profile config and packet segment into the hardware + * + * Return: 0 on success, and other on error. + */ +static int +ice_vc_fdir_write_flow_prof(struct ice_vf *vf, enum ice_fltr_ptype flow, + struct ice_flow_seg_info *seg, int tun) +{ + struct ice_vf_fdir *fdir = &vf->fdir; + struct ice_vsi *vf_vsi, *ctrl_vsi; + struct ice_flow_seg_info *old_seg; + struct ice_flow_prof *prof = NULL; + struct ice_fd_hw_prof *vf_prof; + enum ice_status status; + struct device *dev; + struct ice_pf *pf; + struct ice_hw *hw; + u64 entry1_h = 0; + u64 entry2_h = 0; + u64 prof_id; + int ret; + + pf = vf->pf; + dev = ice_pf_to_dev(pf); + hw = &pf->hw; + vf_vsi = pf->vsi[vf->lan_vsi_idx]; + if (!vf_vsi) + return -EINVAL; + + ctrl_vsi = pf->vsi[vf->ctrl_vsi_idx]; + if (!ctrl_vsi) + return -EINVAL; + + vf_prof = fdir->fdir_prof[flow]; + old_seg = vf_prof->fdir_seg[tun]; + if (old_seg) { + if (!memcmp(old_seg, seg, sizeof(*seg))) { + dev_dbg(dev, "Duplicated profile for VF %d!\n", + vf->vf_id); + return -EEXIST; + } + + if (fdir->fdir_fltr_cnt[flow][tun]) { + ret = -EINVAL; + dev_dbg(dev, "Input set conflicts for VF %d\n", + vf->vf_id); + goto err_exit; + } + + /* remove previously allocated profile */ + ice_vc_fdir_rem_prof(vf, flow, tun); + } + + prof_id = ICE_FLOW_PROF_FD(vf_vsi->vsi_num, flow, + tun ? 
ICE_FLTR_PTYPE_MAX : 0); + + status = ice_flow_add_prof(hw, ICE_BLK_FD, ICE_FLOW_RX, prof_id, seg, + tun + 1, &prof); + ret = ice_status_to_errno(status); + if (ret) { + dev_dbg(dev, "Could not add VSI flow 0x%x for VF %d\n", + flow, vf->vf_id); + goto err_exit; + } + + status = ice_flow_add_entry(hw, ICE_BLK_FD, prof_id, vf_vsi->idx, + vf_vsi->idx, ICE_FLOW_PRIO_NORMAL, + seg, &entry1_h); + ret = ice_status_to_errno(status); + if (ret) { + dev_dbg(dev, "Could not add flow 0x%x VSI entry for VF %d\n", + flow, vf->vf_id); + goto err_prof; + } + + status = ice_flow_add_entry(hw, ICE_BLK_FD, prof_id, vf_vsi->idx, + ctrl_vsi->idx, ICE_FLOW_PRIO_NORMAL, + seg, &entry2_h); + ret = ice_status_to_errno(status); + if (ret) { + dev_dbg(dev, + "Could not add flow 0x%x Ctrl VSI entry for VF %d\n", + flow, vf->vf_id); + goto err_entry_1; + } + + vf_prof->fdir_seg[tun] = seg; + vf_prof->cnt = 0; + fdir->prof_entry_cnt[flow][tun] = 0; + + vf_prof->entry_h[vf_prof->cnt][tun] = entry1_h; + vf_prof->vsi_h[vf_prof->cnt] = vf_vsi->idx; + vf_prof->cnt++; + fdir->prof_entry_cnt[flow][tun]++; + + vf_prof->entry_h[vf_prof->cnt][tun] = entry2_h; + vf_prof->vsi_h[vf_prof->cnt] = ctrl_vsi->idx; + vf_prof->cnt++; + fdir->prof_entry_cnt[flow][tun]++; + + return 0; + +err_entry_1: + ice_rem_prof_id_flow(hw, ICE_BLK_FD, + ice_get_hw_vsi_num(hw, vf_vsi->idx), prof_id); + ice_flow_rem_entry(hw, ICE_BLK_FD, entry1_h); +err_prof: + ice_flow_rem_prof(hw, ICE_BLK_FD, prof_id); +err_exit: + return ret; +} + +/** + * ice_vc_fdir_config_input_set + * @vf: pointer to the VF structure + * @fltr: virtual channel add cmd buffer + * @conf: FDIR configuration for each filter + * @tun: 0 implies non-tunnel type filter, 1 implies tunnel type filter + * + * Config the input set type and value for virtual channel add msg buffer + * + * Return: 0 on success, and other on error. + */ +static int +ice_vc_fdir_config_input_set(struct ice_vf *vf, struct virtchnl_fdir_add *fltr, + struct virtchnl_fdir_fltr_conf *conf, int tun) +{ + struct ice_fdir_fltr *input = &conf->input; + struct device *dev = ice_pf_to_dev(vf->pf); + struct ice_flow_seg_info *seg; + enum ice_fltr_ptype flow; + int ret; + + flow = input->flow_type; + ret = ice_vc_fdir_alloc_prof(vf, flow); + if (ret) { + dev_dbg(dev, "Alloc flow prof for VF %d failed\n", vf->vf_id); + return ret; + } + + seg = devm_kzalloc(dev, sizeof(*seg), GFP_KERNEL); + if (!seg) + return -ENOMEM; + + ret = ice_vc_fdir_set_flow_fld(vf, fltr, conf, seg); + if (ret) { + dev_dbg(dev, "Set flow field for VF %d failed\n", vf->vf_id); + goto err_exit; + } + + ret = ice_vc_fdir_set_flow_hdr(vf, conf, seg); + if (ret) { + dev_dbg(dev, "Set flow hdr for VF %d failed\n", vf->vf_id); + goto err_exit; + } + + ret = ice_vc_fdir_write_flow_prof(vf, flow, seg, tun); + if (ret == -EEXIST) { + devm_kfree(dev, seg); + } else if (ret) { + dev_dbg(dev, "Write flow profile for VF %d failed\n", + vf->vf_id); + goto err_exit; + } + + return 0; + +err_exit: + devm_kfree(dev, seg); + return ret; +} + +/** + * ice_vc_fdir_match_pattern + * @fltr: virtual channel add cmd buffer + * @type: virtual channel protocol filter header type + * + * Matching the header type by comparing fltr and type's value. + * + * Return: true on success, and false on error. 
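Since each supported pattern is just a NONE-terminated list of header types, extending the tables defined earlier in this file is a data-only change. A hypothetical template, not part of this patch, showing the format:

/* Hypothetical extra template, shown only to illustrate the table
 * format: a header-type list terminated by VIRTCHNL_PROTO_HDR_NONE.
 */
static enum virtchnl_proto_hdr_type vc_pattern_ipv4_icmp_example[] = {
	VIRTCHNL_PROTO_HDR_ETH,
	VIRTCHNL_PROTO_HDR_IPV4,
	VIRTCHNL_PROTO_HDR_ICMP,
	VIRTCHNL_PROTO_HDR_NONE,
};

Registering it would then be a one-line {vc_pattern_ipv4_icmp_example, 0, NULL} entry in one of the pattern tables above.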
+ */ +static bool +ice_vc_fdir_match_pattern(struct virtchnl_fdir_add *fltr, + enum virtchnl_proto_hdr_type *type) +{ + struct virtchnl_proto_hdrs *proto = &fltr->rule_cfg.proto_hdrs; + int i = 0; + + while ((i < proto->count) && + (*type == proto->proto_hdr[i].type) && + (*type != VIRTCHNL_PROTO_HDR_NONE)) { + type++; + i++; + } + + return ((i == proto->count) && (*type == VIRTCHNL_PROTO_HDR_NONE)); +} + +/** + * ice_vc_fdir_get_pattern - get the allowed pattern list + * @vf: pointer to the VF info + * @len: filter list length + * + * Return: pointer to allowed filter list + */ +static const struct virtchnl_fdir_pattern_match_item * +ice_vc_fdir_get_pattern(struct ice_vf *vf, int *len) +{ + const struct virtchnl_fdir_pattern_match_item *item; + struct ice_pf *pf = vf->pf; + struct ice_hw *hw; + + hw = &pf->hw; + if (!strncmp(hw->active_pkg_name, "ICE COMMS Package", + sizeof(hw->active_pkg_name))) { + item = vc_fdir_pattern_comms; + *len = ARRAY_SIZE(vc_fdir_pattern_comms); + } else { + item = vc_fdir_pattern_os; + *len = ARRAY_SIZE(vc_fdir_pattern_os); + } + + return item; +} + +/** + * ice_vc_fdir_search_pattern + * @vf: pointer to the VF info + * @fltr: virtual channel add cmd buffer + * + * Search for a matching pattern in the supported pattern list + * + * Return: 0 on success, and other on error. + */ +static int +ice_vc_fdir_search_pattern(struct ice_vf *vf, struct virtchnl_fdir_add *fltr) +{ + const struct virtchnl_fdir_pattern_match_item *pattern; + int len, i; + + pattern = ice_vc_fdir_get_pattern(vf, &len); + + for (i = 0; i < len; i++) + if (ice_vc_fdir_match_pattern(fltr, pattern[i].list)) + return 0; + + return -EINVAL; +} + +/** + * ice_vc_fdir_parse_pattern + * @vf: pointer to the VF info + * @fltr: virtual channel add cmd buffer + * @conf: FDIR configuration for each filter + * + * Parse the virtual channel filter's pattern and store it into conf + * + * Return: 0 on success, and other on error.
+ */ +static int +ice_vc_fdir_parse_pattern(struct ice_vf *vf, struct virtchnl_fdir_add *fltr, + struct virtchnl_fdir_fltr_conf *conf) +{ + struct virtchnl_proto_hdrs *proto = &fltr->rule_cfg.proto_hdrs; + enum virtchnl_proto_hdr_type l3 = VIRTCHNL_PROTO_HDR_NONE; + enum virtchnl_proto_hdr_type l4 = VIRTCHNL_PROTO_HDR_NONE; + struct device *dev = ice_pf_to_dev(vf->pf); + struct ice_fdir_fltr *input = &conf->input; + int i; + + if (proto->count > VIRTCHNL_MAX_NUM_PROTO_HDRS) { + dev_dbg(dev, "Invalid protocol count:0x%x for VF %d\n", + proto->count, vf->vf_id); + return -EINVAL; + } + + for (i = 0; i < proto->count; i++) { + struct virtchnl_proto_hdr *hdr = &proto->proto_hdr[i]; + struct ip_esp_hdr *esph; + struct ip_auth_hdr *ah; + struct sctphdr *sctph; + struct ipv6hdr *ip6h; + struct udphdr *udph; + struct tcphdr *tcph; + struct ethhdr *eth; + struct iphdr *iph; + u8 s_field; + u8 *rawh; + + switch (hdr->type) { + case VIRTCHNL_PROTO_HDR_ETH: + eth = (struct ethhdr *)hdr->buffer; + input->flow_type = ICE_FLTR_PTYPE_NON_IP_L2; + + if (hdr->field_selector) + input->ext_data.ether_type = eth->h_proto; + break; + case VIRTCHNL_PROTO_HDR_IPV4: + iph = (struct iphdr *)hdr->buffer; + l3 = VIRTCHNL_PROTO_HDR_IPV4; + input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_OTHER; + + if (hdr->field_selector) { + input->ip.v4.src_ip = iph->saddr; + input->ip.v4.dst_ip = iph->daddr; + input->ip.v4.tos = iph->tos; + input->ip.v4.proto = iph->protocol; + } + break; + case VIRTCHNL_PROTO_HDR_IPV6: + ip6h = (struct ipv6hdr *)hdr->buffer; + l3 = VIRTCHNL_PROTO_HDR_IPV6; + input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_OTHER; + + if (hdr->field_selector) { + memcpy(input->ip.v6.src_ip, + ip6h->saddr.in6_u.u6_addr8, + sizeof(ip6h->saddr)); + memcpy(input->ip.v6.dst_ip, + ip6h->daddr.in6_u.u6_addr8, + sizeof(ip6h->daddr)); + input->ip.v6.tc = ((u8)(ip6h->priority) << 4) | + (ip6h->flow_lbl[0] >> 4); + input->ip.v6.proto = ip6h->nexthdr; + } + break; + case VIRTCHNL_PROTO_HDR_TCP: + tcph = (struct tcphdr *)hdr->buffer; + if (l3 == VIRTCHNL_PROTO_HDR_IPV4) + input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_TCP; + else if (l3 == VIRTCHNL_PROTO_HDR_IPV6) + input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_TCP; + + if (hdr->field_selector) { + if (l3 == VIRTCHNL_PROTO_HDR_IPV4) { + input->ip.v4.src_port = tcph->source; + input->ip.v4.dst_port = tcph->dest; + } else if (l3 == VIRTCHNL_PROTO_HDR_IPV6) { + input->ip.v6.src_port = tcph->source; + input->ip.v6.dst_port = tcph->dest; + } + } + break; + case VIRTCHNL_PROTO_HDR_UDP: + udph = (struct udphdr *)hdr->buffer; + if (l3 == VIRTCHNL_PROTO_HDR_IPV4) + input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_UDP; + else if (l3 == VIRTCHNL_PROTO_HDR_IPV6) + input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_UDP; + + if (hdr->field_selector) { + if (l3 == VIRTCHNL_PROTO_HDR_IPV4) { + input->ip.v4.src_port = udph->source; + input->ip.v4.dst_port = udph->dest; + } else if (l3 == VIRTCHNL_PROTO_HDR_IPV6) { + input->ip.v6.src_port = udph->source; + input->ip.v6.dst_port = udph->dest; + } + } + break; + case VIRTCHNL_PROTO_HDR_SCTP: + sctph = (struct sctphdr *)hdr->buffer; + if (l3 == VIRTCHNL_PROTO_HDR_IPV4) + input->flow_type = + ICE_FLTR_PTYPE_NONF_IPV4_SCTP; + else if (l3 == VIRTCHNL_PROTO_HDR_IPV6) + input->flow_type = + ICE_FLTR_PTYPE_NONF_IPV6_SCTP; + + if (hdr->field_selector) { + if (l3 == VIRTCHNL_PROTO_HDR_IPV4) { + input->ip.v4.src_port = sctph->source; + input->ip.v4.dst_port = sctph->dest; + } else if (l3 == VIRTCHNL_PROTO_HDR_IPV6) { + input->ip.v6.src_port = sctph->source; + input->ip.v6.dst_port = 
sctph->dest;
+				}
+			}
+			break;
+		case VIRTCHNL_PROTO_HDR_L2TPV3:
+			if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_L2TPV3;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_L2TPV3;
+
+			if (hdr->field_selector)
+				input->l2tpv3_data.session_id = *((__be32 *)hdr->buffer);
+			break;
+		case VIRTCHNL_PROTO_HDR_ESP:
+			esph = (struct ip_esp_hdr *)hdr->buffer;
+			if (l3 == VIRTCHNL_PROTO_HDR_IPV4 &&
+			    l4 == VIRTCHNL_PROTO_HDR_UDP)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_NAT_T_ESP;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6 &&
+				 l4 == VIRTCHNL_PROTO_HDR_UDP)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_NAT_T_ESP;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV4 &&
+				 l4 == VIRTCHNL_PROTO_HDR_NONE)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_ESP;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6 &&
+				 l4 == VIRTCHNL_PROTO_HDR_NONE)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_ESP;
+
+			if (l4 == VIRTCHNL_PROTO_HDR_UDP)
+				conf->inset_flag |= FDIR_INSET_FLAG_ESP_UDP;
+			else
+				conf->inset_flag |= FDIR_INSET_FLAG_ESP_IPSEC;
+
+			if (hdr->field_selector) {
+				if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+					input->ip.v4.sec_parm_idx = esph->spi;
+				else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+					input->ip.v6.sec_parm_idx = esph->spi;
+			}
+			break;
+		case VIRTCHNL_PROTO_HDR_AH:
+			ah = (struct ip_auth_hdr *)hdr->buffer;
+			if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_AH;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_AH;
+
+			if (hdr->field_selector) {
+				if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+					input->ip.v4.sec_parm_idx = ah->spi;
+				else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+					input->ip.v6.sec_parm_idx = ah->spi;
+			}
+			break;
+		case VIRTCHNL_PROTO_HDR_PFCP:
+			rawh = (u8 *)hdr->buffer;
+			s_field = (rawh[0] >> PFCP_S_OFFSET) & PFCP_S_MASK;
+			if (l3 == VIRTCHNL_PROTO_HDR_IPV4 && s_field == 0)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_PFCP_NODE;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV4 && s_field == 1)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_PFCP_SESSION;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6 && s_field == 0)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_PFCP_NODE;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6 && s_field == 1)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_PFCP_SESSION;
+
+			if (hdr->field_selector) {
+				if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+					input->ip.v4.dst_port = cpu_to_be16(PFCP_PORT_NR);
+				else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+					input->ip.v6.dst_port = cpu_to_be16(PFCP_PORT_NR);
+			}
+			break;
+		case VIRTCHNL_PROTO_HDR_GTPU_IP:
+			rawh = (u8 *)hdr->buffer;
+			input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER;
+
+			if (hdr->field_selector)
+				input->gtpu_data.teid = *(__be32 *)(&rawh[GTPU_TEID_OFFSET]);
+			conf->ttype = ICE_FDIR_TUNNEL_TYPE_GTPU;
+			break;
+		case VIRTCHNL_PROTO_HDR_GTPU_EH:
+			rawh = (u8 *)hdr->buffer;
+
+			if (hdr->field_selector)
+				input->gtpu_data.qfi = rawh[GTPU_EH_QFI_OFFSET] & GTPU_EH_QFI_MASK;
+			conf->ttype = ICE_FDIR_TUNNEL_TYPE_GTPU_EH;
+			break;
+		default:
+			dev_dbg(dev, "Invalid header type 0x%x for VF %d\n",
+				hdr->type, vf->vf_id);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vc_fdir_parse_action
+ * @vf: pointer to the VF info
+ * @fltr: virtual channel add cmd buffer
+ * @conf: FDIR configuration for each filter
+ *
+ * Parse the virtual channel filter's action and store it into conf
+ *
+ * Return: 0 on success, and other on error.
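+ *
+ * Illustrative action set (not taken from this patch): a well-formed set
+ * carries exactly one destination action and at most one mark action,
+ * e.g. "steer to queue 3 and mark with 0x1234":
+ *
+ *	as->count = 2;
+ *	as->actions[0].type = VIRTCHNL_ACTION_QUEUE;
+ *	as->actions[0].act_conf.queue.index = 3;
+ *	as->actions[1].type = VIRTCHNL_ACTION_MARK;
+ *	as->actions[1].act_conf.mark_id = 0x1234;
+ *
+ * Zero destinations, two destinations, or a second mark action is
+ * rejected with -EINVAL below.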
+ */
+static int
+ice_vc_fdir_parse_action(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
+			 struct virtchnl_fdir_fltr_conf *conf)
+{
+	struct virtchnl_filter_action_set *as = &fltr->rule_cfg.action_set;
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	struct ice_fdir_fltr *input = &conf->input;
+	u32 dest_num = 0;
+	u32 mark_num = 0;
+	int i;
+
+	if (as->count > VIRTCHNL_MAX_NUM_ACTIONS) {
+		dev_dbg(dev, "Invalid action count:0x%x for VF %d\n",
+			as->count, vf->vf_id);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < as->count; i++) {
+		struct virtchnl_filter_action *action = &as->actions[i];
+
+		switch (action->type) {
+		case VIRTCHNL_ACTION_PASSTHRU:
+			dest_num++;
+			input->dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_OTHER;
+			break;
+		case VIRTCHNL_ACTION_DROP:
+			dest_num++;
+			input->dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DROP_PKT;
+			break;
+		case VIRTCHNL_ACTION_QUEUE:
+			dest_num++;
+			input->dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QINDEX;
+			input->q_index = action->act_conf.queue.index;
+			break;
+		case VIRTCHNL_ACTION_Q_REGION:
+			dest_num++;
+			input->dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QGROUP;
+			input->q_index = action->act_conf.queue.index;
+			input->q_region = action->act_conf.queue.region;
+			break;
+		case VIRTCHNL_ACTION_MARK:
+			mark_num++;
+			input->fltr_id = action->act_conf.mark_id;
+			input->fdid_prio = ICE_FXD_FLTR_QW1_FDID_PRI_THREE;
+			break;
+		default:
+			dev_dbg(dev, "Invalid action type:0x%x for VF %d\n",
+				action->type, vf->vf_id);
+			return -EINVAL;
+		}
+	}
+
+	if (dest_num == 0 || dest_num >= 2) {
+		dev_dbg(dev, "Invalid destination action for VF %d\n",
+			vf->vf_id);
+		return -EINVAL;
+	}
+
+	if (mark_num >= 2) {
+		dev_dbg(dev, "Too many mark actions for VF %d\n", vf->vf_id);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vc_validate_fdir_fltr - validate the virtual channel filter
+ * @vf: pointer to the VF info
+ * @fltr: virtual channel add cmd buffer
+ * @conf: FDIR configuration for each filter
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_validate_fdir_fltr(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
+			  struct virtchnl_fdir_fltr_conf *conf)
+{
+	int ret;
+
+	ret = ice_vc_fdir_search_pattern(vf, fltr);
+	if (ret)
+		return ret;
+
+	ret = ice_vc_fdir_parse_pattern(vf, fltr, conf);
+	if (ret)
+		return ret;
+
+	return ice_vc_fdir_parse_action(vf, fltr, conf);
+}
+
+/**
+ * ice_vc_fdir_comp_rules - compare if two filter rules have the same value
+ * @conf_a: FDIR configuration for filter a
+ * @conf_b: FDIR configuration for filter b
+ *
+ * Return: true if the two rules carry the same match key, false otherwise.
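+ *
+ * Only the match key is compared (tunnel type, flow type, IP and mask,
+ * GTPU, L2TPv3 and extended fields); the action is ignored. As a
+ * consequence (illustrative, not taken from this patch), two rules that
+ * match the same packets but steer to different queues compare as equal:
+ *
+ *	conf_a->input.q_index = 3;	(same match key)
+ *	conf_b->input.q_index = 5;
+ *	ice_vc_fdir_comp_rules(conf_a, conf_b) == true
+ *
+ * and the second add is reported as VIRTCHNL_FDIR_FAILURE_RULE_EXIST via
+ * ice_vc_fdir_is_dup_fltr().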
+ */
+static bool
+ice_vc_fdir_comp_rules(struct virtchnl_fdir_fltr_conf *conf_a,
+		       struct virtchnl_fdir_fltr_conf *conf_b)
+{
+	struct ice_fdir_fltr *a = &conf_a->input;
+	struct ice_fdir_fltr *b = &conf_b->input;
+
+	if (conf_a->ttype != conf_b->ttype)
+		return false;
+	if (a->flow_type != b->flow_type)
+		return false;
+	if (memcmp(&a->ip, &b->ip, sizeof(a->ip)))
+		return false;
+	if (memcmp(&a->mask, &b->mask, sizeof(a->mask)))
+		return false;
+	if (memcmp(&a->gtpu_data, &b->gtpu_data, sizeof(a->gtpu_data)))
+		return false;
+	if (memcmp(&a->gtpu_mask, &b->gtpu_mask, sizeof(a->gtpu_mask)))
+		return false;
+	if (memcmp(&a->l2tpv3_data, &b->l2tpv3_data, sizeof(a->l2tpv3_data)))
+		return false;
+	if (memcmp(&a->l2tpv3_mask, &b->l2tpv3_mask, sizeof(a->l2tpv3_mask)))
+		return false;
+	if (memcmp(&a->ext_data, &b->ext_data, sizeof(a->ext_data)))
+		return false;
+	if (memcmp(&a->ext_mask, &b->ext_mask, sizeof(a->ext_mask)))
+		return false;
+
+	return true;
+}
+
+/**
+ * ice_vc_fdir_is_dup_fltr
+ * @vf: pointer to the VF info
+ * @conf: FDIR configuration for each filter
+ *
+ * Check if there is a duplicated rule with the same conf value
+ *
+ * Return: true if a duplicate rule exists, false otherwise.
+ */
+static bool
+ice_vc_fdir_is_dup_fltr(struct ice_vf *vf, struct virtchnl_fdir_fltr_conf *conf)
+{
+	struct ice_fdir_fltr *desc;
+	bool ret;
+
+	list_for_each_entry(desc, &vf->fdir.fdir_rule_list, fltr_node) {
+		struct virtchnl_fdir_fltr_conf *node =
+				to_fltr_conf_from_desc(desc);
+
+		ret = ice_vc_fdir_comp_rules(node, conf);
+		if (ret)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * ice_vc_fdir_insert_entry
+ * @vf: pointer to the VF info
+ * @conf: FDIR configuration for each filter
+ * @id: pointer to ID value allocated by driver
+ *
+ * Insert FDIR conf entry into list and allocate ID for this filter
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_insert_entry(struct ice_vf *vf,
+			 struct virtchnl_fdir_fltr_conf *conf, u32 *id)
+{
+	struct ice_fdir_fltr *input = &conf->input;
+	int i;
+
+	/* alloc ID corresponding with conf */
+	i = idr_alloc(&vf->fdir.fdir_rule_idr, conf, 0,
+		      ICE_FDIR_MAX_FLTRS, GFP_KERNEL);
+	if (i < 0)
+		return -EINVAL;
+	*id = i;
+
+	list_add(&input->fltr_node, &vf->fdir.fdir_rule_list);
+	return 0;
+}
+
+/**
+ * ice_vc_fdir_remove_entry - remove FDIR conf entry by ID value
+ * @vf: pointer to the VF info
+ * @conf: FDIR configuration for each filter
+ * @id: filter rule's ID
+ */
+static void
+ice_vc_fdir_remove_entry(struct ice_vf *vf,
+			 struct virtchnl_fdir_fltr_conf *conf, u32 id)
+{
+	struct ice_fdir_fltr *input = &conf->input;
+
+	idr_remove(&vf->fdir.fdir_rule_idr, id);
+	list_del(&input->fltr_node);
+}
+
+/**
+ * ice_vc_fdir_lookup_entry - lookup FDIR conf entry by ID value
+ * @vf: pointer to the VF info
+ * @id: filter rule's ID
+ *
+ * Return: pointer to the filter conf on success, NULL otherwise.
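+ *
+ * The flow ID handed back to the VF is simply the IDR slot allocated in
+ * ice_vc_fdir_insert_entry(), so the lifecycle of a rule ID is (sketch):
+ *
+ *	idr_alloc(&vf->fdir.fdir_rule_idr, conf, ...)	-> conf->flow_id
+ *	idr_find(&vf->fdir.fdir_rule_idr, flow_id)	-> lookup (here)
+ *	idr_remove(&vf->fdir.fdir_rule_idr, flow_id)	-> teardown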
+ */ +static struct virtchnl_fdir_fltr_conf * +ice_vc_fdir_lookup_entry(struct ice_vf *vf, u32 id) +{ + return idr_find(&vf->fdir.fdir_rule_idr, id); +} + +/** + * ice_vc_fdir_flush_entry - remove all FDIR conf entry + * @vf: pointer to the VF info + */ +static void ice_vc_fdir_flush_entry(struct ice_vf *vf) +{ + struct virtchnl_fdir_fltr_conf *conf; + struct ice_fdir_fltr *desc, *temp; + + list_for_each_entry_safe(desc, temp, + &vf->fdir.fdir_rule_list, fltr_node) { + conf = to_fltr_conf_from_desc(desc); + list_del(&desc->fltr_node); + devm_kfree(ice_pf_to_dev(vf->pf), conf); + } +} + +/** + * ice_vc_fdir_write_fltr - write filter rule into hardware + * @vf: pointer to the VF info + * @conf: FDIR configuration for each filter + * @add: true implies add rule, false implies del rules + * @is_tun: false implies non-tunnel type filter, true implies tunnel filter + * + * Return: 0 on success, and other on error. + */ +static int ice_vc_fdir_write_fltr(struct ice_vf *vf, + struct virtchnl_fdir_fltr_conf *conf, + bool add, bool is_tun) +{ + struct ice_fdir_fltr *input = &conf->input; + struct ice_vsi *vsi, *ctrl_vsi; + struct ice_fltr_desc desc; + enum ice_status status; + struct device *dev; + struct ice_pf *pf; + struct ice_hw *hw; + int ret; + u8 *pkt; + + pf = vf->pf; + dev = ice_pf_to_dev(pf); + hw = &pf->hw; + vsi = pf->vsi[vf->lan_vsi_idx]; + if (!vsi) { + dev_dbg(dev, "Invalid vsi for VF %d\n", vf->vf_id); + return -EINVAL; + } + + input->dest_vsi = vsi->idx; + input->comp_report = ICE_FXD_FLTR_QW0_COMP_REPORT_SW; + + ctrl_vsi = pf->vsi[vf->ctrl_vsi_idx]; + if (!ctrl_vsi) { + dev_dbg(dev, "Invalid ctrl_vsi for VF %d\n", vf->vf_id); + return -EINVAL; + } + + pkt = devm_kzalloc(dev, ICE_FDIR_MAX_RAW_PKT_SIZE, GFP_KERNEL); + if (!pkt) + return -ENOMEM; + + ice_fdir_get_prgm_desc(hw, input, &desc, add); + status = ice_fdir_get_gen_prgm_pkt(hw, input, pkt, false, is_tun); + ret = ice_status_to_errno(status); + if (ret) { + dev_dbg(dev, "Gen training pkt for VF %d ptype %d failed\n", + vf->vf_id, input->flow_type); + goto err_free_pkt; + } + + ret = ice_prgm_fdir_fltr(ctrl_vsi, &desc, pkt); + if (ret) + goto err_free_pkt; + + return 0; + +err_free_pkt: + devm_kfree(dev, pkt); + return ret; +} + +/** + * ice_vf_fdir_timer - FDIR program waiting timer interrupt handler + * @t: pointer to timer_list + */ +static void ice_vf_fdir_timer(struct timer_list *t) +{ + struct ice_vf_fdir_ctx *ctx_irq = from_timer(ctx_irq, t, rx_tmr); + struct ice_vf_fdir_ctx *ctx_done; + struct ice_vf_fdir *fdir; + unsigned long flags; + struct ice_vf *vf; + struct ice_pf *pf; + + fdir = container_of(ctx_irq, struct ice_vf_fdir, ctx_irq); + vf = container_of(fdir, struct ice_vf, fdir); + ctx_done = &fdir->ctx_done; + pf = vf->pf; + spin_lock_irqsave(&fdir->ctx_lock, flags); + if (!(ctx_irq->flags & ICE_VF_FDIR_CTX_VALID)) { + spin_unlock_irqrestore(&fdir->ctx_lock, flags); + WARN_ON_ONCE(1); + return; + } + + ctx_irq->flags &= ~ICE_VF_FDIR_CTX_VALID; + + ctx_done->flags |= ICE_VF_FDIR_CTX_VALID; + ctx_done->conf = ctx_irq->conf; + ctx_done->stat = ICE_FDIR_CTX_TIMEOUT; + ctx_done->v_opcode = ctx_irq->v_opcode; + spin_unlock_irqrestore(&fdir->ctx_lock, flags); + + set_bit(ICE_FD_VF_FLUSH_CTX, pf->state); + ice_service_task_schedule(pf); +} + +/** + * ice_vc_fdir_irq_handler - ctrl_vsi Rx queue interrupt handler + * @ctrl_vsi: pointer to a VF's CTRL VSI + * @rx_desc: pointer to FDIR Rx queue descriptor + */ +void +ice_vc_fdir_irq_handler(struct ice_vsi *ctrl_vsi, + union ice_32b_rx_flex_desc *rx_desc) +{ + struct ice_pf 
*pf = ctrl_vsi->back; + struct ice_vf_fdir_ctx *ctx_done; + struct ice_vf_fdir_ctx *ctx_irq; + struct ice_vf_fdir *fdir; + unsigned long flags; + struct device *dev; + struct ice_vf *vf; + int ret; + + vf = &pf->vf[ctrl_vsi->vf_id]; + + fdir = &vf->fdir; + ctx_done = &fdir->ctx_done; + ctx_irq = &fdir->ctx_irq; + dev = ice_pf_to_dev(pf); + spin_lock_irqsave(&fdir->ctx_lock, flags); + if (!(ctx_irq->flags & ICE_VF_FDIR_CTX_VALID)) { + spin_unlock_irqrestore(&fdir->ctx_lock, flags); + WARN_ON_ONCE(1); + return; + } + + ctx_irq->flags &= ~ICE_VF_FDIR_CTX_VALID; + + ctx_done->flags |= ICE_VF_FDIR_CTX_VALID; + ctx_done->conf = ctx_irq->conf; + ctx_done->stat = ICE_FDIR_CTX_IRQ; + ctx_done->v_opcode = ctx_irq->v_opcode; + memcpy(&ctx_done->rx_desc, rx_desc, sizeof(*rx_desc)); + spin_unlock_irqrestore(&fdir->ctx_lock, flags); + + ret = del_timer(&ctx_irq->rx_tmr); + if (!ret) + dev_err(dev, "VF %d: Unexpected inactive timer!\n", vf->vf_id); + + set_bit(ICE_FD_VF_FLUSH_CTX, pf->state); + ice_service_task_schedule(pf); +} + +/** + * ice_vf_fdir_dump_info - dump FDIR information for diagnosis + * @vf: pointer to the VF info + */ +static void ice_vf_fdir_dump_info(struct ice_vf *vf) +{ + struct ice_vsi *vf_vsi; + u32 fd_size, fd_cnt; + struct device *dev; + struct ice_pf *pf; + struct ice_hw *hw; + u16 vsi_num; + + pf = vf->pf; + hw = &pf->hw; + dev = ice_pf_to_dev(pf); + vf_vsi = pf->vsi[vf->lan_vsi_idx]; + vsi_num = ice_get_hw_vsi_num(hw, vf_vsi->idx); + + fd_size = rd32(hw, VSIQF_FD_SIZE(vsi_num)); + fd_cnt = rd32(hw, VSIQF_FD_CNT(vsi_num)); + dev_dbg(dev, "VF %d: space allocated: guar:0x%x, be:0x%x, space consumed: guar:0x%x, be:0x%x", + vf->vf_id, + (fd_size & VSIQF_FD_CNT_FD_GCNT_M) >> VSIQF_FD_CNT_FD_GCNT_S, + (fd_size & VSIQF_FD_CNT_FD_BCNT_M) >> VSIQF_FD_CNT_FD_BCNT_S, + (fd_cnt & VSIQF_FD_CNT_FD_GCNT_M) >> VSIQF_FD_CNT_FD_GCNT_S, + (fd_cnt & VSIQF_FD_CNT_FD_BCNT_M) >> VSIQF_FD_CNT_FD_BCNT_S); +} + +/** + * ice_vf_verify_rx_desc - verify received FDIR programming status descriptor + * @vf: pointer to the VF info + * @ctx: FDIR context info for post processing + * @status: virtchnl FDIR program status + * + * Return: 0 on success, and other on error. 
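+ *
+ * Sketch of the writeback decode performed below, using the same
+ * mask/shift pairs as the code:
+ *
+ *	stat_err = le16_to_cpu(ctx->rx_desc.wb.status_error0);
+ *	dd   = (stat_err & ICE_FXD_FLTR_WB_QW1_DD_M) >> ICE_FXD_FLTR_WB_QW1_DD_S;
+ *	prog = (stat_err & ICE_FXD_FLTR_WB_QW1_PROG_ID_M) >> ICE_FXD_FLTR_WB_QW1_PROG_ID_S;
+ *	fail = (stat_err & ICE_FXD_FLTR_WB_QW1_FAIL_M) >> ICE_FXD_FLTR_WB_QW1_FAIL_S;
+ *
+ * A missing DD bit, a PROG_ID that does not match the pending opcode, or
+ * a set FAIL/FAIL_PROF bit is translated into a VIRTCHNL_FDIR_FAILURE_*
+ * status for the VF.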
+ */ +static int +ice_vf_verify_rx_desc(struct ice_vf *vf, struct ice_vf_fdir_ctx *ctx, + enum virtchnl_fdir_prgm_status *status) +{ + struct device *dev = ice_pf_to_dev(vf->pf); + u32 stat_err, error, prog_id; + int ret; + + stat_err = le16_to_cpu(ctx->rx_desc.wb.status_error0); + if (((stat_err & ICE_FXD_FLTR_WB_QW1_DD_M) >> + ICE_FXD_FLTR_WB_QW1_DD_S) != ICE_FXD_FLTR_WB_QW1_DD_YES) { + *status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE; + dev_err(dev, "VF %d: Desc Done not set\n", vf->vf_id); + ret = -EINVAL; + goto err_exit; + } + + prog_id = (stat_err & ICE_FXD_FLTR_WB_QW1_PROG_ID_M) >> + ICE_FXD_FLTR_WB_QW1_PROG_ID_S; + if (prog_id == ICE_FXD_FLTR_WB_QW1_PROG_ADD && + ctx->v_opcode != VIRTCHNL_OP_ADD_FDIR_FILTER) { + dev_err(dev, "VF %d: Desc show add, but ctx not", + vf->vf_id); + *status = VIRTCHNL_FDIR_FAILURE_RULE_INVALID; + ret = -EINVAL; + goto err_exit; + } + + if (prog_id == ICE_FXD_FLTR_WB_QW1_PROG_DEL && + ctx->v_opcode != VIRTCHNL_OP_DEL_FDIR_FILTER) { + dev_err(dev, "VF %d: Desc show del, but ctx not", + vf->vf_id); + *status = VIRTCHNL_FDIR_FAILURE_RULE_INVALID; + ret = -EINVAL; + goto err_exit; + } + + error = (stat_err & ICE_FXD_FLTR_WB_QW1_FAIL_M) >> + ICE_FXD_FLTR_WB_QW1_FAIL_S; + if (error == ICE_FXD_FLTR_WB_QW1_FAIL_YES) { + if (prog_id == ICE_FXD_FLTR_WB_QW1_PROG_ADD) { + dev_err(dev, "VF %d, Failed to add FDIR rule due to no space in the table", + vf->vf_id); + *status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE; + } else { + dev_err(dev, "VF %d, Failed to remove FDIR rule, attempt to remove non-existent entry", + vf->vf_id); + *status = VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST; + } + ret = -EINVAL; + goto err_exit; + } + + error = (stat_err & ICE_FXD_FLTR_WB_QW1_FAIL_PROF_M) >> + ICE_FXD_FLTR_WB_QW1_FAIL_PROF_S; + if (error == ICE_FXD_FLTR_WB_QW1_FAIL_PROF_YES) { + dev_err(dev, "VF %d: Profile matching error", vf->vf_id); + *status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE; + ret = -EINVAL; + goto err_exit; + } + + *status = VIRTCHNL_FDIR_SUCCESS; + + return 0; + +err_exit: + ice_vf_fdir_dump_info(vf); + return ret; +} + +/** + * ice_vc_add_fdir_fltr_post + * @vf: pointer to the VF structure + * @ctx: FDIR context info for post processing + * @status: virtchnl FDIR program status + * @success: true implies success, false implies failure + * + * Post process for flow director add command. If success, then do post process + * and send back success msg by virtchnl. Otherwise, do context reversion and + * send back failure msg by virtchnl. + * + * Return: 0 on success, and other on error. + */ +static int +ice_vc_add_fdir_fltr_post(struct ice_vf *vf, struct ice_vf_fdir_ctx *ctx, + enum virtchnl_fdir_prgm_status status, + bool success) +{ + struct virtchnl_fdir_fltr_conf *conf = ctx->conf; + struct device *dev = ice_pf_to_dev(vf->pf); + enum virtchnl_status_code v_ret; + struct virtchnl_fdir_add *resp; + int ret, len, is_tun; + + v_ret = VIRTCHNL_STATUS_SUCCESS; + len = sizeof(*resp); + resp = kzalloc(len, GFP_KERNEL); + if (!resp) { + len = 0; + v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; + dev_dbg(dev, "VF %d: Alloc resp buf fail", vf->vf_id); + goto err_exit; + } + + if (!success) + goto err_exit; + + is_tun = 0; + resp->status = status; + resp->flow_id = conf->flow_id; + vf->fdir.fdir_fltr_cnt[conf->input.flow_type][is_tun]++; + + ret = ice_vc_send_msg_to_vf(vf, ctx->v_opcode, v_ret, + (u8 *)resp, len); + kfree(resp); + + dev_dbg(dev, "VF %d: flow_id:0x%X, FDIR %s success!\n", + vf->vf_id, conf->flow_id, + (ctx->v_opcode == VIRTCHNL_OP_ADD_FDIR_FILTER) ? 
+ "add" : "del"); + return ret; + +err_exit: + if (resp) + resp->status = status; + ice_vc_fdir_remove_entry(vf, conf, conf->flow_id); + devm_kfree(dev, conf); + + ret = ice_vc_send_msg_to_vf(vf, ctx->v_opcode, v_ret, + (u8 *)resp, len); + kfree(resp); + return ret; +} + +/** + * ice_vc_del_fdir_fltr_post + * @vf: pointer to the VF structure + * @ctx: FDIR context info for post processing + * @status: virtchnl FDIR program status + * @success: true implies success, false implies failure + * + * Post process for flow director del command. If success, then do post process + * and send back success msg by virtchnl. Otherwise, do context reversion and + * send back failure msg by virtchnl. + * + * Return: 0 on success, and other on error. + */ +static int +ice_vc_del_fdir_fltr_post(struct ice_vf *vf, struct ice_vf_fdir_ctx *ctx, + enum virtchnl_fdir_prgm_status status, + bool success) +{ + struct virtchnl_fdir_fltr_conf *conf = ctx->conf; + struct device *dev = ice_pf_to_dev(vf->pf); + enum virtchnl_status_code v_ret; + struct virtchnl_fdir_del *resp; + int ret, len, is_tun; + + v_ret = VIRTCHNL_STATUS_SUCCESS; + len = sizeof(*resp); + resp = kzalloc(len, GFP_KERNEL); + if (!resp) { + len = 0; + v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; + dev_dbg(dev, "VF %d: Alloc resp buf fail", vf->vf_id); + goto err_exit; + } + + if (!success) + goto err_exit; + + is_tun = 0; + resp->status = status; + ice_vc_fdir_remove_entry(vf, conf, conf->flow_id); + vf->fdir.fdir_fltr_cnt[conf->input.flow_type][is_tun]--; + + ret = ice_vc_send_msg_to_vf(vf, ctx->v_opcode, v_ret, + (u8 *)resp, len); + kfree(resp); + + dev_dbg(dev, "VF %d: flow_id:0x%X, FDIR %s success!\n", + vf->vf_id, conf->flow_id, + (ctx->v_opcode == VIRTCHNL_OP_ADD_FDIR_FILTER) ? + "add" : "del"); + devm_kfree(dev, conf); + return ret; + +err_exit: + if (resp) + resp->status = status; + if (success) + devm_kfree(dev, conf); + + ret = ice_vc_send_msg_to_vf(vf, ctx->v_opcode, v_ret, + (u8 *)resp, len); + kfree(resp); + return ret; +} + +/** + * ice_flush_fdir_ctx + * @pf: pointer to the PF structure + * + * Flush all the pending event on ctx_done list and process them. 
+ */
+void ice_flush_fdir_ctx(struct ice_pf *pf)
+{
+	int i;
+
+	if (!test_and_clear_bit(ICE_FD_VF_FLUSH_CTX, pf->state))
+		return;
+
+	ice_for_each_vf(pf, i) {
+		struct device *dev = ice_pf_to_dev(pf);
+		enum virtchnl_fdir_prgm_status status;
+		struct ice_vf *vf = &pf->vf[i];
+		struct ice_vf_fdir_ctx *ctx;
+		unsigned long flags;
+		int ret;
+
+		if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
+			continue;
+
+		if (vf->ctrl_vsi_idx == ICE_NO_VSI)
+			continue;
+
+		ctx = &vf->fdir.ctx_done;
+		spin_lock_irqsave(&vf->fdir.ctx_lock, flags);
+		if (!(ctx->flags & ICE_VF_FDIR_CTX_VALID)) {
+			spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+			continue;
+		}
+		spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+
+		WARN_ON(ctx->stat == ICE_FDIR_CTX_READY);
+		if (ctx->stat == ICE_FDIR_CTX_TIMEOUT) {
+			status = VIRTCHNL_FDIR_FAILURE_RULE_TIMEOUT;
+			dev_err(dev, "VF %d: ctrl_vsi irq timeout\n",
+				vf->vf_id);
+			goto err_exit;
+		}
+
+		ret = ice_vf_verify_rx_desc(vf, ctx, &status);
+		if (ret)
+			goto err_exit;
+
+		if (ctx->v_opcode == VIRTCHNL_OP_ADD_FDIR_FILTER)
+			ice_vc_add_fdir_fltr_post(vf, ctx, status, true);
+		else if (ctx->v_opcode == VIRTCHNL_OP_DEL_FDIR_FILTER)
+			ice_vc_del_fdir_fltr_post(vf, ctx, status, true);
+		else
+			dev_err(dev, "VF %d: Unsupported opcode\n", vf->vf_id);
+
+		spin_lock_irqsave(&vf->fdir.ctx_lock, flags);
+		ctx->flags &= ~ICE_VF_FDIR_CTX_VALID;
+		spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+		continue;
+err_exit:
+		if (ctx->v_opcode == VIRTCHNL_OP_ADD_FDIR_FILTER)
+			ice_vc_add_fdir_fltr_post(vf, ctx, status, false);
+		else if (ctx->v_opcode == VIRTCHNL_OP_DEL_FDIR_FILTER)
+			ice_vc_del_fdir_fltr_post(vf, ctx, status, false);
+		else
+			dev_err(dev, "VF %d: Unsupported opcode\n", vf->vf_id);
+
+		spin_lock_irqsave(&vf->fdir.ctx_lock, flags);
+		ctx->flags &= ~ICE_VF_FDIR_CTX_VALID;
+		spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+	}
+}
+
+/**
+ * ice_vc_fdir_set_irq_ctx - set FDIR context info for later IRQ handler
+ * @vf: pointer to the VF structure
+ * @conf: FDIR configuration for each filter
+ * @v_opcode: virtual channel operation code
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_set_irq_ctx(struct ice_vf *vf, struct virtchnl_fdir_fltr_conf *conf,
+			enum virtchnl_ops v_opcode)
+{
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	struct ice_vf_fdir_ctx *ctx;
+	unsigned long flags;
+
+	ctx = &vf->fdir.ctx_irq;
+	spin_lock_irqsave(&vf->fdir.ctx_lock, flags);
+	if ((vf->fdir.ctx_irq.flags & ICE_VF_FDIR_CTX_VALID) ||
+	    (vf->fdir.ctx_done.flags & ICE_VF_FDIR_CTX_VALID)) {
+		spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+		dev_dbg(dev, "VF %d: Last request is still in progress\n",
+			vf->vf_id);
+		return -EBUSY;
+	}
+	ctx->flags |= ICE_VF_FDIR_CTX_VALID;
+	spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+
+	ctx->conf = conf;
+	ctx->v_opcode = v_opcode;
+	ctx->stat = ICE_FDIR_CTX_READY;
+	timer_setup(&ctx->rx_tmr, ice_vf_fdir_timer, 0);
+
+	mod_timer(&ctx->rx_tmr, round_jiffies(msecs_to_jiffies(10) + jiffies));
+
+	return 0;
+}
+
+/**
+ * ice_vc_fdir_clear_irq_ctx - clear FDIR context info for IRQ handler
+ * @vf: pointer to the VF structure
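+ *
+ * Called on error paths after ice_vc_fdir_set_irq_ctx() has succeeded:
+ * it stops the pending timer and drops ICE_VF_FDIR_CTX_VALID so that the
+ * next add/del request is no longer rejected with -EBUSY.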
+ */ +static void ice_vc_fdir_clear_irq_ctx(struct ice_vf *vf) +{ + struct ice_vf_fdir_ctx *ctx = &vf->fdir.ctx_irq; + unsigned long flags; + + del_timer(&ctx->rx_tmr); + spin_lock_irqsave(&vf->fdir.ctx_lock, flags); + ctx->flags &= ~ICE_VF_FDIR_CTX_VALID; + spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags); +} + +/** + * ice_vc_add_fdir_fltr - add a FDIR filter for VF by the msg buffer + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * + * Return: 0 on success, and other on error. + */ +int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg) +{ + struct virtchnl_fdir_add *fltr = (struct virtchnl_fdir_add *)msg; + struct virtchnl_fdir_add *stat = NULL; + struct virtchnl_fdir_fltr_conf *conf; + enum virtchnl_status_code v_ret; + struct device *dev; + struct ice_pf *pf; + int is_tun = 0; + int len = 0; + int ret; + + pf = vf->pf; + dev = ice_pf_to_dev(pf); + ret = ice_vc_fdir_param_check(vf, fltr->vsi_id); + if (ret) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + dev_dbg(dev, "Parameter check for VF %d failed\n", vf->vf_id); + goto err_exit; + } + + ret = ice_vf_start_ctrl_vsi(vf); + if (ret && (ret != -EEXIST)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + dev_err(dev, "Init FDIR for VF %d failed, ret:%d\n", + vf->vf_id, ret); + goto err_exit; + } + + stat = kzalloc(sizeof(*stat), GFP_KERNEL); + if (!stat) { + v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; + dev_dbg(dev, "Alloc stat for VF %d failed\n", vf->vf_id); + goto err_exit; + } + + conf = devm_kzalloc(dev, sizeof(*conf), GFP_KERNEL); + if (!conf) { + v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; + dev_dbg(dev, "Alloc conf for VF %d failed\n", vf->vf_id); + goto err_exit; + } + + len = sizeof(*stat); + ret = ice_vc_validate_fdir_fltr(vf, fltr, conf); + if (ret) { + v_ret = VIRTCHNL_STATUS_SUCCESS; + stat->status = VIRTCHNL_FDIR_FAILURE_RULE_INVALID; + dev_dbg(dev, "Invalid FDIR filter from VF %d\n", vf->vf_id); + goto err_free_conf; + } + + if (fltr->validate_only) { + v_ret = VIRTCHNL_STATUS_SUCCESS; + stat->status = VIRTCHNL_FDIR_SUCCESS; + devm_kfree(dev, conf); + ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_FDIR_FILTER, + v_ret, (u8 *)stat, len); + goto exit; + } + + ret = ice_vc_fdir_config_input_set(vf, fltr, conf, is_tun); + if (ret) { + v_ret = VIRTCHNL_STATUS_SUCCESS; + stat->status = VIRTCHNL_FDIR_FAILURE_RULE_CONFLICT; + dev_err(dev, "VF %d: FDIR input set configure failed, ret:%d\n", + vf->vf_id, ret); + goto err_free_conf; + } + + ret = ice_vc_fdir_is_dup_fltr(vf, conf); + if (ret) { + v_ret = VIRTCHNL_STATUS_SUCCESS; + stat->status = VIRTCHNL_FDIR_FAILURE_RULE_EXIST; + dev_dbg(dev, "VF %d: duplicated FDIR rule detected\n", + vf->vf_id); + goto err_free_conf; + } + + ret = ice_vc_fdir_insert_entry(vf, conf, &conf->flow_id); + if (ret) { + v_ret = VIRTCHNL_STATUS_SUCCESS; + stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE; + dev_dbg(dev, "VF %d: insert FDIR list failed\n", vf->vf_id); + goto err_free_conf; + } + + ret = ice_vc_fdir_set_irq_ctx(vf, conf, VIRTCHNL_OP_ADD_FDIR_FILTER); + if (ret) { + v_ret = VIRTCHNL_STATUS_SUCCESS; + stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE; + dev_dbg(dev, "VF %d: set FDIR context failed\n", vf->vf_id); + goto err_free_conf; + } + + ret = ice_vc_fdir_write_fltr(vf, conf, true, is_tun); + if (ret) { + v_ret = VIRTCHNL_STATUS_SUCCESS; + stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE; + dev_err(dev, "VF %d: writing FDIR rule failed, ret:%d\n", + vf->vf_id, ret); + goto err_rem_entry; + } + +exit: + kfree(stat); + return ret; + +err_rem_entry: + ice_vc_fdir_clear_irq_ctx(vf); + 
ice_vc_fdir_remove_entry(vf, conf, conf->flow_id); +err_free_conf: + devm_kfree(dev, conf); +err_exit: + ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_FDIR_FILTER, v_ret, + (u8 *)stat, len); + kfree(stat); + return ret; +} + +/** + * ice_vc_del_fdir_fltr - delete a FDIR filter for VF by the msg buffer + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * + * Return: 0 on success, and other on error. + */ +int ice_vc_del_fdir_fltr(struct ice_vf *vf, u8 *msg) +{ + struct virtchnl_fdir_del *fltr = (struct virtchnl_fdir_del *)msg; + struct virtchnl_fdir_del *stat = NULL; + struct virtchnl_fdir_fltr_conf *conf; + enum virtchnl_status_code v_ret; + struct device *dev; + struct ice_pf *pf; + int is_tun = 0; + int len = 0; + int ret; + + pf = vf->pf; + dev = ice_pf_to_dev(pf); + ret = ice_vc_fdir_param_check(vf, fltr->vsi_id); + if (ret) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + dev_dbg(dev, "Parameter check for VF %d failed\n", vf->vf_id); + goto err_exit; + } + + stat = kzalloc(sizeof(*stat), GFP_KERNEL); + if (!stat) { + v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; + dev_dbg(dev, "Alloc stat for VF %d failed\n", vf->vf_id); + goto err_exit; + } + + len = sizeof(*stat); + + conf = ice_vc_fdir_lookup_entry(vf, fltr->flow_id); + if (!conf) { + v_ret = VIRTCHNL_STATUS_SUCCESS; + stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST; + dev_dbg(dev, "VF %d: FDIR invalid flow_id:0x%X\n", + vf->vf_id, fltr->flow_id); + goto err_exit; + } + + /* Just return failure when ctrl_vsi idx is invalid */ + if (vf->ctrl_vsi_idx == ICE_NO_VSI) { + v_ret = VIRTCHNL_STATUS_SUCCESS; + stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE; + dev_err(dev, "Invalid FDIR ctrl_vsi for VF %d\n", vf->vf_id); + goto err_exit; + } + + ret = ice_vc_fdir_set_irq_ctx(vf, conf, VIRTCHNL_OP_DEL_FDIR_FILTER); + if (ret) { + v_ret = VIRTCHNL_STATUS_SUCCESS; + stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE; + dev_dbg(dev, "VF %d: set FDIR context failed\n", vf->vf_id); + goto err_exit; + } + + ret = ice_vc_fdir_write_fltr(vf, conf, false, is_tun); + if (ret) { + v_ret = VIRTCHNL_STATUS_SUCCESS; + stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE; + dev_err(dev, "VF %d: writing FDIR rule failed, ret:%d\n", + vf->vf_id, ret); + goto err_del_tmr; + } + + kfree(stat); + + return ret; + +err_del_tmr: + ice_vc_fdir_clear_irq_ctx(vf); +err_exit: + ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DEL_FDIR_FILTER, v_ret, + (u8 *)stat, len); + kfree(stat); + return ret; +} + +/** + * ice_vf_fdir_init - init FDIR resource for VF + * @vf: pointer to the VF info + */ +void ice_vf_fdir_init(struct ice_vf *vf) +{ + struct ice_vf_fdir *fdir = &vf->fdir; + + idr_init(&fdir->fdir_rule_idr); + INIT_LIST_HEAD(&fdir->fdir_rule_list); + + spin_lock_init(&fdir->ctx_lock); + fdir->ctx_irq.flags = 0; + fdir->ctx_done.flags = 0; +} + +/** + * ice_vf_fdir_exit - destroy FDIR resource for VF + * @vf: pointer to the VF info + */ +void ice_vf_fdir_exit(struct ice_vf *vf) +{ + ice_vc_fdir_flush_entry(vf); + idr_destroy(&vf->fdir.fdir_rule_idr); + ice_vc_fdir_rem_prof_all(vf); + ice_vc_fdir_free_prof_all(vf); +} diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h new file mode 100644 index 000000000000..f4e629f4c09b --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2021, Intel Corporation. 
*/ + +#ifndef _ICE_VIRTCHNL_FDIR_H_ +#define _ICE_VIRTCHNL_FDIR_H_ + +struct ice_vf; +struct ice_pf; + +enum ice_fdir_ctx_stat { + ICE_FDIR_CTX_READY, + ICE_FDIR_CTX_IRQ, + ICE_FDIR_CTX_TIMEOUT, +}; + +struct ice_vf_fdir_ctx { + struct timer_list rx_tmr; + enum virtchnl_ops v_opcode; + enum ice_fdir_ctx_stat stat; + union ice_32b_rx_flex_desc rx_desc; +#define ICE_VF_FDIR_CTX_VALID BIT(0) + u32 flags; + + void *conf; +}; + +/* VF FDIR information structure */ +struct ice_vf_fdir { + u16 fdir_fltr_cnt[ICE_FLTR_PTYPE_MAX][ICE_FD_HW_SEG_MAX]; + int prof_entry_cnt[ICE_FLTR_PTYPE_MAX][ICE_FD_HW_SEG_MAX]; + struct ice_fd_hw_prof **fdir_prof; + + struct idr fdir_rule_idr; + struct list_head fdir_rule_list; + + spinlock_t ctx_lock; /* protects FDIR context info */ + struct ice_vf_fdir_ctx ctx_irq; + struct ice_vf_fdir_ctx ctx_done; +}; + +#ifdef CONFIG_PCI_IOV +int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg); +int ice_vc_del_fdir_fltr(struct ice_vf *vf, u8 *msg); +void ice_vf_fdir_init(struct ice_vf *vf); +void ice_vf_fdir_exit(struct ice_vf *vf); +void +ice_vc_fdir_irq_handler(struct ice_vsi *ctrl_vsi, + union ice_32b_rx_flex_desc *rx_desc); +void ice_flush_fdir_ctx(struct ice_pf *pf); +#else +static inline void +ice_vc_fdir_irq_handler(struct ice_vsi *ctrl_vsi, union ice_32b_rx_flex_desc *rx_desc) { } +static inline void ice_flush_fdir_ctx(struct ice_pf *pf) { } +#endif /* CONFIG_PCI_IOV */ +#endif /* _ICE_VIRTCHNL_FDIR_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 1f38a8d0c525..a1d22d2aa0bd 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -5,6 +5,256 @@ #include "ice_base.h" #include "ice_lib.h" #include "ice_fltr.h" +#include "ice_flow.h" +#include "ice_virtchnl_allowlist.h" + +#define FIELD_SELECTOR(proto_hdr_field) \ + BIT((proto_hdr_field) & PROTO_HDR_FIELD_MASK) + +struct ice_vc_hdr_match_type { + u32 vc_hdr; /* virtchnl headers (VIRTCHNL_PROTO_HDR_XXX) */ + u32 ice_hdr; /* ice headers (ICE_FLOW_SEG_HDR_XXX) */ +}; + +static const struct ice_vc_hdr_match_type ice_vc_hdr_list_os[] = { + {VIRTCHNL_PROTO_HDR_NONE, ICE_FLOW_SEG_HDR_NONE}, + {VIRTCHNL_PROTO_HDR_IPV4, ICE_FLOW_SEG_HDR_IPV4 | + ICE_FLOW_SEG_HDR_IPV_OTHER}, + {VIRTCHNL_PROTO_HDR_IPV6, ICE_FLOW_SEG_HDR_IPV6 | + ICE_FLOW_SEG_HDR_IPV_OTHER}, + {VIRTCHNL_PROTO_HDR_TCP, ICE_FLOW_SEG_HDR_TCP}, + {VIRTCHNL_PROTO_HDR_UDP, ICE_FLOW_SEG_HDR_UDP}, + {VIRTCHNL_PROTO_HDR_SCTP, ICE_FLOW_SEG_HDR_SCTP}, +}; + +static const struct ice_vc_hdr_match_type ice_vc_hdr_list_comms[] = { + {VIRTCHNL_PROTO_HDR_NONE, ICE_FLOW_SEG_HDR_NONE}, + {VIRTCHNL_PROTO_HDR_ETH, ICE_FLOW_SEG_HDR_ETH}, + {VIRTCHNL_PROTO_HDR_S_VLAN, ICE_FLOW_SEG_HDR_VLAN}, + {VIRTCHNL_PROTO_HDR_C_VLAN, ICE_FLOW_SEG_HDR_VLAN}, + {VIRTCHNL_PROTO_HDR_IPV4, ICE_FLOW_SEG_HDR_IPV4 | + ICE_FLOW_SEG_HDR_IPV_OTHER}, + {VIRTCHNL_PROTO_HDR_IPV6, ICE_FLOW_SEG_HDR_IPV6 | + ICE_FLOW_SEG_HDR_IPV_OTHER}, + {VIRTCHNL_PROTO_HDR_TCP, ICE_FLOW_SEG_HDR_TCP}, + {VIRTCHNL_PROTO_HDR_UDP, ICE_FLOW_SEG_HDR_UDP}, + {VIRTCHNL_PROTO_HDR_SCTP, ICE_FLOW_SEG_HDR_SCTP}, + {VIRTCHNL_PROTO_HDR_PPPOE, ICE_FLOW_SEG_HDR_PPPOE}, + {VIRTCHNL_PROTO_HDR_GTPU_IP, ICE_FLOW_SEG_HDR_GTPU_IP}, + {VIRTCHNL_PROTO_HDR_GTPU_EH, ICE_FLOW_SEG_HDR_GTPU_EH}, + {VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_DWN, + ICE_FLOW_SEG_HDR_GTPU_DWN}, + {VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_UP, + ICE_FLOW_SEG_HDR_GTPU_UP}, + {VIRTCHNL_PROTO_HDR_L2TPV3, ICE_FLOW_SEG_HDR_L2TPV3}, + {VIRTCHNL_PROTO_HDR_ESP, 
ICE_FLOW_SEG_HDR_ESP}, + {VIRTCHNL_PROTO_HDR_AH, ICE_FLOW_SEG_HDR_AH}, + {VIRTCHNL_PROTO_HDR_PFCP, ICE_FLOW_SEG_HDR_PFCP_SESSION}, +}; + +struct ice_vc_hash_field_match_type { + u32 vc_hdr; /* virtchnl headers + * (VIRTCHNL_PROTO_HDR_XXX) + */ + u32 vc_hash_field; /* virtchnl hash fields selector + * FIELD_SELECTOR((VIRTCHNL_PROTO_HDR_ETH_XXX)) + */ + u64 ice_hash_field; /* ice hash fields + * (BIT_ULL(ICE_FLOW_FIELD_IDX_XXX)) + */ +}; + +static const struct +ice_vc_hash_field_match_type ice_vc_hash_field_list_os[] = { + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST), + ICE_FLOW_HASH_IPV4}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), + ICE_FLOW_HASH_IPV4 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA)}, + {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA)}, + {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST), + ICE_FLOW_HASH_IPV6}, + {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT), + ICE_FLOW_HASH_IPV6 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, + {VIRTCHNL_PROTO_HDR_TCP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_SRC_PORT), + BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_SRC_PORT)}, + {VIRTCHNL_PROTO_HDR_TCP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_DST_PORT), + BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_DST_PORT)}, + {VIRTCHNL_PROTO_HDR_TCP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_SRC_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_DST_PORT), + ICE_FLOW_HASH_TCP_PORT}, + {VIRTCHNL_PROTO_HDR_UDP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_SRC_PORT), + BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_SRC_PORT)}, + {VIRTCHNL_PROTO_HDR_UDP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_DST_PORT), + BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_DST_PORT)}, + {VIRTCHNL_PROTO_HDR_UDP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_SRC_PORT) | + 
FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_DST_PORT), + ICE_FLOW_HASH_UDP_PORT}, + {VIRTCHNL_PROTO_HDR_SCTP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT), + BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT)}, + {VIRTCHNL_PROTO_HDR_SCTP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_DST_PORT), + BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_DST_PORT)}, + {VIRTCHNL_PROTO_HDR_SCTP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_DST_PORT), + ICE_FLOW_HASH_SCTP_PORT}, +}; + +static const struct +ice_vc_hash_field_match_type ice_vc_hash_field_list_comms[] = { + {VIRTCHNL_PROTO_HDR_ETH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_SRC), + BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_SA)}, + {VIRTCHNL_PROTO_HDR_ETH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_DST), + BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_DA)}, + {VIRTCHNL_PROTO_HDR_ETH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_DST), + ICE_FLOW_HASH_ETH}, + {VIRTCHNL_PROTO_HDR_ETH, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_ETHERTYPE), + BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_TYPE)}, + {VIRTCHNL_PROTO_HDR_S_VLAN, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_S_VLAN_ID), + BIT_ULL(ICE_FLOW_FIELD_IDX_S_VLAN)}, + {VIRTCHNL_PROTO_HDR_C_VLAN, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_C_VLAN_ID), + BIT_ULL(ICE_FLOW_FIELD_IDX_C_VLAN)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST), + ICE_FLOW_HASH_IPV4}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), + ICE_FLOW_HASH_IPV4 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA)}, + {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA)}, + {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST), + ICE_FLOW_HASH_IPV6}, + {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT), + ICE_FLOW_HASH_IPV6 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, + 
{VIRTCHNL_PROTO_HDR_TCP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_SRC_PORT), + BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_SRC_PORT)}, + {VIRTCHNL_PROTO_HDR_TCP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_DST_PORT), + BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_DST_PORT)}, + {VIRTCHNL_PROTO_HDR_TCP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_SRC_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_DST_PORT), + ICE_FLOW_HASH_TCP_PORT}, + {VIRTCHNL_PROTO_HDR_UDP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_SRC_PORT), + BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_SRC_PORT)}, + {VIRTCHNL_PROTO_HDR_UDP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_DST_PORT), + BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_DST_PORT)}, + {VIRTCHNL_PROTO_HDR_UDP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_SRC_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_DST_PORT), + ICE_FLOW_HASH_UDP_PORT}, + {VIRTCHNL_PROTO_HDR_SCTP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT), + BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT)}, + {VIRTCHNL_PROTO_HDR_SCTP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_DST_PORT), + BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_DST_PORT)}, + {VIRTCHNL_PROTO_HDR_SCTP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_DST_PORT), + ICE_FLOW_HASH_SCTP_PORT}, + {VIRTCHNL_PROTO_HDR_PPPOE, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_PPPOE_SESS_ID), + BIT_ULL(ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID)}, + {VIRTCHNL_PROTO_HDR_GTPU_IP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_GTPU_IP_TEID), + BIT_ULL(ICE_FLOW_FIELD_IDX_GTPU_IP_TEID)}, + {VIRTCHNL_PROTO_HDR_L2TPV3, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_L2TPV3_SESS_ID), + BIT_ULL(ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID)}, + {VIRTCHNL_PROTO_HDR_ESP, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ESP_SPI), + BIT_ULL(ICE_FLOW_FIELD_IDX_ESP_SPI)}, + {VIRTCHNL_PROTO_HDR_AH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_AH_SPI), + BIT_ULL(ICE_FLOW_FIELD_IDX_AH_SPI)}, + {VIRTCHNL_PROTO_HDR_PFCP, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_PFCP_SEID), + BIT_ULL(ICE_FLOW_FIELD_IDX_PFCP_SEID)}, +}; + +/** + * ice_get_vf_vsi - get VF's VSI based on the stored index + * @vf: VF used to get VSI + */ +static struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf) +{ + return vf->pf->vsi[vf->lan_vsi_idx]; +} /** * ice_validate_vf_id - helper to check if VF ID is valid @@ -197,11 +447,30 @@ static void ice_vf_invalidate_vsi(struct ice_vf *vf) */ static void ice_vf_vsi_release(struct ice_vf *vf) { - ice_vsi_release(vf->pf->vsi[vf->lan_vsi_idx]); + ice_vsi_release(ice_get_vf_vsi(vf)); ice_vf_invalidate_vsi(vf); } /** + * ice_vf_ctrl_invalidate_vsi - invalidate ctrl_vsi_idx to remove VSI access + * @vf: VF that control VSI is being invalidated on + */ +static void ice_vf_ctrl_invalidate_vsi(struct ice_vf *vf) +{ + vf->ctrl_vsi_idx = ICE_NO_VSI; +} + +/** + * ice_vf_ctrl_vsi_release - invalidate the VF's control VSI after freeing it + * @vf: VF that control VSI is being released on + */ +static void ice_vf_ctrl_vsi_release(struct ice_vf *vf) +{ + ice_vsi_release(vf->pf->vsi[vf->ctrl_vsi_idx]); + ice_vf_ctrl_invalidate_vsi(vf); +} + +/** * ice_free_vf_res - Free a VF's resources * @vf: pointer to the VF info */ @@ -214,6 +483,10 @@ static void ice_free_vf_res(struct ice_vf *vf) * accessing the VF's VSI after it's freed or invalidated. 
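 *
 * FDIR state is torn down first (ice_vf_fdir_exit() below): the flow
 * profiles and entries reference both the control VSI and the LAN VSI,
 * so they are released before either VSI.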
*/ clear_bit(ICE_VF_STATE_INIT, vf->vf_states); + ice_vf_fdir_exit(vf); + /* free VF control VSI */ + if (vf->ctrl_vsi_idx != ICE_NO_VSI) + ice_vf_ctrl_vsi_release(vf); /* free VSI and disconnect it from the parent uplink */ if (vf->lan_vsi_idx != ICE_NO_VSI) { @@ -250,7 +523,7 @@ static void ice_dis_vf_mappings(struct ice_vf *vf) struct ice_hw *hw; hw = &pf->hw; - vsi = pf->vsi[vf->lan_vsi_idx]; + vsi = ice_get_vf_vsi(vf); dev = ice_pf_to_dev(pf); wr32(hw, VPINT_ALLOC(vf->vf_id), 0); @@ -325,10 +598,7 @@ void ice_set_vf_state_qs_dis(struct ice_vf *vf) */ static void ice_dis_vf_qs(struct ice_vf *vf) { - struct ice_pf *pf = vf->pf; - struct ice_vsi *vsi; - - vsi = pf->vsi[vf->lan_vsi_idx]; + struct ice_vsi *vsi = ice_get_vf_vsi(vf); ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id); ice_vsi_stop_all_rx_rings(vsi); @@ -348,7 +618,7 @@ void ice_free_vfs(struct ice_pf *pf) if (!pf->vf) return; - while (test_and_set_bit(__ICE_VF_DIS, pf->state)) + while (test_and_set_bit(ICE_VF_DIS, pf->state)) usleep_range(1000, 2000); /* Disable IOV before freeing resources. This lets any VF drivers @@ -401,7 +671,15 @@ void ice_free_vfs(struct ice_pf *pf) wr32(hw, GLGEN_VFLRSTAT(reg_idx), BIT(bit_idx)); } } - clear_bit(__ICE_VF_DIS, pf->state); + + /* clear malicious info if the VFs are getting released */ + for (i = 0; i < tmp; i++) + if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->malvfs, + ICE_MAX_VF_COUNT, i)) + dev_dbg(dev, "failed to clear malicious VF state for VF %u\n", + i); + + clear_bit(ICE_VF_DIS, pf->state); clear_bit(ICE_FLAG_SRIOV_ENA, pf->flags); } @@ -560,6 +838,28 @@ static struct ice_vsi *ice_vf_vsi_setup(struct ice_vf *vf) } /** + * ice_vf_ctrl_vsi_setup - Set up a VF control VSI + * @vf: VF to setup control VSI for + * + * Returns pointer to the successfully allocated VSI struct on success, + * otherwise returns NULL on failure. 
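+ *
+ * The control VSI is created lazily: nothing calls this until the VF
+ * installs its first FDIR rule (via ice_vf_start_ctrl_vsi() from
+ * ice_vc_add_fdir_fltr()), and it is released again on VF reset and
+ * teardown.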
+ */ +struct ice_vsi *ice_vf_ctrl_vsi_setup(struct ice_vf *vf) +{ + struct ice_port_info *pi = ice_vf_get_port_info(vf); + struct ice_pf *pf = vf->pf; + struct ice_vsi *vsi; + + vsi = ice_vsi_setup(pf, pi, ICE_VSI_CTRL, vf->vf_id); + if (!vsi) { + dev_err(ice_pf_to_dev(pf), "Failed to create VF control VSI\n"); + ice_vf_ctrl_invalidate_vsi(vf); + } + + return vsi; +} + +/** * ice_calc_vf_first_vector_idx - Calculate MSIX vector index in the PF space * @pf: pointer to PF structure * @vf: pointer to VF that the first MSIX vector index is being calculated for @@ -585,8 +885,8 @@ static int ice_calc_vf_first_vector_idx(struct ice_pf *pf, struct ice_vf *vf) */ static int ice_vf_rebuild_host_vlan_cfg(struct ice_vf *vf) { - struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx]; struct device *dev = ice_pf_to_dev(vf->pf); + struct ice_vsi *vsi = ice_get_vf_vsi(vf); u16 vlan_id = 0; int err; @@ -622,8 +922,8 @@ static int ice_vf_rebuild_host_vlan_cfg(struct ice_vf *vf) */ static int ice_vf_rebuild_host_mac_cfg(struct ice_vf *vf) { - struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx]; struct device *dev = ice_pf_to_dev(vf->pf); + struct ice_vsi *vsi = ice_get_vf_vsi(vf); enum ice_status status; u8 broadcast[ETH_ALEN]; @@ -724,8 +1024,8 @@ static void ice_ena_vf_msix_mappings(struct ice_vf *vf) */ static void ice_ena_vf_q_mappings(struct ice_vf *vf, u16 max_txq, u16 max_rxq) { - struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx]; struct device *dev = ice_pf_to_dev(vf->pf); + struct ice_vsi *vsi = ice_get_vf_vsi(vf); struct ice_hw *hw = &vf->pf->hw; u32 reg; @@ -772,7 +1072,7 @@ static void ice_ena_vf_q_mappings(struct ice_vf *vf, u16 max_txq, u16 max_rxq) */ static void ice_ena_vf_mappings(struct ice_vf *vf) { - struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx]; + struct ice_vsi *vsi = ice_get_vf_vsi(vf); ice_ena_vf_msix_mappings(vf); ice_ena_vf_q_mappings(vf, vsi->alloc_txq, vsi->alloc_rxq); @@ -1035,7 +1335,7 @@ ice_vf_set_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m, static void ice_vf_clear_counters(struct ice_vf *vf) { - struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx]; + struct ice_vsi *vsi = ice_get_vf_vsi(vf); vf->num_mac = 0; vsi->num_vlan = 0; @@ -1095,8 +1395,8 @@ static void ice_vf_rebuild_aggregator_node_cfg(struct ice_vsi *vsi) */ static void ice_vf_rebuild_host_cfg(struct ice_vf *vf) { - struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx]; struct device *dev = ice_pf_to_dev(vf->pf); + struct ice_vsi *vsi = ice_get_vf_vsi(vf); ice_vf_set_host_trust_cfg(vf); @@ -1136,10 +1436,8 @@ static int ice_vf_rebuild_vsi_with_release(struct ice_vf *vf) */ static int ice_vf_rebuild_vsi(struct ice_vf *vf) { + struct ice_vsi *vsi = ice_get_vf_vsi(vf); struct ice_pf *pf = vf->pf; - struct ice_vsi *vsi; - - vsi = pf->vsi[vf->lan_vsi_idx]; if (ice_vsi_rebuild(vsi, true)) { dev_err(ice_pf_to_dev(pf), "failed to rebuild VF %d VSI\n", @@ -1212,8 +1510,13 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) if (!pf->num_alloc_vfs) return false; + /* clear all malicious info if the VFs are getting reset */ + ice_for_each_vf(pf, i) + if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->malvfs, ICE_MAX_VF_COUNT, i)) + dev_dbg(dev, "failed to clear malicious VF state for VF %u\n", i); + /* If VFs have been disabled, there is no need to reset */ - if (test_and_set_bit(__ICE_VF_DIS, pf->state)) + if (test_and_set_bit(ICE_VF_DIS, pf->state)) return false; /* Begin reset on all VFs at once */ @@ -1256,13 +1559,23 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) ice_for_each_vf(pf, v) { 
vf = &pf->vf[v]; + vf->driver_caps = 0; + ice_vc_set_default_allowlist(vf); + + ice_vf_fdir_exit(vf); + /* clean VF control VSI when resetting VFs since it should be + * setup only when VF creates its first FDIR rule. + */ + if (vf->ctrl_vsi_idx != ICE_NO_VSI) + ice_vf_ctrl_invalidate_vsi(vf); + ice_vf_pre_vsi_rebuild(vf); ice_vf_rebuild_vsi(vf); ice_vf_post_vsi_rebuild(vf); } ice_flush(hw); - clear_bit(__ICE_VF_DIS, pf->state); + clear_bit(ICE_VF_DIS, pf->state); return true; } @@ -1282,7 +1595,7 @@ static bool ice_is_vf_disabled(struct ice_vf *vf) * means something else is resetting the VF, so we shouldn't continue. * Otherwise, set disable VF state bit for actual reset, and continue. */ - return (test_bit(__ICE_VF_DIS, pf->state) || + return (test_bit(ICE_VF_DIS, pf->state) || test_bit(ICE_VF_STATE_DIS, vf->vf_states)); } @@ -1307,7 +1620,7 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) dev = ice_pf_to_dev(pf); - if (test_bit(__ICE_VF_RESETS_DISABLED, pf->state)) { + if (test_bit(ICE_VF_RESETS_DISABLED, pf->state)) { dev_dbg(dev, "Trying to reset VF %d, but all VF resets are disabled\n", vf->vf_id); return true; @@ -1323,7 +1636,7 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) set_bit(ICE_VF_STATE_DIS, vf->vf_states); ice_trigger_vf_reset(vf, is_vflr, false); - vsi = pf->vsi[vf->lan_vsi_idx]; + vsi = ice_get_vf_vsi(vf); if (test_bit(ICE_VF_STATE_QS_ENA, vf->vf_states)) ice_dis_vf_qs(vf); @@ -1353,6 +1666,9 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) usleep_range(10, 20); } + vf->driver_caps = 0; + ice_vc_set_default_allowlist(vf); + /* Display a warning if VF didn't manage to reset in time, but need to * continue on with the operation. */ @@ -1369,15 +1685,26 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) else promisc_m = ICE_UCAST_PROMISC_BITS; - vsi = pf->vsi[vf->lan_vsi_idx]; + vsi = ice_get_vf_vsi(vf); if (ice_vf_set_vsi_promisc(vf, vsi, promisc_m, true)) dev_err(dev, "disabling promiscuous mode failed\n"); } + ice_vf_fdir_exit(vf); + /* clean VF control VSI when resetting VF since it should be setup + * only when VF creates its first FDIR rule. + */ + if (vf->ctrl_vsi_idx != ICE_NO_VSI) + ice_vf_ctrl_vsi_release(vf); + ice_vf_pre_vsi_rebuild(vf); ice_vf_rebuild_vsi_with_release(vf); ice_vf_post_vsi_rebuild(vf); + /* if the VF has been reset allow it to come up again */ + if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->malvfs, ICE_MAX_VF_COUNT, vf->vf_id)) + dev_dbg(dev, "failed to clear malicious VF state for VF %u\n", i); + return true; } @@ -1532,7 +1859,7 @@ teardown: } /** - * ice_set_dflt_settings - set VF defaults during initialization/creation + * ice_set_dflt_settings_vfs - set VF defaults during initialization/creation * @pf: PF holding reference to all VFs for default configuration */ static void ice_set_dflt_settings_vfs(struct ice_pf *pf) @@ -1549,6 +1876,13 @@ static void ice_set_dflt_settings_vfs(struct ice_pf *pf) set_bit(ICE_VIRTCHNL_VF_CAP_L2, &vf->vf_caps); vf->spoofchk = true; vf->num_vf_qs = pf->num_qps_per_vf; + ice_vc_set_default_allowlist(vf); + + /* ctrl_vsi_idx will be set to a valid value only when VF + * creates its first fdir rule. + */ + ice_vf_ctrl_invalidate_vsi(vf); + ice_vf_fdir_init(vf); } } @@ -1586,7 +1920,7 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs) /* Disable global interrupt 0 so we don't try to handle the VFLR. 
*/ wr32(hw, GLINT_DYN_CTL(pf->oicr_idx), ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S); - set_bit(__ICE_OICR_INTR_DIS, pf->state); + set_bit(ICE_OICR_INTR_DIS, pf->state); ice_flush(hw); ret = pci_enable_sriov(pf->pdev, num_vfs); @@ -1614,7 +1948,7 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs) goto err_unroll_sriov; } - clear_bit(__ICE_VF_DIS, pf->state); + clear_bit(ICE_VF_DIS, pf->state); return 0; err_unroll_sriov: @@ -1626,7 +1960,7 @@ err_pci_disable_sriov: err_unroll_intr: /* rearm interrupts here */ ice_irq_dynamic_ena(hw, NULL, NULL); - clear_bit(__ICE_OICR_INTR_DIS, pf->state); + clear_bit(ICE_OICR_INTR_DIS, pf->state); return ret; } @@ -1704,6 +2038,7 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs) { struct ice_pf *pf = pci_get_drvdata(pdev); struct device *dev = ice_pf_to_dev(pf); + enum ice_status status; int err; err = ice_check_sriov_allowed(pf); @@ -1712,6 +2047,7 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs) if (!num_vfs) { if (!pci_vfs_assigned(pdev)) { + ice_mbx_deinit_snapshot(&pf->hw); ice_free_vfs(pf); if (pf->lag) ice_enable_lag(pf->lag); @@ -1722,9 +2058,15 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs) return -EBUSY; } + status = ice_mbx_init_snapshot(&pf->hw, num_vfs); + if (status) + return ice_status_to_errno(status); + err = ice_pci_sriov_ena(pf, num_vfs); - if (err) + if (err) { + ice_mbx_deinit_snapshot(&pf->hw); return err; + } if (pf->lag) ice_disable_lag(pf->lag); @@ -1744,7 +2086,7 @@ void ice_process_vflr_event(struct ice_pf *pf) unsigned int vf_id; u32 reg; - if (!test_and_clear_bit(__ICE_VFLR_EVENT_PENDING, pf->state) || + if (!test_and_clear_bit(ICE_VFLR_EVENT_PENDING, pf->state) || !pf->num_alloc_vfs) return; @@ -1789,7 +2131,7 @@ static struct ice_vf *ice_get_vf_from_pfq(struct ice_pf *pf, u16 pfq) struct ice_vsi *vsi; u16 rxq_idx; - vsi = pf->vsi[vf->lan_vsi_idx]; + vsi = ice_get_vf_vsi(vf); ice_for_each_rxq(vsi, rxq_idx) if (vsi->rxq_map[rxq_idx] == pfq) @@ -1848,7 +2190,7 @@ ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event) * * send msg to VF */ -static int +int ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode, enum virtchnl_status_code v_retval, u8 *msg, u16 msglen) { @@ -1929,8 +2271,7 @@ static int ice_vc_get_ver_msg(struct ice_vf *vf, u8 *msg) */ static u16 ice_vc_get_max_frame_size(struct ice_vf *vf) { - struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx]; - struct ice_port_info *pi = vsi->port_info; + struct ice_port_info *pi = ice_vf_get_port_info(vf); u16 max_frame_size; max_frame_size = pi->phy.link_info.max_frame_size; @@ -1978,7 +2319,7 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) VIRTCHNL_VF_OFFLOAD_VLAN; vfres->vf_cap_flags = VIRTCHNL_VF_OFFLOAD_L2; - vsi = pf->vsi[vf->lan_vsi_idx]; + vsi = ice_get_vf_vsi(vf); if (!vsi) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto err; @@ -1996,6 +2337,9 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_REG; } + if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_FDIR_PF) + vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_FDIR_PF; + if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2; @@ -2017,6 +2361,12 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) if (vf->driver_caps & VIRTCHNL_VF_CAP_ADV_LINK_SPEED) vfres->vf_cap_flags |= VIRTCHNL_VF_CAP_ADV_LINK_SPEED; + if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF) + vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF; + + if 
(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_USO) + vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_USO; + vfres->num_vsis = 1; /* Tx and Rx queue are equal for VF */ vfres->num_queue_pairs = vsi->num_txq; @@ -2034,6 +2384,9 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) /* match guest capabilities */ vf->driver_caps = vfres->vf_cap_flags; + ice_vc_set_caps_allowlist(vf); + ice_vc_set_working_allowlist(vf); + set_bit(ICE_VF_STATE_ACTIVE, vf->vf_states); err: @@ -2084,7 +2437,7 @@ static struct ice_vsi *ice_find_vsi_from_id(struct ice_pf *pf, u16 id) * * check for the valid VSI ID */ -static bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id) +bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id) { struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; @@ -2125,6 +2478,222 @@ static bool ice_vc_isvalid_ring_len(u16 ring_len) } /** + * ice_vc_parse_rss_cfg - parses hash fields and headers from + * a specific virtchnl RSS cfg + * @hw: pointer to the hardware + * @rss_cfg: pointer to the virtchnl RSS cfg + * @addl_hdrs: pointer to the protocol header fields (ICE_FLOW_SEG_HDR_*) + * to configure + * @hash_flds: pointer to the hash bit fields (ICE_FLOW_HASH_*) to configure + * + * Return true if all the protocol header and hash fields in the RSS cfg could + * be parsed, else return false + * + * This function parses the virtchnl RSS cfg to be the intended + * hash fields and the intended header for RSS configuration + */ +static bool +ice_vc_parse_rss_cfg(struct ice_hw *hw, struct virtchnl_rss_cfg *rss_cfg, + u32 *addl_hdrs, u64 *hash_flds) +{ + const struct ice_vc_hash_field_match_type *hf_list; + const struct ice_vc_hdr_match_type *hdr_list; + int i, hf_list_len, hdr_list_len; + + if (!strncmp(hw->active_pkg_name, "ICE COMMS Package", + sizeof(hw->active_pkg_name))) { + hf_list = ice_vc_hash_field_list_comms; + hf_list_len = ARRAY_SIZE(ice_vc_hash_field_list_comms); + hdr_list = ice_vc_hdr_list_comms; + hdr_list_len = ARRAY_SIZE(ice_vc_hdr_list_comms); + } else { + hf_list = ice_vc_hash_field_list_os; + hf_list_len = ARRAY_SIZE(ice_vc_hash_field_list_os); + hdr_list = ice_vc_hdr_list_os; + hdr_list_len = ARRAY_SIZE(ice_vc_hdr_list_os); + } + + for (i = 0; i < rss_cfg->proto_hdrs.count; i++) { + struct virtchnl_proto_hdr *proto_hdr = + &rss_cfg->proto_hdrs.proto_hdr[i]; + bool hdr_found = false; + int j; + + /* Find matched ice headers according to virtchnl headers. */ + for (j = 0; j < hdr_list_len; j++) { + struct ice_vc_hdr_match_type hdr_map = hdr_list[j]; + + if (proto_hdr->type == hdr_map.vc_hdr) { + *addl_hdrs |= hdr_map.ice_hdr; + hdr_found = true; + } + } + + if (!hdr_found) + return false; + + /* Find matched ice hash fields according to + * virtchnl hash fields. 
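ice_vc_parse_rss_cfg() above is a table-driven translator: the active DDP package selects which match tables apply, every guest-supplied virtchnl header must match an entry in that table (or the whole request is rejected), and each match ORs its ice-side bits into the accumulated result. A self-contained sketch of the same lookup shape, with made-up constants standing in for the virtchnl/ice enums:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for the virtchnl/ice constants. */
enum vc_hdr { VC_HDR_IPV4 = 1, VC_HDR_TCP = 2, VC_HDR_UDP = 3 };
#define ICE_HDR_IPV4 0x01u
#define ICE_HDR_TCP  0x02u
#define ICE_HDR_UDP  0x04u

struct hdr_match { enum vc_hdr vc; uint32_t ice_bits; };

static const struct hdr_match hdr_list[] = {
	{ VC_HDR_IPV4, ICE_HDR_IPV4 },
	{ VC_HDR_TCP,  ICE_HDR_TCP  },
	{ VC_HDR_UDP,  ICE_HDR_UDP  },
};

/* Translate a guest-supplied header list into driver bits; reject unknowns. */
static bool parse_hdrs(const enum vc_hdr *hdrs, int count, uint32_t *out)
{
	for (int i = 0; i < count; i++) {
		bool found = false;

		for (size_t j = 0; j < sizeof(hdr_list) / sizeof(hdr_list[0]); j++) {
			if (hdrs[i] == hdr_list[j].vc) {
				*out |= hdr_list[j].ice_bits;
				found = true;
			}
		}
		if (!found)
			return false;	/* unknown header -> whole request rejected */
	}
	return true;
}

int main(void)
{
	enum vc_hdr req[] = { VC_HDR_IPV4, VC_HDR_TCP };
	uint32_t bits = 0;
	bool ok = parse_hdrs(req, 2, &bits);

	printf("parsed=%d bits=0x%x\n", ok, bits);
	return 0;
}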
+ */ + for (j = 0; j < hf_list_len; j++) { + struct ice_vc_hash_field_match_type hf_map = hf_list[j]; + + if (proto_hdr->type == hf_map.vc_hdr && + proto_hdr->field_selector == hf_map.vc_hash_field) { + *hash_flds |= hf_map.ice_hash_field; + break; + } + } + } + + return true; +} + +/** + * ice_vf_adv_rss_offload_ena - determine if capabilities support advanced + * RSS offloads + * @caps: VF driver negotiated capabilities + * + * Return true if VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF capability is set, + * else return false + */ +static bool ice_vf_adv_rss_offload_ena(u32 caps) +{ + return !!(caps & VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF); +} + +/** + * ice_vc_handle_rss_cfg + * @vf: pointer to the VF info + * @msg: pointer to the message buffer + * @add: add a RSS config if true, otherwise delete a RSS config + * + * This function adds/deletes a RSS config + */ +static int ice_vc_handle_rss_cfg(struct ice_vf *vf, u8 *msg, bool add) +{ + u32 v_opcode = add ? VIRTCHNL_OP_ADD_RSS_CFG : VIRTCHNL_OP_DEL_RSS_CFG; + struct virtchnl_rss_cfg *rss_cfg = (struct virtchnl_rss_cfg *)msg; + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct device *dev = ice_pf_to_dev(vf->pf); + struct ice_hw *hw = &vf->pf->hw; + struct ice_vsi *vsi; + + if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) { + dev_dbg(dev, "VF %d attempting to configure RSS, but RSS is not supported by the PF\n", + vf->vf_id); + v_ret = VIRTCHNL_STATUS_ERR_NOT_SUPPORTED; + goto error_param; + } + + if (!ice_vf_adv_rss_offload_ena(vf->driver_caps)) { + dev_dbg(dev, "VF %d attempting to configure RSS, but Advanced RSS offload is not supported\n", + vf->vf_id); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (rss_cfg->proto_hdrs.count > VIRTCHNL_MAX_NUM_PROTO_HDRS || + rss_cfg->rss_algorithm < VIRTCHNL_RSS_ALG_TOEPLITZ_ASYMMETRIC || + rss_cfg->rss_algorithm > VIRTCHNL_RSS_ALG_XOR_SYMMETRIC) { + dev_dbg(dev, "VF %d attempting to configure RSS, but RSS configuration is not valid\n", + vf->vf_id); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (rss_cfg->rss_algorithm == VIRTCHNL_RSS_ALG_R_ASYMMETRIC) { + struct ice_vsi_ctx *ctx; + enum ice_status status; + u8 lut_type, hash_type; + + lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI; + hash_type = add ? 
ICE_AQ_VSI_Q_OPT_RSS_XOR : + ICE_AQ_VSI_Q_OPT_RSS_TPLZ; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) { + v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; + goto error_param; + } + + ctx->info.q_opt_rss = ((lut_type << + ICE_AQ_VSI_Q_OPT_RSS_LUT_S) & + ICE_AQ_VSI_Q_OPT_RSS_LUT_M) | + (hash_type & + ICE_AQ_VSI_Q_OPT_RSS_HASH_M); + + /* Preserve existing queueing option setting */ + ctx->info.q_opt_rss |= (vsi->info.q_opt_rss & + ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_M); + ctx->info.q_opt_tc = vsi->info.q_opt_tc; + ctx->info.q_opt_flags = vsi->info.q_opt_rss; + + ctx->info.valid_sections = + cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID); + + status = ice_update_vsi(hw, vsi->idx, ctx, NULL); + if (status) { + dev_err(dev, "update VSI for RSS failed, err %s aq_err %s\n", + ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + } else { + vsi->info.q_opt_rss = ctx->info.q_opt_rss; + } + + kfree(ctx); + } else { + u32 addl_hdrs = ICE_FLOW_SEG_HDR_NONE; + u64 hash_flds = ICE_HASH_INVALID; + + if (!ice_vc_parse_rss_cfg(hw, rss_cfg, &addl_hdrs, + &hash_flds)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (add) { + if (ice_add_rss_cfg(hw, vsi->idx, hash_flds, + addl_hdrs)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + dev_err(dev, "ice_add_rss_cfg failed for vsi = %d, v_ret = %d\n", + vsi->vsi_num, v_ret); + } + } else { + enum ice_status status; + + status = ice_rem_rss_cfg(hw, vsi->idx, hash_flds, + addl_hdrs); + /* We just ignore ICE_ERR_DOES_NOT_EXIST, because + * if two configurations share the same profile, removing + * one of them actually removes both, since the + * profile is deleted. + */ + if (status && status != ICE_ERR_DOES_NOT_EXIST) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + dev_err(dev, "ice_rem_rss_cfg failed for VF ID:%d, error:%s\n", + vf->vf_id, ice_stat_str(status)); + } + } + } + +error_param: + return ice_vc_send_msg_to_vf(vf, v_opcode, v_ret, NULL, 0); +} + +/** * ice_vc_config_rss_key * @vf: pointer to the VF info * @msg: pointer to the msg buffer @@ -2136,7 +2705,6 @@ static int ice_vc_config_rss_key(struct ice_vf *vf, u8 *msg) enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; struct virtchnl_rss_key *vrk = (struct virtchnl_rss_key *)msg; - struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { @@ -2159,13 +2727,13 @@ static int ice_vc_config_rss_key(struct ice_vf *vf, u8 *msg) goto error_param; } - vsi = pf->vsi[vf->lan_vsi_idx]; + vsi = ice_get_vf_vsi(vf); if (!vsi) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - if (ice_set_rss(vsi, vrk->key, NULL, 0)) + if (ice_set_rss_key(vsi, vrk->key)) v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR; error_param: return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_KEY, v_ret, @@ -2183,7 +2751,6 @@ static int ice_vc_config_rss_lut(struct ice_vf *vf, u8 *msg) { struct virtchnl_rss_lut *vrl = (struct virtchnl_rss_lut *)msg; enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; - struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { @@ -2206,13 +2773,13 @@ static int ice_vc_config_rss_lut(struct ice_vf *vf, u8 *msg) goto error_param; } - vsi = pf->vsi[vf->lan_vsi_idx]; + vsi = ice_get_vf_vsi(vf); if (!vsi) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - if (ice_set_rss(vsi, NULL, vrl->lut, ICE_VSIQF_HLUT_ARRAY_SIZE)) + if (ice_set_rss_lut(vsi, vrl->lut, ICE_VSIQF_HLUT_ARRAY_SIZE)) v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR; error_param: return 
ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_LUT, v_ret, @@ -2289,7 +2856,7 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena) if (ret) return ret; - vf_vsi = pf->vsi[vf->lan_vsi_idx]; + vf_vsi = ice_get_vf_vsi(vf); if (!vf_vsi) { netdev_err(netdev, "VSI %d for VF %d is null\n", vf->lan_vsi_idx, vf->vf_id); @@ -2394,7 +2961,7 @@ static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg) goto error_param; } - vsi = pf->vsi[vf->lan_vsi_idx]; + vsi = ice_get_vf_vsi(vf); if (!vsi) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; @@ -2530,7 +3097,6 @@ static int ice_vc_get_stats_msg(struct ice_vf *vf, u8 *msg) struct virtchnl_queue_select *vqs = (struct virtchnl_queue_select *)msg; struct ice_eth_stats stats = { 0 }; - struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { @@ -2543,7 +3109,7 @@ static int ice_vc_get_stats_msg(struct ice_vf *vf, u8 *msg) goto error_param; } - vsi = pf->vsi[vf->lan_vsi_idx]; + vsi = ice_get_vf_vsi(vf); if (!vsi) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; @@ -2633,7 +3199,6 @@ static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg) enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; struct virtchnl_queue_select *vqs = (struct virtchnl_queue_select *)msg; - struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; unsigned long q_map; u16 vf_q_id; @@ -2653,7 +3218,7 @@ static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg) goto error_param; } - vsi = pf->vsi[vf->lan_vsi_idx]; + vsi = ice_get_vf_vsi(vf); if (!vsi) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; @@ -2685,7 +3250,6 @@ static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg) set_bit(vf_q_id, vf->rxq_ena); } - vsi = pf->vsi[vf->lan_vsi_idx]; q_map = vqs->tx_queues; for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) { @@ -2724,7 +3288,6 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; struct virtchnl_queue_select *vqs = (struct virtchnl_queue_select *)msg; - struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; unsigned long q_map; u16 vf_q_id; @@ -2745,7 +3308,7 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) goto error_param; } - vsi = pf->vsi[vf->lan_vsi_idx]; + vsi = ice_get_vf_vsi(vf); if (!vsi) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; @@ -2910,7 +3473,7 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) goto error_param; } - vsi = pf->vsi[vf->lan_vsi_idx]; + vsi = ice_get_vf_vsi(vf); if (!vsi) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; @@ -2987,7 +3550,7 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) goto error_param; } - vsi = pf->vsi[vf->lan_vsi_idx]; + vsi = ice_get_vf_vsi(vf); if (!vsi) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; @@ -3222,7 +3785,7 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set) goto handle_mac_exit; } - vsi = pf->vsi[vf->lan_vsi_idx]; + vsi = ice_get_vf_vsi(vf); if (!vsi) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto handle_mac_exit; @@ -3454,7 +4017,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) } hw = &pf->hw; - vsi = pf->vsi[vf->lan_vsi_idx]; + vsi = ice_get_vf_vsi(vf); if (!vsi) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; @@ -3621,7 +4184,6 @@ static int ice_vc_remove_vlan_msg(struct ice_vf *vf, u8 *msg) static int ice_vc_ena_vlan_stripping(struct ice_vf *vf) { enum virtchnl_status_code v_ret = 
VIRTCHNL_STATUS_SUCCESS; - struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { @@ -3634,7 +4196,7 @@ static int ice_vc_ena_vlan_stripping(struct ice_vf *vf) goto error_param; } - vsi = pf->vsi[vf->lan_vsi_idx]; + vsi = ice_get_vf_vsi(vf); if (ice_vsi_manage_vlan_stripping(vsi, true)) v_ret = VIRTCHNL_STATUS_ERR_PARAM; @@ -3652,7 +4214,6 @@ error_param: static int ice_vc_dis_vlan_stripping(struct ice_vf *vf) { enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; - struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { @@ -3665,7 +4226,7 @@ static int ice_vc_dis_vlan_stripping(struct ice_vf *vf) goto error_param; } - vsi = pf->vsi[vf->lan_vsi_idx]; + vsi = ice_get_vf_vsi(vf); if (!vsi) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; @@ -3691,7 +4252,7 @@ error_param: */ static int ice_vf_init_vlan_stripping(struct ice_vf *vf) { - struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx]; + struct ice_vsi *vsi = ice_get_vf_vsi(vf); if (!vsi) return -EINVAL; @@ -3747,6 +4308,13 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) err = -EINVAL; } + if (!ice_vc_is_opcode_allowed(vf, v_opcode)) { + ice_vc_send_msg_to_vf(vf, v_opcode, + VIRTCHNL_STATUS_ERR_NOT_SUPPORTED, NULL, + 0); + return; + } + error_handler: if (err) { ice_vc_send_msg_to_vf(vf, v_opcode, VIRTCHNL_STATUS_ERR_PARAM, @@ -3816,6 +4384,18 @@ error_handler: case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING: err = ice_vc_dis_vlan_stripping(vf); break; + case VIRTCHNL_OP_ADD_FDIR_FILTER: + err = ice_vc_add_fdir_fltr(vf, msg); + break; + case VIRTCHNL_OP_DEL_FDIR_FILTER: + err = ice_vc_del_fdir_fltr(vf, msg); + break; + case VIRTCHNL_OP_ADD_RSS_CFG: + err = ice_vc_handle_rss_cfg(vf, msg, true); + break; + case VIRTCHNL_OP_DEL_RSS_CFG: + err = ice_vc_handle_rss_cfg(vf, msg, false); + break; case VIRTCHNL_OP_UNKNOWN: default: dev_err(dev, "Unsupported opcode %d from VF %d\n", v_opcode, @@ -4066,7 +4646,7 @@ int ice_get_vf_stats(struct net_device *netdev, int vf_id, if (ret) return ret; - vsi = pf->vsi[vf->lan_vsi_idx]; + vsi = ice_get_vf_vsi(vf); if (!vsi) return -EINVAL; @@ -4108,7 +4688,7 @@ void ice_print_vf_rx_mdd_event(struct ice_vf *vf) } /** - * ice_print_vfs_mdd_event - print VFs malicious driver detect event + * ice_print_vfs_mdd_events - print VFs malicious driver detect event * @pf: pointer to the PF structure * * Called from ice_handle_mdd_event to rate limit and print VFs MDD events. 
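The message dispatch above first consults a per-VF opcode allowlist, a bitmap sized VIRTCHNL_OP_MAX (declared in the ice_virtchnl_pf.h hunk below): opcodes outside the set negotiated via the VF's capabilities are answered with VIRTCHNL_STATUS_ERR_NOT_SUPPORTED before any handler runs. A standalone sketch of the idea; the opcode numbers here are illustrative, not the virtchnl ABI:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative opcodes; the driver sizes its bitmap at VIRTCHNL_OP_MAX. */
enum { OP_GET_VF_RES = 3, OP_ADD_RSS_CFG = 45, OP_DEL_RSS_CFG = 46, OP_MAX = 64 };

static uint64_t allowlist;	/* one bit per opcode, as DECLARE_BITMAP() provides */

static void allow_opcode(int op)
{
	allowlist |= 1ull << op;
}

static bool opcode_allowed(int op)
{
	return op >= 0 && op < OP_MAX && (allowlist >> op) & 1;
}

int main(void)
{
	allow_opcode(OP_GET_VF_RES);	/* default allowlist: always-legal ops */

	/* After capability negotiation the list is widened, e.g. once the
	 * VF has negotiated the advanced-RSS capability: */
	allow_opcode(OP_ADD_RSS_CFG);
	allow_opcode(OP_DEL_RSS_CFG);

	printf("DEL_RSS_CFG allowed: %d\n", opcode_allowed(OP_DEL_RSS_CFG));
	printf("opcode 50 allowed:   %d\n", opcode_allowed(50));
	return 0;
}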
@@ -4120,7 +4700,7 @@ void ice_print_vfs_mdd_events(struct ice_pf *pf) int i; /* check that there are pending MDD events to print */ - if (!test_and_clear_bit(__ICE_MDD_VF_PRINT_PENDING, pf->state)) + if (!test_and_clear_bit(ICE_MDD_VF_PRINT_PENDING, pf->state)) return; /* VF MDD event logs are rate limited to one second intervals */ @@ -4160,7 +4740,6 @@ void ice_print_vfs_mdd_events(struct ice_pf *pf) */ void ice_restore_all_vfs_msi_state(struct pci_dev *pdev) { - struct pci_dev *vfdev; u16 vf_id; int pos; @@ -4169,6 +4748,8 @@ void ice_restore_all_vfs_msi_state(struct pci_dev *pdev) pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); if (pos) { + struct pci_dev *vfdev; + pci_read_config_word(pdev, pos + PCI_SRIOV_VF_DID, &vf_id); vfdev = pci_get_device(pdev->vendor, vf_id, NULL); @@ -4180,3 +4761,70 @@ void ice_restore_all_vfs_msi_state(struct pci_dev *pdev) } } } + +/** + * ice_is_malicious_vf - helper function to detect a malicious VF + * @pf: ptr to struct ice_pf + * @event: pointer to the AQ event + * @num_msg_proc: the number of messages processed so far + * @num_msg_pending: the number of messages pending in admin queue + */ +bool +ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, + u16 num_msg_proc, u16 num_msg_pending) +{ + s16 vf_id = le16_to_cpu(event->desc.retval); + struct device *dev = ice_pf_to_dev(pf); + struct ice_mbx_data mbxdata; + enum ice_status status; + bool malvf = false; + struct ice_vf *vf; + + if (ice_validate_vf_id(pf, vf_id)) + return false; + + vf = &pf->vf[vf_id]; + /* Check if VF is disabled. */ + if (test_bit(ICE_VF_STATE_DIS, vf->vf_states)) + return false; + + mbxdata.num_msg_proc = num_msg_proc; + mbxdata.num_pending_arq = num_msg_pending; + mbxdata.max_num_msgs_mbx = pf->hw.mailboxq.num_rq_entries; +#define ICE_MBX_OVERFLOW_WATERMARK 64 + mbxdata.async_watermark_val = ICE_MBX_OVERFLOW_WATERMARK; + + /* check to see if we have a malicious VF */ + status = ice_mbx_vf_state_handler(&pf->hw, &mbxdata, vf_id, &malvf); + if (status) + return false; + + if (malvf) { + bool report_vf = false; + + /* if the VF is malicious and we haven't let the user + * know about it, then let them know now + */ + status = ice_mbx_report_malvf(&pf->hw, pf->malvfs, + ICE_MAX_VF_COUNT, vf_id, + &report_vf); + if (status) + dev_dbg(dev, "Error reporting malicious VF\n"); + + if (report_vf) { + struct ice_vsi *pf_vsi = ice_get_main_vsi(pf); + + if (pf_vsi) + dev_warn(dev, "VF MAC %pM on PF MAC %pM is generating asynchronous messages and may be overflowing the PF message queue. 
Please see the Adapter User Guide for more information\n", + &vf->dflt_lan_addr.addr[0], + pf_vsi->netdev->dev_addr); + } + + return true; + } + + /* if there was an error in detection or the VF is not malicious then + * return false + */ + return false; +} diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h index 0f519fba3770..d800ed83d6c3 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h @@ -4,6 +4,7 @@ #ifndef _ICE_VIRTCHNL_PF_H_ #define _ICE_VIRTCHNL_PF_H_ #include "ice.h" +#include "ice_virtchnl_fdir.h" /* Restrict number of MAC Addr and VLAN that non-trusted VF can program */ #define ICE_MAX_VLAN_PER_VF 8 @@ -70,6 +71,8 @@ struct ice_vf { u16 vf_id; /* VF ID in the PF space */ u16 lan_vsi_idx; /* index into PF struct */ + u16 ctrl_vsi_idx; + struct ice_vf_fdir fdir; /* first vector index of this VF in the PF space */ int first_vector_idx; struct ice_sw *vf_sw_id; /* switch ID the VF VSIs connect to */ @@ -100,6 +103,7 @@ struct ice_vf { u16 num_vf_qs; /* num of queue configured per VF */ struct ice_mdd_vf_events mdd_rx_events; struct ice_mdd_vf_events mdd_tx_events; + DECLARE_BITMAP(opcodes_allowlist, VIRTCHNL_OP_MAX); }; #ifdef CONFIG_PCI_IOV @@ -116,6 +120,9 @@ void ice_vc_notify_reset(struct ice_pf *pf); bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr); bool ice_reset_vf(struct ice_vf *vf, bool is_vflr); void ice_restore_all_vfs_msi_state(struct pci_dev *pdev); +bool +ice_is_malicious_vf(struct ice_pf *pf, struct ice_rq_event_info *event, + u16 num_msg_proc, u16 num_msg_pending); int ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, @@ -138,6 +145,11 @@ void ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event); void ice_print_vfs_mdd_events(struct ice_pf *pf); void ice_print_vf_rx_mdd_event(struct ice_vf *vf); +struct ice_vsi *ice_vf_ctrl_vsi_setup(struct ice_vf *vf); +int +ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode, + enum virtchnl_status_code v_retval, u8 *msg, u16 msglen); +bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id); #else /* CONFIG_PCI_IOV */ #define ice_process_vflr_event(pf) do {} while (0) #define ice_free_vfs(pf) do {} while (0) @@ -151,6 +163,15 @@ void ice_print_vf_rx_mdd_event(struct ice_vf *vf); #define ice_restore_all_vfs_msi_state(pdev) do {} while (0) static inline bool +ice_is_malicious_vf(struct ice_pf __always_unused *pf, + struct ice_rq_event_info __always_unused *event, + u16 __always_unused num_msg_proc, + u16 __always_unused num_msg_pending) +{ + return false; +} + +static inline bool ice_reset_all_vfs(struct ice_pf __always_unused *pf, bool __always_unused is_vflr) { diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 83f3c9574ed1..faa7b8d96adb 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -108,9 +108,6 @@ ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector) ice_cfg_itr(hw, q_vector); - wr32(hw, GLINT_RATE(reg_idx), - ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran)); - ice_for_each_ring(ring, q_vector->tx) ice_cfg_txq_interrupt(vsi, ring->reg_idx, reg_idx, q_vector->tx.itr_idx); @@ -159,7 +156,7 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) rx_ring = vsi->rx_rings[q_idx]; q_vector = rx_ring->q_vector; - while (test_and_set_bit(__ICE_CFG_BUSY, vsi->state)) { + while (test_and_set_bit(ICE_CFG_BUSY, 
vsi->state)) { timeout--; if (!timeout) return -EBUSY; @@ -249,7 +246,7 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) if (err) goto free_buf; - clear_bit(__ICE_CFG_BUSY, vsi->state); + clear_bit(ICE_CFG_BUSY, vsi->state); ice_qvec_toggle_napi(vsi, q_vector, true); ice_qvec_ena_irq(vsi, q_vector); @@ -358,18 +355,18 @@ xsk_pool_if_up: * This function allocates a number of Rx buffers from the fill ring * or the internal recycle mechanism and places them on the Rx ring. * - * Returns false if all allocations were successful, true if any fail. + * Returns true if all allocations were successful, false if any fail. */ bool ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, u16 count) { union ice_32b_rx_flex_desc *rx_desc; u16 ntu = rx_ring->next_to_use; struct ice_rx_buf *rx_buf; - bool ret = false; + bool ok = true; dma_addr_t dma; if (!count) - return false; + return true; rx_desc = ICE_RX_DESC(rx_ring, ntu); rx_buf = &rx_ring->rx_buf[ntu]; @@ -377,7 +374,7 @@ bool ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, u16 count) do { rx_buf->xdp = xsk_buff_alloc(rx_ring->xsk_pool); if (!rx_buf->xdp) { - ret = true; + ok = false; break; } @@ -402,7 +399,7 @@ bool ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, u16 count) ice_release_rx_desc(rx_ring, ntu); } - return ret; + return ok; } /** @@ -473,6 +470,14 @@ ice_run_xdp_zc(struct ice_ring *rx_ring, struct xdp_buff *xdp) xdp_prog = READ_ONCE(rx_ring->xdp_prog); act = bpf_prog_run_xdp(xdp_prog, xdp); + + if (likely(act == XDP_REDIRECT)) { + err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); + result = !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED; + rcu_read_unlock(); + return result; + } + switch (act) { case XDP_PASS: break; @@ -480,10 +485,6 @@ ice_run_xdp_zc(struct ice_ring *rx_ring, struct xdp_buff *xdp) xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->q_index]; result = ice_xmit_xdp_buff(xdp, xdp_ring); break; - case XDP_REDIRECT: - err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); - result = !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED; - break; default: bpf_warn_invalid_xdp_action(act); fallthrough; @@ -754,7 +755,7 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, struct ice_vsi *vsi = np->vsi; struct ice_ring *ring; - if (test_bit(__ICE_DOWN, vsi->state)) + if (test_bit(ICE_DOWN, vsi->state)) return -ENETDOWN; if (!ice_is_xdp_ena_vsi(vsi)) diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h index d2e2c50ce257..ca5429774994 100644 --- a/drivers/net/ethernet/intel/igb/e1000_defines.h +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -340,10 +340,10 @@ #define I210_RXPBSIZE_PB_32KB 0x00000020 #define I210_TXPBSIZE_DEFAULT 0x04000014 /* TXPBSIZE default */ #define I210_TXPBSIZE_MASK 0xC0FFFFFF -#define I210_TXPBSIZE_PB0_8KB (8 << 0) -#define I210_TXPBSIZE_PB1_8KB (8 << 6) -#define I210_TXPBSIZE_PB2_4KB (4 << 12) -#define I210_TXPBSIZE_PB3_4KB (4 << 18) +#define I210_TXPBSIZE_PB0_6KB (6 << 0) +#define I210_TXPBSIZE_PB1_6KB (6 << 6) +#define I210_TXPBSIZE_PB2_6KB (6 << 12) +#define I210_TXPBSIZE_PB3_6KB (6 << 18) #define I210_DTXMXPKTSZ_DEFAULT 0x00000098 diff --git a/drivers/net/ethernet/intel/igb/e1000_hw.h b/drivers/net/ethernet/intel/igb/e1000_hw.h index 5d87957b2627..44111f65afc7 100644 --- a/drivers/net/ethernet/intel/igb/e1000_hw.h +++ b/drivers/net/ethernet/intel/igb/e1000_hw.h @@ -1,8 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright(c) 2007 - 2018 Intel Corporation. 
*/ -#ifndef _E1000_HW_H_ -#define _E1000_HW_H_ +#ifndef _E1000_IGB_HW_H_ +#define _E1000_IGB_HW_H_ #include <linux/types.h> #include <linux/delay.h> @@ -551,4 +551,4 @@ s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value); void igb_read_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value); void igb_write_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value); -#endif /* _E1000_HW_H_ */ +#endif /* _E1000_IGB_HW_H_ */ diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c index fd8eb2f9ab9d..e63ee3cca5ea 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mac.c +++ b/drivers/net/ethernet/intel/igb/e1000_mac.c @@ -484,6 +484,31 @@ static u32 igb_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr) } /** + * igb_i21x_hw_doublecheck - double checks potential HW issue in i21X + * @hw: pointer to the HW structure + * + * Checks if the multicast array was written correctly + * If not, then it is written to the register again + **/ +static void igb_i21x_hw_doublecheck(struct e1000_hw *hw) +{ + bool is_failed; + int i; + + do { + is_failed = false; + for (i = hw->mac.mta_reg_count - 1; i >= 0; i--) { + if (array_rd32(E1000_MTA, i) != hw->mac.mta_shadow[i]) { + is_failed = true; + array_wr32(E1000_MTA, i, hw->mac.mta_shadow[i]); + wrfl(); + break; + } + } + } while (is_failed); +} + +/** * igb_update_mc_addr_list - Update Multicast addresses + * @hw: pointer to the HW structure * @mc_addr_list: array of multicast addresses to program @@ -516,6 +541,8 @@ void igb_update_mc_addr_list(struct e1000_hw *hw, for (i = hw->mac.mta_reg_count - 1; i >= 0; i--) array_wr32(E1000_MTA, i, hw->mac.mta_shadow[i]); wrfl(); + if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) + igb_i21x_hw_doublecheck(hw); } /** diff --git a/drivers/net/ethernet/intel/igb/e1000_mbx.c b/drivers/net/ethernet/intel/igb/e1000_mbx.c index 33cceb77e960..29383112bc19 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mbx.c +++ b/drivers/net/ethernet/intel/igb/e1000_mbx.c @@ -441,7 +441,7 @@ out_no_read: } /** - * e1000_init_mbx_params_pf - set initial values for pf mailbox + * igb_init_mbx_params_pf - set initial values for pf mailbox * @hw: pointer to the HW structure * * Initializes the hw->mbx struct to correct values for pf mailbox diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c index 8c8eb82e6272..a018000f7db9 100644 --- a/drivers/net/ethernet/intel/igb/e1000_phy.c +++ b/drivers/net/ethernet/intel/igb/e1000_phy.c @@ -836,6 +836,7 @@ s32 igb_copper_link_setup_igp(struct e1000_hw *hw) break; case e1000_ms_auto: data &= ~CR_1000T_MS_ENABLE; + break; default: break; } diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index aaa954aae574..7bda8c5edea5 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -748,8 +748,8 @@ void igb_ptp_suspend(struct igb_adapter *adapter); void igb_ptp_rx_hang(struct igb_adapter *adapter); void igb_ptp_tx_hang(struct igb_adapter *adapter); void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb); -void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, - struct sk_buff *skb); +int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, + struct sk_buff *skb); int igb_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr); int igb_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr); void igb_set_flag_queue_pairs(struct igb_adapter *, const u32); diff --git 
a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 28baf203459a..7545da216d8b 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2347,35 +2347,23 @@ static void igb_get_strings(struct net_device *netdev, u32 stringset, u8 *data) IGB_TEST_LEN*ETH_GSTRING_LEN); break; case ETH_SS_STATS: - for (i = 0; i < IGB_GLOBAL_STATS_LEN; i++) { - memcpy(p, igb_gstrings_stats[i].stat_string, - ETH_GSTRING_LEN); - p += ETH_GSTRING_LEN; - } - for (i = 0; i < IGB_NETDEV_STATS_LEN; i++) { - memcpy(p, igb_gstrings_net_stats[i].stat_string, - ETH_GSTRING_LEN); - p += ETH_GSTRING_LEN; - } + for (i = 0; i < IGB_GLOBAL_STATS_LEN; i++) + ethtool_sprintf(&p, + igb_gstrings_stats[i].stat_string); + for (i = 0; i < IGB_NETDEV_STATS_LEN; i++) + ethtool_sprintf(&p, + igb_gstrings_net_stats[i].stat_string); for (i = 0; i < adapter->num_tx_queues; i++) { - sprintf(p, "tx_queue_%u_packets", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_queue_%u_bytes", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_queue_%u_restart", i); - p += ETH_GSTRING_LEN; + ethtool_sprintf(&p, "tx_queue_%u_packets", i); + ethtool_sprintf(&p, "tx_queue_%u_bytes", i); + ethtool_sprintf(&p, "tx_queue_%u_restart", i); } for (i = 0; i < adapter->num_rx_queues; i++) { - sprintf(p, "rx_queue_%u_packets", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_queue_%u_bytes", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_queue_%u_drops", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_queue_%u_csum_err", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_queue_%u_alloc_failed", i); - p += ETH_GSTRING_LEN; + ethtool_sprintf(&p, "rx_queue_%u_packets", i); + ethtool_sprintf(&p, "rx_queue_%u_bytes", i); + ethtool_sprintf(&p, "rx_queue_%u_drops", i); + ethtool_sprintf(&p, "rx_queue_%u_csum_err", i); + ethtool_sprintf(&p, "rx_queue_%u_alloc_failed", i); } /* BUG_ON(p - data != IGB_STATS_LEN * ETH_GSTRING_LEN); */ break; @@ -3022,6 +3010,7 @@ static int igb_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) break; case ETHTOOL_SRXCLSRLDEL: ret = igb_del_ethtool_nfc_entry(adapter, cmd); + break; default: break; } diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 878b31d534ec..038a9fd1af44 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -1921,8 +1921,8 @@ static void igb_setup_tx_mode(struct igb_adapter *adapter) */ val = rd32(E1000_TXPBS); val &= ~I210_TXPBSIZE_MASK; - val |= I210_TXPBSIZE_PB0_8KB | I210_TXPBSIZE_PB1_8KB | - I210_TXPBSIZE_PB2_4KB | I210_TXPBSIZE_PB3_4KB; + val |= I210_TXPBSIZE_PB0_6KB | I210_TXPBSIZE_PB1_6KB | + I210_TXPBSIZE_PB2_6KB | I210_TXPBSIZE_PB3_6KB; wr32(E1000_TXPBS, val); val = rd32(E1000_RXPBS); @@ -2037,7 +2037,7 @@ static void igb_power_down_link(struct igb_adapter *adapter) } /** - * Detect and switch function for Media Auto Sense + * igb_check_swap_media - Detect and switch function for Media Auto Sense * @adapter: address of the board private structure **/ static void igb_check_swap_media(struct igb_adapter *adapter) @@ -2934,7 +2934,7 @@ static int igb_xdp_xmit(struct net_device *dev, int n, int cpu = smp_processor_id(); struct igb_ring *tx_ring; struct netdev_queue *nq; - int drops = 0; + int nxmit = 0; int i; if (unlikely(test_bit(__IGB_DOWN, &adapter->state))) @@ -2961,10 +2961,9 @@ static int igb_xdp_xmit(struct net_device *dev, int n, int err; err = igb_xmit_xdp_ring(adapter, tx_ring, xdpf); - if (err != IGB_XDP_TX) { - 
xdp_return_frame_rx_napi(xdpf); - drops++; - } + if (err != IGB_XDP_TX) + break; + nxmit++; } __netif_tx_unlock(nq); @@ -2972,7 +2971,7 @@ static int igb_xdp_xmit(struct net_device *dev, int n, if (unlikely(flags & XDP_XMIT_FLUSH)) igb_xdp_ring_update_tail(tx_ring); - return n - drops; + return nxmit; } static const struct net_device_ops igb_netdev_ops = { @@ -3115,7 +3114,7 @@ static s32 igb_init_i2c(struct igb_adapter *adapter) return 0; /* Initialize the i2c bus which is controlled by the registers. - * This bus will use the i2c_algo_bit structue that implements + * This bus will use the i2c_algo_bit structure that implements * the protocol through toggling of the 4 bits in the register. */ adapter->i2c_adap.owner = THIS_MODULE; @@ -4020,7 +4019,7 @@ static int igb_sw_init(struct igb_adapter *adapter) } /** - * igb_open - Called when a network interface is made active + * __igb_open - Called when a network interface is made active * @netdev: network interface device structure * @resuming: indicates whether we are in a resume call * @@ -4138,7 +4137,7 @@ int igb_open(struct net_device *netdev) } /** - * igb_close - Disables a network interface + * __igb_close - Disables a network interface * @netdev: network interface device structure * @suspending: indicates we are in a suspend call * @@ -5856,7 +5855,7 @@ static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, */ if (tx_ring->launchtime_enable) { ts = ktime_to_timespec64(first->skb->tstamp); - first->skb->tstamp = ktime_set(0, 0); + skb_txtime_consumed(first->skb); context_desc->seqnum_seed = cpu_to_le32(ts.tv_nsec / 32); } else { context_desc->seqnum_seed = 0; @@ -8214,7 +8213,8 @@ static void igb_reuse_rx_page(struct igb_ring *rx_ring, new_buff->pagecnt_bias = old_buff->pagecnt_bias; } -static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer) +static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer, + int rx_buf_pgcnt) { unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; struct page *page = rx_buffer->page; @@ -8225,7 +8225,7 @@ static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer) #if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ - if (unlikely((page_ref_count(page) - pagecnt_bias) > 1)) + if (unlikely((rx_buf_pgcnt - pagecnt_bias) > 1)) return false; #else #define IGB_LAST_OFFSET \ @@ -8301,9 +8301,10 @@ static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring, return NULL; if (unlikely(igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP))) { - igb_ptp_rx_pktstamp(rx_ring->q_vector, xdp->data, skb); - xdp->data += IGB_TS_HDR_LEN; - size -= IGB_TS_HDR_LEN; + if (!igb_ptp_rx_pktstamp(rx_ring->q_vector, xdp->data, skb)) { + xdp->data += IGB_TS_HDR_LEN; + size -= IGB_TS_HDR_LEN; + } } /* Determine available headroom for copy */ @@ -8364,8 +8365,8 @@ static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring, /* pull timestamp out of packet data */ if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { - igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb); - __skb_pull(skb, IGB_TS_HDR_LEN); + if (!igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb)) + __skb_pull(skb, IGB_TS_HDR_LEN); } /* update buffer offset */ @@ -8614,11 +8615,17 @@ static unsigned int igb_rx_offset(struct igb_ring *rx_ring) } static struct igb_rx_buffer *igb_get_rx_buffer(struct igb_ring *rx_ring, - const unsigned int size) + const unsigned int size, int *rx_buf_pgcnt) { struct igb_rx_buffer *rx_buffer; rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; + *rx_buf_pgcnt = +#if 
(PAGE_SIZE < 8192) + page_count(rx_buffer->page); +#else + 0; +#endif prefetchw(rx_buffer->page); /* we are reusing so sync this buffer for CPU use */ @@ -8634,9 +8641,9 @@ static struct igb_rx_buffer *igb_get_rx_buffer(struct igb_ring *rx_ring, } static void igb_put_rx_buffer(struct igb_ring *rx_ring, - struct igb_rx_buffer *rx_buffer) + struct igb_rx_buffer *rx_buffer, int rx_buf_pgcnt) { - if (igb_can_reuse_rx_page(rx_buffer)) { + if (igb_can_reuse_rx_page(rx_buffer, rx_buf_pgcnt)) { /* hand second half of page back to the ring */ igb_reuse_rx_page(rx_ring, rx_buffer); } else { @@ -8664,6 +8671,7 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) unsigned int xdp_xmit = 0; struct xdp_buff xdp; u32 frame_sz = 0; + int rx_buf_pgcnt; /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */ #if (PAGE_SIZE < 8192) @@ -8693,7 +8701,7 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) */ dma_rmb(); - rx_buffer = igb_get_rx_buffer(rx_ring, size); + rx_buffer = igb_get_rx_buffer(rx_ring, size, &rx_buf_pgcnt); /* retrieve a buffer from the ring */ if (!skb) { @@ -8736,7 +8744,7 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) break; } - igb_put_rx_buffer(rx_ring, rx_buffer); + igb_put_rx_buffer(rx_ring, rx_buffer, rx_buf_pgcnt); cleaned_count++; /* fetch next buffer in frame if non-eop */ diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 7cc5428c3b3d..ba61fe9bfaf4 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -856,6 +856,9 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter) dev_kfree_skb_any(skb); } +#define IGB_RET_PTP_DISABLED 1 +#define IGB_RET_PTP_INVALID 2 + /** * igb_ptp_rx_pktstamp - retrieve Rx per packet timestamp * @q_vector: Pointer to interrupt specific structure @@ -864,19 +867,29 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter) * * This function is meant to retrieve a timestamp from the first buffer of an * incoming frame. The value is stored in little endian format starting on - * byte 8. + * byte 8 + * + * Returns: 0 if success, nonzero if failure **/ -void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, - struct sk_buff *skb) +int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, + struct sk_buff *skb) { - __le64 *regval = (__le64 *)va; struct igb_adapter *adapter = q_vector->adapter; + __le64 *regval = (__le64 *)va; int adjust = 0; + if (!(adapter->ptp_flags & IGB_PTP_ENABLED)) + return IGB_RET_PTP_DISABLED; + /* The timestamp is recorded in little endian format. * DWORD: 0 1 2 3 * Field: Reserved Reserved SYSTIML SYSTIMH */ + + /* check reserved dwords are zero, be/le doesn't matter for zero */ + if (regval[0]) + return IGB_RET_PTP_INVALID; + igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), le64_to_cpu(regval[1])); @@ -896,6 +909,8 @@ void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, } skb_hwtstamps(skb)->hwtstamp = ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust); + + return 0; } /** @@ -906,13 +921,15 @@ void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, * This function is meant to retrieve a timestamp from the internal registers * of the adapter and store it in the skb. 
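The igb_ptp_rx_pktstamp() rework above makes the inline Rx timestamp defensive: it bails out when PTP is not enabled and treats a non-zero reserved field as "no valid timestamp", so the callers only strip IGB_TS_HDR_LEN when a real stamp was present. A minimal sketch of that parse-and-validate step over the 16-byte prefix (assumes a little-endian host; the kernel uses le64_to_cpu()):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define RET_PTP_INVALID 2	/* mirrors the IGB_RET_PTP_INVALID idea */

/* First 16 bytes of the buffer: dwords 0-1 reserved, then the
 * little-endian SYSTIM value (SYSTIML, SYSTIMH) starting at byte 8. */
static int parse_pktstamp(const uint8_t *va, uint64_t *ns)
{
	uint64_t reserved, systim;

	memcpy(&reserved, va, 8);
	memcpy(&systim, va + 8, 8);
	if (reserved)	/* non-zero reserved dwords -> no valid timestamp */
		return RET_PTP_INVALID;
	*ns = systim;	/* little-endian host assumed in this sketch */
	return 0;
}

int main(void)
{
	uint8_t buf[16] = { 0 };
	uint64_t ns = 0;
	int ret;

	buf[8] = 0x40;	/* fake SYSTIML byte */
	ret = parse_pktstamp(buf, &ns);
	printf("ret=%d ns=%llu\n", ret, (unsigned long long)ns);
	return 0;
}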
**/ -void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, - struct sk_buff *skb) +void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb) { struct igb_adapter *adapter = q_vector->adapter; struct e1000_hw *hw = &adapter->hw; - u64 regval; int adjust = 0; + u64 regval; + + if (!(adapter->ptp_flags & IGB_PTP_ENABLED)) + return; /* If this bit is set, then the RX registers contain the time stamp. No * other packet will be time stamped until we read these registers, so @@ -1008,6 +1025,7 @@ static int igb_ptp_set_timestamp_mode(struct igb_adapter *adapter, switch (config->tx_type) { case HWTSTAMP_TX_OFF: tsync_tx_ctl = 0; + break; case HWTSTAMP_TX_ON: break; default: diff --git a/drivers/net/ethernet/intel/igc/Makefile b/drivers/net/ethernet/intel/igc/Makefile index 1c3051db9085..95d1e8c490a4 100644 --- a/drivers/net/ethernet/intel/igc/Makefile +++ b/drivers/net/ethernet/intel/igc/Makefile @@ -8,4 +8,4 @@ obj-$(CONFIG_IGC) += igc.o igc-objs := igc_main.o igc_mac.o igc_i225.o igc_base.o igc_nvm.o igc_phy.o \ -igc_diag.o igc_ethtool.o igc_ptp.o igc_dump.o igc_tsn.o +igc_diag.o igc_ethtool.o igc_ptp.o igc_dump.o igc_tsn.o igc_xdp.o diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 5d2809dfd06a..25871351730b 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -28,6 +28,11 @@ void igc_ethtool_set_ops(struct net_device *); #define MAX_ETYPE_FILTER 8 #define IGC_RETA_SIZE 128 +/* SDP support */ +#define IGC_N_EXTTS 2 +#define IGC_N_PEROUT 2 +#define IGC_N_SDP 4 + enum igc_mac_filter_type { IGC_MAC_FILTER_TYPE_DST = 0, IGC_MAC_FILTER_TYPE_SRC @@ -111,6 +116,8 @@ struct igc_ring { struct sk_buff *skb; }; }; + + struct xdp_rxq_info xdp_rxq; } ____cacheline_internodealigned_in_smp; /* Board specific private data structure */ @@ -219,6 +226,16 @@ struct igc_adapter { ktime_t ptp_reset_start; /* Reset time in clock mono */ char fw_version[32]; + + struct bpf_prog *xdp_prog; + + bool pps_sys_wrap_on; + + struct ptp_pin_desc sdp_config[IGC_N_SDP]; + struct { + struct timespec64 start; + struct timespec64 period; + } perout[IGC_N_PEROUT]; }; void igc_up(struct igc_adapter *adapter); @@ -373,6 +390,8 @@ enum igc_tx_flags { /* olinfo flags */ IGC_TX_FLAGS_IPV4 = 0x10, IGC_TX_FLAGS_CSUM = 0x20, + + IGC_TX_FLAGS_XDP = 0x100, }; enum igc_boards { @@ -395,7 +414,10 @@ enum igc_boards { struct igc_tx_buffer { union igc_adv_tx_desc *next_to_watch; unsigned long time_stamp; - struct sk_buff *skb; + union { + struct sk_buff *skb; + struct xdp_frame *xdpf; + }; unsigned int bytecount; u16 gso_segs; __be16 protocol; @@ -504,6 +526,10 @@ enum igc_ring_flags_t { #define ring_uses_large_buffer(ring) \ test_bit(IGC_RING_FLAG_RX_3K_BUFFER, &(ring)->flags) +#define set_ring_uses_large_buffer(ring) \ + set_bit(IGC_RING_FLAG_RX_3K_BUFFER, &(ring)->flags) +#define clear_ring_uses_large_buffer(ring) \ + clear_bit(IGC_RING_FLAG_RX_3K_BUFFER, &(ring)->flags) #define ring_uses_build_skb(ring) \ test_bit(IGC_RING_FLAG_RX_BUILD_SKB_ENABLED, &(ring)->flags) @@ -547,8 +573,7 @@ void igc_ptp_init(struct igc_adapter *adapter); void igc_ptp_reset(struct igc_adapter *adapter); void igc_ptp_suspend(struct igc_adapter *adapter); void igc_ptp_stop(struct igc_adapter *adapter); -void igc_ptp_rx_pktstamp(struct igc_q_vector *q_vector, void *va, - struct sk_buff *skb); +ktime_t igc_ptp_rx_pktstamp(struct igc_adapter *adapter, __le32 *buf); int igc_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr); int 
igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr); void igc_ptp_tx_hang(struct igc_adapter *adapter); diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index b909f00a79e6..0103dda32f39 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -8,6 +8,8 @@ #define REQ_TX_DESCRIPTOR_MULTIPLE 8 #define REQ_RX_DESCRIPTOR_MULTIPLE 8 +#define IGC_CTRL_EXT_SDP2_DIR 0x00000400 /* SDP2 Data direction */ +#define IGC_CTRL_EXT_SDP3_DIR 0x00000800 /* SDP3 Data direction */ #define IGC_CTRL_EXT_DRV_LOAD 0x10000000 /* Drv loaded bit for FW */ /* Definitions for power management and wakeup registers */ @@ -96,6 +98,9 @@ #define IGC_CTRL_RFCE 0x08000000 /* Receive Flow Control enable */ #define IGC_CTRL_TFCE 0x10000000 /* Transmit flow control enable */ +#define IGC_CTRL_SDP0_DIR 0x00400000 /* SDP0 Data direction */ +#define IGC_CTRL_SDP1_DIR 0x00800000 /* SDP1 Data direction */ + /* As per the EAS the maximum supported size is 9.5KB (9728 bytes) */ #define MAX_JUMBO_FRAME_SIZE 0x2600 @@ -403,6 +408,64 @@ #define IGC_TSYNCTXCTL_START_SYNC 0x80000000 /* initiate sync */ #define IGC_TSYNCTXCTL_TXSYNSIG 0x00000020 /* Sample TX tstamp in PHY sop */ +/* Timer selection bits */ +#define IGC_AUX_IO_TIMER_SEL_SYSTIM0 (0u << 30) /* Select SYSTIM0 for auxiliary time stamp */ +#define IGC_AUX_IO_TIMER_SEL_SYSTIM1 (1u << 30) /* Select SYSTIM1 for auxiliary time stamp */ +#define IGC_AUX_IO_TIMER_SEL_SYSTIM2 (2u << 30) /* Select SYSTIM2 for auxiliary time stamp */ +#define IGC_AUX_IO_TIMER_SEL_SYSTIM3 (3u << 30) /* Select SYSTIM3 for auxiliary time stamp */ +#define IGC_TT_IO_TIMER_SEL_SYSTIM0 (0u << 30) /* Select SYSTIM0 for target time stamp */ +#define IGC_TT_IO_TIMER_SEL_SYSTIM1 (1u << 30) /* Select SYSTIM1 for target time stamp */ +#define IGC_TT_IO_TIMER_SEL_SYSTIM2 (2u << 30) /* Select SYSTIM2 for target time stamp */ +#define IGC_TT_IO_TIMER_SEL_SYSTIM3 (3u << 30) /* Select SYSTIM3 for target time stamp */ + +/* TSAUXC Configuration Bits */ +#define IGC_TSAUXC_EN_TT0 BIT(0) /* Enable target time 0. */ +#define IGC_TSAUXC_EN_TT1 BIT(1) /* Enable target time 1. */ +#define IGC_TSAUXC_EN_CLK0 BIT(2) /* Enable Configurable Frequency Clock 0. */ +#define IGC_TSAUXC_EN_CLK1 BIT(5) /* Enable Configurable Frequency Clock 1. */ +#define IGC_TSAUXC_EN_TS0 BIT(8) /* Enable hardware timestamp 0. */ +#define IGC_TSAUXC_AUTT0 BIT(9) /* Auxiliary Timestamp Taken. */ +#define IGC_TSAUXC_EN_TS1 BIT(10) /* Enable hardware timestamp 1. */ +#define IGC_TSAUXC_AUTT1 BIT(11) /* Auxiliary Timestamp Taken. */ +#define IGC_TSAUXC_PLSG BIT(17) /* Generate a pulse. */ +#define IGC_TSAUXC_DISABLE1 BIT(27) /* Disable SYSTIM0 Count Operation. */ +#define IGC_TSAUXC_DISABLE2 BIT(28) /* Disable SYSTIM1 Count Operation. */ +#define IGC_TSAUXC_DISABLE3 BIT(29) /* Disable SYSTIM2 Count Operation. */ +#define IGC_TSAUXC_DIS_TS_CLEAR BIT(30) /* Disable EN_TT0/1 auto clear. */ +#define IGC_TSAUXC_DISABLE0 BIT(31) /* Disable SYSTIM0 Count Operation. */ + +/* SDP Configuration Bits */ +#define IGC_AUX0_SEL_SDP0 (0u << 0) /* Assign SDP0 to auxiliary time stamp 0. */ +#define IGC_AUX0_SEL_SDP1 (1u << 0) /* Assign SDP1 to auxiliary time stamp 0. */ +#define IGC_AUX0_SEL_SDP2 (2u << 0) /* Assign SDP2 to auxiliary time stamp 0. */ +#define IGC_AUX0_SEL_SDP3 (3u << 0) /* Assign SDP3 to auxiliary time stamp 0. */ +#define IGC_AUX0_TS_SDP_EN (1u << 2) /* Enable auxiliary time stamp trigger 0. 
*/ +#define IGC_AUX1_SEL_SDP0 (0u << 3) /* Assign SDP0 to auxiliary time stamp 1. */ +#define IGC_AUX1_SEL_SDP1 (1u << 3) /* Assign SDP1 to auxiliary time stamp 1. */ +#define IGC_AUX1_SEL_SDP2 (2u << 3) /* Assign SDP2 to auxiliary time stamp 1. */ +#define IGC_AUX1_SEL_SDP3 (3u << 3) /* Assign SDP3 to auxiliary time stamp 1. */ +#define IGC_AUX1_TS_SDP_EN (1u << 5) /* Enable auxiliary time stamp trigger 1. */ +#define IGC_TS_SDP0_SEL_TT0 (0u << 6) /* Target time 0 is output on SDP0. */ +#define IGC_TS_SDP0_SEL_TT1 (1u << 6) /* Target time 1 is output on SDP0. */ +#define IGC_TS_SDP0_SEL_FC0 (2u << 6) /* Freq clock 0 is output on SDP0. */ +#define IGC_TS_SDP0_SEL_FC1 (3u << 6) /* Freq clock 1 is output on SDP0. */ +#define IGC_TS_SDP0_EN (1u << 8) /* SDP0 is assigned to Tsync. */ +#define IGC_TS_SDP1_SEL_TT0 (0u << 9) /* Target time 0 is output on SDP1. */ +#define IGC_TS_SDP1_SEL_TT1 (1u << 9) /* Target time 1 is output on SDP1. */ +#define IGC_TS_SDP1_SEL_FC0 (2u << 9) /* Freq clock 0 is output on SDP1. */ +#define IGC_TS_SDP1_SEL_FC1 (3u << 9) /* Freq clock 1 is output on SDP1. */ +#define IGC_TS_SDP1_EN (1u << 11) /* SDP1 is assigned to Tsync. */ +#define IGC_TS_SDP2_SEL_TT0 (0u << 12) /* Target time 0 is output on SDP2. */ +#define IGC_TS_SDP2_SEL_TT1 (1u << 12) /* Target time 1 is output on SDP2. */ +#define IGC_TS_SDP2_SEL_FC0 (2u << 12) /* Freq clock 0 is output on SDP2. */ +#define IGC_TS_SDP2_SEL_FC1 (3u << 12) /* Freq clock 1 is output on SDP2. */ +#define IGC_TS_SDP2_EN (1u << 14) /* SDP2 is assigned to Tsync. */ +#define IGC_TS_SDP3_SEL_TT0 (0u << 15) /* Target time 0 is output on SDP3. */ +#define IGC_TS_SDP3_SEL_TT1 (1u << 15) /* Target time 1 is output on SDP3. */ +#define IGC_TS_SDP3_SEL_FC0 (2u << 15) /* Freq clock 0 is output on SDP3. */ +#define IGC_TS_SDP3_SEL_FC1 (3u << 15) /* Freq clock 1 is output on SDP3. */ +#define IGC_TS_SDP3_EN (1u << 17) /* SDP3 is assigned to Tsync. 
*/ + /* Transmit Scheduling */ #define IGC_TQAVCTRL_TRANSMIT_MODE_TSN 0x00000001 #define IGC_TQAVCTRL_ENHANCED_QAV 0x00000008 @@ -441,11 +504,6 @@ #define MII_CR_RESTART_AUTO_NEG 0x0200 /* Restart auto negotiation */ #define MII_CR_POWER_DOWN 0x0800 /* Power down */ #define MII_CR_AUTO_NEG_EN 0x1000 /* Auto Neg Enable */ -#define MII_CR_LOOPBACK 0x4000 /* 0 = normal, 1 = loopback */ -#define MII_CR_RESET 0x8000 /* 0 = normal, 1 = PHY reset */ -#define MII_CR_SPEED_1000 0x0040 -#define MII_CR_SPEED_100 0x2000 -#define MII_CR_SPEED_10 0x0000 /* PHY Status Register */ #define MII_SR_LINK_STATUS 0x0004 /* Link Status 1 = link */ diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 824a6c454bca..9722449d7633 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -65,6 +65,8 @@ static const struct igc_stats igc_gstrings_stats[] = { IGC_STAT("tx_hwtstamp_timeouts", tx_hwtstamp_timeouts), IGC_STAT("tx_hwtstamp_skipped", tx_hwtstamp_skipped), IGC_STAT("rx_hwtstamp_cleared", rx_hwtstamp_cleared), + IGC_STAT("tx_lpi_counter", stats.tlpic), + IGC_STAT("rx_lpi_counter", stats.rlpic), }; #define IGC_NETDEV_STAT(_net_stat) { \ @@ -1711,6 +1713,9 @@ static int igc_ethtool_get_link_ksettings(struct net_device *netdev, Autoneg); } + /* Set pause flow control settings */ + ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); + switch (hw->fc.requested_mode) { case igc_fc_full: ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause); @@ -1725,9 +1730,7 @@ static int igc_ethtool_get_link_ksettings(struct net_device *netdev, Asym_Pause); break; default: - ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - Asym_Pause); + break; } status = pm_runtime_suspended(&adapter->pdev->dev) ? diff --git a/drivers/net/ethernet/intel/igc/igc_i225.c b/drivers/net/ethernet/intel/igc/igc_i225.c index 7ec04e48860c..b2ef9fde97b3 100644 --- a/drivers/net/ethernet/intel/igc/igc_i225.c +++ b/drivers/net/ethernet/intel/igc/igc_i225.c @@ -6,7 +6,7 @@ #include "igc_hw.h" /** - * igc_get_hw_semaphore_i225 - Acquire hardware semaphore + * igc_acquire_nvm_i225 - Acquire exclusive access to EEPROM * @hw: pointer to the HW structure * * Acquire the necessary semaphores for exclusive access to the EEPROM. 
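The IGC_TS_SDP* defines added above encode, for each software-definable pin, a two-bit function selector (target time 0/1 or frequency clock 0/1) plus an enable bit that hands the pin to the Tsync block; the pin direction bits live separately in CTRL/CTRL_EXT. A small sketch of the read-modify-write that routes target time 1 onto SDP0, reusing the same bit encodings (the defines are copied locally so the sketch is self-contained):

#include <stdint.h>
#include <stdio.h>

/* Local copies of the SDP0 encodings from the igc_defines.h hunk above. */
#define TS_SDP0_SEL_TT1 (1u << 6)	/* target time 1 on SDP0 */
#define TS_SDP0_SEL_FC1 (3u << 6)	/* also the full 2-bit selector mask */
#define TS_SDP0_EN      (1u << 8)	/* SDP0 is assigned to Tsync */

/* Clear SDP0's selector field, then OR in the new function and enable
 * bit, as a driver would do on the SDP configuration register value. */
static uint32_t route_tt1_to_sdp0(uint32_t sdp_cfg)
{
	sdp_cfg &= ~(TS_SDP0_SEL_FC1 | TS_SDP0_EN);
	return sdp_cfg | TS_SDP0_SEL_TT1 | TS_SDP0_EN;
}

int main(void)
{
	printf("sdp_cfg = 0x%x\n", route_tt1_to_sdp0(0));	/* 0x140 */
	return 0;
}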
@@ -229,10 +229,11 @@ static s32 igc_write_nvm_srwr(struct igc_hw *hw, u16 offset, u16 words, if (offset >= nvm->word_size || (words > (nvm->word_size - offset)) || words == 0) { hw_dbg("nvm parameter(s) out of bounds\n"); - goto out; + return ret_val; } for (i = 0; i < words; i++) { + ret_val = -IGC_ERR_NVM; eewr = ((offset + i) << IGC_NVM_RW_ADDR_SHIFT) | (data[i] << IGC_NVM_RW_REG_DATA) | IGC_NVM_RW_REG_START; @@ -254,7 +255,6 @@ static s32 igc_write_nvm_srwr(struct igc_hw *hw, u16 offset, u16 words, } } -out: return ret_val; } diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 7ac9597ddb84..069471b7ffb0 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -10,17 +10,24 @@ #include <linux/ip.h> #include <linux/pm_runtime.h> #include <net/pkt_sched.h> +#include <linux/bpf_trace.h> #include <net/ipv6.h> #include "igc.h" #include "igc_hw.h" #include "igc_tsn.h" +#include "igc_xdp.h" #define DRV_SUMMARY "Intel(R) 2.5G Ethernet Linux Driver" #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK) +#define IGC_XDP_PASS 0 +#define IGC_XDP_CONSUMED BIT(0) +#define IGC_XDP_TX BIT(1) +#define IGC_XDP_REDIRECT BIT(2) + static int debug = -1; MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>"); @@ -176,8 +183,10 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring) while (i != tx_ring->next_to_use) { union igc_adv_tx_desc *eop_desc, *tx_desc; - /* Free all the Tx ring sk_buffs */ - dev_kfree_skb_any(tx_buffer->skb); + if (tx_buffer->tx_flags & IGC_TX_FLAGS_XDP) + xdp_return_frame(tx_buffer->xdpf); + else + dev_kfree_skb_any(tx_buffer->skb); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -375,6 +384,8 @@ static void igc_clean_rx_ring(struct igc_ring *rx_ring) i = 0; } + clear_ring_uses_large_buffer(rx_ring); + rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; @@ -403,6 +414,8 @@ void igc_free_rx_resources(struct igc_ring *rx_ring) { igc_clean_rx_ring(rx_ring); + igc_xdp_unregister_rxq_info(rx_ring); + vfree(rx_ring->rx_buffer_info); rx_ring->rx_buffer_info = NULL; @@ -440,7 +453,11 @@ int igc_setup_rx_resources(struct igc_ring *rx_ring) { struct net_device *ndev = rx_ring->netdev; struct device *dev = rx_ring->dev; - int size, desc_len; + int size, desc_len, res; + + res = igc_xdp_register_rxq_info(rx_ring); + if (res < 0) + return res; size = sizeof(struct igc_rx_buffer) * rx_ring->count; rx_ring->rx_buffer_info = vzalloc(size); @@ -466,6 +483,7 @@ int igc_setup_rx_resources(struct igc_ring *rx_ring) return 0; err: + igc_xdp_unregister_rxq_info(rx_ring); vfree(rx_ring->rx_buffer_info); rx_ring->rx_buffer_info = NULL; netdev_err(ndev, "Unable to allocate memory for Rx descriptor ring\n"); @@ -497,6 +515,11 @@ static int igc_setup_all_rx_resources(struct igc_adapter *adapter) return err; } +static bool igc_xdp_is_enabled(struct igc_adapter *adapter) +{ + return !!adapter->xdp_prog; +} + /** * igc_configure_rx_ring - Configure a receive ring after Reset * @adapter: board private structure @@ -513,6 +536,9 @@ static void igc_configure_rx_ring(struct igc_adapter *adapter, u32 srrctl = 0, rxdctl = 0; u64 rdba = ring->dma; + if (igc_xdp_is_enabled(adapter)) + set_ring_uses_large_buffer(ring); + /* disable the queue */ wr32(IGC_RXDCTL(reg_idx), 0); @@ -941,7 +967,7 @@ static void igc_tx_ctxtdesc(struct igc_ring *tx_ring, struct igc_adapter *adapter = netdev_priv(tx_ring->netdev); ktime_t txtime = first->skb->tstamp; - 
first->skb->tstamp = ktime_set(0, 0); + skb_txtime_consumed(first->skb); context_desc->launch_time = igc_tx_launchtime(adapter, txtime); } else { @@ -1029,7 +1055,7 @@ static inline int igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size) ((u32)((_input) & (_flag)) * ((_result) / (_flag))) : \ ((u32)((_input) & (_flag)) / ((_flag) / (_result)))) -static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags) +static u32 igc_tx_cmd_type(u32 tx_flags) { /* set type for advanced descriptor with frame checksum insertion */ u32 cmd_type = IGC_ADVTXD_DTYP_DATA | @@ -1078,7 +1104,7 @@ static int igc_tx_map(struct igc_ring *tx_ring, u16 i = tx_ring->next_to_use; unsigned int data_len, size; dma_addr_t dma; - u32 cmd_type = igc_tx_cmd_type(skb, tx_flags); + u32 cmd_type = igc_tx_cmd_type(tx_flags); tx_desc = IGC_TX_DESC(tx_ring, i); @@ -1480,11 +1506,18 @@ static void igc_process_skb_fields(struct igc_ring *rx_ring, } static struct igc_rx_buffer *igc_get_rx_buffer(struct igc_ring *rx_ring, - const unsigned int size) + const unsigned int size, + int *rx_buffer_pgcnt) { struct igc_rx_buffer *rx_buffer; rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; + *rx_buffer_pgcnt = +#if (PAGE_SIZE < 8192) + page_count(rx_buffer->page); +#else + 0; +#endif prefetchw(rx_buffer->page); /* we are reusing so sync this buffer for CPU use */ @@ -1499,6 +1532,32 @@ static struct igc_rx_buffer *igc_get_rx_buffer(struct igc_ring *rx_ring, return rx_buffer; } +static void igc_rx_buffer_flip(struct igc_rx_buffer *buffer, + unsigned int truesize) +{ +#if (PAGE_SIZE < 8192) + buffer->page_offset ^= truesize; +#else + buffer->page_offset += truesize; +#endif +} + +static unsigned int igc_get_rx_frame_truesize(struct igc_ring *ring, + unsigned int size) +{ + unsigned int truesize; + +#if (PAGE_SIZE < 8192) + truesize = igc_rx_pg_size(ring) / 2; +#else + truesize = ring_uses_build_skb(ring) ? + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + + SKB_DATA_ALIGN(IGC_SKB_PAD + size) : + SKB_DATA_ALIGN(size); +#endif + return truesize; +} + /** * igc_add_rx_frag - Add contents of Rx buffer to sk_buff * @rx_ring: rx descriptor ring to transact packets on @@ -1513,20 +1572,19 @@ static void igc_add_rx_frag(struct igc_ring *rx_ring, struct sk_buff *skb, unsigned int size) { -#if (PAGE_SIZE < 8192) - unsigned int truesize = igc_rx_pg_size(rx_ring) / 2; + unsigned int truesize; - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, - rx_buffer->page_offset, size, truesize); - rx_buffer->page_offset ^= truesize; +#if (PAGE_SIZE < 8192) + truesize = igc_rx_pg_size(rx_ring) / 2; #else - unsigned int truesize = ring_uses_build_skb(rx_ring) ? - SKB_DATA_ALIGN(IGC_SKB_PAD + size) : - SKB_DATA_ALIGN(size); + truesize = ring_uses_build_skb(rx_ring) ? 
+ SKB_DATA_ALIGN(IGC_SKB_PAD + size) : + SKB_DATA_ALIGN(size); +#endif skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, rx_buffer->page_offset, size, truesize); - rx_buffer->page_offset += truesize; -#endif + + igc_rx_buffer_flip(rx_buffer, truesize); } static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring, @@ -1535,12 +1593,7 @@ static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring, unsigned int size) { void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; -#if (PAGE_SIZE < 8192) - unsigned int truesize = igc_rx_pg_size(rx_ring) / 2; -#else - unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + - SKB_DATA_ALIGN(IGC_SKB_PAD + size); -#endif + unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size); struct sk_buff *skb; /* prefetch first cache line of first page */ @@ -1555,27 +1608,18 @@ static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring, skb_reserve(skb, IGC_SKB_PAD); __skb_put(skb, size); - /* update buffer offset */ -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif - + igc_rx_buffer_flip(rx_buffer, truesize); return skb; } static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring, struct igc_rx_buffer *rx_buffer, - union igc_adv_rx_desc *rx_desc, - unsigned int size) + struct xdp_buff *xdp, + ktime_t timestamp) { - void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; -#if (PAGE_SIZE < 8192) - unsigned int truesize = igc_rx_pg_size(rx_ring) / 2; -#else - unsigned int truesize = SKB_DATA_ALIGN(size); -#endif + unsigned int size = xdp->data_end - xdp->data; + unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size); + void *va = xdp->data; unsigned int headlen; struct sk_buff *skb; @@ -1587,11 +1631,8 @@ static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring, if (unlikely(!skb)) return NULL; - if (unlikely(igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP))) { - igc_ptp_rx_pktstamp(rx_ring->q_vector, va, skb); - va += IGC_TS_HDR_LEN; - size -= IGC_TS_HDR_LEN; - } + if (timestamp) + skb_hwtstamps(skb)->hwtstamp = timestamp; /* Determine available headroom for copy */ headlen = size; @@ -1607,11 +1648,7 @@ static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring, skb_add_rx_frag(skb, 0, rx_buffer->page, (va + headlen) - page_address(rx_buffer->page), size, truesize); -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif + igc_rx_buffer_flip(rx_buffer, truesize); } else { rx_buffer->pagecnt_bias++; } @@ -1648,7 +1685,8 @@ static void igc_reuse_rx_page(struct igc_ring *rx_ring, new_buff->pagecnt_bias = old_buff->pagecnt_bias; } -static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer) +static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer, + int rx_buffer_pgcnt) { unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; struct page *page = rx_buffer->page; @@ -1659,7 +1697,7 @@ static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer) #if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ - if (unlikely((page_ref_count(page) - pagecnt_bias) > 1)) + if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1)) return false; #else #define IGC_LAST_OFFSET \ @@ -1673,8 +1711,8 @@ static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer) * the pagecnt_bias and page count so that we fully restock the * number of references the driver holds. 
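igc_rx_buffer_flip() above centralizes a pattern used throughout these Rx paths: with PAGE_SIZE < 8192 each receive page is split into two half-page buffers, so advancing to the other half is just an XOR of the offset with the half-page truesize, while on larger pages the offset grows linearly. The XOR toggle in isolation:

#include <stdio.h>

#define PG_SIZE  4096u
#define TRUESIZE (PG_SIZE / 2)	/* half a page per buffer on 4K-page systems */

/* 0 -> 2048 -> 0 -> 2048 ...: the ring ping-pongs between page halves. */
static unsigned int flip(unsigned int page_offset)
{
	return page_offset ^ TRUESIZE;
}

int main(void)
{
	unsigned int off = 0;

	for (int i = 0; i < 4; i++) {
		printf("use offset %u\n", off);
		off = flip(off);
	}
	return 0;
}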
*/ - if (unlikely(!pagecnt_bias)) { - page_ref_add(page, USHRT_MAX); + if (unlikely(pagecnt_bias == 1)) { + page_ref_add(page, USHRT_MAX - 1); rx_buffer->pagecnt_bias = USHRT_MAX; } @@ -1726,6 +1764,10 @@ static bool igc_cleanup_headers(struct igc_ring *rx_ring, union igc_adv_rx_desc *rx_desc, struct sk_buff *skb) { + /* XDP packets use error pointer so abort at this point */ + if (IS_ERR(skb)) + return true; + if (unlikely(igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_RXE))) { struct net_device *netdev = rx_ring->netdev; @@ -1743,9 +1785,10 @@ static bool igc_cleanup_headers(struct igc_ring *rx_ring, } static void igc_put_rx_buffer(struct igc_ring *rx_ring, - struct igc_rx_buffer *rx_buffer) + struct igc_rx_buffer *rx_buffer, + int rx_buffer_pgcnt) { - if (igc_can_reuse_rx_page(rx_buffer)) { + if (igc_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) { /* hand second half of page back to the ring */ igc_reuse_rx_page(rx_ring, rx_buffer); } else { @@ -1765,7 +1808,14 @@ static void igc_put_rx_buffer(struct igc_ring *rx_ring, static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring) { - return ring_uses_build_skb(rx_ring) ? IGC_SKB_PAD : 0; + struct igc_adapter *adapter = rx_ring->q_vector->adapter; + + if (ring_uses_build_skb(rx_ring)) + return IGC_SKB_PAD; + if (igc_xdp_is_enabled(adapter)) + return XDP_PACKET_HEADROOM; + + return 0; } static bool igc_alloc_mapped_page(struct igc_ring *rx_ring, @@ -1804,7 +1854,8 @@ static bool igc_alloc_mapped_page(struct igc_ring *rx_ring, bi->dma = dma; bi->page = page; bi->page_offset = igc_rx_offset(rx_ring); - bi->pagecnt_bias = 1; + page_ref_add(page, USHRT_MAX - 1); + bi->pagecnt_bias = USHRT_MAX; return true; } @@ -1879,17 +1930,196 @@ static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count) } } +static int igc_xdp_init_tx_buffer(struct igc_tx_buffer *buffer, + struct xdp_frame *xdpf, + struct igc_ring *ring) +{ + dma_addr_t dma; + + dma = dma_map_single(ring->dev, xdpf->data, xdpf->len, DMA_TO_DEVICE); + if (dma_mapping_error(ring->dev, dma)) { + netdev_err_once(ring->netdev, "Failed to map DMA for TX\n"); + return -ENOMEM; + } + + buffer->xdpf = xdpf; + buffer->tx_flags = IGC_TX_FLAGS_XDP; + buffer->protocol = 0; + buffer->bytecount = xdpf->len; + buffer->gso_segs = 1; + buffer->time_stamp = jiffies; + dma_unmap_len_set(buffer, len, xdpf->len); + dma_unmap_addr_set(buffer, dma, dma); + return 0; +} + +/* This function requires __netif_tx_lock is held by the caller. 
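 *
 * A minimal sketch of the expected calling pattern (this mirrors what
 * igc_xdp_xmit_back() and igc_xdp_xmit() below actually do; error
 * handling elided):
 *
 *   struct netdev_queue *nq = txring_txq(ring);
 *
 *   __netif_tx_lock(nq, smp_processor_id());
 *   err = igc_xdp_init_tx_descriptor(ring, xdpf);
 *   __netif_tx_unlock(nq);
 *
 * The lock serializes next_to_use and tail updates against the normal
 * transmit path that shares the same ring.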
*/ +static int igc_xdp_init_tx_descriptor(struct igc_ring *ring, + struct xdp_frame *xdpf) +{ + struct igc_tx_buffer *buffer; + union igc_adv_tx_desc *desc; + u32 cmd_type, olinfo_status; + int err; + + if (!igc_desc_unused(ring)) + return -EBUSY; + + buffer = &ring->tx_buffer_info[ring->next_to_use]; + err = igc_xdp_init_tx_buffer(buffer, xdpf, ring); + if (err) + return err; + + cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT | + IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD | + buffer->bytecount; + olinfo_status = buffer->bytecount << IGC_ADVTXD_PAYLEN_SHIFT; + + desc = IGC_TX_DESC(ring, ring->next_to_use); + desc->read.cmd_type_len = cpu_to_le32(cmd_type); + desc->read.olinfo_status = cpu_to_le32(olinfo_status); + desc->read.buffer_addr = cpu_to_le64(dma_unmap_addr(buffer, dma)); + + netdev_tx_sent_queue(txring_txq(ring), buffer->bytecount); + + buffer->next_to_watch = desc; + + ring->next_to_use++; + if (ring->next_to_use == ring->count) + ring->next_to_use = 0; + + return 0; +} + +static struct igc_ring *igc_xdp_get_tx_ring(struct igc_adapter *adapter, + int cpu) +{ + int index = cpu; + + if (unlikely(index < 0)) + index = 0; + + while (index >= adapter->num_tx_queues) + index -= adapter->num_tx_queues; + + return adapter->tx_ring[index]; +} + +static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp) +{ + struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp); + int cpu = smp_processor_id(); + struct netdev_queue *nq; + struct igc_ring *ring; + int res; + + if (unlikely(!xdpf)) + return -EFAULT; + + ring = igc_xdp_get_tx_ring(adapter, cpu); + nq = txring_txq(ring); + + __netif_tx_lock(nq, cpu); + res = igc_xdp_init_tx_descriptor(ring, xdpf); + __netif_tx_unlock(nq); + return res; +} + +static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter, + struct xdp_buff *xdp) +{ + struct bpf_prog *prog; + int res; + u32 act; + + rcu_read_lock(); + + prog = READ_ONCE(adapter->xdp_prog); + if (!prog) { + res = IGC_XDP_PASS; + goto unlock; + } + + act = bpf_prog_run_xdp(prog, xdp); + switch (act) { + case XDP_PASS: + res = IGC_XDP_PASS; + break; + case XDP_TX: + if (igc_xdp_xmit_back(adapter, xdp) < 0) + res = IGC_XDP_CONSUMED; + else + res = IGC_XDP_TX; + break; + case XDP_REDIRECT: + if (xdp_do_redirect(adapter->netdev, xdp, prog) < 0) + res = IGC_XDP_CONSUMED; + else + res = IGC_XDP_REDIRECT; + break; + default: + bpf_warn_invalid_xdp_action(act); + fallthrough; + case XDP_ABORTED: + trace_xdp_exception(adapter->netdev, prog, act); + fallthrough; + case XDP_DROP: + res = IGC_XDP_CONSUMED; + break; + } + +unlock: + rcu_read_unlock(); + return ERR_PTR(-res); +} + +/* This function assumes __netif_tx_lock is held by the caller. */ +static void igc_flush_tx_descriptors(struct igc_ring *ring) +{ + /* Once tail pointer is updated, hardware can fetch the descriptors + * any time so we issue a write membar here to ensure all memory + * writes are complete before the tail pointer is updated. 
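 *
 * Schematically, the ordering being enforced is:
 *
 *   desc->read.buffer_addr = ...;              descriptor writes
 *   wmb();                                     make them visible
 *   writel(ring->next_to_use, ring->tail);     only then arm the NIC
 *
 * so the hardware can never fetch a descriptor whose fields are still
 * in flight.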
+ */ + wmb(); + writel(ring->next_to_use, ring->tail); +} + +static void igc_finalize_xdp(struct igc_adapter *adapter, int status) +{ + int cpu = smp_processor_id(); + struct netdev_queue *nq; + struct igc_ring *ring; + + if (status & IGC_XDP_TX) { + ring = igc_xdp_get_tx_ring(adapter, cpu); + nq = txring_txq(ring); + + __netif_tx_lock(nq, cpu); + igc_flush_tx_descriptors(ring); + __netif_tx_unlock(nq); + } + + if (status & IGC_XDP_REDIRECT) + xdp_do_flush(); +} + static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) { unsigned int total_bytes = 0, total_packets = 0; + struct igc_adapter *adapter = q_vector->adapter; struct igc_ring *rx_ring = q_vector->rx.ring; struct sk_buff *skb = rx_ring->skb; u16 cleaned_count = igc_desc_unused(rx_ring); + int xdp_status = 0, rx_buffer_pgcnt; while (likely(total_packets < budget)) { union igc_adv_rx_desc *rx_desc; struct igc_rx_buffer *rx_buffer; - unsigned int size; + unsigned int size, truesize; + ktime_t timestamp = 0; + struct xdp_buff xdp; + int pkt_offset = 0; + void *pktbuf; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= IGC_RX_BUFFER_WRITE) { @@ -1908,16 +2138,52 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) */ dma_rmb(); - rx_buffer = igc_get_rx_buffer(rx_ring, size); + rx_buffer = igc_get_rx_buffer(rx_ring, size, &rx_buffer_pgcnt); + truesize = igc_get_rx_frame_truesize(rx_ring, size); + + pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset; - /* retrieve a buffer from the ring */ - if (skb) + if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) { + timestamp = igc_ptp_rx_pktstamp(q_vector->adapter, + pktbuf); + pkt_offset = IGC_TS_HDR_LEN; + size -= IGC_TS_HDR_LEN; + } + + if (!skb) { + xdp.data = pktbuf + pkt_offset; + xdp.data_end = xdp.data + size; + xdp.data_hard_start = pktbuf - igc_rx_offset(rx_ring); + xdp_set_data_meta_invalid(&xdp); + xdp.frame_sz = truesize; + xdp.rxq = &rx_ring->xdp_rxq; + + skb = igc_xdp_run_prog(adapter, &xdp); + } + + if (IS_ERR(skb)) { + unsigned int xdp_res = -PTR_ERR(skb); + + switch (xdp_res) { + case IGC_XDP_CONSUMED: + rx_buffer->pagecnt_bias++; + break; + case IGC_XDP_TX: + case IGC_XDP_REDIRECT: + igc_rx_buffer_flip(rx_buffer, truesize); + xdp_status |= xdp_res; + break; + } + + total_packets++; + total_bytes += size; + } else if (skb) igc_add_rx_frag(rx_ring, rx_buffer, skb, size); else if (ring_uses_build_skb(rx_ring)) skb = igc_build_skb(rx_ring, rx_buffer, rx_desc, size); else - skb = igc_construct_skb(rx_ring, rx_buffer, - rx_desc, size); + skb = igc_construct_skb(rx_ring, rx_buffer, &xdp, + timestamp); /* exit if we failed to retrieve a buffer */ if (!skb) { @@ -1926,7 +2192,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) break; } - igc_put_rx_buffer(rx_ring, rx_buffer); + igc_put_rx_buffer(rx_ring, rx_buffer, rx_buffer_pgcnt); cleaned_count++; /* fetch next buffer in frame if non-eop */ @@ -1954,6 +2220,9 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) total_packets++; } + if (xdp_status) + igc_finalize_xdp(adapter, xdp_status); + /* place incomplete frames back on ring for completion */ rx_ring->skb = skb; @@ -2015,8 +2284,10 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) total_bytes += tx_buffer->bytecount; total_packets += tx_buffer->gso_segs; - /* free the skb */ - napi_consume_skb(tx_buffer->skb, napi_budget); + if (tx_buffer->tx_flags & IGC_TX_FLAGS_XDP) + 
xdp_return_frame(tx_buffer->xdpf); + else + napi_consume_skb(tx_buffer->skb, napi_budget); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -3580,7 +3851,7 @@ void igc_up(struct igc_adapter *adapter) netif_tx_start_all_queues(adapter->netdev); /* start the watchdog. */ - hw->mac.get_link_status = 1; + hw->mac.get_link_status = true; schedule_work(&adapter->watchdog_task); } @@ -3831,10 +4102,19 @@ static void igc_reset_task(struct work_struct *work) adapter = container_of(work, struct igc_adapter, reset_task); + rtnl_lock(); + /* If we're already down or resetting, just bail */ + if (test_bit(__IGC_DOWN, &adapter->state) || + test_bit(__IGC_RESETTING, &adapter->state)) { + rtnl_unlock(); + return; + } + igc_rings_dump(adapter); igc_regs_dump(adapter); netdev_err(adapter->netdev, "Reset adapter\n"); igc_reinit_locked(adapter); + rtnl_unlock(); } /** @@ -3849,6 +4129,11 @@ static int igc_change_mtu(struct net_device *netdev, int new_mtu) int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; struct igc_adapter *adapter = netdev_priv(netdev); + if (igc_xdp_is_enabled(adapter) && new_mtu > ETH_DATA_LEN) { + netdev_dbg(netdev, "Jumbo frames not supported with XDP"); + return -EINVAL; + } + /* adjust max frame to be at least the size of a standard frame */ if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN)) max_frame = ETH_FRAME_LEN + ETH_FCS_LEN; @@ -3965,9 +4250,20 @@ igc_features_check(struct sk_buff *skb, struct net_device *dev, static void igc_tsync_interrupt(struct igc_adapter *adapter) { + u32 ack, tsauxc, sec, nsec, tsicr; struct igc_hw *hw = &adapter->hw; - u32 tsicr = rd32(IGC_TSICR); - u32 ack = 0; + struct ptp_clock_event event; + struct timespec64 ts; + + tsicr = rd32(IGC_TSICR); + ack = 0; + + if (tsicr & IGC_TSICR_SYS_WRAP) { + event.type = PTP_CLOCK_PPS; + if (adapter->ptp_caps.pps) + ptp_clock_event(adapter->ptp_clock, &event); + ack |= IGC_TSICR_SYS_WRAP; + } if (tsicr & IGC_TSICR_TXTS) { /* retrieve hardware timestamp */ @@ -3975,6 +4271,54 @@ static void igc_tsync_interrupt(struct igc_adapter *adapter) ack |= IGC_TSICR_TXTS; } + if (tsicr & IGC_TSICR_TT0) { + spin_lock(&adapter->tmreg_lock); + ts = timespec64_add(adapter->perout[0].start, + adapter->perout[0].period); + wr32(IGC_TRGTTIML0, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0); + wr32(IGC_TRGTTIMH0, (u32)ts.tv_sec); + tsauxc = rd32(IGC_TSAUXC); + tsauxc |= IGC_TSAUXC_EN_TT0; + wr32(IGC_TSAUXC, tsauxc); + adapter->perout[0].start = ts; + spin_unlock(&adapter->tmreg_lock); + ack |= IGC_TSICR_TT0; + } + + if (tsicr & IGC_TSICR_TT1) { + spin_lock(&adapter->tmreg_lock); + ts = timespec64_add(adapter->perout[1].start, + adapter->perout[1].period); + wr32(IGC_TRGTTIML1, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0); + wr32(IGC_TRGTTIMH1, (u32)ts.tv_sec); + tsauxc = rd32(IGC_TSAUXC); + tsauxc |= IGC_TSAUXC_EN_TT1; + wr32(IGC_TSAUXC, tsauxc); + adapter->perout[1].start = ts; + spin_unlock(&adapter->tmreg_lock); + ack |= IGC_TSICR_TT1; + } + + if (tsicr & IGC_TSICR_AUTT0) { + nsec = rd32(IGC_AUXSTMPL0); + sec = rd32(IGC_AUXSTMPH0); + event.type = PTP_CLOCK_EXTTS; + event.index = 0; + event.timestamp = sec * NSEC_PER_SEC + nsec; + ptp_clock_event(adapter->ptp_clock, &event); + ack |= IGC_TSICR_AUTT0; + } + + if (tsicr & IGC_TSICR_AUTT1) { + nsec = rd32(IGC_AUXSTMPL1); + sec = rd32(IGC_AUXSTMPH1); + event.type = PTP_CLOCK_EXTTS; + event.index = 1; + event.timestamp = sec * NSEC_PER_SEC + nsec; + ptp_clock_event(adapter->ptp_clock, &event); + ack |= IGC_TSICR_AUTT1; + } + /* acknowledge the interrupts */ wr32(IGC_TSICR, 
ack); } @@ -4000,7 +4344,7 @@ static irqreturn_t igc_msix_other(int irq, void *data) } if (icr & IGC_ICR_LSC) { - hw->mac.get_link_status = 1; + hw->mac.get_link_status = true; /* guard against interrupt when we're going down */ if (!test_bit(__IGC_DOWN, &adapter->state)) mod_timer(&adapter->watchdog_timer, jiffies + 1); @@ -4378,7 +4722,7 @@ static irqreturn_t igc_intr_msi(int irq, void *data) } if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) { - hw->mac.get_link_status = 1; + hw->mac.get_link_status = true; if (!test_bit(__IGC_DOWN, &adapter->state)) mod_timer(&adapter->watchdog_timer, jiffies + 1); } @@ -4420,7 +4764,7 @@ static irqreturn_t igc_intr(int irq, void *data) } if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) { - hw->mac.get_link_status = 1; + hw->mac.get_link_status = true; /* guard against interrupt when we're going down */ if (!test_bit(__IGC_DOWN, &adapter->state)) mod_timer(&adapter->watchdog_timer, jiffies + 1); @@ -4574,7 +4918,7 @@ static int __igc_open(struct net_device *netdev, bool resuming) netif_tx_start_all_queues(netdev); /* start the watchdog. */ - hw->mac.get_link_status = 1; + hw->mac.get_link_status = true; schedule_work(&adapter->watchdog_task); return IGC_SUCCESS; @@ -4835,6 +5179,58 @@ static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type, } } +static int igc_bpf(struct net_device *dev, struct netdev_bpf *bpf) +{ + struct igc_adapter *adapter = netdev_priv(dev); + + switch (bpf->command) { + case XDP_SETUP_PROG: + return igc_xdp_set_prog(adapter, bpf->prog, bpf->extack); + default: + return -EOPNOTSUPP; + } +} + +static int igc_xdp_xmit(struct net_device *dev, int num_frames, + struct xdp_frame **frames, u32 flags) +{ + struct igc_adapter *adapter = netdev_priv(dev); + int cpu = smp_processor_id(); + struct netdev_queue *nq; + struct igc_ring *ring; + int i, drops; + + if (unlikely(test_bit(__IGC_DOWN, &adapter->state))) + return -ENETDOWN; + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + return -EINVAL; + + ring = igc_xdp_get_tx_ring(adapter, cpu); + nq = txring_txq(ring); + + __netif_tx_lock(nq, cpu); + + drops = 0; + for (i = 0; i < num_frames; i++) { + int err; + struct xdp_frame *xdpf = frames[i]; + + err = igc_xdp_init_tx_descriptor(ring, xdpf); + if (err) { + xdp_return_frame_rx_napi(xdpf); + drops++; + } + } + + if (flags & XDP_XMIT_FLUSH) + igc_flush_tx_descriptors(ring); + + __netif_tx_unlock(nq); + + return num_frames - drops; +} + static const struct net_device_ops igc_netdev_ops = { .ndo_open = igc_open, .ndo_stop = igc_close, @@ -4848,6 +5244,8 @@ static const struct net_device_ops igc_netdev_ops = { .ndo_features_check = igc_features_check, .ndo_do_ioctl = igc_ioctl, .ndo_setup_tc = igc_setup_tc, + .ndo_bpf = igc_bpf, + .ndo_xdp_xmit = igc_xdp_xmit, }; /* PCIe configuration access */ @@ -4915,7 +5313,7 @@ int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx) { struct igc_mac_info *mac = &adapter->hw.mac; - mac->autoneg = 0; + mac->autoneg = false; /* Make sure dplx is at most 1 bit and lsb of speed is not set * for the switch() below to work @@ -4937,13 +5335,13 @@ int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx) mac->forced_speed_duplex = ADVERTISE_100_FULL; break; case SPEED_1000 + DUPLEX_FULL: - mac->autoneg = 1; + mac->autoneg = true; adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; break; case SPEED_1000 + DUPLEX_HALF: /* not supported */ goto err_inval; case SPEED_2500 + DUPLEX_FULL: - mac->autoneg = 1; + mac->autoneg = true; adapter->hw.phy.autoneg_advertised = ADVERTISE_2500_FULL; 
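/* As a concrete instance of the "spd + dplx" encoding this switch
 * relies on: DUPLEX_FULL is 1 and every SPEED_* constant has a clear
 * least significant bit, so SPEED_2500 + DUPLEX_FULL == 2501 cannot
 * alias any other speed/duplex pair.
 */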
break; case SPEED_2500 + DUPLEX_HALF: /* not supported */ diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index ac0b9c85da7c..69617d2c1be2 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -120,12 +120,289 @@ static int igc_ptp_settime_i225(struct ptp_clock_info *ptp, return 0; } +static void igc_pin_direction(int pin, int input, u32 *ctrl, u32 *ctrl_ext) +{ + u32 *ptr = pin < 2 ? ctrl : ctrl_ext; + static const u32 mask[IGC_N_SDP] = { + IGC_CTRL_SDP0_DIR, + IGC_CTRL_SDP1_DIR, + IGC_CTRL_EXT_SDP2_DIR, + IGC_CTRL_EXT_SDP3_DIR, + }; + + if (input) + *ptr &= ~mask[pin]; + else + *ptr |= mask[pin]; +} + +static void igc_pin_perout(struct igc_adapter *igc, int chan, int pin, int freq) +{ + static const u32 igc_aux0_sel_sdp[IGC_N_SDP] = { + IGC_AUX0_SEL_SDP0, IGC_AUX0_SEL_SDP1, IGC_AUX0_SEL_SDP2, IGC_AUX0_SEL_SDP3, + }; + static const u32 igc_aux1_sel_sdp[IGC_N_SDP] = { + IGC_AUX1_SEL_SDP0, IGC_AUX1_SEL_SDP1, IGC_AUX1_SEL_SDP2, IGC_AUX1_SEL_SDP3, + }; + static const u32 igc_ts_sdp_en[IGC_N_SDP] = { + IGC_TS_SDP0_EN, IGC_TS_SDP1_EN, IGC_TS_SDP2_EN, IGC_TS_SDP3_EN, + }; + static const u32 igc_ts_sdp_sel_tt0[IGC_N_SDP] = { + IGC_TS_SDP0_SEL_TT0, IGC_TS_SDP1_SEL_TT0, + IGC_TS_SDP2_SEL_TT0, IGC_TS_SDP3_SEL_TT0, + }; + static const u32 igc_ts_sdp_sel_tt1[IGC_N_SDP] = { + IGC_TS_SDP0_SEL_TT1, IGC_TS_SDP1_SEL_TT1, + IGC_TS_SDP2_SEL_TT1, IGC_TS_SDP3_SEL_TT1, + }; + static const u32 igc_ts_sdp_sel_fc0[IGC_N_SDP] = { + IGC_TS_SDP0_SEL_FC0, IGC_TS_SDP1_SEL_FC0, + IGC_TS_SDP2_SEL_FC0, IGC_TS_SDP3_SEL_FC0, + }; + static const u32 igc_ts_sdp_sel_fc1[IGC_N_SDP] = { + IGC_TS_SDP0_SEL_FC1, IGC_TS_SDP1_SEL_FC1, + IGC_TS_SDP2_SEL_FC1, IGC_TS_SDP3_SEL_FC1, + }; + static const u32 igc_ts_sdp_sel_clr[IGC_N_SDP] = { + IGC_TS_SDP0_SEL_FC1, IGC_TS_SDP1_SEL_FC1, + IGC_TS_SDP2_SEL_FC1, IGC_TS_SDP3_SEL_FC1, + }; + struct igc_hw *hw = &igc->hw; + u32 ctrl, ctrl_ext, tssdp = 0; + + ctrl = rd32(IGC_CTRL); + ctrl_ext = rd32(IGC_CTRL_EXT); + tssdp = rd32(IGC_TSSDP); + + igc_pin_direction(pin, 0, &ctrl, &ctrl_ext); + + /* Make sure this pin is not enabled as an input. */ + if ((tssdp & IGC_AUX0_SEL_SDP3) == igc_aux0_sel_sdp[pin]) + tssdp &= ~IGC_AUX0_TS_SDP_EN; + + if ((tssdp & IGC_AUX1_SEL_SDP3) == igc_aux1_sel_sdp[pin]) + tssdp &= ~IGC_AUX1_TS_SDP_EN; + + tssdp &= ~igc_ts_sdp_sel_clr[pin]; + if (freq) { + if (chan == 1) + tssdp |= igc_ts_sdp_sel_fc1[pin]; + else + tssdp |= igc_ts_sdp_sel_fc0[pin]; + } else { + if (chan == 1) + tssdp |= igc_ts_sdp_sel_tt1[pin]; + else + tssdp |= igc_ts_sdp_sel_tt0[pin]; + } + tssdp |= igc_ts_sdp_en[pin]; + + wr32(IGC_TSSDP, tssdp); + wr32(IGC_CTRL, ctrl); + wr32(IGC_CTRL_EXT, ctrl_ext); +} + +static void igc_pin_extts(struct igc_adapter *igc, int chan, int pin) +{ + static const u32 igc_aux0_sel_sdp[IGC_N_SDP] = { + IGC_AUX0_SEL_SDP0, IGC_AUX0_SEL_SDP1, IGC_AUX0_SEL_SDP2, IGC_AUX0_SEL_SDP3, + }; + static const u32 igc_aux1_sel_sdp[IGC_N_SDP] = { + IGC_AUX1_SEL_SDP0, IGC_AUX1_SEL_SDP1, IGC_AUX1_SEL_SDP2, IGC_AUX1_SEL_SDP3, + }; + static const u32 igc_ts_sdp_en[IGC_N_SDP] = { + IGC_TS_SDP0_EN, IGC_TS_SDP1_EN, IGC_TS_SDP2_EN, IGC_TS_SDP3_EN, + }; + struct igc_hw *hw = &igc->hw; + u32 ctrl, ctrl_ext, tssdp = 0; + + ctrl = rd32(IGC_CTRL); + ctrl_ext = rd32(IGC_CTRL_EXT); + tssdp = rd32(IGC_TSSDP); + + igc_pin_direction(pin, 1, &ctrl, &ctrl_ext); + + /* Make sure this pin is not enabled as an output. 
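 *
 * Concretely, for pin 0 this clears IGC_TS_SDP0_EN in TSSDP so the
 * target-time/freq-out logic stops driving the pad; it is the mirror
 * image of igc_pin_perout() above, which drops the AUX input
 * selection before enabling the pin as an output.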
*/ + tssdp &= ~igc_ts_sdp_en[pin]; + + if (chan == 1) { + tssdp &= ~IGC_AUX1_SEL_SDP3; + tssdp |= igc_aux1_sel_sdp[pin] | IGC_AUX1_TS_SDP_EN; + } else { + tssdp &= ~IGC_AUX0_SEL_SDP3; + tssdp |= igc_aux0_sel_sdp[pin] | IGC_AUX0_TS_SDP_EN; + } + + wr32(IGC_TSSDP, tssdp); + wr32(IGC_CTRL, ctrl); + wr32(IGC_CTRL_EXT, ctrl_ext); +} + static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, struct ptp_clock_request *rq, int on) { + struct igc_adapter *igc = + container_of(ptp, struct igc_adapter, ptp_caps); + struct igc_hw *hw = &igc->hw; + unsigned long flags; + struct timespec64 ts; + int use_freq = 0, pin = -1; + u32 tsim, tsauxc, tsauxc_mask, tsim_mask, trgttiml, trgttimh, freqout; + s64 ns; + + switch (rq->type) { + case PTP_CLK_REQ_EXTTS: + /* Reject requests with unsupported flags */ + if (rq->extts.flags & ~(PTP_ENABLE_FEATURE | + PTP_RISING_EDGE | + PTP_FALLING_EDGE | + PTP_STRICT_FLAGS)) + return -EOPNOTSUPP; + + /* Reject requests failing to enable both edges. */ + if ((rq->extts.flags & PTP_STRICT_FLAGS) && + (rq->extts.flags & PTP_ENABLE_FEATURE) && + (rq->extts.flags & PTP_EXTTS_EDGES) != PTP_EXTTS_EDGES) + return -EOPNOTSUPP; + + if (on) { + pin = ptp_find_pin(igc->ptp_clock, PTP_PF_EXTTS, + rq->extts.index); + if (pin < 0) + return -EBUSY; + } + if (rq->extts.index == 1) { + tsauxc_mask = IGC_TSAUXC_EN_TS1; + tsim_mask = IGC_TSICR_AUTT1; + } else { + tsauxc_mask = IGC_TSAUXC_EN_TS0; + tsim_mask = IGC_TSICR_AUTT0; + } + spin_lock_irqsave(&igc->tmreg_lock, flags); + tsauxc = rd32(IGC_TSAUXC); + tsim = rd32(IGC_TSIM); + if (on) { + igc_pin_extts(igc, rq->extts.index, pin); + tsauxc |= tsauxc_mask; + tsim |= tsim_mask; + } else { + tsauxc &= ~tsauxc_mask; + tsim &= ~tsim_mask; + } + wr32(IGC_TSAUXC, tsauxc); + wr32(IGC_TSIM, tsim); + spin_unlock_irqrestore(&igc->tmreg_lock, flags); + return 0; + + case PTP_CLK_REQ_PEROUT: + /* Reject requests with unsupported flags */ + if (rq->perout.flags) + return -EOPNOTSUPP; + + if (on) { + pin = ptp_find_pin(igc->ptp_clock, PTP_PF_PEROUT, + rq->perout.index); + if (pin < 0) + return -EBUSY; + } + ts.tv_sec = rq->perout.period.sec; + ts.tv_nsec = rq->perout.period.nsec; + ns = timespec64_to_ns(&ts); + ns = ns >> 1; + if (on && (ns <= 70000000LL || ns == 125000000LL || + ns == 250000000LL || ns == 500000000LL)) { + if (ns < 8LL) + return -EINVAL; + use_freq = 1; + } + ts = ns_to_timespec64(ns); + if (rq->perout.index == 1) { + if (use_freq) { + tsauxc_mask = IGC_TSAUXC_EN_CLK1; + tsim_mask = 0; + } else { + tsauxc_mask = IGC_TSAUXC_EN_TT1; + tsim_mask = IGC_TSICR_TT1; + } + trgttiml = IGC_TRGTTIML1; + trgttimh = IGC_TRGTTIMH1; + freqout = IGC_FREQOUT1; + } else { + if (use_freq) { + tsauxc_mask = IGC_TSAUXC_EN_CLK0; + tsim_mask = 0; + } else { + tsauxc_mask = IGC_TSAUXC_EN_TT0; + tsim_mask = IGC_TSICR_TT0; + } + trgttiml = IGC_TRGTTIML0; + trgttimh = IGC_TRGTTIMH0; + freqout = IGC_FREQOUT0; + } + spin_lock_irqsave(&igc->tmreg_lock, flags); + tsauxc = rd32(IGC_TSAUXC); + tsim = rd32(IGC_TSIM); + if (rq->perout.index == 1) { + tsauxc &= ~(IGC_TSAUXC_EN_TT1 | IGC_TSAUXC_EN_CLK1); + tsim &= ~IGC_TSICR_TT1; + } else { + tsauxc &= ~(IGC_TSAUXC_EN_TT0 | IGC_TSAUXC_EN_CLK0); + tsim &= ~IGC_TSICR_TT0; + } + if (on) { + int i = rq->perout.index; + + igc_pin_perout(igc, i, pin, use_freq); + igc->perout[i].start.tv_sec = rq->perout.start.sec; + igc->perout[i].start.tv_nsec = rq->perout.start.nsec; + igc->perout[i].period.tv_sec = ts.tv_sec; + igc->perout[i].period.tv_nsec = ts.tv_nsec; + wr32(trgttimh, rq->perout.start.sec); + /* For now, always 
select timer 0 as source. */ + wr32(trgttiml, rq->perout.start.nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0); + if (use_freq) + wr32(freqout, ns); + tsauxc |= tsauxc_mask; + tsim |= tsim_mask; + } + wr32(IGC_TSAUXC, tsauxc); + wr32(IGC_TSIM, tsim); + spin_unlock_irqrestore(&igc->tmreg_lock, flags); + return 0; + + case PTP_CLK_REQ_PPS: + spin_lock_irqsave(&igc->tmreg_lock, flags); + tsim = rd32(IGC_TSIM); + if (on) + tsim |= IGC_TSICR_SYS_WRAP; + else + tsim &= ~IGC_TSICR_SYS_WRAP; + igc->pps_sys_wrap_on = on; + wr32(IGC_TSIM, tsim); + spin_unlock_irqrestore(&igc->tmreg_lock, flags); + return 0; + + default: + break; + } + return -EOPNOTSUPP; } +static int igc_ptp_verify_pin(struct ptp_clock_info *ptp, unsigned int pin, + enum ptp_pin_function func, unsigned int chan) +{ + switch (func) { + case PTP_PF_NONE: + case PTP_PF_EXTTS: + case PTP_PF_PEROUT: + break; + case PTP_PF_PHYSYNC: + return -1; + } + return 0; +} + /** * igc_ptp_systim_to_hwtstamp - convert system time value to HW timestamp * @adapter: board private structure @@ -152,49 +429,58 @@ static void igc_ptp_systim_to_hwtstamp(struct igc_adapter *adapter, } /** - * igc_ptp_rx_pktstamp - retrieve Rx per packet timestamp - * @q_vector: Pointer to interrupt specific structure - * @va: Pointer to address containing Rx buffer - * @skb: Buffer containing timestamp and packet + * igc_ptp_rx_pktstamp - Retrieve timestamp from Rx packet buffer + * @adapter: Pointer to adapter the packet buffer belongs to + * @buf: Pointer to packet buffer * - * This function is meant to retrieve the first timestamp from the - * first buffer of an incoming frame. The value is stored in little - * endian format starting on byte 0. There's a second timestamp - * starting on byte 8. - **/ -void igc_ptp_rx_pktstamp(struct igc_q_vector *q_vector, void *va, - struct sk_buff *skb) + * This function retrieves the timestamp saved in the beginning of packet + * buffer. While two timestamps are available, one in timer0 reference and the + * other in timer1 reference, this function considers only the timestamp in + * timer0 reference. + * + * Returns timestamp value. + */ +ktime_t igc_ptp_rx_pktstamp(struct igc_adapter *adapter, __le32 *buf) { - struct igc_adapter *adapter = q_vector->adapter; - __le64 *regval = (__le64 *)va; - int adjust = 0; - - /* The timestamp is recorded in little endian format. - * DWORD: | 0 | 1 | 2 | 3 - * Field: | Timer0 Low | Timer0 High | Timer1 Low | Timer1 High + ktime_t timestamp; + u32 secs, nsecs; + int adjust; + + /* Timestamps are saved in little endian at the beginning of the packet + * buffer following the layout: + * + * DWORD: | 0 | 1 | 2 | 3 | + * Field: | Timer1 SYSTIML | Timer1 SYSTIMH | Timer0 SYSTIML | Timer0 SYSTIMH | + * + * SYSTIML holds the nanoseconds part while SYSTIMH holds the seconds + * part of the timestamp. 
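 *
 * For example, a frame stamped by timer0 at 5.000000123s arrives with
 * buf[2] == cpu_to_le32(123) (SYSTIML, nanoseconds) and
 * buf[3] == cpu_to_le32(5) (SYSTIMH, seconds), which the code below
 * reassembles as ktime_set(5, 123) before subtracting the per-speed
 * RX latency.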
*/ - igc_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), - le64_to_cpu(regval[0])); - - /* adjust timestamp for the RX latency based on link speed */ - if (adapter->hw.mac.type == igc_i225) { - switch (adapter->link_speed) { - case SPEED_10: - adjust = IGC_I225_RX_LATENCY_10; - break; - case SPEED_100: - adjust = IGC_I225_RX_LATENCY_100; - break; - case SPEED_1000: - adjust = IGC_I225_RX_LATENCY_1000; - break; - case SPEED_2500: - adjust = IGC_I225_RX_LATENCY_2500; - break; - } + nsecs = le32_to_cpu(buf[2]); + secs = le32_to_cpu(buf[3]); + + timestamp = ktime_set(secs, nsecs); + + /* Adjust timestamp for the RX latency based on link speed */ + switch (adapter->link_speed) { + case SPEED_10: + adjust = IGC_I225_RX_LATENCY_10; + break; + case SPEED_100: + adjust = IGC_I225_RX_LATENCY_100; + break; + case SPEED_1000: + adjust = IGC_I225_RX_LATENCY_1000; + break; + case SPEED_2500: + adjust = IGC_I225_RX_LATENCY_2500; + break; + default: + adjust = 0; + netdev_warn_once(adapter->netdev, "Imprecise timestamp\n"); + break; } - skb_hwtstamps(skb)->hwtstamp = - ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust); + + return ktime_sub_ns(timestamp, adjust); } static void igc_ptp_disable_rx_timestamp(struct igc_adapter *adapter) @@ -477,9 +763,17 @@ void igc_ptp_init(struct igc_adapter *adapter) { struct net_device *netdev = adapter->netdev; struct igc_hw *hw = &adapter->hw; + int i; switch (hw->mac.type) { case igc_i225: + for (i = 0; i < IGC_N_SDP; i++) { + struct ptp_pin_desc *ppd = &adapter->sdp_config[i]; + + snprintf(ppd->name, sizeof(ppd->name), "SDP%d", i); + ppd->index = i; + ppd->func = PTP_PF_NONE; + } snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); adapter->ptp_caps.owner = THIS_MODULE; adapter->ptp_caps.max_adj = 62499999; @@ -488,6 +782,12 @@ void igc_ptp_init(struct igc_adapter *adapter) adapter->ptp_caps.gettimex64 = igc_ptp_gettimex64_i225; adapter->ptp_caps.settime64 = igc_ptp_settime_i225; adapter->ptp_caps.enable = igc_ptp_feature_enable_i225; + adapter->ptp_caps.pps = 1; + adapter->ptp_caps.pin_config = adapter->sdp_config; + adapter->ptp_caps.n_ext_ts = IGC_N_EXTTS; + adapter->ptp_caps.n_per_out = IGC_N_PEROUT; + adapter->ptp_caps.n_pins = IGC_N_SDP; + adapter->ptp_caps.verify = igc_ptp_verify_pin; break; default: adapter->ptp_clock = NULL; @@ -589,7 +889,9 @@ void igc_ptp_reset(struct igc_adapter *adapter) case igc_i225: wr32(IGC_TSAUXC, 0x0); wr32(IGC_TSSDP, 0x0); - wr32(IGC_TSIM, IGC_TSICR_INTERRUPTS); + wr32(IGC_TSIM, + IGC_TSICR_INTERRUPTS | + (adapter->pps_sys_wrap_on ? 
IGC_TSICR_SYS_WRAP : 0)); wr32(IGC_IMS, IGC_IMS_TS); break; default: diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h index 3e5cb7aef9da..cc174853554b 100644 --- a/drivers/net/ethernet/intel/igc/igc_regs.h +++ b/drivers/net/ethernet/intel/igc/igc_regs.h @@ -192,6 +192,16 @@ #define IGC_TSYNCTXCTL 0x0B614 /* Tx Time Sync Control register - RW */ #define IGC_TSYNCRXCFG 0x05F50 /* Time Sync Rx Configuration - RW */ #define IGC_TSSDP 0x0003C /* Time Sync SDP Configuration Register - RW */ +#define IGC_TRGTTIML0 0x0B644 /* Target Time Register 0 Low - RW */ +#define IGC_TRGTTIMH0 0x0B648 /* Target Time Register 0 High - RW */ +#define IGC_TRGTTIML1 0x0B64C /* Target Time Register 1 Low - RW */ +#define IGC_TRGTTIMH1 0x0B650 /* Target Time Register 1 High - RW */ +#define IGC_FREQOUT0 0x0B654 /* Frequency Out 0 Control Register - RW */ +#define IGC_FREQOUT1 0x0B658 /* Frequency Out 1 Control Register - RW */ +#define IGC_AUXSTMPL0 0x0B65C /* Auxiliary Time Stamp 0 Register Low - RO */ +#define IGC_AUXSTMPH0 0x0B660 /* Auxiliary Time Stamp 0 Register High - RO */ +#define IGC_AUXSTMPL1 0x0B664 /* Auxiliary Time Stamp 1 Register Low - RO */ +#define IGC_AUXSTMPH1 0x0B668 /* Auxiliary Time Stamp 1 Register High - RO */ #define IGC_IMIR(_i) (0x05A80 + ((_i) * 4)) /* Immediate Interrupt */ #define IGC_IMIREXT(_i) (0x05AA0 + ((_i) * 4)) /* Immediate INTR Ext*/ diff --git a/drivers/net/ethernet/intel/igc/igc_xdp.c b/drivers/net/ethernet/intel/igc/igc_xdp.c new file mode 100644 index 000000000000..11133c4619bb --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_xdp.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020, Intel Corporation. */ + +#include "igc.h" +#include "igc_xdp.h" + +int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog, + struct netlink_ext_ack *extack) +{ + struct net_device *dev = adapter->netdev; + bool if_running = netif_running(dev); + struct bpf_prog *old_prog; + + if (dev->mtu > ETH_DATA_LEN) { + /* For now, the driver doesn't support XDP functionality with + * jumbo frames so we return error. + */ + NL_SET_ERR_MSG_MOD(extack, "Jumbo frames not supported"); + return -EOPNOTSUPP; + } + + if (if_running) + igc_close(dev); + + old_prog = xchg(&adapter->xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + + if (if_running) + igc_open(dev); + + return 0; +} + +int igc_xdp_register_rxq_info(struct igc_ring *ring) +{ + struct net_device *dev = ring->netdev; + int err; + + err = xdp_rxq_info_reg(&ring->xdp_rxq, dev, ring->queue_index, 0); + if (err) { + netdev_err(dev, "Failed to register xdp rxq info\n"); + return err; + } + + err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, MEM_TYPE_PAGE_SHARED, + NULL); + if (err) { + netdev_err(dev, "Failed to register xdp rxq mem model\n"); + xdp_rxq_info_unreg(&ring->xdp_rxq); + return err; + } + + return 0; +} + +void igc_xdp_unregister_rxq_info(struct igc_ring *ring) +{ + xdp_rxq_info_unreg(&ring->xdp_rxq); +} diff --git a/drivers/net/ethernet/intel/igc/igc_xdp.h b/drivers/net/ethernet/intel/igc/igc_xdp.h new file mode 100644 index 000000000000..cfecb515b718 --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_xdp.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2020, Intel Corporation. 
*/ + +#ifndef _IGC_XDP_H_ +#define _IGC_XDP_H_ + +int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog, + struct netlink_ext_ack *extack); + +int igc_xdp_register_rxq_info(struct igc_ring *ring); +void igc_xdp_unregister_rxq_info(struct igc_ring *ring); + +#endif /* _IGC_XDP_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c index 8d3798a32f0e..e324e42fab2d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c @@ -1351,7 +1351,7 @@ static u32 ixgbe_atr_compute_sig_hash_82599(union ixgbe_atr_hash_dword input, } /** - * ixgbe_atr_add_signature_filter_82599 - Adds a signature hash filter + * ixgbe_fdir_add_signature_filter_82599 - Adds a signature hash filter * @hw: pointer to hardware structure * @input: unique input dword * @common: compressed common input dword @@ -1542,6 +1542,7 @@ s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw, switch (input_mask->formatted.vm_pool & 0x7F) { case 0x0: fdirm |= IXGBE_FDIRM_POOL; + break; case 0x7F: break; default: @@ -1557,6 +1558,7 @@ s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw, hw_dbg(hw, " Error on src/dst port mask\n"); return IXGBE_ERR_CONFIG; } + break; case IXGBE_ATR_L4TYPE_MASK: break; default: diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 62ddb452f862..03ccbe6b66d2 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -93,6 +93,7 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw) default: break; } + break; default: break; } @@ -2707,7 +2708,7 @@ s32 ixgbe_disable_rx_buff_generic(struct ixgbe_hw *hw) } /** - * ixgbe_enable_rx_buff - Enables the receive data path + * ixgbe_enable_rx_buff_generic - Enables the receive data path * @hw: pointer to hardware structure * * Enables the receive data path @@ -3029,14 +3030,14 @@ s32 ixgbe_set_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq) } /** + * ixgbe_set_vmdq_san_mac_generic - Associate VMDq pool index with a rx address + * @hw: pointer to hardware struct + * @vmdq: VMDq pool index + * * This function should only be involved in the IOV mode. * In IOV mode, Default pool is next pool after the number of * VFs advertized and not 0. 
* MPSAR table needs to be updated for SAN_MAC RAR [hw->mac.san_mac_rar_index] - * - * ixgbe_set_vmdq_san_mac - Associate default VMDq pool index with a rx address - * @hw: pointer to hardware struct - * @vmdq: VMDq pool index **/ s32 ixgbe_set_vmdq_san_mac_generic(struct ixgbe_hw *hw, u32 vmdq) { @@ -3896,7 +3897,7 @@ static s32 ixgbe_get_ets_data(struct ixgbe_hw *hw, u16 *ets_cfg, } /** - * ixgbe_get_thermal_sensor_data - Gathers thermal sensor data + * ixgbe_get_thermal_sensor_data_generic - Gathers thermal sensor data * @hw: pointer to hardware structure * * Returns the thermal sensor data structure @@ -4054,8 +4055,7 @@ void ixgbe_get_orom_version(struct ixgbe_hw *hw, } /** - * ixgbe_get_oem_prod_version Etrack ID from EEPROM - * + * ixgbe_get_oem_prod_version - Etrack ID from EEPROM * @hw: pointer to hardware structure * @nvm_ver: pointer to output structure * diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c index c00332d2e02a..72e6ebffea33 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c @@ -361,7 +361,7 @@ static u8 ixgbe_dcbnl_set_all(struct net_device *netdev) } #ifdef IXGBE_FCOE - /* Reprogam FCoE hardware offloads when the traffic class + /* Reprogram FCoE hardware offloads when the traffic class * FCoE is using changes. This happens if the APP info * changes or the up2tc mapping is updated. */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index a280aa34ca1d..4ceaca0f6ce3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -1368,45 +1368,33 @@ static void ixgbe_get_ethtool_stats(struct net_device *netdev, static void ixgbe_get_strings(struct net_device *netdev, u32 stringset, u8 *data) { - char *p = (char *)data; unsigned int i; + u8 *p = data; switch (stringset) { case ETH_SS_TEST: - for (i = 0; i < IXGBE_TEST_LEN; i++) { - memcpy(data, ixgbe_gstrings_test[i], ETH_GSTRING_LEN); - data += ETH_GSTRING_LEN; - } + for (i = 0; i < IXGBE_TEST_LEN; i++) + ethtool_sprintf(&p, ixgbe_gstrings_test[i]); break; case ETH_SS_STATS: - for (i = 0; i < IXGBE_GLOBAL_STATS_LEN; i++) { - memcpy(p, ixgbe_gstrings_stats[i].stat_string, - ETH_GSTRING_LEN); - p += ETH_GSTRING_LEN; - } + for (i = 0; i < IXGBE_GLOBAL_STATS_LEN; i++) + ethtool_sprintf(&p, + ixgbe_gstrings_stats[i].stat_string); for (i = 0; i < netdev->num_tx_queues; i++) { - sprintf(p, "tx_queue_%u_packets", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_queue_%u_bytes", i); - p += ETH_GSTRING_LEN; + ethtool_sprintf(&p, "tx_queue_%u_packets", i); + ethtool_sprintf(&p, "tx_queue_%u_bytes", i); } for (i = 0; i < IXGBE_NUM_RX_QUEUES; i++) { - sprintf(p, "rx_queue_%u_packets", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_queue_%u_bytes", i); - p += ETH_GSTRING_LEN; + ethtool_sprintf(&p, "rx_queue_%u_packets", i); + ethtool_sprintf(&p, "rx_queue_%u_bytes", i); } for (i = 0; i < IXGBE_MAX_PACKET_BUFFERS; i++) { - sprintf(p, "tx_pb_%u_pxon", i); - p += ETH_GSTRING_LEN; - sprintf(p, "tx_pb_%u_pxoff", i); - p += ETH_GSTRING_LEN; + ethtool_sprintf(&p, "tx_pb_%u_pxon", i); + ethtool_sprintf(&p, "tx_pb_%u_pxoff", i); } for (i = 0; i < IXGBE_MAX_PACKET_BUFFERS; i++) { - sprintf(p, "rx_pb_%u_pxon", i); - p += ETH_GSTRING_LEN; - sprintf(p, "rx_pb_%u_pxoff", i); - p += ETH_GSTRING_LEN; + ethtool_sprintf(&p, "rx_pb_%u_pxon", i); + ethtool_sprintf(&p, "rx_pb_%u_pxoff", i); } /* BUG_ON(p - data != 
IXGBE_STATS_LEN * ETH_GSTRING_LEN); */ break; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index df389a11d3af..0218f6c9b925 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -132,6 +132,7 @@ static void ixgbe_get_first_reg_idx(struct ixgbe_adapter *adapter, u8 tc, else *tx = (tc + 4) << 4; /* 96, 112 */ } + break; default: break; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 9f3f12e2ccf2..c5ec17d19c59 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -225,7 +225,7 @@ static s32 ixgbe_get_parent_bus_info(struct ixgbe_adapter *adapter) } /** - * ixgbe_check_from_parent - Determine whether PCIe info should come from parent + * ixgbe_pcie_from_parent - Determine whether PCIe info should come from parent * @hw: hw specific details * * This function is used by probe to determine whether a device's PCI-Express @@ -4118,6 +4118,8 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter, #endif } + ring->rx_offset = ixgbe_rx_offset(ring); + if (ring->xsk_pool && hw->mac.type != ixgbe_mac_82599EB) { u32 xsk_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool); @@ -6156,7 +6158,7 @@ void ixgbe_down(struct ixgbe_adapter *adapter) } /** - * ixgbe_eee_capable - helper function to determine EEE support on X550 + * ixgbe_set_eee_capable - helper function to determine EEE support on X550 * @adapter: board private structure */ static void ixgbe_set_eee_capable(struct ixgbe_adapter *adapter) @@ -6534,6 +6536,13 @@ err_setup_tx: return err; } +static int ixgbe_rx_napi_id(struct ixgbe_ring *rx_ring) +{ + struct ixgbe_q_vector *q_vector = rx_ring->q_vector; + + return q_vector ? 
q_vector->napi.napi_id : 0; +} + /** * ixgbe_setup_rx_resources - allocate Rx resources (Descriptors) * @adapter: pointer to ixgbe_adapter @@ -6578,11 +6587,10 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter, rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; - rx_ring->rx_offset = ixgbe_rx_offset(rx_ring); /* XDP RX-queue info */ if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev, - rx_ring->queue_index, rx_ring->q_vector->napi.napi_id) < 0) + rx_ring->queue_index, ixgbe_rx_napi_id(rx_ring)) < 0) goto err; rx_ring->xdp_prog = adapter->xdp_prog; @@ -6891,6 +6899,11 @@ static int __maybe_unused ixgbe_resume(struct device *dev_d) adapter->hw.hw_addr = adapter->io_addr; + err = pci_enable_device_mem(pdev); + if (err) { + e_dev_err("Cannot enable PCI device from suspend\n"); + return err; + } smp_mb__before_atomic(); clear_bit(__IXGBE_DISABLED, &adapter->state); pci_set_master(pdev); @@ -10188,7 +10201,7 @@ static int ixgbe_xdp_xmit(struct net_device *dev, int n, { struct ixgbe_adapter *adapter = netdev_priv(dev); struct ixgbe_ring *ring; - int drops = 0; + int nxmit = 0; int i; if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state))) @@ -10212,16 +10225,15 @@ static int ixgbe_xdp_xmit(struct net_device *dev, int n, int err; err = ixgbe_xmit_xdp_ring(adapter, xdpf); - if (err != IXGBE_XDP_TX) { - xdp_return_frame_rx_napi(xdpf); - drops++; - } + if (err != IXGBE_XDP_TX) + break; + nxmit++; } if (unlikely(flags & XDP_XMIT_FLUSH)) ixgbe_xdp_ring_update_tail(ring); - return n - drops; + return nxmit; } static const struct net_device_ops ixgbe_netdev_ops = { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index fc389eecdd2b..24aa97f993ca 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -380,6 +380,9 @@ static enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id) case X557_PHY_ID2: phy_type = ixgbe_phy_x550em_ext_t; break; + case BCM54616S_E_PHY_ID: + phy_type = ixgbe_phy_ext_1g_t; + break; default: phy_type = ixgbe_phy_unknown; break; @@ -461,12 +464,13 @@ s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw) } /** - * ixgbe_read_phy_mdi - Reads a value from a specified PHY register without - * the SWFW lock + * ixgbe_read_phy_reg_mdi - read PHY register * @hw: pointer to hardware structure * @reg_addr: 32 bit address of PHY register to read * @device_type: 5 bit device type * @phy_data: Pointer to read data from PHY register + * + * Reads a value from a specified PHY register without the SWFW lock **/ s32 ixgbe_read_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, u16 *phy_data) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index 22a874eee2e8..23ddfd79fc8b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -999,6 +999,7 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, switch (config->tx_type) { case HWTSTAMP_TX_OFF: tsync_tx_ctl = 0; + break; case HWTSTAMP_TX_ON: break; default: diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 2be1c4c72435..2647937f7f4d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -1407,6 +1407,7 @@ struct ixgbe_nvm_version { #define QT2022_PHY_ID 0x0043A400 #define ATH_PHY_ID 0x03429050 #define AQ_FW_REV 0x20 +#define BCM54616S_E_PHY_ID 0x03625D10 /* 
Special PHY Init Routine */ #define IXGBE_PHY_INIT_OFFSET_NL 0x002B @@ -3383,10 +3384,6 @@ struct ixgbe_hw_stats { /* forward declaration */ struct ixgbe_hw; -/* iterator type for walking multicast address lists */ -typedef u8* (*ixgbe_mc_addr_itr) (struct ixgbe_hw *hw, u8 **mc_addr_ptr, - u32 *vmdq); - /* Function pointer table */ struct ixgbe_eeprom_operations { s32 (*init_params)(struct ixgbe_hw *); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c index 4b93ba149ec5..d5cfb51ff648 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c @@ -701,7 +701,7 @@ static s32 ixgbe_get_swfw_sync_semaphore(struct ixgbe_hw *hw) } /** - * ixgbe_release_nvm_semaphore - Release hardware semaphore + * ixgbe_release_swfw_sync_semaphore - Release hardware semaphore * @hw: pointer to hardware structure * * This function clears hardware semaphore bits. diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index 5e339afa682a..9724ffb16518 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -1248,7 +1248,7 @@ static s32 ixgbe_get_bus_info_X550em(struct ixgbe_hw *hw) } /** - * ixgbe_fw_recovery_mode - Check FW NVM recovery mode + * ixgbe_fw_recovery_mode_X550 - Check FW NVM recovery mode * @hw: pointer to hardware structure * * Returns true if in FW NVM recovery mode. diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index 3771857cf887..91ad5b902673 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -104,6 +104,13 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter, xdp_prog = READ_ONCE(rx_ring->xdp_prog); act = bpf_prog_run_xdp(xdp_prog, xdp); + if (likely(act == XDP_REDIRECT)) { + err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); + result = !err ? IXGBE_XDP_REDIR : IXGBE_XDP_CONSUMED; + rcu_read_unlock(); + return result; + } + switch (act) { case XDP_PASS: break; @@ -115,10 +122,6 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter, } result = ixgbe_xmit_xdp_ring(adapter, xdpf); break; - case XDP_REDIRECT: - err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); - result = !err ? IXGBE_XDP_REDIR : IXGBE_XDP_CONSUMED; - break; default: bpf_warn_invalid_xdp_action(act); fallthrough; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 449d7d5b280d..ba2ed8a43d2d 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -2633,6 +2633,7 @@ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter) adapter->num_rx_queues = rss; adapter->num_tx_queues = rss; adapter->num_xdp_queues = adapter->xdp_prog ? rss : 0; + break; default: break; } diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c index bfe6dfcec4ab..5fc347abab3c 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.c +++ b/drivers/net/ethernet/intel/ixgbevf/vf.c @@ -121,9 +121,11 @@ static s32 ixgbevf_reset_hw_vf(struct ixgbe_hw *hw) } /** + * ixgbevf_hv_reset_hw_vf - reset via Hyper-V + * @hw: pointer to private hardware struct + * + * Hyper-V variant; the VF/PF communication is through the PCI * config space. 
- * @hw: pointer to private hardware struct */ static s32 ixgbevf_hv_reset_hw_vf(struct ixgbe_hw *hw) { @@ -513,9 +515,11 @@ static s32 ixgbevf_update_mc_addr_list_vf(struct ixgbe_hw *hw, } /** - * Hyper-V variant - just a stub. + * ixgbevf_hv_update_mc_addr_list_vf - stub * @hw: unused * @netdev: unused + * + * Hyper-V variant - just a stub. */ static s32 ixgbevf_hv_update_mc_addr_list_vf(struct ixgbe_hw *hw, struct net_device *netdev) @@ -564,9 +568,11 @@ static s32 ixgbevf_update_xcast_mode(struct ixgbe_hw *hw, int xcast_mode) } /** - * Hyper-V variant - just a stub. + * ixgbevf_hv_update_xcast_mode - stub * @hw: unused * @xcast_mode: unused + * + * Hyper-V variant - just a stub. */ static s32 ixgbevf_hv_update_xcast_mode(struct ixgbe_hw *hw, int xcast_mode) { @@ -608,7 +614,7 @@ mbx_err: } /** - * Hyper-V variant - just a stub. + * ixgbevf_hv_set_vfta_vf - Hyper-V variant - just a stub. * @hw: unused * @vlan: unused * @vind: unused @@ -726,11 +732,13 @@ out: } /** - * Hyper-V variant; there is no mailbox communication. + * ixgbevf_hv_check_mac_link_vf - check link * @hw: pointer to private hardware struct * @speed: pointer to link speed * @link_up: true if link is up, false otherwise * @autoneg_wait_to_complete: unused + * + * Hyper-V variant; there is no mailbox communication. */ static s32 ixgbevf_hv_check_mac_link_vf(struct ixgbe_hw *hw, ixgbe_link_speed *speed, diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.h b/drivers/net/ethernet/intel/ixgbevf/vf.h index d1e9e306653b..1d8209df4162 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.h +++ b/drivers/net/ethernet/intel/ixgbevf/vf.h @@ -16,9 +16,6 @@ struct ixgbe_hw; -/* iterator type for walking multicast address lists */ -typedef u8* (*ixgbe_mc_addr_itr) (struct ixgbe_hw *hw, u8 **mc_addr_ptr, - u32 *vmdq); struct ixgbe_mac_operations { s32 (*init_hw)(struct ixgbe_hw *); s32 (*reset_hw)(struct ixgbe_hw *); diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index e9efe074edc1..f1b9284e0bea 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -1265,9 +1265,9 @@ jme_stop_shutdown_timer(struct jme_adapter *jme) jwrite32f(jme, JME_APMC, apmc); } -static void jme_link_change_tasklet(struct tasklet_struct *t) +static void jme_link_change_work(struct work_struct *work) { - struct jme_adapter *jme = from_tasklet(jme, t, linkch_task); + struct jme_adapter *jme = container_of(work, struct jme_adapter, linkch_task); struct net_device *netdev = jme->dev; int rc; @@ -1510,7 +1510,7 @@ jme_intr_msi(struct jme_adapter *jme, u32 intrstat) * all other events are ignored */ jwrite32(jme, JME_IEVE, intrstat); - tasklet_schedule(&jme->linkch_task); + schedule_work(&jme->linkch_task); goto out_reenable; } @@ -1832,7 +1832,6 @@ jme_open(struct net_device *netdev) jme_clear_pm_disable_wol(jme); JME_NAPI_ENABLE(jme); - tasklet_setup(&jme->linkch_task, jme_link_change_tasklet); tasklet_setup(&jme->txclean_task, jme_tx_clean_tasklet); tasklet_setup(&jme->rxclean_task, jme_rx_clean_tasklet); tasklet_setup(&jme->rxempty_task, jme_rx_empty_tasklet); @@ -1920,7 +1919,7 @@ jme_close(struct net_device *netdev) JME_NAPI_DISABLE(jme); - tasklet_kill(&jme->linkch_task); + cancel_work_sync(&jme->linkch_task); tasklet_kill(&jme->txclean_task); tasklet_kill(&jme->rxclean_task); tasklet_kill(&jme->rxempty_task); @@ -3035,6 +3034,7 @@ jme_init_one(struct pci_dev *pdev, atomic_set(&jme->rx_empty, 1); tasklet_setup(&jme->pcc_task, jme_pcc_tasklet); + INIT_WORK(&jme->linkch_task, jme_link_change_work); jme->dpi.cur = PCC_P1; 
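/* A minimal sketch of the tasklet-to-workqueue conversion applied to
 * linkch_task in this patch, collecting the three call sites above:
 *
 *   INIT_WORK(&jme->linkch_task, jme_link_change_work);   probe
 *   schedule_work(&jme->linkch_task);                     IRQ path
 *   cancel_work_sync(&jme->linkch_task);                  jme_close()
 *
 * The work item runs in process context, so unlike the old tasklet the
 * link-change handler is free to use sleeping APIs.
 */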
jme->reg_ghc = 0; diff --git a/drivers/net/ethernet/jme.h b/drivers/net/ethernet/jme.h index a2c3b00d939d..2af76329b4a2 100644 --- a/drivers/net/ethernet/jme.h +++ b/drivers/net/ethernet/jme.h @@ -411,7 +411,7 @@ struct jme_adapter { struct tasklet_struct rxempty_task; struct tasklet_struct rxclean_task; struct tasklet_struct txclean_task; - struct tasklet_struct linkch_task; + struct work_struct linkch_task; struct tasklet_struct pcc_task; unsigned long flags; u32 reg_txcs; diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index 925161959b9b..6f987a7ffcb3 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -41,7 +41,10 @@ #include <linux/types.h> #include <linux/interrupt.h> #include <linux/ioport.h> +#include <linux/iopoll.h> #include <linux/in.h> +#include <linux/of_device.h> +#include <linux/of_net.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/delay.h> @@ -54,21 +57,246 @@ #include <linux/ethtool.h> #include <linux/crc32.h> #include <linux/pgtable.h> - -#include <asm/bootinfo.h> -#include <asm/bitops.h> -#include <asm/io.h> -#include <asm/dma.h> - -#include <asm/mach-rc32434/rb.h> -#include <asm/mach-rc32434/rc32434.h> -#include <asm/mach-rc32434/eth.h> -#include <asm/mach-rc32434/dma_v.h> +#include <linux/clk.h> #define DRV_NAME "korina" #define DRV_VERSION "0.20" #define DRV_RELDATE "15Sep2017" +struct eth_regs { + u32 ethintfc; + u32 ethfifott; + u32 etharc; + u32 ethhash0; + u32 ethhash1; + u32 ethu0[4]; /* Reserved. */ + u32 ethpfs; + u32 ethmcp; + u32 eth_u1[10]; /* Reserved. */ + u32 ethspare; + u32 eth_u2[42]; /* Reserved. */ + u32 ethsal0; + u32 ethsah0; + u32 ethsal1; + u32 ethsah1; + u32 ethsal2; + u32 ethsah2; + u32 ethsal3; + u32 ethsah3; + u32 ethrbc; + u32 ethrpc; + u32 ethrupc; + u32 ethrfc; + u32 ethtbc; + u32 ethgpf; + u32 eth_u9[50]; /* Reserved. */ + u32 ethmac1; + u32 ethmac2; + u32 ethipgt; + u32 ethipgr; + u32 ethclrt; + u32 ethmaxf; + u32 eth_u10; /* Reserved. */ + u32 ethmtest; + u32 miimcfg; + u32 miimcmd; + u32 miimaddr; + u32 miimwtd; + u32 miimrdd; + u32 miimind; + u32 eth_u11; /* Reserved. */ + u32 eth_u12; /* Reserved. 
*/ + u32 ethcfsa0; + u32 ethcfsa1; + u32 ethcfsa2; +}; + +/* Ethernet interrupt registers */ +#define ETH_INT_FC_EN BIT(0) +#define ETH_INT_FC_ITS BIT(1) +#define ETH_INT_FC_RIP BIT(2) +#define ETH_INT_FC_JAM BIT(3) +#define ETH_INT_FC_OVR BIT(4) +#define ETH_INT_FC_UND BIT(5) +#define ETH_INT_FC_IOC 0x000000c0 + +/* Ethernet FIFO registers */ +#define ETH_FIFO_TT_TTH_BIT 0 +#define ETH_FIFO_TT_TTH 0x0000007f + +/* Ethernet ARC/multicast registers */ +#define ETH_ARC_PRO BIT(0) +#define ETH_ARC_AM BIT(1) +#define ETH_ARC_AFM BIT(2) +#define ETH_ARC_AB BIT(3) + +/* Ethernet SAL registers */ +#define ETH_SAL_BYTE_5 0x000000ff +#define ETH_SAL_BYTE_4 0x0000ff00 +#define ETH_SAL_BYTE_3 0x00ff0000 +#define ETH_SAL_BYTE_2 0xff000000 + +/* Ethernet SAH registers */ +#define ETH_SAH_BYTE1 0x000000ff +#define ETH_SAH_BYTE0 0x0000ff00 + +/* Ethernet GPF register */ +#define ETH_GPF_PTV 0x0000ffff + +/* Ethernet PFS register */ +#define ETH_PFS_PFD BIT(0) + +/* Ethernet CFSA[0-3] registers */ +#define ETH_CFSA0_CFSA4 0x000000ff +#define ETH_CFSA0_CFSA5 0x0000ff00 +#define ETH_CFSA1_CFSA2 0x000000ff +#define ETH_CFSA1_CFSA3 0x0000ff00 +#define ETH_CFSA2_CFSA0 0x000000ff +#define ETH_CFSA2_CFSA1 0x0000ff00 + +/* Ethernet MAC1 registers */ +#define ETH_MAC1_RE BIT(0) +#define ETH_MAC1_PAF BIT(1) +#define ETH_MAC1_RFC BIT(2) +#define ETH_MAC1_TFC BIT(3) +#define ETH_MAC1_LB BIT(4) +#define ETH_MAC1_MR BIT(31) + +/* Ethernet MAC2 registers */ +#define ETH_MAC2_FD BIT(0) +#define ETH_MAC2_FLC BIT(1) +#define ETH_MAC2_HFE BIT(2) +#define ETH_MAC2_DC BIT(3) +#define ETH_MAC2_CEN BIT(4) +#define ETH_MAC2_PE BIT(5) +#define ETH_MAC2_VPE BIT(6) +#define ETH_MAC2_APE BIT(7) +#define ETH_MAC2_PPE BIT(8) +#define ETH_MAC2_LPE BIT(9) +#define ETH_MAC2_NB BIT(12) +#define ETH_MAC2_BP BIT(13) +#define ETH_MAC2_ED BIT(14) + +/* Ethernet IPGT register */ +#define ETH_IPGT 0x0000007f + +/* Ethernet IPGR registers */ +#define ETH_IPGR_IPGR2 0x0000007f +#define ETH_IPGR_IPGR1 0x00007f00 + +/* Ethernet CLRT registers */ +#define ETH_CLRT_MAX_RET 0x0000000f +#define ETH_CLRT_COL_WIN 0x00003f00 + +/* Ethernet MAXF register */ +#define ETH_MAXF 0x0000ffff + +/* Ethernet test registers */ +#define ETH_TEST_REG BIT(2) +#define ETH_MCP_DIV 0x000000ff + +/* MII registers */ +#define ETH_MII_CFG_RSVD 0x0000000c +#define ETH_MII_CMD_RD BIT(0) +#define ETH_MII_CMD_SCN BIT(1) +#define ETH_MII_REG_ADDR 0x0000001f +#define ETH_MII_PHY_ADDR 0x00001f00 +#define ETH_MII_WTD_DATA 0x0000ffff +#define ETH_MII_RDD_DATA 0x0000ffff +#define ETH_MII_IND_BSY BIT(0) +#define ETH_MII_IND_SCN BIT(1) +#define ETH_MII_IND_NV BIT(2) + +/* Values for the DEVCS field of the Ethernet DMA Rx and Tx descriptors. 
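 *
 * As an illustrative use (helpers defined further below), the RX path
 * recovers a frame's length and status from the DEVCS word roughly
 * like:
 *
 *   u32 devcs = rd->devcs;
 *   if ((devcs & ETH_RX_ROK) && !(devcs & ETH_RX_CES))
 *           len = RCVPKT_LENGTH(devcs);   bits 31:16 of DEVCS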
*/ +#define ETH_RX_FD BIT(0) +#define ETH_RX_LD BIT(1) +#define ETH_RX_ROK BIT(2) +#define ETH_RX_FM BIT(3) +#define ETH_RX_MP BIT(4) +#define ETH_RX_BP BIT(5) +#define ETH_RX_VLT BIT(6) +#define ETH_RX_CF BIT(7) +#define ETH_RX_OVR BIT(8) +#define ETH_RX_CRC BIT(9) +#define ETH_RX_CV BIT(10) +#define ETH_RX_DB BIT(11) +#define ETH_RX_LE BIT(12) +#define ETH_RX_LOR BIT(13) +#define ETH_RX_CES BIT(14) +#define ETH_RX_LEN_BIT 16 +#define ETH_RX_LEN 0xffff0000 + +#define ETH_TX_FD BIT(0) +#define ETH_TX_LD BIT(1) +#define ETH_TX_OEN BIT(2) +#define ETH_TX_PEN BIT(3) +#define ETH_TX_CEN BIT(4) +#define ETH_TX_HEN BIT(5) +#define ETH_TX_TOK BIT(6) +#define ETH_TX_MP BIT(7) +#define ETH_TX_BP BIT(8) +#define ETH_TX_UND BIT(9) +#define ETH_TX_OF BIT(10) +#define ETH_TX_ED BIT(11) +#define ETH_TX_EC BIT(12) +#define ETH_TX_LC BIT(13) +#define ETH_TX_TD BIT(14) +#define ETH_TX_CRC BIT(15) +#define ETH_TX_LE BIT(16) +#define ETH_TX_CC 0x001E0000 + +/* DMA descriptor (in physical memory). */ +struct dma_desc { + u32 control; /* Control. use DMAD_* */ + u32 ca; /* Current Address. */ + u32 devcs; /* Device control and status. */ + u32 link; /* Next descriptor in chain. */ +}; + +#define DMA_DESC_COUNT_BIT 0 +#define DMA_DESC_COUNT_MSK 0x0003ffff +#define DMA_DESC_DS_BIT 20 +#define DMA_DESC_DS_MSK 0x00300000 + +#define DMA_DESC_DEV_CMD_BIT 22 +#define DMA_DESC_DEV_CMD_MSK 0x01c00000 + +/* DMA descriptors interrupts */ +#define DMA_DESC_COF BIT(25) /* Chain on finished */ +#define DMA_DESC_COD BIT(26) /* Chain on done */ +#define DMA_DESC_IOF BIT(27) /* Interrupt on finished */ +#define DMA_DESC_IOD BIT(28) /* Interrupt on done */ +#define DMA_DESC_TERM BIT(29) /* Terminated */ +#define DMA_DESC_DONE BIT(30) /* Done */ +#define DMA_DESC_FINI BIT(31) /* Finished */ + +/* DMA register (within Internal Register Map). */ +struct dma_reg { + u32 dmac; /* Control. */ + u32 dmas; /* Status. */ + u32 dmasm; /* Mask. */ + u32 dmadptr; /* Descriptor pointer. */ + u32 dmandptr; /* Next descriptor pointer. */ +}; + +/* DMA channels specific registers */ +#define DMA_CHAN_RUN_BIT BIT(0) +#define DMA_CHAN_DONE_BIT BIT(1) +#define DMA_CHAN_MODE_BIT BIT(2) +#define DMA_CHAN_MODE_MSK 0x0000000c +#define DMA_CHAN_MODE_AUTO 0 +#define DMA_CHAN_MODE_BURST 1 +#define DMA_CHAN_MODE_XFRT 2 +#define DMA_CHAN_MODE_RSVD 3 +#define DMA_CHAN_ACT_BIT BIT(4) + +/* DMA status registers */ +#define DMA_STAT_FINI BIT(0) +#define DMA_STAT_DONE BIT(1) +#define DMA_STAT_CHAIN BIT(2) +#define DMA_STAT_ERR BIT(3) +#define DMA_STAT_HALT BIT(4) + #define STATION_ADDRESS_HIGH(dev) (((dev)->dev_addr[0] << 8) | \ ((dev)->dev_addr[1])) #define STATION_ADDRESS_LOW(dev) (((dev)->dev_addr[2] << 24) | \ @@ -95,24 +323,30 @@ enum chain_status { desc_filled, - desc_empty + desc_is_empty }; +#define DMA_COUNT(count) ((count) & DMA_DESC_COUNT_MSK) #define IS_DMA_FINISHED(X) (((X) & (DMA_DESC_FINI)) != 0) #define IS_DMA_DONE(X) (((X) & (DMA_DESC_DONE)) != 0) #define RCVPKT_LENGTH(X) (((X) & ETH_RX_LEN) >> ETH_RX_LEN_BIT) /* Information that need to be kept for each board. 
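 *
 * The new dma_addr_t members shadow every mapping so teardown can pass
 * back the exact handle dma_map_single() returned, roughly (size and
 * direction here are illustrative, not taken from this patch):
 *
 *   lp->rx_skb_dma[i] = dma_map_single(lp->dmadev, skb->data,
 *                                      len, DMA_FROM_DEVICE);
 *   ...
 *   dma_unmap_single(lp->dmadev, lp->rx_skb_dma[i], len,
 *                    DMA_FROM_DEVICE);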
*/ struct korina_private { - struct eth_regs *eth_regs; - struct dma_reg *rx_dma_regs; - struct dma_reg *tx_dma_regs; + struct eth_regs __iomem *eth_regs; + struct dma_reg __iomem *rx_dma_regs; + struct dma_reg __iomem *tx_dma_regs; struct dma_desc *td_ring; /* transmit descriptor ring */ struct dma_desc *rd_ring; /* receive descriptor ring */ + dma_addr_t td_dma; + dma_addr_t rd_dma; struct sk_buff *tx_skb[KORINA_NUM_TDS]; struct sk_buff *rx_skb[KORINA_NUM_RDS]; + dma_addr_t rx_skb_dma[KORINA_NUM_RDS]; + dma_addr_t tx_skb_dma[KORINA_NUM_TDS]; + int rx_next_done; int rx_chain_head; int rx_chain_tail; @@ -137,15 +371,18 @@ struct korina_private { struct mii_if_info mii_if; struct work_struct restart_task; struct net_device *dev; - int phy_addr; + struct device *dmadev; + int mii_clock_freq; }; -extern unsigned int idt_cpu_freq; +static dma_addr_t korina_tx_dma(struct korina_private *lp, int idx) +{ + return lp->td_dma + (idx * sizeof(struct dma_desc)); +} -static inline void korina_start_dma(struct dma_reg *ch, u32 dma_addr) +static dma_addr_t korina_rx_dma(struct korina_private *lp, int idx) { - writel(0, &ch->dmandptr); - writel(dma_addr, &ch->dmadptr); + return lp->rd_dma + (idx * sizeof(struct dma_desc)); } static inline void korina_abort_dma(struct net_device *dev, @@ -164,11 +401,6 @@ static inline void korina_abort_dma(struct net_device *dev, writel(0, &ch->dmandptr); } -static inline void korina_chain_dma(struct dma_reg *ch, u32 dma_addr) -{ - writel(dma_addr, &ch->dmandptr); -} - static void korina_abort_tx(struct net_device *dev) { struct korina_private *lp = netdev_priv(dev); @@ -183,30 +415,21 @@ static void korina_abort_rx(struct net_device *dev) korina_abort_dma(dev, lp->rx_dma_regs); } -static void korina_start_rx(struct korina_private *lp, - struct dma_desc *rd) -{ - korina_start_dma(lp->rx_dma_regs, CPHYSADDR(rd)); -} - -static void korina_chain_rx(struct korina_private *lp, - struct dma_desc *rd) -{ - korina_chain_dma(lp->rx_dma_regs, CPHYSADDR(rd)); -} - /* transmit packet */ static int korina_send_packet(struct sk_buff *skb, struct net_device *dev) { struct korina_private *lp = netdev_priv(dev); - unsigned long flags; - u32 length; u32 chain_prev, chain_next; + unsigned long flags; struct dma_desc *td; + dma_addr_t ca; + u32 length; + int idx; spin_lock_irqsave(&lp->lock, flags); - td = &lp->td_ring[lp->tx_chain_tail]; + idx = lp->tx_chain_tail; + td = &lp->td_ring[idx]; /* stop queue when full, drop pkts if queue already full */ if (lp->tx_count >= (KORINA_NUM_TDS - 2)) { @@ -214,38 +437,37 @@ static int korina_send_packet(struct sk_buff *skb, struct net_device *dev) if (lp->tx_count == (KORINA_NUM_TDS - 2)) netif_stop_queue(dev); - else { - dev->stats.tx_dropped++; - dev_kfree_skb_any(skb); - spin_unlock_irqrestore(&lp->lock, flags); - - return NETDEV_TX_OK; - } + else + goto drop_packet; } lp->tx_count++; - lp->tx_skb[lp->tx_chain_tail] = skb; + lp->tx_skb[idx] = skb; length = skb->len; - dma_cache_wback((u32)skb->data, skb->len); /* Setup the transmit descriptor. 
*/
- dma_cache_inv((u32) td, sizeof(*td));
- td->ca = CPHYSADDR(skb->data);
- chain_prev = (lp->tx_chain_tail - 1) & KORINA_TDS_MASK;
- chain_next = (lp->tx_chain_tail + 1) & KORINA_TDS_MASK;
+ ca = dma_map_single(lp->dmadev, skb->data, length, DMA_TO_DEVICE);
+ if (dma_mapping_error(lp->dmadev, ca))
+ goto drop_packet;
+
+ lp->tx_skb_dma[idx] = ca;
+ td->ca = ca;
+
+ chain_prev = (idx - 1) & KORINA_TDS_MASK;
+ chain_next = (idx + 1) & KORINA_TDS_MASK;
if (readl(&(lp->tx_dma_regs->dmandptr)) == 0) {
- if (lp->tx_chain_status == desc_empty) {
+ if (lp->tx_chain_status == desc_is_empty) {
/* Update tail */
td->control = DMA_COUNT(length) | DMA_DESC_COF | DMA_DESC_IOF;
/* Move tail */
lp->tx_chain_tail = chain_next;
/* Write to NDPTR */
- writel(CPHYSADDR(&lp->td_ring[lp->tx_chain_head]),
- &lp->tx_dma_regs->dmandptr);
+ writel(korina_tx_dma(lp, lp->tx_chain_head),
+ &lp->tx_dma_regs->dmandptr);
/* Move head to tail */
lp->tx_chain_head = lp->tx_chain_tail;
} else {
@@ -256,18 +478,18 @@ static int korina_send_packet(struct sk_buff *skb, struct net_device *dev)
lp->td_ring[chain_prev].control &= ~DMA_DESC_COF;
/* Link to prev */
- lp->td_ring[chain_prev].link = CPHYSADDR(td);
+ lp->td_ring[chain_prev].link = korina_tx_dma(lp, idx);
/* Move tail */
lp->tx_chain_tail = chain_next;
/* Write to NDPTR */
- writel(CPHYSADDR(&lp->td_ring[lp->tx_chain_head]),
- &(lp->tx_dma_regs->dmandptr));
+ writel(korina_tx_dma(lp, lp->tx_chain_head),
+ &lp->tx_dma_regs->dmandptr);
/* Move head to tail */
lp->tx_chain_head = lp->tx_chain_tail;
- lp->tx_chain_status = desc_empty;
+ lp->tx_chain_status = desc_is_empty;
}
} else {
- if (lp->tx_chain_status == desc_empty) {
+ if (lp->tx_chain_status == desc_is_empty) {
/* Update tail */
td->control = DMA_COUNT(length) | DMA_DESC_COF | DMA_DESC_IOF;
@@ -280,44 +502,66 @@ static int korina_send_packet(struct sk_buff *skb, struct net_device *dev)
DMA_DESC_COF | DMA_DESC_IOF;
lp->td_ring[chain_prev].control &= ~DMA_DESC_COF;
- lp->td_ring[chain_prev].link = CPHYSADDR(td);
+ lp->td_ring[chain_prev].link = korina_tx_dma(lp, idx);
lp->tx_chain_tail = chain_next;
}
}
- dma_cache_wback((u32) td, sizeof(*td));
netif_trans_update(dev);
spin_unlock_irqrestore(&lp->lock, flags);
return NETDEV_TX_OK;
+
+drop_packet:
+ dev->stats.tx_dropped++;
+ dev_kfree_skb_any(skb);
+ spin_unlock_irqrestore(&lp->lock, flags);
+
+ return NETDEV_TX_OK;
}
-static int mdio_read(struct net_device *dev, int mii_id, int reg)
+static int korina_mdio_wait(struct korina_private *lp)
+{
+ u32 value;
+
+ return readl_poll_timeout_atomic(&lp->eth_regs->miimind,
+ value, !(value & ETH_MII_IND_BSY),
+ 1, 1000);
+}
+
+static int korina_mdio_read(struct net_device *dev, int phy, int reg)
{
struct korina_private *lp = netdev_priv(dev);
int ret;
- mii_id = ((lp->rx_irq == 0x2c ?
1 : 0) << 8); + ret = korina_mdio_wait(lp); + if (ret < 0) + return ret; - writel(0, &lp->eth_regs->miimcfg); - writel(0, &lp->eth_regs->miimcmd); - writel(mii_id | reg, &lp->eth_regs->miimaddr); - writel(ETH_MII_CMD_SCN, &lp->eth_regs->miimcmd); + writel(phy << 8 | reg, &lp->eth_regs->miimaddr); + writel(1, &lp->eth_regs->miimcmd); + + ret = korina_mdio_wait(lp); + if (ret < 0) + return ret; - ret = (int)(readl(&lp->eth_regs->miimrdd)); + if (readl(&lp->eth_regs->miimind) & ETH_MII_IND_NV) + return -EINVAL; + + ret = readl(&lp->eth_regs->miimrdd); + writel(0, &lp->eth_regs->miimcmd); return ret; } -static void mdio_write(struct net_device *dev, int mii_id, int reg, int val) +static void korina_mdio_write(struct net_device *dev, int phy, int reg, int val) { struct korina_private *lp = netdev_priv(dev); - mii_id = ((lp->rx_irq == 0x2c ? 1 : 0) << 8); + if (korina_mdio_wait(lp)) + return; - writel(0, &lp->eth_regs->miimcfg); - writel(1, &lp->eth_regs->miimcmd); - writel(mii_id | reg, &lp->eth_regs->miimaddr); - writel(ETH_MII_CMD_SCN, &lp->eth_regs->miimcmd); + writel(0, &lp->eth_regs->miimcmd); + writel(phy << 8 | reg, &lp->eth_regs->miimaddr); writel(val, &lp->eth_regs->miimwtd); } @@ -353,12 +597,10 @@ static int korina_rx(struct net_device *dev, int limit) struct korina_private *lp = netdev_priv(dev); struct dma_desc *rd = &lp->rd_ring[lp->rx_next_done]; struct sk_buff *skb, *skb_new; - u8 *pkt_buf; u32 devcs, pkt_len, dmas; + dma_addr_t ca; int count; - dma_cache_inv((u32)rd, sizeof(*rd)); - for (count = 0; count < limit; count++) { skb = lp->rx_skb[lp->rx_next_done]; skb_new = NULL; @@ -392,20 +634,22 @@ static int korina_rx(struct net_device *dev, int limit) goto next; } - pkt_len = RCVPKT_LENGTH(devcs); - - /* must be the (first and) last - * descriptor then */ - pkt_buf = (u8 *)lp->rx_skb[lp->rx_next_done]->data; - - /* invalidate the cache */ - dma_cache_inv((unsigned long)pkt_buf, pkt_len - 4); - /* Malloc up new buffer. 
*/ skb_new = netdev_alloc_skb_ip_align(dev, KORINA_RBSIZE); - if (!skb_new) break; + + ca = dma_map_single(lp->dmadev, skb_new->data, KORINA_RBSIZE, + DMA_FROM_DEVICE); + if (dma_mapping_error(lp->dmadev, ca)) { + dev_kfree_skb_any(skb_new); + break; + } + + pkt_len = RCVPKT_LENGTH(devcs); + dma_unmap_single(lp->dmadev, lp->rx_skb_dma[lp->rx_next_done], + pkt_len, DMA_FROM_DEVICE); + /* Do not count the CRC */ skb_put(skb, pkt_len - 4); skb->protocol = eth_type_trans(skb, dev); @@ -420,15 +664,13 @@ static int korina_rx(struct net_device *dev, int limit) dev->stats.multicast++; lp->rx_skb[lp->rx_next_done] = skb_new; + lp->rx_skb_dma[lp->rx_next_done] = ca; next: rd->devcs = 0; /* Restore descriptor's curr_addr */ - if (skb_new) - rd->ca = CPHYSADDR(skb_new->data); - else - rd->ca = CPHYSADDR(skb->data); + rd->ca = lp->rx_skb_dma[lp->rx_next_done]; rd->control = DMA_COUNT(KORINA_RBSIZE) | DMA_DESC_COD | DMA_DESC_IOD; @@ -437,23 +679,21 @@ next: ~DMA_DESC_COD; lp->rx_next_done = (lp->rx_next_done + 1) & KORINA_RDS_MASK; - dma_cache_wback((u32)rd, sizeof(*rd)); rd = &lp->rd_ring[lp->rx_next_done]; - writel(~DMA_STAT_DONE, &lp->rx_dma_regs->dmas); + writel((u32)~DMA_STAT_DONE, &lp->rx_dma_regs->dmas); } dmas = readl(&lp->rx_dma_regs->dmas); if (dmas & DMA_STAT_HALT) { - writel(~(DMA_STAT_HALT | DMA_STAT_ERR), - &lp->rx_dma_regs->dmas); + writel((u32)~(DMA_STAT_HALT | DMA_STAT_ERR), + &lp->rx_dma_regs->dmas); lp->dma_halt_cnt++; rd->devcs = 0; - skb = lp->rx_skb[lp->rx_next_done]; - rd->ca = CPHYSADDR(skb->data); - dma_cache_wback((u32)rd, sizeof(*rd)); - korina_chain_rx(lp, rd); + rd->ca = lp->rx_skb_dma[lp->rx_next_done]; + writel(korina_rx_dma(lp, rd - lp->rd_ring), + &lp->rx_dma_regs->dmandptr); } return count; @@ -576,6 +816,10 @@ static void korina_tx(struct net_device *dev) /* We must always free the original skb */ if (lp->tx_skb[lp->tx_next_done]) { + dma_unmap_single(lp->dmadev, + lp->tx_skb_dma[lp->tx_next_done], + lp->tx_skb[lp->tx_next_done]->len, + DMA_TO_DEVICE); dev_kfree_skb_any(lp->tx_skb[lp->tx_next_done]); lp->tx_skb[lp->tx_next_done] = NULL; } @@ -622,9 +866,9 @@ korina_tx_dma_interrupt(int irq, void *dev_id) if (lp->tx_chain_status == desc_filled && (readl(&(lp->tx_dma_regs->dmandptr)) == 0)) { - writel(CPHYSADDR(&lp->td_ring[lp->tx_chain_head]), - &(lp->tx_dma_regs->dmandptr)); - lp->tx_chain_status = desc_empty; + writel(korina_tx_dma(lp, lp->tx_chain_head), + &lp->tx_dma_regs->dmandptr); + lp->tx_chain_status = desc_is_empty; lp->tx_chain_head = lp->tx_chain_tail; netif_trans_update(dev); } @@ -643,7 +887,7 @@ static void korina_check_media(struct net_device *dev, unsigned int init_media) { struct korina_private *lp = netdev_priv(dev); - mii_check_media(&lp->mii_if, 0, init_media); + mii_check_media(&lp->mii_if, 1, init_media); if (lp->mii_if.full_duplex) writel(readl(&lp->eth_regs->ethmac2) | ETH_MAC2_FD, @@ -743,6 +987,7 @@ static int korina_alloc_ring(struct net_device *dev) { struct korina_private *lp = netdev_priv(dev); struct sk_buff *skb; + dma_addr_t ca; int i; /* Initialize the transmit descriptors */ @@ -754,7 +999,7 @@ static int korina_alloc_ring(struct net_device *dev) } lp->tx_next_done = lp->tx_chain_head = lp->tx_chain_tail = lp->tx_full = lp->tx_count = 0; - lp->tx_chain_status = desc_empty; + lp->tx_chain_status = desc_is_empty; /* Initialize the receive descriptors */ for (i = 0; i < KORINA_NUM_RDS; i++) { @@ -765,19 +1010,24 @@ static int korina_alloc_ring(struct net_device *dev) lp->rd_ring[i].control = DMA_DESC_IOD | DMA_COUNT(KORINA_RBSIZE); 
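		/* The buffer address the DMA engine dereferences is the
		 * dma_addr_t returned by dma_map_single() below: it replaces
		 * the old CPHYSADDR() value in ->ca and is kept in
		 * rx_skb_dma[] so the buffer can be unmapped again later.
		 */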
lp->rd_ring[i].devcs = 0; - lp->rd_ring[i].ca = CPHYSADDR(skb->data); - lp->rd_ring[i].link = CPHYSADDR(&lp->rd_ring[i+1]); + ca = dma_map_single(lp->dmadev, skb->data, KORINA_RBSIZE, + DMA_FROM_DEVICE); + if (dma_mapping_error(lp->dmadev, ca)) + return -ENOMEM; + lp->rd_ring[i].ca = ca; + lp->rx_skb_dma[i] = ca; + lp->rd_ring[i].link = korina_rx_dma(lp, i + 1); } /* loop back receive descriptors, so the last * descriptor points to the first one */ - lp->rd_ring[i - 1].link = CPHYSADDR(&lp->rd_ring[0]); + lp->rd_ring[i - 1].link = lp->rd_dma; lp->rd_ring[i - 1].control |= DMA_DESC_COD; lp->rx_next_done = 0; lp->rx_chain_head = 0; lp->rx_chain_tail = 0; - lp->rx_chain_status = desc_empty; + lp->rx_chain_status = desc_is_empty; return 0; } @@ -789,16 +1039,22 @@ static void korina_free_ring(struct net_device *dev) for (i = 0; i < KORINA_NUM_RDS; i++) { lp->rd_ring[i].control = 0; - if (lp->rx_skb[i]) + if (lp->rx_skb[i]) { + dma_unmap_single(lp->dmadev, lp->rx_skb_dma[i], + KORINA_RBSIZE, DMA_FROM_DEVICE); dev_kfree_skb_any(lp->rx_skb[i]); - lp->rx_skb[i] = NULL; + lp->rx_skb[i] = NULL; + } } for (i = 0; i < KORINA_NUM_TDS; i++) { lp->td_ring[i].control = 0; - if (lp->tx_skb[i]) + if (lp->tx_skb[i]) { + dma_unmap_single(lp->dmadev, lp->tx_skb_dma[i], + lp->tx_skb[i]->len, DMA_TO_DEVICE); dev_kfree_skb_any(lp->tx_skb[i]); - lp->tx_skb[i] = NULL; + lp->tx_skb[i] = NULL; + } } } @@ -830,7 +1086,8 @@ static int korina_init(struct net_device *dev) writel(0, &lp->rx_dma_regs->dmas); /* Start Rx DMA */ - korina_start_rx(lp, &lp->rd_ring[0]); + writel(0, &lp->rx_dma_regs->dmandptr); + writel(korina_rx_dma(lp, 0), &lp->rx_dma_regs->dmadptr); writel(readl(&lp->tx_dma_regs->dmasm) & ~(DMA_STAT_FINI | DMA_STAT_ERR), @@ -867,14 +1124,17 @@ static int korina_init(struct net_device *dev) /* Management Clock Prescaler Divisor * Clock independent setting */ - writel(((idt_cpu_freq) / MII_CLOCK + 1) & ~1, - &lp->eth_regs->ethmcp); + writel(((lp->mii_clock_freq) / MII_CLOCK + 1) & ~1, + &lp->eth_regs->ethmcp); + writel(0, &lp->eth_regs->miimcfg); /* don't transmit until fifo contains 48b */ writel(48, &lp->eth_regs->ethfifott); writel(ETH_MAC1_RE, &lp->eth_regs->ethmac1); + korina_check_media(dev, 1); + napi_enable(&lp->napi); netif_start_queue(dev); @@ -1022,86 +1282,94 @@ static const struct net_device_ops korina_netdev_ops = { static int korina_probe(struct platform_device *pdev) { - struct korina_device *bif = platform_get_drvdata(pdev); + u8 *mac_addr = dev_get_platdata(&pdev->dev); struct korina_private *lp; struct net_device *dev; - struct resource *r; + struct clk *clk; + void __iomem *p; int rc; - dev = alloc_etherdev(sizeof(struct korina_private)); + dev = devm_alloc_etherdev(&pdev->dev, sizeof(struct korina_private)); if (!dev) return -ENOMEM; SET_NETDEV_DEV(dev, &pdev->dev); lp = netdev_priv(dev); - bif->dev = dev; - memcpy(dev->dev_addr, bif->mac, ETH_ALEN); + if (mac_addr) + ether_addr_copy(dev->dev_addr, mac_addr); + else if (of_get_mac_address(pdev->dev.of_node, dev->dev_addr) < 0) + eth_hw_addr_random(dev); + + clk = devm_clk_get_optional(&pdev->dev, "mdioclk"); + if (IS_ERR(clk)) + return PTR_ERR(clk); + if (clk) { + clk_prepare_enable(clk); + lp->mii_clock_freq = clk_get_rate(clk); + } else { + lp->mii_clock_freq = 200000000; /* max possible input clk */ + } - lp->rx_irq = platform_get_irq_byname(pdev, "korina_rx"); - lp->tx_irq = platform_get_irq_byname(pdev, "korina_tx"); + lp->rx_irq = platform_get_irq_byname(pdev, "rx"); + lp->tx_irq = platform_get_irq_byname(pdev, "tx"); - r = 
platform_get_resource_byname(pdev, IORESOURCE_MEM, "korina_regs");
- dev->base_addr = r->start;
- lp->eth_regs = ioremap(r->start, resource_size(r));
- if (!lp->eth_regs) {
+ p = devm_platform_ioremap_resource_byname(pdev, "emac");
+ if (IS_ERR(p)) {
printk(KERN_ERR DRV_NAME ": cannot remap registers\n");
- rc = -ENXIO;
- goto probe_err_out;
+ return PTR_ERR(p);
}
+ lp->eth_regs = p;
- r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "korina_dma_rx");
- lp->rx_dma_regs = ioremap(r->start, resource_size(r));
- if (!lp->rx_dma_regs) {
+ p = devm_platform_ioremap_resource_byname(pdev, "dma_rx");
+ if (IS_ERR(p)) {
printk(KERN_ERR DRV_NAME ": cannot remap Rx DMA registers\n");
- rc = -ENXIO;
- goto probe_err_dma_rx;
+ return PTR_ERR(p);
}
+ lp->rx_dma_regs = p;
- r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "korina_dma_tx");
- lp->tx_dma_regs = ioremap(r->start, resource_size(r));
- if (!lp->tx_dma_regs) {
+ p = devm_platform_ioremap_resource_byname(pdev, "dma_tx");
+ if (IS_ERR(p)) {
printk(KERN_ERR DRV_NAME ": cannot remap Tx DMA registers\n");
- rc = -ENXIO;
- goto probe_err_dma_tx;
- }
-
- lp->td_ring = kmalloc(TD_RING_SIZE + RD_RING_SIZE, GFP_KERNEL);
- if (!lp->td_ring) {
- rc = -ENXIO;
- goto probe_err_td_ring;
+ return PTR_ERR(p);
}
+ lp->tx_dma_regs = p;
- dma_cache_inv((unsigned long)(lp->td_ring),
- TD_RING_SIZE + RD_RING_SIZE);
+ lp->td_ring = dmam_alloc_coherent(&pdev->dev, TD_RING_SIZE,
+ &lp->td_dma, GFP_KERNEL);
+ if (!lp->td_ring)
+ return -ENOMEM;
- /* now convert TD_RING pointer to KSEG1 */
- lp->td_ring = (struct dma_desc *)KSEG1ADDR(lp->td_ring);
- lp->rd_ring = &lp->td_ring[KORINA_NUM_TDS];
+ lp->rd_ring = dmam_alloc_coherent(&pdev->dev, RD_RING_SIZE,
+ &lp->rd_dma, GFP_KERNEL);
+ if (!lp->rd_ring)
+ return -ENOMEM;
spin_lock_init(&lp->lock);
/* just use the rx dma irq */
dev->irq = lp->rx_irq;
lp->dev = dev;
+ lp->dmadev = &pdev->dev;
dev->netdev_ops = &korina_netdev_ops;
dev->ethtool_ops = &netdev_ethtool_ops;
dev->watchdog_timeo = TX_TIMEOUT;
netif_napi_add(dev, &lp->napi, korina_poll, NAPI_POLL_WEIGHT);
- lp->phy_addr = (((lp->rx_irq == 0x2c?
1:0) << 8) | 0x05); lp->mii_if.dev = dev; - lp->mii_if.mdio_read = mdio_read; - lp->mii_if.mdio_write = mdio_write; - lp->mii_if.phy_id = lp->phy_addr; + lp->mii_if.mdio_read = korina_mdio_read; + lp->mii_if.mdio_write = korina_mdio_write; + lp->mii_if.phy_id = 1; lp->mii_if.phy_id_mask = 0x1f; lp->mii_if.reg_num_mask = 0x1f; + platform_set_drvdata(pdev, dev); + rc = register_netdev(dev); if (rc < 0) { printk(KERN_ERR DRV_NAME ": cannot register net device: %d\n", rc); - goto probe_err_register; + return rc; } timer_setup(&lp->media_check_timer, korina_poll_media, 0); @@ -1109,40 +1377,33 @@ static int korina_probe(struct platform_device *pdev) printk(KERN_INFO "%s: " DRV_NAME "-" DRV_VERSION " " DRV_RELDATE "\n", dev->name); -out: return rc; - -probe_err_register: - kfree((struct dma_desc *)KSEG0ADDR(lp->td_ring)); -probe_err_td_ring: - iounmap(lp->tx_dma_regs); -probe_err_dma_tx: - iounmap(lp->rx_dma_regs); -probe_err_dma_rx: - iounmap(lp->eth_regs); -probe_err_out: - free_netdev(dev); - goto out; } static int korina_remove(struct platform_device *pdev) { - struct korina_device *bif = platform_get_drvdata(pdev); - struct korina_private *lp = netdev_priv(bif->dev); - - iounmap(lp->eth_regs); - iounmap(lp->rx_dma_regs); - iounmap(lp->tx_dma_regs); - kfree((struct dma_desc *)KSEG0ADDR(lp->td_ring)); + struct net_device *dev = platform_get_drvdata(pdev); - unregister_netdev(bif->dev); - free_netdev(bif->dev); + unregister_netdev(dev); return 0; } +#ifdef CONFIG_OF +static const struct of_device_id korina_match[] = { + { + .compatible = "idt,3243x-emac", + }, + { } +}; +MODULE_DEVICE_TABLE(of, korina_match); +#endif + static struct platform_driver korina_driver = { - .driver.name = "korina", + .driver = { + .name = "korina", + .of_match_table = of_match_ptr(korina_match), + }, .probe = korina_probe, .remove = korina_remove, }; diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index 51ed8a54d380..41c2ad210bc9 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -435,7 +435,6 @@ static int xrx200_probe(struct platform_device *pdev) struct resource *res; struct xrx200_priv *priv; struct net_device *net_dev; - const u8 *mac; int err; /* alloc the network device */ @@ -460,10 +459,8 @@ static int xrx200_probe(struct platform_device *pdev) } priv->pmac_reg = devm_ioremap_resource(dev, res); - if (IS_ERR(priv->pmac_reg)) { - dev_err(dev, "failed to request and remap io ranges\n"); + if (IS_ERR(priv->pmac_reg)) return PTR_ERR(priv->pmac_reg); - } priv->chan_rx.dma.irq = platform_get_irq_byname(pdev, "rx"); if (priv->chan_rx.dma.irq < 0) @@ -479,10 +476,8 @@ static int xrx200_probe(struct platform_device *pdev) return PTR_ERR(priv->clk); } - mac = of_get_mac_address(np); - if (!IS_ERR(mac)) - ether_addr_copy(net_dev->dev_addr, mac); - else + err = of_get_mac_address(np, net_dev->dev_addr); + if (err) eth_hw_addr_random(net_dev); /* bring up the dma engine and IP core */ diff --git a/drivers/net/ethernet/marvell/Kconfig b/drivers/net/ethernet/marvell/Kconfig index 7fe15a3286f4..fe0989c0fc25 100644 --- a/drivers/net/ethernet/marvell/Kconfig +++ b/drivers/net/ethernet/marvell/Kconfig @@ -6,7 +6,7 @@ config NET_VENDOR_MARVELL bool "Marvell devices" default y - depends on PCI || CPU_PXA168 || MV64X60 || PPC32 || PLAT_ORION || INET || COMPILE_TEST + depends on PCI || CPU_PXA168 || PPC32 || PLAT_ORION || INET || COMPILE_TEST help If you have a network (Ethernet) card belonging to this class, say Y. 
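The of_get_mac_address() conversion seen in the lantiq_xrx200 hunk above recurs across this series (mv643xx_eth and mvneta below, korina earlier): the helper now writes the address into a caller-supplied buffer and returns an errno, instead of returning a pointer to feed ether_addr_copy(). A minimal sketch of the new calling convention; example_set_mac() is an illustrative name, not a function from any of the drivers touched here:

#include <linux/etherdevice.h>
#include <linux/of.h>
#include <linux/of_net.h>

/* Hypothetical helper showing the post-conversion pattern used above. */
static void example_set_mac(struct net_device *ndev, struct device_node *np)
{
	/* New signature: fills ndev->dev_addr and returns 0, or a negative
	 * errno when the DT property is absent or invalid.
	 */
	if (of_get_mac_address(np, ndev->dev_addr))
		eth_hw_addr_random(ndev);	/* same fallback the drivers use */
}
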
@@ -19,7 +19,7 @@ if NET_VENDOR_MARVELL config MV643XX_ETH tristate "Marvell Discovery (643XX) and Orion ethernet support" - depends on MV64X60 || PPC32 || PLAT_ORION || COMPILE_TEST + depends on PPC32 || PLAT_ORION || COMPILE_TEST depends on INET select PHYLIB select MVMDIO diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 90e6111ce534..d207bfcaf31d 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -700,7 +700,8 @@ static int skb_tx_csum(struct mv643xx_eth_private *mp, struct sk_buff *skb, ip_hdr(skb)->ihl << TX_IHL_SHIFT; /* TODO: Revisit this. With the usage of GEN_TCP_UDP_CHK_FULL - * it seems we don't need to pass the initial checksum. */ + * it seems we don't need to pass the initial checksum. + */ switch (ip_hdr(skb)->protocol) { case IPPROTO_UDP: cmd |= UDP_FRAME; @@ -790,7 +791,8 @@ txq_put_hdr_tso(struct sk_buff *skb, struct tx_queue *txq, int length, WARN(1, "failed to prepare checksum!"); /* Should we set this? Can't use the value from skb_tx_csum() - * as it's not the correct initial L4 checksum to use. */ + * as it's not the correct initial L4 checksum to use. + */ desc->l4i_chk = 0; desc->byte_cnt = hdr_len; @@ -2684,7 +2686,7 @@ static const struct of_device_id mv643xx_eth_shared_ids[] = { MODULE_DEVICE_TABLE(of, mv643xx_eth_shared_ids); #endif -#if defined(CONFIG_OF_IRQ) && !defined(CONFIG_MV64X60) +#ifdef CONFIG_OF_IRQ #define mv643xx_eth_property(_np, _name, _v) \ do { \ u32 tmp; \ @@ -2700,7 +2702,6 @@ static int mv643xx_eth_shared_of_add_port(struct platform_device *pdev, struct platform_device *ppdev; struct mv643xx_eth_platform_data ppd; struct resource res; - const char *mac_addr; int ret; int dev_num = 0; @@ -2731,9 +2732,7 @@ static int mv643xx_eth_shared_of_add_port(struct platform_device *pdev, return -EINVAL; } - mac_addr = of_get_mac_address(pnp); - if (!IS_ERR(mac_addr)) - ether_addr_copy(ppd.mac_addr, mac_addr); + of_get_mac_address(pnp, ppd.mac_addr); mv643xx_eth_property(pnp, "tx-queue-size", ppd.tx_queue_size); mv643xx_eth_property(pnp, "tx-sram-addr", ppd.tx_sram_addr); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index a635cf84608a..7d5cd9bc6c99 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1087,7 +1087,7 @@ static int mvneta_mbus_io_win_set(struct mvneta_port *pp, u32 base, u32 wsize, return 0; } -static int mvneta_bm_port_mbus_init(struct mvneta_port *pp) +static int mvneta_bm_port_mbus_init(struct mvneta_port *pp) { u32 wsize; u8 target, attr; @@ -2137,7 +2137,7 @@ mvneta_xdp_xmit(struct net_device *dev, int num_frame, { struct mvneta_port *pp = netdev_priv(dev); struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats); - int i, nxmit_byte = 0, nxmit = num_frame; + int i, nxmit_byte = 0, nxmit = 0; int cpu = smp_processor_id(); struct mvneta_tx_queue *txq; struct netdev_queue *nq; @@ -2155,12 +2155,11 @@ mvneta_xdp_xmit(struct net_device *dev, int num_frame, __netif_tx_lock(nq, cpu); for (i = 0; i < num_frame; i++) { ret = mvneta_xdp_submit_frame(pp, txq, frames[i], true); - if (ret == MVNETA_XDP_TX) { - nxmit_byte += frames[i]->len; - } else { - xdp_return_frame_rx_napi(frames[i]); - nxmit--; - } + if (ret != MVNETA_XDP_TX) + break; + + nxmit_byte += frames[i]->len; + nxmit++; } if (unlikely(flags & XDP_XMIT_FLUSH)) @@ -3994,7 +3993,8 @@ static void mvneta_mac_config(struct phylink_config *config, unsigned int mode, /* Armada 370 
documentation says we can only change the port mode * and in-band enable when the link is down, so force it down - * while making these changes. We also do this for GMAC_CTRL2 */ + * while making these changes. We also do this for GMAC_CTRL2 + */ if ((new_ctrl0 ^ gmac_ctrl0) & MVNETA_GMAC0_PORT_1000BASE_X || (new_ctrl2 ^ gmac_ctrl2) & MVNETA_GMAC2_INBAND_AN_ENABLE || (new_an ^ gmac_an) & MVNETA_GMAC_INBAND_AN_ENABLE) { @@ -4176,9 +4176,7 @@ static void mvneta_percpu_elect(struct mvneta_port *pp) rxq_map |= MVNETA_CPU_RXQ_ACCESS(rxq); if (cpu == elected_cpu) - /* Map the default receive queue queue to the - * elected CPU - */ + /* Map the default receive queue to the elected CPU */ rxq_map |= MVNETA_CPU_RXQ_ACCESS(pp->rxq_def); /* We update the TX queue map only if we have one @@ -4908,7 +4906,8 @@ static int mvneta_ethtool_set_eee(struct net_device *dev, u32 lpi_ctl0; /* The Armada 37x documents do not give limits for this other than - * it being an 8-bit register. */ + * it being an 8-bit register. + */ if (eee->tx_lpi_enabled && eee->tx_lpi_timer > 255) return -EINVAL; @@ -5142,7 +5141,6 @@ static int mvneta_probe(struct platform_device *pdev) struct net_device *dev; struct phylink *phylink; struct phy *comphy; - const char *dt_mac_addr; char hw_mac_addr[ETH_ALEN]; phy_interface_t phy_mode; const char *mac_from; @@ -5238,10 +5236,9 @@ static int mvneta_probe(struct platform_device *pdev) goto err_free_ports; } - dt_mac_addr = of_get_mac_address(dn); - if (!IS_ERR(dt_mac_addr)) { + err = of_get_mac_address(dn, dev->dev_addr); + if (!err) { mac_from = "device tree"; - ether_addr_copy(dev->dev_addr, dt_mac_addr); } else { mvneta_get_mac_addr(pp, hw_mac_addr); if (is_valid_ether_addr(hw_mac_addr)) { diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 1767c60056c5..ec706d614cac 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -3744,7 +3744,7 @@ mvpp2_xdp_xmit(struct net_device *dev, int num_frame, struct xdp_frame **frames, u32 flags) { struct mvpp2_port *port = netdev_priv(dev); - int i, nxmit_byte = 0, nxmit = num_frame; + int i, nxmit_byte = 0, nxmit = 0; struct mvpp2_pcpu_stats *stats; u16 txq_id; u32 ret; @@ -3762,12 +3762,11 @@ mvpp2_xdp_xmit(struct net_device *dev, int num_frame, for (i = 0; i < num_frame; i++) { ret = mvpp2_xdp_submit_frame(port, txq_id, frames[i], true); - if (ret == MVPP2_XDP_TX) { - nxmit_byte += frames[i]->len; - } else { - xdp_return_frame_rx_napi(frames[i]); - nxmit--; - } + if (ret != MVPP2_XDP_TX) + break; + + nxmit_byte += frames[i]->len; + nxmit++; } if (likely(nxmit > 0)) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c index 4812cdb4609e..7cc7d72d761e 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c @@ -918,9 +918,8 @@ static int mvpp2_prs_ip4_proto(struct mvpp2 *priv, unsigned short proto, mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_FLOWS); mvpp2_prs_sram_bits_set(&pe, MVPP2_PRS_SRAM_LU_GEN_BIT, 1); - /* Set L4 offset */ - mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L4, - sizeof(struct iphdr) - 4, + /* Set L3 offset */ + mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L3, -4, MVPP2_PRS_SRAM_OP_SEL_UDF_ADD); mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_IPV4_DIP_AI_BIT); mvpp2_prs_sram_ri_update(&pe, ri, ri_mask | MVPP2_PRS_RI_IP_FRAG_MASK); @@ -1335,7 +1334,7 @@ static void 
mvpp2_prs_vid_init(struct mvpp2 *priv) static int mvpp2_prs_etype_init(struct mvpp2 *priv) { struct mvpp2_prs_entry pe; - int tid; + int tid, ihl; /* Ethertype: PPPoE */ tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, @@ -1427,67 +1426,43 @@ static int mvpp2_prs_etype_init(struct mvpp2 *priv) MVPP2_PRS_RI_UDF3_MASK); mvpp2_prs_hw_write(priv, &pe); - /* Ethertype: IPv4 without options */ - tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, - MVPP2_PE_LAST_FREE_TID); - if (tid < 0) - return tid; - - memset(&pe, 0, sizeof(pe)); - mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_L2); - pe.index = tid; - - mvpp2_prs_match_etype(&pe, 0, ETH_P_IP); - mvpp2_prs_tcam_data_byte_set(&pe, MVPP2_ETH_TYPE_LEN, - MVPP2_PRS_IPV4_HEAD | MVPP2_PRS_IPV4_IHL, - MVPP2_PRS_IPV4_HEAD_MASK | - MVPP2_PRS_IPV4_IHL_MASK); - - mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_IP4); - mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP4, - MVPP2_PRS_RI_L3_PROTO_MASK); - /* goto ipv4 dest-address (skip eth_type + IP-header-size - 4) */ - mvpp2_prs_sram_shift_set(&pe, MVPP2_ETH_TYPE_LEN + - sizeof(struct iphdr) - 4, - MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); - /* Set L3 offset */ - mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L3, - MVPP2_ETH_TYPE_LEN, - MVPP2_PRS_SRAM_OP_SEL_UDF_ADD); - - /* Update shadow table and hw entry */ - mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_L2); - priv->prs_shadow[pe.index].udf = MVPP2_PRS_UDF_L2_DEF; - priv->prs_shadow[pe.index].finish = false; - mvpp2_prs_shadow_ri_set(priv, pe.index, MVPP2_PRS_RI_L3_IP4, - MVPP2_PRS_RI_L3_PROTO_MASK); - mvpp2_prs_hw_write(priv, &pe); - - /* Ethertype: IPv4 with options */ - tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, - MVPP2_PE_LAST_FREE_TID); - if (tid < 0) - return tid; - - pe.index = tid; + /* Ethertype: IPv4 with header length >= 5 */ + for (ihl = MVPP2_PRS_IPV4_IHL_MIN; ihl <= MVPP2_PRS_IPV4_IHL_MAX; ihl++) { + tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, + MVPP2_PE_LAST_FREE_TID); + if (tid < 0) + return tid; - mvpp2_prs_tcam_data_byte_set(&pe, MVPP2_ETH_TYPE_LEN, - MVPP2_PRS_IPV4_HEAD, - MVPP2_PRS_IPV4_HEAD_MASK); + memset(&pe, 0, sizeof(pe)); + mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_L2); + pe.index = tid; - /* Clear ri before updating */ - pe.sram[MVPP2_PRS_SRAM_RI_WORD] = 0x0; - pe.sram[MVPP2_PRS_SRAM_RI_CTRL_WORD] = 0x0; - mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP4_OPT, - MVPP2_PRS_RI_L3_PROTO_MASK); + mvpp2_prs_match_etype(&pe, 0, ETH_P_IP); + mvpp2_prs_tcam_data_byte_set(&pe, MVPP2_ETH_TYPE_LEN, + MVPP2_PRS_IPV4_HEAD | ihl, + MVPP2_PRS_IPV4_HEAD_MASK | + MVPP2_PRS_IPV4_IHL_MASK); + + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_IP4); + mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP4, + MVPP2_PRS_RI_L3_PROTO_MASK); + /* goto ipv4 dst-address (skip eth_type + IP-header-size - 4) */ + mvpp2_prs_sram_shift_set(&pe, MVPP2_ETH_TYPE_LEN + + sizeof(struct iphdr) - 4, + MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); + /* Set L4 offset */ + mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L4, + MVPP2_ETH_TYPE_LEN + (ihl * 4), + MVPP2_PRS_SRAM_OP_SEL_UDF_ADD); - /* Update shadow table and hw entry */ - mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_L2); - priv->prs_shadow[pe.index].udf = MVPP2_PRS_UDF_L2_DEF; - priv->prs_shadow[pe.index].finish = false; - mvpp2_prs_shadow_ri_set(priv, pe.index, MVPP2_PRS_RI_L3_IP4_OPT, - MVPP2_PRS_RI_L3_PROTO_MASK); - mvpp2_prs_hw_write(priv, &pe); + /* Update shadow table and hw entry */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_L2); + 
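/* prs_shadow mirrors the hardware TCAM in software: the LU stage
+ * is recorded here, and finish stays false below so parsing
+ * continues into the LU_IP4 stage selected above.
+ */
+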
priv->prs_shadow[pe.index].udf = MVPP2_PRS_UDF_L2_DEF; + priv->prs_shadow[pe.index].finish = false; + mvpp2_prs_shadow_ri_set(priv, pe.index, MVPP2_PRS_RI_L3_IP4, + MVPP2_PRS_RI_L3_PROTO_MASK); + mvpp2_prs_hw_write(priv, &pe); + } /* Ethertype: IPv6 without options */ tid = mvpp2_prs_tcam_first_free(priv, MVPP2_PE_FIRST_FREE_TID, @@ -1674,7 +1649,8 @@ static int mvpp2_prs_pppoe_init(struct mvpp2 *priv) pe.index = tid; mvpp2_prs_tcam_data_byte_set(&pe, MVPP2_ETH_TYPE_LEN, - MVPP2_PRS_IPV4_HEAD | MVPP2_PRS_IPV4_IHL, + MVPP2_PRS_IPV4_HEAD | + MVPP2_PRS_IPV4_IHL_MIN, MVPP2_PRS_IPV4_HEAD_MASK | MVPP2_PRS_IPV4_IHL_MASK); @@ -1788,9 +1764,8 @@ static int mvpp2_prs_ip4_init(struct mvpp2 *priv) mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_FLOWS); mvpp2_prs_sram_bits_set(&pe, MVPP2_PRS_SRAM_LU_GEN_BIT, 1); - /* Set L4 offset */ - mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L4, - sizeof(struct iphdr) - 4, + /* Set L3 offset */ + mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L3, -4, MVPP2_PRS_SRAM_OP_SEL_UDF_ADD); mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_IPV4_DIP_AI_BIT); mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L4_OTHER, diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h index c16e5b9947bd..5ce5907be591 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h @@ -28,7 +28,8 @@ #define MVPP2_PRS_IPV4_MC 0xe0 #define MVPP2_PRS_IPV4_MC_MASK 0xf0 #define MVPP2_PRS_IPV4_BC_MASK 0xff -#define MVPP2_PRS_IPV4_IHL 0x5 +#define MVPP2_PRS_IPV4_IHL_MIN 0x5 +#define MVPP2_PRS_IPV4_IHL_MAX 0xf #define MVPP2_PRS_IPV4_IHL_MASK 0xf #define MVPP2_PRS_IPV6_MC 0xff #define MVPP2_PRS_IPV6_MC_MASK 0xff diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 68deae529bc9..fac6474ad694 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -30,10 +30,35 @@ static LIST_HEAD(cgx_list); /* Convert firmware speed encoding to user format(Mbps) */ -static u32 cgx_speed_mbps[CGX_LINK_SPEED_MAX]; +static const u32 cgx_speed_mbps[CGX_LINK_SPEED_MAX] = { + [CGX_LINK_NONE] = 0, + [CGX_LINK_10M] = 10, + [CGX_LINK_100M] = 100, + [CGX_LINK_1G] = 1000, + [CGX_LINK_2HG] = 2500, + [CGX_LINK_5G] = 5000, + [CGX_LINK_10G] = 10000, + [CGX_LINK_20G] = 20000, + [CGX_LINK_25G] = 25000, + [CGX_LINK_40G] = 40000, + [CGX_LINK_50G] = 50000, + [CGX_LINK_80G] = 80000, + [CGX_LINK_100G] = 100000, +}; /* Convert firmware lmac type encoding to string */ -static char *cgx_lmactype_string[LMAC_MODE_MAX]; +static const char *cgx_lmactype_string[LMAC_MODE_MAX] = { + [LMAC_MODE_SGMII] = "SGMII", + [LMAC_MODE_XAUI] = "XAUI", + [LMAC_MODE_RXAUI] = "RXAUI", + [LMAC_MODE_10G_R] = "10G_R", + [LMAC_MODE_40G_R] = "40G_R", + [LMAC_MODE_QSGMII] = "QSGMII", + [LMAC_MODE_25G_R] = "25G_R", + [LMAC_MODE_50G_R] = "50G_R", + [LMAC_MODE_100G_R] = "100G_R", + [LMAC_MODE_USXGMII] = "USXGMII", +}; /* CGX PHY management internal APIs */ static int cgx_fwi_link_change(struct cgx *cgx, int lmac_id, bool en); @@ -659,34 +684,6 @@ int cgx_fwi_cmd_generic(u64 req, u64 *resp, struct cgx *cgx, int lmac_id) return err; } -static inline void cgx_link_usertable_init(void) -{ - cgx_speed_mbps[CGX_LINK_NONE] = 0; - cgx_speed_mbps[CGX_LINK_10M] = 10; - cgx_speed_mbps[CGX_LINK_100M] = 100; - cgx_speed_mbps[CGX_LINK_1G] = 1000; - cgx_speed_mbps[CGX_LINK_2HG] = 2500; - cgx_speed_mbps[CGX_LINK_5G] = 5000; - cgx_speed_mbps[CGX_LINK_10G] = 
10000; - cgx_speed_mbps[CGX_LINK_20G] = 20000; - cgx_speed_mbps[CGX_LINK_25G] = 25000; - cgx_speed_mbps[CGX_LINK_40G] = 40000; - cgx_speed_mbps[CGX_LINK_50G] = 50000; - cgx_speed_mbps[CGX_LINK_80G] = 80000; - cgx_speed_mbps[CGX_LINK_100G] = 100000; - - cgx_lmactype_string[LMAC_MODE_SGMII] = "SGMII"; - cgx_lmactype_string[LMAC_MODE_XAUI] = "XAUI"; - cgx_lmactype_string[LMAC_MODE_RXAUI] = "RXAUI"; - cgx_lmactype_string[LMAC_MODE_10G_R] = "10G_R"; - cgx_lmactype_string[LMAC_MODE_40G_R] = "40G_R"; - cgx_lmactype_string[LMAC_MODE_QSGMII] = "QSGMII"; - cgx_lmactype_string[LMAC_MODE_25G_R] = "25G_R"; - cgx_lmactype_string[LMAC_MODE_50G_R] = "50G_R"; - cgx_lmactype_string[LMAC_MODE_100G_R] = "100G_R"; - cgx_lmactype_string[LMAC_MODE_USXGMII] = "USXGMII"; -} - static int cgx_link_usertable_index_map(int speed) { switch (speed) { @@ -828,7 +825,7 @@ static inline void link_status_user_format(u64 lstat, struct cgx_link_user_info *linfo, struct cgx *cgx, u8 lmac_id) { - char *lmac_string; + const char *lmac_string; linfo->link_up = FIELD_GET(RESP_LINKSTAT_UP, lstat); linfo->full_duplex = FIELD_GET(RESP_LINKSTAT_FDUPLEX, lstat); @@ -1377,7 +1374,6 @@ static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id) list_add(&cgx->cgx_list, &cgx_list); - cgx_link_usertable_init(); cgx_populate_features(cgx); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index ea456099b33c..cedb2616c509 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -74,13 +74,13 @@ struct otx2_mbox { struct otx2_mbox_dev *dev; }; -/* Header which preceeds all mbox messages */ +/* Header which precedes all mbox messages */ struct mbox_hdr { u64 msg_size; /* Total msgs size embedded */ u16 num_msgs; /* No of msgs embedded */ }; -/* Header which preceeds every msg and is also part of it */ +/* Header which precedes every msg and is also part of it */ struct mbox_msghdr { u16 pcifunc; /* Who's sending this msg */ u16 id; /* Mbox message ID */ @@ -177,6 +177,9 @@ M(CPT_LF_ALLOC, 0xA00, cpt_lf_alloc, cpt_lf_alloc_req_msg, \ M(CPT_LF_FREE, 0xA01, cpt_lf_free, msg_req, msg_rsp) \ M(CPT_RD_WR_REGISTER, 0xA02, cpt_rd_wr_register, cpt_rd_wr_reg_msg, \ cpt_rd_wr_reg_msg) \ +M(CPT_STATS, 0xA05, cpt_sts, cpt_sts_req, cpt_sts_rsp) \ +M(CPT_RXC_TIME_CFG, 0xA06, cpt_rxc_time_cfg, cpt_rxc_time_cfg_req, \ + msg_rsp) \ /* NPC mbox IDs (range 0x6000 - 0x7FFF) */ \ M(NPC_MCAM_ALLOC_ENTRY, 0x6000, npc_mcam_alloc_entry, npc_mcam_alloc_entry_req,\ npc_mcam_alloc_entry_rsp) \ @@ -216,6 +219,9 @@ M(NPC_MCAM_READ_ENTRY, 0x600f, npc_mcam_read_entry, \ npc_mcam_read_entry_rsp) \ M(NPC_MCAM_READ_BASE_RULE, 0x6011, npc_read_base_steer_rule, \ msg_req, npc_mcam_read_base_rule_rsp) \ +M(NPC_MCAM_GET_STATS, 0x6012, npc_mcam_entry_stats, \ + npc_mcam_get_stats_req, \ + npc_mcam_get_stats_rsp) \ /* NIX mbox IDs (range 0x8000 - 0xFFFF) */ \ M(NIX_LF_ALLOC, 0x8000, nix_lf_alloc, \ nix_lf_alloc_req, nix_lf_alloc_rsp) \ @@ -277,8 +283,8 @@ struct msg_req { struct mbox_msghdr hdr; }; -/* Generic rsponse msg used a ack or response for those mbox - * messages which doesn't have a specific rsp msg format. +/* Generic response msg used an ack or response for those mbox + * messages which don't have a specific rsp msg format. */ struct msg_rsp { struct mbox_msghdr hdr; @@ -299,7 +305,7 @@ struct ready_msg_rsp { /* Structure for requesting resource provisioning. 
* 'modify' flag to be used when either requesting more - * or to detach partial of a cetain resource type. + * or to detach partial of a certain resource type. * Rest of the fields specify how many of what type to * be attached. * To request LFs from two blocks of same type this mailbox @@ -489,7 +495,7 @@ struct cgx_set_link_mode_rsp { }; #define RVU_LMAC_FEAT_FC BIT_ULL(0) /* pause frames */ -#define RVU_LMAC_FEAT_PTP BIT_ULL(1) /* precison time protocol */ +#define RVU_LMAC_FEAT_PTP BIT_ULL(1) /* precision time protocol */ #define RVU_MAC_VERSION BIT_ULL(2) #define RVU_MAC_CGX BIT_ULL(3) #define RVU_MAC_RPM BIT_ULL(4) @@ -605,6 +611,7 @@ enum nix_af_status { NIX_AF_INVAL_SSO_PF_FUNC = -420, NIX_AF_ERR_TX_VTAG_NOSPC = -421, NIX_AF_ERR_RX_VTAG_INUSE = -422, + NIX_AF_ERR_NPC_KEY_NOT_SUPP = -423, }; /* For NIX RX vtag action */ @@ -1141,6 +1148,7 @@ struct npc_install_flow_req { u64 features; u16 entry; u16 channel; + u16 chan_mask; u8 intf; u8 set_cntr; /* If counter is available set counter for this entry ? */ u8 default_rule; @@ -1193,6 +1201,17 @@ struct npc_mcam_read_base_rule_rsp { struct mcam_entry entry; }; +struct npc_mcam_get_stats_req { + struct mbox_msghdr hdr; + u16 entry; /* mcam entry */ +}; + +struct npc_mcam_get_stats_rsp { + struct mbox_msghdr hdr; + u64 stat; /* counter stats */ + u8 stat_ena; /* enabled */ +}; + enum ptp_op { PTP_OP_ADJFINE = 0, PTP_OP_GET_CLOCK = 1, @@ -1239,4 +1258,62 @@ struct cpt_lf_alloc_req_msg { int blkaddr; }; +/* Mailbox message request and response format for CPT stats. */ +struct cpt_sts_req { + struct mbox_msghdr hdr; + u8 blkaddr; +}; + +struct cpt_sts_rsp { + struct mbox_msghdr hdr; + u64 inst_req_pc; + u64 inst_lat_pc; + u64 rd_req_pc; + u64 rd_lat_pc; + u64 rd_uc_pc; + u64 active_cycles_pc; + u64 ctx_mis_pc; + u64 ctx_hit_pc; + u64 ctx_aop_pc; + u64 ctx_aop_lat_pc; + u64 ctx_ifetch_pc; + u64 ctx_ifetch_lat_pc; + u64 ctx_ffetch_pc; + u64 ctx_ffetch_lat_pc; + u64 ctx_wback_pc; + u64 ctx_wback_lat_pc; + u64 ctx_psh_pc; + u64 ctx_psh_lat_pc; + u64 ctx_err; + u64 ctx_enc_id; + u64 ctx_flush_timer; + u64 rxc_time; + u64 rxc_time_cfg; + u64 rxc_active_sts; + u64 rxc_zombie_sts; + u64 busy_sts_ae; + u64 free_sts_ae; + u64 busy_sts_se; + u64 free_sts_se; + u64 busy_sts_ie; + u64 free_sts_ie; + u64 exe_err_info; + u64 cptclk_cnt; + u64 diag; + u64 rxc_dfrg; + u64 x2p_link_cfg0; + u64 x2p_link_cfg1; +}; + +/* Mailbox message request format to configure reassembly timeout. 
*/ +struct cpt_rxc_time_cfg_req { + struct mbox_msghdr hdr; + int blkaddr; + u32 step; + u16 zombie_thres; + u16 zombie_limit; + u16 active_thres; + u16 active_limit; +}; + #endif /* MBOX_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h index 3c640f6aba92..1e012e787260 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h @@ -167,6 +167,8 @@ enum key_fields { NPC_IPPROTO_SCTP, NPC_IPPROTO_AH, NPC_IPPROTO_ESP, + NPC_IPPROTO_ICMP, + NPC_IPPROTO_ICMP6, NPC_SPORT_TCP, NPC_DPORT_TCP, NPC_SPORT_UDP, @@ -420,6 +422,11 @@ struct nix_tx_action { #define TX_VTAG1_LID_MASK GENMASK_ULL(42, 40) #define TX_VTAG1_RELPTR_MASK GENMASK_ULL(39, 32) +/* NPC MCAM reserved entry index per nixlf */ +#define NIXLF_UCAST_ENTRY 0 +#define NIXLF_BCAST_ENTRY 1 +#define NIXLF_PROMISC_ENTRY 2 + struct npc_mcam_kex { /* MKEX Profle Header */ u64 mkex_sign; /* "mcam-kex-profile" (8 bytes/ASCII characters) */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h index b192692b4fc4..5c372d2c24a1 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h @@ -13499,8 +13499,6 @@ static struct npc_mcam_kex npc_mkex_default = { [NPC_LT_LC_IP] = { /* SIP+DIP: 8 bytes, KW2[63:0] */ KEX_LD_CFG(0x07, 0xc, 0x1, 0x0, 0x10), - /* TOS: 1 byte, KW1[63:56] */ - KEX_LD_CFG(0x0, 0x1, 0x1, 0x0, 0xf), }, /* Layer C: IPv6 */ [NPC_LT_LC_IP6] = { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index d9a1a71c7ccc..ab24a5e8ee8a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -2462,8 +2462,10 @@ static void rvu_unregister_interrupts(struct rvu *rvu) INTR_MASK(rvu->hw->total_pfs) & ~1ULL); for (irq = 0; irq < rvu->num_vec; irq++) { - if (rvu->irq_allocated[irq]) + if (rvu->irq_allocated[irq]) { free_irq(pci_irq_vector(rvu->pdev, irq), rvu); + rvu->irq_allocated[irq] = false; + } } pci_free_irq_vectors(rvu->pdev); @@ -2975,8 +2977,8 @@ static void rvu_remove(struct pci_dev *pdev) struct rvu *rvu = pci_get_drvdata(pdev); rvu_dbg_exit(rvu); - rvu_unregister_interrupts(rvu); rvu_unregister_dl(rvu); + rvu_unregister_interrupts(rvu); rvu_flr_wq_destroy(rvu); rvu_cgx_exit(rvu); rvu_fwdata_exit(rvu); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index fa6e46e36ae4..c2cc4806d13c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -548,6 +548,12 @@ static inline int is_afvf(u16 pcifunc) return !(pcifunc & ~RVU_PFVF_FUNC_MASK); } +/* check if PF_FUNC is AF */ +static inline bool is_pffunc_af(u16 pcifunc) +{ + return !pcifunc; +} + static inline bool is_rvu_fwdata_valid(struct rvu *rvu) { return (rvu->fwdata->header_magic == RVU_FWDATA_HEADER_MAGIC) && @@ -640,7 +646,8 @@ int npc_config_ts_kpuaction(struct rvu *rvu, int pf, u16 pcifunc, bool en); void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc, int nixlf, u64 chan, u8 *mac_addr); void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, - int nixlf, u64 chan, bool allmulti); + int nixlf, u64 chan, u8 chan_cnt, + bool allmulti); void rvu_npc_disable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf); void rvu_npc_enable_promisc_entry(struct rvu *rvu, u16 
pcifunc, int nixlf); void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc, @@ -665,9 +672,6 @@ int rvu_npc_get_tx_nibble_cfg(struct rvu *rvu, u64 nibble_ena); int npc_mcam_verify_channel(struct rvu *rvu, u16 pcifunc, u8 intf, u16 channel); int npc_flow_steering_init(struct rvu *rvu, int blkaddr); const char *npc_get_field_name(u8 hdr); -bool rvu_npc_write_default_rule(struct rvu *rvu, int blkaddr, int nixlf, - u16 pcifunc, u8 intf, struct mcam_entry *entry, - int *entry_index); int npc_get_bank(struct npc_mcam *mcam, int index); void npc_mcam_enable_flows(struct rvu *rvu, u16 target); void npc_mcam_disable_flows(struct rvu *rvu, u16 target); @@ -678,6 +682,12 @@ void npc_read_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, u8 *intf, u8 *ena); bool is_mac_feature_supported(struct rvu *rvu, int pf, int feature); u32 rvu_cgx_get_fifolen(struct rvu *rvu); +void *rvu_first_cgx_pdata(struct rvu *rvu); + +int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, u16 pcifunc, int nixlf, + int type); +bool is_mcam_entry_enabled(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr, + int index); /* CPT APIs */ int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int lf, int slot); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index e668e482383a..6e2bf4fcd29c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -89,6 +89,21 @@ void *rvu_cgx_pdata(u8 cgx_id, struct rvu *rvu) return rvu->cgx_idmap[cgx_id]; } +/* Return first enabled CGX instance if none are enabled then return NULL */ +void *rvu_first_cgx_pdata(struct rvu *rvu) +{ + int first_enabled_cgx = 0; + void *cgxd = NULL; + + for (; first_enabled_cgx < rvu->cgx_cnt_max; first_enabled_cgx++) { + cgxd = rvu_cgx_pdata(first_enabled_cgx, rvu); + if (cgxd) + break; + } + + return cgxd; +} + /* Based on P2X connectivity find mapped NIX block for a PF */ static void rvu_map_cgx_nix_block(struct rvu *rvu, int pf, int cgx_id, int lmac_id) @@ -711,10 +726,9 @@ int rvu_mbox_handler_cgx_features_get(struct rvu *rvu, u32 rvu_cgx_get_fifolen(struct rvu *rvu) { struct mac_ops *mac_ops; - int rvu_def_cgx_id = 0; u32 fifo_len; - mac_ops = get_mac_ops(rvu_cgx_pdata(rvu_def_cgx_id, rvu)); + mac_ops = get_mac_ops(rvu_first_cgx_pdata(rvu)); fifo_len = mac_ops ? mac_ops->fifo_len : 0; return fifo_len; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c index 0945c3a3b180..89253f7bdadb 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2020 Marvell. 
*/ +#include <linux/bitfield.h> #include <linux/pci.h> #include "rvu_struct.h" #include "rvu_reg.h" @@ -9,6 +10,28 @@ /* CPT PF device id */ #define PCI_DEVID_OTX2_CPT_PF 0xA0FD +#define PCI_DEVID_OTX2_CPT10K_PF 0xA0F2 + +/* Length of initial context fetch in 128 byte words */ +#define CPT_CTX_ILEN 2 + +#define cpt_get_eng_sts(e_min, e_max, rsp, etype) \ +({ \ + u64 free_sts = 0, busy_sts = 0; \ + typeof(rsp) _rsp = rsp; \ + u32 e, i; \ + \ + for (e = (e_min), i = 0; e < (e_max); e++, i++) { \ + reg = rvu_read64(rvu, blkaddr, CPT_AF_EXEX_STS(e)); \ + if (reg & 0x1) \ + busy_sts |= 1ULL << i; \ + \ + if (reg & 0x2) \ + free_sts |= 1ULL << i; \ + } \ + (_rsp)->busy_sts_##etype = busy_sts; \ + (_rsp)->free_sts_##etype = free_sts; \ +}) static int get_cpt_pf_num(struct rvu *rvu) { @@ -21,7 +44,8 @@ static int get_cpt_pf_num(struct rvu *rvu) if (!pdev) continue; - if (pdev->device == PCI_DEVID_OTX2_CPT_PF) { + if (pdev->device == PCI_DEVID_OTX2_CPT_PF || + pdev->device == PCI_DEVID_OTX2_CPT10K_PF) { cpt_pf_num = i; put_device(&pdev->dev); break; @@ -55,6 +79,17 @@ static bool is_cpt_vf(struct rvu *rvu, u16 pcifunc) return true; } +static int validate_and_get_cpt_blkaddr(int req_blkaddr) +{ + int blkaddr; + + blkaddr = req_blkaddr ? req_blkaddr : BLKADDR_CPT0; + if (blkaddr != BLKADDR_CPT0 && blkaddr != BLKADDR_CPT1) + return -EINVAL; + + return blkaddr; +} + int rvu_mbox_handler_cpt_lf_alloc(struct rvu *rvu, struct cpt_lf_alloc_req_msg *req, struct msg_rsp *rsp) @@ -65,9 +100,9 @@ int rvu_mbox_handler_cpt_lf_alloc(struct rvu *rvu, int num_lfs, slot; u64 val; - blkaddr = req->blkaddr ? req->blkaddr : BLKADDR_CPT0; - if (blkaddr != BLKADDR_CPT0 && blkaddr != BLKADDR_CPT1) - return -ENODEV; + blkaddr = validate_and_get_cpt_blkaddr(req->blkaddr); + if (blkaddr < 0) + return blkaddr; if (req->eng_grpmsk == 0x0) return CPT_AF_ERR_GRP_INVALID; @@ -103,6 +138,9 @@ int rvu_mbox_handler_cpt_lf_alloc(struct rvu *rvu, /* Set CPT LF group and priority */ val = (u64)req->eng_grpmsk << 48 | 1; + if (!is_rvu_otx2(rvu)) + val |= (CPT_CTX_ILEN << 17); + rvu_write64(rvu, blkaddr, CPT_AF_LFX_CTL(cptlf), val); /* Set CPT LF NIX_PF_FUNC and SSO_PF_FUNC */ @@ -162,7 +200,9 @@ static bool is_valid_offset(struct rvu *rvu, struct cpt_rd_wr_reg_msg *req) struct rvu_block *block; struct rvu_pfvf *pfvf; - blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_CPT, 0); + blkaddr = validate_and_get_cpt_blkaddr(req->blkaddr); + if (blkaddr < 0) + return blkaddr; /* Registers that can be accessed from PF/VF */ if ((offset & 0xFF000) == CPT_AF_LFX_CTL(0) || @@ -192,6 +232,7 @@ static bool is_valid_offset(struct rvu *rvu, struct cpt_rd_wr_reg_msg *req) case CPT_AF_PF_FUNC: case CPT_AF_BLK_RST: case CPT_AF_CONSTANTS1: + case CPT_AF_CTX_FLUSH_TIMER: return true; } @@ -217,9 +258,9 @@ int rvu_mbox_handler_cpt_rd_wr_register(struct rvu *rvu, { int blkaddr; - blkaddr = req->blkaddr ? 
req->blkaddr : BLKADDR_CPT0; - if (blkaddr != BLKADDR_CPT0 && blkaddr != BLKADDR_CPT1) - return -ENODEV; + blkaddr = validate_and_get_cpt_blkaddr(req->blkaddr); + if (blkaddr < 0) + return blkaddr; /* This message is accepted only if sent from CPT PF/VF */ if (!is_cpt_pf(rvu, req->hdr.pcifunc) && @@ -241,6 +282,141 @@ int rvu_mbox_handler_cpt_rd_wr_register(struct rvu *rvu, return 0; } +static void get_ctx_pc(struct rvu *rvu, struct cpt_sts_rsp *rsp, int blkaddr) +{ + if (is_rvu_otx2(rvu)) + return; + + rsp->ctx_mis_pc = rvu_read64(rvu, blkaddr, CPT_AF_CTX_MIS_PC); + rsp->ctx_hit_pc = rvu_read64(rvu, blkaddr, CPT_AF_CTX_HIT_PC); + rsp->ctx_aop_pc = rvu_read64(rvu, blkaddr, CPT_AF_CTX_AOP_PC); + rsp->ctx_aop_lat_pc = rvu_read64(rvu, blkaddr, + CPT_AF_CTX_AOP_LATENCY_PC); + rsp->ctx_ifetch_pc = rvu_read64(rvu, blkaddr, CPT_AF_CTX_IFETCH_PC); + rsp->ctx_ifetch_lat_pc = rvu_read64(rvu, blkaddr, + CPT_AF_CTX_IFETCH_LATENCY_PC); + rsp->ctx_ffetch_pc = rvu_read64(rvu, blkaddr, CPT_AF_CTX_FFETCH_PC); + rsp->ctx_ffetch_lat_pc = rvu_read64(rvu, blkaddr, + CPT_AF_CTX_FFETCH_LATENCY_PC); + rsp->ctx_wback_pc = rvu_read64(rvu, blkaddr, CPT_AF_CTX_FFETCH_PC); + rsp->ctx_wback_lat_pc = rvu_read64(rvu, blkaddr, + CPT_AF_CTX_FFETCH_LATENCY_PC); + rsp->ctx_psh_pc = rvu_read64(rvu, blkaddr, CPT_AF_CTX_FFETCH_PC); + rsp->ctx_psh_lat_pc = rvu_read64(rvu, blkaddr, + CPT_AF_CTX_FFETCH_LATENCY_PC); + rsp->ctx_err = rvu_read64(rvu, blkaddr, CPT_AF_CTX_ERR); + rsp->ctx_enc_id = rvu_read64(rvu, blkaddr, CPT_AF_CTX_ENC_ID); + rsp->ctx_flush_timer = rvu_read64(rvu, blkaddr, CPT_AF_CTX_FLUSH_TIMER); + + rsp->rxc_time = rvu_read64(rvu, blkaddr, CPT_AF_RXC_TIME); + rsp->rxc_time_cfg = rvu_read64(rvu, blkaddr, CPT_AF_RXC_TIME_CFG); + rsp->rxc_active_sts = rvu_read64(rvu, blkaddr, CPT_AF_RXC_ACTIVE_STS); + rsp->rxc_zombie_sts = rvu_read64(rvu, blkaddr, CPT_AF_RXC_ZOMBIE_STS); + rsp->rxc_dfrg = rvu_read64(rvu, blkaddr, CPT_AF_RXC_DFRG); + rsp->x2p_link_cfg0 = rvu_read64(rvu, blkaddr, CPT_AF_X2PX_LINK_CFG(0)); + rsp->x2p_link_cfg1 = rvu_read64(rvu, blkaddr, CPT_AF_X2PX_LINK_CFG(1)); +} + +static void get_eng_sts(struct rvu *rvu, struct cpt_sts_rsp *rsp, int blkaddr) +{ + u16 max_ses, max_ies, max_aes; + u32 e_min = 0, e_max = 0; + u64 reg; + + reg = rvu_read64(rvu, blkaddr, CPT_AF_CONSTANTS1); + max_ses = reg & 0xffff; + max_ies = (reg >> 16) & 0xffff; + max_aes = (reg >> 32) & 0xffff; + + /* Get AE status */ + e_min = max_ses + max_ies; + e_max = max_ses + max_ies + max_aes; + cpt_get_eng_sts(e_min, e_max, rsp, ae); + /* Get SE status */ + e_min = 0; + e_max = max_ses; + cpt_get_eng_sts(e_min, e_max, rsp, se); + /* Get IE status */ + e_min = max_ses; + e_max = max_ses + max_ies; + cpt_get_eng_sts(e_min, e_max, rsp, ie); +} + +int rvu_mbox_handler_cpt_sts(struct rvu *rvu, struct cpt_sts_req *req, + struct cpt_sts_rsp *rsp) +{ + int blkaddr; + + blkaddr = validate_and_get_cpt_blkaddr(req->blkaddr); + if (blkaddr < 0) + return blkaddr; + + /* This message is accepted only if sent from CPT PF/VF */ + if (!is_cpt_pf(rvu, req->hdr.pcifunc) && + !is_cpt_vf(rvu, req->hdr.pcifunc)) + return CPT_AF_ERR_ACCESS_DENIED; + + get_ctx_pc(rvu, rsp, blkaddr); + + /* Get CPT engines status */ + get_eng_sts(rvu, rsp, blkaddr); + + /* Read CPT instruction PC registers */ + rsp->inst_req_pc = rvu_read64(rvu, blkaddr, CPT_AF_INST_REQ_PC); + rsp->inst_lat_pc = rvu_read64(rvu, blkaddr, CPT_AF_INST_LATENCY_PC); + rsp->rd_req_pc = rvu_read64(rvu, blkaddr, CPT_AF_RD_REQ_PC); + rsp->rd_lat_pc = rvu_read64(rvu, blkaddr, CPT_AF_RD_LATENCY_PC); + 
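/* Remaining snapshot: the microcode-read and active-cycle counters,
+ * then the error-info, clock-count and diagnostic registers.
+ */
+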
rsp->rd_uc_pc = rvu_read64(rvu, blkaddr, CPT_AF_RD_UC_PC); + rsp->active_cycles_pc = rvu_read64(rvu, blkaddr, + CPT_AF_ACTIVE_CYCLES_PC); + rsp->exe_err_info = rvu_read64(rvu, blkaddr, CPT_AF_EXE_ERR_INFO); + rsp->cptclk_cnt = rvu_read64(rvu, blkaddr, CPT_AF_CPTCLK_CNT); + rsp->diag = rvu_read64(rvu, blkaddr, CPT_AF_DIAG); + + return 0; +} + +#define RXC_ZOMBIE_THRES GENMASK_ULL(59, 48) +#define RXC_ZOMBIE_LIMIT GENMASK_ULL(43, 32) +#define RXC_ACTIVE_THRES GENMASK_ULL(27, 16) +#define RXC_ACTIVE_LIMIT GENMASK_ULL(11, 0) +#define RXC_ACTIVE_COUNT GENMASK_ULL(60, 48) +#define RXC_ZOMBIE_COUNT GENMASK_ULL(60, 48) + +static void cpt_rxc_time_cfg(struct rvu *rvu, struct cpt_rxc_time_cfg_req *req, + int blkaddr) +{ + u64 dfrg_reg; + + dfrg_reg = FIELD_PREP(RXC_ZOMBIE_THRES, req->zombie_thres); + dfrg_reg |= FIELD_PREP(RXC_ZOMBIE_LIMIT, req->zombie_limit); + dfrg_reg |= FIELD_PREP(RXC_ACTIVE_THRES, req->active_thres); + dfrg_reg |= FIELD_PREP(RXC_ACTIVE_LIMIT, req->active_limit); + + rvu_write64(rvu, blkaddr, CPT_AF_RXC_TIME_CFG, req->step); + rvu_write64(rvu, blkaddr, CPT_AF_RXC_DFRG, dfrg_reg); +} + +int rvu_mbox_handler_cpt_rxc_time_cfg(struct rvu *rvu, + struct cpt_rxc_time_cfg_req *req, + struct msg_rsp *rsp) +{ + int blkaddr; + + blkaddr = validate_and_get_cpt_blkaddr(req->blkaddr); + if (blkaddr < 0) + return blkaddr; + + /* This message is accepted only if sent from CPT PF/VF */ + if (!is_cpt_pf(rvu, req->hdr.pcifunc) && + !is_cpt_vf(rvu, req->hdr.pcifunc)) + return CPT_AF_ERR_ACCESS_DENIED; + + cpt_rxc_time_cfg(rvu, req, blkaddr); + + return 0; +} + #define INPROG_INFLIGHT(reg) ((reg) & 0x1FF) #define INPROG_GRB_PARTIAL(reg) ((reg) & BIT_ULL(31)) #define INPROG_GRB(reg) (((reg) >> 32) & 0xFF) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index aa2ca8780b9c..9bf8eaabf9ab 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -234,12 +234,14 @@ static ssize_t rvu_dbg_rsrc_attach_status(struct file *filp, char __user *buffer, size_t count, loff_t *ppos) { - int index, off = 0, flag = 0, go_back = 0, off_prev; + int index, off = 0, flag = 0, go_back = 0, len = 0; struct rvu *rvu = filp->private_data; int lf, pf, vf, pcifunc; struct rvu_block block; int bytes_not_copied; + int lf_str_size = 12; int buf_size = 2048; + char *lfs; char *buf; /* don't allow partial reads */ @@ -249,12 +251,20 @@ static ssize_t rvu_dbg_rsrc_attach_status(struct file *filp, buf = kzalloc(buf_size, GFP_KERNEL); if (!buf) return -ENOSPC; - off += scnprintf(&buf[off], buf_size - 1 - off, "\npcifunc\t\t"); + + lfs = kzalloc(lf_str_size, GFP_KERNEL); + if (!lfs) { + kfree(buf); + return -ENOMEM; + } + off += scnprintf(&buf[off], buf_size - 1 - off, "%-*s", lf_str_size, + "pcifunc"); for (index = 0; index < BLK_COUNT; index++) - if (strlen(rvu->hw->block[index].name)) - off += scnprintf(&buf[off], buf_size - 1 - off, - "%*s\t", (index - 1) * 2, - rvu->hw->block[index].name); + if (strlen(rvu->hw->block[index].name)) { + off += scnprintf(&buf[off], buf_size - 1 - off, + "%-*s", lf_str_size, + rvu->hw->block[index].name); + } off += scnprintf(&buf[off], buf_size - 1 - off, "\n"); for (pf = 0; pf < rvu->hw->total_pfs; pf++) { for (vf = 0; vf <= rvu->hw->total_vfs; vf++) { @@ -263,14 +273,15 @@ static ssize_t rvu_dbg_rsrc_attach_status(struct file *filp, continue; if (vf) { + sprintf(lfs, "PF%d:VF%d", pf, vf - 1); go_back = scnprintf(&buf[off], buf_size - 1 - off, - 
"PF%d:VF%d\t\t", pf, - vf - 1); + "%-*s", lf_str_size, lfs); } else { + sprintf(lfs, "PF%d", pf); go_back = scnprintf(&buf[off], buf_size - 1 - off, - "PF%d\t\t", pf); + "%-*s", lf_str_size, lfs); } off += go_back; @@ -278,20 +289,22 @@ static ssize_t rvu_dbg_rsrc_attach_status(struct file *filp, block = rvu->hw->block[index]; if (!strlen(block.name)) continue; - off_prev = off; + len = 0; + lfs[len] = '\0'; for (lf = 0; lf < block.lf.max; lf++) { if (block.fn_map[lf] != pcifunc) continue; flag = 1; - off += scnprintf(&buf[off], buf_size - 1 - - off, "%3d,", lf); + len += sprintf(&lfs[len], "%d,", lf); } - if (flag && off_prev != off) - off--; - else - go_back++; + + if (flag) + len--; + lfs[len] = '\0'; off += scnprintf(&buf[off], buf_size - 1 - off, - "\t"); + "%-*s", lf_str_size, lfs); + if (!strlen(lfs)) + go_back += lf_str_size; } if (!flag) off -= go_back; @@ -303,6 +316,7 @@ static ssize_t rvu_dbg_rsrc_attach_status(struct file *filp, } bytes_not_copied = copy_to_user(buffer, buf, off); + kfree(lfs); kfree(buf); if (bytes_not_copied) @@ -319,7 +333,6 @@ static int rvu_dbg_rvu_pf_cgx_map_display(struct seq_file *filp, void *unused) struct rvu *rvu = filp->private; struct pci_dev *pdev = NULL; struct mac_ops *mac_ops; - int rvu_def_cgx_id = 0; char cgx[10], lmac[10]; struct rvu_pfvf *pfvf; int pf, domain, blkid; @@ -327,7 +340,10 @@ static int rvu_dbg_rvu_pf_cgx_map_display(struct seq_file *filp, void *unused) u16 pcifunc; domain = 2; - mac_ops = get_mac_ops(rvu_cgx_pdata(rvu_def_cgx_id, rvu)); + mac_ops = get_mac_ops(rvu_first_cgx_pdata(rvu)); + /* There can be no CGX devices at all */ + if (!mac_ops) + return 0; seq_printf(filp, "PCI dev\t\tRVU PF Func\tNIX block\t%s\tLMAC\n", mac_ops->name); for (pf = 0; pf < rvu->hw->total_pfs; pf++) { @@ -1818,7 +1834,6 @@ static void rvu_dbg_cgx_init(struct rvu *rvu) { struct mac_ops *mac_ops; unsigned long lmac_bmap; - int rvu_def_cgx_id = 0; int i, lmac_id; char dname[20]; void *cgx; @@ -1826,7 +1841,7 @@ static void rvu_dbg_cgx_init(struct rvu *rvu) if (!cgx_get_cgxcnt_max()) return; - mac_ops = get_mac_ops(rvu_cgx_pdata(rvu_def_cgx_id, rvu)); + mac_ops = get_mac_ops(rvu_first_cgx_pdata(rvu)); if (!mac_ops) return; @@ -2002,7 +2017,7 @@ static void rvu_dbg_npc_mcam_show_flows(struct seq_file *s, seq_printf(s, "mask 0x%x\n", ntohs(rule->mask.etype)); break; case NPC_OUTER_VID: - seq_printf(s, "%d ", ntohs(rule->packet.vlan_tci)); + seq_printf(s, "0x%x ", ntohs(rule->packet.vlan_tci)); seq_printf(s, "mask 0x%x\n", ntohs(rule->mask.vlan_tci)); break; @@ -2145,7 +2160,7 @@ static int rvu_dbg_npc_mcam_show_rules(struct seq_file *s, void *unused) seq_printf(s, "\tmcam entry: %d\n", iter->entry); rvu_dbg_npc_mcam_show_flows(s, iter); - if (iter->intf == NIX_INTF_RX) { + if (is_npc_intf_rx(iter->intf)) { target = iter->rx_action.pf_func; pf = (target >> RVU_PFVF_PF_SHIFT) & RVU_PFVF_PF_MASK; seq_printf(s, "\tForward to: PF%d ", pf); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index d3000194e2d3..0a8bd667cb11 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -273,7 +273,8 @@ static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf) pfvf->rx_chan_cnt = 1; pfvf->tx_chan_cnt = 1; rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf, - pfvf->rx_chan_base, false); + pfvf->rx_chan_base, + pfvf->rx_chan_cnt, false); break; } @@ -2629,7 +2630,7 @@ static int set_flowkey_fields(struct 
nix_rx_flowkey_alg *alg, u32 flow_cfg) struct nix_rx_flowkey_alg *field; struct nix_rx_flowkey_alg tmp; u32 key_type, valid_key; - int l4_key_offset; + int l4_key_offset = 0; if (!alg) return -EINVAL; @@ -3088,7 +3089,8 @@ int rvu_mbox_handler_nix_set_rx_mode(struct rvu *rvu, struct nix_rx_mode *req, rvu_npc_disable_promisc_entry(rvu, pcifunc, nixlf); else rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf, - pfvf->rx_chan_base, allmulti); + pfvf->rx_chan_base, + pfvf->rx_chan_cnt, allmulti); return 0; } @@ -3635,9 +3637,7 @@ int rvu_mbox_handler_nix_lf_stop_rx(struct rvu *rvu, struct msg_req *req, if (err) return err; - rvu_npc_disable_default_entries(rvu, pcifunc, nixlf); - - npc_mcam_disable_flows(rvu, pcifunc); + rvu_npc_disable_mcam_entries(rvu, pcifunc, nixlf); return rvu_cgx_start_stop_io(rvu, pcifunc, false); } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 04bb0803a5c5..0bc4529691ec 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -22,10 +22,6 @@ #define RSVD_MCAM_ENTRIES_PER_PF 2 /* Bcast & Promisc */ #define RSVD_MCAM_ENTRIES_PER_NIXLF 1 /* Ucast for LFs */ -#define NIXLF_UCAST_ENTRY 0 -#define NIXLF_BCAST_ENTRY 1 -#define NIXLF_PROMISC_ENTRY 2 - #define NPC_PARSE_RESULT_DMAC_OFFSET 8 #define NPC_HW_TSTAMP_OFFSET 8 #define NPC_KEX_CHAN_MASK 0xFFFULL @@ -96,6 +92,10 @@ int npc_mcam_verify_channel(struct rvu *rvu, u16 pcifunc, u8 intf, u16 channel) if (is_npc_intf_tx(intf)) return 0; + /* return in case of AF installed rules */ + if (is_pffunc_af(pcifunc)) + return 0; + if (is_afvf(pcifunc)) { end = rvu_get_num_lbk_chans(); if (end < 0) @@ -196,8 +196,8 @@ static int npc_get_ucast_mcam_index(struct npc_mcam *mcam, u16 pcifunc, return mcam->nixlf_offset + (max + nixlf) * RSVD_MCAM_ENTRIES_PER_NIXLF; } -static int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, - u16 pcifunc, int nixlf, int type) +int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, + u16 pcifunc, int nixlf, int type) { int pf = rvu_get_pf(pcifunc); int index; @@ -230,8 +230,8 @@ int npc_get_bank(struct npc_mcam *mcam, int index) return bank; } -static bool is_mcam_entry_enabled(struct rvu *rvu, struct npc_mcam *mcam, - int blkaddr, int index) +bool is_mcam_entry_enabled(struct rvu *rvu, struct npc_mcam *mcam, + int blkaddr, int index) { int bank = npc_get_bank(mcam, index); u64 cfg; @@ -647,13 +647,17 @@ void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc, } void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, - int nixlf, u64 chan, bool allmulti) + int nixlf, u64 chan, u8 chan_cnt, + bool allmulti) { struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc); + struct npc_install_flow_req req = { 0 }; + struct npc_install_flow_rsp rsp = { 0 }; struct npc_mcam *mcam = &rvu->hw->mcam; - int blkaddr, ucast_idx, index, kwi; - struct mcam_entry entry = { {0} }; - struct nix_rx_action action = { }; + int blkaddr, ucast_idx, index; + u8 mac_addr[ETH_ALEN] = { 0 }; + struct nix_rx_action action; + u64 relaxed_mask; /* Only PF or AF VF can add a promiscuous entry */ if ((pcifunc & RVU_PFVF_FUNC_MASK) && !is_afvf(pcifunc)) @@ -663,24 +667,15 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, if (blkaddr < 0) return; + *(u64 *)&action = 0x00; index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_PROMISC_ENTRY); - entry.kw[0] = chan; - entry.kw_mask[0] = 0xFFFULL; - - if (allmulti) { - kwi = NPC_KEXOF_DMAC / sizeof(u64); - entry.kw[kwi] = 
BIT_ULL(40); /* LSB bit of 1st byte in DMAC */ - entry.kw_mask[kwi] = BIT_ULL(40); - } - - ucast_idx = npc_get_nixlf_mcam_index(mcam, pcifunc, - nixlf, NIXLF_UCAST_ENTRY); - /* If the corresponding PF's ucast action is RSS, * use the same action for promisc also */ + ucast_idx = npc_get_nixlf_mcam_index(mcam, pcifunc, + nixlf, NIXLF_UCAST_ENTRY); if (is_mcam_entry_enabled(rvu, mcam, blkaddr, ucast_idx)) *(u64 *)&action = npc_get_mcam_action(rvu, mcam, blkaddr, ucast_idx); @@ -691,9 +686,36 @@ void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, action.pf_func = pcifunc; } - entry.action = *(u64 *)&action; - npc_config_mcam_entry(rvu, mcam, blkaddr, index, - pfvf->nix_rx_intf, &entry, true); + if (allmulti) { + mac_addr[0] = 0x01; /* LSB bit of 1st byte in DMAC */ + ether_addr_copy(req.packet.dmac, mac_addr); + ether_addr_copy(req.mask.dmac, mac_addr); + req.features = BIT_ULL(NPC_DMAC); + } + + req.chan_mask = 0xFFFU; + if (chan_cnt > 1) { + if (!is_power_of_2(chan_cnt)) { + dev_err(rvu->dev, + "%s: channel count greater than 1 must be a power of 2\n", __func__); + return; + } + relaxed_mask = GENMASK_ULL(BITS_PER_LONG_LONG - 1, + ilog2(chan_cnt)); + req.chan_mask &= relaxed_mask; + } + + req.channel = chan; + req.intf = pfvf->nix_rx_intf; + req.entry = index; + req.op = action.op; + req.hdr.pcifunc = 0; /* AF is requester */ + req.vf = pcifunc; + req.index = action.index; + req.match_id = action.match_id; + req.flow_key_alg = action.flow_key_alg; + + rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp); } static void npc_enadis_promisc_entry(struct rvu *rvu, u16 pcifunc, @@ -728,12 +750,14 @@ void rvu_npc_enable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf) void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc, int nixlf, u64 chan) { + struct rvu_pfvf *pfvf; + struct npc_install_flow_req req = { 0 }; + struct npc_install_flow_rsp rsp = { 0 }; struct npc_mcam *mcam = &rvu->hw->mcam; - struct mcam_entry entry = { {0} }; struct rvu_hwinfo *hw = rvu->hw; - struct nix_rx_action action; - struct rvu_pfvf *pfvf; int blkaddr, index; + u32 req_index = 0; + u8 op; blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); if (blkaddr < 0) @@ -755,32 +779,29 @@ void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc, index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_BCAST_ENTRY); - /* Match ingress channel */ - entry.kw[0] = chan; - entry.kw_mask[0] = 0xfffull; - - /* Match broadcast MAC address. - * DMAC is extracted at 0th bit of PARSE_KEX::KW1 - */ - entry.kw[1] = 0xffffffffffffull; - entry.kw_mask[1] = 0xffffffffffffull; - - *(u64 *)&action = 0x00; if (!hw->cap.nix_rx_multicast) { /* Early silicon doesn't support pkt replication, * so install entry with UCAST action, so that PF * receives all broadcast packets.
*/ - action.op = NIX_RX_ACTIONOP_UCAST; - action.pf_func = pcifunc; + op = NIX_RX_ACTIONOP_UCAST; } else { - action.index = pfvf->bcast_mce_idx; - action.op = NIX_RX_ACTIONOP_MCAST; + op = NIX_RX_ACTIONOP_MCAST; + req_index = pfvf->bcast_mce_idx; } - entry.action = *(u64 *)&action; - npc_config_mcam_entry(rvu, mcam, blkaddr, index, - pfvf->nix_rx_intf, &entry, true); + eth_broadcast_addr((u8 *)&req.packet.dmac); + eth_broadcast_addr((u8 *)&req.mask.dmac); + req.features = BIT_ULL(NPC_DMAC); + req.channel = chan; + req.intf = pfvf->nix_rx_intf; + req.entry = index; + req.op = op; + req.hdr.pcifunc = 0; /* AF is requester */ + req.vf = pcifunc; + req.index = req_index; + + rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp); } void rvu_npc_enable_bcast_entry(struct rvu *rvu, u16 pcifunc, bool enable) @@ -967,7 +988,7 @@ void rvu_npc_disable_mcam_entries(struct rvu *rvu, u16 pcifunc, int nixlf) { struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc); struct npc_mcam *mcam = &rvu->hw->mcam; - struct rvu_npc_mcam_rule *rule; + struct rvu_npc_mcam_rule *rule, *tmp; int blkaddr; blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); @@ -977,15 +998,18 @@ void rvu_npc_disable_mcam_entries(struct rvu *rvu, u16 pcifunc, int nixlf) mutex_lock(&mcam->lock); /* Disable MCAM entries directing traffic to this 'pcifunc' */ - list_for_each_entry(rule, &mcam->mcam_rules, list) { + list_for_each_entry_safe(rule, tmp, &mcam->mcam_rules, list) { if (is_npc_intf_rx(rule->intf) && rule->rx_action.pf_func == pcifunc) { npc_enable_mcam_entry(rvu, mcam, blkaddr, rule->entry, false); rule->enable = false; /* Indicate that default rule is disabled */ - if (rule->default_rule) + if (rule->default_rule) { pfvf->def_ucast_rule = NULL; + list_del(&rule->list); + kfree(rule); + } } } @@ -1674,6 +1698,9 @@ void rvu_npc_get_mcam_counter_alloc_info(struct rvu *rvu, u16 pcifunc, static int npc_mcam_verify_entry(struct npc_mcam *mcam, u16 pcifunc, int entry) { + /* verify AF installed entries */ + if (is_pffunc_af(pcifunc)) + return 0; /* Verify if entry is valid and if it is indeed * allocated to the requesting PFFUNC. 
*/ @@ -2268,6 +2295,10 @@ int rvu_mbox_handler_npc_mcam_write_entry(struct rvu *rvu, goto exit; } + /* For AF installed rules, the nix_intf should be set to target NIX */ + if (is_pffunc_af(req->hdr.pcifunc)) + nix_intf = req->intf; + npc_config_mcam_entry(rvu, mcam, blkaddr, req->entry, nix_intf, &req->entry_data, req->enable_entry); @@ -2490,10 +2521,10 @@ int rvu_mbox_handler_npc_mcam_free_counter(struct rvu *rvu, index = find_next_bit(mcam->bmap, mcam->bmap_entries, entry); if (index >= mcam->bmap_entries) break; + entry = index + 1; if (mcam->entry2cntr_map[index] != req->cntr) continue; - entry = index + 1; npc_unmap_mcam_entry_and_cntr(rvu, mcam, blkaddr, index, req->cntr); } @@ -2730,30 +2761,6 @@ int rvu_mbox_handler_npc_get_kex_cfg(struct rvu *rvu, struct msg_req *req, return 0; } -bool rvu_npc_write_default_rule(struct rvu *rvu, int blkaddr, int nixlf, - u16 pcifunc, u8 intf, struct mcam_entry *entry, - int *index) -{ - struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc); - struct npc_mcam *mcam = &rvu->hw->mcam; - bool enable; - u8 nix_intf; - - if (is_npc_intf_tx(intf)) - nix_intf = pfvf->nix_tx_intf; - else - nix_intf = pfvf->nix_rx_intf; - - *index = npc_get_nixlf_mcam_index(mcam, pcifunc, - nixlf, NIXLF_UCAST_ENTRY); - /* dont force enable unicast entry */ - enable = is_mcam_entry_enabled(rvu, mcam, blkaddr, *index); - npc_config_mcam_entry(rvu, mcam, blkaddr, *index, nix_intf, - entry, enable); - - return enable; -} - int rvu_mbox_handler_npc_read_base_steer_rule(struct rvu *rvu, struct msg_req *req, struct npc_mcam_read_base_rule_rsp *rsp) @@ -2799,3 +2806,42 @@ read_entry: out: return rc; } + +int rvu_mbox_handler_npc_mcam_entry_stats(struct rvu *rvu, + struct npc_mcam_get_stats_req *req, + struct npc_mcam_get_stats_rsp *rsp) +{ + struct npc_mcam *mcam = &rvu->hw->mcam; + u16 index, cntr; + int blkaddr; + u64 regval; + u32 bank; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); + if (blkaddr < 0) + return NPC_MCAM_INVALID_REQ; + + mutex_lock(&mcam->lock); + + index = req->entry & (mcam->banksize - 1); + bank = npc_get_bank(mcam, req->entry); + + /* read MCAM entry STAT_ACT register */ + regval = rvu_read64(rvu, blkaddr, NPC_AF_MCAMEX_BANKX_STAT_ACT(index, bank)); + + if (!(regval & BIT_ULL(9))) { + rsp->stat_ena = 0; + mutex_unlock(&mcam->lock); + return 0; + } + + cntr = regval & 0x1FF; + + rsp->stat_ena = 1; + rsp->stat = rvu_read64(rvu, blkaddr, NPC_AF_MATCH_STATX(cntr)); + rsp->stat &= BIT_ULL(48) - 1; + + mutex_unlock(&mcam->lock); + + return 0; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index 4ba9d54ce4e3..7f35b62eea13 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -29,6 +29,8 @@ static const char * const npc_flow_names[] = { [NPC_IPPROTO_TCP] = "ip proto tcp", [NPC_IPPROTO_UDP] = "ip proto udp", [NPC_IPPROTO_SCTP] = "ip proto sctp", + [NPC_IPPROTO_ICMP] = "ip proto icmp", + [NPC_IPPROTO_ICMP6] = "ip proto icmp6", [NPC_IPPROTO_AH] = "ip proto AH", [NPC_IPPROTO_ESP] = "ip proto ESP", [NPC_SPORT_TCP] = "tcp source port", @@ -427,6 +429,7 @@ do { \ * packet header fields below. 
* Example: Source IP is 4 bytes and starts at 12th byte of IP header */ + NPC_SCAN_HDR(NPC_TOS, NPC_LID_LC, NPC_LT_LC_IP, 1, 1); NPC_SCAN_HDR(NPC_SIP_IPV4, NPC_LID_LC, NPC_LT_LC_IP, 12, 4); NPC_SCAN_HDR(NPC_DIP_IPV4, NPC_LID_LC, NPC_LT_LC_IP, 16, 4); NPC_SCAN_HDR(NPC_SIP_IPV6, NPC_LID_LC, NPC_LT_LC_IP6, 8, 16); @@ -477,9 +480,12 @@ static void npc_set_features(struct rvu *rvu, int blkaddr, u8 intf) BIT_ULL(NPC_IPPROTO_SCTP); } - /* for AH, check if corresponding layer type is present in the key */ - if (npc_check_field(rvu, blkaddr, NPC_LD, intf)) + /* for AH/ICMP/ICMPv6, check if corresponding layer type is present in the key */ + if (npc_check_field(rvu, blkaddr, NPC_LD, intf)) { *features |= BIT_ULL(NPC_IPPROTO_AH); + *features |= BIT_ULL(NPC_IPPROTO_ICMP); + *features |= BIT_ULL(NPC_IPPROTO_ICMP6); + } /* for ESP, check if corresponding layer type is present in the key */ if (npc_check_field(rvu, blkaddr, NPC_LE, intf)) @@ -597,7 +603,7 @@ static int npc_check_unsupported_flows(struct rvu *rvu, u64 features, u8 intf) dev_info(rvu->dev, "Unsupported flow(s):\n"); for_each_set_bit(bit, (unsigned long *)&unsupported, 64) dev_info(rvu->dev, "%s ", npc_get_field_name(bit)); - return -EOPNOTSUPP; + return NIX_AF_ERR_NPC_KEY_NOT_SUPP; } return 0; @@ -769,6 +775,12 @@ static void npc_update_flow(struct rvu *rvu, struct mcam_entry *entry, if (features & BIT_ULL(NPC_IPPROTO_SCTP)) npc_update_entry(rvu, NPC_LD, entry, NPC_LT_LD_SCTP, 0, ~0ULL, 0, intf); + if (features & BIT_ULL(NPC_IPPROTO_ICMP)) + npc_update_entry(rvu, NPC_LD, entry, NPC_LT_LD_ICMP, + 0, ~0ULL, 0, intf); + if (features & BIT_ULL(NPC_IPPROTO_ICMP6)) + npc_update_entry(rvu, NPC_LD, entry, NPC_LT_LD_ICMP6, + 0, ~0ULL, 0, intf); if (features & BIT_ULL(NPC_OUTER_VID)) npc_update_entry(rvu, NPC_LB, entry, @@ -798,6 +810,7 @@ do { \ NPC_WRITE_FLOW(NPC_SMAC, smac, smac_val, 0, smac_mask, 0); NPC_WRITE_FLOW(NPC_ETYPE, etype, ntohs(pkt->etype), 0, ntohs(mask->etype), 0); + NPC_WRITE_FLOW(NPC_TOS, tos, pkt->tos, 0, mask->tos, 0); NPC_WRITE_FLOW(NPC_SIP_IPV4, ip4src, ntohl(pkt->ip4src), 0, ntohl(mask->ip4src), 0); NPC_WRITE_FLOW(NPC_DIP_IPV4, ip4dst, ntohl(pkt->ip4dst), 0, @@ -903,9 +916,11 @@ static void npc_update_rx_entry(struct rvu *rvu, struct rvu_pfvf *pfvf, struct npc_install_flow_req *req, u16 target) { struct nix_rx_action action; + u64 chan_mask; - npc_update_entry(rvu, NPC_CHAN, entry, req->channel, 0, - ~0ULL, 0, NIX_INTF_RX); + chan_mask = req->chan_mask ?
req->chan_mask : ~0ULL; + npc_update_entry(rvu, NPC_CHAN, entry, req->channel, 0, chan_mask, 0, + NIX_INTF_RX); *(u64 *)&action = 0x00; action.pf_func = target; @@ -998,33 +1013,21 @@ static int npc_install_flow(struct rvu *rvu, int blkaddr, u16 target, if (is_npc_intf_tx(req->intf)) goto find_rule; - if (def_ucast_rule) + if (req->default_rule) { + entry_index = npc_get_nixlf_mcam_index(mcam, target, nixlf, + NIXLF_UCAST_ENTRY); + enable = is_mcam_entry_enabled(rvu, mcam, blkaddr, entry_index); + } + + /* update mcam entry with default unicast rule attributes */ + if (def_ucast_rule && (msg_from_vf || (req->default_rule && req->append))) { missing_features = (def_ucast_rule->features ^ features) & def_ucast_rule->features; - - if (req->default_rule && req->append) { - /* add to default rule */ if (missing_features) npc_update_flow(rvu, entry, missing_features, &def_ucast_rule->packet, &def_ucast_rule->mask, &dummy, req->intf); - enable = rvu_npc_write_default_rule(rvu, blkaddr, - nixlf, target, - pfvf->nix_rx_intf, entry, - &entry_index); - installed_features = req->features | missing_features; - } else if (req->default_rule && !req->append) { - /* overwrite default rule */ - enable = rvu_npc_write_default_rule(rvu, blkaddr, - nixlf, target, - pfvf->nix_rx_intf, entry, - &entry_index); - } else if (msg_from_vf) { - /* normal rule - include default rule also to it for VF */ - npc_update_flow(rvu, entry, missing_features, - &def_ucast_rule->packet, &def_ucast_rule->mask, - &dummy, req->intf); installed_features = req->features | missing_features; } @@ -1036,12 +1039,9 @@ find_rule: return -ENOMEM; new = true; } - /* no counter for default rule */ - if (req->default_rule) - goto update_rule; /* allocate new counter if rule has no counter */ - if (req->set_cntr && !rule->has_cntr) + if (!req->default_rule && req->set_cntr && !rule->has_cntr) rvu_mcam_add_counter_to_rule(rvu, owner, rule, rsp); /* if user wants to delete an existing counter for a rule then @@ -1051,7 +1051,14 @@ find_rule: rvu_mcam_remove_counter_from_rule(rvu, owner, rule); write_req.hdr.pcifunc = owner; - write_req.entry = req->entry; + + /* AF owns the default rules so change the owner just to relax + * the checks in rvu_mbox_handler_npc_mcam_write_entry + */ + if (req->default_rule) + write_req.hdr.pcifunc = 0; + + write_req.entry = entry_index; write_req.intf = req->intf; write_req.enable_entry = (u8)enable; /* if counter is available then clear and use it */ @@ -1069,7 +1076,7 @@ find_rule: kfree(rule); return err; } -update_rule: + /* update rule */ memcpy(&rule->packet, &dummy.packet, sizeof(rule->packet)); memcpy(&rule->mask, &dummy.mask, sizeof(rule->mask)); rule->entry = entry_index; @@ -1145,8 +1152,13 @@ int rvu_mbox_handler_npc_install_flow(struct rvu *rvu, else target = req->hdr.pcifunc; - if (npc_check_unsupported_flows(rvu, req->features, req->intf)) - return -EOPNOTSUPP; + /* ignore chan_mask in case pf func is not AF, revisit later */ + if (!is_pffunc_af(req->hdr.pcifunc)) + req->chan_mask = 0xFFF; + + err = npc_check_unsupported_flows(rvu, req->features, req->intf); + if (err) + return err; if (npc_mcam_verify_channel(rvu, target, req->intf, req->channel)) return -EINVAL; @@ -1278,6 +1290,7 @@ static int npc_update_dmac_value(struct rvu *rvu, int npcblkaddr, write_req.hdr.pcifunc = rule->owner; write_req.entry = rule->entry; + write_req.intf = pfvf->nix_rx_intf; mutex_unlock(&mcam->lock); err = rvu_mbox_handler_npc_mcam_write_entry(rvu, &write_req, &rsp); diff --git 
a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h index 3e401fd8ac63..ac71c0f2f960 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h @@ -494,6 +494,27 @@ #define CPT_AF_RAS_INT_W1S (0x47028) #define CPT_AF_RAS_INT_ENA_W1S (0x47030) #define CPT_AF_RAS_INT_ENA_W1C (0x47038) +#define CPT_AF_CTX_FLUSH_TIMER (0x48000ull) +#define CPT_AF_CTX_ERR (0x48008ull) +#define CPT_AF_CTX_ENC_ID (0x48010ull) +#define CPT_AF_CTX_MIS_PC (0x49400ull) +#define CPT_AF_CTX_HIT_PC (0x49408ull) +#define CPT_AF_CTX_AOP_PC (0x49410ull) +#define CPT_AF_CTX_AOP_LATENCY_PC (0x49418ull) +#define CPT_AF_CTX_IFETCH_PC (0x49420ull) +#define CPT_AF_CTX_IFETCH_LATENCY_PC (0x49428ull) +#define CPT_AF_CTX_FFETCH_PC (0x49430ull) +#define CPT_AF_CTX_FFETCH_LATENCY_PC (0x49438ull) +#define CPT_AF_CTX_WBACK_PC (0x49440ull) +#define CPT_AF_CTX_WBACK_LATENCY_PC (0x49448ull) +#define CPT_AF_CTX_PSH_PC (0x49450ull) +#define CPT_AF_CTX_PSH_LATENCY_PC (0x49458ull) +#define CPT_AF_RXC_TIME (0x50010ull) +#define CPT_AF_RXC_TIME_CFG (0x50018ull) +#define CPT_AF_RXC_DFRG (0x50020ull) +#define CPT_AF_RXC_ACTIVE_STS (0x50028ull) +#define CPT_AF_RXC_ZOMBIE_STS (0x50030ull) +#define CPT_AF_X2PX_LINK_CFG(a) (0x51000ull | (u64)(a) << 3) #define AF_BAR2_ALIASX(a, b) (0x9100000ull | (a) << 12 | (b)) #define CPT_AF_BAR2_SEL 0x9000000 diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile index 745aa8a19499..457c94793e63 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_OCTEONTX2_PF) += rvu_nicpf.o obj-$(CONFIG_OCTEONTX2_VF) += rvu_nicvf.o rvu_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \ - otx2_ptp.o otx2_flows.o cn10k.o + otx2_ptp.o otx2_flows.o otx2_tc.o cn10k.o rvu_nicvf-y := otx2_vf.o ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index a518c2283f18..45730d0d92f2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -18,6 +18,7 @@ #include <linux/ptp_clock_kernel.h> #include <linux/timecounter.h> #include <linux/soc/marvell/octeontx2/asm.h> +#include <net/pkt_cls.h> #include <mbox.h> #include <npc.h> @@ -264,6 +265,7 @@ struct otx2_flow_config { #define OTX2_MAX_NTUPLE_FLOWS 32 #define OTX2_MAX_UNICAST_FLOWS 8 #define OTX2_MAX_VLAN_FLOWS 1 +#define OTX2_MAX_TC_FLOWS OTX2_MAX_NTUPLE_FLOWS #define OTX2_MCAM_COUNT (OTX2_MAX_NTUPLE_FLOWS + \ OTX2_MAX_UNICAST_FLOWS + \ OTX2_MAX_VLAN_FLOWS) @@ -274,10 +276,20 @@ struct otx2_flow_config { #define OTX2_PER_VF_VLAN_FLOWS 2 /* rx+tx per VF */ #define OTX2_VF_VLAN_RX_INDEX 0 #define OTX2_VF_VLAN_TX_INDEX 1 + u32 tc_flower_offset; u32 ntuple_max_flows; + u32 tc_max_flows; struct list_head flow_list; }; +struct otx2_tc_info { + /* hash table to store TC offloaded flows */ + struct rhashtable flow_table; + struct rhashtable_params flow_ht_params; + DECLARE_BITMAP(tc_entries_bitmap, OTX2_MAX_TC_FLOWS); + unsigned long num_entries; +}; + struct dev_hw_ops { int (*sq_aq_init)(void *dev, u16 qidx, u16 sqb_aura); void (*sqe_flush)(void *dev, struct otx2_snd_queue *sq, @@ -305,6 +317,8 @@ struct otx2_nic { #define OTX2_FLAG_PF_SHUTDOWN BIT_ULL(8) #define 
OTX2_FLAG_RX_PAUSE_ENABLED BIT_ULL(9) #define OTX2_FLAG_TX_PAUSE_ENABLED BIT_ULL(10) +#define OTX2_FLAG_TC_FLOWER_SUPPORT BIT_ULL(11) +#define OTX2_FLAG_TC_MATCHALL_EGRESS_ENABLED BIT_ULL(12) u64 flags; struct otx2_qset qset; @@ -347,6 +361,7 @@ struct otx2_nic { struct hwtstamp_config tstamp; struct otx2_flow_config *flow_cfg; + struct otx2_tc_info tc_info; }; static inline bool is_otx2_lbkvf(struct pci_dev *pdev) @@ -802,4 +817,9 @@ int otx2_add_macfilter(struct net_device *netdev, const u8 *mac); int otx2_enable_rxvlan(struct otx2_nic *pf, bool enable); int otx2_install_rxvlan_offload_flow(struct otx2_nic *pfvf); u16 otx2_get_max_mtu(struct otx2_nic *pfvf); +/* tc support */ +int otx2_init_tc(struct otx2_nic *nic); +void otx2_shutdown_tc(struct otx2_nic *nic); +int otx2_setup_tc(struct net_device *netdev, enum tc_setup_type type, + void *type_data); #endif /* OTX2_COMMON_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c index 0dbbf38e0597..0b4fa92ba821 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c @@ -57,10 +57,13 @@ int otx2_alloc_mcam_entries(struct otx2_nic *pfvf) flow_cfg->ntuple_max_flows = rsp->count; flow_cfg->ntuple_offset = 0; pfvf->flags |= OTX2_FLAG_NTUPLE_SUPPORT; + flow_cfg->tc_max_flows = flow_cfg->ntuple_max_flows; + pfvf->flags |= OTX2_FLAG_TC_FLOWER_SUPPORT; } else { flow_cfg->vf_vlan_offset = 0; flow_cfg->ntuple_offset = flow_cfg->vf_vlan_offset + vf_vlan_max_flows; + flow_cfg->tc_flower_offset = flow_cfg->ntuple_offset; flow_cfg->unicast_offset = flow_cfg->ntuple_offset + OTX2_MAX_NTUPLE_FLOWS; flow_cfg->rx_vlan_offset = flow_cfg->unicast_offset + @@ -69,6 +72,7 @@ int otx2_alloc_mcam_entries(struct otx2_nic *pfvf) pfvf->flags |= OTX2_FLAG_UCAST_FLTR_SUPPORT; pfvf->flags |= OTX2_FLAG_RX_VLAN_SUPPORT; pfvf->flags |= OTX2_FLAG_VF_VLAN_SUPPORT; + pfvf->flags |= OTX2_FLAG_TC_FLOWER_SUPPORT; } for (i = 0; i < rsp->count; i++) @@ -93,6 +97,7 @@ int otx2_mcam_flow_init(struct otx2_nic *pf) INIT_LIST_HEAD(&pf->flow_cfg->flow_list); pf->flow_cfg->ntuple_max_flows = OTX2_MAX_NTUPLE_FLOWS; + pf->flow_cfg->tc_max_flows = pf->flow_cfg->ntuple_max_flows; err = otx2_alloc_mcam_entries(pf); if (err) @@ -257,17 +262,19 @@ int otx2_get_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc, int otx2_get_all_flows(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc, u32 *rule_locs) { + u32 rule_cnt = nfc->rule_cnt; u32 location = 0; int idx = 0; int err = 0; nfc->data = pfvf->flow_cfg->ntuple_max_flows; - while ((!err || err == -ENOENT) && idx < nfc->rule_cnt) { + while ((!err || err == -ENOENT) && idx < rule_cnt) { err = otx2_get_flow(pfvf, nfc, location); if (!err) rule_locs[idx++] = location; location++; } + nfc->rule_cnt = rule_cnt; return err; } @@ -301,6 +308,35 @@ static int otx2_prepare_ipv4_flow(struct ethtool_rx_flow_spec *fsp, sizeof(pmask->ip4dst)); req->features |= BIT_ULL(NPC_DIP_IPV4); } + if (ipv4_usr_mask->tos) { + pkt->tos = ipv4_usr_hdr->tos; + pmask->tos = ipv4_usr_mask->tos; + req->features |= BIT_ULL(NPC_TOS); + } + if (ipv4_usr_mask->proto) { + switch (ipv4_usr_hdr->proto) { + case IPPROTO_ICMP: + req->features |= BIT_ULL(NPC_IPPROTO_ICMP); + break; + case IPPROTO_TCP: + req->features |= BIT_ULL(NPC_IPPROTO_TCP); + break; + case IPPROTO_UDP: + req->features |= BIT_ULL(NPC_IPPROTO_UDP); + break; + case IPPROTO_SCTP: + req->features |= BIT_ULL(NPC_IPPROTO_SCTP); + break; + case IPPROTO_AH: + req->features 
|= BIT_ULL(NPC_IPPROTO_AH); + break; + case IPPROTO_ESP: + req->features |= BIT_ULL(NPC_IPPROTO_ESP); + break; + default: + return -EOPNOTSUPP; + } + } pkt->etype = cpu_to_be16(ETH_P_IP); pmask->etype = cpu_to_be16(0xFFFF); req->features |= BIT_ULL(NPC_ETYPE); @@ -325,6 +361,11 @@ static int otx2_prepare_ipv4_flow(struct ethtool_rx_flow_spec *fsp, sizeof(pmask->ip4dst)); req->features |= BIT_ULL(NPC_DIP_IPV4); } + if (ipv4_l4_mask->tos) { + pkt->tos = ipv4_l4_hdr->tos; + pmask->tos = ipv4_l4_mask->tos; + req->features |= BIT_ULL(NPC_TOS); + } if (ipv4_l4_mask->psrc) { memcpy(&pkt->sport, &ipv4_l4_hdr->psrc, sizeof(pkt->sport)); @@ -375,10 +416,14 @@ static int otx2_prepare_ipv4_flow(struct ethtool_rx_flow_spec *fsp, sizeof(pmask->ip4dst)); req->features |= BIT_ULL(NPC_DIP_IPV4); } + if (ah_esp_mask->tos) { + pkt->tos = ah_esp_hdr->tos; + pmask->tos = ah_esp_mask->tos; + req->features |= BIT_ULL(NPC_TOS); + } /* NPC profile doesn't extract AH/ESP header fields */ - if ((ah_esp_mask->spi & ah_esp_hdr->spi) || - (ah_esp_mask->tos & ah_esp_mask->tos)) + if (ah_esp_mask->spi & ah_esp_hdr->spi) return -EOPNOTSUPP; if (flow_type == AH_V4_FLOW) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 53ab1814d74b..03004fdac0c6 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1672,6 +1672,7 @@ int otx2_stop(struct net_device *netdev) struct otx2_nic *pf = netdev_priv(netdev); struct otx2_cq_poll *cq_poll = NULL; struct otx2_qset *qset = &pf->qset; + struct otx2_rss_info *rss; int qidx, vec, wrk; netif_carrier_off(netdev); @@ -1684,6 +1685,10 @@ int otx2_stop(struct net_device *netdev) /* First stop packet Rx/Tx */ otx2_rxtx_enable(pf, false); + /* Clear RSS enable flag */ + rss = &pf->hw.rss_info; + rss->enable = false; + /* Cleanup Queue IRQ */ vec = pci_irq_vector(pf->pdev, pf->hw.nix_msixoff + NIX_LF_QINT_VEC_START); @@ -1760,6 +1765,24 @@ static netdev_tx_t otx2_xmit(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_OK; } +static netdev_features_t otx2_fix_features(struct net_device *dev, + netdev_features_t features) +{ + /* check if n-tuple filters are ON */ + if ((features & NETIF_F_HW_TC) && (dev->features & NETIF_F_NTUPLE)) { + netdev_info(dev, "Disabling n-tuple filters\n"); + features &= ~NETIF_F_NTUPLE; + } + + /* check if tc hw offload is ON */ + if ((features & NETIF_F_NTUPLE) && (dev->features & NETIF_F_HW_TC)) { + netdev_info(dev, "Disabling TC hardware offload\n"); + features &= ~NETIF_F_HW_TC; + } + + return features; +} + static void otx2_set_rx_mode(struct net_device *netdev) { struct otx2_nic *pf = netdev_priv(netdev); @@ -1822,6 +1845,12 @@ static int otx2_set_features(struct net_device *netdev, if ((changed & NETIF_F_NTUPLE) && !ntuple) otx2_destroy_ntuple_flows(pf); + if ((netdev->features & NETIF_F_HW_TC) > (features & NETIF_F_HW_TC) && + pf->tc_info.num_entries) { + netdev_err(netdev, "Can't disable TC hardware offload while flows are active\n"); + return -EBUSY; + } + return 0; } @@ -2220,6 +2249,7 @@ static const struct net_device_ops otx2_netdev_ops = { .ndo_open = otx2_open, .ndo_stop = otx2_stop, .ndo_start_xmit = otx2_xmit, + .ndo_fix_features = otx2_fix_features, .ndo_set_mac_address = otx2_set_mac_address, .ndo_change_mtu = otx2_change_mtu, .ndo_set_rx_mode = otx2_set_rx_mode, @@ -2230,6 +2260,7 @@ static const struct net_device_ops otx2_netdev_ops = { .ndo_set_vf_mac = otx2_set_vf_mac, .ndo_set_vf_vlan 
= otx2_set_vf_vlan, .ndo_get_vf_config = otx2_get_vf_config, + .ndo_setup_tc = otx2_setup_tc, }; static int otx2_wq_init(struct otx2_nic *pf) @@ -2449,6 +2480,10 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) NETIF_F_HW_VLAN_STAG_RX; netdev->features |= netdev->hw_features; + /* HW supports tc offload but mutually exclusive with n-tuple filters */ + if (pf->flags & OTX2_FLAG_TC_FLOWER_SUPPORT) + netdev->hw_features |= NETIF_F_HW_TC; + netdev->gso_max_segs = OTX2_MAX_GSO_SEGS; netdev->watchdog_timeo = OTX2_TX_TIMEOUT; @@ -2470,6 +2505,10 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) otx2_set_ethtool_ops(netdev); + err = otx2_init_tc(pf); + if (err) + goto err_mcam_flow_del; + /* Enable link notifications */ otx2_cgx_config_linkevents(pf, true); @@ -2479,6 +2518,8 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; +err_mcam_flow_del: + otx2_mcam_flow_del(pf); err_unreg_netdev: unregister_netdev(netdev); err_del_mcam_entries: @@ -2646,6 +2687,7 @@ static void otx2_remove(struct pci_dev *pdev) otx2_ptp_destroy(pf); otx2_mcam_flow_del(pf); + otx2_shutdown_tc(pf); otx2_detach_resources(&pf->mbox); if (pf->hw.lmt_base) iounmap(pf->hw.lmt_base); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h index 21b811c6ee0f..f4fd72ee9a25 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h @@ -152,6 +152,7 @@ #define NIX_AF_TL3X_SCHEDULE(a) (0x1000 | (a) << 16) #define NIX_AF_TL4X_PARENT(a) (0x1288 | (a) << 16) #define NIX_AF_TL4X_SCHEDULE(a) (0x1200 | (a) << 16) +#define NIX_AF_TL4X_PIR(a) (0x1230 | (a) << 16) #define NIX_AF_MDQX_SCHEDULE(a) (0x1400 | (a) << 16) #define NIX_AF_MDQX_PARENT(a) (0x1480 | (a) << 16) #define NIX_AF_TL3_TL2X_LINKX_CFG(a, b) (0x1700 | (a) << 16 | (b) << 3) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c new file mode 100644 index 000000000000..51157b283f6f --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c @@ -0,0 +1,787 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTx2 RVU Physical Function ethernet driver + * + * Copyright (C) 2021 Marvell.
+ */ +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/inetdevice.h> +#include <linux/rhashtable.h> +#include <linux/bitfield.h> +#include <net/flow_dissector.h> +#include <net/pkt_cls.h> +#include <net/tc_act/tc_gact.h> +#include <net/tc_act/tc_mirred.h> +#include <net/tc_act/tc_vlan.h> +#include <net/ipv6.h> + +#include "otx2_common.h" + +/* Egress rate limiting definitions */ +#define MAX_BURST_EXPONENT 0x0FULL +#define MAX_BURST_MANTISSA 0xFFULL +#define MAX_BURST_SIZE 130816ULL +#define MAX_RATE_DIVIDER_EXPONENT 12ULL +#define MAX_RATE_EXPONENT 0x0FULL +#define MAX_RATE_MANTISSA 0xFFULL + +/* Bitfields in NIX_TLX_PIR register */ +#define TLX_RATE_MANTISSA GENMASK_ULL(8, 1) +#define TLX_RATE_EXPONENT GENMASK_ULL(12, 9) +#define TLX_RATE_DIVIDER_EXPONENT GENMASK_ULL(16, 13) +#define TLX_BURST_MANTISSA GENMASK_ULL(36, 29) +#define TLX_BURST_EXPONENT GENMASK_ULL(40, 37) + +struct otx2_tc_flow_stats { + u64 bytes; + u64 pkts; + u64 used; +}; + +struct otx2_tc_flow { + struct rhash_head node; + unsigned long cookie; + u16 entry; + unsigned int bitpos; + struct rcu_head rcu; + struct otx2_tc_flow_stats stats; + spinlock_t lock; /* lock for stats */ +}; + +static void otx2_get_egress_burst_cfg(u32 burst, u32 *burst_exp, + u32 *burst_mantissa) +{ + unsigned int tmp; + + /* Burst is calculated as + * ((256 + BURST_MANTISSA) << (1 + BURST_EXPONENT)) / 256 + * Max supported burst size is 130,816 bytes. + */ + burst = min_t(u32, burst, MAX_BURST_SIZE); + if (burst) { + *burst_exp = ilog2(burst) ? ilog2(burst) - 1 : 0; + tmp = burst - rounddown_pow_of_two(burst); + if (burst < MAX_BURST_MANTISSA) + *burst_mantissa = tmp * 2; + else + *burst_mantissa = tmp / (1ULL << (*burst_exp - 7)); + } else { + *burst_exp = MAX_BURST_EXPONENT; + *burst_mantissa = MAX_BURST_MANTISSA; + } +} + +static void otx2_get_egress_rate_cfg(u32 maxrate, u32 *exp, + u32 *mantissa, u32 *div_exp) +{ + unsigned int tmp; + + /* Rate calculation by hardware + * + * PIR_ADD = ((256 + mantissa) << exp) / 256 + * rate = (2 * PIR_ADD) / ( 1 << div_exp) + * The resultant rate is in Mbps. + */ + + /* 2Mbps to 100Gbps can be expressed with div_exp = 0. + * Setting this to '0' will ease the calculation of + * exponent and mantissa. + */ + *div_exp = 0; + + if (maxrate) { + *exp = ilog2(maxrate) ? 
ilog2(maxrate) - 1 : 0; + tmp = maxrate - rounddown_pow_of_two(maxrate); + if (maxrate < MAX_RATE_MANTISSA) + *mantissa = tmp * 2; + else + *mantissa = tmp / (1ULL << (*exp - 7)); + } else { + /* Instead of disabling rate limiting, set all values to max */ + *exp = MAX_RATE_EXPONENT; + *mantissa = MAX_RATE_MANTISSA; + } +} + +static int otx2_set_matchall_egress_rate(struct otx2_nic *nic, u32 burst, u32 maxrate) +{ + struct otx2_hw *hw = &nic->hw; + struct nix_txschq_config *req; + u32 burst_exp, burst_mantissa; + u32 exp, mantissa, div_exp; + int txschq, err; + + /* All SQs share the same TL4, so pick the first scheduler */ + txschq = hw->txschq_list[NIX_TXSCH_LVL_TL4][0]; + + /* Get exponent and mantissa values from the desired rate */ + otx2_get_egress_burst_cfg(burst, &burst_exp, &burst_mantissa); + otx2_get_egress_rate_cfg(maxrate, &exp, &mantissa, &div_exp); + + mutex_lock(&nic->mbox.lock); + req = otx2_mbox_alloc_msg_nix_txschq_cfg(&nic->mbox); + if (!req) { + mutex_unlock(&nic->mbox.lock); + return -ENOMEM; + } + + req->lvl = NIX_TXSCH_LVL_TL4; + req->num_regs = 1; + req->reg[0] = NIX_AF_TL4X_PIR(txschq); + req->regval[0] = FIELD_PREP(TLX_BURST_EXPONENT, burst_exp) | + FIELD_PREP(TLX_BURST_MANTISSA, burst_mantissa) | + FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) | + FIELD_PREP(TLX_RATE_EXPONENT, exp) | + FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0); + + err = otx2_sync_mbox_msg(&nic->mbox); + mutex_unlock(&nic->mbox.lock); + return err; +} + +static int otx2_tc_validate_flow(struct otx2_nic *nic, + struct flow_action *actions, + struct netlink_ext_ack *extack) +{ + if (nic->flags & OTX2_FLAG_INTF_DOWN) { + NL_SET_ERR_MSG_MOD(extack, "Interface not initialized"); + return -EINVAL; + } + + if (!flow_action_has_entries(actions)) { + NL_SET_ERR_MSG_MOD(extack, "MATCHALL offload called with no action"); + return -EINVAL; + } + + if (!flow_offload_has_one_action(actions)) { + NL_SET_ERR_MSG_MOD(extack, + "Egress MATCHALL offload supports only 1 policing action"); + return -EINVAL; + } + return 0; +} + +static int otx2_tc_egress_matchall_install(struct otx2_nic *nic, + struct tc_cls_matchall_offload *cls) +{ + struct netlink_ext_ack *extack = cls->common.extack; + struct flow_action *actions = &cls->rule->action; + struct flow_action_entry *entry; + u32 rate; + int err; + + err = otx2_tc_validate_flow(nic, actions, extack); + if (err) + return err; + + if (nic->flags & OTX2_FLAG_TC_MATCHALL_EGRESS_ENABLED) { + NL_SET_ERR_MSG_MOD(extack, + "Only one Egress MATCHALL ratelimiter can be offloaded"); + return -ENOMEM; + } + + entry = &cls->rule->action.entries[0]; + switch (entry->id) { + case FLOW_ACTION_POLICE: + if (entry->police.rate_pkt_ps) { + NL_SET_ERR_MSG_MOD(extack, "QoS offload does not support packets per second"); + return -EOPNOTSUPP; + } + /* Convert bytes per second to Mbps */ + rate = entry->police.rate_bytes_ps * 8; + rate = max_t(u32, rate / 1000000, 1); + err = otx2_set_matchall_egress_rate(nic, entry->police.burst, rate); + if (err) + return err; + nic->flags |= OTX2_FLAG_TC_MATCHALL_EGRESS_ENABLED; + break; + default: + NL_SET_ERR_MSG_MOD(extack, + "Only police action is supported with Egress MATCHALL offload"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int otx2_tc_egress_matchall_delete(struct otx2_nic *nic, + struct tc_cls_matchall_offload *cls) +{ + struct netlink_ext_ack *extack = cls->common.extack; + int err; + + if (nic->flags & OTX2_FLAG_INTF_DOWN) { + NL_SET_ERR_MSG_MOD(extack, "Interface not initialized"); + return -EINVAL; + } + + err =
otx2_set_matchall_egress_rate(nic, 0, 0); + nic->flags &= ~OTX2_FLAG_TC_MATCHALL_EGRESS_ENABLED; + return err; +} + +static int otx2_tc_parse_actions(struct otx2_nic *nic, + struct flow_action *flow_action, + struct npc_install_flow_req *req) +{ + struct flow_action_entry *act; + struct net_device *target; + struct otx2_nic *priv; + int i; + + if (!flow_action_has_entries(flow_action)) { + netdev_info(nic->netdev, "no tc actions specified"); + return -EINVAL; + } + + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_DROP: + req->op = NIX_RX_ACTIONOP_DROP; + return 0; + case FLOW_ACTION_ACCEPT: + req->op = NIX_RX_ACTION_DEFAULT; + return 0; + case FLOW_ACTION_REDIRECT_INGRESS: + target = act->dev; + priv = netdev_priv(target); + /* npc_install_flow_req doesn't support passing a target pcifunc */ + if (rvu_get_pf(nic->pcifunc) != rvu_get_pf(priv->pcifunc)) { + netdev_info(nic->netdev, + "can't redirect to other pf/vf\n"); + return -EOPNOTSUPP; + } + req->vf = priv->pcifunc & RVU_PFVF_FUNC_MASK; + req->op = NIX_RX_ACTION_DEFAULT; + return 0; + case FLOW_ACTION_VLAN_POP: + req->vtag0_valid = true; + /* use RX_VTAG_TYPE7 which is initialized to strip vlan tag */ + req->vtag0_type = NIX_AF_LFX_RX_VTAG_TYPE7; + break; + default: + return -EOPNOTSUPP; + } + } + + return 0; +} + +static int otx2_tc_prepare_flow(struct otx2_nic *nic, + struct flow_cls_offload *f, + struct npc_install_flow_req *req) +{ + struct flow_msg *flow_spec = &req->packet; + struct flow_msg *flow_mask = &req->mask; + struct flow_dissector *dissector; + struct flow_rule *rule; + u8 ip_proto = 0; + + rule = flow_cls_offload_flow_rule(f); + dissector = rule->match.dissector; + + if ((dissector->used_keys & + ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | + BIT(FLOW_DISSECTOR_KEY_BASIC) | + BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_VLAN) | + BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_PORTS) | + BIT(FLOW_DISSECTOR_KEY_IP)))) { + netdev_info(nic->netdev, "unsupported flow used key 0x%x", + dissector->used_keys); + return -EOPNOTSUPP; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + + /* All EtherTypes can be matched, no hw limitation */ + flow_spec->etype = match.key->n_proto; + flow_mask->etype = match.mask->n_proto; + req->features |= BIT_ULL(NPC_ETYPE); + + if (match.mask->ip_proto && + (match.key->ip_proto != IPPROTO_TCP && + match.key->ip_proto != IPPROTO_UDP && + match.key->ip_proto != IPPROTO_SCTP && + match.key->ip_proto != IPPROTO_ICMP && + match.key->ip_proto != IPPROTO_ICMPV6)) { + netdev_info(nic->netdev, + "ip_proto=0x%x not supported\n", + match.key->ip_proto); + return -EOPNOTSUPP; + } + if (match.mask->ip_proto) + ip_proto = match.key->ip_proto; + + if (ip_proto == IPPROTO_UDP) + req->features |= BIT_ULL(NPC_IPPROTO_UDP); + else if (ip_proto == IPPROTO_TCP) + req->features |= BIT_ULL(NPC_IPPROTO_TCP); + else if (ip_proto == IPPROTO_SCTP) + req->features |= BIT_ULL(NPC_IPPROTO_SCTP); + else if (ip_proto == IPPROTO_ICMP) + req->features |= BIT_ULL(NPC_IPPROTO_ICMP); + else if (ip_proto == IPPROTO_ICMPV6) + req->features |= BIT_ULL(NPC_IPPROTO_ICMP6); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_match_eth_addrs match; + + flow_rule_match_eth_addrs(rule, &match); + if (!is_zero_ether_addr(match.mask->src)) { + netdev_err(nic->netdev, "src mac match not supported\n"); + return -EOPNOTSUPP; + 
} + + if (!is_zero_ether_addr(match.mask->dst)) { + ether_addr_copy(flow_spec->dmac, (u8 *)&match.key->dst); + ether_addr_copy(flow_mask->dmac, + (u8 *)&match.mask->dst); + req->features |= BIT_ULL(NPC_DMAC); + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { + struct flow_match_ip match; + + flow_rule_match_ip(rule, &match); + if ((ntohs(flow_spec->etype) != ETH_P_IP) && + match.mask->tos) { + netdev_err(nic->netdev, "tos not supported\n"); + return -EOPNOTSUPP; + } + if (match.mask->ttl) { + netdev_err(nic->netdev, "ttl not supported\n"); + return -EOPNOTSUPP; + } + flow_spec->tos = match.key->tos; + flow_mask->tos = match.mask->tos; + req->features |= BIT_ULL(NPC_TOS); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan match; + u16 vlan_tci, vlan_tci_mask; + + flow_rule_match_vlan(rule, &match); + + if (ntohs(match.key->vlan_tpid) != ETH_P_8021Q) { + netdev_err(nic->netdev, "vlan tpid 0x%x not supported\n", + ntohs(match.key->vlan_tpid)); + return -EOPNOTSUPP; + } + + if (match.mask->vlan_id || + match.mask->vlan_dei || + match.mask->vlan_priority) { + vlan_tci = match.key->vlan_id | + match.key->vlan_dei << 12 | + match.key->vlan_priority << 13; + + vlan_tci_mask = match.mask->vlan_id | + match.mask->vlan_dei << 12 | + match.mask->vlan_priority << 13; + + flow_spec->vlan_tci = htons(vlan_tci); + flow_mask->vlan_tci = htons(vlan_tci_mask); + req->features |= BIT_ULL(NPC_OUTER_VID); + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_ipv4_addrs(rule, &match); + + flow_spec->ip4dst = match.key->dst; + flow_mask->ip4dst = match.mask->dst; + req->features |= BIT_ULL(NPC_DIP_IPV4); + + flow_spec->ip4src = match.key->src; + flow_mask->ip4src = match.mask->src; + req->features |= BIT_ULL(NPC_SIP_IPV4); + } else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { + struct flow_match_ipv6_addrs match; + + flow_rule_match_ipv6_addrs(rule, &match); + + if (ipv6_addr_loopback(&match.key->dst) || + ipv6_addr_loopback(&match.key->src)) { + netdev_err(nic->netdev, + "Flow matching on IPv6 loopback addr is not supported\n"); + return -EOPNOTSUPP; + } + + if (!ipv6_addr_any(&match.mask->dst)) { + memcpy(&flow_spec->ip6dst, + (struct in6_addr *)&match.key->dst, + sizeof(flow_spec->ip6dst)); + memcpy(&flow_mask->ip6dst, + (struct in6_addr *)&match.mask->dst, + sizeof(flow_spec->ip6dst)); + req->features |= BIT_ULL(NPC_DIP_IPV6); + } + + if (!ipv6_addr_any(&match.mask->src)) { + memcpy(&flow_spec->ip6src, + (struct in6_addr *)&match.key->src, + sizeof(flow_spec->ip6src)); + memcpy(&flow_mask->ip6src, + (struct in6_addr *)&match.mask->src, + sizeof(flow_spec->ip6src)); + req->features |= BIT_ULL(NPC_SIP_IPV6); + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_match_ports match; + + flow_rule_match_ports(rule, &match); + + flow_spec->dport = match.key->dst; + flow_mask->dport = match.mask->dst; + if (ip_proto == IPPROTO_UDP) + req->features |= BIT_ULL(NPC_DPORT_UDP); + else if (ip_proto == IPPROTO_TCP) + req->features |= BIT_ULL(NPC_DPORT_TCP); + else if (ip_proto == IPPROTO_SCTP) + req->features |= BIT_ULL(NPC_DPORT_SCTP); + + flow_spec->sport = match.key->src; + flow_mask->sport = match.mask->src; + if (ip_proto == IPPROTO_UDP) + req->features |= BIT_ULL(NPC_SPORT_UDP); + else if (ip_proto == IPPROTO_TCP) + req->features |= BIT_ULL(NPC_SPORT_TCP); + else if (ip_proto == IPPROTO_SCTP) + req->features |=
BIT_ULL(NPC_SPORT_SCTP); + } + + return otx2_tc_parse_actions(nic, &rule->action, req); +} + +static int otx2_del_mcam_flow_entry(struct otx2_nic *nic, u16 entry) +{ + struct npc_delete_flow_req *req; + int err; + + mutex_lock(&nic->mbox.lock); + req = otx2_mbox_alloc_msg_npc_delete_flow(&nic->mbox); + if (!req) { + mutex_unlock(&nic->mbox.lock); + return -ENOMEM; + } + + req->entry = entry; + + /* Send message to AF */ + err = otx2_sync_mbox_msg(&nic->mbox); + if (err) { + netdev_err(nic->netdev, "Failed to delete MCAM flow entry %d\n", + entry); + mutex_unlock(&nic->mbox.lock); + return -EFAULT; + } + mutex_unlock(&nic->mbox.lock); + + return 0; +} + +static int otx2_tc_del_flow(struct otx2_nic *nic, + struct flow_cls_offload *tc_flow_cmd) +{ + struct otx2_tc_info *tc_info = &nic->tc_info; + struct otx2_tc_flow *flow_node; + + flow_node = rhashtable_lookup_fast(&tc_info->flow_table, + &tc_flow_cmd->cookie, + tc_info->flow_ht_params); + if (!flow_node) { + netdev_err(nic->netdev, "tc flow not found for cookie 0x%lx\n", + tc_flow_cmd->cookie); + return -EINVAL; + } + + otx2_del_mcam_flow_entry(nic, flow_node->entry); + + WARN_ON(rhashtable_remove_fast(&nic->tc_info.flow_table, + &flow_node->node, + nic->tc_info.flow_ht_params)); + kfree_rcu(flow_node, rcu); + + clear_bit(flow_node->bitpos, tc_info->tc_entries_bitmap); + tc_info->num_entries--; + + return 0; +} + +static int otx2_tc_add_flow(struct otx2_nic *nic, + struct flow_cls_offload *tc_flow_cmd) +{ + struct otx2_tc_info *tc_info = &nic->tc_info; + struct otx2_tc_flow *new_node, *old_node; + struct npc_install_flow_req *req; + int rc; + + if (!(nic->flags & OTX2_FLAG_TC_FLOWER_SUPPORT)) + return -ENOMEM; + + /* allocate memory for the new flow and its node */ + new_node = kzalloc(sizeof(*new_node), GFP_KERNEL); + if (!new_node) + return -ENOMEM; + spin_lock_init(&new_node->lock); + new_node->cookie = tc_flow_cmd->cookie; + + mutex_lock(&nic->mbox.lock); + req = otx2_mbox_alloc_msg_npc_install_flow(&nic->mbox); + if (!req) { + mutex_unlock(&nic->mbox.lock); + return -ENOMEM; + } + + rc = otx2_tc_prepare_flow(nic, tc_flow_cmd, req); + if (rc) { + otx2_mbox_reset(&nic->mbox.mbox, 0); + mutex_unlock(&nic->mbox.lock); + return rc; + } + + /* If a flow exists with the same cookie, delete it */ + old_node = rhashtable_lookup_fast(&tc_info->flow_table, + &tc_flow_cmd->cookie, + tc_info->flow_ht_params); + if (old_node) + otx2_tc_del_flow(nic, tc_flow_cmd); + + if (bitmap_full(tc_info->tc_entries_bitmap, nic->flow_cfg->tc_max_flows)) { + netdev_err(nic->netdev, "Not enough MCAM space to add the flow\n"); + otx2_mbox_reset(&nic->mbox.mbox, 0); + mutex_unlock(&nic->mbox.lock); + return -ENOMEM; + } + + new_node->bitpos = find_first_zero_bit(tc_info->tc_entries_bitmap, + nic->flow_cfg->tc_max_flows); + req->channel = nic->hw.rx_chan_base; + req->entry = nic->flow_cfg->entry[nic->flow_cfg->tc_flower_offset + + nic->flow_cfg->tc_max_flows - new_node->bitpos]; + req->intf = NIX_INTF_RX; + req->set_cntr = 1; + new_node->entry = req->entry; + + /* Send message to AF */ + rc = otx2_sync_mbox_msg(&nic->mbox); + if (rc) { + netdev_err(nic->netdev, "Failed to install MCAM flow entry\n"); + mutex_unlock(&nic->mbox.lock); + goto out; + } + mutex_unlock(&nic->mbox.lock); + + /* add new flow to flow-table */ + rc = rhashtable_insert_fast(&nic->tc_info.flow_table, &new_node->node, + nic->tc_info.flow_ht_params); + if (rc) { + otx2_del_mcam_flow_entry(nic, req->entry); + kfree_rcu(new_node, rcu); + goto out; + } + + set_bit(new_node->bitpos,
tc_info->tc_entries_bitmap); + tc_info->num_entries++; +out: + return rc; +} + +static int otx2_tc_get_flow_stats(struct otx2_nic *nic, + struct flow_cls_offload *tc_flow_cmd) +{ + struct otx2_tc_info *tc_info = &nic->tc_info; + struct npc_mcam_get_stats_req *req; + struct npc_mcam_get_stats_rsp *rsp; + struct otx2_tc_flow_stats *stats; + struct otx2_tc_flow *flow_node; + int err; + + flow_node = rhashtable_lookup_fast(&tc_info->flow_table, + &tc_flow_cmd->cookie, + tc_info->flow_ht_params); + if (!flow_node) { + netdev_info(nic->netdev, "tc flow not found for cookie %lx", + tc_flow_cmd->cookie); + return -EINVAL; + } + + mutex_lock(&nic->mbox.lock); + + req = otx2_mbox_alloc_msg_npc_mcam_entry_stats(&nic->mbox); + if (!req) { + mutex_unlock(&nic->mbox.lock); + return -ENOMEM; + } + + req->entry = flow_node->entry; + + err = otx2_sync_mbox_msg(&nic->mbox); + if (err) { + netdev_err(nic->netdev, "Failed to get stats for MCAM flow entry %d\n", + req->entry); + mutex_unlock(&nic->mbox.lock); + return -EFAULT; + } + + rsp = (struct npc_mcam_get_stats_rsp *)otx2_mbox_get_rsp + (&nic->mbox.mbox, 0, &req->hdr); + if (IS_ERR(rsp)) { + mutex_unlock(&nic->mbox.lock); + return PTR_ERR(rsp); + } + + mutex_unlock(&nic->mbox.lock); + + if (!rsp->stat_ena) + return -EINVAL; + + stats = &flow_node->stats; + + spin_lock(&flow_node->lock); + flow_stats_update(&tc_flow_cmd->stats, 0x0, rsp->stat - stats->pkts, 0x0, 0x0, + FLOW_ACTION_HW_STATS_IMMEDIATE); + stats->pkts = rsp->stat; + spin_unlock(&flow_node->lock); + + return 0; +} + +static int otx2_setup_tc_cls_flower(struct otx2_nic *nic, + struct flow_cls_offload *cls_flower) +{ + switch (cls_flower->command) { + case FLOW_CLS_REPLACE: + return otx2_tc_add_flow(nic, cls_flower); + case FLOW_CLS_DESTROY: + return otx2_tc_del_flow(nic, cls_flower); + case FLOW_CLS_STATS: + return otx2_tc_get_flow_stats(nic, cls_flower); + default: + return -EOPNOTSUPP; + } +} + +static int otx2_setup_tc_block_ingress_cb(enum tc_setup_type type, + void *type_data, void *cb_priv) +{ + struct otx2_nic *nic = cb_priv; + + if (!tc_cls_can_offload_and_chain0(nic->netdev, type_data)) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return otx2_setup_tc_cls_flower(nic, type_data); + default: + break; + } + + return -EOPNOTSUPP; +} + +static int otx2_setup_tc_egress_matchall(struct otx2_nic *nic, + struct tc_cls_matchall_offload *cls_matchall) +{ + switch (cls_matchall->command) { + case TC_CLSMATCHALL_REPLACE: + return otx2_tc_egress_matchall_install(nic, cls_matchall); + case TC_CLSMATCHALL_DESTROY: + return otx2_tc_egress_matchall_delete(nic, cls_matchall); + case TC_CLSMATCHALL_STATS: + default: + break; + } + + return -EOPNOTSUPP; +} + +static int otx2_setup_tc_block_egress_cb(enum tc_setup_type type, + void *type_data, void *cb_priv) +{ + struct otx2_nic *nic = cb_priv; + + if (!tc_cls_can_offload_and_chain0(nic->netdev, type_data)) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_CLSMATCHALL: + return otx2_setup_tc_egress_matchall(nic, type_data); + default: + break; + } + + return -EOPNOTSUPP; +} + +static LIST_HEAD(otx2_block_cb_list); + +static int otx2_setup_tc_block(struct net_device *netdev, + struct flow_block_offload *f) +{ + struct otx2_nic *nic = netdev_priv(netdev); + flow_setup_cb_t *cb; + bool ingress; + + if (f->block_shared) + return -EOPNOTSUPP; + + if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) { + cb = otx2_setup_tc_block_ingress_cb; + ingress = true; + } else if (f->binder_type == 
FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) { + cb = otx2_setup_tc_block_egress_cb; + ingress = false; + } else { + return -EOPNOTSUPP; + } + + return flow_block_cb_setup_simple(f, &otx2_block_cb_list, cb, + nic, nic, ingress); +} + +int otx2_setup_tc(struct net_device *netdev, enum tc_setup_type type, + void *type_data) +{ + switch (type) { + case TC_SETUP_BLOCK: + return otx2_setup_tc_block(netdev, type_data); + default: + return -EOPNOTSUPP; + } +} + +static const struct rhashtable_params tc_flow_ht_params = { + .head_offset = offsetof(struct otx2_tc_flow, node), + .key_offset = offsetof(struct otx2_tc_flow, cookie), + .key_len = sizeof(((struct otx2_tc_flow *)0)->cookie), + .automatic_shrinking = true, +}; + +int otx2_init_tc(struct otx2_nic *nic) +{ + struct otx2_tc_info *tc = &nic->tc_info; + + tc->flow_ht_params = tc_flow_ht_params; + return rhashtable_init(&tc->flow_table, &tc->flow_ht_params); +} + +void otx2_shutdown_tc(struct otx2_nic *nic) +{ + struct otx2_tc_info *tc = &nic->tc_info; + + rhashtable_destroy(&tc->flow_table); +} diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c index 25dd903a3e92..2768c78528a5 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -431,7 +431,8 @@ static void prestera_port_handle_event(struct prestera_switch *sw, netif_carrier_on(port->dev); if (!delayed_work_pending(caching_dw)) queue_delayed_work(prestera_wq, caching_dw, 0); - } else { + } else if (netif_running(port->dev) && + netif_carrier_ok(port->dev)) { netif_carrier_off(port->dev); if (delayed_work_pending(caching_dw)) cancel_delayed_work(caching_dw); @@ -456,20 +457,17 @@ static int prestera_switch_set_base_mac_addr(struct prestera_switch *sw) { struct device_node *base_mac_np; struct device_node *np; - const char *base_mac; + int ret; np = of_find_compatible_node(NULL, NULL, "marvell,prestera"); base_mac_np = of_parse_phandle(np, "base-mac-provider", 0); - base_mac = of_get_mac_address(base_mac_np); - of_node_put(base_mac_np); - if (!IS_ERR(base_mac)) - ether_addr_copy(sw->base_mac, base_mac); - - if (!is_valid_ether_addr(sw->base_mac)) { + ret = of_get_mac_address(base_mac_np, sw->base_mac); + if (ret) { eth_random_addr(sw->base_mac); dev_info(prestera_dev(sw), "using random base mac address\n"); } + of_node_put(base_mac_np); return prestera_hw_switch_mac_set(sw, sw->base_mac); } diff --git a/drivers/net/ethernet/marvell/prestera/prestera_pci.c b/drivers/net/ethernet/marvell/prestera/prestera_pci.c index be5677623455..298110119272 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_pci.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_pci.c @@ -756,6 +756,7 @@ static void prestera_pci_remove(struct pci_dev *pdev) static const struct pci_device_id prestera_pci_devices[] = { { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0xC804) }, + { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, 0xC80C) }, { } }; MODULE_DEVICE_TABLE(pci, prestera_pci_devices); diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c index 49e052273f30..cb564890a3dc 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c @@ -798,7 +798,7 @@ static void prestera_fdb_event_work(struct work_struct *work) switch (swdev_work->event) { case SWITCHDEV_FDB_ADD_TO_DEVICE: fdb_info = &swdev_work->fdb_info; - if (!fdb_info->added_by_user) + if 
(!fdb_info->added_by_user || fdb_info->is_local) break; err = prestera_port_fdb_set(port, fdb_info, true); diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index d1e4d42e497d..e967867828d8 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -1392,7 +1392,6 @@ static int pxa168_eth_probe(struct platform_device *pdev) struct resource *res; struct clk *clk; struct device_node *np; - const unsigned char *mac_addr = NULL; int err; printk(KERN_NOTICE "PXA168 10/100 Ethernet Driver\n"); @@ -1435,12 +1434,8 @@ static int pxa168_eth_probe(struct platform_device *pdev) INIT_WORK(&pep->tx_timeout_task, pxa168_eth_tx_timeout_task); - if (pdev->dev.of_node) - mac_addr = of_get_mac_address(pdev->dev.of_node); - - if (!IS_ERR_OR_NULL(mac_addr)) { - ether_addr_copy(dev->dev_addr, mac_addr); - } else { + err = of_get_mac_address(pdev->dev.of_node, dev->dev_addr); + if (err) { /* try reading the mac address, if set by the bootloader */ pxa168_eth_get_mac_address(dev, dev->dev_addr); if (!is_valid_ether_addr(dev->dev_addr)) { @@ -1544,8 +1539,8 @@ static int pxa168_eth_remove(struct platform_device *pdev) clk_disable_unprepare(pep->clk); mdiobus_unregister(pep->smi_bus); mdiobus_free(pep->smi_bus); - unregister_netdev(dev); cancel_work_sync(&pep->tx_timeout_task); + unregister_netdev(dev); free_netdev(dev); return 0; } diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c index 8a9c0f490bfb..d4bb27ba1419 100644 --- a/drivers/net/ethernet/marvell/skge.c +++ b/drivers/net/ethernet/marvell/skge.c @@ -1617,7 +1617,7 @@ static void genesis_mac_init(struct skge_hw *hw, int port) xm_write16(hw, port, XM_TX_THR, 512); /* - * Enable the reception of all error frames. This is is + * Enable the reception of all error frames. This is * a necessary evil due to the design of the XMAC. The * XMAC's receive FIFO is only 8K in size, however jumbo * frames can be up to 9000 bytes in length. When bad @@ -2959,8 +2959,9 @@ static void genesis_set_multicast(struct net_device *dev) static void yukon_add_filter(u8 filter[8], const u8 *addr) { - u32 bit = ether_crc(ETH_ALEN, addr) & 0x3f; - filter[bit/8] |= 1 << (bit%8); + u32 bit = ether_crc(ETH_ALEN, addr) & 0x3f; + + filter[bit / 8] |= 1 << (bit % 8); } static void yukon_set_multicast(struct net_device *dev) @@ -3849,7 +3850,7 @@ static struct net_device *skge_devinit(struct skge_hw *hw, int port, /* Only used for Genesis XMAC */ if (is_genesis(hw)) - timer_setup(&skge->link_timer, xm_link_timer, 0); + timer_setup(&skge->link_timer, xm_link_timer, 0); else { dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_RXCSUM; diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index dbec8e187a68..222c32367b2c 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -55,7 +55,8 @@ #define RX_DEF_PENDING RX_MAX_PENDING /* This is the worst case number of transmit list elements for a single skb: - VLAN:GSO + CKSUM + Data + skb_frags * DMA */ + * VLAN:GSO + CKSUM + Data + skb_frags * DMA + */ #define MAX_SKB_TX_LE (2 + (sizeof(dma_addr_t)/sizeof(u32))*(MAX_SKB_FRAGS+1)) #define TX_MIN_PENDING (MAX_SKB_TX_LE+1) #define TX_MAX_PENDING 1024 @@ -1529,7 +1530,8 @@ static void sky2_rx_start(struct sky2_port *sky2) sky2_write32(hw, Q_ADDR(rxq, Q_WM), BMU_WM_PEX); /* These chips have no ram buffer? 
- * MAC Rx RAM Read is controlled by hardware */ + * MAC Rx RAM Read is controlled by hardware + */ if (hw->chip_id == CHIP_ID_YUKON_EC_U && hw->chip_rev > CHIP_REV_YU_EC_U_A0) sky2_write32(hw, Q_ADDR(rxq, Q_TEST), F_M_RX_RAM_DIS); @@ -4135,7 +4137,7 @@ static int sky2_set_coalesce(struct net_device *dev, /* * Hardware is limited to min of 128 and max of 2048 for ring size * and rounded up to next power of two - * to avoid division in modulus calclation + * to avoid division in modulus calculation */ static unsigned long roundup_ring_size(unsigned long pending) { @@ -4684,7 +4686,8 @@ static __exit void sky2_debug_cleanup(void) #endif /* Two copies of network device operations to handle special case of - not allowing netpoll on second port */ + * not allowing netpoll on second port + */ static const struct net_device_ops sky2_netdev_ops[2] = { { .ndo_open = sky2_open, @@ -4725,7 +4728,7 @@ static struct net_device *sky2_init_netdev(struct sky2_hw *hw, unsigned port, { struct sky2_port *sky2; struct net_device *dev = alloc_etherdev(sizeof(*sky2)); - const void *iap; + int ret; if (!dev) return NULL; @@ -4795,10 +4798,8 @@ static struct net_device *sky2_init_netdev(struct sky2_hw *hw, unsigned port, * 1) from device tree data * 2) from internal registers set by bootloader */ - iap = of_get_mac_address(hw->pdev->dev.of_node); - if (!IS_ERR(iap)) - ether_addr_copy(dev->dev_addr, iap); - else + ret = of_get_mac_address(hw->pdev->dev.of_node, dev->dev_addr); + if (ret) memcpy_fromio(dev->dev_addr, hw->regs + B2_MAC_1 + port * 8, ETH_ALEN); diff --git a/drivers/net/ethernet/mediatek/Kconfig b/drivers/net/ethernet/mediatek/Kconfig index 3362b148de23..c357c193378e 100644 --- a/drivers/net/ethernet/mediatek/Kconfig +++ b/drivers/net/ethernet/mediatek/Kconfig @@ -9,7 +9,9 @@ if NET_VENDOR_MEDIATEK config NET_MEDIATEK_SOC tristate "MediaTek SoC Gigabit Ethernet support" + depends on NET_DSA || !NET_DSA select PHYLINK + select DIMLIB help This driver supports the gigabit ethernet MACs in the MediaTek SoC family. 
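The prestera, pxa168 and sky2 hunks above are all instances of the same tree-wide conversion: of_get_mac_address() now fills a caller-supplied buffer and returns 0 or a negative errno, instead of returning a pointer that had to be checked with IS_ERR() and copied with ether_addr_copy(). A minimal sketch of the new calling pattern follows; example_set_mac() and its fallback policy are illustrative only, not part of the patch:

#include <linux/etherdevice.h>
#include <linux/netdevice.h>
#include <linux/of.h>
#include <linux/of_net.h>

/* Illustrative helper mirroring the converted drivers above. */
static void example_set_mac(struct net_device *ndev, struct device_node *np)
{
	int err;

	/* New API: copies into ndev->dev_addr, returns 0 or -errno. */
	err = of_get_mac_address(np, ndev->dev_addr);
	if (err) {
		/* Same fallback the converted drivers use when DT carries
		 * no (valid) address: generate a random one.
		 */
		eth_hw_addr_random(ndev);
		netdev_info(ndev, "using random MAC address %pM\n",
			    ndev->dev_addr);
	}
}

Each converted driver keeps its own secondary address source (bootloader registers, internal registers, random address) and simply branches on the returned error code.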
diff --git a/drivers/net/ethernet/mediatek/Makefile b/drivers/net/ethernet/mediatek/Makefile index 3a777b4a6cd3..79d4cdbbcbf5 100644 --- a/drivers/net/ethernet/mediatek/Makefile +++ b/drivers/net/ethernet/mediatek/Makefile @@ -4,5 +4,5 @@ # obj-$(CONFIG_NET_MEDIATEK_SOC) += mtk_eth.o -mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_eth_path.o +mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_eth_path.o mtk_ppe.o mtk_ppe_debugfs.o mtk_ppe_offload.o obj-$(CONFIG_NET_MEDIATEK_STAR_EMAC) += mtk_star_emac.o diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 01d3ee4b5829..ed4eacef17ce 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -19,6 +19,8 @@ #include <linux/interrupt.h> #include <linux/pinctrl/devinfo.h> #include <linux/phylink.h> +#include <linux/jhash.h> +#include <net/dsa.h> #include "mtk_eth_soc.h" @@ -85,7 +87,7 @@ static int mtk_mdio_busy_wait(struct mtk_eth *eth) return 0; if (time_after(jiffies, t_start + PHY_IAC_TIMEOUT)) break; - usleep_range(10, 20); + cond_resched(); } dev_err(eth->dev, "mdio: MDIO timeout\n"); @@ -776,13 +778,18 @@ static inline int mtk_max_buf_size(int frag_size) return buf_size; } -static inline void mtk_rx_get_desc(struct mtk_rx_dma *rxd, +static inline bool mtk_rx_get_desc(struct mtk_rx_dma *rxd, struct mtk_rx_dma *dma_rxd) { - rxd->rxd1 = READ_ONCE(dma_rxd->rxd1); rxd->rxd2 = READ_ONCE(dma_rxd->rxd2); + if (!(rxd->rxd2 & RX_DMA_DONE)) + return false; + + rxd->rxd1 = READ_ONCE(dma_rxd->rxd1); rxd->rxd3 = READ_ONCE(dma_rxd->rxd3); rxd->rxd4 = READ_ONCE(dma_rxd->rxd4); + + return true; } /* the qdma core needs scratch memory to be setup */ @@ -857,7 +864,8 @@ static int txd_to_idx(struct mtk_tx_ring *ring, struct mtk_tx_dma *dma) return ((void *)dma - (void *)ring->dma) / sizeof(*dma); } -static void mtk_tx_unmap(struct mtk_eth *eth, struct mtk_tx_buf *tx_buf) +static void mtk_tx_unmap(struct mtk_eth *eth, struct mtk_tx_buf *tx_buf, + bool napi) { if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) { if (tx_buf->flags & MTK_TX_FLAGS_SINGLE0) { @@ -889,8 +897,12 @@ static void mtk_tx_unmap(struct mtk_eth *eth, struct mtk_tx_buf *tx_buf) tx_buf->flags = 0; if (tx_buf->skb && - (tx_buf->skb != (struct sk_buff *)MTK_DMA_DUMMY_DESC)) - dev_kfree_skb_any(tx_buf->skb); + (tx_buf->skb != (struct sk_buff *)MTK_DMA_DUMMY_DESC)) { + if (napi) + napi_consume_skb(tx_buf->skb, napi); + else + dev_kfree_skb_any(tx_buf->skb); + } tx_buf->skb = NULL; } @@ -1068,7 +1080,7 @@ err_dma: tx_buf = mtk_desc_to_tx_buf(ring, itxd); /* unmap dma */ - mtk_tx_unmap(eth, tx_buf); + mtk_tx_unmap(eth, tx_buf, false); itxd->txd3 = TX_DMA_LS0 | TX_DMA_OWNER_CPU; if (!MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) @@ -1125,17 +1137,6 @@ static void mtk_wake_queue(struct mtk_eth *eth) } } -static void mtk_stop_queue(struct mtk_eth *eth) -{ - int i; - - for (i = 0; i < MTK_MAC_COUNT; i++) { - if (!eth->netdev[i]) - continue; - netif_stop_queue(eth->netdev[i]); - } -} - static netdev_tx_t mtk_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct mtk_mac *mac = netdev_priv(dev); @@ -1156,7 +1157,7 @@ static netdev_tx_t mtk_start_xmit(struct sk_buff *skb, struct net_device *dev) tx_num = mtk_cal_txd_req(skb); if (unlikely(atomic_read(&ring->free_count) <= tx_num)) { - mtk_stop_queue(eth); + netif_stop_queue(dev); netif_err(eth, tx_queued, dev, "Tx Ring full when queue awake!\n"); spin_unlock(&eth->page_lock); @@ -1182,7 +1183,7 @@ static netdev_tx_t mtk_start_xmit(struct sk_buff *skb, struct
net_device *dev) goto drop; if (unlikely(atomic_read(&ring->free_count) <= ring->thresh)) - mtk_stop_queue(eth); + netif_stop_queue(dev); spin_unlock(&eth->page_lock); @@ -1238,17 +1239,19 @@ static void mtk_update_rx_cpu_idx(struct mtk_eth *eth) static int mtk_poll_rx(struct napi_struct *napi, int budget, struct mtk_eth *eth) { + struct dim_sample dim_sample = {}; struct mtk_rx_ring *ring; int idx; struct sk_buff *skb; u8 *data, *new_data; struct mtk_rx_dma *rxd, trxd; - int done = 0; + int done = 0, bytes = 0; while (done < budget) { struct net_device *netdev; unsigned int pktlen; dma_addr_t dma_addr; + u32 hash; int mac; ring = mtk_get_rx_ring(eth); @@ -1259,18 +1262,16 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, rxd = &ring->dma[idx]; data = ring->data[idx]; - mtk_rx_get_desc(&trxd, rxd); - if (!(trxd.rxd2 & RX_DMA_DONE)) + if (!mtk_rx_get_desc(&trxd, rxd)) break; /* find out which mac the packet comes from. values start at 1 */ - if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) { + if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628) || + (trxd.rxd4 & RX_DMA_SPECIAL_TAG)) mac = 0; - } else { - mac = (trxd.rxd4 >> RX_DMA_FPORT_SHIFT) & - RX_DMA_FPORT_MASK; - mac--; - } + else + mac = ((trxd.rxd4 >> RX_DMA_FPORT_SHIFT) & + RX_DMA_FPORT_MASK) - 1; if (unlikely(mac < 0 || mac >= MTK_MAC_COUNT || !eth->netdev[mac])) @@ -1298,17 +1299,18 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, goto release_desc; } + dma_unmap_single(eth->dev, trxd.rxd1, + ring->buf_size, DMA_FROM_DEVICE); + /* receive data */ skb = build_skb(data, ring->frag_size); if (unlikely(!skb)) { - skb_free_frag(new_data); + skb_free_frag(data); netdev->stats.rx_dropped++; - goto release_desc; + goto skip_rx; } skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); - dma_unmap_single(eth->dev, trxd.rxd1, - ring->buf_size, DMA_FROM_DEVICE); pktlen = RX_DMA_GET_PLEN0(trxd.rxd2); skb->dev = netdev; skb_put(skb, pktlen); @@ -1317,14 +1319,22 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, else skb_checksum_none_assert(skb); skb->protocol = eth_type_trans(skb, netdev); + bytes += pktlen; + + hash = trxd.rxd4 & MTK_RXD4_FOE_ENTRY; + if (hash != MTK_RXD4_FOE_ENTRY) { + hash = jhash_1word(hash, 0); + skb_set_hash(skb, hash, PKT_HASH_TYPE_L4); + } if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX && - RX_DMA_VID(trxd.rxd3)) + (trxd.rxd2 & RX_DMA_VTAG)) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), RX_DMA_VID(trxd.rxd3)); skb_record_rx_queue(skb, 0); napi_gro_receive(napi, skb); +skip_rx: ring->data[idx] = new_data; rxd->rxd1 = (unsigned int)dma_addr; @@ -1348,6 +1358,12 @@ rx_done: mtk_update_rx_cpu_idx(eth); } + eth->rx_packets += done; + eth->rx_bytes += bytes; + dim_update_sample(eth->rx_events, eth->rx_packets, eth->rx_bytes, + &dim_sample); + net_dim(&eth->rx_dim, dim_sample); + return done; } @@ -1360,7 +1376,7 @@ static int mtk_poll_tx_qdma(struct mtk_eth *eth, int budget, struct mtk_tx_buf *tx_buf; u32 cpu, dma; - cpu = mtk_r32(eth, MTK_QTX_CRX_PTR); + cpu = ring->last_free_ptr; dma = mtk_r32(eth, MTK_QTX_DRX_PTR); desc = mtk_qdma_phys_to_virt(ring, cpu); @@ -1386,7 +1402,7 @@ static int mtk_poll_tx_qdma(struct mtk_eth *eth, int budget, done[mac]++; budget--; } - mtk_tx_unmap(eth, tx_buf); + mtk_tx_unmap(eth, tx_buf, true); ring->last_free = desc; atomic_inc(&ring->free_count); @@ -1394,6 +1410,7 @@ static int mtk_poll_tx_qdma(struct mtk_eth *eth, int budget, cpu = next_cpu; } + ring->last_free_ptr = cpu; mtk_w32(eth, cpu, MTK_QTX_CRX_PTR); return budget; @@ -1423,7 +1440,7 @@ static int
mtk_poll_tx_pdma(struct mtk_eth *eth, int budget, budget--; } - mtk_tx_unmap(eth, tx_buf); + mtk_tx_unmap(eth, tx_buf, true); desc = &ring->dma[cpu]; ring->last_free = desc; @@ -1440,6 +1457,7 @@ static int mtk_poll_tx_pdma(struct mtk_eth *eth, int budget, static int mtk_poll_tx(struct mtk_eth *eth, int budget) { struct mtk_tx_ring *ring = &eth->tx_ring; + struct dim_sample dim_sample = {}; unsigned int done[MTK_MAX_DEVS]; unsigned int bytes[MTK_MAX_DEVS]; int total = 0, i; @@ -1457,8 +1475,14 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget) continue; netdev_completed_queue(eth->netdev[i], done[i], bytes[i]); total += done[i]; + eth->tx_packets += done[i]; + eth->tx_bytes += bytes[i]; } + dim_update_sample(eth->tx_events, eth->tx_packets, eth->tx_bytes, + &dim_sample); + net_dim(&eth->tx_dim, dim_sample); + if (mtk_queue_stopped(eth) && (atomic_read(&ring->free_count) > ring->thresh)) mtk_wake_queue(eth); @@ -1480,7 +1504,6 @@ static void mtk_handle_status_irq(struct mtk_eth *eth) static int mtk_napi_tx(struct napi_struct *napi, int budget) { struct mtk_eth *eth = container_of(napi, struct mtk_eth, tx_napi); - u32 status, mask; int tx_done = 0; if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) @@ -1489,22 +1512,20 @@ static int mtk_napi_tx(struct napi_struct *napi, int budget) tx_done = mtk_poll_tx(eth, budget); if (unlikely(netif_msg_intr(eth))) { - status = mtk_r32(eth, eth->tx_int_status_reg); - mask = mtk_r32(eth, eth->tx_int_mask_reg); dev_info(eth->dev, - "done tx %d, intr 0x%08x/0x%x\n", - tx_done, status, mask); + "done tx %d, intr 0x%08x/0x%x\n", tx_done, + mtk_r32(eth, eth->tx_int_status_reg), + mtk_r32(eth, eth->tx_int_mask_reg)); } if (tx_done == budget) return budget; - status = mtk_r32(eth, eth->tx_int_status_reg); - if (status & MTK_TX_DONE_INT) + if (mtk_r32(eth, eth->tx_int_status_reg) & MTK_TX_DONE_INT) return budget; - napi_complete(napi); - mtk_tx_irq_enable(eth, MTK_TX_DONE_INT); + if (napi_complete_done(napi, tx_done)) + mtk_tx_irq_enable(eth, MTK_TX_DONE_INT); return tx_done; } @@ -1512,35 +1533,33 @@ static int mtk_napi_tx(struct napi_struct *napi, int budget) static int mtk_napi_rx(struct napi_struct *napi, int budget) { struct mtk_eth *eth = container_of(napi, struct mtk_eth, rx_napi); - u32 status, mask; - int rx_done = 0; - int remain_budget = budget; + int rx_done_total = 0; mtk_handle_status_irq(eth); -poll_again: - mtk_w32(eth, MTK_RX_DONE_INT, MTK_PDMA_INT_STATUS); - rx_done = mtk_poll_rx(napi, remain_budget, eth); + do { + int rx_done; - if (unlikely(netif_msg_intr(eth))) { - status = mtk_r32(eth, MTK_PDMA_INT_STATUS); - mask = mtk_r32(eth, MTK_PDMA_INT_MASK); - dev_info(eth->dev, - "done rx %d, intr 0x%08x/0x%x\n", - rx_done, status, mask); - } - if (rx_done == remain_budget) - return budget; + mtk_w32(eth, MTK_RX_DONE_INT, MTK_PDMA_INT_STATUS); + rx_done = mtk_poll_rx(napi, budget - rx_done_total, eth); + rx_done_total += rx_done; - status = mtk_r32(eth, MTK_PDMA_INT_STATUS); - if (status & MTK_RX_DONE_INT) { - remain_budget -= rx_done; - goto poll_again; - } - napi_complete(napi); - mtk_rx_irq_enable(eth, MTK_RX_DONE_INT); + if (unlikely(netif_msg_intr(eth))) { + dev_info(eth->dev, + "done rx %d, intr 0x%08x/0x%x\n", rx_done, + mtk_r32(eth, MTK_PDMA_INT_STATUS), + mtk_r32(eth, MTK_PDMA_INT_MASK)); + } - return rx_done + budget - remain_budget; + if (rx_done_total == budget) + return budget; + + } while (mtk_r32(eth, MTK_PDMA_INT_STATUS) & MTK_RX_DONE_INT); + + if (napi_complete_done(napi, rx_done_total)) + mtk_rx_irq_enable(eth, MTK_RX_DONE_INT); + 
return rx_done_total; } static int mtk_tx_alloc(struct mtk_eth *eth) @@ -1587,6 +1606,7 @@ static int mtk_tx_alloc(struct mtk_eth *eth) atomic_set(&ring->free_count, MTK_DMA_SIZE - 2); ring->next_free = &ring->dma[0]; ring->last_free = &ring->dma[MTK_DMA_SIZE - 1]; + ring->last_free_ptr = (u32)(ring->phys + ((MTK_DMA_SIZE - 1) * sz)); ring->thresh = MAX_SKB_FRAGS; /* make sure that all changes to the dma ring are flushed before we @@ -1600,9 +1620,7 @@ static int mtk_tx_alloc(struct mtk_eth *eth) mtk_w32(eth, ring->phys + ((MTK_DMA_SIZE - 1) * sz), MTK_QTX_CRX_PTR); - mtk_w32(eth, - ring->phys + ((MTK_DMA_SIZE - 1) * sz), - MTK_QTX_DRX_PTR); + mtk_w32(eth, ring->last_free_ptr, MTK_QTX_DRX_PTR); mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES, MTK_QTX_CFG(0)); } else { @@ -1625,7 +1643,7 @@ static void mtk_tx_clean(struct mtk_eth *eth) if (ring->buf) { for (i = 0; i < MTK_DMA_SIZE; i++) - mtk_tx_unmap(eth, &ring->buf[i]); + mtk_tx_unmap(eth, &ring->buf[i], false); kfree(ring->buf); ring->buf = NULL; } @@ -2015,25 +2033,22 @@ static int mtk_set_features(struct net_device *dev, netdev_features_t features) /* wait for DMA to finish whatever it is doing before we start using it again */ static int mtk_dma_busy_wait(struct mtk_eth *eth) { - unsigned long t_start = jiffies; + unsigned int reg; + int ret; + u32 val; - while (1) { - if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) { - if (!(mtk_r32(eth, MTK_QDMA_GLO_CFG) & - (MTK_RX_DMA_BUSY | MTK_TX_DMA_BUSY))) - return 0; - } else { - if (!(mtk_r32(eth, MTK_PDMA_GLO_CFG) & - (MTK_RX_DMA_BUSY | MTK_TX_DMA_BUSY))) - return 0; - } + if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) + reg = MTK_QDMA_GLO_CFG; + else + reg = MTK_PDMA_GLO_CFG; - if (time_after(jiffies, t_start + MTK_DMA_BUSY_TIMEOUT)) - break; - } + ret = readx_poll_timeout_atomic(__raw_readl, eth->base + reg, val, + !(val & (MTK_RX_DMA_BUSY | MTK_TX_DMA_BUSY)), + 5, MTK_DMA_BUSY_TIMEOUT_US); + if (ret) + dev_err(eth->dev, "DMA init timeout\n"); - dev_err(eth->dev, "DMA init timeout\n"); - return -1; + return ret; } static int mtk_dma_init(struct mtk_eth *eth) @@ -2133,6 +2148,7 @@ static irqreturn_t mtk_handle_irq_rx(int irq, void *_eth) { struct mtk_eth *eth = _eth; + eth->rx_events++; if (likely(napi_schedule_prep(&eth->rx_napi))) { __napi_schedule(&eth->rx_napi); mtk_rx_irq_disable(eth, MTK_RX_DONE_INT); @@ -2145,6 +2161,7 @@ static irqreturn_t mtk_handle_irq_tx(int irq, void *_eth) { struct mtk_eth *eth = _eth; + eth->tx_events++; if (likely(napi_schedule_prep(&eth->tx_napi))) { __napi_schedule(&eth->tx_napi); mtk_tx_irq_disable(eth, MTK_TX_DONE_INT); @@ -2197,7 +2214,7 @@ static int mtk_start_dma(struct mtk_eth *eth) if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) { mtk_w32(eth, MTK_TX_WB_DDONE | MTK_TX_DMA_EN | - MTK_DMA_SIZE_16DWORDS | MTK_NDP_CO_PRO | + MTK_TX_BT_32DWORDS | MTK_NDP_CO_PRO | MTK_RX_DMA_EN | MTK_RX_2B_OFFSET | MTK_RX_BT_32DWORDS, MTK_QDMA_GLO_CFG); @@ -2233,6 +2250,9 @@ static void mtk_gdm_config(struct mtk_eth *eth, u32 config) val |= config; + if (!i && eth->netdev[0] && netdev_uses_dsa(eth->netdev[0])) + val |= MTK_GDMA_SPECIAL_TAG; + mtk_w32(eth, val, MTK_GDMA_FWD_CFG(i)); } /* Reset and enable PSE */ @@ -2255,12 +2275,17 @@ static int mtk_open(struct net_device *dev) /* we run 2 netdevs on the same dma ring so we only bring it up once */ if (!refcount_read(&eth->dma_refcnt)) { - int err = mtk_start_dma(eth); + u32 gdm_config = MTK_GDMA_TO_PDMA; + int err; + err = mtk_start_dma(eth); if (err) return err; - mtk_gdm_config(eth, MTK_GDMA_TO_PDMA); + if (eth->soc->offload_version &&
mtk_ppe_start(&eth->ppe) == 0) + gdm_config = MTK_GDMA_TO_PPE; + + mtk_gdm_config(eth, gdm_config); napi_enable(&eth->tx_napi); napi_enable(&eth->rx_napi); @@ -2321,12 +2346,18 @@ static int mtk_stop(struct net_device *dev) napi_disable(&eth->tx_napi); napi_disable(&eth->rx_napi); + cancel_work_sync(&eth->rx_dim.work); + cancel_work_sync(&eth->tx_dim.work); + if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) mtk_stop_dma(eth, MTK_QDMA_GLO_CFG); mtk_stop_dma(eth, MTK_PDMA_GLO_CFG); mtk_dma_free(eth); + if (eth->soc->offload_version) + mtk_ppe_stop(&eth->ppe); + return 0; } @@ -2370,6 +2401,64 @@ err_disable_clks: return ret; } +static void mtk_dim_rx(struct work_struct *work) +{ + struct dim *dim = container_of(work, struct dim, work); + struct mtk_eth *eth = container_of(dim, struct mtk_eth, rx_dim); + struct dim_cq_moder cur_profile; + u32 val, cur; + + cur_profile = net_dim_get_rx_moderation(eth->rx_dim.mode, + dim->profile_ix); + spin_lock_bh(&eth->dim_lock); + + val = mtk_r32(eth, MTK_PDMA_DELAY_INT); + val &= MTK_PDMA_DELAY_TX_MASK; + val |= MTK_PDMA_DELAY_RX_EN; + + cur = min_t(u32, DIV_ROUND_UP(cur_profile.usec, 20), MTK_PDMA_DELAY_PTIME_MASK); + val |= cur << MTK_PDMA_DELAY_RX_PTIME_SHIFT; + + cur = min_t(u32, cur_profile.pkts, MTK_PDMA_DELAY_PINT_MASK); + val |= cur << MTK_PDMA_DELAY_RX_PINT_SHIFT; + + mtk_w32(eth, val, MTK_PDMA_DELAY_INT); + mtk_w32(eth, val, MTK_QDMA_DELAY_INT); + + spin_unlock_bh(&eth->dim_lock); + + dim->state = DIM_START_MEASURE; +} + +static void mtk_dim_tx(struct work_struct *work) +{ + struct dim *dim = container_of(work, struct dim, work); + struct mtk_eth *eth = container_of(dim, struct mtk_eth, tx_dim); + struct dim_cq_moder cur_profile; + u32 val, cur; + + cur_profile = net_dim_get_tx_moderation(eth->tx_dim.mode, + dim->profile_ix); + spin_lock_bh(&eth->dim_lock); + + val = mtk_r32(eth, MTK_PDMA_DELAY_INT); + val &= MTK_PDMA_DELAY_RX_MASK; + val |= MTK_PDMA_DELAY_TX_EN; + + cur = min_t(u32, DIV_ROUND_UP(cur_profile.usec, 20), MTK_PDMA_DELAY_PTIME_MASK); + val |= cur << MTK_PDMA_DELAY_TX_PTIME_SHIFT; + + cur = min_t(u32, cur_profile.pkts, MTK_PDMA_DELAY_PINT_MASK); + val |= cur << MTK_PDMA_DELAY_TX_PINT_SHIFT; + + mtk_w32(eth, val, MTK_PDMA_DELAY_INT); + mtk_w32(eth, val, MTK_QDMA_DELAY_INT); + + spin_unlock_bh(&eth->dim_lock); + + dim->state = DIM_START_MEASURE; +} + static int mtk_hw_init(struct mtk_eth *eth) { int i, val, ret; @@ -2391,9 +2480,6 @@ static int mtk_hw_init(struct mtk_eth *eth) goto err_disable_pm; } - /* enable interrupt delay for RX */ - mtk_w32(eth, MTK_PDMA_DELAY_RX_DELAY, MTK_PDMA_DELAY_INT); - /* disable delay and normal interrupt */ mtk_tx_irq_disable(eth, ~0); mtk_rx_irq_disable(eth, ~0); @@ -2432,11 +2518,11 @@ static int mtk_hw_init(struct mtk_eth *eth) /* Enable RX VLan Offloading */ mtk_w32(eth, 1, MTK_CDMP_EG_CTRL); - /* enable interrupt delay for RX */ - mtk_w32(eth, MTK_PDMA_DELAY_RX_DELAY, MTK_PDMA_DELAY_INT); + /* set interrupt delays based on current Net DIM sample */ + mtk_dim_rx(&eth->rx_dim.work); + mtk_dim_tx(&eth->tx_dim.work); /* disable delay and normal interrupt */ - mtk_w32(eth, 0, MTK_QDMA_DELAY_INT); mtk_tx_irq_disable(eth, ~0); mtk_rx_irq_disable(eth, ~0); @@ -2473,14 +2559,11 @@ static int __init mtk_init(struct net_device *dev) { struct mtk_mac *mac = netdev_priv(dev); struct mtk_eth *eth = mac->hw; - const char *mac_addr; - - mac_addr = of_get_mac_address(mac->of_node); - if (!IS_ERR(mac_addr)) - ether_addr_copy(dev->dev_addr, mac_addr); + int ret; - /* If the mac address is invalid, use random mac address */ - if (!is_valid_ether_addr(dev->dev_addr))
{ + ret = of_get_mac_address(mac->of_node, dev->dev_addr); + if (ret) { + /* If the mac address is invalid, use random mac address */ eth_hw_addr_random(dev); dev_err(eth->dev, "generated random MAC address %pM\n", dev->dev_addr); @@ -2832,6 +2915,7 @@ static const struct net_device_ops mtk_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = mtk_poll_controller, #endif + .ndo_setup_tc = mtk_eth_setup_tc, }; static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) @@ -2973,6 +3057,13 @@ static int mtk_probe(struct platform_device *pdev) spin_lock_init(&eth->page_lock); spin_lock_init(&eth->tx_irq_lock); spin_lock_init(&eth->rx_irq_lock); + spin_lock_init(&eth->dim_lock); + + eth->rx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + INIT_WORK(&eth->rx_dim.work, mtk_dim_rx); + + eth->tx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + INIT_WORK(&eth->tx_dim.work, mtk_dim_tx); if (!MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) { eth->ethsys = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, @@ -3088,6 +3179,17 @@ static int mtk_probe(struct platform_device *pdev) goto err_free_dev; } + if (eth->soc->offload_version) { + err = mtk_ppe_init(&eth->ppe, eth->dev, + eth->base + MTK_ETH_PPE_BASE, 2); + if (err) + goto err_free_dev; + + err = mtk_eth_offload_init(eth); + if (err) + goto err_free_dev; + } + for (i = 0; i < MTK_MAX_DEVS; i++) { if (!eth->netdev[i]) continue; @@ -3162,6 +3264,7 @@ static const struct mtk_soc_data mt7621_data = { .hw_features = MTK_HW_FEATURES, .required_clks = MT7621_CLKS_BITMAP, .required_pctl = false, + .offload_version = 2, }; static const struct mtk_soc_data mt7622_data = { @@ -3170,6 +3273,7 @@ static const struct mtk_soc_data mt7622_data = { .hw_features = MTK_HW_FEATURES, .required_clks = MT7622_CLKS_BITMAP, .required_pctl = false, + .offload_version = 2, }; static const struct mtk_soc_data mt7623_data = { @@ -3177,6 +3281,7 @@ static const struct mtk_soc_data mt7623_data = { .hw_features = MTK_HW_FEATURES, .required_clks = MT7623_CLKS_BITMAP, .required_pctl = true, + .offload_version = 2, }; static const struct mtk_soc_data mt7629_data = { diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index fd3cec8f06ba..11331b44ba07 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -15,12 +15,15 @@ #include <linux/u64_stats_sync.h> #include <linux/refcount.h> #include <linux/phylink.h> +#include <linux/rhashtable.h> +#include <linux/dim.h> +#include "mtk_ppe.h" #define MTK_QDMA_PAGE_SIZE 2048 #define MTK_MAX_RX_LENGTH 1536 #define MTK_MAX_RX_LENGTH_2K 2048 #define MTK_TX_DMA_BUF_LEN 0x3fff -#define MTK_DMA_SIZE 256 +#define MTK_DMA_SIZE 512 #define MTK_NAPI_WEIGHT 64 #define MTK_MAC_COUNT 2 #define MTK_RX_ETH_HLEN (ETH_HLEN + ETH_FCS_LEN) @@ -40,7 +43,8 @@ NETIF_F_HW_VLAN_CTAG_RX | \ NETIF_F_SG | NETIF_F_TSO | \ NETIF_F_TSO6 | \ - NETIF_F_IPV6_CSUM) + NETIF_F_IPV6_CSUM |\ + NETIF_F_HW_TC) #define MTK_HW_FEATURES_MT7628 (NETIF_F_SG | NETIF_F_RXCSUM) #define NEXT_DESP_IDX(X, Y) (((X) + 1) & ((Y) - 1)) @@ -82,10 +86,12 @@ /* GDM Egress Control Register */ #define MTK_GDMA_FWD_CFG(x) (0x500 + (x * 0x1000)) +#define MTK_GDMA_SPECIAL_TAG BIT(24) #define MTK_GDMA_ICS_EN BIT(22) #define MTK_GDMA_TCS_EN BIT(21) #define MTK_GDMA_UCS_EN BIT(20) #define MTK_GDMA_TO_PDMA 0x0 +#define MTK_GDMA_TO_PPE 0x4444 #define MTK_GDMA_DROP_ALL 0x7777 /* Unicast Filter MAC Address Register - Low */ @@ -132,13 +138,18 @@ /* PDMA Delay Interrupt Register */ #define
MTK_PDMA_DELAY_INT 0xa0c +#define MTK_PDMA_DELAY_RX_MASK GENMASK(15, 0) #define MTK_PDMA_DELAY_RX_EN BIT(15) -#define MTK_PDMA_DELAY_RX_PINT 4 #define MTK_PDMA_DELAY_RX_PINT_SHIFT 8 -#define MTK_PDMA_DELAY_RX_PTIME 4 -#define MTK_PDMA_DELAY_RX_DELAY \ - (MTK_PDMA_DELAY_RX_EN | MTK_PDMA_DELAY_RX_PTIME | \ - (MTK_PDMA_DELAY_RX_PINT << MTK_PDMA_DELAY_RX_PINT_SHIFT)) +#define MTK_PDMA_DELAY_RX_PTIME_SHIFT 0 + +#define MTK_PDMA_DELAY_TX_MASK GENMASK(31, 16) +#define MTK_PDMA_DELAY_TX_EN BIT(31) +#define MTK_PDMA_DELAY_TX_PINT_SHIFT 24 +#define MTK_PDMA_DELAY_TX_PTIME_SHIFT 16 + +#define MTK_PDMA_DELAY_PINT_MASK 0x7f +#define MTK_PDMA_DELAY_PTIME_MASK 0xff /* PDMA Interrupt Status Register */ #define MTK_PDMA_INT_STATUS 0xa20 @@ -198,12 +209,12 @@ #define MTK_RX_BT_32DWORDS (3 << 11) #define MTK_NDP_CO_PRO BIT(10) #define MTK_TX_WB_DDONE BIT(6) -#define MTK_DMA_SIZE_16DWORDS (2 << 4) +#define MTK_TX_BT_32DWORDS (3 << 4) #define MTK_RX_DMA_BUSY BIT(3) #define MTK_TX_DMA_BUSY BIT(1) #define MTK_RX_DMA_EN BIT(2) #define MTK_TX_DMA_EN BIT(0) -#define MTK_DMA_BUSY_TIMEOUT HZ +#define MTK_DMA_BUSY_TIMEOUT_US 1000000 /* QDMA Reset Index Register */ #define MTK_QDMA_RST_IDX 0x1A08 @@ -220,6 +231,7 @@ /* QDMA Interrupt Status Register */ #define MTK_QDMA_INT_STATUS 0x1A18 #define MTK_RX_DONE_DLY BIT(30) +#define MTK_TX_DONE_DLY BIT(28) #define MTK_RX_DONE_INT3 BIT(19) #define MTK_RX_DONE_INT2 BIT(18) #define MTK_RX_DONE_INT1 BIT(17) @@ -229,8 +241,7 @@ #define MTK_TX_DONE_INT1 BIT(1) #define MTK_TX_DONE_INT0 BIT(0) #define MTK_RX_DONE_INT MTK_RX_DONE_DLY -#define MTK_TX_DONE_INT (MTK_TX_DONE_INT0 | MTK_TX_DONE_INT1 | \ - MTK_TX_DONE_INT2 | MTK_TX_DONE_INT3) +#define MTK_TX_DONE_INT MTK_TX_DONE_DLY /* QDMA Interrupt grouping registers */ #define MTK_QDMA_INT_GRP1 0x1a20 @@ -296,15 +307,23 @@ #define RX_DMA_LSO BIT(30) #define RX_DMA_PLEN0(_x) (((_x) & 0x3fff) << 16) #define RX_DMA_GET_PLEN0(_x) (((_x) >> 16) & 0x3fff) +#define RX_DMA_VTAG BIT(15) /* QDMA descriptor rxd3 */ #define RX_DMA_VID(_x) ((_x) & 0xfff) /* QDMA descriptor rxd4 */ +#define MTK_RXD4_FOE_ENTRY GENMASK(13, 0) +#define MTK_RXD4_PPE_CPU_REASON GENMASK(18, 14) +#define MTK_RXD4_SRC_PORT GENMASK(21, 19) +#define MTK_RXD4_ALG GENMASK(31, 22) + +/* QDMA descriptor rxd4 */ #define RX_DMA_L4_VALID BIT(24) #define RX_DMA_L4_VALID_PDMA BIT(30) /* when PDMA is used */ #define RX_DMA_FPORT_SHIFT 19 #define RX_DMA_FPORT_MASK 0x7 +#define RX_DMA_SPECIAL_TAG BIT(22) /* PHY Indirect Access Control registers */ #define MTK_PHY_IAC 0x10004 @@ -623,6 +642,7 @@ struct mtk_tx_buf { * @phys: The physical addr of tx_buf * @next_free: Pointer to the next free descriptor * @last_free: Pointer to the last free descriptor + * @last_free_ptr: Hardware pointer value of the last free descriptor * @thresh: The threshold of minimum amount of free descriptors * @free_count: QDMA uses a linked list. 
Track how many free descriptors * are present @@ -633,6 +653,7 @@ struct mtk_tx_ring { dma_addr_t phys; struct mtk_tx_dma *next_free; struct mtk_tx_dma *last_free; + u32 last_free_ptr; u16 thresh; atomic_t free_count; int dma_size; @@ -802,6 +823,7 @@ struct mtk_soc_data { u32 caps; u32 required_clks; bool required_pctl; + u8 offload_version; netdev_features_t hw_features; }; @@ -835,6 +857,7 @@ struct mtk_sgmii { * @page_lock: Make sure that register operations are atomic * @tx_irq_lock: Make sure that IRQ register operations are atomic * @rx_irq_lock: Make sure that IRQ register operations are atomic + * @dim_lock: Make sure that Net DIM operations are atomic * @dummy_dev: we run 2 netdevs on 1 physical DMA ring and need a * dummy for NAPI to work * @netdev: The netdev instances @@ -853,6 +876,14 @@ struct mtk_sgmii { * @rx_ring_qdma: Pointer to the memory holding info about the QDMA RX ring * @tx_napi: The TX NAPI struct * @rx_napi: The RX NAPI struct + * @rx_events: Net DIM RX event counter + * @rx_packets: Net DIM RX packet counter + * @rx_bytes: Net DIM RX byte counter + * @rx_dim: Net DIM RX context + * @tx_events: Net DIM TX event counter + * @tx_packets: Net DIM TX packet counter + * @tx_bytes: Net DIM TX byte counter + * @tx_dim: Net DIM TX context * @scratch_ring: Newer SoCs need memory for a second HW managed TX ring * @phy_scratch_ring: physical address of scratch_ring * @scratch_head: The scratch memory that scratch_ring points to. @@ -897,10 +928,25 @@ struct mtk_eth { const struct mtk_soc_data *soc; + spinlock_t dim_lock; + + u32 rx_events; + u32 rx_packets; + u32 rx_bytes; + struct dim rx_dim; + + u32 tx_events; + u32 tx_packets; + u32 tx_bytes; + struct dim tx_dim; + u32 tx_int_mask_reg; u32 tx_int_status_reg; u32 rx_dma_l4_valid; int ip_align; + + struct mtk_ppe ppe; + struct rhashtable flow_table; }; /* struct mtk_mac - the structure that holds the info about the MACs of the @@ -945,4 +991,9 @@ int mtk_gmac_sgmii_path_setup(struct mtk_eth *eth, int mac_id); int mtk_gmac_gephy_path_setup(struct mtk_eth *eth, int mac_id); int mtk_gmac_rgmii_path_setup(struct mtk_eth *eth, int mac_id); +int mtk_eth_offload_init(struct mtk_eth *eth); +int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data); + #endif /* MTK_ETH_H */ diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c new file mode 100644 index 000000000000..3ad10c793308 --- /dev/null +++ b/drivers/net/ethernet/mediatek/mtk_ppe.c @@ -0,0 +1,509 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */ + +#include <linux/kernel.h> +#include <linux/io.h> +#include <linux/iopoll.h> +#include <linux/etherdevice.h> +#include <linux/platform_device.h> +#include "mtk_ppe.h" +#include "mtk_ppe_regs.h" + +static void ppe_w32(struct mtk_ppe *ppe, u32 reg, u32 val) +{ + writel(val, ppe->base + reg); +} + +static u32 ppe_r32(struct mtk_ppe *ppe, u32 reg) +{ + return readl(ppe->base + reg); +} + +static u32 ppe_m32(struct mtk_ppe *ppe, u32 reg, u32 mask, u32 set) +{ + u32 val; + + val = ppe_r32(ppe, reg); + val &= ~mask; + val |= set; + ppe_w32(ppe, reg, val); + + return val; +} + +static u32 ppe_set(struct mtk_ppe *ppe, u32 reg, u32 val) +{ + return ppe_m32(ppe, reg, 0, val); +} + +static u32 ppe_clear(struct mtk_ppe *ppe, u32 reg, u32 val) +{ + return ppe_m32(ppe, reg, val, 0); +} + +static int mtk_ppe_wait_busy(struct mtk_ppe *ppe) +{ + int ret; + u32 val; + + ret = readl_poll_timeout(ppe->base + MTK_PPE_GLO_CFG,
val, + !(val & MTK_PPE_GLO_CFG_BUSY), + 20, MTK_PPE_WAIT_TIMEOUT_US); + + if (ret) + dev_err(ppe->dev, "PPE table busy"); + + return ret; +} + +static void mtk_ppe_cache_clear(struct mtk_ppe *ppe) +{ + ppe_set(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR); + ppe_clear(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR); +} + +static void mtk_ppe_cache_enable(struct mtk_ppe *ppe, bool enable) +{ + mtk_ppe_cache_clear(ppe); + + ppe_m32(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_EN, + enable * MTK_PPE_CACHE_CTL_EN); +} + +static u32 mtk_ppe_hash_entry(struct mtk_foe_entry *e) +{ + u32 hv1, hv2, hv3; + u32 hash; + + switch (FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, e->ib1)) { + case MTK_PPE_PKT_TYPE_BRIDGE: + hv1 = e->bridge.src_mac_lo; + hv1 ^= ((e->bridge.src_mac_hi & 0xffff) << 16); + hv2 = e->bridge.src_mac_hi >> 16; + hv2 ^= e->bridge.dest_mac_lo; + hv3 = e->bridge.dest_mac_hi; + break; + case MTK_PPE_PKT_TYPE_IPV4_ROUTE: + case MTK_PPE_PKT_TYPE_IPV4_HNAPT: + hv1 = e->ipv4.orig.ports; + hv2 = e->ipv4.orig.dest_ip; + hv3 = e->ipv4.orig.src_ip; + break; + case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T: + case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T: + hv1 = e->ipv6.src_ip[3] ^ e->ipv6.dest_ip[3]; + hv1 ^= e->ipv6.ports; + + hv2 = e->ipv6.src_ip[2] ^ e->ipv6.dest_ip[2]; + hv2 ^= e->ipv6.dest_ip[0]; + + hv3 = e->ipv6.src_ip[1] ^ e->ipv6.dest_ip[1]; + hv3 ^= e->ipv6.src_ip[0]; + break; + case MTK_PPE_PKT_TYPE_IPV4_DSLITE: + case MTK_PPE_PKT_TYPE_IPV6_6RD: + default: + WARN_ON_ONCE(1); + return MTK_PPE_HASH_MASK; + } + + hash = (hv1 & hv2) | ((~hv1) & hv3); + hash = (hash >> 24) | ((hash & 0xffffff) << 8); + hash ^= hv1 ^ hv2 ^ hv3; + hash ^= hash >> 16; + hash <<= 1; + hash &= MTK_PPE_ENTRIES - 1; + + return hash; +} + +static inline struct mtk_foe_mac_info * +mtk_foe_entry_l2(struct mtk_foe_entry *entry) +{ + int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1); + + if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) + return &entry->ipv6.l2; + + return &entry->ipv4.l2; +} + +static inline u32 * +mtk_foe_entry_ib2(struct mtk_foe_entry *entry) +{ + int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1); + + if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) + return &entry->ipv6.ib2; + + return &entry->ipv4.ib2; +} + +int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto, + u8 pse_port, u8 *src_mac, u8 *dest_mac) +{ + struct mtk_foe_mac_info *l2; + u32 ports_pad, val; + + memset(entry, 0, sizeof(*entry)); + + val = FIELD_PREP(MTK_FOE_IB1_STATE, MTK_FOE_STATE_BIND) | + FIELD_PREP(MTK_FOE_IB1_PACKET_TYPE, type) | + FIELD_PREP(MTK_FOE_IB1_UDP, l4proto == IPPROTO_UDP) | + MTK_FOE_IB1_BIND_TTL | + MTK_FOE_IB1_BIND_CACHE; + entry->ib1 = val; + + val = FIELD_PREP(MTK_FOE_IB2_PORT_MG, 0x3f) | + FIELD_PREP(MTK_FOE_IB2_PORT_AG, 0x1f) | + FIELD_PREP(MTK_FOE_IB2_DEST_PORT, pse_port); + + if (is_multicast_ether_addr(dest_mac)) + val |= MTK_FOE_IB2_MULTICAST; + + ports_pad = 0xa5a5a500 | (l4proto & 0xff); + if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE) + entry->ipv4.orig.ports = ports_pad; + if (type == MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T) + entry->ipv6.ports = ports_pad; + + if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) { + entry->ipv6.ib2 = val; + l2 = &entry->ipv6.l2; + } else { + entry->ipv4.ib2 = val; + l2 = &entry->ipv4.l2; + } + + l2->dest_mac_hi = get_unaligned_be32(dest_mac); + l2->dest_mac_lo = get_unaligned_be16(dest_mac + 4); + l2->src_mac_hi = get_unaligned_be32(src_mac); + l2->src_mac_lo = get_unaligned_be16(src_mac + 4); + + if (type >= MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T) + l2->etype = ETH_P_IPV6; + else + l2->etype = 
ETH_P_IP; + + return 0; +} + +int mtk_foe_entry_set_pse_port(struct mtk_foe_entry *entry, u8 port) +{ + u32 *ib2 = mtk_foe_entry_ib2(entry); + u32 val; + + val = *ib2; + val &= ~MTK_FOE_IB2_DEST_PORT; + val |= FIELD_PREP(MTK_FOE_IB2_DEST_PORT, port); + *ib2 = val; + + return 0; +} + +int mtk_foe_entry_set_ipv4_tuple(struct mtk_foe_entry *entry, bool egress, + __be32 src_addr, __be16 src_port, + __be32 dest_addr, __be16 dest_port) +{ + int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1); + struct mtk_ipv4_tuple *t; + + switch (type) { + case MTK_PPE_PKT_TYPE_IPV4_HNAPT: + if (egress) { + t = &entry->ipv4.new; + break; + } + fallthrough; + case MTK_PPE_PKT_TYPE_IPV4_DSLITE: + case MTK_PPE_PKT_TYPE_IPV4_ROUTE: + t = &entry->ipv4.orig; + break; + case MTK_PPE_PKT_TYPE_IPV6_6RD: + entry->ipv6_6rd.tunnel_src_ip = be32_to_cpu(src_addr); + entry->ipv6_6rd.tunnel_dest_ip = be32_to_cpu(dest_addr); + return 0; + default: + WARN_ON_ONCE(1); + return -EINVAL; + } + + t->src_ip = be32_to_cpu(src_addr); + t->dest_ip = be32_to_cpu(dest_addr); + + if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE) + return 0; + + t->src_port = be16_to_cpu(src_port); + t->dest_port = be16_to_cpu(dest_port); + + return 0; +} + +int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry, + __be32 *src_addr, __be16 src_port, + __be32 *dest_addr, __be16 dest_port) +{ + int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1); + u32 *src, *dest; + int i; + + switch (type) { + case MTK_PPE_PKT_TYPE_IPV4_DSLITE: + src = entry->dslite.tunnel_src_ip; + dest = entry->dslite.tunnel_dest_ip; + break; + case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T: + case MTK_PPE_PKT_TYPE_IPV6_6RD: + entry->ipv6.src_port = be16_to_cpu(src_port); + entry->ipv6.dest_port = be16_to_cpu(dest_port); + fallthrough; + case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T: + src = entry->ipv6.src_ip; + dest = entry->ipv6.dest_ip; + break; + default: + WARN_ON_ONCE(1); + return -EINVAL; + } + + for (i = 0; i < 4; i++) + src[i] = be32_to_cpu(src_addr[i]); + for (i = 0; i < 4; i++) + dest[i] = be32_to_cpu(dest_addr[i]); + + return 0; +} + +int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port) +{ + struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry); + + l2->etype = BIT(port); + + if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_LAYER)) + entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1); + else + l2->etype |= BIT(8); + + entry->ib1 &= ~MTK_FOE_IB1_BIND_VLAN_TAG; + + return 0; +} + +int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid) +{ + struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry); + + switch (FIELD_GET(MTK_FOE_IB1_BIND_VLAN_LAYER, entry->ib1)) { + case 0: + entry->ib1 |= MTK_FOE_IB1_BIND_VLAN_TAG | + FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1); + l2->vlan1 = vid; + return 0; + case 1: + if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_TAG)) { + l2->vlan1 = vid; + l2->etype |= BIT(8); + } else { + l2->vlan2 = vid; + entry->ib1 += FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1); + } + return 0; + default: + return -ENOSPC; + } +} + +int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid) +{ + struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry); + + if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_LAYER) || + (entry->ib1 & MTK_FOE_IB1_BIND_VLAN_TAG)) + l2->etype = ETH_P_PPP_SES; + + entry->ib1 |= MTK_FOE_IB1_BIND_PPPOE; + l2->pppoe_id = sid; + + return 0; +} + +static inline bool mtk_foe_entry_usable(struct mtk_foe_entry *entry) +{ + return !(entry->ib1 & MTK_FOE_IB1_STATIC) && + FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1) != MTK_FOE_STATE_BIND; +} + +int 
mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry, + u16 timestamp) +{ + struct mtk_foe_entry *hwe; + u32 hash; + + timestamp &= MTK_FOE_IB1_BIND_TIMESTAMP; + entry->ib1 &= ~MTK_FOE_IB1_BIND_TIMESTAMP; + entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_TIMESTAMP, timestamp); + + hash = mtk_ppe_hash_entry(entry); + hwe = &ppe->foe_table[hash]; + if (!mtk_foe_entry_usable(hwe)) { + hwe++; + hash++; + + if (!mtk_foe_entry_usable(hwe)) + return -ENOSPC; + } + + memcpy(&hwe->data, &entry->data, sizeof(hwe->data)); + wmb(); + hwe->ib1 = entry->ib1; + + dma_wmb(); + + mtk_ppe_cache_clear(ppe); + + return hash; +} + +int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base, + int version) +{ + struct mtk_foe_entry *foe; + + /* need to allocate a separate device, since the PPE DMA access is + * not coherent. + */ + ppe->base = base; + ppe->dev = dev; + ppe->version = version; + + foe = dmam_alloc_coherent(ppe->dev, MTK_PPE_ENTRIES * sizeof(*foe), + &ppe->foe_phys, GFP_KERNEL); + if (!foe) + return -ENOMEM; + + ppe->foe_table = foe; + + mtk_ppe_debugfs_init(ppe); + + return 0; +} + +static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe) +{ + static const u8 skip[] = { 12, 25, 38, 51, 76, 89, 102 }; + int i, k; + + memset(ppe->foe_table, 0, MTK_PPE_ENTRIES * sizeof(*ppe->foe_table)); + + if (!IS_ENABLED(CONFIG_SOC_MT7621)) + return; + + /* skip all entries that cross the 1024 byte boundary */ + for (i = 0; i < MTK_PPE_ENTRIES; i += 128) + for (k = 0; k < ARRAY_SIZE(skip); k++) + ppe->foe_table[i + skip[k]].ib1 |= MTK_FOE_IB1_STATIC; +} + +int mtk_ppe_start(struct mtk_ppe *ppe) +{ + u32 val; + + mtk_ppe_init_foe_table(ppe); + ppe_w32(ppe, MTK_PPE_TB_BASE, ppe->foe_phys); + + val = MTK_PPE_TB_CFG_ENTRY_80B | + MTK_PPE_TB_CFG_AGE_NON_L4 | + MTK_PPE_TB_CFG_AGE_UNBIND | + MTK_PPE_TB_CFG_AGE_TCP | + MTK_PPE_TB_CFG_AGE_UDP | + MTK_PPE_TB_CFG_AGE_TCP_FIN | + FIELD_PREP(MTK_PPE_TB_CFG_SEARCH_MISS, + MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD) | + FIELD_PREP(MTK_PPE_TB_CFG_KEEPALIVE, + MTK_PPE_KEEPALIVE_DISABLE) | + FIELD_PREP(MTK_PPE_TB_CFG_HASH_MODE, 1) | + FIELD_PREP(MTK_PPE_TB_CFG_SCAN_MODE, + MTK_PPE_SCAN_MODE_KEEPALIVE_AGE) | + FIELD_PREP(MTK_PPE_TB_CFG_ENTRY_NUM, + MTK_PPE_ENTRIES_SHIFT); + ppe_w32(ppe, MTK_PPE_TB_CFG, val); + + ppe_w32(ppe, MTK_PPE_IP_PROTO_CHK, + MTK_PPE_IP_PROTO_CHK_IPV4 | MTK_PPE_IP_PROTO_CHK_IPV6); + + mtk_ppe_cache_enable(ppe, true); + + val = MTK_PPE_FLOW_CFG_IP4_TCP_FRAG | + MTK_PPE_FLOW_CFG_IP4_UDP_FRAG | + MTK_PPE_FLOW_CFG_IP6_3T_ROUTE | + MTK_PPE_FLOW_CFG_IP6_5T_ROUTE | + MTK_PPE_FLOW_CFG_IP6_6RD | + MTK_PPE_FLOW_CFG_IP4_NAT | + MTK_PPE_FLOW_CFG_IP4_NAPT | + MTK_PPE_FLOW_CFG_IP4_DSLITE | + MTK_PPE_FLOW_CFG_L2_BRIDGE | + MTK_PPE_FLOW_CFG_IP4_NAT_FRAG; + ppe_w32(ppe, MTK_PPE_FLOW_CFG, val); + + val = FIELD_PREP(MTK_PPE_UNBIND_AGE_MIN_PACKETS, 1000) | + FIELD_PREP(MTK_PPE_UNBIND_AGE_DELTA, 3); + ppe_w32(ppe, MTK_PPE_UNBIND_AGE, val); + + val = FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_UDP, 12) | + FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_NON_L4, 1); + ppe_w32(ppe, MTK_PPE_BIND_AGE0, val); + + val = FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP_FIN, 1) | + FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP, 7); + ppe_w32(ppe, MTK_PPE_BIND_AGE1, val); + + val = MTK_PPE_BIND_LIMIT0_QUARTER | MTK_PPE_BIND_LIMIT0_HALF; + ppe_w32(ppe, MTK_PPE_BIND_LIMIT0, val); + + val = MTK_PPE_BIND_LIMIT1_FULL | + FIELD_PREP(MTK_PPE_BIND_LIMIT1_NON_L4, 1); + ppe_w32(ppe, MTK_PPE_BIND_LIMIT1, val); + + val = FIELD_PREP(MTK_PPE_BIND_RATE_BIND, 30) | + FIELD_PREP(MTK_PPE_BIND_RATE_PREBIND, 1); + 
ppe_w32(ppe, MTK_PPE_BIND_RATE, val); + + /* enable PPE */ + val = MTK_PPE_GLO_CFG_EN | + MTK_PPE_GLO_CFG_IP4_L4_CS_DROP | + MTK_PPE_GLO_CFG_IP4_CS_DROP | + MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE; + ppe_w32(ppe, MTK_PPE_GLO_CFG, val); + + ppe_w32(ppe, MTK_PPE_DEFAULT_CPU_PORT, 0); + + return 0; +} + +int mtk_ppe_stop(struct mtk_ppe *ppe) +{ + u32 val; + int i; + + for (i = 0; i < MTK_PPE_ENTRIES; i++) + ppe->foe_table[i].ib1 = FIELD_PREP(MTK_FOE_IB1_STATE, + MTK_FOE_STATE_INVALID); + + mtk_ppe_cache_enable(ppe, false); + + /* disable offload engine */ + ppe_clear(ppe, MTK_PPE_GLO_CFG, MTK_PPE_GLO_CFG_EN); + ppe_w32(ppe, MTK_PPE_FLOW_CFG, 0); + + /* disable aging */ + val = MTK_PPE_TB_CFG_AGE_NON_L4 | + MTK_PPE_TB_CFG_AGE_UNBIND | + MTK_PPE_TB_CFG_AGE_TCP | + MTK_PPE_TB_CFG_AGE_UDP | + MTK_PPE_TB_CFG_AGE_TCP_FIN; + ppe_clear(ppe, MTK_PPE_TB_CFG, val); + + return mtk_ppe_wait_busy(ppe); +} diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h new file mode 100644 index 000000000000..242fb8f2ae65 --- /dev/null +++ b/drivers/net/ethernet/mediatek/mtk_ppe.h @@ -0,0 +1,288 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */ + +#ifndef __MTK_PPE_H +#define __MTK_PPE_H + +#include <linux/kernel.h> +#include <linux/bitfield.h> + +#define MTK_ETH_PPE_BASE 0xc00 + +#define MTK_PPE_ENTRIES_SHIFT 3 +#define MTK_PPE_ENTRIES (1024 << MTK_PPE_ENTRIES_SHIFT) +#define MTK_PPE_HASH_MASK (MTK_PPE_ENTRIES - 1) +#define MTK_PPE_WAIT_TIMEOUT_US 1000000 + +#define MTK_FOE_IB1_UNBIND_TIMESTAMP GENMASK(7, 0) +#define MTK_FOE_IB1_UNBIND_PACKETS GENMASK(23, 8) +#define MTK_FOE_IB1_UNBIND_PREBIND BIT(24) + +#define MTK_FOE_IB1_BIND_TIMESTAMP GENMASK(14, 0) +#define MTK_FOE_IB1_BIND_KEEPALIVE BIT(15) +#define MTK_FOE_IB1_BIND_VLAN_LAYER GENMASK(18, 16) +#define MTK_FOE_IB1_BIND_PPPOE BIT(19) +#define MTK_FOE_IB1_BIND_VLAN_TAG BIT(20) +#define MTK_FOE_IB1_BIND_PKT_SAMPLE BIT(21) +#define MTK_FOE_IB1_BIND_CACHE BIT(22) +#define MTK_FOE_IB1_BIND_TUNNEL_DECAP BIT(23) +#define MTK_FOE_IB1_BIND_TTL BIT(24) + +#define MTK_FOE_IB1_PACKET_TYPE GENMASK(27, 25) +#define MTK_FOE_IB1_STATE GENMASK(29, 28) +#define MTK_FOE_IB1_UDP BIT(30) +#define MTK_FOE_IB1_STATIC BIT(31) + +enum { + MTK_PPE_PKT_TYPE_IPV4_HNAPT = 0, + MTK_PPE_PKT_TYPE_IPV4_ROUTE = 1, + MTK_PPE_PKT_TYPE_BRIDGE = 2, + MTK_PPE_PKT_TYPE_IPV4_DSLITE = 3, + MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T = 4, + MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T = 5, + MTK_PPE_PKT_TYPE_IPV6_6RD = 7, +}; + +#define MTK_FOE_IB2_QID GENMASK(3, 0) +#define MTK_FOE_IB2_PSE_QOS BIT(4) +#define MTK_FOE_IB2_DEST_PORT GENMASK(7, 5) +#define MTK_FOE_IB2_MULTICAST BIT(8) + +#define MTK_FOE_IB2_WHNAT_QID2 GENMASK(13, 12) +#define MTK_FOE_IB2_WHNAT_DEVIDX BIT(16) +#define MTK_FOE_IB2_WHNAT_NAT BIT(17) + +#define MTK_FOE_IB2_PORT_MG GENMASK(17, 12) + +#define MTK_FOE_IB2_PORT_AG GENMASK(23, 18) + +#define MTK_FOE_IB2_DSCP GENMASK(31, 24) + +#define MTK_FOE_VLAN2_WHNAT_BSS GENMASK(5, 0) +#define MTK_FOE_VLAN2_WHNAT_WCID GENMASK(13, 6) +#define MTK_FOE_VLAN2_WHNAT_RING GENMASK(15, 14) + +enum { + MTK_FOE_STATE_INVALID, + MTK_FOE_STATE_UNBIND, + MTK_FOE_STATE_BIND, + MTK_FOE_STATE_FIN +}; + +struct mtk_foe_mac_info { + u16 vlan1; + u16 etype; + + u32 dest_mac_hi; + + u16 vlan2; + u16 dest_mac_lo; + + u32 src_mac_hi; + + u16 pppoe_id; + u16 src_mac_lo; +}; + +struct mtk_foe_bridge { + u32 dest_mac_hi; + + u16 src_mac_lo; + u16 dest_mac_lo; + + u32 src_mac_hi; + + u32 ib2; + + u32 _rsv[5]; + + u32 udf_tsid; + struct mtk_foe_mac_info l2; +}; + 
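The ib1 word defined earlier in this header packs the entry state, packet type, bind flags and 15-bit bind timestamp into a single u32 through GENMASK()-based fields. As a rough, illustrative sketch (assumed example, not code from the patch) of how mtk_foe_entry_prepare() and mtk_foe_entry_commit() compose that word for a bound IPv4 HNAPT entry:

#include <linux/bitfield.h>

/* Illustrative only: build an ib1 state word the way
 * mtk_foe_entry_prepare() plus mtk_foe_entry_commit() do for a bound
 * IPv4 HNAPT flow. The timestamp is truncated to the 15-bit
 * MTK_FOE_IB1_BIND_TIMESTAMP field, exactly as in the commit path.
 */
static u32 example_foe_ib1(u16 timestamp)
{
	u32 ib1;

	ib1 = FIELD_PREP(MTK_FOE_IB1_STATE, MTK_FOE_STATE_BIND) |
	      FIELD_PREP(MTK_FOE_IB1_PACKET_TYPE, MTK_PPE_PKT_TYPE_IPV4_HNAPT) |
	      MTK_FOE_IB1_BIND_TTL |
	      MTK_FOE_IB1_BIND_CACHE;

	ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_TIMESTAMP,
			  timestamp & MTK_FOE_IB1_BIND_TIMESTAMP);

	return ib1;
}

FIELD_GET() with the same masks recovers the individual fields, which is how mtk_ppe_hash_entry() and the debugfs dump further below decode hardware entries.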
+struct mtk_ipv4_tuple { + u32 src_ip; + u32 dest_ip; + union { + struct { + u16 dest_port; + u16 src_port; + }; + struct { + u8 protocol; + u8 _pad[3]; /* fill with 0xa5a5a5 */ + }; + u32 ports; + }; +}; + +struct mtk_foe_ipv4 { + struct mtk_ipv4_tuple orig; + + u32 ib2; + + struct mtk_ipv4_tuple new; + + u16 timestamp; + u16 _rsv0[3]; + + u32 udf_tsid; + + struct mtk_foe_mac_info l2; +}; + +struct mtk_foe_ipv4_dslite { + struct mtk_ipv4_tuple ip4; + + u32 tunnel_src_ip[4]; + u32 tunnel_dest_ip[4]; + + u8 flow_label[3]; + u8 priority; + + u32 udf_tsid; + + u32 ib2; + + struct mtk_foe_mac_info l2; +}; + +struct mtk_foe_ipv6 { + u32 src_ip[4]; + u32 dest_ip[4]; + + union { + struct { + u8 protocol; + u8 _pad[3]; /* fill with 0xa5a5a5 */ + }; /* 3-tuple */ + struct { + u16 dest_port; + u16 src_port; + }; /* 5-tuple */ + u32 ports; + }; + + u32 _rsv[3]; + + u32 udf; + + u32 ib2; + struct mtk_foe_mac_info l2; +}; + +struct mtk_foe_ipv6_6rd { + u32 src_ip[4]; + u32 dest_ip[4]; + u16 dest_port; + u16 src_port; + + u32 tunnel_src_ip; + u32 tunnel_dest_ip; + + u16 hdr_csum; + u8 dscp; + u8 ttl; + + u8 flag; + u8 pad; + u8 per_flow_6rd_id; + u8 pad2; + + u32 ib2; + struct mtk_foe_mac_info l2; +}; + +struct mtk_foe_entry { + u32 ib1; + + union { + struct mtk_foe_bridge bridge; + struct mtk_foe_ipv4 ipv4; + struct mtk_foe_ipv4_dslite dslite; + struct mtk_foe_ipv6 ipv6; + struct mtk_foe_ipv6_6rd ipv6_6rd; + u32 data[19]; + }; +}; + +enum { + MTK_PPE_CPU_REASON_TTL_EXCEEDED = 0x02, + MTK_PPE_CPU_REASON_OPTION_HEADER = 0x03, + MTK_PPE_CPU_REASON_NO_FLOW = 0x07, + MTK_PPE_CPU_REASON_IPV4_FRAG = 0x08, + MTK_PPE_CPU_REASON_IPV4_DSLITE_FRAG = 0x09, + MTK_PPE_CPU_REASON_IPV4_DSLITE_NO_TCP_UDP = 0x0a, + MTK_PPE_CPU_REASON_IPV6_6RD_NO_TCP_UDP = 0x0b, + MTK_PPE_CPU_REASON_TCP_FIN_SYN_RST = 0x0c, + MTK_PPE_CPU_REASON_UN_HIT = 0x0d, + MTK_PPE_CPU_REASON_HIT_UNBIND = 0x0e, + MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED = 0x0f, + MTK_PPE_CPU_REASON_HIT_BIND_TCP_FIN = 0x10, + MTK_PPE_CPU_REASON_HIT_TTL_1 = 0x11, + MTK_PPE_CPU_REASON_HIT_BIND_VLAN_VIOLATION = 0x12, + MTK_PPE_CPU_REASON_KEEPALIVE_UC_OLD_HDR = 0x13, + MTK_PPE_CPU_REASON_KEEPALIVE_MC_NEW_HDR = 0x14, + MTK_PPE_CPU_REASON_KEEPALIVE_DUP_OLD_HDR = 0x15, + MTK_PPE_CPU_REASON_HIT_BIND_FORCE_CPU = 0x16, + MTK_PPE_CPU_REASON_TUNNEL_OPTION_HEADER = 0x17, + MTK_PPE_CPU_REASON_MULTICAST_TO_CPU = 0x18, + MTK_PPE_CPU_REASON_MULTICAST_TO_GMAC1_CPU = 0x19, + MTK_PPE_CPU_REASON_HIT_PRE_BIND = 0x1a, + MTK_PPE_CPU_REASON_PACKET_SAMPLING = 0x1b, + MTK_PPE_CPU_REASON_EXCEED_MTU = 0x1c, + MTK_PPE_CPU_REASON_PPE_BYPASS = 0x1e, + MTK_PPE_CPU_REASON_INVALID = 0x1f, +}; + +struct mtk_ppe { + struct device *dev; + void __iomem *base; + int version; + + struct mtk_foe_entry *foe_table; + dma_addr_t foe_phys; + + void *acct_table; +}; + +int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base, + int version); +int mtk_ppe_start(struct mtk_ppe *ppe); +int mtk_ppe_stop(struct mtk_ppe *ppe); + +static inline void +mtk_foe_entry_clear(struct mtk_ppe *ppe, u16 hash) +{ + ppe->foe_table[hash].ib1 = 0; + dma_wmb(); +} + +static inline int +mtk_foe_entry_timestamp(struct mtk_ppe *ppe, u16 hash) +{ + u32 ib1 = READ_ONCE(ppe->foe_table[hash].ib1); + + if (FIELD_GET(MTK_FOE_IB1_STATE, ib1) != MTK_FOE_STATE_BIND) + return -1; + + return FIELD_GET(MTK_FOE_IB1_BIND_TIMESTAMP, ib1); +} + +int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto, + u8 pse_port, u8 *src_mac, u8 *dest_mac); +int mtk_foe_entry_set_pse_port(struct mtk_foe_entry *entry, u8 port); 
+int mtk_foe_entry_set_ipv4_tuple(struct mtk_foe_entry *entry, bool egress, + __be32 src_addr, __be16 src_port, + __be32 dest_addr, __be16 dest_port); +int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry, + __be32 *src_addr, __be16 src_port, + __be32 *dest_addr, __be16 dest_port); +int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port); +int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid); +int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid); +int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry, + u16 timestamp); +int mtk_ppe_debugfs_init(struct mtk_ppe *ppe); + +#endif diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c new file mode 100644 index 000000000000..98b1d3577bcd --- /dev/null +++ b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c @@ -0,0 +1,217 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */ + +#include <linux/kernel.h> +#include <linux/debugfs.h> +#include "mtk_eth_soc.h" + +struct mtk_flow_addr_info { + void *src, *dest; + u16 *src_port, *dest_port; + bool ipv6; +}; + +static const char *mtk_foe_entry_state_str(int state) +{ + static const char * const state_str[] = { + [MTK_FOE_STATE_INVALID] = "INV", + [MTK_FOE_STATE_UNBIND] = "UNB", + [MTK_FOE_STATE_BIND] = "BND", + [MTK_FOE_STATE_FIN] = "FIN", + }; + + if (state >= ARRAY_SIZE(state_str) || !state_str[state]) + return "UNK"; + + return state_str[state]; +} + +static const char *mtk_foe_pkt_type_str(int type) +{ + static const char * const type_str[] = { + [MTK_PPE_PKT_TYPE_IPV4_HNAPT] = "IPv4 5T", + [MTK_PPE_PKT_TYPE_IPV4_ROUTE] = "IPv4 3T", + [MTK_PPE_PKT_TYPE_BRIDGE] = "L2", + [MTK_PPE_PKT_TYPE_IPV4_DSLITE] = "DS-LITE", + [MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T] = "IPv6 3T", + [MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T] = "IPv6 5T", + [MTK_PPE_PKT_TYPE_IPV6_6RD] = "6RD", + }; + + if (type >= ARRAY_SIZE(type_str) || !type_str[type]) + return "UNKNOWN"; + + return type_str[type]; +} + +static void +mtk_print_addr(struct seq_file *m, u32 *addr, bool ipv6) +{ + u32 n_addr[4]; + int i; + + if (!ipv6) { + seq_printf(m, "%pI4h", addr); + return; + } + + for (i = 0; i < ARRAY_SIZE(n_addr); i++) + n_addr[i] = htonl(addr[i]); + seq_printf(m, "%pI6", n_addr); +} + +static void +mtk_print_addr_info(struct seq_file *m, struct mtk_flow_addr_info *ai) +{ + mtk_print_addr(m, ai->src, ai->ipv6); + if (ai->src_port) + seq_printf(m, ":%d", *ai->src_port); + seq_printf(m, "->"); + mtk_print_addr(m, ai->dest, ai->ipv6); + if (ai->dest_port) + seq_printf(m, ":%d", *ai->dest_port); +} + +static int +mtk_ppe_debugfs_foe_show(struct seq_file *m, void *private, bool bind) +{ + struct mtk_ppe *ppe = m->private; + int i; + + for (i = 0; i < MTK_PPE_ENTRIES; i++) { + struct mtk_foe_entry *entry = &ppe->foe_table[i]; + struct mtk_foe_mac_info *l2; + struct mtk_flow_addr_info ai = {}; + unsigned char h_source[ETH_ALEN]; + unsigned char h_dest[ETH_ALEN]; + int type, state; + u32 ib2; + + state = FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1); + if (!state) + continue; + + if (bind && state != MTK_FOE_STATE_BIND) + continue; + + type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1); + seq_printf(m, "%05x %s %7s", i, + mtk_foe_entry_state_str(state), + mtk_foe_pkt_type_str(type)); + + switch (type) { + case MTK_PPE_PKT_TYPE_IPV4_HNAPT: + case MTK_PPE_PKT_TYPE_IPV4_DSLITE: + ai.src_port = &entry->ipv4.orig.src_port; + ai.dest_port = &entry->ipv4.orig.dest_port; + fallthrough; + case
MTK_PPE_PKT_TYPE_IPV4_ROUTE: + ai.src = &entry->ipv4.orig.src_ip; + ai.dest = &entry->ipv4.orig.dest_ip; + break; + case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T: + ai.src_port = &entry->ipv6.src_port; + ai.dest_port = &entry->ipv6.dest_port; + fallthrough; + case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T: + case MTK_PPE_PKT_TYPE_IPV6_6RD: + ai.src = &entry->ipv6.src_ip; + ai.dest = &entry->ipv6.dest_ip; + ai.ipv6 = true; + break; + } + + seq_printf(m, " orig="); + mtk_print_addr_info(m, &ai); + + switch (type) { + case MTK_PPE_PKT_TYPE_IPV4_HNAPT: + case MTK_PPE_PKT_TYPE_IPV4_DSLITE: + ai.src_port = &entry->ipv4.new.src_port; + ai.dest_port = &entry->ipv4.new.dest_port; + fallthrough; + case MTK_PPE_PKT_TYPE_IPV4_ROUTE: + ai.src = &entry->ipv4.new.src_ip; + ai.dest = &entry->ipv4.new.dest_ip; + seq_printf(m, " new="); + mtk_print_addr_info(m, &ai); + break; + } + + if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) { + l2 = &entry->ipv6.l2; + ib2 = entry->ipv6.ib2; + } else { + l2 = &entry->ipv4.l2; + ib2 = entry->ipv4.ib2; + } + + *((__be32 *)h_source) = htonl(l2->src_mac_hi); + *((__be16 *)&h_source[4]) = htons(l2->src_mac_lo); + *((__be32 *)h_dest) = htonl(l2->dest_mac_hi); + *((__be16 *)&h_dest[4]) = htons(l2->dest_mac_lo); + + seq_printf(m, " eth=%pM->%pM etype=%04x" + " vlan=%d,%d ib1=%08x ib2=%08x\n", + h_source, h_dest, ntohs(l2->etype), + l2->vlan1, l2->vlan2, entry->ib1, ib2); + } + + return 0; +} + +static int +mtk_ppe_debugfs_foe_show_all(struct seq_file *m, void *private) +{ + return mtk_ppe_debugfs_foe_show(m, private, false); +} + +static int +mtk_ppe_debugfs_foe_show_bind(struct seq_file *m, void *private) +{ + return mtk_ppe_debugfs_foe_show(m, private, true); +} + +static int +mtk_ppe_debugfs_foe_open_all(struct inode *inode, struct file *file) +{ + return single_open(file, mtk_ppe_debugfs_foe_show_all, + inode->i_private); +} + +static int +mtk_ppe_debugfs_foe_open_bind(struct inode *inode, struct file *file) +{ + return single_open(file, mtk_ppe_debugfs_foe_show_bind, + inode->i_private); +} + +int mtk_ppe_debugfs_init(struct mtk_ppe *ppe) +{ + static const struct file_operations fops_all = { + .open = mtk_ppe_debugfs_foe_open_all, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + }; + + static const struct file_operations fops_bind = { + .open = mtk_ppe_debugfs_foe_open_bind, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + }; + + struct dentry *root; + + root = debugfs_create_dir("mtk_ppe", NULL); + if (!root) + return -ENOMEM; + + debugfs_create_file("entries", S_IRUGO, root, ppe, &fops_all); + debugfs_create_file("bind", S_IRUGO, root, ppe, &fops_bind); + + return 0; +} diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c new file mode 100644 index 000000000000..b5f68f66d42a --- /dev/null +++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c @@ -0,0 +1,495 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> + */ + +#include <linux/if_ether.h> +#include <linux/rhashtable.h> +#include <linux/ip.h> +#include <net/flow_offload.h> +#include <net/pkt_cls.h> +#include <net/dsa.h> +#include "mtk_eth_soc.h" + +struct mtk_flow_data { + struct ethhdr eth; + + union { + struct { + __be32 src_addr; + __be32 dst_addr; + } v4; + }; + + __be16 src_port; + __be16 dst_port; + + struct { + u16 id; + __be16 proto; + u8 num; + } vlan; + struct { + u16 sid; + u8 num; + } pppoe; +}; + +struct mtk_flow_entry { + struct rhash_head node; + unsigned long cookie; + u16 
hash; +}; + +static const struct rhashtable_params mtk_flow_ht_params = { + .head_offset = offsetof(struct mtk_flow_entry, node), + .key_offset = offsetof(struct mtk_flow_entry, cookie), + .key_len = sizeof(unsigned long), + .automatic_shrinking = true, +}; + +static u32 +mtk_eth_timestamp(struct mtk_eth *eth) +{ + return mtk_r32(eth, 0x0010) & MTK_FOE_IB1_BIND_TIMESTAMP; +} + +static int +mtk_flow_set_ipv4_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data, + bool egress) +{ + return mtk_foe_entry_set_ipv4_tuple(foe, egress, + data->v4.src_addr, data->src_port, + data->v4.dst_addr, data->dst_port); +} + +static void +mtk_flow_offload_mangle_eth(const struct flow_action_entry *act, void *eth) +{ + void *dest = eth + act->mangle.offset; + const void *src = &act->mangle.val; + + if (act->mangle.offset > 8) + return; + + if (act->mangle.mask == 0xffff) { + src += 2; + dest += 2; + } + + memcpy(dest, src, act->mangle.mask ? 2 : 4); +} + + +static int +mtk_flow_mangle_ports(const struct flow_action_entry *act, + struct mtk_flow_data *data) +{ + u32 val = ntohl(act->mangle.val); + + switch (act->mangle.offset) { + case 0: + if (act->mangle.mask == ~htonl(0xffff)) + data->dst_port = cpu_to_be16(val); + else + data->src_port = cpu_to_be16(val >> 16); + break; + case 2: + data->dst_port = cpu_to_be16(val); + break; + default: + return -EINVAL; + } + + return 0; +} + +static int +mtk_flow_mangle_ipv4(const struct flow_action_entry *act, + struct mtk_flow_data *data) +{ + __be32 *dest; + + switch (act->mangle.offset) { + case offsetof(struct iphdr, saddr): + dest = &data->v4.src_addr; + break; + case offsetof(struct iphdr, daddr): + dest = &data->v4.dst_addr; + break; + default: + return -EINVAL; + } + + memcpy(dest, &act->mangle.val, sizeof(u32)); + + return 0; +} + +static int +mtk_flow_get_dsa_port(struct net_device **dev) +{ +#if IS_ENABLED(CONFIG_NET_DSA) + struct dsa_port *dp; + + dp = dsa_port_from_netdev(*dev); + if (IS_ERR(dp)) + return -ENODEV; + + if (dp->cpu_dp->tag_ops->proto != DSA_TAG_PROTO_MTK) + return -ENODEV; + + *dev = dp->cpu_dp->master; + + return dp->index; +#else + return -ENODEV; +#endif +} + +static int +mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe, + struct net_device *dev) +{ + int pse_port, dsa_port; + + dsa_port = mtk_flow_get_dsa_port(&dev); + if (dsa_port >= 0) + mtk_foe_entry_set_dsa(foe, dsa_port); + + if (dev == eth->netdev[0]) + pse_port = 1; + else if (dev == eth->netdev[1]) + pse_port = 2; + else + return -EOPNOTSUPP; + + mtk_foe_entry_set_pse_port(foe, pse_port); + + return 0; +} + +static int +mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct flow_action_entry *act; + struct mtk_flow_data data = {}; + struct mtk_foe_entry foe; + struct net_device *odev = NULL; + struct mtk_flow_entry *entry; + int offload_type = 0; + u16 addr_type = 0; + u32 timestamp; + u8 l4proto = 0; + int err = 0; + int hash; + int i; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) { + struct flow_match_meta match; + + flow_rule_match_meta(rule, &match); + } else { + return -EOPNOTSUPP; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match; + + flow_rule_match_control(rule, &match); + addr_type = match.key->addr_type; + } else { + return -EOPNOTSUPP; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + l4proto = 
match.key->ip_proto; + } else { + return -EOPNOTSUPP; + } + + flow_action_for_each(i, act, &rule->action) { + switch (act->id) { + case FLOW_ACTION_MANGLE: + if (act->mangle.htype == FLOW_ACT_MANGLE_HDR_TYPE_ETH) + mtk_flow_offload_mangle_eth(act, &data.eth); + break; + case FLOW_ACTION_REDIRECT: + odev = act->dev; + break; + case FLOW_ACTION_CSUM: + break; + case FLOW_ACTION_VLAN_PUSH: + if (data.vlan.num == 1 || + act->vlan.proto != htons(ETH_P_8021Q)) + return -EOPNOTSUPP; + + data.vlan.id = act->vlan.vid; + data.vlan.proto = act->vlan.proto; + data.vlan.num++; + break; + case FLOW_ACTION_VLAN_POP: + break; + case FLOW_ACTION_PPPOE_PUSH: + if (data.pppoe.num == 1) + return -EOPNOTSUPP; + + data.pppoe.sid = act->pppoe.sid; + data.pppoe.num++; + break; + default: + return -EOPNOTSUPP; + } + } + + switch (addr_type) { + case FLOW_DISSECTOR_KEY_IPV4_ADDRS: + offload_type = MTK_PPE_PKT_TYPE_IPV4_HNAPT; + break; + default: + return -EOPNOTSUPP; + } + + if (!is_valid_ether_addr(data.eth.h_source) || + !is_valid_ether_addr(data.eth.h_dest)) + return -EINVAL; + + err = mtk_foe_entry_prepare(&foe, offload_type, l4proto, 0, + data.eth.h_source, + data.eth.h_dest); + if (err) + return err; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_match_ports ports; + + flow_rule_match_ports(rule, &ports); + data.src_port = ports.key->src; + data.dst_port = ports.key->dst; + } else { + return -EOPNOTSUPP; + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_match_ipv4_addrs addrs; + + flow_rule_match_ipv4_addrs(rule, &addrs); + + data.v4.src_addr = addrs.key->src; + data.v4.dst_addr = addrs.key->dst; + + mtk_flow_set_ipv4_addr(&foe, &data, false); + } + + flow_action_for_each(i, act, &rule->action) { + if (act->id != FLOW_ACTION_MANGLE) + continue; + + switch (act->mangle.htype) { + case FLOW_ACT_MANGLE_HDR_TYPE_TCP: + case FLOW_ACT_MANGLE_HDR_TYPE_UDP: + err = mtk_flow_mangle_ports(act, &data); + break; + case FLOW_ACT_MANGLE_HDR_TYPE_IP4: + err = mtk_flow_mangle_ipv4(act, &data); + break; + case FLOW_ACT_MANGLE_HDR_TYPE_ETH: + /* handled earlier */ + break; + default: + return -EOPNOTSUPP; + } + + if (err) + return err; + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + err = mtk_flow_set_ipv4_addr(&foe, &data, true); + if (err) + return err; + } + + if (data.vlan.num == 1) { + if (data.vlan.proto != htons(ETH_P_8021Q)) + return -EOPNOTSUPP; + + mtk_foe_entry_set_vlan(&foe, data.vlan.id); + } + if (data.pppoe.num == 1) + mtk_foe_entry_set_pppoe(&foe, data.pppoe.sid); + + err = mtk_flow_set_output_device(eth, &foe, odev); + if (err) + return err; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + entry->cookie = f->cookie; + timestamp = mtk_eth_timestamp(eth); + hash = mtk_foe_entry_commit(ð->ppe, &foe, timestamp); + if (hash < 0) { + err = hash; + goto free; + } + + entry->hash = hash; + err = rhashtable_insert_fast(ð->flow_table, &entry->node, + mtk_flow_ht_params); + if (err < 0) + goto clear_flow; + + return 0; +clear_flow: + mtk_foe_entry_clear(ð->ppe, hash); +free: + kfree(entry); + return err; +} + +static int +mtk_flow_offload_destroy(struct mtk_eth *eth, struct flow_cls_offload *f) +{ + struct mtk_flow_entry *entry; + + entry = rhashtable_lookup(ð->flow_table, &f->cookie, + mtk_flow_ht_params); + if (!entry) + return -ENOENT; + + mtk_foe_entry_clear(ð->ppe, entry->hash); + rhashtable_remove_fast(ð->flow_table, &entry->node, + mtk_flow_ht_params); + kfree(entry); + + return 0; +} + +static int 
+mtk_flow_offload_stats(struct mtk_eth *eth, struct flow_cls_offload *f) +{ + struct mtk_flow_entry *entry; + int timestamp; + u32 idle; + + entry = rhashtable_lookup(ð->flow_table, &f->cookie, + mtk_flow_ht_params); + if (!entry) + return -ENOENT; + + timestamp = mtk_foe_entry_timestamp(ð->ppe, entry->hash); + if (timestamp < 0) + return -ETIMEDOUT; + + idle = mtk_eth_timestamp(eth) - timestamp; + f->stats.lastused = jiffies - idle * HZ; + + return 0; +} + +static DEFINE_MUTEX(mtk_flow_offload_mutex); + +static int +mtk_eth_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) +{ + struct flow_cls_offload *cls = type_data; + struct net_device *dev = cb_priv; + struct mtk_mac *mac = netdev_priv(dev); + struct mtk_eth *eth = mac->hw; + int err; + + if (!tc_can_offload(dev)) + return -EOPNOTSUPP; + + if (type != TC_SETUP_CLSFLOWER) + return -EOPNOTSUPP; + + mutex_lock(&mtk_flow_offload_mutex); + switch (cls->command) { + case FLOW_CLS_REPLACE: + err = mtk_flow_offload_replace(eth, cls); + break; + case FLOW_CLS_DESTROY: + err = mtk_flow_offload_destroy(eth, cls); + break; + case FLOW_CLS_STATS: + err = mtk_flow_offload_stats(eth, cls); + break; + default: + err = -EOPNOTSUPP; + break; + } + mutex_unlock(&mtk_flow_offload_mutex); + + return err; +} + +static int +mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f) +{ + struct mtk_mac *mac = netdev_priv(dev); + struct mtk_eth *eth = mac->hw; + static LIST_HEAD(block_cb_list); + struct flow_block_cb *block_cb; + flow_setup_cb_t *cb; + + if (!eth->ppe.foe_table) + return -EOPNOTSUPP; + + if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + cb = mtk_eth_setup_tc_block_cb; + f->driver_block_list = &block_cb_list; + + switch (f->command) { + case FLOW_BLOCK_BIND: + block_cb = flow_block_cb_lookup(f->block, cb, dev); + if (block_cb) { + flow_block_cb_incref(block_cb); + return 0; + } + block_cb = flow_block_cb_alloc(cb, dev, dev, NULL); + if (IS_ERR(block_cb)) + return PTR_ERR(block_cb); + + flow_block_cb_add(block_cb, f); + list_add_tail(&block_cb->driver_list, &block_cb_list); + return 0; + case FLOW_BLOCK_UNBIND: + block_cb = flow_block_cb_lookup(f->block, cb, dev); + if (!block_cb) + return -ENOENT; + + if (flow_block_cb_decref(block_cb)) { + flow_block_cb_remove(block_cb, f); + list_del(&block_cb->driver_list); + } + return 0; + default: + return -EOPNOTSUPP; + } +} + +int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + if (type == TC_SETUP_FT) + return mtk_eth_setup_tc_block(dev, type_data); + + return -EOPNOTSUPP; +} + +int mtk_eth_offload_init(struct mtk_eth *eth) +{ + if (!eth->ppe.foe_table) + return 0; + + return rhashtable_init(ð->flow_table, &mtk_flow_ht_params); +} diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_regs.h b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h new file mode 100644 index 000000000000..0c45ea0900f1 --- /dev/null +++ b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */ + +#ifndef __MTK_PPE_REGS_H +#define __MTK_PPE_REGS_H + +#define MTK_PPE_GLO_CFG 0x200 +#define MTK_PPE_GLO_CFG_EN BIT(0) +#define MTK_PPE_GLO_CFG_TSID_EN BIT(1) +#define MTK_PPE_GLO_CFG_IP4_L4_CS_DROP BIT(2) +#define MTK_PPE_GLO_CFG_IP4_CS_DROP BIT(3) +#define MTK_PPE_GLO_CFG_TTL0_DROP BIT(4) +#define MTK_PPE_GLO_CFG_PPE_BSWAP BIT(5) +#define MTK_PPE_GLO_CFG_PSE_HASH_OFS BIT(6) +#define MTK_PPE_GLO_CFG_MCAST_TB_EN BIT(7) 
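The register header being added here packs multi-bit fields with GENMASK() and single bits with BIT() from <linux/bits.h>. As a minimal sketch (not part of this patch), such fields are typically programmed with FIELD_PREP() from <linux/bitfield.h>; the EXAMPLE_* names and field values below are illustrative stand-ins for defines like MTK_PPE_GLO_CFG_MCAST_ENTRIES a few lines down:

```c
/* Illustrative sketch only -- not from this patch. Shows the usual
 * read-modify-write pattern for a GENMASK()-defined register field.
 */
#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/io.h>
#include <linux/types.h>

#define EXAMPLE_GLO_CFG_EN		BIT(0)		/* mirrors MTK_PPE_GLO_CFG_EN */
#define EXAMPLE_GLO_CFG_MCAST_ENTRIES	GENMASK(13, 12)	/* mirrors the define below */

static void example_enable_ppe(void __iomem *glo_cfg)
{
	u32 val = readl(glo_cfg);

	val &= ~EXAMPLE_GLO_CFG_MCAST_ENTRIES;
	val |= FIELD_PREP(EXAMPLE_GLO_CFG_MCAST_ENTRIES, 1);	/* illustrative value */
	val |= EXAMPLE_GLO_CFG_EN;

	writel(val, glo_cfg);
}
```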
+#define MTK_PPE_GLO_CFG_FLOW_DROP_KA BIT(8) +#define MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE BIT(9) +#define MTK_PPE_GLO_CFG_UDP_LITE_EN BIT(10) +#define MTK_PPE_GLO_CFG_UDP_LEN_DROP BIT(11) +#define MTK_PPE_GLO_CFG_MCAST_ENTRIES GENMASK(13, 12) +#define MTK_PPE_GLO_CFG_BUSY BIT(31) + +#define MTK_PPE_FLOW_CFG 0x204 +#define MTK_PPE_FLOW_CFG_IP4_TCP_FRAG BIT(6) +#define MTK_PPE_FLOW_CFG_IP4_UDP_FRAG BIT(7) +#define MTK_PPE_FLOW_CFG_IP6_3T_ROUTE BIT(8) +#define MTK_PPE_FLOW_CFG_IP6_5T_ROUTE BIT(9) +#define MTK_PPE_FLOW_CFG_IP6_6RD BIT(10) +#define MTK_PPE_FLOW_CFG_IP4_NAT BIT(12) +#define MTK_PPE_FLOW_CFG_IP4_NAPT BIT(13) +#define MTK_PPE_FLOW_CFG_IP4_DSLITE BIT(14) +#define MTK_PPE_FLOW_CFG_L2_BRIDGE BIT(15) +#define MTK_PPE_FLOW_CFG_IP_PROTO_BLACKLIST BIT(16) +#define MTK_PPE_FLOW_CFG_IP4_NAT_FRAG BIT(17) +#define MTK_PPE_FLOW_CFG_IP4_HASH_FLOW_LABEL BIT(18) +#define MTK_PPE_FLOW_CFG_IP4_HASH_GRE_KEY BIT(19) +#define MTK_PPE_FLOW_CFG_IP6_HASH_GRE_KEY BIT(20) + +#define MTK_PPE_IP_PROTO_CHK 0x208 +#define MTK_PPE_IP_PROTO_CHK_IPV4 GENMASK(15, 0) +#define MTK_PPE_IP_PROTO_CHK_IPV6 GENMASK(31, 16) + +#define MTK_PPE_TB_CFG 0x21c +#define MTK_PPE_TB_CFG_ENTRY_NUM GENMASK(2, 0) +#define MTK_PPE_TB_CFG_ENTRY_80B BIT(3) +#define MTK_PPE_TB_CFG_SEARCH_MISS GENMASK(5, 4) +#define MTK_PPE_TB_CFG_AGE_PREBIND BIT(6) +#define MTK_PPE_TB_CFG_AGE_NON_L4 BIT(7) +#define MTK_PPE_TB_CFG_AGE_UNBIND BIT(8) +#define MTK_PPE_TB_CFG_AGE_TCP BIT(9) +#define MTK_PPE_TB_CFG_AGE_UDP BIT(10) +#define MTK_PPE_TB_CFG_AGE_TCP_FIN BIT(11) +#define MTK_PPE_TB_CFG_KEEPALIVE GENMASK(13, 12) +#define MTK_PPE_TB_CFG_HASH_MODE GENMASK(15, 14) +#define MTK_PPE_TB_CFG_SCAN_MODE GENMASK(17, 16) +#define MTK_PPE_TB_CFG_HASH_DEBUG GENMASK(19, 18) + +enum { + MTK_PPE_SCAN_MODE_DISABLED, + MTK_PPE_SCAN_MODE_CHECK_AGE, + MTK_PPE_SCAN_MODE_KEEPALIVE_AGE, +}; + +enum { + MTK_PPE_KEEPALIVE_DISABLE, + MTK_PPE_KEEPALIVE_UNICAST_CPU, + MTK_PPE_KEEPALIVE_DUP_CPU = 3, +}; + +enum { + MTK_PPE_SEARCH_MISS_ACTION_DROP, + MTK_PPE_SEARCH_MISS_ACTION_FORWARD = 2, + MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD = 3, +}; + +#define MTK_PPE_TB_BASE 0x220 + +#define MTK_PPE_TB_USED 0x224 +#define MTK_PPE_TB_USED_NUM GENMASK(13, 0) + +#define MTK_PPE_BIND_RATE 0x228 +#define MTK_PPE_BIND_RATE_BIND GENMASK(15, 0) +#define MTK_PPE_BIND_RATE_PREBIND GENMASK(31, 16) + +#define MTK_PPE_BIND_LIMIT0 0x22c +#define MTK_PPE_BIND_LIMIT0_QUARTER GENMASK(13, 0) +#define MTK_PPE_BIND_LIMIT0_HALF GENMASK(29, 16) + +#define MTK_PPE_BIND_LIMIT1 0x230 +#define MTK_PPE_BIND_LIMIT1_FULL GENMASK(13, 0) +#define MTK_PPE_BIND_LIMIT1_NON_L4 GENMASK(23, 16) + +#define MTK_PPE_KEEPALIVE 0x234 +#define MTK_PPE_KEEPALIVE_TIME GENMASK(15, 0) +#define MTK_PPE_KEEPALIVE_TIME_TCP GENMASK(23, 16) +#define MTK_PPE_KEEPALIVE_TIME_UDP GENMASK(31, 24) + +#define MTK_PPE_UNBIND_AGE 0x238 +#define MTK_PPE_UNBIND_AGE_MIN_PACKETS GENMASK(31, 16) +#define MTK_PPE_UNBIND_AGE_DELTA GENMASK(7, 0) + +#define MTK_PPE_BIND_AGE0 0x23c +#define MTK_PPE_BIND_AGE0_DELTA_NON_L4 GENMASK(30, 16) +#define MTK_PPE_BIND_AGE0_DELTA_UDP GENMASK(14, 0) + +#define MTK_PPE_BIND_AGE1 0x240 +#define MTK_PPE_BIND_AGE1_DELTA_TCP_FIN GENMASK(30, 16) +#define MTK_PPE_BIND_AGE1_DELTA_TCP GENMASK(14, 0) + +#define MTK_PPE_HASH_SEED 0x244 + +#define MTK_PPE_DEFAULT_CPU_PORT 0x248 +#define MTK_PPE_DEFAULT_CPU_PORT_MASK(_n) (GENMASK(2, 0) << ((_n) * 4)) + +#define MTK_PPE_MTU_DROP 0x308 + +#define MTK_PPE_VLAN_MTU0 0x30c +#define MTK_PPE_VLAN_MTU0_NONE GENMASK(13, 0) +#define MTK_PPE_VLAN_MTU0_1TAG GENMASK(29, 16) + +#define 
MTK_PPE_VLAN_MTU1 0x310 +#define MTK_PPE_VLAN_MTU1_2TAG GENMASK(13, 0) +#define MTK_PPE_VLAN_MTU1_3TAG GENMASK(29, 16) + +#define MTK_PPE_VPM_TPID 0x318 + +#define MTK_PPE_CACHE_CTL 0x320 +#define MTK_PPE_CACHE_CTL_EN BIT(0) +#define MTK_PPE_CACHE_CTL_LOCK_CLR BIT(4) +#define MTK_PPE_CACHE_CTL_REQ BIT(8) +#define MTK_PPE_CACHE_CTL_CLEAR BIT(9) +#define MTK_PPE_CACHE_CTL_CMD GENMASK(13, 12) + +#define MTK_PPE_MIB_CFG 0x334 +#define MTK_PPE_MIB_CFG_EN BIT(0) +#define MTK_PPE_MIB_CFG_RD_CLR BIT(1) + +#define MTK_PPE_MIB_TB_BASE 0x338 + +#define MTK_PPE_MIB_CACHE_CTL 0x350 +#define MTK_PPE_MIB_CACHE_CTL_EN BIT(0) +#define MTK_PPE_MIB_CACHE_CTL_FLUSH BIT(2) + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index c678344d22a2..8d751383530b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2241,43 +2241,52 @@ void mlx4_master_comm_channel(struct work_struct *work) struct mlx4_priv *priv = container_of(mfunc, struct mlx4_priv, mfunc); struct mlx4_dev *dev = &priv->dev; - __be32 *bit_vec; + u32 lbit_vec[COMM_CHANNEL_BIT_ARRAY_SIZE]; + u32 nmbr_bits; u32 comm_cmd; - u32 vec; - int i, j, slave; + int i, slave; int toggle; + bool first = true; int served = 0; int reported = 0; u32 slt; - bit_vec = master->comm_arm_bit_vector; - for (i = 0; i < COMM_CHANNEL_BIT_ARRAY_SIZE; i++) { - vec = be32_to_cpu(bit_vec[i]); - for (j = 0; j < 32; j++) { - if (!(vec & (1 << j))) - continue; - ++reported; - slave = (i * 32) + j; - comm_cmd = swab32(readl( - &mfunc->comm[slave].slave_write)); - slt = swab32(readl(&mfunc->comm[slave].slave_read)) - >> 31; - toggle = comm_cmd >> 31; - if (toggle != slt) { - if (master->slave_state[slave].comm_toggle - != slt) { - pr_info("slave %d out of sync. read toggle %d, state toggle %d. Resynching.\n", - slave, slt, - master->slave_state[slave].comm_toggle); - master->slave_state[slave].comm_toggle = - slt; - } - mlx4_master_do_cmd(dev, slave, - comm_cmd >> 16 & 0xff, - comm_cmd & 0xffff, toggle); - ++served; + for (i = 0; i < COMM_CHANNEL_BIT_ARRAY_SIZE; i++) + lbit_vec[i] = be32_to_cpu(master->comm_arm_bit_vector[i]); + nmbr_bits = dev->persist->num_vfs + 1; + if (++master->next_slave >= nmbr_bits) + master->next_slave = 0; + slave = master->next_slave; + while (true) { + slave = find_next_bit((const unsigned long *)&lbit_vec, nmbr_bits, slave); + if (!first && slave >= master->next_slave) + break; + if (slave == nmbr_bits) { + if (!first) + break; + first = false; + slave = 0; + continue; + } + ++reported; + comm_cmd = swab32(readl(&mfunc->comm[slave].slave_write)); + slt = swab32(readl(&mfunc->comm[slave].slave_read)) >> 31; + toggle = comm_cmd >> 31; + if (toggle != slt) { + if (master->slave_state[slave].comm_toggle + != slt) { + pr_info("slave %d out of sync. read toggle %d, state toggle %d. 
Resynching.\n", + slave, slt, + master->slave_state[slave].comm_toggle); + master->slave_state[slave].comm_toggle = + slt; } + mlx4_master_do_cmd(dev, slave, + comm_cmd >> 16 & 0xff, + comm_cmd & 0xffff, toggle); + ++served; } + slave++; } if (reported && reported != served) @@ -2389,6 +2398,8 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) if (!priv->mfunc.master.vf_oper) goto err_comm_oper; + priv->mfunc.master.next_slave = 0; + for (i = 0; i < dev->num_slaves; ++i) { vf_admin = &priv->mfunc.master.vf_admin[i]; vf_oper = &priv->mfunc.master.vf_oper[i]; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 64bed7ac3836..6ccf340660d9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -603,6 +603,7 @@ struct mlx4_mfunc_master_ctx { struct mlx4_slave_event_eq slave_eq; struct mutex gen_eqe_mutex[MLX4_MFUNC_MAX]; struct mlx4_qos_manager qos_ctl[MLX4_MAX_PORTS + 1]; + u32 next_slave; /* mlx4_master_comm_channel */ }; struct mlx4_mfunc { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 9d623e38d783..461a43f338e6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -104,6 +104,18 @@ config MLX5_TC_CT If unsure, set to Y +config MLX5_TC_SAMPLE + bool "MLX5 TC sample offload support" + depends on MLX5_CLS_ACT + default y + help + Say Y here if you want to support offloading sample rules via tc + sample action. + If set to N, will not be able to configure tc rules with sample + action. + + If unsure, set to Y + config MLX5_CORE_EN_DCB bool "Data Center Bridging (DCB) Support" default y diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 8cb2625472c3..a1223e904190 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -27,7 +27,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ en_selftest.o en/port.o en/monitor_stats.o en/health.o \ en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \ en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \ - en/qos.o en/trap.o + en/qos.o en/trap.o en/fs_tt_redirect.o # # Netdev extra @@ -37,9 +37,10 @@ mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o mlx5_core-$(CONFIG_MLX5_ESWITCH) += lag_mp.o lib/geneve.o lib/port_tun.o \ - en_rep.o en/rep/bond.o en/mod_hdr.o + en_rep.o en/rep/bond.o en/mod_hdr.o \ + en/mapping.o mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \ - en/mapping.o lib/fs_chains.o en/tc_tun.o \ + lib/fs_chains.o en/tc_tun.o \ esw/indir_table.o en/tc_tun_encap.o \ en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \ en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o @@ -49,11 +50,12 @@ mlx5_core-$(CONFIG_MLX5_TC_CT) += en/tc_ct.o # Core extra # mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \ - ecpf.o rdma.o + ecpf.o rdma.o esw/legacy.o mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/acl/helper.o \ esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \ esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o \ - esw/devlink_port.o + esw/devlink_port.o esw/vporttbl.o +mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += esw/sample.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o 
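The mlx4_master_comm_channel() rework above replaces the fixed word-by-word bit scan with a walk of the pending-slave bitmap that resumes just past the last slave served and wraps around once, so a chatty low-numbered slave can no longer starve the others. A condensed sketch of that round-robin pattern, with hypothetical names (serve_pending_round_robin, serve) standing in for the driver specifics:

```c
#include <linux/bitops.h>

/* Walk a bitmap of pending slaves starting just after the last one served,
 * wrapping around once, so service order rotates instead of always
 * favouring slave 0. Mirrors the logic added above; illustrative only.
 */
static void serve_pending_round_robin(const unsigned long *pending,
				      unsigned int nbits, unsigned int *start,
				      void (*serve)(unsigned int slave))
{
	unsigned int slave;
	bool wrapped = false;

	if (++(*start) >= nbits)
		*start = 0;

	slave = *start;
	while (true) {
		slave = find_next_bit(pending, nbits, slave);
		if (!wrapped && slave >= nbits) {
			wrapped = true;	/* second pass from the bottom */
			slave = 0;
			continue;
		}
		if (slave >= nbits || (wrapped && slave >= *start))
			break;	/* full ring scanned once */

		serve(slave);
		slave++;
	}
}
```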
mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index e8cecd50558d..9d79c5ec31e9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -263,15 +263,15 @@ static int verify_signature(struct mlx5_cmd_work_ent *ent) return 0; } -static void dump_buf(void *buf, int size, int data_only, int offset) +static void dump_buf(void *buf, int size, int data_only, int offset, int idx) { __be32 *p = buf; int i; for (i = 0; i < size; i += 16) { - pr_debug("%03x: %08x %08x %08x %08x\n", offset, be32_to_cpu(p[0]), - be32_to_cpu(p[1]), be32_to_cpu(p[2]), - be32_to_cpu(p[3])); + pr_debug("cmd[%d]: %03x: %08x %08x %08x %08x\n", idx, offset, + be32_to_cpu(p[0]), be32_to_cpu(p[1]), + be32_to_cpu(p[2]), be32_to_cpu(p[3])); p += 4; offset += 16; } @@ -802,39 +802,41 @@ static void dump_command(struct mlx5_core_dev *dev, int dump_len; int i; + mlx5_core_dbg(dev, "cmd[%d]: start dump\n", ent->idx); data_only = !!(mlx5_core_debug_mask & (1 << MLX5_CMD_DATA)); if (data_only) mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_DATA, - "dump command data %s(0x%x) %s\n", - mlx5_command_str(op), op, + "cmd[%d]: dump command data %s(0x%x) %s\n", + ent->idx, mlx5_command_str(op), op, input ? "INPUT" : "OUTPUT"); else - mlx5_core_dbg(dev, "dump command %s(0x%x) %s\n", - mlx5_command_str(op), op, + mlx5_core_dbg(dev, "cmd[%d]: dump command %s(0x%x) %s\n", + ent->idx, mlx5_command_str(op), op, input ? "INPUT" : "OUTPUT"); if (data_only) { if (input) { - dump_buf(ent->lay->in, sizeof(ent->lay->in), 1, offset); + dump_buf(ent->lay->in, sizeof(ent->lay->in), 1, offset, ent->idx); offset += sizeof(ent->lay->in); } else { - dump_buf(ent->lay->out, sizeof(ent->lay->out), 1, offset); + dump_buf(ent->lay->out, sizeof(ent->lay->out), 1, offset, ent->idx); offset += sizeof(ent->lay->out); } } else { - dump_buf(ent->lay, sizeof(*ent->lay), 0, offset); + dump_buf(ent->lay, sizeof(*ent->lay), 0, offset, ent->idx); offset += sizeof(*ent->lay); } for (i = 0; i < n && next; i++) { if (data_only) { dump_len = min_t(int, MLX5_CMD_DATA_BLOCK_SIZE, msg->len - offset); - dump_buf(next->buf, dump_len, 1, offset); + dump_buf(next->buf, dump_len, 1, offset, ent->idx); offset += MLX5_CMD_DATA_BLOCK_SIZE; } else { - mlx5_core_dbg(dev, "command block:\n"); - dump_buf(next->buf, sizeof(struct mlx5_cmd_prot_block), 0, offset); + mlx5_core_dbg(dev, "cmd[%d]: command block:\n", ent->idx); + dump_buf(next->buf, sizeof(struct mlx5_cmd_prot_block), 0, offset, + ent->idx); offset += sizeof(struct mlx5_cmd_prot_block); } next = next->next; @@ -842,6 +844,8 @@ static void dump_command(struct mlx5_core_dev *dev, if (data_only) pr_debug("\n"); + + mlx5_core_dbg(dev, "cmd[%d]: end dump\n", ent->idx); } static u16 msg_to_opcode(struct mlx5_cmd_msg *in) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index b051417ede67..a9166cd85013 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -58,9 +58,6 @@ static bool is_eth_supported(struct mlx5_core_dev *dev) if (!IS_ENABLED(CONFIG_MLX5_CORE_EN)) return false; - if (is_eth_rep_supported(dev)) - return false; - if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) return false; @@ -191,12 +188,12 @@ static bool is_ib_supported(struct mlx5_core_dev *dev) } enum { - MLX5_INTERFACE_PROTOCOL_ETH_REP, MLX5_INTERFACE_PROTOCOL_ETH, + 
MLX5_INTERFACE_PROTOCOL_ETH_REP, + + MLX5_INTERFACE_PROTOCOL_IB, MLX5_INTERFACE_PROTOCOL_IB_REP, MLX5_INTERFACE_PROTOCOL_MPIB, - MLX5_INTERFACE_PROTOCOL_IB, MLX5_INTERFACE_PROTOCOL_VNET, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index d7d8a68ef23d..44c458443428 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -137,18 +137,18 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, * unregistering devlink instance while holding devlink_mutext. * Hence, do not support reload. */ - NL_SET_ERR_MSG_MOD(extack, "reload is unsupported when SFs are allocated\n"); + NL_SET_ERR_MSG_MOD(extack, "reload is unsupported when SFs are allocated"); return -EOPNOTSUPP; } if (mlx5_lag_is_active(dev)) { - NL_SET_ERR_MSG_MOD(extack, "reload is unsupported in Lag mode\n"); + NL_SET_ERR_MSG_MOD(extack, "reload is unsupported in Lag mode"); return -EOPNOTSUPP; } switch (action) { case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: - mlx5_unload_one(dev, false); + mlx5_unload_one(dev); return 0; case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET) @@ -170,13 +170,13 @@ static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_a *actions_performed = BIT(action); switch (action) { case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: - return mlx5_load_one(dev, false); + return mlx5_load_one(dev); case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET) break; /* On fw_activate action, also driver is reloaded and reinit performed */ *actions_performed |= BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT); - return mlx5_load_one(dev, false); + return mlx5_load_one(dev); default: /* Unsupported action should not get to this function */ WARN_ON(1); @@ -246,6 +246,11 @@ static int mlx5_devlink_trap_action_set(struct devlink *devlink, struct mlx5_devlink_trap *dl_trap; int err = 0; + if (is_mdev_switchdev_mode(dev)) { + NL_SET_ERR_MSG_MOD(extack, "Devlink traps can't be set in switchdev mode"); + return -EOPNOTSUPP; + } + dl_trap = mlx5_find_trap_by_id(dev, trap->id); if (!dl_trap) { mlx5_core_err(dev, "Devlink trap: Set action on invalid trap id 0x%x", trap->id); @@ -456,6 +461,50 @@ static int mlx5_devlink_large_group_num_validate(struct devlink *devlink, u32 id return 0; } + +static int mlx5_devlink_esw_port_metadata_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (!MLX5_ESWITCH_MANAGER(dev)) + return -EOPNOTSUPP; + + return mlx5_esw_offloads_vport_metadata_set(dev->priv.eswitch, ctx->val.vbool); +} + +static int mlx5_devlink_esw_port_metadata_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (!MLX5_ESWITCH_MANAGER(dev)) + return -EOPNOTSUPP; + + ctx->val.vbool = mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch); + return 0; +} + +static int mlx5_devlink_esw_port_metadata_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u8 esw_mode; + + if (!MLX5_ESWITCH_MANAGER(dev)) { + NL_SET_ERR_MSG_MOD(extack, "E-Switch is unsupported"); + return -EOPNOTSUPP; + } + esw_mode = mlx5_eswitch_mode(dev); + if (esw_mode == MLX5_ESWITCH_OFFLOADS) { + NL_SET_ERR_MSG_MOD(extack, + "E-Switch must be disabled or in non 
switchdev mode"); + return -EBUSY; + } + return 0; +} + #endif static int mlx5_devlink_enable_remote_dev_reset_set(struct devlink *devlink, u32 id, @@ -490,6 +539,12 @@ static const struct devlink_param mlx5_devlink_params[] = { BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, mlx5_devlink_large_group_num_validate), + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA, + "esw_port_metadata", DEVLINK_PARAM_TYPE_BOOL, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + mlx5_devlink_esw_port_metadata_get, + mlx5_devlink_esw_port_metadata_set, + mlx5_devlink_esw_port_metadata_validate), #endif DEVLINK_PARAM_GENERIC(ENABLE_REMOTE_DEV_RESET, BIT(DEVLINK_PARAM_CMODE_RUNTIME), mlx5_devlink_enable_remote_dev_reset_get, @@ -519,6 +574,18 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink) devlink_param_driverinit_value_set(devlink, MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, value); + + if (MLX5_ESWITCH_MANAGER(dev)) { + if (mlx5_esw_vport_match_metadata_supported(dev->priv.eswitch)) { + dev->priv.eswitch->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; + value.vbool = true; + } else { + value.vbool = false; + } + devlink_param_driverinit_value_set(devlink, + MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA, + value); + } #endif } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h index eff107dad922..7318d44b774b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h @@ -10,6 +10,7 @@ enum mlx5_devlink_param_id { MLX5_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE, MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, + MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA, }; struct mlx5_trap_ctx { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index 2eb022ad7fd0..01a1d02dcf15 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -1100,7 +1100,7 @@ int mlx5_fw_tracer_reload(struct mlx5_fw_tracer *tracer) int err; if (IS_ERR_OR_NULL(tracer)) - return -EINVAL; + return 0; dev = tracer->dev; mlx5_fw_tracer_cleanup(tracer); @@ -1126,8 +1126,7 @@ static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void switch (eqe->sub_type) { case MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE: - if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) - queue_work(tracer->work_queue, &tracer->ownership_change_work); + queue_work(tracer->work_queue, &tracer->ownership_change_work); break; case MLX5_TRACER_SUBTYPE_TRACES_AVAILABLE: if (likely(tracer->str_db.loaded)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 7435fe6829b6..b636d63358d2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -92,14 +92,15 @@ struct page_pool; MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0) #define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER) -#define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2) +#define MLX5_ALIGN_MTTS(mtts) (ALIGN(mtts, 8)) +#define MLX5_ALIGNED_MTTS_OCTW(mtts) ((mtts) / 2) +#define MLX5_MTT_OCTW(mtts) (MLX5_ALIGNED_MTTS_OCTW(MLX5_ALIGN_MTTS(mtts))) /* Add another page to MLX5E_REQUIRED_WQE_MTTS as a buffer between * WQEs, This page will absorb write overflow by the hardware, when * receiving packets larger than MTU. 
These oversize packets are * dropped by the driver at a later stage. */ -#define MLX5E_REQUIRED_WQE_MTTS (ALIGN(MLX5_MPWRQ_PAGES_PER_WQE + 1, 8)) -#define MLX5E_LOG_ALIGNED_MPWQE_PPW (ilog2(MLX5E_REQUIRED_WQE_MTTS)) +#define MLX5E_REQUIRED_WQE_MTTS (MLX5_ALIGN_MTTS(MLX5_MPWRQ_PAGES_PER_WQE + 1)) #define MLX5E_REQUIRED_MTTS(wqes) (wqes * MLX5E_REQUIRED_WQE_MTTS) #define MLX5E_MAX_RQ_NUM_MTTS \ ((1 << 16) * 2) /* So that MLX5_MTT_OCTW(num_mtts) fits into u16 */ @@ -268,6 +269,7 @@ struct mlx5e_params { struct mlx5e_xsk *xsk; unsigned int sw_mtu; int hard_mtu; + bool ptp_rx; }; enum { @@ -323,9 +325,9 @@ enum { MLX5E_SQ_STATE_RECOVERING, MLX5E_SQ_STATE_IPSEC, MLX5E_SQ_STATE_AM, - MLX5E_SQ_STATE_TLS, MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, MLX5E_SQ_STATE_PENDING_XSK_TX, + MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, }; struct mlx5e_tx_mpwqe { @@ -498,6 +500,8 @@ struct mlx5e_xdpsq { struct mlx5e_channel *channel; } ____cacheline_aligned_in_smp; +struct mlx5e_ktls_resync_resp; + struct mlx5e_icosq { /* data path */ u16 cc; @@ -515,7 +519,9 @@ struct mlx5e_icosq { struct mlx5_wq_cyc wq; void __iomem *uar_map; u32 sqn; + u16 reserved_room; unsigned long state; + struct mlx5e_ktls_resync_resp *ktls_resync; /* control path */ struct mlx5_wq_ctrl wq_ctrl; @@ -706,11 +712,11 @@ struct mlx5e_channel { int cpu; }; -struct mlx5e_port_ptp; +struct mlx5e_ptp; struct mlx5e_channels { struct mlx5e_channel **c; - struct mlx5e_port_ptp *port_ptp; + struct mlx5e_ptp *ptp; unsigned int num; struct mlx5e_params params; }; @@ -725,10 +731,11 @@ struct mlx5e_channel_stats { struct mlx5e_xdpsq_stats xsksq; } ____cacheline_aligned_in_smp; -struct mlx5e_port_ptp_stats { +struct mlx5e_ptp_stats { struct mlx5e_ch_stats ch; struct mlx5e_sq_stats sq[MLX5E_MAX_NUM_TC]; struct mlx5e_ptp_cq_stats cq[MLX5E_MAX_NUM_TC]; + struct mlx5e_rq_stats rq; } ____cacheline_aligned_in_smp; enum { @@ -835,6 +842,7 @@ struct mlx5e_priv { struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; struct mlx5e_tir xsk_tir[MLX5E_MAX_NUM_CHANNELS]; + struct mlx5e_tir ptp_tir; struct mlx5e_rss_params rss_params; u32 tx_rates[MLX5E_MAX_NUM_SQS]; @@ -854,10 +862,11 @@ struct mlx5e_priv { struct mlx5e_stats stats; struct mlx5e_channel_stats channel_stats[MLX5E_MAX_NUM_CHANNELS]; struct mlx5e_channel_stats trap_stats; - struct mlx5e_port_ptp_stats port_ptp_stats; + struct mlx5e_ptp_stats ptp_stats; u16 max_nch; u8 max_opened_tc; - bool port_ptp_opened; + bool tx_ptp_opened; + bool rx_ptp_opened; struct hwtstamp_config tstamp; u16 q_counter; u16 drop_rq_q_counter; @@ -880,7 +889,6 @@ struct mlx5e_priv { #endif struct devlink_health_reporter *tx_reporter; struct devlink_health_reporter *rx_reporter; - struct devlink_port dl_port; struct mlx5e_xsk xsk; #if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) struct mlx5e_hv_vhca_stats_agent stats_agent; @@ -914,13 +922,12 @@ struct mlx5e_profile { const struct mlx5e_rx_handlers *rx_handlers; int max_tc; u8 rq_groups; + bool rx_ptp_support; }; void mlx5e_build_ptys2ethtool_map(void); bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev); -bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s); @@ -963,9 +970,9 @@ struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types struct mlx5e_xsk_param; struct mlx5e_rq_param; -int mlx5e_open_rq(struct 
mlx5e_channel *c, struct mlx5e_params *params, - struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk, - struct xsk_buff_pool *xsk_pool, struct mlx5e_rq *rq); +int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param, + struct mlx5e_xsk_param *xsk, int node, + struct mlx5e_rq *rq); int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time); void mlx5e_deactivate_rq(struct mlx5e_rq *rq); void mlx5e_close_rq(struct mlx5e_rq *rq); @@ -1011,27 +1018,20 @@ int fn##_ctx(struct mlx5e_priv *priv, void *context) \ return fn(priv); \ } int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv); -int mlx5e_safe_switch_channels(struct mlx5e_priv *priv, - struct mlx5e_channels *new_chs, - mlx5e_fp_preactivate preactivate, - void *context); +int mlx5e_safe_switch_params(struct mlx5e_priv *priv, + struct mlx5e_params *new_params, + mlx5e_fp_preactivate preactivate, + void *context, bool reset); int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv); int mlx5e_num_channels_changed(struct mlx5e_priv *priv); int mlx5e_num_channels_changed_ctx(struct mlx5e_priv *priv, void *context); void mlx5e_activate_priv_channels(struct mlx5e_priv *priv); void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv); +int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx); void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, int num_channels); -void mlx5e_reset_tx_moderation(struct mlx5e_params *params, u8 cq_period_mode); -void mlx5e_reset_rx_moderation(struct mlx5e_params *params, u8 cq_period_mode); -void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); -void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); - -void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params); -void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state); void mlx5e_activate_rq(struct mlx5e_rq *rq); void mlx5e_deactivate_rq(struct mlx5e_rq *rq); @@ -1090,10 +1090,10 @@ int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv); int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc); void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv); -int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); -void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); -int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); -void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); +int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n); +void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n); +int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n); +void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n); void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn); @@ -1174,10 +1174,9 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv); void mlx5e_destroy_netdev(struct mlx5e_priv *priv); int mlx5e_netdev_change_profile(struct mlx5e_priv *priv, const struct mlx5e_profile *new_profile, void *new_ppriv); +void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv); void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv); void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu); 
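With the en.h changes above, mlx5e_safe_switch_channels() gives way to mlx5e_safe_switch_params(): callers hand over the desired mlx5e_params plus a reset flag instead of pre-building a channel set. A hypothetical caller might look like the following sketch (example_set_mtu is illustrative, not from the patch; error handling and RTNL locking elided):

```c
/* Hypothetical caller of the reworked API: copy the active params, tweak
 * one knob, and let the core decide how to apply it. A NULL preactivate
 * callback and context keep the sketch minimal.
 */
static int example_set_mtu(struct mlx5e_priv *priv, unsigned int new_mtu)
{
	struct mlx5e_params new_params;

	new_params = priv->channels.params;	/* start from current config */
	new_params.sw_mtu = new_mtu;

	/* reset=true: rebuild the channels with the new parameters */
	return mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true);
}
```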
-void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, u16 num_channels); void mlx5e_rx_dim_work(struct work_struct *work); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c index a69c62d72d16..0dd7615e5931 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c @@ -2,37 +2,70 @@ /* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ #include "en/devlink.h" +#include "eswitch.h" + +static void +mlx5e_devlink_get_port_parent_id(struct mlx5_core_dev *dev, struct netdev_phys_item_id *ppid) +{ + u64 parent_id; + + parent_id = mlx5_query_nic_system_image_guid(dev); + ppid->id_len = sizeof(parent_id); + memcpy(ppid->id, &parent_id, sizeof(parent_id)); +} int mlx5e_devlink_port_register(struct mlx5e_priv *priv) { struct devlink *devlink = priv_to_devlink(priv->mdev); struct devlink_port_attrs attrs = {}; + struct netdev_phys_item_id ppid = {}; + struct devlink_port *dl_port; + unsigned int dl_port_index; if (mlx5_core_is_pf(priv->mdev)) { attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; attrs.phys.port_number = PCI_FUNC(priv->mdev->pdev->devfn); + if (MLX5_ESWITCH_MANAGER(priv->mdev)) { + mlx5e_devlink_get_port_parent_id(priv->mdev, &ppid); + memcpy(attrs.switch_id.id, ppid.id, ppid.id_len); + attrs.switch_id.id_len = ppid.id_len; + } + dl_port_index = mlx5_esw_vport_to_devlink_port_index(priv->mdev, + MLX5_VPORT_UPLINK); } else { attrs.flavour = DEVLINK_PORT_FLAVOUR_VIRTUAL; + dl_port_index = mlx5_esw_vport_to_devlink_port_index(priv->mdev, 0); } - devlink_port_attrs_set(&priv->dl_port, &attrs); + dl_port = mlx5e_devlink_get_dl_port(priv); + memset(dl_port, 0, sizeof(*dl_port)); + devlink_port_attrs_set(dl_port, &attrs); - return devlink_port_register(devlink, &priv->dl_port, 1); + return devlink_port_register(devlink, dl_port, dl_port_index); } void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv) { - devlink_port_type_eth_set(&priv->dl_port, priv->netdev); + struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv); + + devlink_port_type_eth_set(dl_port, priv->netdev); } void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv) { - devlink_port_unregister(&priv->dl_port); + struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv); + + if (dl_port->registered) + devlink_port_unregister(dl_port); } struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); + struct devlink_port *port; - return &priv->dl_port; + port = mlx5e_devlink_get_dl_port(priv); + if (port->registered) + return port; + return NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h index 83123a801adc..10b50feb9883 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h @@ -12,4 +12,10 @@ void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv); void mlx5e_devlink_port_type_eth_set(struct mlx5e_priv *priv); struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev); +static inline struct devlink_port * +mlx5e_devlink_get_dl_port(struct mlx5e_priv *priv) +{ + return &priv->mdev->mlx5e_res.dl_port; +} + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index a16297e7e2ac..1d5ce07b83f4 
100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -29,6 +29,7 @@ struct mlx5e_tc_table { struct netdev_net_notifier netdevice_nn; struct mlx5_tc_ct_priv *ct; + struct mapping_ctx *mapping; }; struct mlx5e_flow_table { @@ -49,18 +50,10 @@ struct mlx5e_promisc_table { struct mlx5_flow_handle *rule; }; -struct mlx5e_vlan_table { - struct mlx5e_flow_table ft; - DECLARE_BITMAP(active_cvlans, VLAN_N_VID); - DECLARE_BITMAP(active_svlans, VLAN_N_VID); - struct mlx5_flow_handle *active_cvlans_rule[VLAN_N_VID]; - struct mlx5_flow_handle *active_svlans_rule[VLAN_N_VID]; - struct mlx5_flow_handle *untagged_rule; - struct mlx5_flow_handle *any_cvlan_rule; - struct mlx5_flow_handle *any_svlan_rule; - struct mlx5_flow_handle *trap_rule; - bool cvlan_filter_disabled; -}; +/* Forward declaration and APIs to get private fields of vlan_table */ +struct mlx5e_vlan_table; +unsigned long *mlx5e_vlan_get_active_svlans(struct mlx5e_vlan_table *vlan); +struct mlx5_flow_table *mlx5e_vlan_get_flowtable(struct mlx5e_vlan_table *vlan); struct mlx5e_l2_table { struct mlx5e_flow_table ft; @@ -137,11 +130,13 @@ enum { MLX5E_L2_FT_LEVEL, MLX5E_TTC_FT_LEVEL, MLX5E_INNER_TTC_FT_LEVEL, + MLX5E_FS_TT_UDP_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1, + MLX5E_FS_TT_ANY_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1, #ifdef CONFIG_MLX5_EN_TLS - MLX5E_ACCEL_FS_TCP_FT_LEVEL, + MLX5E_ACCEL_FS_TCP_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1, #endif #ifdef CONFIG_MLX5_EN_ARFS - MLX5E_ARFS_FT_LEVEL, + MLX5E_ARFS_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1, #endif #ifdef CONFIG_MLX5_EN_IPSEC MLX5E_ACCEL_FS_ESP_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1, @@ -198,31 +193,7 @@ static inline int mlx5e_ethtool_get_rxnfc(struct net_device *dev, #endif /* CONFIG_MLX5_EN_RXNFC */ #ifdef CONFIG_MLX5_EN_ARFS -#define ARFS_HASH_SHIFT BITS_PER_BYTE -#define ARFS_HASH_SIZE BIT(BITS_PER_BYTE) - -struct arfs_table { - struct mlx5e_flow_table ft; - struct mlx5_flow_handle *default_rule; - struct hlist_head rules_hash[ARFS_HASH_SIZE]; -}; - -enum arfs_type { - ARFS_IPV4_TCP, - ARFS_IPV6_TCP, - ARFS_IPV4_UDP, - ARFS_IPV6_UDP, - ARFS_NUM_TYPES, -}; - -struct mlx5e_arfs_tables { - struct arfs_table arfs_tables[ARFS_NUM_TYPES]; - /* Protect aRFS rules list */ - spinlock_t arfs_lock; - struct list_head rules; - int last_filter_id; - struct workqueue_struct *wq; -}; +struct mlx5e_arfs_tables; int mlx5e_arfs_create_tables(struct mlx5e_priv *priv); void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv); @@ -241,6 +212,10 @@ static inline int mlx5e_arfs_disable(struct mlx5e_priv *priv) { return -EOPNOTSU struct mlx5e_accel_fs_tcp; #endif +struct mlx5e_fs_udp; +struct mlx5e_fs_any; +struct mlx5e_ptp_fs; + struct mlx5e_flow_steering { struct mlx5_flow_namespace *ns; struct mlx5_flow_namespace *egress_ns; @@ -249,16 +224,19 @@ struct mlx5e_flow_steering { #endif struct mlx5e_tc_table tc; struct mlx5e_promisc_table promisc; - struct mlx5e_vlan_table vlan; + struct mlx5e_vlan_table *vlan; struct mlx5e_l2_table l2; struct mlx5e_ttc_table ttc; struct mlx5e_ttc_table inner_ttc; #ifdef CONFIG_MLX5_EN_ARFS - struct mlx5e_arfs_tables arfs; + struct mlx5e_arfs_tables *arfs; #endif #ifdef CONFIG_MLX5_EN_TLS struct mlx5e_accel_fs_tcp *accel_tcp; #endif + struct mlx5e_fs_udp *udp; + struct mlx5e_fs_any *any; + struct mlx5e_ptp_fs *ptp_fs; }; struct ttc_params { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c new file mode 100644 index 
000000000000..909faa6c89d7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c @@ -0,0 +1,605 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */ + +#include <linux/netdevice.h> +#include "en/fs_tt_redirect.h" +#include "fs_core.h" + +enum fs_udp_type { + FS_IPV4_UDP, + FS_IPV6_UDP, + FS_UDP_NUM_TYPES, +}; + +struct mlx5e_fs_udp { + struct mlx5e_flow_table tables[FS_UDP_NUM_TYPES]; + struct mlx5_flow_handle *default_rules[FS_UDP_NUM_TYPES]; + int ref_cnt; +}; + +struct mlx5e_fs_any { + struct mlx5e_flow_table table; + struct mlx5_flow_handle *default_rule; + int ref_cnt; +}; + +static char *fs_udp_type2str(enum fs_udp_type i) +{ + switch (i) { + case FS_IPV4_UDP: + return "UDP v4"; + default: /* FS_IPV6_UDP */ + return "UDP v6"; + } +} + +static enum mlx5e_traffic_types fs_udp2tt(enum fs_udp_type i) +{ + switch (i) { + case FS_IPV4_UDP: + return MLX5E_TT_IPV4_UDP; + default: /* FS_IPV6_UDP */ + return MLX5E_TT_IPV6_UDP; + } +} + +static enum fs_udp_type tt2fs_udp(enum mlx5e_traffic_types i) +{ + switch (i) { + case MLX5E_TT_IPV4_UDP: + return FS_IPV4_UDP; + case MLX5E_TT_IPV6_UDP: + return FS_IPV6_UDP; + default: + return FS_UDP_NUM_TYPES; + } +} + +void mlx5e_fs_tt_redirect_del_rule(struct mlx5_flow_handle *rule) +{ + mlx5_del_flow_rules(rule); +} + +static void fs_udp_set_dport_flow(struct mlx5_flow_spec *spec, enum fs_udp_type type, + u16 udp_dport) +{ + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_UDP); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, + type == FS_IPV4_UDP ? 
4 : 6); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.udp_dport); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_dport, udp_dport); +} + +struct mlx5_flow_handle * +mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv, + enum mlx5e_traffic_types ttc_type, + u32 tir_num, u16 d_port) +{ + enum fs_udp_type type = tt2fs_udp(ttc_type); + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_table *ft = NULL; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + struct mlx5e_fs_udp *fs_udp; + int err; + + if (type == FS_UDP_NUM_TYPES) + return ERR_PTR(-EINVAL); + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + fs_udp = priv->fs.udp; + ft = fs_udp->tables[type].t; + + fs_udp_set_dport_flow(spec, type, d_port); + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest.tir_num = tir_num; + + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + kvfree(spec); + + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, "%s: add %s rule failed, err %d\n", + __func__, fs_udp_type2str(type), err); + } + return rule; +} + +static int fs_udp_add_default_rule(struct mlx5e_priv *priv, enum fs_udp_type type) +{ + struct mlx5e_flow_table *fs_udp_t; + struct mlx5_flow_destination dest; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5e_fs_udp *fs_udp; + int err; + + fs_udp = priv->fs.udp; + fs_udp_t = &fs_udp->tables[type]; + + dest = mlx5e_ttc_get_default_dest(priv, fs_udp2tt(type)); + rule = mlx5_add_flow_rules(fs_udp_t->t, NULL, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, + "%s: add default rule failed, fs type=%d, err %d\n", + __func__, type, err); + return err; + } + + fs_udp->default_rules[type] = rule; + return 0; +} + +#define MLX5E_FS_UDP_NUM_GROUPS (2) +#define MLX5E_FS_UDP_GROUP1_SIZE (BIT(16)) +#define MLX5E_FS_UDP_GROUP2_SIZE (BIT(0)) +#define MLX5E_FS_UDP_TABLE_SIZE (MLX5E_FS_UDP_GROUP1_SIZE +\ + MLX5E_FS_UDP_GROUP2_SIZE) +static int fs_udp_create_groups(struct mlx5e_flow_table *ft, enum fs_udp_type type) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + void *outer_headers_c; + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(MLX5E_FS_UDP_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in || !ft->g) { + kfree(ft->g); + kvfree(in); + return -ENOMEM; + } + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, outer_headers); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_version); + + switch (type) { + case FS_IPV4_UDP: + case FS_IPV6_UDP: + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_dport); + break; + default: + err = -EINVAL; + goto out; + } + /* Match on udp protocol, Ipv4/6 and dport */ + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_FS_UDP_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Default Flow Group */ + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_FS_UDP_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + 
if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; +out: + kvfree(in); + + return err; +} + +static int fs_udp_create_table(struct mlx5e_priv *priv, enum fs_udp_type type) +{ + struct mlx5e_flow_table *ft = &priv->fs.udp->tables[type]; + struct mlx5_flow_table_attr ft_attr = {}; + int err; + + ft->num_groups = 0; + + ft_attr.max_fte = MLX5E_FS_UDP_TABLE_SIZE; + ft_attr.level = MLX5E_FS_TT_UDP_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + + netdev_dbg(priv->netdev, "Created fs %s table id %u level %u\n", + fs_udp_type2str(type), ft->t->id, ft->t->level); + + err = fs_udp_create_groups(ft, type); + if (err) + goto err; + + err = fs_udp_add_default_rule(priv, type); + if (err) + goto err; + + return 0; + +err: + mlx5e_destroy_flow_table(ft); + return err; +} + +static void fs_udp_destroy_table(struct mlx5e_fs_udp *fs_udp, int i) +{ + if (IS_ERR_OR_NULL(fs_udp->tables[i].t)) + return; + + mlx5_del_flow_rules(fs_udp->default_rules[i]); + mlx5e_destroy_flow_table(&fs_udp->tables[i]); + fs_udp->tables[i].t = NULL; +} + +static int fs_udp_disable(struct mlx5e_priv *priv) +{ + int err, i; + + for (i = 0; i < FS_UDP_NUM_TYPES; i++) { + /* Modify ttc rules destination to point back to the indir TIRs */ + err = mlx5e_ttc_fwd_default_dest(priv, fs_udp2tt(i)); + if (err) { + netdev_err(priv->netdev, + "%s: modify ttc[%d] default destination failed, err(%d)\n", + __func__, fs_udp2tt(i), err); + return err; + } + } + + return 0; +} + +static int fs_udp_enable(struct mlx5e_priv *priv) +{ + struct mlx5_flow_destination dest = {}; + int err, i; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + for (i = 0; i < FS_UDP_NUM_TYPES; i++) { + dest.ft = priv->fs.udp->tables[i].t; + + /* Modify ttc rules destination to point on the accel_fs FTs */ + err = mlx5e_ttc_fwd_dest(priv, fs_udp2tt(i), &dest); + if (err) { + netdev_err(priv->netdev, + "%s: modify ttc[%d] destination to accel failed, err(%d)\n", + __func__, fs_udp2tt(i), err); + return err; + } + } + return 0; +} + +void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_priv *priv) +{ + struct mlx5e_fs_udp *fs_udp = priv->fs.udp; + int i; + + if (!fs_udp) + return; + + if (--fs_udp->ref_cnt) + return; + + fs_udp_disable(priv); + + for (i = 0; i < FS_UDP_NUM_TYPES; i++) + fs_udp_destroy_table(fs_udp, i); + + kfree(fs_udp); + priv->fs.udp = NULL; +} + +int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_priv *priv) +{ + int i, err; + + if (priv->fs.udp) { + priv->fs.udp->ref_cnt++; + return 0; + } + + priv->fs.udp = kzalloc(sizeof(*priv->fs.udp), GFP_KERNEL); + if (!priv->fs.udp) + return -ENOMEM; + + for (i = 0; i < FS_UDP_NUM_TYPES; i++) { + err = fs_udp_create_table(priv, i); + if (err) + goto err_destroy_tables; + } + + err = fs_udp_enable(priv); + if (err) + goto err_destroy_tables; + + priv->fs.udp->ref_cnt = 1; + + return 0; + +err_destroy_tables: + while (--i >= 0) + fs_udp_destroy_table(priv->fs.udp, i); + + kfree(priv->fs.udp); + priv->fs.udp = NULL; + return err; +} + +static void fs_any_set_ethertype_flow(struct mlx5_flow_spec *spec, u16 ether_type) +{ + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, ether_type); +} + +struct 
mlx5_flow_handle * +mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_priv *priv, + u32 tir_num, u16 ether_type) +{ + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_table *ft = NULL; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + struct mlx5e_fs_any *fs_any; + int err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + fs_any = priv->fs.any; + ft = fs_any->table.t; + + fs_any_set_ethertype_flow(spec, ether_type); + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest.tir_num = tir_num; + + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + kvfree(spec); + + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, "%s: add ANY rule failed, err %d\n", + __func__, err); + } + return rule; +} + +static int fs_any_add_default_rule(struct mlx5e_priv *priv) +{ + struct mlx5e_flow_table *fs_any_t; + struct mlx5_flow_destination dest; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5e_fs_any *fs_any; + int err; + + fs_any = priv->fs.any; + fs_any_t = &fs_any->table; + + dest = mlx5e_ttc_get_default_dest(priv, MLX5E_TT_ANY); + rule = mlx5_add_flow_rules(fs_any_t->t, NULL, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, + "%s: add default rule failed, fs type=ANY, err %d\n", + __func__, err); + return err; + } + + fs_any->default_rule = rule; + return 0; +} + +#define MLX5E_FS_ANY_NUM_GROUPS (2) +#define MLX5E_FS_ANY_GROUP1_SIZE (BIT(16)) +#define MLX5E_FS_ANY_GROUP2_SIZE (BIT(0)) +#define MLX5E_FS_ANY_TABLE_SIZE (MLX5E_FS_ANY_GROUP1_SIZE +\ + MLX5E_FS_ANY_GROUP2_SIZE) + +static int fs_any_create_groups(struct mlx5e_flow_table *ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + void *outer_headers_c; + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(MLX5E_FS_UDP_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in || !ft->g) { + kfree(ft->g); + kvfree(in); + return -ENOMEM; + } + + /* Match on ethertype */ + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, outer_headers); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ethertype); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_FS_ANY_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Default Flow Group */ + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_FS_ANY_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + kvfree(in); + + return err; +} + +static int fs_any_create_table(struct mlx5e_priv *priv) +{ + struct mlx5e_flow_table *ft = &priv->fs.any->table; + struct mlx5_flow_table_attr ft_attr = {}; + int err; + + ft->num_groups = 0; + + ft_attr.max_fte = MLX5E_FS_UDP_TABLE_SIZE; + ft_attr.level = MLX5E_FS_TT_ANY_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + + netdev_dbg(priv->netdev, 
"Created fs ANY table id %u level %u\n", + ft->t->id, ft->t->level); + + err = fs_any_create_groups(ft); + if (err) + goto err; + + err = fs_any_add_default_rule(priv); + if (err) + goto err; + + return 0; + +err: + mlx5e_destroy_flow_table(ft); + return err; +} + +static int fs_any_disable(struct mlx5e_priv *priv) +{ + int err; + + /* Modify ttc rules destination to point back to the indir TIRs */ + err = mlx5e_ttc_fwd_default_dest(priv, MLX5E_TT_ANY); + if (err) { + netdev_err(priv->netdev, + "%s: modify ttc[%d] default destination failed, err(%d)\n", + __func__, MLX5E_TT_ANY, err); + return err; + } + return 0; +} + +static int fs_any_enable(struct mlx5e_priv *priv) +{ + struct mlx5_flow_destination dest = {}; + int err; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = priv->fs.any->table.t; + + /* Modify ttc rules destination to point on the accel_fs FTs */ + err = mlx5e_ttc_fwd_dest(priv, MLX5E_TT_ANY, &dest); + if (err) { + netdev_err(priv->netdev, + "%s: modify ttc[%d] destination to accel failed, err(%d)\n", + __func__, MLX5E_TT_ANY, err); + return err; + } + return 0; +} + +static void fs_any_destroy_table(struct mlx5e_fs_any *fs_any) +{ + if (IS_ERR_OR_NULL(fs_any->table.t)) + return; + + mlx5_del_flow_rules(fs_any->default_rule); + mlx5e_destroy_flow_table(&fs_any->table); + fs_any->table.t = NULL; +} + +void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_priv *priv) +{ + struct mlx5e_fs_any *fs_any = priv->fs.any; + + if (!fs_any) + return; + + if (--fs_any->ref_cnt) + return; + + fs_any_disable(priv); + + fs_any_destroy_table(fs_any); + + kfree(fs_any); + priv->fs.any = NULL; +} + +int mlx5e_fs_tt_redirect_any_create(struct mlx5e_priv *priv) +{ + int err; + + if (priv->fs.any) { + priv->fs.any->ref_cnt++; + return 0; + } + + priv->fs.any = kzalloc(sizeof(*priv->fs.any), GFP_KERNEL); + if (!priv->fs.any) + return -ENOMEM; + + err = fs_any_create_table(priv); + if (err) + return err; + + err = fs_any_enable(priv); + if (err) + goto err_destroy_table; + + priv->fs.any->ref_cnt = 1; + + return 0; + +err_destroy_table: + fs_any_destroy_table(priv->fs.any); + + kfree(priv->fs.any); + priv->fs.any = NULL; + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h new file mode 100644 index 000000000000..8385df24eb99 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. 
*/ + +#ifndef __MLX5E_FS_TT_REDIRECT_H__ +#define __MLX5E_FS_TT_REDIRECT_H__ + +#include "en.h" +#include "en/fs.h" + +void mlx5e_fs_tt_redirect_del_rule(struct mlx5_flow_handle *rule); + +/* UDP traffic type redirect */ +struct mlx5_flow_handle * +mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv, + enum mlx5e_traffic_types ttc_type, + u32 tir_num, u16 d_port); +void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_priv *priv); +int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_priv *priv); + +/* ANY traffic type redirect */ +struct mlx5_flow_handle * +mlx5e_fs_tt_redirect_any_add_rule(struct mlx5e_priv *priv, + u32 tir_num, u16 ether_type); +void mlx5e_fs_tt_redirect_any_destroy(struct mlx5e_priv *priv); +int mlx5e_fs_tt_redirect_any_create(struct mlx5e_priv *priv); +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c index 84e501e057b4..6f4e6c34b2a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c @@ -128,7 +128,7 @@ int mlx5e_health_eq_diag_fmsg(struct mlx5_eq_comp *eq, struct devlink_fmsg *fmsg if (err) return err; - err = devlink_fmsg_u32_pair_put(fmsg, "size", eq->core.nent); + err = devlink_fmsg_u32_pair_put(fmsg, "size", eq_get_size(&eq->core)); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 36381a2ed5a5..f410c1268422 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -3,10 +3,13 @@ #include "en/params.h" #include "en/txrx.h" -#include "en_accel/tls_rxtx.h" +#include "en/port.h" +#include "en_accel/en_accel.h" +#include "accel/ipsec.h" +#include "fpga/ipsec.h" -static inline bool mlx5e_rx_is_xdp(struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk) +static bool mlx5e_rx_is_xdp(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { return params->xdp_prog || xsk; } @@ -37,8 +40,8 @@ u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params, return linear_rq_headroom + hw_mtu; } -u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk) +static u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { u32 frag_sz = mlx5e_rx_get_min_frag_sz(params, xsk); @@ -87,30 +90,39 @@ bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params, return !params->lro_en && linear_frag_sz <= PAGE_SIZE; } -#define MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ ((BIT(__mlx5_bit_sz(wq, log_wqe_stride_size)) - 1) + \ - MLX5_MPWQE_LOG_STRIDE_SZ_BASE) -bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, - struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk) +bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev, + u8 log_stride_sz, u8 log_num_strides) { - u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params, xsk); - s8 signed_log_num_strides_param; - u8 log_num_strides; + if (log_stride_sz + log_num_strides != MLX5_MPWRQ_LOG_WQE_SZ) + return false; - if (!mlx5e_rx_is_linear_skb(params, xsk)) + if (log_stride_sz < MLX5_MPWQE_LOG_STRIDE_SZ_BASE || + log_stride_sz > MLX5_MPWQE_LOG_STRIDE_SZ_MAX) return false; - if (order_base_2(linear_frag_sz) > MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ) + if (log_num_strides > MLX5_MPWQE_LOG_NUM_STRIDES_MAX) return false; if (MLX5_CAP_GEN(mdev, ext_stride_num_range)) - return true; + return log_num_strides >= MLX5_MPWQE_LOG_NUM_STRIDES_EXT_BASE; + + return log_num_strides 
>= MLX5_MPWQE_LOG_NUM_STRIDES_BASE; +} - log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz); - signed_log_num_strides_param = - (s8)log_num_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE; +bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + s8 log_num_strides; + u8 log_stride_sz; - return signed_log_num_strides_param >= 0; + if (!mlx5e_rx_is_linear_skb(params, xsk)) + return false; + + log_stride_sz = order_base_2(mlx5e_rx_get_linear_frag_sz(params, xsk)); + log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - log_stride_sz; + + return mlx5e_verify_rx_mpwqe_strides(mdev, log_stride_sz, log_num_strides); } u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params, @@ -172,17 +184,505 @@ u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *par return stop_room; } -int mlx5e_validate_params(struct mlx5e_priv *priv, struct mlx5e_params *params) +int mlx5e_validate_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { size_t sq_size = 1 << params->log_sq_size; u16 stop_room; - stop_room = mlx5e_calc_sq_stop_room(priv->mdev, params); + stop_room = mlx5e_calc_sq_stop_room(mdev, params); if (stop_room >= sq_size) { - netdev_err(priv->netdev, "Stop room %u is bigger than the SQ size %zu\n", - stop_room, sq_size); + mlx5_core_err(mdev, "Stop room %u is bigger than the SQ size %zu\n", + stop_room, sq_size); return -EINVAL; } return 0; } + +static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) +{ + struct dim_cq_moder moder; + + moder.cq_period_mode = cq_period_mode; + moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; + moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; + if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) + moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE; + + return moder; +} + +static struct dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) +{ + struct dim_cq_moder moder; + + moder.cq_period_mode = cq_period_mode; + moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; + moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; + if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) + moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE; + + return moder; +} + +static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode) +{ + return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ? 
+ DIM_CQ_PERIOD_MODE_START_FROM_CQE : + DIM_CQ_PERIOD_MODE_START_FROM_EQE; +} + +void mlx5e_reset_tx_moderation(struct mlx5e_params *params, u8 cq_period_mode) +{ + if (params->tx_dim_enabled) { + u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode); + + params->tx_cq_moderation = net_dim_get_def_tx_moderation(dim_period_mode); + } else { + params->tx_cq_moderation = mlx5e_get_def_tx_moderation(cq_period_mode); + } +} + +void mlx5e_reset_rx_moderation(struct mlx5e_params *params, u8 cq_period_mode) +{ + if (params->rx_dim_enabled) { + u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode); + + params->rx_cq_moderation = net_dim_get_def_rx_moderation(dim_period_mode); + } else { + params->rx_cq_moderation = mlx5e_get_def_rx_moderation(cq_period_mode); + } +} + +void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) +{ + mlx5e_reset_tx_moderation(params, cq_period_mode); + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_TX_CQE_BASED_MODER, + params->tx_cq_moderation.cq_period_mode == + MLX5_CQ_PERIOD_MODE_START_FROM_CQE); +} + +void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) +{ + mlx5e_reset_rx_moderation(params, cq_period_mode); + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER, + params->rx_cq_moderation.cq_period_mode == + MLX5_CQ_PERIOD_MODE_START_FROM_CQE); +} + +bool slow_pci_heuristic(struct mlx5_core_dev *mdev) +{ + u32 link_speed = 0; + u32 pci_bw = 0; + + mlx5e_port_max_linkspeed(mdev, &link_speed); + pci_bw = pcie_bandwidth_available(mdev->pdev, NULL, NULL, NULL); + mlx5_core_dbg_once(mdev, "Max link speed = %d, PCI BW = %d\n", + link_speed, pci_bw); + +#define MLX5E_SLOW_PCI_RATIO (2) + + return link_speed && pci_bw && + link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw; +} + +bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + if (!mlx5e_check_fragmented_striding_rq_cap(mdev)) + return false; + + if (mlx5_fpga_is_ipsec_device(mdev)) + return false; + + if (params->xdp_prog) { + /* XSK params are not considered here. If striding RQ is in use, + * and an XSK is being opened, mlx5e_rx_mpwqe_is_linear_skb will + * be called with the known XSK params. + */ + if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL)) + return false; + } + + return true; +} + +void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + params->log_rq_mtu_frames = is_kdump_kernel() ? + MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE : + MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; + + mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", + params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, + params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ? + BIT(mlx5e_mpwqe_get_log_rq_size(params, NULL)) : + BIT(params->log_rq_mtu_frames), + BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL)), + MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); +} + +void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params) +{ + params->rq_wq_type = mlx5e_striding_rq_possible(mdev, params) && + MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ? + MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : + MLX5_WQ_TYPE_CYCLIC; +} + +void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + /* Prefer Striding RQ, unless any of the following holds: + * - Striding RQ configuration is not possible/supported. + * - Slow PCI heuristic. + * - Legacy RQ would use linear SKB while Striding RQ would use non-linear. 
+ * + * No XSK params: checking the availability of striding RQ in general. + */ + if (!slow_pci_heuristic(mdev) && + mlx5e_striding_rq_possible(mdev, params) && + (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) || + !mlx5e_rx_is_linear_skb(params, NULL))) + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true); + mlx5e_set_rq_type(mdev, params); + mlx5e_init_rq_type_params(mdev, params); +} + +/* Build queue parameters */ + +void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e_channel *c) +{ + *ccp = (struct mlx5e_create_cq_param) { + .napi = &c->napi, + .ch_stats = c->stats, + .node = cpu_to_node(c->cpu), + .ix = c->ix, + }; +} + +#define DEFAULT_FRAG_SIZE (2048) + +static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_rq_frags_info *info) +{ + u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu); + int frag_size_max = DEFAULT_FRAG_SIZE; + u32 buf_size = 0; + int i; + + if (mlx5_fpga_is_ipsec_device(mdev)) + byte_count += MLX5E_METADATA_ETHER_LEN; + + if (mlx5e_rx_is_linear_skb(params, xsk)) { + int frag_stride; + + frag_stride = mlx5e_rx_get_linear_frag_sz(params, xsk); + frag_stride = roundup_pow_of_two(frag_stride); + + info->arr[0].frag_size = byte_count; + info->arr[0].frag_stride = frag_stride; + info->num_frags = 1; + info->wqe_bulk = PAGE_SIZE / frag_stride; + goto out; + } + + if (byte_count > PAGE_SIZE + + (MLX5E_MAX_RX_FRAGS - 1) * frag_size_max) + frag_size_max = PAGE_SIZE; + + i = 0; + while (buf_size < byte_count) { + int frag_size = byte_count - buf_size; + + if (i < MLX5E_MAX_RX_FRAGS - 1) + frag_size = min(frag_size, frag_size_max); + + info->arr[i].frag_size = frag_size; + info->arr[i].frag_stride = roundup_pow_of_two(frag_size); + + buf_size += frag_size; + i++; + } + info->num_frags = i; + /* number of different wqes sharing a page */ + info->wqe_bulk = 1 + (info->num_frags % 2); + +out: + info->wqe_bulk = max_t(u8, info->wqe_bulk, 8); + info->log_num_frags = order_base_2(info->num_frags); +} + +static u8 mlx5e_get_rqwq_log_stride(u8 wq_type, int ndsegs) +{ + int sz = sizeof(struct mlx5_wqe_data_seg) * ndsegs; + + switch (wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + sz += sizeof(struct mlx5e_rx_wqe_ll); + break; + default: /* MLX5_WQ_TYPE_CYCLIC */ + sz += sizeof(struct mlx5e_rx_wqe_cyc); + } + + return order_base_2(sz); +} + +static void mlx5e_build_common_cq_param(struct mlx5_core_dev *mdev, + struct mlx5e_cq_param *param) +{ + void *cqc = param->cqc; + + MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); + if (MLX5_CAP_GEN(mdev, cqe_128_always) && cache_line_size() >= 128) + MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD); +} + +static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_cq_param *param) +{ + bool hw_stridx = false; + void *cqc = param->cqc; + u8 log_cq_size; + + switch (params->rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) + + mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); + hw_stridx = MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index); + break; + default: /* MLX5_WQ_TYPE_CYCLIC */ + log_cq_size = params->log_rq_mtu_frames; + } + + MLX5_SET(cqc, cqc, log_cq_size, log_cq_size); + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { + MLX5_SET(cqc, cqc, mini_cqe_res_format, hw_stridx ? 
+ MLX5_CQE_FORMAT_CSUM_STRIDX : MLX5_CQE_FORMAT_CSUM); + MLX5_SET(cqc, cqc, cqe_comp_en, 1); + } + + mlx5e_build_common_cq_param(mdev, param); + param->cq_period_mode = params->rx_cq_moderation.cq_period_mode; +} + +int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + u16 q_counter, + struct mlx5e_rq_param *param) +{ + void *rqc = param->rqc; + void *wq = MLX5_ADDR_OF(rqc, rqc, wq); + int ndsegs = 1; + + switch (params->rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: { + u8 log_wqe_num_of_strides = mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); + u8 log_wqe_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); + + if (!mlx5e_verify_rx_mpwqe_strides(mdev, log_wqe_stride_size, + log_wqe_num_of_strides)) { + mlx5_core_err(mdev, + "Bad RX MPWQE params: log_stride_size %u, log_num_strides %u\n", + log_wqe_stride_size, log_wqe_num_of_strides); + return -EINVAL; + } + + MLX5_SET(wq, wq, log_wqe_num_of_strides, + log_wqe_num_of_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE); + MLX5_SET(wq, wq, log_wqe_stride_size, + log_wqe_stride_size - MLX5_MPWQE_LOG_STRIDE_SZ_BASE); + MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params, xsk)); + break; + } + default: /* MLX5_WQ_TYPE_CYCLIC */ + MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames); + mlx5e_build_rq_frags_info(mdev, params, xsk, ¶m->frags_info); + ndsegs = param->frags_info.num_frags; + } + + MLX5_SET(wq, wq, wq_type, params->rq_wq_type); + MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); + MLX5_SET(wq, wq, log_wq_stride, + mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs)); + MLX5_SET(wq, wq, pd, mdev->mlx5e_res.hw_objs.pdn); + MLX5_SET(rqc, rqc, counter_set_id, q_counter); + MLX5_SET(rqc, rqc, vsd, params->vlan_strip_disable); + MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en); + + param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); + mlx5e_build_rx_cq_param(mdev, params, xsk, ¶m->cqp); + + return 0; +} + +void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev, + u16 q_counter, + struct mlx5e_rq_param *param) +{ + void *rqc = param->rqc; + void *wq = MLX5_ADDR_OF(rqc, rqc, wq); + + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); + MLX5_SET(wq, wq, log_wq_stride, + mlx5e_get_rqwq_log_stride(MLX5_WQ_TYPE_CYCLIC, 1)); + MLX5_SET(rqc, rqc, counter_set_id, q_counter); + + param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); +} + +void mlx5e_build_tx_cq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_cq_param *param) +{ + void *cqc = param->cqc; + + MLX5_SET(cqc, cqc, log_cq_size, params->log_sq_size); + + mlx5e_build_common_cq_param(mdev, param); + param->cq_period_mode = params->tx_cq_moderation.cq_period_mode; +} + +void mlx5e_build_sq_param_common(struct mlx5_core_dev *mdev, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); + MLX5_SET(wq, wq, pd, mdev->mlx5e_res.hw_objs.pdn); + + param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); +} + +void mlx5e_build_sq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + bool allow_swp; + + allow_swp = mlx5_geneve_tx_allowed(mdev) || + !!MLX5_IPSEC_DEV(mdev); + mlx5e_build_sq_param_common(mdev, param); + MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); + MLX5_SET(sqc, sqc, allow_swp, 
allow_swp); + param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE); + param->stop_room = mlx5e_calc_sq_stop_room(mdev, params); + mlx5e_build_tx_cq_param(mdev, params, ¶m->cqp); +} + +static void mlx5e_build_ico_cq_param(struct mlx5_core_dev *mdev, + u8 log_wq_size, + struct mlx5e_cq_param *param) +{ + void *cqc = param->cqc; + + MLX5_SET(cqc, cqc, log_cq_size, log_wq_size); + + mlx5e_build_common_cq_param(mdev, param); + + param->cq_period_mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; +} + +static u8 mlx5e_get_rq_log_wq_sz(void *rqc) +{ + void *wq = MLX5_ADDR_OF(rqc, rqc, wq); + + return MLX5_GET(wq, wq, log_wq_sz); +} + +static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5e_params *params, + struct mlx5e_rq_param *rqp) +{ + switch (params->rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return max_t(u8, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE, + order_base_2(MLX5E_UMR_WQEBBS) + + mlx5e_get_rq_log_wq_sz(rqp->rqc)); + default: /* MLX5_WQ_TYPE_CYCLIC */ + return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; + } +} + +static u8 mlx5e_build_async_icosq_log_wq_sz(struct mlx5_core_dev *mdev) +{ + if (mlx5_accel_is_ktls_rx(mdev)) + return MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; + + return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; +} + +static void mlx5e_build_icosq_param(struct mlx5_core_dev *mdev, + u8 log_wq_size, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(mdev, param); + + MLX5_SET(wq, wq, log_wq_sz, log_wq_size); + MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(mdev, reg_umr_sq)); + mlx5e_build_ico_cq_param(mdev, log_wq_size, ¶m->cqp); +} + +static void mlx5e_build_async_icosq_param(struct mlx5_core_dev *mdev, + u8 log_wq_size, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(mdev, param); + param->stop_room = mlx5e_stop_room_for_wqe(1); /* for XSK NOP */ + param->is_tls = mlx5_accel_is_ktls_rx(mdev); + if (param->is_tls) + param->stop_room += mlx5e_stop_room_for_wqe(1); /* for TLS RX resync NOP */ + MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(mdev, reg_umr_sq)); + MLX5_SET(wq, wq, log_wq_sz, log_wq_size); + mlx5e_build_ico_cq_param(mdev, log_wq_size, ¶m->cqp); +} + +void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(mdev, param); + MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); + param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE); + mlx5e_build_tx_cq_param(mdev, params, ¶m->cqp); +} + +int mlx5e_build_channel_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + u16 q_counter, + struct mlx5e_channel_param *cparam) +{ + u8 icosq_log_wq_sz, async_icosq_log_wq_sz; + int err; + + err = mlx5e_build_rq_param(mdev, params, NULL, q_counter, &cparam->rq); + if (err) + return err; + + icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(params, &cparam->rq); + async_icosq_log_wq_sz = mlx5e_build_async_icosq_log_wq_sz(mdev); + + mlx5e_build_sq_param(mdev, params, &cparam->txq_sq); + mlx5e_build_xdpsq_param(mdev, params, &cparam->xdp_sq); + mlx5e_build_icosq_param(mdev, icosq_log_wq_sz, &cparam->icosq); + mlx5e_build_async_icosq_param(mdev, async_icosq_log_wq_sz, &cparam->async_icosq); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index ea2cfb04b31a..e9593f5f0661 100644 
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -30,6 +30,7 @@ struct mlx5e_sq_param { u32 sqc[MLX5_ST_SZ_DW(sqc)]; struct mlx5_wq_param wq; bool is_mpw; + bool is_tls; u16 stop_room; }; @@ -84,12 +85,23 @@ static inline bool mlx5e_qid_validate(const struct mlx5e_profile *profile, /* Parameter calculations */ +void mlx5e_reset_tx_moderation(struct mlx5e_params *params, u8 cq_period_mode); +void mlx5e_reset_rx_moderation(struct mlx5e_params *params, u8 cq_period_mode); +void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); +void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); + +bool slow_pci_heuristic(struct mlx5_core_dev *mdev); +bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, struct mlx5e_params *params); +void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); +void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params); +void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); + +bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev, + u8 log_stride_sz, u8 log_num_strides); u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); u32 mlx5e_rx_get_min_frag_sz(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); -u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk); u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params, @@ -112,32 +124,31 @@ u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, /* Build queue parameters */ void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e_channel *c); -void mlx5e_build_rq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, - struct mlx5e_rq_param *param); -void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, +int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + u16 q_counter, + struct mlx5e_rq_param *param); +void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev, + u16 q_counter, + struct mlx5e_rq_param *param); +void mlx5e_build_sq_param_common(struct mlx5_core_dev *mdev, struct mlx5e_sq_param *param); -void mlx5e_build_sq_param(struct mlx5e_priv *priv, struct mlx5e_params *params, +void mlx5e_build_sq_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, struct mlx5e_sq_param *param); -void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, - struct mlx5e_cq_param *param); -void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, +void mlx5e_build_tx_cq_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_cq_param *param); -void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, - u8 log_wq_size, - struct mlx5e_cq_param *param); -void mlx5e_build_icosq_param(struct mlx5e_priv *priv, - u8 log_wq_size, - struct mlx5e_sq_param *param); -void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, +void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_sq_param *param); +int mlx5e_build_channel_param(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + u16 q_counter, + struct mlx5e_channel_param *cparam); u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params 
*params); -int mlx5e_validate_params(struct mlx5e_priv *priv, struct mlx5e_params *params); +int mlx5e_validate_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); #endif /* __MLX5_EN_PARAMS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 308fd279669e..89510cac46c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -387,21 +387,6 @@ enum mlx5e_fec_supported_link_mode { *_policy = MLX5_GET(pplm_reg, _buf, fec_override_admin_##link); \ } while (0) -#define MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(buf, policy, write, link) \ - do { \ - unsigned long policy_long; \ - u16 *__policy = &(policy); \ - bool _write = (write); \ - \ - policy_long = *__policy; \ - if (_write && *__policy) \ - *__policy = find_first_bit(&policy_long, \ - sizeof(policy_long) * BITS_PER_BYTE);\ - MLX5E_FEC_OVERRIDE_ADMIN_POLICY(buf, *__policy, _write, link); \ - if (!_write && *__policy) \ - *__policy = 1 << *__policy; \ - } while (0) - /* get/set FEC admin field for a given speed */ static int mlx5e_fec_admin_field(u32 *pplm, u16 *fec_policy, bool write, enum mlx5e_fec_supported_link_mode link_mode) @@ -423,16 +408,16 @@ static int mlx5e_fec_admin_field(u32 *pplm, u16 *fec_policy, bool write, MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 100g); break; case MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X: - MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(pplm, *fec_policy, write, 50g_1x); + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 50g_1x); break; case MLX5E_FEC_SUPPORTED_LINK_MODE_100G_2X: - MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(pplm, *fec_policy, write, 100g_2x); + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 100g_2x); break; case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_4X: - MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(pplm, *fec_policy, write, 200g_4x); + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 200g_4x); break; case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_8X: - MLX5E_FEC_OVERRIDE_ADMIN_50G_POLICY(pplm, *fec_policy, write, 400g_8x); + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 400g_8x); break; default: return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index d57b6f06382f..d907c1acd4d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -1,8 +1,26 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB // Copyright (c) 2020 Mellanox Technologies +#include <linux/ptp_classify.h> #include "en/ptp.h" #include "en/txrx.h" +#include "en/params.h" +#include "en/fs_tt_redirect.h" + +struct mlx5e_ptp_fs { + struct mlx5_flow_handle *l2_rule; + struct mlx5_flow_handle *udp_v4_rule; + struct mlx5_flow_handle *udp_v6_rule; + bool valid; +}; + +#define MLX5E_PTP_CHANNEL_IX 0 + +struct mlx5e_ptp_params { + struct mlx5e_params params; + struct mlx5e_sq_param txq_sq_param; + struct mlx5e_rq_param rq_param; +}; struct mlx5e_skb_cb_hwtstamp { ktime_t cqe_hwtstamp; @@ -116,9 +134,9 @@ static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget) static int mlx5e_ptp_napi_poll(struct napi_struct *napi, int budget) { - struct mlx5e_port_ptp *c = container_of(napi, struct mlx5e_port_ptp, - napi); + struct mlx5e_ptp *c = container_of(napi, struct mlx5e_ptp, napi); struct mlx5e_ch_stats *ch_stats = c->stats; + struct mlx5e_rq *rq = &c->rq; bool busy = false; int work_done = 0; int i; @@ -127,9 +145,19 @@ static 
int mlx5e_ptp_napi_poll(struct napi_struct *napi, int budget) ch_stats->poll++; - for (i = 0; i < c->num_tc; i++) { - busy |= mlx5e_poll_tx_cq(&c->ptpsq[i].txqsq.cq, budget); - busy |= mlx5e_ptp_poll_ts_cq(&c->ptpsq[i].ts_cq, budget); + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) { + for (i = 0; i < c->num_tc; i++) { + busy |= mlx5e_poll_tx_cq(&c->ptpsq[i].txqsq.cq, budget); + busy |= mlx5e_ptp_poll_ts_cq(&c->ptpsq[i].ts_cq, budget); + } + } + if (test_bit(MLX5E_PTP_STATE_RX, c->state) && likely(budget)) { + work_done = mlx5e_poll_rx_cq(&rq->cq, budget); + busy |= work_done == budget; + busy |= INDIRECT_CALL_2(rq->post_wqes, + mlx5e_post_rx_mpwqes, + mlx5e_post_rx_wqes, + rq); } if (busy) { @@ -142,10 +170,14 @@ static int mlx5e_ptp_napi_poll(struct napi_struct *napi, int budget) ch_stats->arm++; - for (i = 0; i < c->num_tc; i++) { - mlx5e_cq_arm(&c->ptpsq[i].txqsq.cq); - mlx5e_cq_arm(&c->ptpsq[i].ts_cq); + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) { + for (i = 0; i < c->num_tc; i++) { + mlx5e_cq_arm(&c->ptpsq[i].txqsq.cq); + mlx5e_cq_arm(&c->ptpsq[i].ts_cq); + } } + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) + mlx5e_cq_arm(&rq->cq); out: rcu_read_unlock(); @@ -153,7 +185,7 @@ out: return work_done; } -static int mlx5e_ptp_alloc_txqsq(struct mlx5e_port_ptp *c, int txq_ix, +static int mlx5e_ptp_alloc_txqsq(struct mlx5e_ptp *c, int txq_ix, struct mlx5e_params *params, struct mlx5e_sq_param *param, struct mlx5e_txqsq *sq, int tc, @@ -172,20 +204,18 @@ static int mlx5e_ptp_alloc_txqsq(struct mlx5e_port_ptp *c, int txq_ix, sq->netdev = c->netdev; sq->priv = c->priv; sq->mdev = mdev; - sq->ch_ix = c->ix; + sq->ch_ix = MLX5E_PTP_CHANNEL_IX; sq->txq_ix = txq_ix; - sq->uar_map = mdev->mlx5e_res.bfreg.map; + sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - sq->stats = &c->priv->port_ptp_stats.sq[tc]; + sq->stats = &c->priv->ptp_stats.sq[tc]; sq->ptpsq = ptpsq; INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert)) set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); sq->stop_room = param->stop_room; - sq->ptp_cyc2time = mlx5_is_real_time_sq(mdev) ? 
- mlx5_real_time_cyc2time : - mlx5_timecounter_cyc2time; + sq->ptp_cyc2time = mlx5_sq_ts_translator(mdev); node = dev_to_node(mlx5_core_dma_dev(mdev)); @@ -243,7 +273,7 @@ static void mlx5e_ptp_free_traffic_db(struct mlx5e_skb_fifo *skb_fifo) kvfree(skb_fifo->fifo); } -static int mlx5e_ptp_open_txqsq(struct mlx5e_port_ptp *c, u32 tisn, +static int mlx5e_ptp_open_txqsq(struct mlx5e_ptp *c, u32 tisn, int txq_ix, struct mlx5e_ptp_params *cparams, int tc, struct mlx5e_ptpsq *ptpsq) { @@ -293,7 +323,7 @@ static void mlx5e_ptp_close_txqsq(struct mlx5e_ptpsq *ptpsq) mlx5e_free_txqsq(sq); } -static int mlx5e_ptp_open_txqsqs(struct mlx5e_port_ptp *c, +static int mlx5e_ptp_open_txqsqs(struct mlx5e_ptp *c, struct mlx5e_ptp_params *cparams) { struct mlx5e_params *params = &cparams->params; @@ -321,7 +351,7 @@ close_txqsq: return err; } -static void mlx5e_ptp_close_txqsqs(struct mlx5e_port_ptp *c) +static void mlx5e_ptp_close_txqsqs(struct mlx5e_ptp *c) { int tc; @@ -329,8 +359,8 @@ static void mlx5e_ptp_close_txqsqs(struct mlx5e_port_ptp *c) mlx5e_ptp_close_txqsq(&c->ptpsq[tc]); } -static int mlx5e_ptp_open_cqs(struct mlx5e_port_ptp *c, - struct mlx5e_ptp_params *cparams) +static int mlx5e_ptp_open_tx_cqs(struct mlx5e_ptp *c, + struct mlx5e_ptp_params *cparams) { struct mlx5e_params *params = &cparams->params; struct mlx5e_create_cq_param ccp = {}; @@ -342,7 +372,7 @@ static int mlx5e_ptp_open_cqs(struct mlx5e_port_ptp *c, ccp.node = dev_to_node(mlx5_core_dma_dev(c->mdev)); ccp.ch_stats = c->stats; ccp.napi = &c->napi; - ccp.ix = c->ix; + ccp.ix = MLX5E_PTP_CHANNEL_IX; cq_param = &cparams->txq_sq_param.cqp; @@ -362,7 +392,7 @@ static int mlx5e_ptp_open_cqs(struct mlx5e_port_ptp *c, if (err) goto out_err_ts_cq; - ptpsq->cq_stats = &c->priv->port_ptp_stats.cq[tc]; + ptpsq->cq_stats = &c->priv->ptp_stats.cq[tc]; } return 0; @@ -378,7 +408,25 @@ out_err_txqsq_cq: return err; } -static void mlx5e_ptp_close_cqs(struct mlx5e_port_ptp *c) +static int mlx5e_ptp_open_rx_cq(struct mlx5e_ptp *c, + struct mlx5e_ptp_params *cparams) +{ + struct mlx5e_create_cq_param ccp = {}; + struct dim_cq_moder ptp_moder = {}; + struct mlx5e_cq_param *cq_param; + struct mlx5e_cq *cq = &c->rq.cq; + + ccp.node = dev_to_node(mlx5_core_dma_dev(c->mdev)); + ccp.ch_stats = c->stats; + ccp.napi = &c->napi; + ccp.ix = MLX5E_PTP_CHANNEL_IX; + + cq_param = &cparams->rq_param.cqp; + + return mlx5e_open_cq(c->priv, ptp_moder, cq_param, &ccp, cq); +} + +static void mlx5e_ptp_close_tx_cqs(struct mlx5e_ptp *c) { int tc; @@ -389,22 +437,36 @@ static void mlx5e_ptp_close_cqs(struct mlx5e_port_ptp *c) mlx5e_close_cq(&c->ptpsq[tc].txqsq.cq); } -static void mlx5e_ptp_build_sq_param(struct mlx5e_priv *priv, +static void mlx5e_ptp_build_sq_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq; - mlx5e_build_sq_param_common(priv, param); + mlx5e_build_sq_param_common(mdev, param); wq = MLX5_ADDR_OF(sqc, sqc, wq); MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); param->stop_room = mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); - mlx5e_build_tx_cq_param(priv, params, ¶m->cqp); + mlx5e_build_tx_cq_param(mdev, params, ¶m->cqp); +} + +static void mlx5e_ptp_build_rq_param(struct mlx5_core_dev *mdev, + struct net_device *netdev, + u16 q_counter, + struct mlx5e_ptp_params *ptp_params) +{ + struct mlx5e_rq_param *rq_params = &ptp_params->rq_param; + struct mlx5e_params *params = &ptp_params->params; + + params->rq_wq_type = MLX5_WQ_TYPE_CYCLIC; + mlx5e_init_rq_type_params(mdev, 
params); + params->sw_mtu = netdev->max_mtu; + mlx5e_build_rq_param(mdev, params, NULL, q_counter, rq_params); } -static void mlx5e_ptp_build_params(struct mlx5e_port_ptp *c, +static void mlx5e_ptp_build_params(struct mlx5e_ptp *c, struct mlx5e_ptp_params *cparams, struct mlx5e_params *orig) { @@ -417,52 +479,193 @@ static void mlx5e_ptp_build_params(struct mlx5e_port_ptp *c, params->num_tc = orig->num_tc; /* SQ */ - params->log_sq_size = orig->log_sq_size; - - mlx5e_ptp_build_sq_param(c->priv, params, &cparams->txq_sq_param); + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) { + params->log_sq_size = orig->log_sq_size; + mlx5e_ptp_build_sq_param(c->mdev, params, &cparams->txq_sq_param); + } + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) + mlx5e_ptp_build_rq_param(c->mdev, c->netdev, c->priv->q_counter, cparams); } -static int mlx5e_ptp_open_queues(struct mlx5e_port_ptp *c, - struct mlx5e_ptp_params *cparams) +static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params, + struct mlx5e_rq *rq) { + struct mlx5_core_dev *mdev = c->mdev; + struct mlx5e_priv *priv = c->priv; int err; - err = mlx5e_ptp_open_cqs(c, cparams); + rq->wq_type = params->rq_wq_type; + rq->pdev = mdev->device; + rq->netdev = priv->netdev; + rq->priv = priv; + rq->clock = &mdev->clock; + rq->tstamp = &priv->tstamp; + rq->mdev = mdev; + rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + rq->stats = &c->priv->ptp_stats.rq; + rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev); + err = mlx5e_rq_set_handlers(rq, params, false); if (err) return err; - err = mlx5e_ptp_open_txqsqs(c, cparams); + return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, 0); +} + +static int mlx5e_ptp_open_rq(struct mlx5e_ptp *c, struct mlx5e_params *params, + struct mlx5e_rq_param *rq_param) +{ + int node = dev_to_node(c->mdev->device); + int err; + + err = mlx5e_init_ptp_rq(c, params, &c->rq); if (err) - goto close_cqs; + return err; + + return mlx5e_open_rq(params, rq_param, NULL, node, &c->rq); +} + +static int mlx5e_ptp_open_queues(struct mlx5e_ptp *c, + struct mlx5e_ptp_params *cparams) +{ + int err; + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) { + err = mlx5e_ptp_open_tx_cqs(c, cparams); + if (err) + return err; + + err = mlx5e_ptp_open_txqsqs(c, cparams); + if (err) + goto close_tx_cqs; + } + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) { + err = mlx5e_ptp_open_rx_cq(c, cparams); + if (err) + goto close_txqsq; + + err = mlx5e_ptp_open_rq(c, &cparams->params, &cparams->rq_param); + if (err) + goto close_rx_cq; + } return 0; -close_cqs: - mlx5e_ptp_close_cqs(c); +close_rx_cq: + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) + mlx5e_close_cq(&c->rq.cq); +close_txqsq: + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) + mlx5e_ptp_close_txqsqs(c); +close_tx_cqs: + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) + mlx5e_ptp_close_tx_cqs(c); return err; } -static void mlx5e_ptp_close_queues(struct mlx5e_port_ptp *c) +static void mlx5e_ptp_close_queues(struct mlx5e_ptp *c) { - mlx5e_ptp_close_txqsqs(c); - mlx5e_ptp_close_cqs(c); + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) { + mlx5e_close_rq(&c->rq); + mlx5e_close_cq(&c->rq.cq); + } + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) { + mlx5e_ptp_close_txqsqs(c); + mlx5e_ptp_close_tx_cqs(c); + } } -int mlx5e_port_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, - u8 lag_port, struct mlx5e_port_ptp **cp) +static int mlx5e_ptp_set_state(struct mlx5e_ptp *c, struct mlx5e_params *params) +{ + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_TX_PORT_TS)) + __set_bit(MLX5E_PTP_STATE_TX, c->state); + 
+ if (params->ptp_rx) + __set_bit(MLX5E_PTP_STATE_RX, c->state); + + return bitmap_empty(c->state, MLX5E_PTP_STATE_NUM_STATES) ? -EINVAL : 0; +} + +static void mlx5e_ptp_rx_unset_fs(struct mlx5e_priv *priv) +{ + struct mlx5e_ptp_fs *ptp_fs = priv->fs.ptp_fs; + + if (!ptp_fs->valid) + return; + + mlx5e_fs_tt_redirect_del_rule(ptp_fs->l2_rule); + mlx5e_fs_tt_redirect_any_destroy(priv); + + mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v6_rule); + mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v4_rule); + mlx5e_fs_tt_redirect_udp_destroy(priv); + ptp_fs->valid = false; +} + +static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv) +{ + struct mlx5e_ptp_fs *ptp_fs = priv->fs.ptp_fs; + struct mlx5_flow_handle *rule; + u32 tirn = priv->ptp_tir.tirn; + int err; + + if (ptp_fs->valid) + return 0; + + err = mlx5e_fs_tt_redirect_udp_create(priv); + if (err) + goto out_free; + + rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5E_TT_IPV4_UDP, + tirn, PTP_EV_PORT); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + goto out_destroy_fs_udp; + } + ptp_fs->udp_v4_rule = rule; + + rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5E_TT_IPV6_UDP, + tirn, PTP_EV_PORT); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + goto out_destroy_udp_v4_rule; + } + ptp_fs->udp_v6_rule = rule; + + err = mlx5e_fs_tt_redirect_any_create(priv); + if (err) + goto out_destroy_udp_v6_rule; + + rule = mlx5e_fs_tt_redirect_any_add_rule(priv, tirn, ETH_P_1588); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + goto out_destroy_fs_any; + } + ptp_fs->l2_rule = rule; + ptp_fs->valid = true; + + return 0; + +out_destroy_fs_any: + mlx5e_fs_tt_redirect_any_destroy(priv); +out_destroy_udp_v6_rule: + mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v6_rule); +out_destroy_udp_v4_rule: + mlx5e_fs_tt_redirect_del_rule(ptp_fs->udp_v4_rule); +out_destroy_fs_udp: + mlx5e_fs_tt_redirect_udp_destroy(priv); +out_free: + return err; +} + +int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, + u8 lag_port, struct mlx5e_ptp **cp) { struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_ptp_params *cparams; - struct mlx5e_port_ptp *c; - unsigned int irq; + struct mlx5e_ptp *c; int err; - int eqn; - err = mlx5_vector2eqn(priv->mdev, 0, &eqn, &irq); - if (err) - return err; c = kvzalloc_node(sizeof(*c), GFP_KERNEL, dev_to_node(mlx5_core_dma_dev(mdev))); cparams = kvzalloc(sizeof(*cparams), GFP_KERNEL); @@ -472,14 +675,17 @@ int mlx5e_port_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, c->priv = priv; c->mdev = priv->mdev; c->tstamp = &priv->tstamp; - c->ix = 0; c->pdev = mlx5_core_dma_dev(priv->mdev); c->netdev = priv->netdev; - c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); + c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key); c->num_tc = params->num_tc; - c->stats = &priv->port_ptp_stats.ch; + c->stats = &priv->ptp_stats.ch; c->lag_port = lag_port; + err = mlx5e_ptp_set_state(c, params); + if (err) + goto err_free; + netif_napi_add(netdev, &c->napi, mlx5e_ptp_napi_poll, 64); mlx5e_ptp_build_params(c, cparams, params); @@ -488,6 +694,9 @@ int mlx5e_port_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, if (unlikely(err)) goto err_napi_del; + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) + priv->rx_ptp_opened = true; + *cp = c; kvfree(cparams); @@ -496,13 +705,13 @@ int mlx5e_port_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, err_napi_del: netif_napi_del(&c->napi); - +err_free: kvfree(cparams); kvfree(c); return err; } -void 
mlx5e_port_ptp_close(struct mlx5e_port_ptp *c) +void mlx5e_ptp_close(struct mlx5e_ptp *c) { mlx5e_ptp_close_queues(c); netif_napi_del(&c->napi); @@ -510,22 +719,94 @@ void mlx5e_port_ptp_close(struct mlx5e_port_ptp *c) kvfree(c); } -void mlx5e_ptp_activate_channel(struct mlx5e_port_ptp *c) +void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c) { int tc; napi_enable(&c->napi); - for (tc = 0; tc < c->num_tc; tc++) - mlx5e_activate_txqsq(&c->ptpsq[tc].txqsq); + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) { + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_activate_txqsq(&c->ptpsq[tc].txqsq); + } + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) { + mlx5e_ptp_rx_set_fs(c->priv); + mlx5e_activate_rq(&c->rq); + } } -void mlx5e_ptp_deactivate_channel(struct mlx5e_port_ptp *c) +void mlx5e_ptp_deactivate_channel(struct mlx5e_ptp *c) { int tc; - for (tc = 0; tc < c->num_tc; tc++) - mlx5e_deactivate_txqsq(&c->ptpsq[tc].txqsq); + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) + mlx5e_deactivate_rq(&c->rq); + + if (test_bit(MLX5E_PTP_STATE_TX, c->state)) { + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_deactivate_txqsq(&c->ptpsq[tc].txqsq); + } napi_disable(&c->napi); } + +int mlx5e_ptp_get_rqn(struct mlx5e_ptp *c, u32 *rqn) +{ + if (!c || !test_bit(MLX5E_PTP_STATE_RX, c->state)) + return -EINVAL; + + *rqn = c->rq.rqn; + return 0; +} + +int mlx5e_ptp_alloc_rx_fs(struct mlx5e_priv *priv) +{ + struct mlx5e_ptp_fs *ptp_fs; + + if (!priv->profile->rx_ptp_support) + return 0; + + ptp_fs = kzalloc(sizeof(*ptp_fs), GFP_KERNEL); + if (!ptp_fs) + return -ENOMEM; + + priv->fs.ptp_fs = ptp_fs; + return 0; +} + +void mlx5e_ptp_free_rx_fs(struct mlx5e_priv *priv) +{ + struct mlx5e_ptp_fs *ptp_fs = priv->fs.ptp_fs; + + if (!priv->profile->rx_ptp_support) + return; + + mlx5e_ptp_rx_unset_fs(priv); + kfree(ptp_fs); +} + +int mlx5e_ptp_rx_manage_fs(struct mlx5e_priv *priv, bool set) +{ + struct mlx5e_ptp *c = priv->channels.ptp; + + if (!priv->profile->rx_ptp_support) + return 0; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + if (set) { + if (!c || !test_bit(MLX5E_PTP_STATE_RX, c->state)) { + netdev_WARN_ONCE(priv->netdev, "Don't try to add PTP RX-FS rules"); + return -EINVAL; + } + return mlx5e_ptp_rx_set_fs(priv); + } + /* set == false */ + if (c && test_bit(MLX5E_PTP_STATE_RX, c->state)) { + netdev_WARN_ONCE(priv->netdev, "Don't try to remove PTP RX-FS rules"); + return -EINVAL; + } + mlx5e_ptp_rx_unset_fs(priv); + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h index 90c98ea63b7f..ab935cce952b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h @@ -5,7 +5,6 @@ #define __MLX5_EN_PTP_H__ #include "en.h" -#include "en/params.h" #include "en_stats.h" struct mlx5e_ptpsq { @@ -17,9 +16,16 @@ struct mlx5e_ptpsq { struct mlx5e_ptp_cq_stats *cq_stats; }; -struct mlx5e_port_ptp { +enum { + MLX5E_PTP_STATE_TX, + MLX5E_PTP_STATE_RX, + MLX5E_PTP_STATE_NUM_STATES, +}; + +struct mlx5e_ptp { /* data path */ struct mlx5e_ptpsq ptpsq[MLX5E_MAX_NUM_TC]; + struct mlx5e_rq rq; struct napi_struct napi; struct device *pdev; struct net_device *netdev; @@ -34,20 +40,18 @@ struct mlx5e_port_ptp { struct mlx5e_priv *priv; struct mlx5_core_dev *mdev; struct hwtstamp_config *tstamp; - DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES); - int ix; -}; - -struct mlx5e_ptp_params { - struct mlx5e_params params; - struct mlx5e_sq_param txq_sq_param; + DECLARE_BITMAP(state, MLX5E_PTP_STATE_NUM_STATES); }; 
-int mlx5e_port_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, - u8 lag_port, struct mlx5e_port_ptp **cp); -void mlx5e_port_ptp_close(struct mlx5e_port_ptp *c); -void mlx5e_ptp_activate_channel(struct mlx5e_port_ptp *c); -void mlx5e_ptp_deactivate_channel(struct mlx5e_port_ptp *c); +int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, + u8 lag_port, struct mlx5e_ptp **cp); +void mlx5e_ptp_close(struct mlx5e_ptp *c); +void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c); +void mlx5e_ptp_deactivate_channel(struct mlx5e_ptp *c); +int mlx5e_ptp_get_rqn(struct mlx5e_ptp *c, u32 *rqn); +int mlx5e_ptp_alloc_rx_fs(struct mlx5e_priv *priv); +void mlx5e_ptp_free_rx_fs(struct mlx5e_priv *priv); +int mlx5e_ptp_rx_manage_fs(struct mlx5e_priv *priv, bool set); enum { MLX5E_SKB_CB_CQE_HWTSTAMP = BIT(0), diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c index 12d7ad061237..5efe3278b0f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c @@ -232,8 +232,8 @@ static int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs memset(¶m_sq, 0, sizeof(param_sq)); memset(¶m_cq, 0, sizeof(param_cq)); - mlx5e_build_sq_param(priv, params, ¶m_sq); - mlx5e_build_tx_cq_param(priv, params, ¶m_cq); + mlx5e_build_sq_param(priv->mdev, params, ¶m_sq); + mlx5e_build_tx_cq_param(priv->mdev, params, ¶m_cq); err = mlx5e_open_cq(priv, params->tx_cq_moderation, ¶m_cq, &ccp, &sq->cq); if (err) goto err_free_sq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index 065126370acd..6cdc52d50a48 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -17,6 +17,7 @@ #include "en/mapping.h" #include "en/tc_tun.h" #include "lib/port_tun.h" +#include "esw/sample.h" struct mlx5e_rep_indr_block_priv { struct net_device *netdev; @@ -169,6 +170,9 @@ static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD); struct mlx5e_priv *priv = cb_priv; + if (!priv->netdev || !netif_device_present(priv->netdev)) + return -EOPNOTSUPP; + switch (type) { case TC_SETUP_CLSFLOWER: return mlx5e_rep_setup_tc_cls_flower(priv, type_data, flags); @@ -321,6 +325,9 @@ mlx5e_rep_indr_offload(struct net_device *netdev, struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev); int err = 0; + if (!netif_device_present(indr_priv->rpriv->netdev)) + return -EOPNOTSUPP; + switch (flower->command) { case FLOW_CLS_REPLACE: err = mlx5e_configure_flower(netdev, priv, flower, flags); @@ -605,27 +612,50 @@ static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb, return true; } + +static bool mlx5e_restore_skb(struct sk_buff *skb, u32 chain, u32 reg_c1, + struct mlx5e_tc_update_priv *tc_priv) +{ + struct mlx5e_priv *priv = netdev_priv(skb->dev); + u32 tunnel_id = reg_c1 >> ESW_TUN_OFFSET; + + if (chain) { + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct tc_skb_ext *tc_skb_ext; + struct mlx5_eswitch *esw; + u32 zone_restore_id; + + tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT); + if (!tc_skb_ext) { + WARN_ON(1); + return false; + } + tc_skb_ext->chain = chain; + zone_restore_id = reg_c1 & ESW_ZONE_ID_MASK; + esw = priv->mdev->priv.eswitch; + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = 
&uplink_rpriv->uplink_priv; + if (!mlx5e_tc_ct_restore_flow(uplink_priv->ct_priv, skb, + zone_restore_id)) + return false; + } + return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id); +} #endif /* CONFIG_NET_TC_SKB_EXT */ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb, struct mlx5e_tc_update_priv *tc_priv) { -#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) - u32 chain = 0, reg_c0, reg_c1, tunnel_id, zone_restore_id; - struct mlx5_rep_uplink_priv *uplink_priv; - struct mlx5e_rep_priv *uplink_rpriv; - struct tc_skb_ext *tc_skb_ext; + struct mlx5_mapped_obj mapped_obj; struct mlx5_eswitch *esw; struct mlx5e_priv *priv; + u32 reg_c0, reg_c1; int err; reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK); - if (reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG) - reg_c0 = 0; - reg_c1 = be32_to_cpu(cqe->ft_metadata); - - if (!reg_c0) + if (!reg_c0 || reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG) return true; /* If reg_c0 is not equal to the default flow tag then skb->mark @@ -633,38 +663,33 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, */ skb->mark = 0; + reg_c1 = be32_to_cpu(cqe->ft_metadata); + priv = netdev_priv(skb->dev); esw = priv->mdev->priv.eswitch; - - err = mlx5_get_chain_for_tag(esw_chains(esw), reg_c0, &chain); + err = mapping_find(esw->offloads.reg_c0_obj_pool, reg_c0, &mapped_obj); if (err) { netdev_dbg(priv->netdev, - "Couldn't find chain for chain tag: %d, err: %d\n", + "Couldn't find mapped object for reg_c0: %d, err: %d\n", reg_c0, err); return false; } - if (chain) { - tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT); - if (!tc_skb_ext) { - WARN_ON(1); - return false; - } - - tc_skb_ext->chain = chain; - - zone_restore_id = reg_c1 & ESW_ZONE_ID_MASK; - - uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); - uplink_priv = &uplink_rpriv->uplink_priv; - if (!mlx5e_tc_ct_restore_flow(uplink_priv->ct_priv, skb, - zone_restore_id)) - return false; - } - - tunnel_id = reg_c1 >> ESW_TUN_OFFSET; - return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id); +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) + return mlx5e_restore_skb(skb, mapped_obj.chain, reg_c1, tc_priv); #endif /* CONFIG_NET_TC_SKB_EXT */ +#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) + if (mapped_obj.type == MLX5_MAPPED_OBJ_SAMPLE) { + mlx5_esw_sample_skb(skb, &mapped_obj); + return false; + } +#endif /* CONFIG_MLX5_TC_SAMPLE */ + if (mapped_obj.type != MLX5_MAPPED_OBJ_SAMPLE && + mapped_obj.type != MLX5_MAPPED_OBJ_CHAIN) { + netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type); + return false; + } return true; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index d80bbd17e5f8..0eb125316fe2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -4,6 +4,8 @@ #include "health.h" #include "params.h" #include "txrx.h" +#include "devlink.h" +#include "ptp.h" static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state) { @@ -229,8 +231,9 @@ static int mlx5e_reporter_icosq_diagnose(struct mlx5e_icosq *icosq, u8 hw_state, return mlx5e_health_fmsg_named_obj_nest_end(fmsg); } -static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq, - struct devlink_fmsg *fmsg) +static int +mlx5e_rx_reporter_build_diagnose_output_rq_common(struct mlx5e_rq *rq, + struct devlink_fmsg *fmsg) { u16 wqe_counter; int wqes_sz; @@ -246,14 +249,6 @@ static int 
mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq, wq_head = mlx5e_rqwq_get_head(rq); wqe_counter = mlx5e_rqwq_get_wqe_counter(rq); - err = devlink_fmsg_obj_nest_start(fmsg); - if (err) - return err; - - err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", rq->ix); - if (err) - return err; - err = devlink_fmsg_u32_pair_put(fmsg, "rqn", rq->rqn); if (err) return err; @@ -299,61 +294,155 @@ static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq, return err; } - err = devlink_fmsg_obj_nest_end(fmsg); + return 0; +} + +static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq, + struct devlink_fmsg *fmsg) +{ + int err; + + err = devlink_fmsg_obj_nest_start(fmsg); if (err) return err; - return 0; + err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", rq->ix); + if (err) + return err; + + err = mlx5e_rx_reporter_build_diagnose_output_rq_common(rq, fmsg); + if (err) + return err; + + return devlink_fmsg_obj_nest_end(fmsg); } -static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg, - struct netlink_ext_ack *extack) +static int mlx5e_rx_reporter_diagnose_generic_rq(struct mlx5e_rq *rq, + struct devlink_fmsg *fmsg) { - struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); - struct mlx5e_params *params = &priv->channels.params; - struct mlx5e_rq *generic_rq; + struct mlx5e_priv *priv = rq->priv; + struct mlx5e_params *params; u32 rq_stride, rq_sz; - int i, err = 0; - - mutex_lock(&priv->state_lock); - - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) - goto unlock; + bool real_time; + int err; - generic_rq = &priv->channels.c[0]->rq; - rq_sz = mlx5e_rqwq_get_size(generic_rq); + params = &priv->channels.params; + rq_sz = mlx5e_rqwq_get_size(rq); + real_time = mlx5_is_real_time_rq(priv->mdev); rq_stride = BIT(mlx5e_mpwqe_get_log_stride_size(priv->mdev, params, NULL)); - err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common config"); - if (err) - goto unlock; - err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RQ"); if (err) - goto unlock; + return err; err = devlink_fmsg_u8_pair_put(fmsg, "type", params->rq_wq_type); if (err) - goto unlock; + return err; err = devlink_fmsg_u64_pair_put(fmsg, "stride size", rq_stride); if (err) - goto unlock; + return err; err = devlink_fmsg_u32_pair_put(fmsg, "size", rq_sz); if (err) - goto unlock; + return err; - err = mlx5e_health_cq_common_diag_fmsg(&generic_rq->cq, fmsg); + err = devlink_fmsg_string_pair_put(fmsg, "ts_format", real_time ? 
"RT" : "FRC"); if (err) - goto unlock; + return err; - err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + err = mlx5e_health_cq_common_diag_fmsg(&rq->cq, fmsg); + if (err) + return err; + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +static int +mlx5e_rx_reporter_diagnose_common_ptp_config(struct mlx5e_priv *priv, struct mlx5e_ptp *ptp_ch, + struct devlink_fmsg *fmsg) +{ + int err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP"); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "filter_type", priv->tstamp.rx_filter); if (err) + return err; + + err = mlx5e_rx_reporter_diagnose_generic_rq(&ptp_ch->rq, fmsg); + if (err) + return err; + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +static int +mlx5e_rx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_rq *generic_rq = &priv->channels.c[0]->rq; + struct mlx5e_ptp *ptp_ch = priv->channels.ptp; + int err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common config"); + if (err) + return err; + + err = mlx5e_rx_reporter_diagnose_generic_rq(generic_rq, fmsg); + if (err) + return err; + + if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) { + err = mlx5e_rx_reporter_diagnose_common_ptp_config(priv, ptp_ch, fmsg); + if (err) + return err; + } + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +static int mlx5e_rx_reporter_build_diagnose_output_ptp_rq(struct mlx5e_rq *rq, + struct devlink_fmsg *fmsg) +{ + int err; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + err = devlink_fmsg_string_pair_put(fmsg, "channel", "ptp"); + if (err) + return err; + + err = mlx5e_rx_reporter_build_diagnose_output_rq_common(rq, fmsg); + if (err) + return err; + + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + + return 0; +} + +static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + struct mlx5e_ptp *ptp_ch = priv->channels.ptp; + int i, err = 0; + + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) goto unlock; - err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + err = mlx5e_rx_reporter_diagnose_common_config(reporter, fmsg); if (err) goto unlock; @@ -368,9 +457,12 @@ static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, if (err) goto unlock; } + if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) { + err = mlx5e_rx_reporter_build_diagnose_output_ptp_rq(&ptp_ch->rq, fmsg); + if (err) + goto unlock; + } err = devlink_fmsg_arr_pair_nest_end(fmsg); - if (err) - goto unlock; unlock: mutex_unlock(&priv->state_lock); return err; @@ -502,6 +594,7 @@ static int mlx5e_rx_reporter_dump_rq(struct mlx5e_priv *priv, struct devlink_fms static int mlx5e_rx_reporter_dump_all_rqs(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg) { + struct mlx5e_ptp *ptp_ch = priv->channels.ptp; struct mlx5_rsc_key key = {}; int i, err; @@ -534,6 +627,12 @@ static int mlx5e_rx_reporter_dump_all_rqs(struct mlx5e_priv *priv, return err; } + if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) { + err = mlx5e_health_queue_dump(priv, fmsg, ptp_ch->rq.rqn, "PTP RQ"); + if (err) + return err; + } + return devlink_fmsg_arr_pair_nest_end(fmsg); } @@ -615,9 +714,10 @@ static const struct devlink_health_reporter_ops 
mlx5_rx_reporter_ops = { void mlx5e_reporter_rx_create(struct mlx5e_priv *priv) { + struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv); struct devlink_health_reporter *reporter; - reporter = devlink_port_health_reporter_create(&priv->dl_port, &mlx5_rx_reporter_ops, + reporter = devlink_port_health_reporter_create(dl_port, &mlx5_rx_reporter_ops, MLX5E_REPORTER_RX_GRACEFUL_PERIOD, priv); if (IS_ERR(reporter)) { netdev_warn(priv->netdev, "Failed to create rx reporter, err = %ld\n", @@ -633,4 +733,5 @@ void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv) return; devlink_port_health_reporter_destroy(priv->rx_reporter); + priv->rx_reporter = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index d7275c84313e..9d361efd5ff7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -3,6 +3,7 @@ #include "health.h" #include "en/ptp.h" +#include "en/devlink.h" static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) { @@ -256,12 +257,14 @@ mlx5e_tx_reporter_diagnose_generic_txqsq(struct devlink_fmsg *fmsg, struct mlx5e_txqsq *txqsq) { u32 sq_stride, sq_sz; + bool real_time; int err; err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ"); if (err) return err; + real_time = mlx5_is_real_time_sq(txqsq->mdev); sq_sz = mlx5_wq_cyc_get_size(&txqsq->wq); sq_stride = MLX5_SEND_WQE_BB; @@ -273,6 +276,10 @@ mlx5e_tx_reporter_diagnose_generic_txqsq(struct devlink_fmsg *fmsg, if (err) return err; + err = devlink_fmsg_string_pair_put(fmsg, "ts_format", real_time ? "RT" : "FRC"); + if (err) + return err; + err = mlx5e_health_cq_common_diag_fmsg(&txqsq->cq, fmsg); if (err) return err; @@ -303,6 +310,7 @@ mlx5e_tx_reporter_diagnose_common_config(struct devlink_health_reporter *reporte { struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); struct mlx5e_txqsq *generic_sq = priv->txq2sq[0]; + struct mlx5e_ptp *ptp_ch = priv->channels.ptp; struct mlx5e_ptpsq *generic_ptpsq; int err; @@ -314,12 +322,11 @@ mlx5e_tx_reporter_diagnose_common_config(struct devlink_health_reporter *reporte if (err) return err; - generic_ptpsq = priv->channels.port_ptp ? 
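/*
 * The hunk above adds "priv->rx_reporter = NULL" after destroy; a
 * sketch of the intent: create/destroy may run more than once over the
 * device's lifetime (e.g. across devlink reload), so teardown clears
 * the stale pointer that the early-return guard checks. Illustrative
 * names only.
 */
#include <net/devlink.h>

struct example_rx_priv {
	struct devlink_health_reporter *rx_reporter;
};

static void example_reporter_destroy(struct example_rx_priv *priv)
{
	if (!priv->rx_reporter)		/* create may have failed or never ran */
		return;

	devlink_port_health_reporter_destroy(priv->rx_reporter);
	priv->rx_reporter = NULL;	/* make a later re-create (or double destroy) safe */
}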
- &priv->channels.port_ptp->ptpsq[0] : - NULL; - if (!generic_ptpsq) + if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) goto out; + generic_ptpsq = &ptp_ch->ptpsq[0]; + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "PTP"); if (err) return err; @@ -345,7 +352,7 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, struct netlink_ext_ack *extack) { struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); - struct mlx5e_port_ptp *ptp_ch = priv->channels.port_ptp; + struct mlx5e_ptp *ptp_ch = priv->channels.ptp; int i, tc, err = 0; @@ -374,7 +381,7 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, } } - if (!ptp_ch) + if (!ptp_ch || !test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) goto close_sqs_nest; for (tc = 0; tc < priv->channels.params.num_tc; tc++) { @@ -459,7 +466,7 @@ static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fms static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg) { - struct mlx5e_port_ptp *ptp_ch = priv->channels.port_ptp; + struct mlx5e_ptp *ptp_ch = priv->channels.ptp; struct mlx5_rsc_key key = {}; int i, tc, err; @@ -496,7 +503,7 @@ static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, } } - if (ptp_ch) { + if (ptp_ch && test_bit(MLX5E_PTP_STATE_TX, ptp_ch->state)) { for (tc = 0; tc < priv->channels.params.num_tc; tc++) { struct mlx5e_txqsq *sq = &ptp_ch->ptpsq[tc].txqsq; @@ -572,9 +579,10 @@ static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { void mlx5e_reporter_tx_create(struct mlx5e_priv *priv) { + struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv); struct devlink_health_reporter *reporter; - reporter = devlink_port_health_reporter_create(&priv->dl_port, &mlx5_tx_reporter_ops, + reporter = devlink_port_health_reporter_create(dl_port, &mlx5_tx_reporter_ops, MLX5_REPORTER_TX_GRACEFUL_PERIOD, priv); if (IS_ERR(reporter)) { netdev_warn(priv->netdev, @@ -591,4 +599,5 @@ void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv) return; devlink_port_health_reporter_destroy(priv->tx_reporter); + priv->tx_reporter = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index f3f6eb081948..5da5e5323a44 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -29,6 +29,8 @@ #define MLX5_CT_STATE_TRK_BIT BIT(2) #define MLX5_CT_STATE_NAT_BIT BIT(3) #define MLX5_CT_STATE_REPLY_BIT BIT(4) +#define MLX5_CT_STATE_RELATED_BIT BIT(5) +#define MLX5_CT_STATE_INVALID_BIT BIT(6) #define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8) #define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0) @@ -186,6 +188,28 @@ mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry) } static int +mlx5_get_label_mapping(struct mlx5_tc_ct_priv *ct_priv, + u32 *labels, u32 *id) +{ + if (!memchr_inv(labels, 0, sizeof(u32) * 4)) { + *id = 0; + return 0; + } + + if (mapping_add(ct_priv->labels_mapping, labels, id)) + return -EOPNOTSUPP; + + return 0; +} + +static void +mlx5_put_label_mapping(struct mlx5_tc_ct_priv *ct_priv, u32 id) +{ + if (id) + mapping_remove(ct_priv->labels_mapping, id); +} + +static int mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule) { struct flow_match_control control; @@ -436,7 +460,7 @@ mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv, mlx5_tc_rule_delete(netdev_priv(ct_priv->netdev), zone_rule->rule, 
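/*
 * Sketch of the zero-key shortcut behind mlx5_get_label_mapping() above:
 * memchr_inv() returns NULL when every byte in the range equals the
 * given value, so an all-zero ct_labels key never consumes a mapping id
 * and the matching put() treats id 0 as "nothing to release". The
 * mapping_* calls informally mirror the mlx5 mapping library.
 */
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>

static int example_get_label_mapping(struct mapping_ctx *ctx,
				     u32 *labels, u32 *id)
{
	if (!memchr_inv(labels, 0, sizeof(u32) * 4)) {
		*id = 0;			/* reserved for the all-zero key */
		return 0;
	}
	return mapping_add(ctx, labels, id) ? -EOPNOTSUPP : 0;
}

static void example_put_label_mapping(struct mapping_ctx *ctx, u32 id)
{
	if (id)				/* id 0 was never added */
		mapping_remove(ctx, id);
}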
attr); mlx5e_mod_hdr_detach(ct_priv->dev, ct_priv->mod_hdr_tbl, zone_rule->mh); - mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id); + mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); kfree(attr); } @@ -639,8 +663,8 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, if (!meta) return -EOPNOTSUPP; - err = mapping_add(ct_priv->labels_mapping, meta->ct_metadata.labels, - &attr->ct_attr.ct_labels_id); + err = mlx5_get_label_mapping(ct_priv, meta->ct_metadata.labels, + &attr->ct_attr.ct_labels_id); if (err) return -EOPNOTSUPP; if (nat) { @@ -677,7 +701,7 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, err_mapping: dealloc_mod_hdr_actions(&mod_acts); - mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id); + mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); return err; } @@ -695,7 +719,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, zone_rule->nat = nat; - spec = kzalloc(sizeof(*spec), GFP_KERNEL); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) return -ENOMEM; @@ -737,7 +761,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, zone_rule->attr = attr; - kfree(spec); + kvfree(spec); ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone); return 0; @@ -745,11 +769,11 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, err_rule: mlx5e_mod_hdr_detach(ct_priv->dev, ct_priv->mod_hdr_tbl, zone_rule->mh); - mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id); + mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); err_mod_hdr: kfree(attr); err_attr: - kfree(spec); + kvfree(spec); return err; } @@ -1181,7 +1205,8 @@ int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec) mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG, &ctstate, &ctstate_mask); - if (ctstate_mask) + + if ((ctstate & ctstate_mask) == MLX5_CT_STATE_TRK_BIT) return -EOPNOTSUPP; ctstate_mask |= MLX5_CT_STATE_TRK_BIT; @@ -1196,7 +1221,7 @@ void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_ if (!priv || !ct_attr->ct_labels_id) return; - mapping_remove(priv->labels_mapping, ct_attr->ct_labels_id); + mlx5_put_label_mapping(priv, ct_attr->ct_labels_id); } int @@ -1206,8 +1231,8 @@ mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr, struct netlink_ext_ack *extack) { + bool trk, est, untrk, unest, new, rpl, unrpl, rel, unrel, inv, uninv; struct flow_rule *rule = flow_cls_offload_flow_rule(f); - bool trk, est, untrk, unest, new, rpl, unrpl; struct flow_dissector_key_ct *mask, *key; u32 ctstate = 0, ctstate_mask = 0; u16 ct_state_on, ct_state_off; @@ -1235,7 +1260,9 @@ mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv, if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED | TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED | TCA_FLOWER_KEY_CT_FLAGS_NEW | - TCA_FLOWER_KEY_CT_FLAGS_REPLY)) { + TCA_FLOWER_KEY_CT_FLAGS_REPLY | + TCA_FLOWER_KEY_CT_FLAGS_RELATED | + TCA_FLOWER_KEY_CT_FLAGS_INVALID)) { NL_SET_ERR_MSG_MOD(extack, "only ct_state trk, est, new and rpl are supported for offload"); return -EOPNOTSUPP; @@ -1247,9 +1274,13 @@ mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv, new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW; est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; rpl = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_REPLY; + rel = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_RELATED; + inv = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_INVALID; untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED; unest = ct_state_off & 
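/*
 * The kzalloc->kvzalloc conversions above follow one rule: struct
 * mlx5_flow_spec is large enough that a physically contiguous slab
 * allocation can fail under memory fragmentation, while the code only
 * needs it virtually contiguous. A minimal sketch of the pairing:
 */
#include <linux/mlx5/fs.h>
#include <linux/mm.h>
#include <linux/slab.h>

static int example_alloc_spec(void)
{
	struct mlx5_flow_spec *spec;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL); /* may fall back to vmalloc */
	if (!spec)
		return -ENOMEM;

	/* ... build the match criteria and install the rule ... */

	kvfree(spec);	/* kv* allocations must be released with kvfree() */
	return 0;
}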
TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY; + unrel = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_RELATED; + uninv = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_INVALID; ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0; ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0; @@ -1257,6 +1288,20 @@ mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv, ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0; ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0; ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0; + ctstate_mask |= unrel ? MLX5_CT_STATE_RELATED_BIT : 0; + ctstate_mask |= uninv ? MLX5_CT_STATE_INVALID_BIT : 0; + + if (rel) { + NL_SET_ERR_MSG_MOD(extack, + "matching on ct_state +rel isn't supported"); + return -EOPNOTSUPP; + } + + if (inv) { + NL_SET_ERR_MSG_MOD(extack, + "matching on ct_state +inv isn't supported"); + return -EOPNOTSUPP; + } if (new) { NL_SET_ERR_MSG_MOD(extack, @@ -1279,7 +1324,7 @@ mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv, ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1]; ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2]; ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3]; - if (mapping_add(priv->labels_mapping, ct_labels, &ct_attr->ct_labels_id)) + if (mlx5_get_label_mapping(priv, ct_labels, &ct_attr->ct_labels_id)) return -EOPNOTSUPP; mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id, MLX5_CT_LABELS_MASK); @@ -1539,6 +1584,14 @@ mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft) mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct); } +/* To avoid false lock dependency warning set the ct_entries_ht lock + * class different than the lock class of the ht being used when deleting + * last flow from a group and then deleting a group, we get into del_sw_flow_group() + * which call rhashtable_destroy on fg->ftes_hash which will take ht->mutex but + * it's different than the ht->mutex here. 
+ */ +static struct lock_class_key ct_entries_ht_lock_key; + static struct mlx5_ct_ft * mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone, struct nf_flowtable *nf_ft) @@ -1573,6 +1626,8 @@ mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone, if (err) goto err_init; + lockdep_set_class(&ft->ct_entries_ht.mutex, &ct_entries_ht_lock_key); + err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node, zone_params); if (err) @@ -1674,10 +1729,10 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft; u32 fte_id = 1; - post_ct_spec = kzalloc(sizeof(*post_ct_spec), GFP_KERNEL); + post_ct_spec = kvzalloc(sizeof(*post_ct_spec), GFP_KERNEL); ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL); if (!post_ct_spec || !ct_flow) { - kfree(post_ct_spec); + kvfree(post_ct_spec); kfree(ct_flow); return ERR_PTR(-ENOMEM); } @@ -1787,6 +1842,10 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, ct_flow->post_ct_attr->prio = 0; ct_flow->post_ct_attr->ft = ct_priv->post_ct; + /* Splits were handled before CT */ + if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB) + ct_flow->post_ct_attr->esw_attr->split_count = 0; + ct_flow->post_ct_attr->inner_match_level = MLX5_MATCH_NONE; ct_flow->post_ct_attr->outer_match_level = MLX5_MATCH_NONE; ct_flow->post_ct_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP); @@ -1812,7 +1871,7 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, attr->ct_attr.ct_flow = ct_flow; dealloc_mod_hdr_actions(&pre_mod_acts); - kfree(post_ct_spec); + kvfree(post_ct_spec); return rule; @@ -1833,7 +1892,7 @@ err_alloc_pre: err_idr: mlx5_tc_ct_del_ft_cb(ct_priv, ft); err_ft: - kfree(post_ct_spec); + kvfree(post_ct_spec); kfree(ct_flow); netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err); return ERR_PTR(err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h index c223591ffc22..d1599b7b944b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h @@ -27,6 +27,7 @@ enum { MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8, MLX5E_TC_FLOW_FLAG_TUN_RX = MLX5E_TC_FLOW_BASE + 9, MLX5E_TC_FLOW_FLAG_FAILED = MLX5E_TC_FLOW_BASE + 10, + MLX5E_TC_FLOW_FLAG_SAMPLE = MLX5E_TC_FLOW_BASE + 11, }; struct mlx5e_tc_flow_parse_attr { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index f8075a604605..172e0474f2e6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -685,14 +685,14 @@ int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv, u16 vport_num; int err = 0; - if (flow_attr->ip_version == 4) { + if (flow_attr->tun_ip_version == 4) { /* Addresses are swapped for decap */ attr.fl.fl4.saddr = esw_attr->rx_tun_attr->dst_ip.v4; attr.fl.fl4.daddr = esw_attr->rx_tun_attr->src_ip.v4; err = mlx5e_route_lookup_ipv4_get(priv, priv->netdev, &attr); } #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) - else if (flow_attr->ip_version == 6) { + else if (flow_attr->tun_ip_version == 6) { /* Addresses are swapped for decap */ attr.fl.fl6.saddr = esw_attr->rx_tun_attr->dst_ip.v6; attr.fl.fl6.daddr = esw_attr->rx_tun_attr->src_ip.v6; @@ -718,10 +718,10 @@ int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv, esw_attr->rx_tun_attr->decap_vport = vport_num; out: - if (flow_attr->ip_version == 4) + if (flow_attr->tun_ip_version == 4) 
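/*
 * Sketch of the lockdep annotation added above: one static key per
 * hashtable "role" gives this table's internal mutex its own lockdep
 * class, so legitimate nesting against a different rhashtable's mutex
 * is not reported as recursive locking. Names are illustrative.
 */
#include <linux/lockdep.h>
#include <linux/rhashtable.h>

static struct lock_class_key example_ht_lock_key;

static int example_ht_init(struct rhashtable *ht,
			   const struct rhashtable_params *params)
{
	int err = rhashtable_init(ht, params);

	if (err)
		return err;

	/* distinguish this ht->mutex from every other rhashtable's */
	lockdep_set_class(&ht->mutex, &example_ht_lock_key);
	return 0;
}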
mlx5e_route_lookup_ipv4_put(&attr); #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) - else if (flow_attr->ip_version == 6) + else if (flow_attr->tun_ip_version == 6) mlx5e_route_lookup_ipv6_put(&attr); #endif return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h index 67de2bf36861..9350ca05ce65 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -21,6 +21,11 @@ enum { MLX5E_TC_TUNNEL_TYPE_MPLSOUDP, }; +struct mlx5e_encap_key { + const struct ip_tunnel_key *ip_tun_key; + struct mlx5e_tc_tunnel *tc_tunnel; +}; + struct mlx5e_tc_tunnel { int tunnel_type; enum mlx5_flow_match_level match_level; @@ -44,6 +49,8 @@ struct mlx5e_tc_tunnel { struct flow_cls_offload *f, void *headers_c, void *headers_v); + bool (*encap_info_equal)(struct mlx5e_encap_key *a, + struct mlx5e_encap_key *b); }; extern struct mlx5e_tc_tunnel vxlan_tunnel; @@ -76,10 +83,12 @@ int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, static inline int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, struct net_device *mirred_dev, - struct mlx5e_encap_entry *e) { return -EOPNOTSUPP; } -int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, - struct net_device *mirred_dev, - struct mlx5e_encap_entry *e) + struct mlx5e_encap_entry *e) +{ return -EOPNOTSUPP; } +static inline int +mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv, + struct net_device *mirred_dev, + struct mlx5e_encap_entry *e) { return -EOPNOTSUPP; } #endif int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv, @@ -101,6 +110,9 @@ int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv, void *headers_c, void *headers_v); +bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a, + struct mlx5e_encap_key *b); + #endif /* CONFIG_MLX5_ESWITCH */ #endif //__MLX5_EN_TC_TUNNEL_H__ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 6a116335bb21..593503bc4d07 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -2,6 +2,7 @@ /* Copyright (c) 2021 Mellanox Technologies. */ #include <net/fib_notifier.h> +#include <net/nexthop.h> #include "tc_tun_encap.h" #include "en_tc.h" #include "tc_tun.h" @@ -89,6 +90,7 @@ int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow, * required to establish routing. 
*/ flow_flag_set(flow, TUN_RX); + flow->attr->tun_ip_version = ip_version; return 0; } @@ -475,16 +477,11 @@ void mlx5e_detach_decap(struct mlx5e_priv *priv, mlx5e_decap_dealloc(priv, d); } -struct encap_key { - const struct ip_tunnel_key *ip_tun_key; - struct mlx5e_tc_tunnel *tc_tunnel; -}; - -static int cmp_encap_info(struct encap_key *a, - struct encap_key *b) +bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a, + struct mlx5e_encap_key *b) { - return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) || - a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type; + return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 && + a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type; } static int cmp_decap_info(struct mlx5e_decap_key *a, @@ -493,7 +490,7 @@ static int cmp_decap_info(struct mlx5e_decap_key *a, return memcmp(&a->key, &b->key, sizeof(b->key)); } -static int hash_encap_info(struct encap_key *key) +static int hash_encap_info(struct mlx5e_encap_key *key) { return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key), key->tc_tunnel->tunnel_type); @@ -515,18 +512,18 @@ static bool mlx5e_decap_take(struct mlx5e_decap_entry *e) } static struct mlx5e_encap_entry * -mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key, +mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key, uintptr_t hash_key) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_encap_key e_key; struct mlx5e_encap_entry *e; - struct encap_key e_key; hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, encap_hlist, hash_key) { e_key.ip_tun_key = &e->tun_info->key; e_key.tc_tunnel = e->tunnel; - if (!cmp_encap_info(&e_key, key) && + if (e->tunnel->encap_info_equal(&e_key, key) && mlx5e_encap_take(e)) return e; } @@ -693,8 +690,8 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr = flow->attr; const struct ip_tunnel_info *tun_info; unsigned long tbl_time_before = 0; - struct encap_key key; struct mlx5e_encap_entry *e; + struct mlx5e_encap_key key; bool entry_created = false; unsigned short family; uintptr_t hash_key; @@ -1091,7 +1088,7 @@ int mlx5e_attach_decap_route(struct mlx5e_priv *priv, if (err || !esw_attr->rx_tun_attr->decap_vport) goto out; - key.ip_version = attr->ip_version; + key.ip_version = attr->tun_ip_version; if (key.ip_version == 4) key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4; else diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c index e472ed0eacfb..f5b26f5a7de4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c @@ -227,6 +227,10 @@ static int mlx5e_tc_tun_parse_geneve_options(struct mlx5e_priv *priv, option_key = (struct geneve_opt *)&enc_opts.key->data[0]; option_mask = (struct geneve_opt *)&enc_opts.mask->data[0]; + if (option_mask->opt_class == 0 && option_mask->type == 0 && + !memchr_inv(option_mask->opt_data, 0, option_mask->length * 4)) + return 0; + if (option_key->length > max_tlv_option_data_len) { NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE options: unsupported option len"); @@ -325,6 +329,34 @@ static int mlx5e_tc_tun_parse_geneve(struct mlx5e_priv *priv, return mlx5e_tc_tun_parse_geneve_options(priv, spec, f); } +static bool mlx5e_tc_tun_encap_info_equal_geneve(struct mlx5e_encap_key *a, + struct mlx5e_encap_key *b) +{ + struct ip_tunnel_info *a_info; + struct ip_tunnel_info *b_info; + bool a_has_opts, 
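/*
 * Sketch of the callback-based comparison introduced above: struct
 * mlx5e_tc_tunnel now carries encap_info_equal(), and the generic
 * implementation inverts the old memcmp()-style cmp helper (where 0
 * meant "equal") into a plain bool. Types are illustrative stand-ins.
 */
#include <linux/string.h>
#include <net/ip_tunnels.h>

struct example_encap_key;

struct example_tc_tunnel {
	int tunnel_type;
	bool (*encap_info_equal)(struct example_encap_key *a,
				 struct example_encap_key *b);
};

struct example_encap_key {
	const struct ip_tunnel_key *ip_tun_key;
	struct example_tc_tunnel *tc_tunnel;
};

static bool example_encap_info_equal_generic(struct example_encap_key *a,
					     struct example_encap_key *b)
{
	/* memcmp() == 0 means the raw tunnel keys are identical */
	return !memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) &&
	       a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
}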
b_has_opts; + + if (!mlx5e_tc_tun_encap_info_equal_generic(a, b)) + return false; + + a_has_opts = !!(a->ip_tun_key->tun_flags & TUNNEL_GENEVE_OPT); + b_has_opts = !!(b->ip_tun_key->tun_flags & TUNNEL_GENEVE_OPT); + + /* keys are equal when both don't have any options attached */ + if (!a_has_opts && !b_has_opts) + return true; + + if (a_has_opts != b_has_opts) + return false; + + /* geneve options stored in memory next to ip_tunnel_info struct */ + a_info = container_of(a->ip_tun_key, struct ip_tunnel_info, key); + b_info = container_of(b->ip_tun_key, struct ip_tunnel_info, key); + + return a_info->options_len == b_info->options_len && + memcmp(a_info + 1, b_info + 1, a_info->options_len) == 0; +} + struct mlx5e_tc_tunnel geneve_tunnel = { .tunnel_type = MLX5E_TC_TUNNEL_TYPE_GENEVE, .match_level = MLX5_MATCH_L4, @@ -334,4 +366,5 @@ struct mlx5e_tc_tunnel geneve_tunnel = { .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_geneve, .parse_udp_ports = mlx5e_tc_tun_parse_udp_ports_geneve, .parse_tunnel = mlx5e_tc_tun_parse_geneve, + .encap_info_equal = mlx5e_tc_tun_encap_info_equal_geneve, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c index 2805416c32a3..ada14f0574dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c @@ -94,4 +94,5 @@ struct mlx5e_tc_tunnel gre_tunnel = { .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_gretap, .parse_udp_ports = NULL, .parse_tunnel = mlx5e_tc_tun_parse_gretap, + .encap_info_equal = mlx5e_tc_tun_encap_info_equal_generic, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c index 3479672e84cf..60952b33b568 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c @@ -131,4 +131,5 @@ struct mlx5e_tc_tunnel mplsoudp_tunnel = { .generate_ip_tun_hdr = generate_ip_tun_hdr, .parse_udp_ports = parse_udp_ports, .parse_tunnel = parse_tunnel, + .encap_info_equal = mlx5e_tc_tun_encap_info_equal_generic, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c index 038a0f1cecec..4267f3a1059e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c @@ -150,4 +150,5 @@ struct mlx5e_tc_tunnel vxlan_tunnel = { .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_vxlan, .parse_udp_ports = mlx5e_tc_tun_parse_udp_ports_vxlan, .parse_tunnel = mlx5e_tc_tun_parse_vxlan, + .encap_info_equal = mlx5e_tc_tun_encap_info_equal_generic, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c index 37fc1d77ded7..86ab4e864fe6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c @@ -30,172 +30,62 @@ static int mlx5e_trap_napi_poll(struct napi_struct *napi, int budget) return work_done; } -static int mlx5e_alloc_trap_rq(struct mlx5e_priv *priv, struct mlx5e_rq_param *rqp, - struct mlx5e_rq_stats *stats, struct mlx5e_params *params, - struct mlx5e_ch_stats *ch_stats, +static void mlx5e_init_trap_rq(struct mlx5e_trap *t, struct mlx5e_params *params, struct mlx5e_rq *rq) { - void *rqc_wq = MLX5_ADDR_OF(rqc, rqp->rqc, wq); - struct mlx5_core_dev *mdev = priv->mdev; - struct page_pool_params 
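/*
 * Sketch of the layout trick the GENEVE comparator above depends on: an
 * ip_tunnel_info keeps its variable-length options immediately after
 * the struct itself, so "info + 1" addresses the option bytes, and
 * container_of() climbs from the embedded key back to the enclosing
 * info. Helper name is invented.
 */
#include <linux/kernel.h>
#include <linux/string.h>
#include <net/ip_tunnels.h>

static bool example_tun_opts_equal(const struct ip_tunnel_key *ka,
				   const struct ip_tunnel_key *kb)
{
	const struct ip_tunnel_info *a, *b;

	a = container_of(ka, struct ip_tunnel_info, key);
	b = container_of(kb, struct ip_tunnel_info, key);

	return a->options_len == b->options_len &&
	       !memcmp(a + 1, b + 1, a->options_len); /* options trail the struct */
}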
pp_params = {}; - int node = dev_to_node(mdev->device); - u32 pool_size; - int wq_sz; - int err; - int i; - - rqp->wq.db_numa_node = node; - - rq->wq_type = params->rq_wq_type; - rq->pdev = mdev->device; - rq->netdev = priv->netdev; - rq->mdev = mdev; - rq->priv = priv; - rq->stats = stats; - rq->clock = &mdev->clock; - rq->tstamp = &priv->tstamp; - rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - + struct mlx5_core_dev *mdev = t->mdev; + struct mlx5e_priv *priv = t->priv; + + rq->wq_type = params->rq_wq_type; + rq->pdev = mdev->device; + rq->netdev = priv->netdev; + rq->priv = priv; + rq->clock = &mdev->clock; + rq->tstamp = &priv->tstamp; + rq->mdev = mdev; + rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + rq->stats = &priv->trap_stats.rq; + rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev); xdp_rxq_info_unused(&rq->xdp_rxq); - - rq->buff.map_dir = DMA_FROM_DEVICE; - rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, NULL); - pool_size = 1 << params->log_rq_mtu_frames; - - err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq, &rq->wq_ctrl); - if (err) - return err; - - rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR]; - - wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq); - - rq->wqe.info = rqp->frags_info; - rq->buff.frame0_sz = rq->wqe.info.arr[0].frag_stride; - rq->wqe.frags = kvzalloc_node(array_size(sizeof(*rq->wqe.frags), - (wq_sz << rq->wqe.info.log_num_frags)), - GFP_KERNEL, node); - if (!rq->wqe.frags) { - err = -ENOMEM; - goto err_wq_cyc_destroy; - } - - err = mlx5e_init_di_list(rq, wq_sz, node); - if (err) - goto err_free_frags; - - rq->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); - mlx5e_rq_set_trap_handlers(rq, params); - - /* Create a page_pool and register it with rxq */ - pp_params.order = 0; - pp_params.flags = 0; /* No-internal DMA mapping in page_pool */ - pp_params.pool_size = pool_size; - pp_params.nid = node; - pp_params.dev = mdev->device; - pp_params.dma_dir = rq->buff.map_dir; - - /* page_pool can be used even when there is no rq->xdp_prog, - * given page_pool does not handle DMA mapping there is no - * required state to clear. And page_pool gracefully handle - * elevated refcnt. 
- */ - rq->page_pool = page_pool_create(&pp_params); - if (IS_ERR(rq->page_pool)) { - err = PTR_ERR(rq->page_pool); - rq->page_pool = NULL; - goto err_free_di_list; - } - for (i = 0; i < wq_sz; i++) { - struct mlx5e_rx_wqe_cyc *wqe = - mlx5_wq_cyc_get_wqe(&rq->wqe.wq, i); - int f; - - for (f = 0; f < rq->wqe.info.num_frags; f++) { - u32 frag_size = rq->wqe.info.arr[f].frag_size | - MLX5_HW_START_PADDING; - - wqe->data[f].byte_count = cpu_to_be32(frag_size); - wqe->data[f].lkey = rq->mkey_be; - } - /* check if num_frags is not a pow of two */ - if (rq->wqe.info.num_frags < (1 << rq->wqe.info.log_num_frags)) { - wqe->data[f].byte_count = 0; - wqe->data[f].lkey = cpu_to_be32(MLX5_INVALID_LKEY); - wqe->data[f].addr = 0; - } - } - return 0; - -err_free_di_list: - mlx5e_free_di_list(rq); -err_free_frags: - kvfree(rq->wqe.frags); -err_wq_cyc_destroy: - mlx5_wq_destroy(&rq->wq_ctrl); - - return err; } -static void mlx5e_free_trap_rq(struct mlx5e_rq *rq) -{ - page_pool_destroy(rq->page_pool); - mlx5e_free_di_list(rq); - kvfree(rq->wqe.frags); - mlx5_wq_destroy(&rq->wq_ctrl); -} - -static int mlx5e_open_trap_rq(struct mlx5e_priv *priv, struct napi_struct *napi, - struct mlx5e_rq_stats *stats, struct mlx5e_params *params, - struct mlx5e_rq_param *rq_param, - struct mlx5e_ch_stats *ch_stats, - struct mlx5e_rq *rq) +static int mlx5e_open_trap_rq(struct mlx5e_priv *priv, struct mlx5e_trap *t) { + struct mlx5e_rq_param *rq_param = &t->rq_param; struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_create_cq_param ccp = {}; struct dim_cq_moder trap_moder = {}; - struct mlx5e_cq *cq = &rq->cq; + struct mlx5e_rq *rq = &t->rq; + int node; int err; - ccp.node = dev_to_node(mdev->device); - ccp.ch_stats = ch_stats; - ccp.napi = napi; + node = dev_to_node(mdev->device); + + ccp.node = node; + ccp.ch_stats = t->stats; + ccp.napi = &t->napi; ccp.ix = 0; - err = mlx5e_open_cq(priv, trap_moder, &rq_param->cqp, &ccp, cq); + err = mlx5e_open_cq(priv, trap_moder, &rq_param->cqp, &ccp, &rq->cq); if (err) return err; - err = mlx5e_alloc_trap_rq(priv, rq_param, stats, params, ch_stats, rq); + mlx5e_init_trap_rq(t, &t->params, rq); + err = mlx5e_open_rq(&t->params, rq_param, NULL, node, rq); if (err) goto err_destroy_cq; - err = mlx5e_create_rq(rq, rq_param); - if (err) - goto err_free_rq; - - err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); - if (err) - goto err_destroy_rq; - return 0; -err_destroy_rq: - mlx5e_destroy_rq(rq); - mlx5e_free_rx_descs(rq); -err_free_rq: - mlx5e_free_trap_rq(rq); err_destroy_cq: - mlx5e_close_cq(cq); + mlx5e_close_cq(&rq->cq); return err; } static void mlx5e_close_trap_rq(struct mlx5e_rq *rq) { - mlx5e_destroy_rq(rq); - mlx5e_free_rx_descs(rq); - mlx5e_free_trap_rq(rq); + mlx5e_close_rq(rq); mlx5e_close_cq(&rq->cq); } @@ -213,7 +103,7 @@ static int mlx5e_create_trap_direct_rq_tir(struct mlx5_core_dev *mdev, struct ml return -ENOMEM; tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); - MLX5_SET(tirc, tirc, transport_domain, mdev->mlx5e_res.td.tdn); + MLX5_SET(tirc, tirc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn); MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_NONE); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT); MLX5_SET(tirc, tirc, inline_rqn, rqn); @@ -228,24 +118,16 @@ static void mlx5e_destroy_trap_direct_rq_tir(struct mlx5_core_dev *mdev, struct mlx5e_destroy_tir(mdev, tir); } -static void mlx5e_activate_trap_rq(struct mlx5e_rq *rq) -{ - set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); -} - -static void mlx5e_deactivate_trap_rq(struct mlx5e_rq *rq) -{ 
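/*
 * Shape of the slimmed-down open path above: once the trap RQ reuses
 * the generic open/close helpers, only the CQ is left for the error
 * path to unwind, in reverse order of setup. The example_* helpers are
 * placeholders for the driver's own functions.
 */
static int example_open_trap_rq(struct example_trap *t)
{
	int err;

	err = example_open_cq(t);
	if (err)
		return err;

	err = example_open_rq(t);	/* replaces open-coded alloc+create+modify */
	if (err)
		goto err_destroy_cq;

	return 0;

err_destroy_cq:
	example_close_cq(t);		/* unwind the one resource set up so far */
	return err;
}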
- clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); -} - -static void mlx5e_build_trap_params(struct mlx5e_priv *priv, struct mlx5e_trap *t) +static void mlx5e_build_trap_params(struct mlx5_core_dev *mdev, + int max_mtu, u16 q_counter, + struct mlx5e_trap *t) { struct mlx5e_params *params = &t->params; params->rq_wq_type = MLX5_WQ_TYPE_CYCLIC; - mlx5e_init_rq_type_params(priv->mdev, params); - params->sw_mtu = priv->netdev->max_mtu; - mlx5e_build_rq_param(priv, params, NULL, &t->rq_param); + mlx5e_init_rq_type_params(mdev, params); + params->sw_mtu = max_mtu; + mlx5e_build_rq_param(mdev, params, NULL, q_counter, &t->rq_param); } static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv) @@ -259,23 +141,19 @@ static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv) if (!t) return ERR_PTR(-ENOMEM); - mlx5e_build_trap_params(priv, t); + mlx5e_build_trap_params(priv->mdev, netdev->max_mtu, priv->q_counter, t); t->priv = priv; t->mdev = priv->mdev; t->tstamp = &priv->tstamp; t->pdev = mlx5_core_dma_dev(priv->mdev); t->netdev = priv->netdev; - t->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); + t->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key); t->stats = &priv->trap_stats.ch; netif_napi_add(netdev, &t->napi, mlx5e_trap_napi_poll, 64); - err = mlx5e_open_trap_rq(priv, &t->napi, - &priv->trap_stats.rq, - &t->params, &t->rq_param, - &priv->trap_stats.ch, - &t->rq); + err = mlx5e_open_trap_rq(priv, t); if (unlikely(err)) goto err_napi_del; @@ -304,15 +182,14 @@ void mlx5e_close_trap(struct mlx5e_trap *trap) static void mlx5e_activate_trap(struct mlx5e_trap *trap) { napi_enable(&trap->napi); - mlx5e_activate_trap_rq(&trap->rq); - napi_schedule(&trap->napi); + mlx5e_activate_rq(&trap->rq); } void mlx5e_deactivate_trap(struct mlx5e_priv *priv) { struct mlx5e_trap *trap = priv->en_trap; - mlx5e_deactivate_trap_rq(&trap->rq); + mlx5e_deactivate_rq(&trap->rq); napi_disable(&trap->napi); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 2371b83dad9c..055c3bc23733 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -441,4 +441,10 @@ static inline u16 mlx5e_stop_room_for_wqe(u16 wqe_size) return wqe_size * 2 - 1; } +static inline bool mlx5e_icosq_can_post_wqe(struct mlx5e_icosq *sq, u16 wqe_size) +{ + u16 room = sq->reserved_room + mlx5e_stop_room_for_wqe(wqe_size); + + return mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, room); +} #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 2e3e78b0f333..2f0df5cc1a2d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -500,7 +500,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_xdpsq *sq; - int drops = 0; + int nxmit = 0; int sq_num; int i; @@ -529,11 +529,8 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, xdptxd.dma_addr = dma_map_single(sq->pdev, xdptxd.data, xdptxd.len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(sq->pdev, xdptxd.dma_addr))) { - xdp_return_frame_rx_napi(xdpf); - drops++; - continue; - } + if (unlikely(dma_mapping_error(sq->pdev, xdptxd.dma_addr))) + break; xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME; xdpi.frame.xdpf = xdpf; @@ -544,9 +541,9 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct 
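/*
 * Plain C model of the room check behind mlx5e_icosq_can_post_wqe()
 * above: pc/cc are free-running 16-bit producer/consumer counters, so
 * unsigned wrap-around subtraction yields the in-flight count, and a
 * post is allowed only if the requested room (reserved room plus the
 * new WQE's worst case) still fits. Runnable sketch, not driver code.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool ring_has_room_for(uint16_t sz, uint16_t cc, uint16_t pc, uint16_t n)
{
	uint16_t in_flight = pc - cc;	/* wrap-safe unsigned subtraction */

	return (uint16_t)(sz - in_flight) >= n;
}

int main(void)
{
	/* counters have wrapped: pc overflowed past cc; ring of 128, 64 in flight */
	assert(ring_has_room_for(128, 65500, 28, 64));
	assert(!ring_has_room_for(128, 65500, 28, 65));
	return 0;
}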
xdp_frame **frames, if (unlikely(!ret)) { dma_unmap_single(sq->pdev, xdptxd.dma_addr, xdptxd.len, DMA_TO_DEVICE); - xdp_return_frame_rx_napi(xdpf); - drops++; + break; } + nxmit++; } if (flags & XDP_XMIT_FLUSH) { @@ -555,7 +552,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, mlx5e_xmit_xdp_doorbell(sq); } - return n - drops; + return nxmit; } void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index f4bce1365639..a8315f166696 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -35,13 +35,59 @@ bool mlx5e_validate_xsk_param(struct mlx5e_params *params, } } -static void mlx5e_build_xsk_cparam(struct mlx5e_priv *priv, +static void mlx5e_build_xsk_cparam(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, + u16 q_counter, struct mlx5e_channel_param *cparam) { - mlx5e_build_rq_param(priv, params, xsk, &cparam->rq); - mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq); + mlx5e_build_rq_param(mdev, params, xsk, q_counter, &cparam->rq); + mlx5e_build_xdpsq_param(mdev, params, &cparam->xdp_sq); +} + +static int mlx5e_init_xsk_rq(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct xsk_buff_pool *pool, + struct mlx5e_xsk_param *xsk, + struct mlx5e_rq *rq) +{ + struct mlx5_core_dev *mdev = c->mdev; + int rq_xdp_ix; + int err; + + rq->wq_type = params->rq_wq_type; + rq->pdev = c->pdev; + rq->netdev = c->netdev; + rq->priv = c->priv; + rq->tstamp = c->tstamp; + rq->clock = &mdev->clock; + rq->icosq = &c->icosq; + rq->ix = c->ix; + rq->mdev = mdev; + rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + rq->xdpsq = &c->rq_xdpsq; + rq->xsk_pool = pool; + rq->stats = &c->priv->channel_stats[c->ix].xskrq; + rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev); + rq_xdp_ix = c->ix + params->num_channels * MLX5E_RQ_GROUP_XSK; + err = mlx5e_rq_set_handlers(rq, params, xsk); + if (err) + return err; + + return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix, 0); +} + +static int mlx5e_open_xsk_rq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_rq_param *rq_params, struct xsk_buff_pool *pool, + struct mlx5e_xsk_param *xsk) +{ + int err; + + err = mlx5e_init_xsk_rq(c, params, pool, xsk, &c->xskrq); + if (err) + return err; + + return mlx5e_open_rq(params, rq_params, xsk, cpu_to_node(c->cpu), &c->xskrq); } int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, @@ -61,14 +107,14 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, if (!cparam) return -ENOMEM; - mlx5e_build_xsk_cparam(priv, params, xsk, cparam); + mlx5e_build_xsk_cparam(priv->mdev, params, xsk, priv->q_counter, cparam); err = mlx5e_open_cq(c->priv, params->rx_cq_moderation, &cparam->rq.cqp, &ccp, &c->xskrq.cq); if (unlikely(err)) goto err_free_cparam; - err = mlx5e_open_rq(c, params, &cparam->rq, xsk, pool, &c->xskrq); + err = mlx5e_open_xsk_rq(c, params, &cparam->rq, pool, xsk); if (unlikely(err)) goto err_close_rx_cq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h index cc0efac7b812..00af0b831a28 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h @@ -123,11 +123,10 @@ static inline bool mlx5e_accel_tx_begin(struct net_device 
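/*
 * Sketch of the ndo_xdp_xmit contract the hunk above converts to:
 * return how many frames were actually queued and stop at the first
 * failure; the XDP core now frees the unsent tail, which is why the
 * driver drops its xdp_return_frame_rx_napi() calls. example_* helpers
 * are invented placeholders.
 */
#include <linux/netdevice.h>
#include <net/xdp.h>

static int example_xdp_xmit(struct net_device *dev, int n,
			    struct xdp_frame **frames, u32 flags)
{
	int nxmit = 0;
	int i;

	for (i = 0; i < n; i++) {
		if (example_post_frame(dev, frames[i]))
			break;		/* frames[i..n-1] are freed by the caller */
		nxmit++;
	}

	if (flags & XDP_XMIT_FLUSH)
		example_ring_doorbell(dev);

	return nxmit;
}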
*dev, mlx5e_udp_gso_handle_tx_skb(skb); #ifdef CONFIG_MLX5_EN_TLS - if (test_bit(MLX5E_SQ_STATE_TLS, &sq->state)) { - /* May send SKBs and WQEs. */ + /* May send SKBs and WQEs. */ + if (mlx5e_tls_skb_offloaded(skb)) if (unlikely(!mlx5e_tls_handle_tx_skb(dev, sq, skb, &state->tls))) return false; - } #endif #ifdef CONFIG_MLX5_EN_IPSEC @@ -186,7 +185,7 @@ static inline void mlx5e_accel_tx_finish(struct mlx5e_txqsq *sq, struct mlx5_wqe_inline_seg *inlseg) { #ifdef CONFIG_MLX5_EN_TLS - mlx5e_tls_handle_tx_wqe(sq, &wqe->ctrl, &state->tls); + mlx5e_tls_handle_tx_wqe(&wqe->ctrl, &state->tls); #endif #ifdef CONFIG_MLX5_EN_IPSEC diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index 381a9c8c9da9..34119ce92031 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -60,7 +60,7 @@ static int rx_err_add_rule(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec; int err = 0; - spec = kzalloc(sizeof(*spec), GFP_KERNEL); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) return -ENOMEM; @@ -101,7 +101,7 @@ out: if (err) mlx5_modify_header_dealloc(mdev, modify_hdr); out_spec: - kfree(spec); + kvfree(spec); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h index baa58b62e8df..aaa579bf9a39 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h @@ -12,6 +12,9 @@ void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv); int mlx5e_ktls_init_rx(struct mlx5e_priv *priv); void mlx5e_ktls_cleanup_rx(struct mlx5e_priv *priv); int mlx5e_ktls_set_feature_rx(struct net_device *netdev, bool enable); +struct mlx5e_ktls_resync_resp * +mlx5e_ktls_rx_resync_create_resp_list(void); +void mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_list); #else static inline void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv) @@ -33,6 +36,14 @@ static inline int mlx5e_ktls_set_feature_rx(struct net_device *netdev, bool enab return -EOPNOTSUPP; } +static inline struct mlx5e_ktls_resync_resp * +mlx5e_ktls_rx_resync_create_resp_list(void) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void +mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_list) {} #endif #endif /* __MLX5E_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index d06532d0baa4..4e58fade7a60 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -46,7 +46,8 @@ struct mlx5e_ktls_offload_context_rx { struct tls12_crypto_info_aes_gcm_128 crypto_info; struct accel_rule rule; struct sock *sk; - struct mlx5e_rq_stats *stats; + struct mlx5e_rq_stats *rq_stats; + struct mlx5e_tls_sw_stats *sw_stats; struct completion add_ctx; u32 tirn; u32 key_id; @@ -55,6 +56,7 @@ struct mlx5e_ktls_offload_context_rx { /* resync */ struct mlx5e_ktls_rx_resync_ctx resync; + struct list_head list; }; static bool mlx5e_ktls_priv_rx_put(struct mlx5e_ktls_offload_context_rx *priv_rx) @@ -71,6 +73,32 @@ static void mlx5e_ktls_priv_rx_get(struct mlx5e_ktls_offload_context_rx *priv_rx refcount_inc(&priv_rx->resync.refcnt); } +struct mlx5e_ktls_resync_resp { + /* protects list changes */ + spinlock_t lock; + struct list_head list; +}; + 
+void mlx5e_ktls_rx_resync_destroy_resp_list(struct mlx5e_ktls_resync_resp *resp_list) +{ + kvfree(resp_list); +} + +struct mlx5e_ktls_resync_resp * +mlx5e_ktls_rx_resync_create_resp_list(void) +{ + struct mlx5e_ktls_resync_resp *resp_list; + + resp_list = kvzalloc(sizeof(*resp_list), GFP_KERNEL); + if (!resp_list) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&resp_list->list); + spin_lock_init(&resp_list->lock); + + return resp_list; +} + static int mlx5e_ktls_create_tir(struct mlx5_core_dev *mdev, u32 *tirn, u32 rqtn) { int err, inlen; @@ -84,7 +112,7 @@ static int mlx5e_ktls_create_tir(struct mlx5_core_dev *mdev, u32 *tirn, u32 rqtn tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); - MLX5_SET(tirc, tirc, transport_domain, mdev->mlx5e_res.td.tdn); + MLX5_SET(tirc, tirc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8); MLX5_SET(tirc, tirc, indirect_table, rqtn); @@ -118,8 +146,7 @@ out: complete(&priv_rx->add_ctx); } -static void accel_rule_init(struct accel_rule *rule, struct mlx5e_priv *priv, - struct sock *sk) +static void accel_rule_init(struct accel_rule *rule, struct mlx5e_priv *priv) { INIT_WORK(&rule->work, accel_rule_handle_work); rule->priv = priv; @@ -137,11 +164,10 @@ post_static_params(struct mlx5e_icosq *sq, { struct mlx5e_set_tls_static_params_wqe *wqe; struct mlx5e_icosq_wqe_info wi; - u16 pi, num_wqebbs, room; + u16 pi, num_wqebbs; num_wqebbs = MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS; - room = mlx5e_stop_room_for_wqe(num_wqebbs); - if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, room))) + if (unlikely(!mlx5e_icosq_can_post_wqe(sq, num_wqebbs))) return ERR_PTR(-ENOSPC); pi = mlx5e_icosq_get_next_pi(sq, num_wqebbs); @@ -168,11 +194,10 @@ post_progress_params(struct mlx5e_icosq *sq, { struct mlx5e_set_tls_progress_params_wqe *wqe; struct mlx5e_icosq_wqe_info wi; - u16 pi, num_wqebbs, room; + u16 pi, num_wqebbs; num_wqebbs = MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS; - room = mlx5e_stop_room_for_wqe(num_wqebbs); - if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, room))) + if (unlikely(!mlx5e_icosq_can_post_wqe(sq, num_wqebbs))) return ERR_PTR(-ENOSPC); pi = mlx5e_icosq_get_next_pi(sq, num_wqebbs); @@ -218,7 +243,7 @@ unlock: return err; err_out: - priv_rx->stats->tls_resync_req_skip++; + priv_rx->rq_stats->tls_resync_req_skip++; err = PTR_ERR(cseg); complete(&priv_rx->add_ctx); goto unlock; @@ -277,17 +302,15 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq, buf->priv_rx = priv_rx; - BUILD_BUG_ON(MLX5E_KTLS_GET_PROGRESS_WQEBBS != 1); - spin_lock_bh(&sq->channel->async_icosq_lock); - if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1))) { + if (unlikely(!mlx5e_icosq_can_post_wqe(sq, MLX5E_KTLS_GET_PROGRESS_WQEBBS))) { spin_unlock_bh(&sq->channel->async_icosq_lock); err = -ENOSPC; goto err_dma_unmap; } - pi = mlx5e_icosq_get_next_pi(sq, 1); + pi = mlx5e_icosq_get_next_pi(sq, MLX5E_KTLS_GET_PROGRESS_WQEBBS); wqe = MLX5E_TLS_FETCH_GET_PROGRESS_PARAMS_WQE(sq, pi); #define GET_PSV_DS_CNT (DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS)) @@ -307,7 +330,7 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq, wi = (struct mlx5e_icosq_wqe_info) { .wqe_type = MLX5E_ICOSQ_WQE_GET_PSV_TLS, - .num_wqebbs = 1, + .num_wqebbs = MLX5E_KTLS_GET_PROGRESS_WQEBBS, .tls_get_params.buf = buf, }; icosq_fill_wi(sq, pi, &wi); @@ -322,7 +345,7 @@ err_dma_unmap: err_free: kfree(buf); err_out: - priv_rx->stats->tls_resync_req_skip++; + 
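/*
 * Minimal sketch of the resync response list created above: one
 * kvzalloc'ed object per async ICOSQ, whose spinlock guards a list that
 * the TLS resync path fills and the NAPI poll drains. Names mirror the
 * driver's loosely.
 */
#include <linux/err.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/spinlock.h>

struct example_resync_resp {
	spinlock_t lock;		/* protects list changes */
	struct list_head list;
};

static struct example_resync_resp *example_resync_resp_create(void)
{
	struct example_resync_resp *rl;

	rl = kvzalloc(sizeof(*rl), GFP_KERNEL);
	if (!rl)
		return ERR_PTR(-ENOMEM);

	spin_lock_init(&rl->lock);
	INIT_LIST_HEAD(&rl->list);	/* empty list: head points at itself */
	return rl;
}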
priv_rx->rq_stats->tls_resync_req_skip++; return err; } @@ -362,33 +385,32 @@ static void resync_init(struct mlx5e_ktls_rx_resync_ctx *resync, /* Function can be called with the refcount being either elevated or not. * It does not affect the refcount. */ -static int resync_handle_seq_match(struct mlx5e_ktls_offload_context_rx *priv_rx, - struct mlx5e_channel *c) +static void resync_handle_seq_match(struct mlx5e_ktls_offload_context_rx *priv_rx, + struct mlx5e_channel *c) { struct tls12_crypto_info_aes_gcm_128 *info = &priv_rx->crypto_info; - struct mlx5_wqe_ctrl_seg *cseg; + struct mlx5e_ktls_resync_resp *ktls_resync; struct mlx5e_icosq *sq; - int err; + bool trigger_poll; memcpy(info->rec_seq, &priv_rx->resync.sw_rcd_sn_be, sizeof(info->rec_seq)); - err = 0; sq = &c->async_icosq; - spin_lock_bh(&c->async_icosq_lock); + ktls_resync = sq->ktls_resync; - cseg = post_static_params(sq, priv_rx); - if (IS_ERR(cseg)) { - priv_rx->stats->tls_resync_res_skip++; - err = PTR_ERR(cseg); - goto unlock; - } - /* Do not increment priv_rx refcnt, CQE handling is empty */ - mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg); - priv_rx->stats->tls_resync_res_ok++; -unlock: - spin_unlock_bh(&c->async_icosq_lock); + spin_lock_bh(&ktls_resync->lock); + list_add_tail(&priv_rx->list, &ktls_resync->list); + trigger_poll = !test_and_set_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state); + spin_unlock_bh(&ktls_resync->lock); - return err; + if (!trigger_poll) + return; + + if (!napi_if_scheduled_mark_missed(&c->napi)) { + spin_lock_bh(&c->async_icosq_lock); + mlx5e_trigger_irq(sq); + spin_unlock_bh(&c->async_icosq_lock); + } } /* Function can be called with the refcount being either elevated or not. @@ -420,13 +442,13 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, auth_state = MLX5_GET(tls_progress_params, ctx, auth_state); if (tracker_state != MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING || auth_state != MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD) { - priv_rx->stats->tls_resync_req_skip++; + priv_rx->rq_stats->tls_resync_req_skip++; goto out; } hw_seq = MLX5_GET(tls_progress_params, ctx, hw_resync_tcp_sn); tls_offload_rx_resync_async_request_end(priv_rx->sk, cpu_to_be32(hw_seq)); - priv_rx->stats->tls_resync_req_end++; + priv_rx->rq_stats->tls_resync_req_end++; out: mlx5e_ktls_priv_rx_put(priv_rx); dma_unmap_single(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE); @@ -609,7 +631,8 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, priv_rx->rxq = rxq; priv_rx->sk = sk; - priv_rx->stats = &priv->channel_stats[rxq].rq; + priv_rx->rq_stats = &priv->channel_stats[rxq].rq; + priv_rx->sw_stats = &priv->tls->sw_stats; mlx5e_set_ktls_rx_priv_ctx(tls_ctx, priv_rx); rqtn = priv->direct_tir[rxq].rqt.rqtn; @@ -620,7 +643,7 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, init_completion(&priv_rx->add_ctx); - accel_rule_init(&priv_rx->rule, priv, sk); + accel_rule_init(&priv_rx->rule, priv); resync = &priv_rx->resync; resync_init(resync, priv); tls_offload_ctx_rx(tls_ctx)->resync_async = &resync->core; @@ -630,7 +653,7 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, if (err) goto err_post_wqes; - priv_rx->stats->tls_ctx++; + atomic64_inc(&priv_rx->sw_stats->rx_tls_ctx); return 0; @@ -666,7 +689,7 @@ void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx) if (cancel_work_sync(&resync->work)) mlx5e_ktls_priv_rx_put(priv_rx); - priv_rx->stats->tls_del++; + 
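/*
 * Sketch of the deferral scheme replacing the inline post above,
 * building on the example_resync_resp sketch earlier: queue the
 * context, and only the caller that flips the PENDING bit pays for
 * waking NAPI; napi_if_scheduled_mark_missed() covers the race where
 * NAPI is already running. EXAMPLE_PENDING_BIT and the helpers stand
 * in for the driver's own.
 */
#include <linux/netdevice.h>

#define EXAMPLE_PENDING_BIT 0

struct example_ctx {
	struct list_head list;
};

static void example_defer_resync(struct example_resync_resp *rl,
				 struct example_ctx *ctx,
				 struct napi_struct *napi,
				 unsigned long *sq_state,
				 spinlock_t *doorbell_lock)
{
	bool trigger_poll;

	spin_lock_bh(&rl->lock);
	list_add_tail(&ctx->list, &rl->list);
	trigger_poll = !test_and_set_bit(EXAMPLE_PENDING_BIT, sq_state);
	spin_unlock_bh(&rl->lock);

	if (!trigger_poll)
		return;		/* someone else already armed a poll */

	if (!napi_if_scheduled_mark_missed(napi)) {
		spin_lock_bh(doorbell_lock);
		example_trigger_irq();	/* e.g. post a NOP so a completion fires */
		spin_unlock_bh(doorbell_lock);
	}
}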
atomic64_inc(&priv_rx->sw_stats->rx_tls_del); if (priv_rx->rule.rule) mlx5e_accel_fs_del_sk(priv_rx->rule.rule); @@ -678,3 +701,65 @@ void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx) */ mlx5e_ktls_priv_rx_put(priv_rx); } + +bool mlx5e_ktls_rx_handle_resync_list(struct mlx5e_channel *c, int budget) +{ + struct mlx5e_ktls_offload_context_rx *priv_rx, *tmp; + struct mlx5e_ktls_resync_resp *ktls_resync; + struct mlx5_wqe_ctrl_seg *db_cseg; + struct mlx5e_icosq *sq; + LIST_HEAD(local_list); + int i, j; + + sq = &c->async_icosq; + + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) + return false; + + ktls_resync = sq->ktls_resync; + db_cseg = NULL; + i = 0; + + spin_lock(&ktls_resync->lock); + list_for_each_entry_safe(priv_rx, tmp, &ktls_resync->list, list) { + list_move(&priv_rx->list, &local_list); + if (++i == budget) + break; + } + if (list_empty(&ktls_resync->list)) + clear_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state); + spin_unlock(&ktls_resync->lock); + + spin_lock(&c->async_icosq_lock); + for (j = 0; j < i; j++) { + struct mlx5_wqe_ctrl_seg *cseg; + + priv_rx = list_first_entry(&local_list, + struct mlx5e_ktls_offload_context_rx, + list); + cseg = post_static_params(sq, priv_rx); + if (IS_ERR(cseg)) + break; + list_del(&priv_rx->list); + db_cseg = cseg; + } + if (db_cseg) + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, db_cseg); + spin_unlock(&c->async_icosq_lock); + + priv_rx->rq_stats->tls_resync_res_ok += j; + + if (!list_empty(&local_list)) { + /* This happens only if ICOSQ is full. + * There is no need to mark busy or explicitly ask for a NAPI cycle, + * it will be triggered by the outstanding ICOSQ completions. + */ + spin_lock(&ktls_resync->lock); + list_splice(&local_list, &ktls_resync->list); + set_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state); + spin_unlock(&ktls_resync->lock); + priv_rx->rq_stats->tls_resync_res_retry++; + } + + return i == budget; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index d16def68ecff..51bdf71073f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB // Copyright (c) 2019 Mellanox Technologies. 
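/*
 * Sketch of the budgeted drain added above, reusing the example types
 * from the two previous sketches: claim at most `budget` entries onto a
 * private list under the lock, post them without the lock held, and
 * splice any leftovers back while re-arming the PENDING bit when the
 * queue ran out of room.
 */
static bool example_handle_resync_list(struct example_resync_resp *rl,
				       unsigned long *sq_state, int budget)
{
	struct example_ctx *ctx, *tmp;
	LIST_HEAD(local);
	int i = 0;

	spin_lock(&rl->lock);
	list_for_each_entry_safe(ctx, tmp, &rl->list, list) {
		list_move(&ctx->list, &local);
		if (++i == budget)
			break;
	}
	if (list_empty(&rl->list))
		clear_bit(EXAMPLE_PENDING_BIT, sq_state);
	spin_unlock(&rl->lock);

	list_for_each_entry_safe(ctx, tmp, &local, list) {
		if (example_post_params(ctx))	/* queue full, stop posting */
			break;
		list_del(&ctx->list);
	}

	if (!list_empty(&local)) {		/* give leftovers back */
		spin_lock(&rl->lock);
		list_splice(&local, &rl->list);
		set_bit(EXAMPLE_PENDING_BIT, sq_state);
		spin_unlock(&rl->lock);
	}

	return i == budget;	/* true: more work may still be pending */
}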
+#include "en_accel/tls.h" #include "en_accel/ktls_txrx.h" #include "en_accel/ktls_utils.h" @@ -50,6 +51,7 @@ static int mlx5e_ktls_create_tis(struct mlx5_core_dev *mdev, u32 *tisn) struct mlx5e_ktls_offload_context_tx { struct tls_offload_context_tx *tx_ctx; struct tls12_crypto_info_aes_gcm_128 crypto_info; + struct mlx5e_tls_sw_stats *sw_stats; u32 expected_seq; u32 tisn; u32 key_id; @@ -99,6 +101,7 @@ int mlx5e_ktls_add_tx(struct net_device *netdev, struct sock *sk, if (err) goto err_create_key; + priv_tx->sw_stats = &priv->tls->sw_stats; priv_tx->expected_seq = start_offload_tcp_sn; priv_tx->crypto_info = *(struct tls12_crypto_info_aes_gcm_128 *)crypto_info; @@ -111,6 +114,7 @@ int mlx5e_ktls_add_tx(struct net_device *netdev, struct sock *sk, goto err_create_tis; priv_tx->ctx_post_pending = true; + atomic64_inc(&priv_tx->sw_stats->tx_tls_ctx); return 0; @@ -452,7 +456,6 @@ bool mlx5e_ktls_handle_tx_skb(struct tls_context *tls_ctx, struct mlx5e_txqsq *s if (unlikely(mlx5e_ktls_tx_offload_test_and_clear_pending(priv_tx))) { mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, false, false); - stats->tls_ctx++; } seq = ntohl(tcp_hdr(skb)->seq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h index ee04e916fa21..8f79335057dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h @@ -40,6 +40,14 @@ mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq, } return false; } + +bool mlx5e_ktls_rx_handle_resync_list(struct mlx5e_channel *c, int budget); + +static inline bool +mlx5e_ktls_rx_pending_resync_list(struct mlx5e_channel *c, int budget) +{ + return budget && test_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &c->async_icosq.state); +} #else static inline bool mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq, @@ -49,6 +57,18 @@ mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq, return false; } +static inline bool +mlx5e_ktls_rx_handle_resync_list(struct mlx5e_channel *c, int budget) +{ + return false; +} + +static inline bool +mlx5e_ktls_rx_pending_resync_list(struct mlx5e_channel *c, int budget) +{ + return false; +} + #endif /* CONFIG_MLX5_EN_TLS */ #endif /* __MLX5E_TLS_TXRX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h index bd270a85c804..4c9274d390da 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h @@ -41,10 +41,13 @@ #include "en.h" struct mlx5e_tls_sw_stats { + atomic64_t tx_tls_ctx; atomic64_t tx_tls_drop_metadata; atomic64_t tx_tls_drop_resync_alloc; atomic64_t tx_tls_drop_no_sync_data; atomic64_t tx_tls_drop_bypass_required; + atomic64_t rx_tls_ctx; + atomic64_t rx_tls_del; atomic64_t rx_tls_drop_resync_request; atomic64_t rx_tls_resync_request; atomic64_t rx_tls_resync_reply; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c index 2b51d3222ca1..82dc09aaa7fc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c @@ -263,9 +263,6 @@ bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq, int datalen; u32 skb_seq; - if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk)) - return true; - datalen = skb->len - 
(skb_transport_offset(skb) + tcp_hdrlen(skb)); if (!datalen) return true; @@ -301,12 +298,6 @@ err_out: return false; } -void mlx5e_tls_handle_tx_wqe(struct mlx5e_txqsq *sq, struct mlx5_wqe_ctrl_seg *cseg, - struct mlx5e_accel_tx_tls_state *state) -{ - cseg->tis_tir_num = cpu_to_be32(state->tls_tisn << 8); -} - static int tls_update_resync_sn(struct net_device *netdev, struct sk_buff *skb, struct mlx5e_tls_metadata *mdata) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h index 9923132c9440..0ca0a023fb8d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h @@ -47,8 +47,18 @@ u16 mlx5e_tls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *par bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_accel_tx_tls_state *state); -void mlx5e_tls_handle_tx_wqe(struct mlx5e_txqsq *sq, struct mlx5_wqe_ctrl_seg *cseg, - struct mlx5e_accel_tx_tls_state *state); + +static inline bool mlx5e_tls_skb_offloaded(struct sk_buff *skb) +{ + return skb->sk && tls_is_sk_tx_device_offloaded(skb->sk); +} + +static inline void +mlx5e_tls_handle_tx_wqe(struct mlx5_wqe_ctrl_seg *cseg, + struct mlx5e_accel_tx_tls_state *state) +{ + cseg->tis_tir_num = cpu_to_be32(state->tls_tisn << 8); +} void mlx5e_tls_handle_rx_skb_metadata(struct mlx5e_rq *rq, struct sk_buff *skb, u32 *cqe_bcnt); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c index b949b9a7538b..29463bdb7715 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c @@ -45,49 +45,60 @@ static const struct counter_desc mlx5e_tls_sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_drop_bypass_required) }, }; +static const struct counter_desc mlx5e_ktls_sw_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_ctx) }, + { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, rx_tls_ctx) }, + { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, rx_tls_del) }, +}; + #define MLX5E_READ_CTR_ATOMIC64(ptr, dsc, i) \ atomic64_read((atomic64_t *)((char *)(ptr) + (dsc)[i].offset)) -#define NUM_TLS_SW_COUNTERS ARRAY_SIZE(mlx5e_tls_sw_stats_desc) - -static bool is_tls_atomic_stats(struct mlx5e_priv *priv) +static const struct counter_desc *get_tls_atomic_stats(struct mlx5e_priv *priv) { - return priv->tls && !mlx5_accel_is_ktls_device(priv->mdev); + if (!priv->tls) + return NULL; + if (mlx5_accel_is_ktls_device(priv->mdev)) + return mlx5e_ktls_sw_stats_desc; + return mlx5e_tls_sw_stats_desc; } int mlx5e_tls_get_count(struct mlx5e_priv *priv) { - if (!is_tls_atomic_stats(priv)) + if (!priv->tls) return 0; - - return NUM_TLS_SW_COUNTERS; + if (mlx5_accel_is_ktls_device(priv->mdev)) + return ARRAY_SIZE(mlx5e_ktls_sw_stats_desc); + return ARRAY_SIZE(mlx5e_tls_sw_stats_desc); } int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data) { - unsigned int i, idx = 0; + const struct counter_desc *stats_desc; + unsigned int i, n, idx = 0; - if (!is_tls_atomic_stats(priv)) - return 0; + stats_desc = get_tls_atomic_stats(priv); + n = mlx5e_tls_get_count(priv); - for (i = 0; i < NUM_TLS_SW_COUNTERS; i++) + for (i = 0; i < n; i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, - mlx5e_tls_sw_stats_desc[i].format); + stats_desc[i].format); - return 
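/*
 * Plain C model of the descriptor scheme above: each counter_desc pairs
 * a name with a member offset, so one read loop can walk whichever
 * descriptor array (TLS vs kTLS) get_tls_atomic_stats() selects. The
 * kernel reads an atomic64_t at the offset; a plain uint64_t stands in
 * here. Runnable sketch, not driver code.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct sw_stats {
	uint64_t tx_tls_ctx;
	uint64_t rx_tls_ctx;
	uint64_t rx_tls_del;
};

struct counter_desc {
	const char *format;
	size_t offset;
};

#define DECLARE_STAT(type, fld) { #fld, offsetof(type, fld) }

static const struct counter_desc ktls_sw_stats_desc[] = {
	DECLARE_STAT(struct sw_stats, tx_tls_ctx),
	DECLARE_STAT(struct sw_stats, rx_tls_ctx),
	DECLARE_STAT(struct sw_stats, rx_tls_del),
};

static uint64_t read_ctr(const void *stats, const struct counter_desc *d)
{
	return *(const uint64_t *)((const char *)stats + d->offset);
}

int main(void)
{
	struct sw_stats s = { .tx_tls_ctx = 3, .rx_tls_ctx = 5, .rx_tls_del = 1 };
	size_t i;

	for (i = 0; i < sizeof(ktls_sw_stats_desc) / sizeof(*ktls_sw_stats_desc); i++)
		printf("%s = %llu\n", ktls_sw_stats_desc[i].format,
		       (unsigned long long)read_ctr(&s, &ktls_sw_stats_desc[i]));
	return 0;
}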
NUM_TLS_SW_COUNTERS; + return n; } int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data) { - int i, idx = 0; + const struct counter_desc *stats_desc; + unsigned int i, n, idx = 0; - if (!is_tls_atomic_stats(priv)) - return 0; + stats_desc = get_tls_atomic_stats(priv); + n = mlx5e_tls_get_count(priv); - for (i = 0; i < NUM_TLS_SW_COUNTERS; i++) + for (i = 0; i < n; i++) data[idx++] = MLX5E_READ_CTR_ATOMIC64(&priv->tls->sw_stats, - mlx5e_tls_sw_stats_desc, i); + stats_desc, i); - return NUM_TLS_SW_COUNTERS; + return n; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 39475f6565c7..5cd466ec6492 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -36,6 +36,32 @@ #include <linux/ipv6.h> #include "en.h" +#define ARFS_HASH_SHIFT BITS_PER_BYTE +#define ARFS_HASH_SIZE BIT(BITS_PER_BYTE) + +struct arfs_table { + struct mlx5e_flow_table ft; + struct mlx5_flow_handle *default_rule; + struct hlist_head rules_hash[ARFS_HASH_SIZE]; +}; + +enum arfs_type { + ARFS_IPV4_TCP, + ARFS_IPV6_TCP, + ARFS_IPV4_UDP, + ARFS_IPV6_UDP, + ARFS_NUM_TYPES, +}; + +struct mlx5e_arfs_tables { + struct arfs_table arfs_tables[ARFS_NUM_TYPES]; + /* Protect aRFS rules list */ + spinlock_t arfs_lock; + struct list_head rules; + int last_filter_id; + struct workqueue_struct *wq; +}; + struct arfs_tuple { __be16 etype; u8 ip_proto; @@ -121,7 +147,7 @@ int mlx5e_arfs_enable(struct mlx5e_priv *priv) dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; for (i = 0; i < ARFS_NUM_TYPES; i++) { - dest.ft = priv->fs.arfs.arfs_tables[i].ft.t; + dest.ft = priv->fs.arfs->arfs_tables[i].ft.t; /* Modify ttc rules destination to point on the aRFS FTs */ err = mlx5e_ttc_fwd_dest(priv, arfs_get_tt(i), &dest); if (err) { @@ -141,25 +167,31 @@ static void arfs_destroy_table(struct arfs_table *arfs_t) mlx5e_destroy_flow_table(&arfs_t->ft); } -void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv) +static void _mlx5e_cleanup_tables(struct mlx5e_priv *priv) { int i; - if (!(priv->netdev->hw_features & NETIF_F_NTUPLE)) - return; - arfs_del_rules(priv); - destroy_workqueue(priv->fs.arfs.wq); + destroy_workqueue(priv->fs.arfs->wq); for (i = 0; i < ARFS_NUM_TYPES; i++) { - if (!IS_ERR_OR_NULL(priv->fs.arfs.arfs_tables[i].ft.t)) - arfs_destroy_table(&priv->fs.arfs.arfs_tables[i]); + if (!IS_ERR_OR_NULL(priv->fs.arfs->arfs_tables[i].ft.t)) + arfs_destroy_table(&priv->fs.arfs->arfs_tables[i]); } } +void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv) +{ + if (!(priv->netdev->hw_features & NETIF_F_NTUPLE)) + return; + + _mlx5e_cleanup_tables(priv); + kvfree(priv->fs.arfs); +} + static int arfs_add_default_rule(struct mlx5e_priv *priv, enum arfs_type type) { - struct arfs_table *arfs_t = &priv->fs.arfs.arfs_tables[type]; + struct arfs_table *arfs_t = &priv->fs.arfs->arfs_tables[type]; struct mlx5e_tir *tir = priv->indir_tir; struct mlx5_flow_destination dest = {}; MLX5_DECLARE_FLOW_ACT(flow_act); @@ -290,7 +322,7 @@ out: static int arfs_create_table(struct mlx5e_priv *priv, enum arfs_type type) { - struct mlx5e_arfs_tables *arfs = &priv->fs.arfs; + struct mlx5e_arfs_tables *arfs = priv->fs.arfs; struct mlx5e_flow_table *ft = &arfs->arfs_tables[type].ft; struct mlx5_flow_table_attr ft_attr = {}; int err; @@ -330,20 +362,27 @@ int mlx5e_arfs_create_tables(struct mlx5e_priv *priv) if (!(priv->netdev->hw_features & NETIF_F_NTUPLE)) return 0; - spin_lock_init(&priv->fs.arfs.arfs_lock); - 
INIT_LIST_HEAD(&priv->fs.arfs.rules); - priv->fs.arfs.wq = create_singlethread_workqueue("mlx5e_arfs"); - if (!priv->fs.arfs.wq) + priv->fs.arfs = kvzalloc(sizeof(*priv->fs.arfs), GFP_KERNEL); + if (!priv->fs.arfs) return -ENOMEM; + spin_lock_init(&priv->fs.arfs->arfs_lock); + INIT_LIST_HEAD(&priv->fs.arfs->rules); + priv->fs.arfs->wq = create_singlethread_workqueue("mlx5e_arfs"); + if (!priv->fs.arfs->wq) + goto err; + for (i = 0; i < ARFS_NUM_TYPES; i++) { err = arfs_create_table(priv, i); if (err) - goto err; + goto err_des; } return 0; + +err_des: + _mlx5e_cleanup_tables(priv); err: - mlx5e_arfs_destroy_tables(priv); + kvfree(priv->fs.arfs); return err; } @@ -353,13 +392,13 @@ static void arfs_may_expire_flow(struct mlx5e_priv *priv) { struct arfs_rule *arfs_rule; struct hlist_node *htmp; + HLIST_HEAD(del_list); int quota = 0; int i; int j; - HLIST_HEAD(del_list); - spin_lock_bh(&priv->fs.arfs.arfs_lock); - mlx5e_for_each_arfs_rule(arfs_rule, htmp, priv->fs.arfs.arfs_tables, i, j) { + spin_lock_bh(&priv->fs.arfs->arfs_lock); + mlx5e_for_each_arfs_rule(arfs_rule, htmp, priv->fs.arfs->arfs_tables, i, j) { if (!work_pending(&arfs_rule->arfs_work) && rps_may_expire_flow(priv->netdev, arfs_rule->rxq, arfs_rule->flow_id, @@ -370,7 +409,7 @@ static void arfs_may_expire_flow(struct mlx5e_priv *priv) break; } } - spin_unlock_bh(&priv->fs.arfs.arfs_lock); + spin_unlock_bh(&priv->fs.arfs->arfs_lock); hlist_for_each_entry_safe(arfs_rule, htmp, &del_list, hlist) { if (arfs_rule->rule) mlx5_del_flow_rules(arfs_rule->rule); @@ -383,16 +422,16 @@ static void arfs_del_rules(struct mlx5e_priv *priv) { struct hlist_node *htmp; struct arfs_rule *rule; + HLIST_HEAD(del_list); int i; int j; - HLIST_HEAD(del_list); - spin_lock_bh(&priv->fs.arfs.arfs_lock); - mlx5e_for_each_arfs_rule(rule, htmp, priv->fs.arfs.arfs_tables, i, j) { + spin_lock_bh(&priv->fs.arfs->arfs_lock); + mlx5e_for_each_arfs_rule(rule, htmp, priv->fs.arfs->arfs_tables, i, j) { hlist_del_init(&rule->hlist); hlist_add_head(&rule->hlist, &del_list); } - spin_unlock_bh(&priv->fs.arfs.arfs_lock); + spin_unlock_bh(&priv->fs.arfs->arfs_lock); hlist_for_each_entry_safe(rule, htmp, &del_list, hlist) { cancel_work_sync(&rule->arfs_work); @@ -436,7 +475,7 @@ static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs, static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv, struct arfs_rule *arfs_rule) { - struct mlx5e_arfs_tables *arfs = &priv->fs.arfs; + struct mlx5e_arfs_tables *arfs = priv->fs.arfs; struct arfs_tuple *tuple = &arfs_rule->tuple; struct mlx5_flow_handle *rule = NULL; struct mlx5_flow_destination dest = {}; @@ -554,9 +593,9 @@ static void arfs_handle_work(struct work_struct *work) mutex_lock(&priv->state_lock); if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - spin_lock_bh(&priv->fs.arfs.arfs_lock); + spin_lock_bh(&priv->fs.arfs->arfs_lock); hlist_del(&arfs_rule->hlist); - spin_unlock_bh(&priv->fs.arfs.arfs_lock); + spin_unlock_bh(&priv->fs.arfs->arfs_lock); mutex_unlock(&priv->state_lock); kfree(arfs_rule); @@ -609,7 +648,7 @@ static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv, tuple->dst_port = fk->ports.dst; rule->flow_id = flow_id; - rule->filter_id = priv->fs.arfs.last_filter_id++ % RPS_NO_FILTER; + rule->filter_id = priv->fs.arfs->last_filter_id++ % RPS_NO_FILTER; hlist_add_head(&rule->hlist, arfs_hash_bucket(arfs_t, tuple->src_port, @@ -653,7 +692,7 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, u16 rxq_index, u32 flow_id) { struct mlx5e_priv *priv = 
netdev_priv(dev); - struct mlx5e_arfs_tables *arfs = &priv->fs.arfs; + struct mlx5e_arfs_tables *arfs = priv->fs.arfs; struct arfs_table *arfs_t; struct arfs_rule *arfs_rule; struct flow_keys fk; @@ -687,7 +726,7 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, return -ENOMEM; } } - queue_work(priv->fs.arfs.wq, &arfs_rule->arfs_work); + queue_work(priv->fs.arfs->wq, &arfs_rule->arfs_work); spin_unlock_bh(&arfs->arfs_lock); return arfs_rule->filter_id; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index a6cf008057b5..8c166ee56d8b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -38,15 +38,16 @@ int mlx5e_create_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir, u32 *in) { + struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs; int err; err = mlx5_core_create_tir(mdev, in, &tir->tirn); if (err) return err; - mutex_lock(&mdev->mlx5e_res.td.list_lock); - list_add(&tir->list, &mdev->mlx5e_res.td.tirs_list); - mutex_unlock(&mdev->mlx5e_res.td.list_lock); + mutex_lock(&res->td.list_lock); + list_add(&tir->list, &res->td.tirs_list); + mutex_unlock(&res->td.list_lock); return 0; } @@ -54,10 +55,12 @@ int mlx5e_create_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir, u32 *in) void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir) { - mutex_lock(&mdev->mlx5e_res.td.list_lock); + struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs; + + mutex_lock(&res->td.list_lock); mlx5_core_destroy_tir(mdev, tir->tirn); list_del(&tir->list); - mutex_unlock(&mdev->mlx5e_res.td.list_lock); + mutex_unlock(&res->td.list_lock); } void mlx5e_mkey_set_relaxed_ordering(struct mlx5_core_dev *mdev, void *mkc) @@ -99,7 +102,7 @@ static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev) { - struct mlx5e_resources *res = &mdev->mlx5e_res; + struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs; int err; err = mlx5_core_alloc_pd(mdev, &res->pdn); @@ -126,8 +129,8 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev) goto err_destroy_mkey; } - INIT_LIST_HEAD(&mdev->mlx5e_res.td.tirs_list); - mutex_init(&mdev->mlx5e_res.td.list_lock); + INIT_LIST_HEAD(&res->td.tirs_list); + mutex_init(&res->td.list_lock); return 0; @@ -142,7 +145,7 @@ err_dealloc_pd: void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev) { - struct mlx5e_resources *res = &mdev->mlx5e_res; + struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs; mlx5_free_bfreg(mdev, &res->bfreg); mlx5_core_destroy_mkey(mdev, &res->mkey); @@ -180,8 +183,8 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb, MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); - mutex_lock(&mdev->mlx5e_res.td.list_lock); - list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) { + mutex_lock(&mdev->mlx5e_res.hw_objs.td.list_lock); + list_for_each_entry(tir, &mdev->mlx5e_res.hw_objs.td.tirs_list, list) { tirn = tir->tirn; err = mlx5_core_modify_tir(mdev, tirn, in); if (err) @@ -192,7 +195,7 @@ out: kvfree(in); if (err) netdev_err(priv->netdev, "refresh tir(0x%x) failed, %d\n", tirn, err); - mutex_unlock(&mdev->mlx5e_res.td.list_lock); + mutex_unlock(&mdev->mlx5e_res.hw_objs.td.list_lock); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index f23c67575073..a4c8d8d00d5a 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -1149,35 +1149,23 @@ static int mlx5e_update_trust_state_hw(struct mlx5e_priv *priv, void *context) static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state) { - struct mlx5e_channels new_channels = {}; - bool reset_channels = true; - bool opened; - int err = 0; + struct mlx5e_params new_params; + bool reset = true; + int err; mutex_lock(&priv->state_lock); - new_channels.params = priv->channels.params; - mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &new_channels.params, + new_params = priv->channels.params; + mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &new_params, trust_state); - opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (!opened) - reset_channels = false; - /* Skip if tx_min_inline is the same */ - if (new_channels.params.tx_min_inline_mode == - priv->channels.params.tx_min_inline_mode) - reset_channels = false; - - if (reset_channels) { - err = mlx5e_safe_switch_channels(priv, &new_channels, - mlx5e_update_trust_state_hw, - &trust_state); - } else { - err = mlx5e_update_trust_state_hw(priv, &trust_state); - if (!err && !opened) - priv->channels.params = new_channels.params; - } + if (new_params.tx_min_inline_mode == priv->channels.params.tx_min_inline_mode) + reset = false; + + err = mlx5e_safe_switch_params(priv, &new_params, + mlx5e_update_trust_state_hw, + &trust_state, reset); mutex_unlock(&priv->state_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index abdf721bb264..8360289813f0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -34,6 +34,7 @@ #include "en/port.h" #include "en/params.h" #include "en/xsk/pool.h" +#include "en/ptp.h" #include "lib/clock.h" void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, @@ -325,7 +326,7 @@ static void mlx5e_get_ringparam(struct net_device *dev, int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, struct ethtool_ringparam *param) { - struct mlx5e_channels new_channels = {}; + struct mlx5e_params new_params; u8 log_rq_size; u8 log_sq_size; int err = 0; @@ -364,20 +365,15 @@ int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, mutex_lock(&priv->state_lock); - new_channels.params = priv->channels.params; - new_channels.params.log_rq_mtu_frames = log_rq_size; - new_channels.params.log_sq_size = log_sq_size; + new_params = priv->channels.params; + new_params.log_rq_mtu_frames = log_rq_size; + new_params.log_sq_size = log_sq_size; - err = mlx5e_validate_params(priv, &new_channels.params); + err = mlx5e_validate_params(priv->mdev, &new_params); if (err) goto unlock; - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - priv->channels.params = new_channels.params; - goto unlock; - } - - err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); + err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true); unlock: mutex_unlock(&priv->state_lock); @@ -422,8 +418,9 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, { struct mlx5e_params *cur_params = &priv->channels.params; unsigned int count = ch->combined_count; - struct mlx5e_channels new_channels = {}; + struct mlx5e_params new_params; bool arfs_enabled; + bool opened; int err = 0; if (!count) { @@ -458,28 +455,18 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, goto out; } - new_channels.params = *cur_params; - 
new_channels.params.num_channels = count; - - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - struct mlx5e_params old_params; + new_params = *cur_params; + new_params.num_channels = count; - old_params = *cur_params; - *cur_params = new_channels.params; - err = mlx5e_num_channels_changed(priv); - if (err) - *cur_params = old_params; - - goto out; - } + opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - arfs_enabled = priv->netdev->features & NETIF_F_NTUPLE; + arfs_enabled = opened && (priv->netdev->features & NETIF_F_NTUPLE); if (arfs_enabled) mlx5e_arfs_disable(priv); /* Switch to new channels, set new parameters and close old ones */ - err = mlx5e_safe_switch_channels(priv, &new_channels, - mlx5e_num_channels_changed_ctx, NULL); + err = mlx5e_safe_switch_params(priv, &new_params, + mlx5e_num_channels_changed_ctx, NULL, true); if (arfs_enabled) { int err2 = mlx5e_arfs_enable(priv); @@ -574,8 +561,9 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, { struct dim_cq_moder *rx_moder, *tx_moder; struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_channels new_channels = {}; + struct mlx5e_params new_params; bool reset_rx, reset_tx; + bool reset = true; int err = 0; if (!MLX5_CAP_GEN(mdev, cq_moderation)) @@ -596,51 +584,47 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, } mutex_lock(&priv->state_lock); - new_channels.params = priv->channels.params; + new_params = priv->channels.params; - rx_moder = &new_channels.params.rx_cq_moderation; + rx_moder = &new_params.rx_cq_moderation; rx_moder->usec = coal->rx_coalesce_usecs; rx_moder->pkts = coal->rx_max_coalesced_frames; - new_channels.params.rx_dim_enabled = !!coal->use_adaptive_rx_coalesce; + new_params.rx_dim_enabled = !!coal->use_adaptive_rx_coalesce; - tx_moder = &new_channels.params.tx_cq_moderation; + tx_moder = &new_params.tx_cq_moderation; tx_moder->usec = coal->tx_coalesce_usecs; tx_moder->pkts = coal->tx_max_coalesced_frames; - new_channels.params.tx_dim_enabled = !!coal->use_adaptive_tx_coalesce; + new_params.tx_dim_enabled = !!coal->use_adaptive_tx_coalesce; reset_rx = !!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled; reset_tx = !!coal->use_adaptive_tx_coalesce != priv->channels.params.tx_dim_enabled; if (reset_rx) { - u8 mode = MLX5E_GET_PFLAG(&new_channels.params, + u8 mode = MLX5E_GET_PFLAG(&new_params, MLX5E_PFLAG_RX_CQE_BASED_MODER); - mlx5e_reset_rx_moderation(&new_channels.params, mode); + mlx5e_reset_rx_moderation(&new_params, mode); } if (reset_tx) { - u8 mode = MLX5E_GET_PFLAG(&new_channels.params, + u8 mode = MLX5E_GET_PFLAG(&new_params, MLX5E_PFLAG_TX_CQE_BASED_MODER); - mlx5e_reset_tx_moderation(&new_channels.params, mode); + mlx5e_reset_tx_moderation(&new_params, mode); } - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - priv->channels.params = new_channels.params; - goto out; - } - - if (!reset_rx && !reset_tx) { + /* If DIM state hasn't changed, it's possible to modify interrupt + * moderation parameters on the fly, even if the channels are open. 
+ */ + if (!reset_rx && !reset_tx && test_bit(MLX5E_STATE_OPENED, &priv->state)) { if (!coal->use_adaptive_rx_coalesce) mlx5e_set_priv_channels_rx_coalesce(priv, coal); if (!coal->use_adaptive_tx_coalesce) mlx5e_set_priv_channels_tx_coalesce(priv, coal); - priv->channels.params = new_channels.params; - goto out; + reset = false; } - err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); + err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset); -out: mutex_unlock(&priv->state_lock); return err; } @@ -758,11 +742,11 @@ static int get_fec_supported_advertised(struct mlx5_core_dev *dev, return 0; } -static void ptys2ethtool_supported_advertised_port(struct ethtool_link_ksettings *link_ksettings, - u32 eth_proto_cap, - u8 connector_type, bool ext) +static void ptys2ethtool_supported_advertised_port(struct mlx5_core_dev *mdev, + struct ethtool_link_ksettings *link_ksettings, + u32 eth_proto_cap, u8 connector_type) { - if ((!connector_type && !ext) || connector_type >= MLX5E_CONNECTOR_TYPE_NUMBER) { + if (!MLX5_CAP_PCAM_FEATURE(mdev, ptys_connector_type)) { if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_10GBASE_CR) | MLX5E_PROT_MASK(MLX5E_10GBASE_SR) | MLX5E_PROT_MASK(MLX5E_40GBASE_CR4) @@ -898,9 +882,9 @@ static int ptys2connector_type[MLX5E_CONNECTOR_TYPE_NUMBER] = { [MLX5E_PORT_OTHER] = PORT_OTHER, }; -static u8 get_connector_port(u32 eth_proto, u8 connector_type, bool ext) +static u8 get_connector_port(struct mlx5_core_dev *mdev, u32 eth_proto, u8 connector_type) { - if ((connector_type || ext) && connector_type < MLX5E_CONNECTOR_TYPE_NUMBER) + if (MLX5_CAP_PCAM_FEATURE(mdev, ptys_connector_type)) return ptys2connector_type[connector_type]; if (eth_proto & @@ -1001,11 +985,11 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, data_rate_oper, link_ksettings); eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap; - - link_ksettings->base.port = get_connector_port(eth_proto_oper, - connector_type, ext); - ptys2ethtool_supported_advertised_port(link_ksettings, eth_proto_admin, - connector_type, ext); + connector_type = connector_type < MLX5E_CONNECTOR_TYPE_NUMBER ? 
+ connector_type : MLX5E_PORT_UNKNOWN; + link_ksettings->base.port = get_connector_port(mdev, eth_proto_oper, connector_type); + ptys2ethtool_supported_advertised_port(mdev, link_ksettings, eth_proto_admin, + connector_type); get_lp_advertising(mdev, eth_proto_lp, link_ksettings); if (an_status == MLX5_AN_COMPLETE) @@ -1601,6 +1585,14 @@ static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) return mlx5_set_port_wol(mdev, mlx5_wol_mode); } +static void mlx5e_get_fec_stats(struct net_device *netdev, + struct ethtool_fec_stats *fec_stats) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_stats_fec_get(priv, fec_stats); +} + static int mlx5e_get_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam) { @@ -1769,6 +1761,49 @@ static int mlx5e_get_module_eeprom(struct net_device *netdev, return 0; } +static int mlx5e_get_module_eeprom_by_page(struct net_device *netdev, + const struct ethtool_module_eeprom *page_data, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_module_eeprom_query_params query; + struct mlx5_core_dev *mdev = priv->mdev; + u8 *data = page_data->data; + int size_read; + int i = 0; + + if (!page_data->length) + return -EINVAL; + + memset(data, 0, page_data->length); + + query.offset = page_data->offset; + query.i2c_address = page_data->i2c_address; + query.bank = page_data->bank; + query.page = page_data->page; + while (i < page_data->length) { + query.size = page_data->length - i; + size_read = mlx5_query_module_eeprom_by_page(mdev, &query, data + i); + + /* Done reading, return how many bytes were read */ + if (!size_read) + return i; + + if (size_read == -EINVAL) + return -EINVAL; + if (size_read < 0) { + netdev_err(priv->netdev, "%s: mlx5_query_module_eeprom_by_page failed:0x%x\n", + __func__, size_read); + return i; + } + + i += size_read; + query.offset += size_read; + } + + return i; +} + int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, struct ethtool_flash *flash) { @@ -1808,7 +1843,7 @@ static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable, { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_channels new_channels = {}; + struct mlx5e_params new_params; bool mode_changed; u8 cq_period_mode, current_cq_period_mode; @@ -1827,18 +1862,13 @@ static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable, if (!mode_changed) return 0; - new_channels.params = priv->channels.params; + new_params = priv->channels.params; if (is_rx_cq) - mlx5e_set_rx_cq_mode_params(&new_channels.params, cq_period_mode); + mlx5e_set_rx_cq_mode_params(&new_params, cq_period_mode); else - mlx5e_set_tx_cq_mode_params(&new_channels.params, cq_period_mode); - - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - priv->channels.params = new_channels.params; - return 0; - } + mlx5e_set_tx_cq_mode_params(&new_params, cq_period_mode); - return mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); + return mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true); } static int set_pflag_tx_cqe_based_moder(struct net_device *netdev, bool enable) @@ -1854,7 +1884,7 @@ static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable) int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val) { bool curr_val = MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS); - struct mlx5e_channels new_channels = {}; + struct mlx5e_params new_params; int err = 0;
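/*
 * The hunks above and below all make the same conversion: the old code
 * special-cased closed channels ("just store the new params") before
 * calling mlx5e_safe_switch_channels(), while the new code always goes
 * through mlx5e_safe_switch_params(), which takes an explicit reset flag
 * and absorbs the closed-device special case that callers used to
 * open-code. A minimal sketch of the resulting caller pattern, assuming
 * only the signatures visible in this diff (mlx5e_set_example_pflag and
 * MLX5E_PFLAG_EXAMPLE are hypothetical, for illustration only):
 */
static int mlx5e_set_example_pflag(struct mlx5e_priv *priv, bool enable)
{
	struct mlx5e_params new_params;

	/* Work on a copy; the live params are only replaced once the
	 * switch (or the params-only update on a closed netdev) succeeds.
	 */
	new_params = priv->channels.params;
	MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_EXAMPLE, enable); /* hypothetical flag */

	/* NULL/NULL: no preactivate callback and no context. reset=true
	 * requests full channel recreation whenever the device is open;
	 * callers that can apply a change on the fly pass reset=false.
	 */
	return mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true);
}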
if (!MLX5_CAP_GEN(priv->mdev, cqe_compression)) @@ -1863,15 +1893,16 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val if (curr_val == new_val) return 0; - new_channels.params = priv->channels.params; - MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val); + new_params = priv->channels.params; + MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val); + if (priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE) + new_params.ptp_rx = new_val; - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - priv->channels.params = new_channels.params; - return 0; - } - - err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); + if (new_params.ptp_rx == priv->channels.params.ptp_rx) + err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true); + else + err = mlx5e_safe_switch_params(priv, &new_params, mlx5e_ptp_rx_manage_fs_ctx, + &new_params.ptp_rx, true); if (err) return err; @@ -1887,16 +1918,15 @@ static int set_pflag_rx_cqe_compress(struct net_device *netdev, { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; + int err; if (!MLX5_CAP_GEN(mdev, cqe_compression)) return -EOPNOTSUPP; - if (enable && priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE) { - netdev_err(netdev, "Can't enable cqe compression while timestamping is enabled.\n"); - return -EINVAL; - } + err = mlx5e_modify_rx_cqe_compression_locked(priv, enable); + if (err) + return err; - mlx5e_modify_rx_cqe_compression_locked(priv, enable); priv->channels.params.rx_cqe_compress_def = enable; return 0; @@ -1906,7 +1936,7 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_channels new_channels = {}; + struct mlx5e_params new_params; if (enable) { if (!mlx5e_check_fragmented_striding_rq_cap(mdev)) @@ -1918,17 +1948,12 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable) return -EINVAL; } - new_channels.params = priv->channels.params; + new_params = priv->channels.params; - MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_STRIDING_RQ, enable); - mlx5e_set_rq_type(mdev, &new_channels.params); + MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_RX_STRIDING_RQ, enable); + mlx5e_set_rq_type(mdev, &new_params); - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - priv->channels.params = new_channels.params; - return 0; - } - - return mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); + return mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true); } static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable) @@ -1957,23 +1982,16 @@ static int set_pflag_tx_mpwqe_common(struct net_device *netdev, u32 flag, bool e { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_channels new_channels = {}; - int err; + struct mlx5e_params new_params; if (enable && !MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe)) return -EOPNOTSUPP; - new_channels.params = priv->channels.params; + new_params = priv->channels.params; - MLX5E_SET_PFLAG(&new_channels.params, flag, enable); + MLX5E_SET_PFLAG(&new_params, flag, enable); - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - priv->channels.params = new_channels.params; - return 0; - } - - err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); - return err; + return mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true); } static int 
set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable) @@ -1990,7 +2008,7 @@ static int set_pflag_tx_port_ts(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_channels new_channels = {}; + struct mlx5e_params new_params; int err; if (!MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn)) @@ -2006,24 +2024,17 @@ static int set_pflag_tx_port_ts(struct net_device *netdev, bool enable) return -EINVAL; } - new_channels.params = priv->channels.params; - MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_TX_PORT_TS, enable); + new_params = priv->channels.params; + MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_TX_PORT_TS, enable); /* No need to verify SQ stop room as * ptpsq.txqsq.stop_room <= generic_sq->stop_room, and both * has the same log_sq_size. */ - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - priv->channels.params = new_channels.params; - err = mlx5e_num_channels_changed(priv); - goto out; - } - - err = mlx5e_safe_switch_channels(priv, &new_channels, - mlx5e_num_channels_changed_ctx, NULL); -out: + err = mlx5e_safe_switch_params(priv, &new_params, + mlx5e_num_channels_changed_ctx, NULL, true); if (!err) - priv->port_ptp_opened = true; + priv->tx_ptp_opened = true; return err; } @@ -2114,12 +2125,216 @@ int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) return mlx5e_ethtool_set_rxnfc(dev, cmd); } +static int query_port_status_opcode(struct mlx5_core_dev *mdev, u32 *status_opcode) +{ + struct mlx5_ifc_pddr_troubleshooting_page_bits *pddr_troubleshooting_page; + u32 in[MLX5_ST_SZ_DW(pddr_reg)] = {}; + u32 out[MLX5_ST_SZ_DW(pddr_reg)]; + int err; + + MLX5_SET(pddr_reg, in, local_port, 1); + MLX5_SET(pddr_reg, in, page_select, + MLX5_PDDR_REG_PAGE_SELECT_TROUBLESHOOTING_INFO_PAGE); + + pddr_troubleshooting_page = MLX5_ADDR_OF(pddr_reg, in, page_data); + MLX5_SET(pddr_troubleshooting_page, pddr_troubleshooting_page, + group_opcode, MLX5_PDDR_REG_TRBLSH_GROUP_OPCODE_MONITOR); + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(out), MLX5_REG_PDDR, 0, 0); + if (err) + return err; + + pddr_troubleshooting_page = MLX5_ADDR_OF(pddr_reg, out, page_data); + *status_opcode = MLX5_GET(pddr_troubleshooting_page, pddr_troubleshooting_page, + status_opcode); + return 0; +} + +struct mlx5e_ethtool_link_ext_state_opcode_mapping { + u32 status_opcode; + enum ethtool_link_ext_state link_ext_state; + u8 link_ext_substate; +}; + +static const struct mlx5e_ethtool_link_ext_state_opcode_mapping +mlx5e_link_ext_state_opcode_map[] = { + /* States relating to the autonegotiation or issues therein */ + {2, ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED}, + {3, ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_SUBSTATE_AN_ACK_NOT_RECEIVED}, + {4, ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NEXT_PAGE_EXCHANGE_FAILED}, + {36, ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED_FORCE_MODE}, + {38, ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_SUBSTATE_AN_FEC_MISMATCH_DURING_OVERRIDE}, + {39, ETHTOOL_LINK_EXT_STATE_AUTONEG, + ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_HCD}, + + /* Failure during link training */ + {5, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, + ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_FRAME_LOCK_NOT_ACQUIRED}, + {6, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, + ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_INHIBIT_TIMEOUT}, + {7, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, + 
ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_LINK_PARTNER_DID_NOT_SET_RECEIVER_READY}, + {8, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, 0}, + {14, ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE, + ETHTOOL_LINK_EXT_SUBSTATE_LT_REMOTE_FAULT}, + + /* Logical mismatch in physical coding sublayer or forward error correction sublayer */ + {9, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_BLOCK_LOCK}, + {10, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_AM_LOCK}, + {11, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_GET_ALIGN_STATUS}, + {12, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_FC_FEC_IS_NOT_LOCKED}, + {13, ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH, + ETHTOOL_LINK_EXT_SUBSTATE_LLM_RS_FEC_IS_NOT_LOCKED}, + + /* Signal integrity issues */ + {15, ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY, 0}, + {17, ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY, + ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS}, + {42, ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY, + ETHTOOL_LINK_EXT_SUBSTATE_BSI_UNSUPPORTED_RATE}, + + /* No cable connected */ + {1024, ETHTOOL_LINK_EXT_STATE_NO_CABLE, 0}, + + /* Failure is related to cable, e.g., unsupported cable */ + {16, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, + ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE}, + {20, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, + ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE}, + {29, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, + ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE}, + {1025, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, + ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE}, + {1029, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, + ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE}, + {1031, ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE, 0}, + + /* Failure is related to EEPROM, e.g., failure during reading or parsing the data */ + {1027, ETHTOOL_LINK_EXT_STATE_EEPROM_ISSUE, 0}, + + /* Failure during calibration algorithm */ + {23, ETHTOOL_LINK_EXT_STATE_CALIBRATION_FAILURE, 0}, + + /* The hardware is not able to provide the power required from cable or module */ + {1032, ETHTOOL_LINK_EXT_STATE_POWER_BUDGET_EXCEEDED, 0}, + + /* The module is overheated */ + {1030, ETHTOOL_LINK_EXT_STATE_OVERHEAT, 0}, +}; + +static void +mlx5e_set_link_ext_state(struct mlx5e_ethtool_link_ext_state_opcode_mapping + link_ext_state_mapping, + struct ethtool_link_ext_state_info *link_ext_state_info) +{ + switch (link_ext_state_mapping.link_ext_state) { + case ETHTOOL_LINK_EXT_STATE_AUTONEG: + link_ext_state_info->autoneg = + link_ext_state_mapping.link_ext_substate; + break; + case ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE: + link_ext_state_info->link_training = + link_ext_state_mapping.link_ext_substate; + break; + case ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH: + link_ext_state_info->link_logical_mismatch = + link_ext_state_mapping.link_ext_substate; + break; + case ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY: + link_ext_state_info->bad_signal_integrity = + link_ext_state_mapping.link_ext_substate; + break; + case ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE: + link_ext_state_info->cable_issue = + link_ext_state_mapping.link_ext_substate; + break; + default: + break; + } + + link_ext_state_info->link_ext_state = link_ext_state_mapping.link_ext_state; +} + +static int +mlx5e_get_link_ext_state(struct net_device *dev, + struct ethtool_link_ext_state_info *link_ext_state_info) +{ + struct mlx5e_ethtool_link_ext_state_opcode_mapping 
link_ext_state_mapping; + struct mlx5e_priv *priv = netdev_priv(dev); + u32 status_opcode = 0; + int i; + + /* Exit without data if the interface state is OK, since no extended data is + * available in such case + */ + if (netif_carrier_ok(dev)) + return -ENODATA; + + if (query_port_status_opcode(priv->mdev, &status_opcode) || + !status_opcode) + return -ENODATA; + + for (i = 0; i < ARRAY_SIZE(mlx5e_link_ext_state_opcode_map); i++) { + link_ext_state_mapping = mlx5e_link_ext_state_opcode_map[i]; + if (link_ext_state_mapping.status_opcode == status_opcode) { + mlx5e_set_link_ext_state(link_ext_state_mapping, + link_ext_state_info); + return 0; + } + } + + return -ENODATA; +} + +static void mlx5e_get_eth_phy_stats(struct net_device *netdev, + struct ethtool_eth_phy_stats *phy_stats) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_stats_eth_phy_get(priv, phy_stats); +} + +static void mlx5e_get_eth_mac_stats(struct net_device *netdev, + struct ethtool_eth_mac_stats *mac_stats) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_stats_eth_mac_get(priv, mac_stats); +} + +static void mlx5e_get_eth_ctrl_stats(struct net_device *netdev, + struct ethtool_eth_ctrl_stats *ctrl_stats) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_stats_eth_ctrl_get(priv, ctrl_stats); +} + +static void mlx5e_get_rmon_stats(struct net_device *netdev, + struct ethtool_rmon_stats *rmon_stats, + const struct ethtool_rmon_hist_range **ranges) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_stats_rmon_get(priv, rmon_stats, ranges); +} + const struct ethtool_ops mlx5e_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES | ETHTOOL_COALESCE_USE_ADAPTIVE, .get_drvinfo = mlx5e_get_drvinfo, .get_link = ethtool_op_get_link, + .get_link_ext_state = mlx5e_get_link_ext_state, .get_strings = mlx5e_get_strings, .get_sset_count = mlx5e_get_sset_count, .get_ethtool_stats = mlx5e_get_ethtool_stats, @@ -2148,12 +2363,18 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .set_wol = mlx5e_set_wol, .get_module_info = mlx5e_get_module_info, .get_module_eeprom = mlx5e_get_module_eeprom, + .get_module_eeprom_by_page = mlx5e_get_module_eeprom_by_page, .flash_device = mlx5e_flash_device, .get_priv_flags = mlx5e_get_priv_flags, .set_priv_flags = mlx5e_set_priv_flags, .self_test = mlx5e_self_test, .get_msglevel = mlx5e_get_msglevel, .set_msglevel = mlx5e_set_msglevel, + .get_fec_stats = mlx5e_get_fec_stats, .get_fecparam = mlx5e_get_fecparam, .set_fecparam = mlx5e_set_fecparam, + .get_eth_phy_stats = mlx5e_get_eth_phy_stats, + .get_eth_mac_stats = mlx5e_get_eth_mac_stats, + .get_eth_ctrl_stats = mlx5e_get_eth_ctrl_stats, + .get_rmon_stats = mlx5e_get_rmon_stats, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 16ce7756ac43..0d571a0c76d9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -36,7 +36,9 @@ #include <linux/tcp.h> #include <linux/mlx5/fs.h> #include "en.h" +#include "en_rep.h" #include "lib/mpfs.h" +#include "en/ptp.h" static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, struct mlx5e_l2_rule *ai, int type); @@ -106,6 +108,29 @@ static void mlx5e_del_l2_from_hash(struct mlx5e_l2_hash_node *hn) kfree(hn); } +struct mlx5e_vlan_table { + struct mlx5e_flow_table ft; + DECLARE_BITMAP(active_cvlans, VLAN_N_VID); + DECLARE_BITMAP(active_svlans, VLAN_N_VID); + struct mlx5_flow_handle 
*active_cvlans_rule[VLAN_N_VID]; + struct mlx5_flow_handle *active_svlans_rule[VLAN_N_VID]; + struct mlx5_flow_handle *untagged_rule; + struct mlx5_flow_handle *any_cvlan_rule; + struct mlx5_flow_handle *any_svlan_rule; + struct mlx5_flow_handle *trap_rule; + bool cvlan_filter_disabled; +}; + +unsigned long *mlx5e_vlan_get_active_svlans(struct mlx5e_vlan_table *vlan) +{ + return vlan->active_svlans; +} + +struct mlx5_flow_table *mlx5e_vlan_get_flowtable(struct mlx5e_vlan_table *vlan) +{ + return vlan->ft.t; +} + static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv) { struct net_device *ndev = priv->netdev; @@ -117,7 +142,7 @@ static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv) int i; list_size = 0; - for_each_set_bit(vlan, priv->fs.vlan.active_cvlans, VLAN_N_VID) + for_each_set_bit(vlan, priv->fs.vlan->active_cvlans, VLAN_N_VID) list_size++; max_list_size = 1 << MLX5_CAP_GEN(priv->mdev, log_max_vlan_list); @@ -134,7 +159,7 @@ static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv) return -ENOMEM; i = 0; - for_each_set_bit(vlan, priv->fs.vlan.active_cvlans, VLAN_N_VID) { + for_each_set_bit(vlan, priv->fs.vlan->active_cvlans, VLAN_N_VID) { if (i >= list_size) break; vlans[i++] = vlan; @@ -161,7 +186,7 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, enum mlx5e_vlan_rule_type rule_type, u16 vid, struct mlx5_flow_spec *spec) { - struct mlx5_flow_table *ft = priv->fs.vlan.ft.t; + struct mlx5_flow_table *ft = priv->fs.vlan->ft.t; struct mlx5_flow_destination dest = {}; struct mlx5_flow_handle **rule_p; MLX5_DECLARE_FLOW_ACT(flow_act); @@ -178,24 +203,24 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, * disabled in match value means both S & C tags * don't exist (untagged of both) */ - rule_p = &priv->fs.vlan.untagged_rule; + rule_p = &priv->fs.vlan->untagged_rule; MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); break; case MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID: - rule_p = &priv->fs.vlan.any_cvlan_rule; + rule_p = &priv->fs.vlan->any_cvlan_rule; MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 1); break; case MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID: - rule_p = &priv->fs.vlan.any_svlan_rule; + rule_p = &priv->fs.vlan->any_svlan_rule; MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.svlan_tag); MLX5_SET(fte_match_param, spec->match_value, outer_headers.svlan_tag, 1); break; case MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID: - rule_p = &priv->fs.vlan.active_svlans_rule[vid]; + rule_p = &priv->fs.vlan->active_svlans_rule[vid]; MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.svlan_tag); MLX5_SET(fte_match_param, spec->match_value, outer_headers.svlan_tag, 1); @@ -205,7 +230,7 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, vid); break; default: /* MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID */ - rule_p = &priv->fs.vlan.active_cvlans_rule[vid]; + rule_p = &priv->fs.vlan->active_cvlans_rule[vid]; MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 1); @@ -255,33 +280,33 @@ static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv, { switch (rule_type) { case MLX5E_VLAN_RULE_TYPE_UNTAGGED: - if (priv->fs.vlan.untagged_rule) { - mlx5_del_flow_rules(priv->fs.vlan.untagged_rule); - priv->fs.vlan.untagged_rule = NULL; + if (priv->fs.vlan->untagged_rule) { + 
mlx5_del_flow_rules(priv->fs.vlan->untagged_rule); + priv->fs.vlan->untagged_rule = NULL; } break; case MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID: - if (priv->fs.vlan.any_cvlan_rule) { - mlx5_del_flow_rules(priv->fs.vlan.any_cvlan_rule); - priv->fs.vlan.any_cvlan_rule = NULL; + if (priv->fs.vlan->any_cvlan_rule) { + mlx5_del_flow_rules(priv->fs.vlan->any_cvlan_rule); + priv->fs.vlan->any_cvlan_rule = NULL; } break; case MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID: - if (priv->fs.vlan.any_svlan_rule) { - mlx5_del_flow_rules(priv->fs.vlan.any_svlan_rule); - priv->fs.vlan.any_svlan_rule = NULL; + if (priv->fs.vlan->any_svlan_rule) { + mlx5_del_flow_rules(priv->fs.vlan->any_svlan_rule); + priv->fs.vlan->any_svlan_rule = NULL; } break; case MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID: - if (priv->fs.vlan.active_svlans_rule[vid]) { - mlx5_del_flow_rules(priv->fs.vlan.active_svlans_rule[vid]); - priv->fs.vlan.active_svlans_rule[vid] = NULL; + if (priv->fs.vlan->active_svlans_rule[vid]) { + mlx5_del_flow_rules(priv->fs.vlan->active_svlans_rule[vid]); + priv->fs.vlan->active_svlans_rule[vid] = NULL; } break; case MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID: - if (priv->fs.vlan.active_cvlans_rule[vid]) { - mlx5_del_flow_rules(priv->fs.vlan.active_cvlans_rule[vid]); - priv->fs.vlan.active_cvlans_rule[vid] = NULL; + if (priv->fs.vlan->active_cvlans_rule[vid]) { + mlx5_del_flow_rules(priv->fs.vlan->active_cvlans_rule[vid]); + priv->fs.vlan->active_cvlans_rule[vid] = NULL; } mlx5e_vport_context_update_vlans(priv); break; @@ -328,27 +353,27 @@ mlx5e_add_trap_rule(struct mlx5_flow_table *ft, int trap_id, int tir_num) int mlx5e_add_vlan_trap(struct mlx5e_priv *priv, int trap_id, int tir_num) { - struct mlx5_flow_table *ft = priv->fs.vlan.ft.t; + struct mlx5_flow_table *ft = priv->fs.vlan->ft.t; struct mlx5_flow_handle *rule; int err; rule = mlx5e_add_trap_rule(ft, trap_id, tir_num); if (IS_ERR(rule)) { err = PTR_ERR(rule); - priv->fs.vlan.trap_rule = NULL; + priv->fs.vlan->trap_rule = NULL; netdev_err(priv->netdev, "%s: add VLAN trap rule failed, err %d\n", __func__, err); return err; } - priv->fs.vlan.trap_rule = rule; + priv->fs.vlan->trap_rule = rule; return 0; } void mlx5e_remove_vlan_trap(struct mlx5e_priv *priv) { - if (priv->fs.vlan.trap_rule) { - mlx5_del_flow_rules(priv->fs.vlan.trap_rule); - priv->fs.vlan.trap_rule = NULL; + if (priv->fs.vlan->trap_rule) { + mlx5_del_flow_rules(priv->fs.vlan->trap_rule); + priv->fs.vlan->trap_rule = NULL; } } @@ -380,10 +405,10 @@ void mlx5e_remove_mac_trap(struct mlx5e_priv *priv) void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv) { - if (!priv->fs.vlan.cvlan_filter_disabled) + if (!priv->fs.vlan->cvlan_filter_disabled) return; - priv->fs.vlan.cvlan_filter_disabled = false; + priv->fs.vlan->cvlan_filter_disabled = false; if (priv->netdev->flags & IFF_PROMISC) return; mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); @@ -391,10 +416,10 @@ void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv) void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv) { - if (priv->fs.vlan.cvlan_filter_disabled) + if (priv->fs.vlan->cvlan_filter_disabled) return; - priv->fs.vlan.cvlan_filter_disabled = true; + priv->fs.vlan->cvlan_filter_disabled = true; if (priv->netdev->flags & IFF_PROMISC) return; mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); @@ -404,11 +429,11 @@ static int mlx5e_vlan_rx_add_cvid(struct mlx5e_priv *priv, u16 vid) { int err; - set_bit(vid, priv->fs.vlan.active_cvlans); + set_bit(vid, priv->fs.vlan->active_cvlans); err = mlx5e_add_vlan_rule(priv, 
MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid); if (err) - clear_bit(vid, priv->fs.vlan.active_cvlans); + clear_bit(vid, priv->fs.vlan->active_cvlans); return err; } @@ -418,11 +443,11 @@ static int mlx5e_vlan_rx_add_svid(struct mlx5e_priv *priv, u16 vid) struct net_device *netdev = priv->netdev; int err; - set_bit(vid, priv->fs.vlan.active_svlans); + set_bit(vid, priv->fs.vlan->active_svlans); err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid); if (err) { - clear_bit(vid, priv->fs.vlan.active_svlans); + clear_bit(vid, priv->fs.vlan->active_svlans); return err; } @@ -435,6 +460,9 @@ int mlx5e_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { struct mlx5e_priv *priv = netdev_priv(dev); + if (mlx5e_is_uplink_rep(priv)) + return 0; /* no vlan table for uplink rep */ + if (be16_to_cpu(proto) == ETH_P_8021Q) return mlx5e_vlan_rx_add_cvid(priv, vid); else if (be16_to_cpu(proto) == ETH_P_8021AD) @@ -447,11 +475,14 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) { struct mlx5e_priv *priv = netdev_priv(dev); + if (mlx5e_is_uplink_rep(priv)) + return 0; /* no vlan table for uplink rep */ + if (be16_to_cpu(proto) == ETH_P_8021Q) { - clear_bit(vid, priv->fs.vlan.active_cvlans); + clear_bit(vid, priv->fs.vlan->active_cvlans); mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid); } else if (be16_to_cpu(proto) == ETH_P_8021AD) { - clear_bit(vid, priv->fs.vlan.active_svlans); + clear_bit(vid, priv->fs.vlan->active_svlans); mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid); netdev_update_features(dev); } @@ -465,14 +496,14 @@ static void mlx5e_add_vlan_rules(struct mlx5e_priv *priv) mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); - for_each_set_bit(i, priv->fs.vlan.active_cvlans, VLAN_N_VID) { + for_each_set_bit(i, priv->fs.vlan->active_cvlans, VLAN_N_VID) { mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i); } - for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID) + for_each_set_bit(i, priv->fs.vlan->active_svlans, VLAN_N_VID) mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i); - if (priv->fs.vlan.cvlan_filter_disabled) + if (priv->fs.vlan->cvlan_filter_disabled) mlx5e_add_any_vid_rules(priv); } @@ -482,11 +513,11 @@ static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv) mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); - for_each_set_bit(i, priv->fs.vlan.active_cvlans, VLAN_N_VID) { + for_each_set_bit(i, priv->fs.vlan->active_cvlans, VLAN_N_VID) { mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i); } - for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID) + for_each_set_bit(i, priv->fs.vlan->active_svlans, VLAN_N_VID) mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i); WARN_ON_ONCE(!(test_bit(MLX5E_STATE_DESTROYING, &priv->state))); @@ -496,7 +527,7 @@ static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv) /* must be called after DESTROY bit is set and * set_rx_mode is called and flushed */ - if (priv->fs.vlan.cvlan_filter_disabled) + if (priv->fs.vlan->cvlan_filter_disabled) mlx5e_del_any_vid_rules(priv); } @@ -1684,10 +1715,15 @@ static int mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft) static int mlx5e_create_vlan_table(struct mlx5e_priv *priv) { - struct mlx5e_flow_table *ft = &priv->fs.vlan.ft; struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5e_flow_table *ft; int err; + priv->fs.vlan = kvzalloc(sizeof(*priv->fs.vlan), GFP_KERNEL); + if (!priv->fs.vlan) + return -ENOMEM; + + ft = 
&priv->fs.vlan->ft; ft->num_groups = 0; ft_attr.max_fte = MLX5E_VLAN_TABLE_SIZE; @@ -1695,12 +1731,11 @@ static int mlx5e_create_vlan_table(struct mlx5e_priv *priv) ft_attr.prio = MLX5E_NIC_PRIO; ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); - if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); - ft->t = NULL; - return err; + goto err_free_t; } + ft->g = kcalloc(MLX5E_NUM_VLAN_GROUPS, sizeof(*ft->g), GFP_KERNEL); if (!ft->g) { err = -ENOMEM; @@ -1719,7 +1754,9 @@ err_free_g: kfree(ft->g); err_destroy_vlan_table: mlx5_destroy_flow_table(ft->t); - ft->t = NULL; +err_free_t: + kvfree(priv->fs.vlan); + priv->fs.vlan = NULL; return err; } @@ -1727,7 +1764,8 @@ err_destroy_vlan_table: static void mlx5e_destroy_vlan_table(struct mlx5e_priv *priv) { mlx5e_del_vlan_rules(priv); - mlx5e_destroy_flow_table(&priv->fs.vlan->ft); + mlx5e_destroy_flow_table(&priv->fs.vlan->ft); + kvfree(priv->fs.vlan); } int mlx5e_create_flow_steering(struct mlx5e_priv *priv) @@ -1785,10 +1823,16 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv) goto err_destroy_l2_table; } + err = mlx5e_ptp_alloc_rx_fs(priv); + if (err) + goto err_destroy_vlan_table; + mlx5e_ethtool_init_steering(priv); return 0; +err_destroy_vlan_table: + mlx5e_destroy_vlan_table(priv); err_destroy_l2_table: mlx5e_destroy_l2_table(priv); err_destroy_ttc_table: @@ -1803,6 +1847,7 @@ err_destroy_arfs_tables: void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv) { + mlx5e_ptp_free_rx_fs(priv); mlx5e_destroy_vlan_table(priv); mlx5e_destroy_l2_table(priv); mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index ec2fcb2a2977..bca832cdc4cb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -87,51 +87,6 @@ bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) return true; } -void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) -{ - params->log_rq_mtu_frames = is_kdump_kernel() ? - MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE : - MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; - - mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", - params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, - params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ? - BIT(mlx5e_mpwqe_get_log_rq_size(params, NULL)) : - BIT(params->log_rq_mtu_frames), - BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL)), - MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); -} - -bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) -{ - if (!mlx5e_check_fragmented_striding_rq_cap(mdev)) - return false; - - if (mlx5_fpga_is_ipsec_device(mdev)) - return false; - - if (params->xdp_prog) { - /* XSK params are not considered here. If striding RQ is in use, - * and an XSK is being opened, mlx5e_rx_mpwqe_is_linear_skb will - * be called with the known XSK params. - */ - if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL)) - return false; - } - - return true; -} - -void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params) -{ - params->rq_wq_type = mlx5e_striding_rq_possible(mdev, params) && - MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ?
- MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : - MLX5_WQ_TYPE_CYCLIC; -} - void mlx5e_update_carrier(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; @@ -259,18 +214,17 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); } -static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, - struct mlx5e_channel *c) +static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node) { int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); rq->mpwqe.info = kvzalloc_node(array_size(wq_sz, sizeof(*rq->mpwqe.info)), - GFP_KERNEL, cpu_to_node(c->cpu)); + GFP_KERNEL, node); if (!rq->mpwqe.info) return -ENOMEM; - mlx5e_build_umr_wqe(rq, &c->icosq, &rq->mpwqe.umr_wqe); + mlx5e_build_umr_wqe(rq, rq->icosq, &rq->mpwqe.umr_wqe); return 0; } @@ -302,7 +256,7 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); mlx5e_mkey_set_relaxed_ordering(mdev, mkc); MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.pdn); + MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn); MLX5_SET64(mkc, mkc, len, npages << page_shift); MLX5_SET(mkc, mkc, translations_octword_size, MLX5_MTT_OCTW(npages)); @@ -334,9 +288,9 @@ static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq rq->wqe_overflow.addr); } -static inline u64 mlx5e_get_mpwqe_offset(struct mlx5e_rq *rq, u16 wqe_ix) +static u64 mlx5e_get_mpwqe_offset(u16 wqe_ix) { - return (wqe_ix << MLX5E_LOG_ALIGNED_MPWQE_PPW) << PAGE_SHIFT; + return MLX5E_REQUIRED_MTTS(wqe_ix) << PAGE_SHIFT; } static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) @@ -419,58 +373,53 @@ static void mlx5e_free_mpwqe_rq_drop_page(struct mlx5e_rq *rq) __free_page(rq->wqe_overflow.page); } -static int mlx5e_alloc_rq(struct mlx5e_channel *c, - struct mlx5e_params *params, +static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_rq *rq) +{ + struct mlx5_core_dev *mdev = c->mdev; + int err; + + rq->wq_type = params->rq_wq_type; + rq->pdev = c->pdev; + rq->netdev = c->netdev; + rq->priv = c->priv; + rq->tstamp = c->tstamp; + rq->clock = &mdev->clock; + rq->icosq = &c->icosq; + rq->ix = c->ix; + rq->mdev = mdev; + rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + rq->xdpsq = &c->rq_xdpsq; + rq->stats = &c->priv->channel_stats[c->ix].rq; + rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev); + err = mlx5e_rq_set_handlers(rq, params, NULL); + if (err) + return err; + + return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, 0); +} + +static int mlx5e_alloc_rq(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, - struct xsk_buff_pool *xsk_pool, struct mlx5e_rq_param *rqp, - struct mlx5e_rq *rq) + int node, struct mlx5e_rq *rq) { struct page_pool_params pp_params = { 0 }; - struct mlx5_core_dev *mdev = c->mdev; + struct mlx5_core_dev *mdev = rq->mdev; void *rqc = rqp->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); - u32 rq_xdp_ix; u32 pool_size; int wq_sz; int err; int i; - rqp->wq.db_numa_node = cpu_to_node(c->cpu); - - rq->wq_type = params->rq_wq_type; - rq->pdev = c->pdev; - rq->netdev = c->netdev; - rq->priv = c->priv; - rq->tstamp = c->tstamp; - rq->clock = &mdev->clock; - rq->icosq = &c->icosq; - rq->ix = c->ix; - rq->mdev = mdev; - rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - rq->xdpsq = &c->rq_xdpsq; - rq->xsk_pool = xsk_pool; - rq->ptp_cyc2time = mlx5_is_real_time_rq(mdev) ? 
- mlx5_real_time_cyc2time : - mlx5_timecounter_cyc2time; - - if (rq->xsk_pool) - rq->stats = &c->priv->channel_stats[c->ix].xskrq; - else - rq->stats = &c->priv->channel_stats[c->ix].rq; + rqp->wq.db_numa_node = node; INIT_WORK(&rq->recover_work, mlx5e_rq_err_cqe_work); if (params->xdp_prog) bpf_prog_inc(params->xdp_prog); RCU_INIT_POINTER(rq->xdp_prog, params->xdp_prog); - rq_xdp_ix = rq->ix; - if (xsk) - rq_xdp_ix += params->num_channels * MLX5E_RQ_GROUP_XSK; - err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix, 0); - if (err < 0) - goto err_rq_xdp_prog; - rq->buff.map_dir = params->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk); pool_size = 1 << params->log_rq_mtu_frames; @@ -480,7 +429,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->mpwqe.wq, &rq->wq_ctrl); if (err) - goto err_rq_xdp; + goto err_rq_xdp_prog; err = mlx5e_alloc_mpwqe_rq_drop_page(rq); if (err) @@ -504,7 +453,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_rq_drop_page; rq->mkey_be = cpu_to_be32(rq->umr_mkey.key); - err = mlx5e_rq_alloc_mpwqe_info(rq, c); + err = mlx5e_rq_alloc_mpwqe_info(rq, node); if (err) goto err_rq_mkey; break; @@ -512,7 +461,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq, &rq->wq_ctrl); if (err) - goto err_rq_xdp; + goto err_rq_xdp_prog; rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR]; @@ -524,23 +473,19 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, rq->wqe.frags = kvzalloc_node(array_size(sizeof(*rq->wqe.frags), (wq_sz << rq->wqe.info.log_num_frags)), - GFP_KERNEL, cpu_to_node(c->cpu)); + GFP_KERNEL, node); if (!rq->wqe.frags) { err = -ENOMEM; goto err_rq_wq_destroy; } - err = mlx5e_init_di_list(rq, wq_sz, cpu_to_node(c->cpu)); + err = mlx5e_init_di_list(rq, wq_sz, node); if (err) goto err_rq_frags; - rq->mkey_be = c->mkey_be; + rq->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey.key); } - err = mlx5e_rq_set_handlers(rq, params, xsk); - if (err) - goto err_free_by_rq_type; - if (xsk) { err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, MEM_TYPE_XSK_BUFF_POOL, NULL); @@ -550,8 +495,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, pp_params.order = 0; pp_params.flags = 0; /* No-internal DMA mapping in page_pool */ pp_params.pool_size = pool_size; - pp_params.nid = cpu_to_node(c->cpu); - pp_params.dev = c->pdev; + pp_params.nid = node; + pp_params.dev = rq->pdev; pp_params.dma_dir = rq->buff.map_dir; /* page_pool can be used even when there is no rq->xdp_prog, @@ -565,8 +510,9 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, rq->page_pool = NULL; goto err_free_by_rq_type; } - err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, - MEM_TYPE_PAGE_POOL, rq->page_pool); + if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) + err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, + MEM_TYPE_PAGE_POOL, rq->page_pool); } if (err) goto err_free_by_rq_type; @@ -577,7 +523,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, mlx5_wq_ll_get_wqe(&rq->mpwqe.wq, i); u32 byte_count = rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz; - u64 dma_offset = mlx5e_get_mpwqe_offset(rq, i); + u64 dma_offset = mlx5e_get_mpwqe_offset(i); wqe->data[0].addr = cpu_to_be64(dma_offset + rq->buff.headroom); wqe->data[0].byte_count = cpu_to_be32(byte_count); @@ -635,8 +581,6 @@ err_rq_frags: } err_rq_wq_destroy: mlx5_wq_destroy(&rq->wq_ctrl); -err_rq_xdp: - xdp_rxq_info_unreg(&rq->xdp_rxq); err_rq_xdp_prog: if (params->xdp_prog) 
bpf_prog_put(params->xdp_prog); @@ -649,10 +593,12 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) struct bpf_prog *old_prog; int i; - old_prog = rcu_dereference_protected(rq->xdp_prog, - lockdep_is_held(&rq->priv->state_lock)); - if (old_prog) - bpf_prog_put(old_prog); + if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) { + old_prog = rcu_dereference_protected(rq->xdp_prog, + lockdep_is_held(&rq->priv->state_lock)); + if (old_prog) + bpf_prog_put(old_prog); + } switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: @@ -888,13 +834,14 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq) } -int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk, - struct xsk_buff_pool *xsk_pool, struct mlx5e_rq *rq) +int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param, + struct mlx5e_xsk_param *xsk, int node, + struct mlx5e_rq *rq) { + struct mlx5_core_dev *mdev = rq->mdev; int err; - err = mlx5e_alloc_rq(c, params, xsk, xsk_pool, param, rq); + err = mlx5e_alloc_rq(params, xsk, param, node, rq); if (err) return err; @@ -906,28 +853,28 @@ int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params, if (err) goto err_destroy_rq; - if (mlx5e_is_tls_on(c->priv) && !mlx5_accel_is_ktls_device(c->mdev)) - __set_bit(MLX5E_RQ_STATE_FPGA_TLS, &c->rq.state); /* must be FPGA */ + if (mlx5e_is_tls_on(rq->priv) && !mlx5_accel_is_ktls_device(mdev)) + __set_bit(MLX5E_RQ_STATE_FPGA_TLS, &rq->state); /* must be FPGA */ - if (MLX5_CAP_ETH(c->mdev, cqe_checksum_full)) - __set_bit(MLX5E_RQ_STATE_CSUM_FULL, &c->rq.state); + if (MLX5_CAP_ETH(mdev, cqe_checksum_full)) + __set_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state); if (params->rx_dim_enabled) - __set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); + __set_bit(MLX5E_RQ_STATE_AM, &rq->state); /* We disable csum_complete when XDP is enabled since * XDP programs might manipulate packets which will render * skb->checksum incorrect. */ - if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || c->xdp) - __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state); + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || params->xdp_prog) + __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state); /* For CQE compression on striding RQ, use stride index provided by * HW if capability is supported. 
*/ if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) && - MLX5_CAP_GEN(c->mdev, mini_cqe_resp_stride_index)) - __set_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &c->rq.state); + MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index)) + __set_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &rq->state); return 0; @@ -942,7 +889,10 @@ err_free_rq: void mlx5e_activate_rq(struct mlx5e_rq *rq) { set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); - mlx5e_trigger_irq(rq->icosq); + if (rq->icosq) + mlx5e_trigger_irq(rq->icosq); + else + napi_schedule(rq->cq.napi); } void mlx5e_deactivate_rq(struct mlx5e_rq *rq) @@ -954,7 +904,8 @@ void mlx5e_deactivate_rq(struct mlx5e_rq *rq) void mlx5e_close_rq(struct mlx5e_rq *rq) { cancel_work_sync(&rq->dim.work); - cancel_work_sync(&rq->icosq->recover_work); + if (rq->icosq) + cancel_work_sync(&rq->icosq->recover_work); cancel_work_sync(&rq->recover_work); mlx5e_destroy_rq(rq); mlx5e_free_rx_descs(rq); @@ -1019,7 +970,7 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, sq->pdev = c->pdev; sq->mkey_be = c->mkey_be; sq->channel = c; - sq->uar_map = mdev->mlx5e_res.bfreg.map; + sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); sq->xsk_pool = xsk_pool; @@ -1090,7 +1041,8 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c, int err; sq->channel = c; - sq->uar_map = mdev->mlx5e_res.bfreg.map; + sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; + sq->reserved_room = param->stop_room; param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); @@ -1174,7 +1126,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->priv = c->priv; sq->ch_ix = c->ix; sq->txq_ix = txq_ix; - sq->uar_map = mdev->mlx5e_res.bfreg.map; + sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); @@ -1182,14 +1134,10 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); if (MLX5_IPSEC_DEV(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); - if (mlx5_accel_is_tls_device(c->priv->mdev)) - set_bit(MLX5E_SQ_STATE_TLS, &sq->state); if (param->is_mpw) set_bit(MLX5E_SQ_STATE_MPWQE, &sq->state); sq->stop_room = param->stop_room; - sq->ptp_cyc2time = mlx5_is_real_time_sq(mdev) ? 
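/*
 * mlx5e_open_icosq() above gains a kTLS resync list whose allocation can
 * fail after the SQ was already created, so the hunk adds a new
 * err_destroy_icosq label. The pattern is the usual kernel unwind
 * ladder: each later step jumps to a label that tears down everything
 * before it, in reverse order. A compilable toy version (the create_*/
 * destroy_* helpers are invented):
 */
#include <stdio.h>

static int create_a(void) { return 0; }
static int create_b(void) { return 0; }
static int create_c(void) { return -1; }	/* force the unwind path */
static void destroy_b(void) { puts("destroy_b"); }
static void destroy_a(void) { puts("destroy_a"); }

static int open_all(void)
{
	int err;

	err = create_a();
	if (err)
		return err;
	err = create_b();
	if (err)
		goto err_destroy_a;
	err = create_c();	/* the newly inserted step gets its own label */
	if (err)
		goto err_destroy_b;
	return 0;

err_destroy_b:
	destroy_b();
err_destroy_a:
	destroy_a();
	return err;
}

int main(void)
{
	printf("open_all() = %d\n", open_all());
	return 0;
}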
- mlx5_real_time_cyc2time : - mlx5_timecounter_cyc2time; + sq->ptp_cyc2time = mlx5_sq_ts_translator(mdev); param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); @@ -1257,7 +1205,7 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev, MLX5_SET(sqc, sqc, flush_in_error_en, 1); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); - MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.bfreg.index); + MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.hw_objs.bfreg.index); MLX5_SET(wq, wq, log_wq_pg_sz, csp->wq_ctrl->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET64(wq, wq, dbr_addr, csp->wq_ctrl->db.dma); @@ -1461,8 +1409,17 @@ int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, if (err) goto err_free_icosq; + if (param->is_tls) { + sq->ktls_resync = mlx5e_ktls_rx_resync_create_resp_list(); + if (IS_ERR(sq->ktls_resync)) { + err = PTR_ERR(sq->ktls_resync); + goto err_destroy_icosq; + } + } return 0; +err_destroy_icosq: + mlx5e_destroy_sq(c->mdev, sq->sqn); err_free_icosq: mlx5e_free_icosq(sq); @@ -1484,6 +1441,8 @@ void mlx5e_close_icosq(struct mlx5e_icosq *sq) { struct mlx5e_channel *c = sq->channel; + if (sq->ktls_resync) + mlx5e_ktls_rx_resync_destroy_resp_list(sq->ktls_resync); mlx5e_destroy_sq(c->mdev, sq->sqn); mlx5e_free_icosq_descs(sq); mlx5e_free_icosq(sq); @@ -1860,14 +1819,16 @@ static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate) return err; } -void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e_channel *c) +static int mlx5e_open_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_rq_param *rq_params) { - *ccp = (struct mlx5e_create_cq_param) { - .napi = &c->napi, - .ch_stats = c->stats, - .node = cpu_to_node(c->cpu), - .ix = c->ix, - }; + int err; + + err = mlx5e_init_rxq_rq(c, params, &c->rq); + if (err) + return err; + + return mlx5e_open_rq(params, rq_params, NULL, cpu_to_node(c->cpu), &c->rq); } static int mlx5e_open_queues(struct mlx5e_channel *c, @@ -1930,7 +1891,7 @@ static int mlx5e_open_queues(struct mlx5e_channel *c, goto err_close_sqs; } - err = mlx5e_open_rq(c, params, &cparam->rq, NULL, NULL, &c->rq); + err = mlx5e_open_rxq_rq(c, params, &cparam->rq); if (err) goto err_close_xdp_sq; @@ -2032,7 +1993,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->cpu = cpu; c->pdev = mlx5_core_dma_dev(priv->mdev); c->netdev = priv->netdev; - c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); + c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key); c->num_tc = params->num_tc; c->xdp = !!params->xdp_prog; c->stats = &priv->channel_stats[ix].ch; @@ -2111,295 +2072,6 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) kvfree(c); } -#define DEFAULT_FRAG_SIZE (2048) - -static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, - struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, - struct mlx5e_rq_frags_info *info) -{ - u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu); - int frag_size_max = DEFAULT_FRAG_SIZE; - u32 buf_size = 0; - int i; - - if (mlx5_fpga_is_ipsec_device(mdev)) - byte_count += MLX5E_METADATA_ETHER_LEN; - - if (mlx5e_rx_is_linear_skb(params, xsk)) { - int frag_stride; - - frag_stride = mlx5e_rx_get_linear_frag_sz(params, xsk); - frag_stride = roundup_pow_of_two(frag_stride); - - info->arr[0].frag_size = byte_count; - info->arr[0].frag_stride = frag_stride; - info->num_frags = 1; - info->wqe_bulk = PAGE_SIZE / frag_stride; - goto out; - } - - if (byte_count > 
PAGE_SIZE + - (MLX5E_MAX_RX_FRAGS - 1) * frag_size_max) - frag_size_max = PAGE_SIZE; - - i = 0; - while (buf_size < byte_count) { - int frag_size = byte_count - buf_size; - - if (i < MLX5E_MAX_RX_FRAGS - 1) - frag_size = min(frag_size, frag_size_max); - - info->arr[i].frag_size = frag_size; - info->arr[i].frag_stride = roundup_pow_of_two(frag_size); - - buf_size += frag_size; - i++; - } - info->num_frags = i; - /* number of different wqes sharing a page */ - info->wqe_bulk = 1 + (info->num_frags % 2); - -out: - info->wqe_bulk = max_t(u8, info->wqe_bulk, 8); - info->log_num_frags = order_base_2(info->num_frags); -} - -static inline u8 mlx5e_get_rqwq_log_stride(u8 wq_type, int ndsegs) -{ - int sz = sizeof(struct mlx5_wqe_data_seg) * ndsegs; - - switch (wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - sz += sizeof(struct mlx5e_rx_wqe_ll); - break; - default: /* MLX5_WQ_TYPE_CYCLIC */ - sz += sizeof(struct mlx5e_rx_wqe_cyc); - } - - return order_base_2(sz); -} - -static u8 mlx5e_get_rq_log_wq_sz(void *rqc) -{ - void *wq = MLX5_ADDR_OF(rqc, rqc, wq); - - return MLX5_GET(wq, wq, log_wq_sz); -} - -void mlx5e_build_rq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, - struct mlx5e_rq_param *param) -{ - struct mlx5_core_dev *mdev = priv->mdev; - void *rqc = param->rqc; - void *wq = MLX5_ADDR_OF(rqc, rqc, wq); - int ndsegs = 1; - - switch (params->rq_wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - MLX5_SET(wq, wq, log_wqe_num_of_strides, - mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk) - - MLX5_MPWQE_LOG_NUM_STRIDES_BASE); - MLX5_SET(wq, wq, log_wqe_stride_size, - mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk) - - MLX5_MPWQE_LOG_STRIDE_SZ_BASE); - MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params, xsk)); - break; - default: /* MLX5_WQ_TYPE_CYCLIC */ - MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames); - mlx5e_build_rq_frags_info(mdev, params, xsk, ¶m->frags_info); - ndsegs = param->frags_info.num_frags; - } - - MLX5_SET(wq, wq, wq_type, params->rq_wq_type); - MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); - MLX5_SET(wq, wq, log_wq_stride, - mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs)); - MLX5_SET(wq, wq, pd, mdev->mlx5e_res.pdn); - MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter); - MLX5_SET(rqc, rqc, vsd, params->vlan_strip_disable); - MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en); - - param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); - mlx5e_build_rx_cq_param(priv, params, xsk, ¶m->cqp); -} - -static void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv, - struct mlx5e_rq_param *param) -{ - struct mlx5_core_dev *mdev = priv->mdev; - void *rqc = param->rqc; - void *wq = MLX5_ADDR_OF(rqc, rqc, wq); - - MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); - MLX5_SET(wq, wq, log_wq_stride, - mlx5e_get_rqwq_log_stride(MLX5_WQ_TYPE_CYCLIC, 1)); - MLX5_SET(rqc, rqc, counter_set_id, priv->drop_rq_q_counter); - - param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); -} - -void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, - struct mlx5e_sq_param *param) -{ - void *sqc = param->sqc; - void *wq = MLX5_ADDR_OF(sqc, sqc, wq); - - MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); - MLX5_SET(wq, wq, pd, priv->mdev->mlx5e_res.pdn); - - param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(priv->mdev)); -} - -void mlx5e_build_sq_param(struct mlx5e_priv *priv, struct mlx5e_params *params, - struct mlx5e_sq_param *param) -{ - void *sqc = param->sqc; - 
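/*
 * The block being removed above (mlx5e_build_rq_frags_info() and
 * friends) carries the legacy-RQ fragment split: every fragment but the
 * last is capped at frag_size_max, and the last takes whatever remains.
 * A stand-alone rendering of just that loop, leaving out the driver's
 * frag_size_max bump to PAGE_SIZE and the wqe_bulk/stride rounding:
 */
#include <stdio.h>

#define MAX_RX_FRAGS 4

static int split_frags(int byte_count, int frag_size_max, int *arr)
{
	int buf_size = 0, i = 0;

	while (buf_size < byte_count) {
		int frag_size = byte_count - buf_size;

		if (i < MAX_RX_FRAGS - 1 && frag_size > frag_size_max)
			frag_size = frag_size_max;
		arr[i++] = frag_size;
		buf_size += frag_size;
	}
	return i;	/* num_frags */
}

int main(void)
{
	int arr[MAX_RX_FRAGS], n, i;

	n = split_frags(9000, 2048, arr);	/* e.g. a 9000-byte MTU, 2K frags */
	for (i = 0; i < n; i++)
		printf("frag[%d] = %d\n", i, arr[i]);
	return 0;
}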
void *wq = MLX5_ADDR_OF(sqc, sqc, wq); - bool allow_swp; - - allow_swp = mlx5_geneve_tx_allowed(priv->mdev) || - !!MLX5_IPSEC_DEV(priv->mdev); - mlx5e_build_sq_param_common(priv, param); - MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); - MLX5_SET(sqc, sqc, allow_swp, allow_swp); - param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE); - param->stop_room = mlx5e_calc_sq_stop_room(priv->mdev, params); - mlx5e_build_tx_cq_param(priv, params, ¶m->cqp); -} - -static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, - struct mlx5e_cq_param *param) -{ - void *cqc = param->cqc; - - MLX5_SET(cqc, cqc, uar_page, priv->mdev->priv.uar->index); - if (MLX5_CAP_GEN(priv->mdev, cqe_128_always) && cache_line_size() >= 128) - MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD); -} - -void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, - struct mlx5e_cq_param *param) -{ - struct mlx5_core_dev *mdev = priv->mdev; - bool hw_stridx = false; - void *cqc = param->cqc; - u8 log_cq_size; - - switch (params->rq_wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) + - mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); - hw_stridx = MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index); - break; - default: /* MLX5_WQ_TYPE_CYCLIC */ - log_cq_size = params->log_rq_mtu_frames; - } - - MLX5_SET(cqc, cqc, log_cq_size, log_cq_size); - if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { - MLX5_SET(cqc, cqc, mini_cqe_res_format, hw_stridx ? - MLX5_CQE_FORMAT_CSUM_STRIDX : MLX5_CQE_FORMAT_CSUM); - MLX5_SET(cqc, cqc, cqe_comp_en, 1); - } - - mlx5e_build_common_cq_param(priv, param); - param->cq_period_mode = params->rx_cq_moderation.cq_period_mode; -} - -void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_cq_param *param) -{ - void *cqc = param->cqc; - - MLX5_SET(cqc, cqc, log_cq_size, params->log_sq_size); - - mlx5e_build_common_cq_param(priv, param); - param->cq_period_mode = params->tx_cq_moderation.cq_period_mode; -} - -void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, - u8 log_wq_size, - struct mlx5e_cq_param *param) -{ - void *cqc = param->cqc; - - MLX5_SET(cqc, cqc, log_cq_size, log_wq_size); - - mlx5e_build_common_cq_param(priv, param); - - param->cq_period_mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; -} - -void mlx5e_build_icosq_param(struct mlx5e_priv *priv, - u8 log_wq_size, - struct mlx5e_sq_param *param) -{ - void *sqc = param->sqc; - void *wq = MLX5_ADDR_OF(sqc, sqc, wq); - - mlx5e_build_sq_param_common(priv, param); - - MLX5_SET(wq, wq, log_wq_sz, log_wq_size); - MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq)); - mlx5e_build_ico_cq_param(priv, log_wq_size, ¶m->cqp); -} - -void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_sq_param *param) -{ - void *sqc = param->sqc; - void *wq = MLX5_ADDR_OF(sqc, sqc, wq); - - mlx5e_build_sq_param_common(priv, param); - MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); - param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE); - mlx5e_build_tx_cq_param(priv, params, ¶m->cqp); -} - -static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5e_params *params, - struct mlx5e_rq_param *rqp) -{ - switch (params->rq_wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - return order_base_2(MLX5E_UMR_WQEBBS) + - mlx5e_get_rq_log_wq_sz(rqp->rqc); - default: /* MLX5_WQ_TYPE_CYCLIC */ - return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; - 
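/*
 * Several of the removed builders turn byte or entry counts into log2
 * ring parameters via order_base_2() -- the smallest k with 2^k >= n
 * (see mlx5e_get_rqwq_log_stride() above). A minimal model of that
 * rounding, for reference:
 */
#include <stdio.h>

static unsigned int order_base_2(unsigned int n)
{
	unsigned int k = 0;

	while ((1u << k) < n)
		k++;
	return k;
}

int main(void)
{
	unsigned int sz;

	for (sz = 1; sz <= 64; sz *= 3)
		printf("order_base_2(%u) = %u\n", sz, order_base_2(sz));
	return 0;
}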
} -} - -static u8 mlx5e_build_async_icosq_log_wq_sz(struct net_device *netdev) -{ - if (netdev->hw_features & NETIF_F_HW_TLS_RX) - return MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; - - return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; -} - -static void mlx5e_build_channel_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_channel_param *cparam) -{ - u8 icosq_log_wq_sz, async_icosq_log_wq_sz; - - mlx5e_build_rq_param(priv, params, NULL, &cparam->rq); - - icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(params, &cparam->rq); - async_icosq_log_wq_sz = mlx5e_build_async_icosq_log_wq_sz(priv->netdev); - - mlx5e_build_sq_param(priv, params, &cparam->txq_sq); - mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq); - mlx5e_build_icosq_param(priv, icosq_log_wq_sz, &cparam->icosq); - mlx5e_build_icosq_param(priv, async_icosq_log_wq_sz, &cparam->async_icosq); -} - int mlx5e_open_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs) { @@ -2414,7 +2086,10 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, if (!chs->c || !cparam) goto err_free; - mlx5e_build_channel_param(priv, &chs->params, cparam); + err = mlx5e_build_channel_param(priv->mdev, &chs->params, priv->q_counter, cparam); + if (err) + goto err_free; + for (i = 0; i < chs->num; i++) { struct xsk_buff_pool *xsk_pool = NULL; @@ -2426,9 +2101,8 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, goto err_close_channels; } - if (MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS)) { - err = mlx5e_port_ptp_open(priv, &chs->params, chs->c[0]->lag_port, - &chs->port_ptp); + if (MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS) || chs->params.ptp_rx) { + err = mlx5e_ptp_open(priv, &chs->params, chs->c[0]->lag_port, &chs->ptp); if (err) goto err_close_channels; } @@ -2442,8 +2116,8 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, return 0; err_close_ptp: - if (chs->port_ptp) - mlx5e_port_ptp_close(chs->port_ptp); + if (chs->ptp) + mlx5e_ptp_close(chs->ptp); err_close_channels: for (i--; i >= 0; i--) @@ -2463,8 +2137,8 @@ static void mlx5e_activate_channels(struct mlx5e_channels *chs) for (i = 0; i < chs->num; i++) mlx5e_activate_channel(chs->c[i]); - if (chs->port_ptp) - mlx5e_ptp_activate_channel(chs->port_ptp); + if (chs->ptp) + mlx5e_ptp_activate_channel(chs->ptp); } #define MLX5E_RQ_WQES_TIMEOUT 20000 /* msecs */ @@ -2491,8 +2165,8 @@ static void mlx5e_deactivate_channels(struct mlx5e_channels *chs) { int i; - if (chs->port_ptp) - mlx5e_ptp_deactivate_channel(chs->port_ptp); + if (chs->ptp) + mlx5e_ptp_deactivate_channel(chs->ptp); for (i = 0; i < chs->num; i++) mlx5e_deactivate_channel(chs->c[i]); @@ -2502,9 +2176,10 @@ void mlx5e_close_channels(struct mlx5e_channels *chs) { int i; - if (chs->port_ptp) - mlx5e_port_ptp_close(chs->port_ptp); - + if (chs->ptp) { + mlx5e_ptp_close(chs->ptp); + chs->ptp = NULL; + } for (i = 0; i < chs->num; i++) mlx5e_close_channel(chs->c[i]); @@ -2560,12 +2235,12 @@ int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv) return err; } -int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) +int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n) { int err; int ix; - for (ix = 0; ix < priv->max_nch; ix++) { + for (ix = 0; ix < n; ix++) { err = mlx5e_create_rqt(priv, 1 /*size */, &tirs[ix].rqt); if (unlikely(err)) goto err_destroy_rqts; @@ -2581,11 +2256,11 @@ err_destroy_rqts: return err; } -void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) +void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct 
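/*
 * mlx5e_create_direct_rqts() above now takes an explicit count n (so the
 * same helper serves the max_nch direct/XSK tables and the single PTP
 * table), but keeps its partial-failure contract: on error, only the
 * entries already created are destroyed, in reverse. A toy version with
 * invented create_one()/destroy_one():
 */
#include <stdio.h>

static int create_one(int ix) { return ix == 3 ? -1 : 0; }	/* fail on the 4th */
static void destroy_one(int ix) { printf("destroy %d\n", ix); }

static int create_n(int n)
{
	int err = 0, ix;

	for (ix = 0; ix < n; ix++) {
		err = create_one(ix);
		if (err)
			goto err_destroy;
	}
	return 0;

err_destroy:
	for (ix--; ix >= 0; ix--)
		destroy_one(ix);
	return err;
}

int main(void)
{
	printf("create_n(8) = %d\n", create_n(8));
	return 0;
}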
mlx5e_tir *tirs, int n) { int i; - for (i = 0; i < priv->max_nch; i++) + for (i = 0; i < n; i++) mlx5e_destroy_rqt(priv, &tirs[i].rqt); } @@ -2668,7 +2343,8 @@ static u32 mlx5e_get_direct_rqn(struct mlx5e_priv *priv, int ix, } static void mlx5e_redirect_rqts(struct mlx5e_priv *priv, - struct mlx5e_redirect_rqt_param rrp) + struct mlx5e_redirect_rqt_param rrp, + struct mlx5e_redirect_rqt_param *ptp_rrp) { u32 rqtn; int ix; @@ -2694,11 +2370,17 @@ static void mlx5e_redirect_rqts(struct mlx5e_priv *priv, rqtn = priv->direct_tir[ix].rqt.rqtn; mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp); } + if (ptp_rrp) { + rqtn = priv->ptp_tir.rqt.rqtn; + mlx5e_redirect_rqt(priv, rqtn, 1, *ptp_rrp); + } } static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs) { + bool rx_ptp_support = priv->profile->rx_ptp_support; + struct mlx5e_redirect_rqt_param *ptp_rrp_p = NULL; struct mlx5e_redirect_rqt_param rrp = { .is_rss = true, { @@ -2708,12 +2390,22 @@ static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv, } }, }; + struct mlx5e_redirect_rqt_param ptp_rrp; - mlx5e_redirect_rqts(priv, rrp); + if (rx_ptp_support) { + u32 ptp_rqn; + + ptp_rrp.is_rss = false; + ptp_rrp.rqn = mlx5e_ptp_get_rqn(priv->channels.ptp, &ptp_rqn) ? + priv->drop_rq.rqn : ptp_rqn; + ptp_rrp_p = &ptp_rrp; + } + mlx5e_redirect_rqts(priv, rrp, ptp_rrp_p); } static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv) { + bool rx_ptp_support = priv->profile->rx_ptp_support; struct mlx5e_redirect_rqt_param drop_rrp = { .is_rss = false, { @@ -2721,7 +2413,7 @@ static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv) }, }; - mlx5e_redirect_rqts(priv, drop_rrp); + mlx5e_redirect_rqts(priv, drop_rrp, rx_ptp_support ? &drop_rrp : NULL); } static const struct mlx5e_tirc_config tirc_default_config[MLX5E_NUM_INDIR_TIRS] = { @@ -3010,6 +2702,8 @@ static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv) nch = priv->channels.params.num_channels; ntc = priv->channels.params.num_tc; num_rxqs = nch * priv->profile->rq_groups; + if (priv->channels.params.ptp_rx) + num_rxqs++; mlx5e_netdev_set_tcs(netdev, nch, ntc); @@ -3095,11 +2789,14 @@ static void mlx5e_build_txq_maps(struct mlx5e_priv *priv) } } - if (!priv->channels.port_ptp) + if (!priv->channels.ptp) + return; + + if (!test_bit(MLX5E_PTP_STATE_TX, priv->channels.ptp->state)) return; for (tc = 0; tc < num_tc; tc++) { - struct mlx5e_port_ptp *c = priv->channels.port_ptp; + struct mlx5e_ptp *c = priv->channels.ptp; struct mlx5e_txqsq *sq = &c->ptpsq[tc].txqsq; priv->txq2sq[sq->txq_ix] = sq; @@ -3150,6 +2847,29 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) mlx5e_deactivate_channels(&priv->channels); } +static int mlx5e_switch_priv_params(struct mlx5e_priv *priv, + struct mlx5e_params *new_params, + mlx5e_fp_preactivate preactivate, + void *context) +{ + struct mlx5e_params old_params; + + old_params = priv->channels.params; + priv->channels.params = *new_params; + + if (preactivate) { + int err; + + err = preactivate(priv, context); + if (err) { + priv->channels.params = old_params; + return err; + } + } + + return 0; +} + static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv, struct mlx5e_channels *new_chs, mlx5e_fp_preactivate preactivate, @@ -3192,35 +2912,32 @@ out: return err; } -int mlx5e_safe_switch_channels(struct mlx5e_priv *priv, - struct mlx5e_channels *new_chs, - mlx5e_fp_preactivate preactivate, - void *context) +int mlx5e_safe_switch_params(struct mlx5e_priv *priv, + struct mlx5e_params *params, + 
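/*
 * The RX PTP steering added above picks its RQN with a drop fallback:
 * when mlx5e_ptp_get_rqn() cannot hand back a queue, the redirect entry
 * points at the drop RQ instead, so PTP traffic is dropped rather than
 * steered to a stale queue. Modeled stand-alone (ptp_get_rqn() here is a
 * stub, not the driver function):
 */
#include <stdio.h>

static int ptp_get_rqn(int have_ptp, unsigned int *rqn)
{
	if (!have_ptp)
		return -1;
	*rqn = 42;
	return 0;
}

int main(void)
{
	unsigned int drop_rqn = 7, ptp_rqn = 0;
	unsigned int rqn = ptp_get_rqn(0, &ptp_rqn) ? drop_rqn : ptp_rqn;

	printf("steering to rqn %u\n", rqn);	/* 7: fell back to drop */
	return 0;
}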
mlx5e_fp_preactivate preactivate, + void *context, bool reset) { + struct mlx5e_channels new_chs = {}; int err; - err = mlx5e_open_channels(priv, new_chs); + reset &= test_bit(MLX5E_STATE_OPENED, &priv->state); + if (!reset) + return mlx5e_switch_priv_params(priv, params, preactivate, context); + + new_chs.params = *params; + err = mlx5e_open_channels(priv, &new_chs); if (err) return err; - - err = mlx5e_switch_priv_channels(priv, new_chs, preactivate, context); + err = mlx5e_switch_priv_channels(priv, &new_chs, preactivate, context); if (err) - goto err_close; - - return 0; - -err_close: - mlx5e_close_channels(new_chs); + mlx5e_close_channels(&new_chs); return err; } int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv) { - struct mlx5e_channels new_channels = {}; - - new_channels.params = priv->channels.params; - return mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); + return mlx5e_safe_switch_params(priv, &priv->channels.params, NULL, NULL, true); } void mlx5e_timestamp_init(struct mlx5e_priv *priv) @@ -3373,7 +3090,7 @@ int mlx5e_open_drop_rq(struct mlx5e_priv *priv, struct mlx5e_cq *cq = &drop_rq->cq; int err; - mlx5e_build_drop_rq_param(priv, &rq_param); + mlx5e_build_drop_rq_param(mdev, priv->drop_rq_q_counter, &rq_param); err = mlx5e_alloc_drop_cq(priv, cq, &cq_param); if (err) @@ -3421,10 +3138,10 @@ int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn) { void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); - MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn); + MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn); if (MLX5_GET(tisc, tisc, tls_en)) - MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.pdn); + MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.hw_objs.pdn); if (mlx5_lag_is_lacp_owner(mdev)) MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1); @@ -3494,7 +3211,7 @@ static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) static void mlx5e_build_indir_tir_ctx_common(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc) { - MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); + MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.hw_objs.td.tdn); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); MLX5_SET(tirc, tirc, indirect_table, rqtn); MLX5_SET(tirc, tirc, tunneled_offload_en, @@ -3586,7 +3303,7 @@ err_destroy_inner_tirs: return err; } -int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) +int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n) { struct mlx5e_tir *tir; void *tirc; @@ -3600,7 +3317,7 @@ int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) if (!in) return -ENOMEM; - for (ix = 0; ix < priv->max_nch; ix++) { + for (ix = 0; ix < n; ix++) { memset(in, 0, inlen); tir = &tirs[ix]; tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); @@ -3638,11 +3355,11 @@ void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv) mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]); } -void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) +void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n) { int i; - for (i = 0; i < priv->max_nch; i++) + for (i = 0; i < n; i++) mlx5e_destroy_tir(priv->mdev, &tirs[i]); } @@ -3677,7 +3394,7 @@ static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd) static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv, struct tc_mqprio_qopt *mqprio) { - struct mlx5e_channels new_channels = {}; + struct mlx5e_params new_params; u8 tc 
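/*
 * mlx5e_switch_priv_params(), introduced above, is the no-reset half of
 * the new mlx5e_safe_switch_params() API: install the new parameters,
 * run the optional preactivate hook, and restore the old parameters if
 * the hook fails. A compilable model of exactly that rollback contract
 * (struct params and failing_hook are invented):
 */
#include <stdio.h>

struct params { int mtu; };

typedef int (*preactivate_fn)(void *ctx);

static int switch_params(struct params *live, const struct params *nxt,
			 preactivate_fn hook, void *ctx)
{
	struct params old = *live;

	*live = *nxt;
	if (hook) {
		int err = hook(ctx);

		if (err) {
			*live = old;	/* roll back on failure */
			return err;
		}
	}
	return 0;
}

static int failing_hook(void *ctx) { (void)ctx; return -1; }

int main(void)
{
	struct params live = { .mtu = 1500 }, next_p = { .mtu = 9000 };

	switch_params(&live, &next_p, failing_hook, NULL);
	printf("mtu after failed switch: %d\n", live.mtu);	/* still 1500 */
	return 0;
}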
= mqprio->num_tc; int err = 0; @@ -3696,23 +3413,11 @@ static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv, goto out; } - new_channels.params = priv->channels.params; - new_channels.params.num_tc = tc ? tc : 1; - - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - struct mlx5e_params old_params; - - old_params = priv->channels.params; - priv->channels.params = new_channels.params; - err = mlx5e_num_channels_changed(priv); - if (err) - priv->channels.params = old_params; - - goto out; - } + new_params = priv->channels.params; + new_params.num_tc = tc ? tc : 1; - err = mlx5e_safe_switch_channels(priv, &new_channels, - mlx5e_num_channels_changed_ctx, NULL); + err = mlx5e_safe_switch_params(priv, &new_params, + mlx5e_num_channels_changed_ctx, NULL, true); out: priv->max_opened_tc = max_t(u8, priv->max_opened_tc, @@ -3769,8 +3474,16 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { struct mlx5e_priv *priv = netdev_priv(dev); + bool tc_unbind = false; int err; + if (type == TC_SETUP_BLOCK && + ((struct flow_block_offload *)type_data)->command == FLOW_BLOCK_UNBIND) + tc_unbind = true; + + if (!netif_device_present(dev) && !tc_unbind) + return -ENODEV; + switch (type) { case TC_SETUP_BLOCK: { struct flow_block_offload *f = type_data; @@ -3815,6 +3528,22 @@ void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s) s->tx_dropped += sq_stats->dropped; } } + if (priv->tx_ptp_opened) { + for (i = 0; i < priv->max_opened_tc; i++) { + struct mlx5e_sq_stats *sq_stats = &priv->ptp_stats.sq[i]; + + s->tx_packets += sq_stats->packets; + s->tx_bytes += sq_stats->bytes; + s->tx_dropped += sq_stats->dropped; + } + } + if (priv->rx_ptp_opened) { + struct mlx5e_rq_stats *rq_stats = &priv->ptp_stats.rq; + + s->rx_packets += rq_stats->packets; + s->rx_bytes += rq_stats->bytes; + s->multicast += rq_stats->mcast_packets; + } } void @@ -3823,6 +3552,9 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_pport_stats *pstats = &priv->stats.pport; + if (!netif_device_present(dev)) + return; + /* In switchdev mode, monitor counters doesn't monitor * rx/tx stats of 802_3. 
The update stats mechanism * should keep the 802_3 layout counters updated @@ -3834,10 +3566,17 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) } if (mlx5e_is_uplink_rep(priv)) { + struct mlx5e_vport_stats *vstats = &priv->stats.vport; + stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok); stats->rx_bytes = PPORT_802_3_GET(pstats, a_octets_received_ok); stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok); stats->tx_bytes = PPORT_802_3_GET(pstats, a_octets_transmitted_ok); + + /* vport multicast also counts packets that are dropped due to steering + * or rx out of buffer + */ + stats->multicast = VPORT_COUNTER_GET(vstats, received_eth_multicast.packets); } else { mlx5e_fold_sw_stats64(priv, stats); } @@ -3857,11 +3596,19 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors; } +static void mlx5e_nic_set_rx_mode(struct mlx5e_priv *priv) +{ + if (mlx5e_is_uplink_rep(priv)) + return; /* no rx mode for uplink rep */ + + queue_work(priv->wq, &priv->set_rx_mode_work); +} + static void mlx5e_set_rx_mode(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); - queue_work(priv->wq, &priv->set_rx_mode_work); + mlx5e_nic_set_rx_mode(priv); } static int mlx5e_set_mac(struct net_device *netdev, void *addr) @@ -3876,7 +3623,7 @@ static int mlx5e_set_mac(struct net_device *netdev, void *addr) ether_addr_copy(netdev->dev_addr, saddr->sa_data); netif_addr_unlock_bh(netdev); - queue_work(priv->wq, &priv->set_rx_mode_work); + mlx5e_nic_set_rx_mode(priv); return 0; } @@ -3895,10 +3642,10 @@ static int set_feature_lro(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_channels new_channels = {}; struct mlx5e_params *cur_params; + struct mlx5e_params new_params; + bool reset = true; int err = 0; - bool reset; mutex_lock(&priv->state_lock); @@ -3916,30 +3663,17 @@ static int set_feature_lro(struct net_device *netdev, bool enable) goto out; } - reset = test_bit(MLX5E_STATE_OPENED, &priv->state); - - new_channels.params = *cur_params; - new_channels.params.lro_en = enable; + new_params = *cur_params; + new_params.lro_en = enable; - if (cur_params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) { + if (cur_params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { if (mlx5e_rx_mpwqe_is_linear_skb(mdev, cur_params, NULL) == - mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_channels.params, NULL)) + mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_params, NULL)) reset = false; } - if (!reset) { - struct mlx5e_params old_params; - - old_params = *cur_params; - *cur_params = new_channels.params; - err = mlx5e_modify_tirs_lro(priv); - if (err) - *cur_params = old_params; - goto out; - } - - err = mlx5e_safe_switch_channels(priv, &new_channels, - mlx5e_modify_tirs_lro_ctx, NULL); + err = mlx5e_safe_switch_params(priv, &new_params, + mlx5e_modify_tirs_lro_ctx, NULL, reset); out: mutex_unlock(&priv->state_lock); return err; @@ -4098,7 +3832,8 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, mutex_lock(&priv->state_lock); params = &priv->channels.params; - if (!bitmap_empty(priv->fs.vlan.active_svlans, VLAN_N_VID)) { + if (!priv->fs.vlan || + !bitmap_empty(mlx5e_vlan_get_active_svlans(priv->fs.vlan), VLAN_N_VID)) { /* HW strips the outer C-tag header, this is a problem * for S-tag traffic. 
*/ @@ -4167,26 +3902,23 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, mlx5e_fp_preactivate preactivate) { struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5e_channels new_channels = {}; + struct mlx5e_params new_params; struct mlx5e_params *params; + bool reset = true; int err = 0; - bool reset; mutex_lock(&priv->state_lock); params = &priv->channels.params; - reset = !params->lro_en; - reset = reset && test_bit(MLX5E_STATE_OPENED, &priv->state); - - new_channels.params = *params; - new_channels.params.sw_mtu = new_mtu; - err = mlx5e_validate_params(priv, &new_channels.params); + new_params = *params; + new_params.sw_mtu = new_mtu; + err = mlx5e_validate_params(priv->mdev, &new_params); if (err) goto out; if (params->xdp_prog && - !mlx5e_rx_is_linear_skb(&new_channels.params, NULL)) { + !mlx5e_rx_is_linear_skb(&new_params, NULL)) { netdev_err(netdev, "MTU(%d) > %d is not allowed while XDP enabled\n", new_mtu, mlx5e_xdp_max_mtu(params, NULL)); err = -EINVAL; @@ -4195,47 +3927,34 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, if (priv->xsk.refcnt && !mlx5e_xsk_validate_mtu(netdev, &priv->channels, - &new_channels.params, priv->mdev)) { + &new_params, priv->mdev)) { err = -EINVAL; goto out; } + if (params->lro_en) + reset = false; + if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { - bool is_linear = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, - &new_channels.params, - NULL); + bool is_linear_old = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, params, NULL); + bool is_linear_new = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, + &new_params, NULL); u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params, NULL); - u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_channels.params, NULL); - - /* If XSK is active, XSK RQs are linear. */ - is_linear |= priv->xsk.refcnt; - - /* Always reset in linear mode - hw_mtu is used in data path. */ - reset = reset && (is_linear || (ppw_old != ppw_new)); - } - - if (!reset) { - unsigned int old_mtu = params->sw_mtu; + u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_params, NULL); - params->sw_mtu = new_mtu; - if (preactivate) { - err = preactivate(priv, NULL); - if (err) { - params->sw_mtu = old_mtu; - goto out; - } - } - netdev->mtu = params->sw_mtu; - goto out; + /* Always reset in linear mode - hw_mtu is used in data path. + * Check that the mode was non-linear and didn't change. + * If XSK is active, XSK RQs are linear. 
+ */ + if (!is_linear_old && !is_linear_new && !priv->xsk.refcnt && + ppw_old == ppw_new) + reset = false; } - err = mlx5e_safe_switch_channels(priv, &new_channels, preactivate, NULL); - if (err) - goto out; - - netdev->mtu = new_channels.params.sw_mtu; + err = mlx5e_safe_switch_params(priv, &new_params, preactivate, NULL, reset); out: + netdev->mtu = params->sw_mtu; mutex_unlock(&priv->state_lock); return err; } @@ -4245,9 +3964,18 @@ static int mlx5e_change_nic_mtu(struct net_device *netdev, int new_mtu) return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu_ctx); } +int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx) +{ + bool set = *(bool *)ctx; + + return mlx5e_ptp_rx_manage_fs(priv, set); +} + int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) { + struct mlx5e_params new_params; struct hwtstamp_config config; + bool rx_cqe_compress_def; int err; if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz) || @@ -4267,11 +3995,13 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) } mutex_lock(&priv->state_lock); + new_params = priv->channels.params; + rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def; + /* RX HW timestamp */ switch (config.rx_filter) { case HWTSTAMP_FILTER_NONE: - /* Reset CQE compression to Admin default */ - mlx5e_modify_rx_cqe_compression_locked(priv, priv->channels.params.rx_cqe_compress_def); + new_params.ptp_rx = false; break; case HWTSTAMP_FILTER_ALL: case HWTSTAMP_FILTER_SOME: @@ -4288,15 +4018,7 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: case HWTSTAMP_FILTER_NTP_ALL: - /* Disable CQE compression */ - if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS)) - netdev_warn(priv->netdev, "Disabling RX cqe compression\n"); - err = mlx5e_modify_rx_cqe_compression_locked(priv, false); - if (err) { - netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err); - mutex_unlock(&priv->state_lock); - return err; - } + new_params.ptp_rx = rx_cqe_compress_def; config.rx_filter = HWTSTAMP_FILTER_ALL; break; default: @@ -4304,6 +4026,16 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) return -ERANGE; } + if (new_params.ptp_rx == priv->channels.params.ptp_rx) + goto out; + + err = mlx5e_safe_switch_params(priv, &new_params, mlx5e_ptp_rx_manage_fs_ctx, + &new_params.ptp_rx, true); + if (err) { + mutex_unlock(&priv->state_lock); + return err; + } +out: memcpy(&priv->tstamp, &config, sizeof(config)); mutex_unlock(&priv->state_lock); @@ -4414,6 +4146,9 @@ static int mlx5e_set_vf_link_state(struct net_device *dev, int vf, struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; + if (mlx5e_is_uplink_rep(priv)) + return -EOPNOTSUPP; + return mlx5_eswitch_set_vport_state(mdev->priv.eswitch, vf + 1, mlx5_ifla_link2vport(link_state)); } @@ -4425,6 +4160,9 @@ int mlx5e_get_vf_config(struct net_device *dev, struct mlx5_core_dev *mdev = priv->mdev; int err; + if (!netif_device_present(dev)) + return -EOPNOTSUPP; + err = mlx5_eswitch_get_vport_config(mdev->priv.eswitch, vf + 1, ivi); if (err) return err; @@ -4441,6 +4179,32 @@ int mlx5e_get_vf_stats(struct net_device *dev, return mlx5_eswitch_get_vport_stats(mdev->priv.eswitch, vf + 1, vf_stats); } + +static bool +mlx5e_has_offload_stats(const struct net_device *dev, int attr_id) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + if (!netif_device_present(dev)) + return false; + + if 
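/*
 * The HWTSTAMP rework above stops toggling CQE compression directly from
 * the ioctl; it computes the desired ptp_rx state and performs a channel
 * switch only when that state actually changes (the "goto out" when
 * new_params.ptp_rx equals the current value). A minimal model of the
 * apply-only-on-change idiom:
 */
#include <stdbool.h>
#include <stdio.h>

static bool live_ptp_rx;

static int apply_ptp_rx(bool want)
{
	if (want == live_ptp_rx)
		return 0;	/* nothing to do, skip the reset */
	printf("switching channels, ptp_rx %d -> %d\n", live_ptp_rx, want);
	live_ptp_rx = want;
	return 0;
}

int main(void)
{
	apply_ptp_rx(true);	/* performs the switch */
	apply_ptp_rx(true);	/* no-op */
	return 0;
}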
(!mlx5e_is_uplink_rep(priv)) + return false; + + return mlx5e_rep_has_offload_stats(dev, attr_id); +} + +static int +mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, + void *sp) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + if (!mlx5e_is_uplink_rep(priv)) + return -EOPNOTSUPP; + + return mlx5e_rep_get_offload_stats(attr_id, dev, sp); +} #endif static bool mlx5e_tunnel_proto_supported_tx(struct mlx5_core_dev *mdev, u8 proto_type) @@ -4584,7 +4348,7 @@ static void mlx5e_tx_timeout(struct net_device *dev, unsigned int txqueue) static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog) { struct net_device *netdev = priv->netdev; - struct mlx5e_channels new_channels = {}; + struct mlx5e_params new_params; if (priv->channels.params.lro_en) { netdev_warn(netdev, "can't set XDP while LRO is on, disable LRO first\n"); @@ -4597,16 +4361,16 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog) return -EINVAL; } - new_channels.params = priv->channels.params; - new_channels.params.xdp_prog = prog; + new_params = priv->channels.params; + new_params.xdp_prog = prog; /* No XSK params: AF_XDP can't be enabled yet at the point of setting * the XDP program. */ - if (!mlx5e_rx_is_linear_skb(&new_channels.params, NULL)) { + if (!mlx5e_rx_is_linear_skb(&new_params, NULL)) { netdev_warn(netdev, "XDP is not allowed with MTU(%d) > %d\n", - new_channels.params.sw_mtu, - mlx5e_xdp_max_mtu(&new_channels.params, NULL)); + new_params.sw_mtu, + mlx5e_xdp_max_mtu(&new_params, NULL)); return -EINVAL; } @@ -4626,9 +4390,10 @@ static void mlx5e_rq_replace_xdp_prog(struct mlx5e_rq *rq, struct bpf_prog *prog static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) { struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_params new_params; struct bpf_prog *old_prog; - bool reset, was_opened; int err = 0; + bool reset; int i; mutex_lock(&priv->state_lock); @@ -4639,52 +4404,37 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) goto unlock; } - was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); /* no need for full reset when exchanging programs */ reset = (!priv->channels.params.xdp_prog || !prog); - if (was_opened && !reset) - /* num_channels is invariant here, so we can take the - * batched reference right upfront. - */ - bpf_prog_add(prog, priv->channels.num); - - if (was_opened && reset) { - struct mlx5e_channels new_channels = {}; - - new_channels.params = priv->channels.params; - new_channels.params.xdp_prog = prog; - mlx5e_set_rq_type(priv->mdev, &new_channels.params); - old_prog = priv->channels.params.xdp_prog; + new_params = priv->channels.params; + new_params.xdp_prog = prog; + if (reset) + mlx5e_set_rq_type(priv->mdev, &new_params); + old_prog = priv->channels.params.xdp_prog; - err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); - if (err) - goto unlock; - } else { - /* exchange programs, extra prog reference we got from caller - * as long as we don't fail from this point onwards. 
- */ - old_prog = xchg(&priv->channels.params.xdp_prog, prog); - } + err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset); + if (err) + goto unlock; if (old_prog) bpf_prog_put(old_prog); - if (!was_opened && reset) /* change RQ type according to priv->xdp_prog */ - mlx5e_set_rq_type(priv->mdev, &priv->channels.params); - - if (!was_opened || reset) + if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset) goto unlock; /* exchanging programs w/o reset, we update ref counts on behalf * of the channels RQs here. */ + bpf_prog_add(prog, priv->channels.num); for (i = 0; i < priv->channels.num; i++) { struct mlx5e_channel *c = priv->channels.c[i]; mlx5e_rq_replace_xdp_prog(&c->rq, prog); - if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) { + bpf_prog_inc(prog); mlx5e_rq_replace_xdp_prog(&c->xskrq, prog); + } } unlock: @@ -4797,6 +4547,8 @@ const struct net_device_ops mlx5e_netdev_ops = { .ndo_get_vf_config = mlx5e_get_vf_config, .ndo_set_vf_link_state = mlx5e_set_vf_link_state, .ndo_get_vf_stats = mlx5e_get_vf_stats, + .ndo_has_offload_stats = mlx5e_has_offload_stats, + .ndo_get_offload_stats = mlx5e_get_offload_stats, #endif .ndo_get_devlink_port = mlx5e_get_devlink_port, }; @@ -4810,93 +4562,6 @@ void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, indirection_rqt[i] = i % num_channels; } -static bool slow_pci_heuristic(struct mlx5_core_dev *mdev) -{ - u32 link_speed = 0; - u32 pci_bw = 0; - - mlx5e_port_max_linkspeed(mdev, &link_speed); - pci_bw = pcie_bandwidth_available(mdev->pdev, NULL, NULL, NULL); - mlx5_core_dbg_once(mdev, "Max link speed = %d, PCI BW = %d\n", - link_speed, pci_bw); - -#define MLX5E_SLOW_PCI_RATIO (2) - - return link_speed && pci_bw && - link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw; -} - -static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) -{ - struct dim_cq_moder moder; - - moder.cq_period_mode = cq_period_mode; - moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; - moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; - if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) - moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE; - - return moder; -} - -static struct dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) -{ - struct dim_cq_moder moder; - - moder.cq_period_mode = cq_period_mode; - moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; - moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; - if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) - moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE; - - return moder; -} - -static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode) -{ - return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ? 
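/*
 * In the no-reset XDP path above, reference accounting moves next to the
 * RQ updates: one reference per regular channel RQ is taken in bulk
 * (bpf_prog_add(prog, nch)), plus an extra bpf_prog_inc() for each
 * active XSK RQ, while the replaced program drops the reference it held.
 * A toy refcount model of that bookkeeping (struct prog is invented):
 */
#include <stdio.h>

struct prog { int refcnt; };

static void prog_get(struct prog *p) { p->refcnt++; }
static void prog_put(struct prog *p) { p->refcnt--; }

static void swap_prog(struct prog *old, struct prog *new_p, int nch, int nxsk)
{
	int i;

	for (i = 0; i < nch; i++)
		prog_get(new_p);	/* bpf_prog_add(prog, nch) in bulk */
	for (i = 0; i < nxsk; i++)
		prog_get(new_p);	/* bpf_prog_inc() per XSK RQ */
	if (old)
		prog_put(old);		/* old program's owner reference */
}

int main(void)
{
	struct prog oldp = { .refcnt = 1 }, newp = { .refcnt = 1 };

	swap_prog(&oldp, &newp, 4, 1);
	printf("old %d new %d\n", oldp.refcnt, newp.refcnt);	/* 0 and 6 */
	return 0;
}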
- DIM_CQ_PERIOD_MODE_START_FROM_CQE : - DIM_CQ_PERIOD_MODE_START_FROM_EQE; -} - -void mlx5e_reset_tx_moderation(struct mlx5e_params *params, u8 cq_period_mode) -{ - if (params->tx_dim_enabled) { - u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode); - - params->tx_cq_moderation = net_dim_get_def_tx_moderation(dim_period_mode); - } else { - params->tx_cq_moderation = mlx5e_get_def_tx_moderation(cq_period_mode); - } -} - -void mlx5e_reset_rx_moderation(struct mlx5e_params *params, u8 cq_period_mode) -{ - if (params->rx_dim_enabled) { - u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode); - - params->rx_cq_moderation = net_dim_get_def_rx_moderation(dim_period_mode); - } else { - params->rx_cq_moderation = mlx5e_get_def_rx_moderation(cq_period_mode); - } -} - -void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) -{ - mlx5e_reset_tx_moderation(params, cq_period_mode); - MLX5E_SET_PFLAG(params, MLX5E_PFLAG_TX_CQE_BASED_MODER, - params->tx_cq_moderation.cq_period_mode == - MLX5_CQ_PERIOD_MODE_START_FROM_CQE); -} - -void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) -{ - mlx5e_reset_rx_moderation(params, cq_period_mode); - MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER, - params->rx_cq_moderation.cq_period_mode == - MLX5_CQ_PERIOD_MODE_START_FROM_CQE); -} - static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) { int i; @@ -4909,25 +4574,6 @@ static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeo return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]); } -void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) -{ - /* Prefer Striding RQ, unless any of the following holds: - * - Striding RQ configuration is not possible/supported. - * - Slow PCI heuristic. - * - Legacy RQ would use linear SKB while Striding RQ would use non-linear. - * - * No XSK params: checking the availability of striding RQ in general. - */ - if (!slow_pci_heuristic(mdev) && - mlx5e_striding_rq_possible(mdev, params) && - (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) || - !mlx5e_rx_is_linear_skb(params, NULL))) - MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true); - mlx5e_set_rq_type(mdev, params); - mlx5e_init_rq_type_params(mdev, params); -} - void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, u16 num_channels) { @@ -4958,6 +4604,11 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 priv->max_nch); params->num_tc = 1; + /* Set an initial non-zero value, so that mlx5e_select_queue won't + * divide by zero if called before first activating channels. + */ + priv->num_tc_x_num_ch = params->num_channels * params->num_tc; + /* SQ */ params->log_sq_size = is_kdump_kernel() ? 
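/*
 * The comment added to mlx5e_build_nic_params() above seeds
 * num_tc_x_num_ch early because the TX queue selector divides by it; a
 * zero value before the first channel activation would be a
 * divide-by-zero. A sketch of why the seed matters -- the modulo mapping
 * below is an illustration only, not the driver's actual selection
 * logic:
 */
#include <stdio.h>

static unsigned int num_tc_x_num_ch;

static unsigned int select_queue(unsigned int hash)
{
	return hash % num_tc_x_num_ch;	/* div-by-zero if left unseeded */
}

int main(void)
{
	num_tc_x_num_ch = 8 * 1;	/* seeded from params, as in the hunk */
	printf("queue = %u\n", select_queue(1234567));
	return 0;
}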
MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE : @@ -5238,6 +4889,7 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); + struct devlink_port *dl_port; int err; mlx5e_build_nic_params(priv, &priv->xsk, netdev->mtu); @@ -5253,19 +4905,19 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, if (err) mlx5_core_err(mdev, "TLS initialization failed, %d\n", err); - err = mlx5e_devlink_port_register(priv); - if (err) - mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err); - - mlx5e_health_create_reporters(priv); + dl_port = mlx5e_devlink_get_dl_port(priv); + if (dl_port->registered) + mlx5e_health_create_reporters(priv); return 0; } static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) { - mlx5e_health_destroy_reporters(priv); - mlx5e_devlink_port_unregister(priv); + struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv); + + if (dl_port->registered) + mlx5e_health_destroy_reporters(priv); mlx5e_tls_cleanup(priv); mlx5e_ipsec_cleanup(priv); } @@ -5273,6 +4925,7 @@ static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; + u16 max_nch = priv->max_nch; int err; mlx5e_create_q_counters(priv); @@ -5287,7 +4940,7 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) if (err) goto err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv, priv->direct_tir); + err = mlx5e_create_direct_rqts(priv, priv->direct_tir, max_nch); if (err) goto err_destroy_indirect_rqts; @@ -5295,22 +4948,30 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) if (err) goto err_destroy_direct_rqts; - err = mlx5e_create_direct_tirs(priv, priv->direct_tir); + err = mlx5e_create_direct_tirs(priv, priv->direct_tir, max_nch); if (err) goto err_destroy_indirect_tirs; - err = mlx5e_create_direct_rqts(priv, priv->xsk_tir); + err = mlx5e_create_direct_rqts(priv, priv->xsk_tir, max_nch); if (unlikely(err)) goto err_destroy_direct_tirs; - err = mlx5e_create_direct_tirs(priv, priv->xsk_tir); + err = mlx5e_create_direct_tirs(priv, priv->xsk_tir, max_nch); if (unlikely(err)) goto err_destroy_xsk_rqts; + err = mlx5e_create_direct_rqts(priv, &priv->ptp_tir, 1); + if (err) + goto err_destroy_xsk_tirs; + + err = mlx5e_create_direct_tirs(priv, &priv->ptp_tir, 1); + if (err) + goto err_destroy_ptp_rqt; + err = mlx5e_create_flow_steering(priv); if (err) { mlx5_core_warn(mdev, "create flow steering failed, %d\n", err); - goto err_destroy_xsk_tirs; + goto err_destroy_ptp_direct_tir; } err = mlx5e_tc_nic_init(priv); @@ -5331,16 +4992,20 @@ err_tc_nic_cleanup: mlx5e_tc_nic_cleanup(priv); err_destroy_flow_steering: mlx5e_destroy_flow_steering(priv); +err_destroy_ptp_direct_tir: + mlx5e_destroy_direct_tirs(priv, &priv->ptp_tir, 1); +err_destroy_ptp_rqt: + mlx5e_destroy_direct_rqts(priv, &priv->ptp_tir, 1); err_destroy_xsk_tirs: - mlx5e_destroy_direct_tirs(priv, priv->xsk_tir); + mlx5e_destroy_direct_tirs(priv, priv->xsk_tir, max_nch); err_destroy_xsk_rqts: - mlx5e_destroy_direct_rqts(priv, priv->xsk_tir); + mlx5e_destroy_direct_rqts(priv, priv->xsk_tir, max_nch); err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv, priv->direct_tir); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); err_destroy_indirect_tirs: mlx5e_destroy_indirect_tirs(priv); err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv, priv->direct_tir); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); err_destroy_indirect_rqts: mlx5e_destroy_rqt(priv, 
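/*
 * mlx5e_nic_init()/mlx5e_nic_cleanup() above now key the health
 * reporters on whether the devlink port was actually registered (the
 * registration itself moves to probe time further below), keeping create
 * and destroy symmetric. A stand-alone model of that guard, with
 * invented reporters_create()/reporters_destroy():
 */
#include <stdbool.h>
#include <stdio.h>

struct dl_port { bool registered; };

static void reporters_create(void) { puts("reporters created"); }
static void reporters_destroy(void) { puts("reporters destroyed"); }

static void nic_init(const struct dl_port *p)
{
	if (p->registered)
		reporters_create();
}

static void nic_cleanup(const struct dl_port *p)
{
	if (p->registered)
		reporters_destroy();
}

int main(void)
{
	struct dl_port port = { .registered = true };

	nic_init(&port);
	nic_cleanup(&port);
	return 0;
}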
&priv->indir_rqt); err_close_drop_rq: @@ -5352,14 +5017,18 @@ err_destroy_q_counters: static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv) { + u16 max_nch = priv->max_nch; + mlx5e_accel_cleanup_rx(priv); mlx5e_tc_nic_cleanup(priv); mlx5e_destroy_flow_steering(priv); - mlx5e_destroy_direct_tirs(priv, priv->xsk_tir); - mlx5e_destroy_direct_rqts(priv, priv->xsk_tir); - mlx5e_destroy_direct_tirs(priv, priv->direct_tir); + mlx5e_destroy_direct_tirs(priv, &priv->ptp_tir, 1); + mlx5e_destroy_direct_rqts(priv, &priv->ptp_tir, 1); + mlx5e_destroy_direct_tirs(priv, priv->xsk_tir, max_nch); + mlx5e_destroy_direct_rqts(priv, priv->xsk_tir, max_nch); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); mlx5e_destroy_indirect_tirs(priv); - mlx5e_destroy_direct_rqts(priv, priv->direct_tir); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); mlx5e_destroy_rqt(priv, &priv->indir_rqt); mlx5e_close_drop_rq(&priv->drop_rq); mlx5e_destroy_q_counters(priv); @@ -5405,7 +5074,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) return; mlx5e_dcbnl_init_app(priv); - queue_work(priv->wq, &priv->set_rx_mode_work); + mlx5e_nic_set_rx_mode(priv); rtnl_lock(); if (netif_running(netdev)) @@ -5428,7 +5097,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) netif_device_detach(priv->netdev); rtnl_unlock(); - queue_work(priv->wq, &priv->set_rx_mode_work); + mlx5e_nic_set_rx_mode(priv); mlx5e_hv_vhca_stats_destroy(priv); if (mlx5e_monitor_counter_supported(priv)) @@ -5467,6 +5136,7 @@ static const struct mlx5e_profile mlx5e_nic_profile = { .rq_groups = MLX5E_NUM_RQ_GROUPS(XSK), .stats_grps = mlx5e_nic_stats_grps, .stats_grps_num = mlx5e_nic_stats_grps_num, + .rx_ptp_support = true, }; /* mlx5e generic netdev management API (move to en_common.c) */ @@ -5474,8 +5144,6 @@ int mlx5e_priv_init(struct mlx5e_priv *priv, struct net_device *netdev, struct mlx5_core_dev *mdev) { - memset(priv, 0, sizeof(*priv)); - /* priv init */ priv->mdev = mdev; priv->netdev = netdev; @@ -5508,12 +5176,18 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv) { int i; + /* bail if change profile failed and also rollback failed */ + if (!priv->mdev) + return; + destroy_workqueue(priv->wq); free_cpumask_var(priv->scratchpad.cpumask); for (i = 0; i < priv->htb.max_qos_sqs; i++) kfree(priv->htb.qos_sq_stats[i]); kvfree(priv->htb.qos_sq_stats); + + memset(priv, 0, sizeof(*priv)); } struct net_device * @@ -5630,11 +5304,10 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv) } static int -mlx5e_netdev_attach_profile(struct mlx5e_priv *priv, +mlx5e_netdev_attach_profile(struct net_device *netdev, struct mlx5_core_dev *mdev, const struct mlx5e_profile *new_profile, void *new_ppriv) { - struct net_device *netdev = priv->netdev; - struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_priv *priv = netdev_priv(netdev); int err; err = mlx5e_priv_init(priv, netdev, mdev); @@ -5647,10 +5320,16 @@ mlx5e_netdev_attach_profile(struct mlx5e_priv *priv, priv->ppriv = new_ppriv; err = new_profile->init(priv->mdev, priv->netdev); if (err) - return err; + goto priv_cleanup; err = mlx5e_attach_netdev(priv); if (err) - new_profile->cleanup(priv); + goto profile_cleanup; + return err; + +profile_cleanup: + new_profile->cleanup(priv); +priv_cleanup: + mlx5e_priv_cleanup(priv); return err; } @@ -5659,13 +5338,14 @@ int mlx5e_netdev_change_profile(struct mlx5e_priv *priv, { unsigned int new_max_nch = mlx5e_calc_max_nch(priv, new_profile); const struct mlx5e_profile *orig_profile = priv->profile; + struct net_device *netdev = priv->netdev; 
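/*
 * mlx5e_netdev_change_profile() above (continued below) follows a
 * try-new/roll-back-to-original shape: if attaching the new profile
 * fails, the original profile is re-attached, and if even that fails
 * there is nothing left to do but report it. A highly simplified model
 * -- the real code also detaches the netdev and cleans up priv between
 * steps, which is omitted here:
 */
#include <stdio.h>

typedef int (*profile_init_fn)(void);

static int new_init(void) { return -1; }	/* force the rollback */
static int orig_init(void) { return 0; }

static int change_profile(profile_init_fn new_p, profile_init_fn orig)
{
	int err = new_p();

	if (!err)
		return 0;
	if (orig())	/* rollback also failed: nothing left to undo */
		puts("rollback failed");
	return err;	/* report the original failure either way */
}

int main(void)
{
	printf("change_profile = %d\n", change_profile(new_init, orig_init));
	return 0;
}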
+ struct mlx5_core_dev *mdev = priv->mdev; void *orig_ppriv = priv->ppriv; int err, rollback_err; /* sanity */ if (new_max_nch != priv->max_nch) { - netdev_warn(priv->netdev, - "%s: Replacing profile with different max channels\n", + netdev_warn(netdev, "%s: Replacing profile with different max channels\n", __func__); return -EINVAL; } @@ -5675,25 +5355,27 @@ int mlx5e_netdev_change_profile(struct mlx5e_priv *priv, priv->profile->cleanup(priv); mlx5e_priv_cleanup(priv); - err = mlx5e_netdev_attach_profile(priv, new_profile, new_ppriv); + err = mlx5e_netdev_attach_profile(netdev, mdev, new_profile, new_ppriv); if (err) { /* roll back to original profile */ - netdev_warn(priv->netdev, "%s: new profile init failed, %d\n", - __func__, err); + netdev_warn(netdev, "%s: new profile init failed, %d\n", __func__, err); goto rollback; } return 0; rollback: - rollback_err = mlx5e_netdev_attach_profile(priv, orig_profile, orig_ppriv); - if (rollback_err) { - netdev_err(priv->netdev, - "%s: failed to rollback to orig profile, %d\n", + rollback_err = mlx5e_netdev_attach_profile(netdev, mdev, orig_profile, orig_ppriv); + if (rollback_err) + netdev_err(netdev, "%s: failed to rollback to orig profile, %d\n", __func__, rollback_err); - } return err; } +void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv) +{ + mlx5e_netdev_change_profile(priv, &mlx5e_nic_profile, NULL); +} + void mlx5e_destroy_netdev(struct mlx5e_priv *priv) { struct net_device *netdev = priv->netdev; @@ -5776,10 +5458,17 @@ static int mlx5e_probe(struct auxiliary_device *adev, priv->profile = profile; priv->ppriv = NULL; + + err = mlx5e_devlink_port_register(priv); + if (err) { + mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err); + goto err_destroy_netdev; + } + err = profile->init(mdev, netdev); if (err) { mlx5_core_err(mdev, "mlx5e_nic_profile init failed, %d\n", err); - goto err_destroy_netdev; + goto err_devlink_cleanup; } err = mlx5e_resume(adev); @@ -5797,12 +5486,15 @@ static int mlx5e_probe(struct auxiliary_device *adev, mlx5e_devlink_port_type_eth_set(priv); mlx5e_dcbnl_init_app(priv); + mlx5_uplink_netdev_set(mdev, netdev); return 0; err_resume: mlx5e_suspend(adev, state); err_profile_cleanup: profile->cleanup(priv); +err_devlink_cleanup: + mlx5e_devlink_port_unregister(priv); err_destroy_netdev: mlx5e_destroy_netdev(priv); return err; @@ -5817,6 +5509,7 @@ static void mlx5e_remove(struct auxiliary_device *adev) unregister_netdev(priv->netdev); mlx5e_suspend(adev, state); priv->profile->cleanup(priv); + mlx5e_devlink_port_unregister(priv); mlx5e_destroy_netdev(priv); } @@ -5842,18 +5535,18 @@ int mlx5e_init(void) mlx5e_ipsec_build_inverse_table(); mlx5e_build_ptys2ethtool_map(); - ret = mlx5e_rep_init(); + ret = auxiliary_driver_register(&mlx5e_driver); if (ret) return ret; - ret = auxiliary_driver_register(&mlx5e_driver); + ret = mlx5e_rep_init(); if (ret) - mlx5e_rep_cleanup(); + auxiliary_driver_unregister(&mlx5e_driver); return ret; } void mlx5e_cleanup(void) { - auxiliary_driver_unregister(&mlx5e_driver); mlx5e_rep_cleanup(); + auxiliary_driver_unregister(&mlx5e_driver); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index a132fff7a980..34eb1118670f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -40,17 +40,19 @@ #include "eswitch.h" #include "en.h" #include "en_rep.h" +#include "en/params.h" #include "en/txrx.h" #include "en_tc.h" #include "en/rep/tc.h" 
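/*
 * The init/cleanup reordering at the end of en_main.c above registers
 * the auxiliary driver first and the representor layer second, with
 * cleanup running strictly in reverse and a failure in the second step
 * undoing the first. A compilable model of that ordering rule (all four
 * helpers are stubs):
 */
#include <stdio.h>

static int driver_register(void) { puts("driver registered"); return 0; }
static void driver_unregister(void) { puts("driver unregistered"); }
static int rep_init(void) { puts("rep initialized"); return 0; }
static void rep_cleanup(void) { puts("rep cleaned up"); }

static int mod_init(void)
{
	int err = driver_register();

	if (err)
		return err;
	err = rep_init();
	if (err)
		driver_unregister();	/* second step failed: undo the first */
	return err;
}

static void mod_exit(void)
{
	rep_cleanup();			/* strictly reverse order of mod_init() */
	driver_unregister();
}

int main(void)
{
	if (!mod_init())
		mod_exit();
	return 0;
}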
#include "en/rep/neigh.h" +#include "en/devlink.h" #include "fs_core.h" #include "lib/mlx5.h" #define CREATE_TRACE_POINTS #include "diag/en_rep_tracepoint.h" #define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \ - max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE) + max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE) #define MLX5E_REP_PARAMS_DEF_NUM_CHANNELS 1 static const char mlx5e_rep_driver_name[] = "mlx5e_rep"; @@ -69,16 +71,6 @@ static void mlx5e_rep_get_drvinfo(struct net_device *dev, fw_rev_sub(mdev), mdev->board_id); } -static void mlx5e_uplink_rep_get_drvinfo(struct net_device *dev, - struct ethtool_drvinfo *drvinfo) -{ - struct mlx5e_priv *priv = netdev_priv(dev); - - mlx5e_rep_get_drvinfo(dev, drvinfo); - strlcpy(drvinfo->bus_info, pci_name(priv->mdev->pdev), - sizeof(drvinfo->bus_info)); -} - static const struct counter_desc sw_rep_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) }, @@ -285,46 +277,6 @@ static u32 mlx5e_rep_get_rxfh_indir_size(struct net_device *netdev) return mlx5e_ethtool_get_rxfh_indir_size(priv); } -static void mlx5e_uplink_rep_get_pause_stats(struct net_device *netdev, - struct ethtool_pause_stats *stats) -{ - struct mlx5e_priv *priv = netdev_priv(netdev); - - mlx5e_stats_pause_get(priv, stats); -} - -static void mlx5e_uplink_rep_get_pauseparam(struct net_device *netdev, - struct ethtool_pauseparam *pauseparam) -{ - struct mlx5e_priv *priv = netdev_priv(netdev); - - mlx5e_ethtool_get_pauseparam(priv, pauseparam); -} - -static int mlx5e_uplink_rep_set_pauseparam(struct net_device *netdev, - struct ethtool_pauseparam *pauseparam) -{ - struct mlx5e_priv *priv = netdev_priv(netdev); - - return mlx5e_ethtool_set_pauseparam(priv, pauseparam); -} - -static int mlx5e_uplink_rep_get_link_ksettings(struct net_device *netdev, - struct ethtool_link_ksettings *link_ksettings) -{ - struct mlx5e_priv *priv = netdev_priv(netdev); - - return mlx5e_ethtool_get_link_ksettings(priv, link_ksettings); -} - -static int mlx5e_uplink_rep_set_link_ksettings(struct net_device *netdev, - const struct ethtool_link_ksettings *link_ksettings) -{ - struct mlx5e_priv *priv = netdev_priv(netdev); - - return mlx5e_ethtool_set_link_ksettings(priv, link_ksettings); -} - static const struct ethtool_ops mlx5e_rep_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES | @@ -344,34 +296,6 @@ static const struct ethtool_ops mlx5e_rep_ethtool_ops = { .get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size, }; -static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = { - .supported_coalesce_params = ETHTOOL_COALESCE_USECS | - ETHTOOL_COALESCE_MAX_FRAMES | - ETHTOOL_COALESCE_USE_ADAPTIVE, - .get_drvinfo = mlx5e_uplink_rep_get_drvinfo, - .get_link = ethtool_op_get_link, - .get_strings = mlx5e_rep_get_strings, - .get_sset_count = mlx5e_rep_get_sset_count, - .get_ethtool_stats = mlx5e_rep_get_ethtool_stats, - .get_ringparam = mlx5e_rep_get_ringparam, - .set_ringparam = mlx5e_rep_set_ringparam, - .get_channels = mlx5e_rep_get_channels, - .set_channels = mlx5e_rep_set_channels, - .get_coalesce = mlx5e_rep_get_coalesce, - .set_coalesce = mlx5e_rep_set_coalesce, - .get_link_ksettings = mlx5e_uplink_rep_get_link_ksettings, - .set_link_ksettings = mlx5e_uplink_rep_set_link_ksettings, - .get_rxfh_key_size = mlx5e_rep_get_rxfh_key_size, - .get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size, - .get_rxfh = mlx5e_get_rxfh, - .set_rxfh = mlx5e_set_rxfh, - .get_rxnfc = mlx5e_get_rxnfc, - .set_rxnfc = mlx5e_set_rxnfc, 
- .get_pause_stats = mlx5e_uplink_rep_get_pause_stats, - .get_pauseparam = mlx5e_uplink_rep_get_pauseparam, - .set_pauseparam = mlx5e_uplink_rep_set_pauseparam, -}; - static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) { @@ -411,8 +335,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw, } /* Add re-inject rule to the PF/representor sqs */ - flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, - rep->vport, + flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, rep, sqns_array[i]); if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); @@ -522,7 +445,7 @@ bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) return (rep->vport == MLX5_VPORT_UPLINK); } -static bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id) +bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id) { switch (attr_id) { case IFLA_OFFLOAD_XSTATS_CPU_HIT: @@ -542,8 +465,8 @@ mlx5e_get_sw_stats64(const struct net_device *dev, return 0; } -static int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev, - void *sp) +int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev, + void *sp) { switch (attr_id) { case IFLA_OFFLOAD_XSTATS_CPU_HIT: @@ -568,34 +491,6 @@ static int mlx5e_rep_change_mtu(struct net_device *netdev, int new_mtu) return mlx5e_change_mtu(netdev, new_mtu, NULL); } -static int mlx5e_uplink_rep_change_mtu(struct net_device *netdev, int new_mtu) -{ - return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu_ctx); -} - -static int mlx5e_uplink_rep_set_mac(struct net_device *netdev, void *addr) -{ - struct sockaddr *saddr = addr; - - if (!is_valid_ether_addr(saddr->sa_data)) - return -EADDRNOTAVAIL; - - ether_addr_copy(netdev->dev_addr, saddr->sa_data); - return 0; -} - -static int mlx5e_uplink_rep_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos, - __be16 vlan_proto) -{ - netdev_warn_once(dev, "legacy vf vlan setting isn't supported in switchdev mode\n"); - - if (vlan != 0) - return -EOPNOTSUPP; - - /* allow setting 0-vid for compatibility with libvirt */ - return 0; -} - static struct devlink_port *mlx5e_rep_get_devlink_port(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -641,29 +536,10 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = { .ndo_change_carrier = mlx5e_rep_change_carrier, }; -static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = { - .ndo_open = mlx5e_open, - .ndo_stop = mlx5e_close, - .ndo_start_xmit = mlx5e_xmit, - .ndo_set_mac_address = mlx5e_uplink_rep_set_mac, - .ndo_setup_tc = mlx5e_rep_setup_tc, - .ndo_get_devlink_port = mlx5e_rep_get_devlink_port, - .ndo_get_stats64 = mlx5e_get_stats, - .ndo_has_offload_stats = mlx5e_rep_has_offload_stats, - .ndo_get_offload_stats = mlx5e_rep_get_offload_stats, - .ndo_change_mtu = mlx5e_uplink_rep_change_mtu, - .ndo_features_check = mlx5e_features_check, - .ndo_set_vf_mac = mlx5e_set_vf_mac, - .ndo_set_vf_rate = mlx5e_set_vf_rate, - .ndo_get_vf_config = mlx5e_get_vf_config, - .ndo_get_vf_stats = mlx5e_get_vf_stats, - .ndo_set_vf_vlan = mlx5e_uplink_rep_set_vf_vlan, - .ndo_set_features = mlx5e_set_features, -}; - bool mlx5e_eswitch_uplink_rep(struct net_device *netdev) { - return netdev->netdev_ops == &mlx5e_netdev_ops_uplink_rep; + return netdev->netdev_ops == &mlx5e_netdev_ops && + mlx5e_is_uplink_rep(netdev_priv(netdev)); } bool mlx5e_eswitch_vf_rep(struct net_device *netdev) @@ -713,26 +589,15 @@ static void mlx5e_build_rep_params(struct net_device *netdev) } static void 
mlx5e_build_rep_netdev(struct net_device *netdev, - struct mlx5_core_dev *mdev, - struct mlx5_eswitch_rep *rep) + struct mlx5_core_dev *mdev) { SET_NETDEV_DEV(netdev, mdev->device); - if (rep->vport == MLX5_VPORT_UPLINK) { - netdev->netdev_ops = &mlx5e_netdev_ops_uplink_rep; - /* we want a persistent mac for the uplink rep */ - mlx5_query_mac_address(mdev, netdev->dev_addr); - netdev->ethtool_ops = &mlx5e_uplink_rep_ethtool_ops; - mlx5e_dcbnl_build_rep_netdev(netdev); - } else { - netdev->netdev_ops = &mlx5e_netdev_ops_rep; - eth_hw_addr_random(netdev); - netdev->ethtool_ops = &mlx5e_rep_ethtool_ops; - } + netdev->netdev_ops = &mlx5e_netdev_ops_rep; + eth_hw_addr_random(netdev); + netdev->ethtool_ops = &mlx5e_rep_ethtool_ops; netdev->watchdog_timeo = 15 * HZ; - netdev->features |= NETIF_F_NETNS_LOCAL; - #if IS_ENABLED(CONFIG_MLX5_CLS_ACT) netdev->hw_features |= NETIF_F_HW_TC; #endif @@ -744,12 +609,9 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev, netdev->hw_features |= NETIF_F_TSO6; netdev->hw_features |= NETIF_F_RXCSUM; - if (rep->vport == MLX5_VPORT_UPLINK) - netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; - else - netdev->features |= NETIF_F_VLAN_CHALLENGED; - netdev->features |= netdev->hw_features; + netdev->features |= NETIF_F_VLAN_CHALLENGED; + netdev->features |= NETIF_F_NETNS_LOCAL; } static int mlx5e_init_rep(struct mlx5_core_dev *mdev, @@ -890,6 +752,7 @@ int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup) static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; + u16 max_nch = priv->max_nch; int err; mlx5e_init_l2_addr(priv); @@ -904,7 +767,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) if (err) goto err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv, priv->direct_tir); + err = mlx5e_create_direct_rqts(priv, priv->direct_tir, max_nch); if (err) goto err_destroy_indirect_rqts; @@ -912,7 +775,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) if (err) goto err_destroy_direct_rqts; - err = mlx5e_create_direct_tirs(priv, priv->direct_tir); + err = mlx5e_create_direct_tirs(priv, priv->direct_tir, max_nch); if (err) goto err_destroy_indirect_tirs; @@ -937,11 +800,11 @@ err_destroy_root_ft: err_destroy_ttc_table: mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv, priv->direct_tir); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); err_destroy_indirect_tirs: mlx5e_destroy_indirect_tirs(priv); err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv, priv->direct_tir); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); err_destroy_indirect_rqts: mlx5e_destroy_rqt(priv, &priv->indir_rqt); err_close_drop_rq: @@ -951,13 +814,15 @@ err_close_drop_rq: static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) { + u16 max_nch = priv->max_nch; + mlx5e_ethtool_cleanup_steering(priv); rep_vport_rx_rule_destroy(priv); mlx5e_destroy_rep_root_ft(priv); mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); - mlx5e_destroy_direct_tirs(priv, priv->direct_tir); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); mlx5e_destroy_indirect_tirs(priv); - mlx5e_destroy_direct_rqts(priv, priv->direct_tir); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); mlx5e_destroy_rqt(priv, &priv->indir_rqt); mlx5e_close_drop_rq(&priv->drop_rq); } @@ -1107,14 +972,23 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) mlx5e_rep_tc_enable(priv); - mlx5_modify_vport_admin_state(mdev, MLX5_VPORT_STATE_OP_MOD_UPLINK, - 0, 0, 
MLX5_VPORT_ADMIN_STATE_AUTO); + if (MLX5_CAP_GEN(mdev, uplink_follow)) + mlx5_modify_vport_admin_state(mdev, MLX5_VPORT_STATE_OP_MOD_UPLINK, + 0, 0, MLX5_VPORT_ADMIN_STATE_AUTO); mlx5_lag_add(mdev, netdev); priv->events_nb.notifier_call = uplink_rep_async_event; mlx5_notifier_register(mdev, &priv->events_nb); mlx5e_dcbnl_initialize(priv); mlx5e_dcbnl_init_app(priv); mlx5e_rep_neigh_init(rpriv); + + netdev->wanted_features |= NETIF_F_HW_TC; + + rtnl_lock(); + if (netif_running(netdev)) + mlx5e_open(netdev); + netif_device_attach(netdev); + rtnl_unlock(); } static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) @@ -1122,6 +996,12 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_core_dev *mdev = priv->mdev; + rtnl_lock(); + if (netif_running(priv->netdev)) + mlx5e_close(priv->netdev); + netif_device_detach(priv->netdev); + rtnl_unlock(); + mlx5e_rep_neigh_cleanup(rpriv); mlx5e_dcbnl_delete_app(priv); mlx5_notifier_unregister(mdev, &priv->events_nb); @@ -1182,6 +1062,7 @@ static const struct mlx5e_profile mlx5e_rep_profile = { .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), .stats_grps = mlx5e_rep_stats_grps, .stats_grps_num = mlx5e_rep_stats_grps_num, + .rx_ptp_support = false, }; static const struct mlx5e_profile mlx5e_uplink_rep_profile = { @@ -1198,33 +1079,65 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = { .update_carrier = mlx5e_update_carrier, .rx_handlers = &mlx5e_rx_handlers_rep, .max_tc = MLX5E_MAX_NUM_TC, - .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), + /* XSK is needed so we can replace profile with NIC netdev */ + .rq_groups = MLX5E_NUM_RQ_GROUPS(XSK), .stats_grps = mlx5e_ul_rep_stats_grps, .stats_grps_num = mlx5e_ul_rep_stats_grps_num, + .rx_ptp_support = false, }; /* e-Switch vport representors */ static int -mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) +mlx5e_vport_uplink_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_priv *priv = netdev_priv(mlx5_uplink_netdev_get(dev)); + struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep); + struct devlink_port *dl_port; + int err; + + rpriv->netdev = priv->netdev; + + err = mlx5e_netdev_change_profile(priv, &mlx5e_uplink_rep_profile, + rpriv); + if (err) + return err; + + dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport); + if (dl_port) + devlink_port_type_eth_set(dl_port, rpriv->netdev); + + return 0; +} + +static void +mlx5e_vport_uplink_rep_unload(struct mlx5e_rep_priv *rpriv) +{ + struct net_device *netdev = rpriv->netdev; + struct devlink_port *dl_port; + struct mlx5_core_dev *dev; + struct mlx5e_priv *priv; + + priv = netdev_priv(netdev); + dev = priv->mdev; + + dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport); + if (dl_port) + devlink_port_type_clear(dl_port); + mlx5e_netdev_attach_nic_profile(priv); +} + +static int +mlx5e_vport_vf_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) { + struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep); const struct mlx5e_profile *profile; - struct mlx5e_rep_priv *rpriv; struct devlink_port *dl_port; struct net_device *netdev; struct mlx5e_priv *priv; unsigned int txqs, rxqs; int nch, err; - rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); - if (!rpriv) - return -ENOMEM; - - /* rpriv->rep to be looked up when profile->init() is called */ - rpriv->rep = rep; - - profile = (rep->vport == MLX5_VPORT_UPLINK) ? 
- &mlx5e_uplink_rep_profile : &mlx5e_rep_profile; - + profile = &mlx5e_rep_profile; nch = mlx5e_get_max_num_channels(dev); txqs = nch * profile->max_tc; rxqs = nch * profile->rq_groups; @@ -1233,21 +1146,11 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) mlx5_core_warn(dev, "Failed to create representor netdev for vport %d\n", rep->vport); - kfree(rpriv); return -EINVAL; } - mlx5e_build_rep_netdev(netdev, dev, rep); - + mlx5e_build_rep_netdev(netdev, dev); rpriv->netdev = netdev; - rep->rep_data[REP_ETH].priv = rpriv; - INIT_LIST_HEAD(&rpriv->vport_sqs_list); - - if (rep->vport == MLX5_VPORT_UPLINK) { - err = mlx5e_create_mdev_resources(dev); - if (err) - goto err_destroy_netdev; - } priv = netdev_priv(netdev); priv->profile = profile; @@ -1255,7 +1158,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) err = profile->init(dev, netdev); if (err) { netdev_warn(netdev, "rep profile init failed, %d\n", err); - goto err_destroy_mdev_resources; + goto err_destroy_netdev; } err = mlx5e_attach_netdev(netdev_priv(netdev)); @@ -1285,13 +1188,34 @@ err_detach_netdev: err_cleanup_profile: priv->profile->cleanup(priv); -err_destroy_mdev_resources: - if (rep->vport == MLX5_VPORT_UPLINK) - mlx5e_destroy_mdev_resources(dev); - err_destroy_netdev: mlx5e_destroy_netdev(netdev_priv(netdev)); - kfree(rpriv); + return err; +} + +static int +mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_rep_priv *rpriv; + int err; + + rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); + if (!rpriv) + return -ENOMEM; + + /* rpriv->rep to be looked up when profile->init() is called */ + rpriv->rep = rep; + rep->rep_data[REP_ETH].priv = rpriv; + INIT_LIST_HEAD(&rpriv->vport_sqs_list); + + if (rep->vport == MLX5_VPORT_UPLINK) + err = mlx5e_vport_uplink_rep_load(dev, rep); + else + err = mlx5e_vport_vf_rep_load(dev, rep); + + if (err) + kfree(rpriv); + return err; } @@ -1305,15 +1229,19 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep) struct devlink_port *dl_port; void *ppriv = priv->ppriv; + if (rep->vport == MLX5_VPORT_UPLINK) { + mlx5e_vport_uplink_rep_unload(rpriv); + goto free_ppriv; + } + dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport); if (dl_port) devlink_port_type_clear(dl_port); unregister_netdev(netdev); mlx5e_detach_netdev(priv); priv->profile->cleanup(priv); - if (rep->vport == MLX5_VPORT_UPLINK) - mlx5e_destroy_mdev_resources(priv->mdev); mlx5e_destroy_netdev(priv); +free_ppriv: kfree(ppriv); /* mlx5e_rep_priv */ } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index d1696404cca9..22585015c7a7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -89,6 +89,7 @@ struct mlx5_rep_uplink_priv { struct mapping_ctx *tunnel_enc_opts_mapping; struct mlx5_tc_ct_priv *ct_priv; + struct mlx5_esw_psample *esw_psample; /* support eswitch vports bonding */ struct mlx5e_rep_bond *bond; @@ -220,6 +221,10 @@ void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw, const struct net_device *lag_dev); int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup); +bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id); +int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev, + void *sp); + bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv); int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv); void 
mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv); @@ -240,6 +245,11 @@ static inline int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) {} static inline int mlx5e_rep_init(void) { return 0; }; static inline void mlx5e_rep_cleanup(void) {}; +static inline bool mlx5e_rep_has_offload_stats(const struct net_device *dev, + int attr_id) { return false; } +static inline int mlx5e_rep_get_offload_stats(int attr_id, + const struct net_device *dev, + void *sp) { return -EOPNOTSUPP; } #endif static inline bool mlx5e_is_vport_rep(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 1b6ad94ebb10..f90894eea9e0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -52,6 +52,7 @@ #include "en/health.h" #include "en/params.h" #include "devlink.h" +#include "en/devlink.h" static struct sk_buff * mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, @@ -500,7 +501,6 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) struct mlx5e_icosq *sq = rq->icosq; struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5e_umr_wqe *umr_wqe; - u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1); u16 pi; int err; int i; @@ -531,7 +531,8 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) umr_wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR); - umr_wqe->uctrl.xlt_offset = cpu_to_be16(xlt_offset); + umr_wqe->uctrl.xlt_offset = + cpu_to_be16(MLX5_ALIGNED_MTTS_OCTW(MLX5E_REQUIRED_MTTS(ix))); sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) { .wqe_type = MLX5E_ICOSQ_WQE_UMR_RX, @@ -669,6 +670,7 @@ int mlx5e_poll_ico_cq(struct mlx5e_cq *cq) get_cqe_opcode(cqe)); mlx5e_dump_error_cqe(&sq->cq, sq->sqn, (struct mlx5_err_cqe *)cqe); + mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs); if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) queue_work(cq->priv->wq, &sq->recover_work); break; @@ -1822,6 +1824,7 @@ static void mlx5e_trap_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe struct mlx5e_priv *priv = netdev_priv(rq->netdev); struct mlx5_wq_cyc *wq = &rq->wqe.wq; struct mlx5e_wqe_frag_info *wi; + struct devlink_port *dl_port; struct sk_buff *skb; u32 cqe_bcnt; u16 trap_id; @@ -1844,7 +1847,8 @@ static void mlx5e_trap_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); skb_push(skb, ETH_HLEN); - mlx5_devlink_trap_report(rq->mdev, trap_id, skb, &priv->dl_port); + dl_port = mlx5e_devlink_get_dl_port(priv); + mlx5_devlink_trap_report(rq->mdev, trap_id, skb, dl_port); dev_kfree_skb_any(skb); free_wqe: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 92c5b81427b9..e4f5b6395148 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -116,7 +116,6 @@ static const struct counter_desc sw_stats_desc[] = { #ifdef CONFIG_MLX5_EN_TLS { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ctx) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ooo) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_packets) }, { MLX5E_DECLARE_STAT(struct 
mlx5e_sw_stats, tx_tls_dump_bytes) }, @@ -180,13 +179,12 @@ static const struct counter_desc sw_stats_desc[] = { #ifdef CONFIG_MLX5_EN_TLS { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_decrypted_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_decrypted_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_ctx) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_del) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_pkt) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_start) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_end) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_skip) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_res_ok) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_res_retry) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_res_skip) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_err) }, #endif @@ -342,13 +340,12 @@ static void mlx5e_stats_grp_sw_update_stats_rq_stats(struct mlx5e_sw_stats *s, #ifdef CONFIG_MLX5_EN_TLS s->rx_tls_decrypted_packets += rq_stats->tls_decrypted_packets; s->rx_tls_decrypted_bytes += rq_stats->tls_decrypted_bytes; - s->rx_tls_ctx += rq_stats->tls_ctx; - s->rx_tls_del += rq_stats->tls_del; s->rx_tls_resync_req_pkt += rq_stats->tls_resync_req_pkt; s->rx_tls_resync_req_start += rq_stats->tls_resync_req_start; s->rx_tls_resync_req_end += rq_stats->tls_resync_req_end; s->rx_tls_resync_req_skip += rq_stats->tls_resync_req_skip; s->rx_tls_resync_res_ok += rq_stats->tls_resync_res_ok; + s->rx_tls_resync_res_retry += rq_stats->tls_resync_res_retry; s->rx_tls_resync_res_skip += rq_stats->tls_resync_res_skip; s->rx_tls_err += rq_stats->tls_err; #endif @@ -390,7 +387,6 @@ static void mlx5e_stats_grp_sw_update_stats_sq(struct mlx5e_sw_stats *s, #ifdef CONFIG_MLX5_EN_TLS s->tx_tls_encrypted_packets += sq_stats->tls_encrypted_packets; s->tx_tls_encrypted_bytes += sq_stats->tls_encrypted_bytes; - s->tx_tls_ctx += sq_stats->tls_ctx; s->tx_tls_ooo += sq_stats->tls_ooo; s->tx_tls_dump_bytes += sq_stats->tls_dump_bytes; s->tx_tls_dump_packets += sq_stats->tls_dump_packets; @@ -407,13 +403,21 @@ static void mlx5e_stats_grp_sw_update_stats_ptp(struct mlx5e_priv *priv, { int i; - if (!priv->port_ptp_opened) + if (!priv->tx_ptp_opened && !priv->rx_ptp_opened) return; - mlx5e_stats_grp_sw_update_stats_ch_stats(s, &priv->port_ptp_stats.ch); + mlx5e_stats_grp_sw_update_stats_ch_stats(s, &priv->ptp_stats.ch); - for (i = 0; i < priv->max_opened_tc; i++) { - mlx5e_stats_grp_sw_update_stats_sq(s, &priv->port_ptp_stats.sq[i]); + if (priv->tx_ptp_opened) { + for (i = 0; i < priv->max_opened_tc; i++) { + mlx5e_stats_grp_sw_update_stats_sq(s, &priv->ptp_stats.sq[i]); + + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */ + barrier(); + } + } + if (priv->rx_ptp_opened) { + mlx5e_stats_grp_sw_update_stats_rq_stats(s, &priv->ptp_stats.rq); /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */ barrier(); @@ -766,35 +770,112 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(802_3) mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); } -#define MLX5E_READ_CTR64_BE_F(ptr, c) \ +#define MLX5E_READ_CTR64_BE_F(ptr, set, c) \ be64_to_cpu(*(__be64 *)((char *)ptr + \ MLX5_BYTE_OFF(ppcnt_reg, \ - counter_set.eth_802_3_cntrs_grp_data_layout.c##_high))) + counter_set.set.c##_high))) -void mlx5e_stats_pause_get(struct mlx5e_priv *priv, - struct ethtool_pause_stats *pause_stats) +static int mlx5e_stats_get_ieee(struct mlx5_core_dev 
*mdev, + u32 *ppcnt_ieee_802_3) { - u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)]; - struct mlx5_core_dev *mdev = priv->mdev; u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); if (!MLX5_BASIC_PPCNT_SUPPORTED(mdev)) - return; + return -EOPNOTSUPP; MLX5_SET(ppcnt_reg, in, local_port, 1); MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); - mlx5_core_access_reg(mdev, in, sz, ppcnt_ieee_802_3, - sz, MLX5_REG_PPCNT, 0, 0); + return mlx5_core_access_reg(mdev, in, sz, ppcnt_ieee_802_3, + sz, MLX5_REG_PPCNT, 0, 0); +} + +void mlx5e_stats_pause_get(struct mlx5e_priv *priv, + struct ethtool_pause_stats *pause_stats) +{ + u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)]; + struct mlx5_core_dev *mdev = priv->mdev; + + if (mlx5e_stats_get_ieee(mdev, ppcnt_ieee_802_3)) + return; pause_stats->tx_pause_frames = MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, + eth_802_3_cntrs_grp_data_layout, a_pause_mac_ctrl_frames_transmitted); pause_stats->rx_pause_frames = MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, + eth_802_3_cntrs_grp_data_layout, a_pause_mac_ctrl_frames_received); } +void mlx5e_stats_eth_phy_get(struct mlx5e_priv *priv, + struct ethtool_eth_phy_stats *phy_stats) +{ + u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)]; + struct mlx5_core_dev *mdev = priv->mdev; + + if (mlx5e_stats_get_ieee(mdev, ppcnt_ieee_802_3)) + return; + + phy_stats->SymbolErrorDuringCarrier = + MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, + eth_802_3_cntrs_grp_data_layout, + a_symbol_error_during_carrier); +} + +void mlx5e_stats_eth_mac_get(struct mlx5e_priv *priv, + struct ethtool_eth_mac_stats *mac_stats) +{ + u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)]; + struct mlx5_core_dev *mdev = priv->mdev; + + if (mlx5e_stats_get_ieee(mdev, ppcnt_ieee_802_3)) + return; + +#define RD(name) \ + MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, \ + eth_802_3_cntrs_grp_data_layout, \ + name) + + mac_stats->FramesTransmittedOK = RD(a_frames_transmitted_ok); + mac_stats->FramesReceivedOK = RD(a_frames_received_ok); + mac_stats->FrameCheckSequenceErrors = RD(a_frame_check_sequence_errors); + mac_stats->OctetsTransmittedOK = RD(a_octets_transmitted_ok); + mac_stats->OctetsReceivedOK = RD(a_octets_received_ok); + mac_stats->MulticastFramesXmittedOK = RD(a_multicast_frames_xmitted_ok); + mac_stats->BroadcastFramesXmittedOK = RD(a_broadcast_frames_xmitted_ok); + mac_stats->MulticastFramesReceivedOK = RD(a_multicast_frames_received_ok); + mac_stats->BroadcastFramesReceivedOK = RD(a_broadcast_frames_received_ok); + mac_stats->InRangeLengthErrors = RD(a_in_range_length_errors); + mac_stats->OutOfRangeLengthField = RD(a_out_of_range_length_field); + mac_stats->FrameTooLongErrors = RD(a_frame_too_long_errors); +#undef RD +} + +void mlx5e_stats_eth_ctrl_get(struct mlx5e_priv *priv, + struct ethtool_eth_ctrl_stats *ctrl_stats) +{ + u32 ppcnt_ieee_802_3[MLX5_ST_SZ_DW(ppcnt_reg)]; + struct mlx5_core_dev *mdev = priv->mdev; + + if (mlx5e_stats_get_ieee(mdev, ppcnt_ieee_802_3)) + return; + + ctrl_stats->MACControlFramesTransmitted = + MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, + eth_802_3_cntrs_grp_data_layout, + a_mac_control_frames_transmitted); + ctrl_stats->MACControlFramesReceived = + MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, + eth_802_3_cntrs_grp_data_layout, + a_mac_control_frames_received); + ctrl_stats->UnsupportedOpcodesReceived = + MLX5E_READ_CTR64_BE_F(ppcnt_ieee_802_3, + eth_802_3_cntrs_grp_data_layout, + a_unsupported_opcodes_received); +} + #define PPORT_2863_OFF(c) \ MLX5_BYTE_OFF(ppcnt_reg, \ 
counter_set.eth_2863_cntrs_grp_data_layout.c##_high) @@ -906,6 +987,59 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(2819) mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); } +static const struct ethtool_rmon_hist_range mlx5e_rmon_ranges[] = { + { 0, 64 }, + { 65, 127 }, + { 128, 255 }, + { 256, 511 }, + { 512, 1023 }, + { 1024, 1518 }, + { 1519, 2047 }, + { 2048, 4095 }, + { 4096, 8191 }, + { 8192, 10239 }, + {} +}; + +void mlx5e_stats_rmon_get(struct mlx5e_priv *priv, + struct ethtool_rmon_stats *rmon, + const struct ethtool_rmon_hist_range **ranges) +{ + u32 ppcnt_RFC_2819_counters[MLX5_ST_SZ_DW(ppcnt_reg)]; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + + MLX5_SET(ppcnt_reg, in, local_port, 1); + MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP); + if (mlx5_core_access_reg(mdev, in, sz, ppcnt_RFC_2819_counters, + sz, MLX5_REG_PPCNT, 0, 0)) + return; + +#define RD(name) \ + MLX5E_READ_CTR64_BE_F(ppcnt_RFC_2819_counters, \ + eth_2819_cntrs_grp_data_layout, \ + name) + + rmon->undersize_pkts = RD(ether_stats_undersize_pkts); + rmon->fragments = RD(ether_stats_fragments); + rmon->jabbers = RD(ether_stats_jabbers); + + rmon->hist[0] = RD(ether_stats_pkts64octets); + rmon->hist[1] = RD(ether_stats_pkts65to127octets); + rmon->hist[2] = RD(ether_stats_pkts128to255octets); + rmon->hist[3] = RD(ether_stats_pkts256to511octets); + rmon->hist[4] = RD(ether_stats_pkts512to1023octets); + rmon->hist[5] = RD(ether_stats_pkts1024to1518octets); + rmon->hist[6] = RD(ether_stats_pkts1519to2047octets); + rmon->hist[7] = RD(ether_stats_pkts2048to4095octets); + rmon->hist[8] = RD(ether_stats_pkts4096to8191octets); + rmon->hist[9] = RD(ether_stats_pkts8192to10239octets); +#undef RD + + *ranges = mlx5e_rmon_ranges; +} + #define PPORT_PHY_STATISTICAL_OFF(c) \ MLX5_BYTE_OFF(ppcnt_reg, \ counter_set.phys_layer_statistical_cntrs.c##_high) @@ -1013,6 +1147,29 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(phy) mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); } +void mlx5e_stats_fec_get(struct mlx5e_priv *priv, + struct ethtool_fec_stats *fec_stats) +{ + u32 ppcnt_phy_statistical[MLX5_ST_SZ_DW(ppcnt_reg)]; + struct mlx5_core_dev *mdev = priv->mdev; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + + if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) + return; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP); + if (mlx5_core_access_reg(mdev, in, sz, ppcnt_phy_statistical, + sz, MLX5_REG_PPCNT, 0, 0)) + return; + + fec_stats->corrected_bits.total = + MLX5E_READ_CTR64_BE_F(ppcnt_phy_statistical, + phys_layer_statistical_cntrs, + phy_corrected_bits); +} + #define PPORT_ETH_EXT_OFF(c) \ MLX5_BYTE_OFF(ppcnt_reg, \ counter_set.eth_extended_cntrs_grp_data_layout.c##_high) @@ -1622,13 +1779,12 @@ static const struct counter_desc rq_stats_desc[] = { #ifdef CONFIG_MLX5_EN_TLS { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_decrypted_packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_decrypted_bytes) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_ctx) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_del) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_pkt) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_start) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_end) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, 
tls_resync_req_skip) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_res_ok) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_res_retry) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_res_skip) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_err) }, #endif @@ -1650,7 +1806,6 @@ static const struct counter_desc sq_stats_desc[] = { #ifdef CONFIG_MLX5_EN_TLS { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ctx) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ooo) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) }, @@ -1760,6 +1915,38 @@ static const struct counter_desc ptp_cq_stats_desc[] = { { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort_abs_diff_ns) }, }; +static const struct counter_desc ptp_rq_stats_desc[] = { + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, packets) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, bytes) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_complete) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_complete_tail) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_complete_tail_slow) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_unnecessary) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_none) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, xdp_drop) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, xdp_redirect) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, lro_packets) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, lro_bytes) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, ecn_mark) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, removed_vlan_packets) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, wqe_err) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_strides) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, oversize_pkts_sw_drop) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_reuse) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_full) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_empty) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_busy) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_waive) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, congst_umr) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, arfs_err) }, + { MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, recover) }, +}; + static const struct counter_desc qos_sq_stats_desc[] = { { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, packets) }, { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, bytes) }, @@ -1776,7 +1963,6 @@ static const struct counter_desc qos_sq_stats_desc[] = { #ifdef CONFIG_MLX5_EN_TLS { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) }, { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) }, - { 
MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_ctx) }, { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_ooo) }, { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) }, { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) }, @@ -1805,6 +1991,7 @@ static const struct counter_desc qos_sq_stats_desc[] = { #define NUM_PTP_SQ_STATS ARRAY_SIZE(ptp_sq_stats_desc) #define NUM_PTP_CH_STATS ARRAY_SIZE(ptp_ch_stats_desc) #define NUM_PTP_CQ_STATS ARRAY_SIZE(ptp_cq_stats_desc) +#define NUM_PTP_RQ_STATS ARRAY_SIZE(ptp_rq_stats_desc) #define NUM_QOS_SQ_STATS ARRAY_SIZE(qos_sq_stats_desc) static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(qos) @@ -1851,32 +2038,46 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(qos) { return; } static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ptp) { - return priv->port_ptp_opened ? - NUM_PTP_CH_STATS + - ((NUM_PTP_SQ_STATS + NUM_PTP_CQ_STATS) * priv->max_opened_tc) : - 0; + int num = NUM_PTP_CH_STATS; + + if (!priv->tx_ptp_opened && !priv->rx_ptp_opened) + return 0; + + if (priv->tx_ptp_opened) + num += (NUM_PTP_SQ_STATS + NUM_PTP_CQ_STATS) * priv->max_opened_tc; + if (priv->rx_ptp_opened) + num += NUM_PTP_RQ_STATS; + + return num; } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ptp) { int i, tc; - if (!priv->port_ptp_opened) + if (!priv->tx_ptp_opened && !priv->rx_ptp_opened) return idx; for (i = 0; i < NUM_PTP_CH_STATS; i++) sprintf(data + (idx++) * ETH_GSTRING_LEN, ptp_ch_stats_desc[i].format); - for (tc = 0; tc < priv->max_opened_tc; tc++) - for (i = 0; i < NUM_PTP_SQ_STATS; i++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, - ptp_sq_stats_desc[i].format, tc); + if (priv->tx_ptp_opened) { + for (tc = 0; tc < priv->max_opened_tc; tc++) + for (i = 0; i < NUM_PTP_SQ_STATS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + ptp_sq_stats_desc[i].format, tc); - for (tc = 0; tc < priv->max_opened_tc; tc++) - for (i = 0; i < NUM_PTP_CQ_STATS; i++) + for (tc = 0; tc < priv->max_opened_tc; tc++) + for (i = 0; i < NUM_PTP_CQ_STATS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + ptp_cq_stats_desc[i].format, tc); + } + if (priv->rx_ptp_opened) { + for (i = 0; i < NUM_PTP_RQ_STATS; i++) sprintf(data + (idx++) * ETH_GSTRING_LEN, - ptp_cq_stats_desc[i].format, tc); + ptp_rq_stats_desc[i].format); + } return idx; } @@ -1884,26 +2085,33 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ptp) { int i, tc; - if (!priv->port_ptp_opened) + if (!priv->tx_ptp_opened && !priv->rx_ptp_opened) return idx; for (i = 0; i < NUM_PTP_CH_STATS; i++) data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->port_ptp_stats.ch, + MLX5E_READ_CTR64_CPU(&priv->ptp_stats.ch, ptp_ch_stats_desc, i); - for (tc = 0; tc < priv->max_opened_tc; tc++) - for (i = 0; i < NUM_PTP_SQ_STATS; i++) - data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->port_ptp_stats.sq[tc], - ptp_sq_stats_desc, i); + if (priv->tx_ptp_opened) { + for (tc = 0; tc < priv->max_opened_tc; tc++) + for (i = 0; i < NUM_PTP_SQ_STATS; i++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->ptp_stats.sq[tc], + ptp_sq_stats_desc, i); - for (tc = 0; tc < priv->max_opened_tc; tc++) - for (i = 0; i < NUM_PTP_CQ_STATS; i++) + for (tc = 0; tc < priv->max_opened_tc; tc++) + for (i = 0; i < NUM_PTP_CQ_STATS; i++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->ptp_stats.cq[tc], + ptp_cq_stats_desc, i); + } + if (priv->rx_ptp_opened) { + for (i = 0; i < NUM_PTP_RQ_STATS; i++) data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->port_ptp_stats.cq[tc], - ptp_cq_stats_desc, i); - + MLX5E_READ_CTR64_CPU(&priv->ptp_stats.rq, + ptp_rq_stats_desc, i); + } return idx; } diff 
--git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 93c41312fb03..139e59f30db0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -54,6 +54,7 @@ #define MLX5E_DECLARE_PTP_TX_STAT(type, fld) "ptp_tx%d_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_PTP_CH_STAT(type, fld) "ptp_ch_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_PTP_CQ_STAT(type, fld) "ptp_cq%d_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_PTP_RQ_STAT(type, fld) "ptp_rq%d_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_QOS_TX_STAT(type, fld) "qos_tx%d_"#fld, offsetof(type, fld) @@ -113,6 +114,18 @@ void mlx5e_stats_update_ndo_stats(struct mlx5e_priv *priv); void mlx5e_stats_pause_get(struct mlx5e_priv *priv, struct ethtool_pause_stats *pause_stats); +void mlx5e_stats_fec_get(struct mlx5e_priv *priv, + struct ethtool_fec_stats *fec_stats); + +void mlx5e_stats_eth_phy_get(struct mlx5e_priv *priv, + struct ethtool_eth_phy_stats *phy_stats); +void mlx5e_stats_eth_mac_get(struct mlx5e_priv *priv, + struct ethtool_eth_mac_stats *mac_stats); +void mlx5e_stats_eth_ctrl_get(struct mlx5e_priv *priv, + struct ethtool_eth_ctrl_stats *ctrl_stats); +void mlx5e_stats_rmon_get(struct mlx5e_priv *priv, + struct ethtool_rmon_stats *rmon, + const struct ethtool_rmon_hist_range **ranges); /* Concrete NIC Stats */ @@ -191,7 +204,6 @@ struct mlx5e_sw_stats { #ifdef CONFIG_MLX5_EN_TLS u64 tx_tls_encrypted_packets; u64 tx_tls_encrypted_bytes; - u64 tx_tls_ctx; u64 tx_tls_ooo; u64 tx_tls_dump_packets; u64 tx_tls_dump_bytes; @@ -202,13 +214,12 @@ struct mlx5e_sw_stats { u64 rx_tls_decrypted_packets; u64 rx_tls_decrypted_bytes; - u64 rx_tls_ctx; - u64 rx_tls_del; u64 rx_tls_resync_req_pkt; u64 rx_tls_resync_req_start; u64 rx_tls_resync_req_end; u64 rx_tls_resync_req_skip; u64 rx_tls_resync_res_ok; + u64 rx_tls_resync_res_retry; u64 rx_tls_resync_res_skip; u64 rx_tls_err; #endif @@ -334,13 +345,12 @@ struct mlx5e_rq_stats { #ifdef CONFIG_MLX5_EN_TLS u64 tls_decrypted_packets; u64 tls_decrypted_bytes; - u64 tls_ctx; - u64 tls_del; u64 tls_resync_req_pkt; u64 tls_resync_req_start; u64 tls_resync_req_end; u64 tls_resync_req_skip; u64 tls_resync_res_ok; + u64 tls_resync_res_retry; u64 tls_resync_res_skip; u64 tls_err; #endif @@ -364,7 +374,6 @@ struct mlx5e_sq_stats { #ifdef CONFIG_MLX5_EN_TLS u64 tls_encrypted_packets; u64 tls_encrypted_bytes; - u64 tls_ctx; u64 tls_ooo; u64 tls_dump_packets; u64 tls_dump_bytes; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 0da69b98f38f..47a9c49b25fd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -47,6 +47,7 @@ #include <net/tc_act/tc_pedit.h> #include <net/tc_act/tc_csum.h> #include <net/tc_act/tc_mpls.h> +#include <net/psample.h> #include <net/arp.h> #include <net/ipv6_stubs.h> #include <net/bareudp.h> @@ -65,6 +66,7 @@ #include "en/mod_hdr.h" #include "en/tc_priv.h" #include "en/tc_tun_encap.h" +#include "esw/sample.h" #include "lib/devcom.h" #include "lib/geneve.h" #include "lib/fs_chains.h" @@ -221,6 +223,25 @@ get_ct_priv(struct mlx5e_priv *priv) return priv->fs.tc.ct; } +#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) +static struct mlx5_esw_psample * +get_sample_priv(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + + if 
(is_mdev_switchdev_mode(priv->mdev)) { + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + return uplink_priv->esw_psample; + } + + return NULL; +} +#endif + struct mlx5_flow_handle * mlx5_tc_rule_insert(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, @@ -445,11 +466,15 @@ static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp) mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn); } -static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc) +static int mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc) { - u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE], rqn; struct mlx5e_priv *priv = hp->func_priv; int i, ix, sz = MLX5E_INDIR_RQT_SIZE; + u32 *indirection_rqt, rqn; + + indirection_rqt = kcalloc(sz, sizeof(*indirection_rqt), GFP_KERNEL); + if (!indirection_rqt) + return -ENOMEM; mlx5e_build_default_indir_rqt(indirection_rqt, sz, hp->num_channels); @@ -462,6 +487,9 @@ static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc) rqn = hp->pair->rqn[ix]; MLX5_SET(rqtc, rqtc, rq_num[i], rqn); } + + kfree(indirection_rqt); + return 0; } static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp) @@ -482,12 +510,15 @@ static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp) MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); MLX5_SET(rqtc, rqtc, rqt_max_size, sz); - mlx5e_hairpin_fill_rqt_rqns(hp, rqtc); + err = mlx5e_hairpin_fill_rqt_rqns(hp, rqtc); + if (err) + goto out; err = mlx5_core_create_rqt(mdev, in, inlen, &hp->indir_rqt.rqtn); if (!err) hp->indir_rqt.enabled = true; +out: kvfree(in); return err; } @@ -896,7 +927,7 @@ mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv, if (IS_ERR(dest[dest_ix].ft)) return ERR_CAST(dest[dest_ix].ft); } else { - dest[dest_ix].ft = priv->fs.vlan.ft.t; + dest[dest_ix].ft = mlx5e_vlan_get_flowtable(priv->fs.vlan); } dest_ix++; } @@ -1077,19 +1108,27 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, if (flow_flag_test(flow, CT)) { mod_hdr_acts = &attr->parse_attr->mod_hdr_acts; - return mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv), + rule = mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv), flow, spec, attr, mod_hdr_acts); +#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) + } else if (flow_flag_test(flow, SAMPLE)) { + rule = mlx5_esw_sample_offload(get_sample_priv(flow->priv), spec, attr); +#endif + } else { + rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); } - rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); if (IS_ERR(rule)) return rule; if (attr->esw_attr->split_count) { flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr); if (IS_ERR(flow->rule[1])) { - mlx5_eswitch_del_offloaded_rule(esw, rule, attr); + if (flow_flag_test(flow, CT)) + mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr); + else + mlx5_eswitch_del_offloaded_rule(esw, rule, attr); return flow->rule[1]; } } @@ -1111,6 +1150,13 @@ void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, return; } +#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) + if (flow_flag_test(flow, SAMPLE)) { + mlx5_esw_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr); + return; + } +#endif + if (attr->esw_attr->split_count) mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); @@ -1467,6 +1513,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, if (flow_flag_test(flow, L3_TO_L2_DECAP)) mlx5e_detach_decap(priv, flow); + kfree(flow->attr->esw_attr->sample); kfree(flow->attr); } @@ -1895,6 +1942,9 @@ static int 
mlx5e_flower_parse_meta(struct net_device *filter_dev, return 0; flow_rule_match_meta(rule, &match); + if (!match.mask->ingress_ifindex) + return 0; + if (match.mask->ingress_ifindex != 0xFFFFFFFF) { NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask"); return -EOPNOTSUPP; @@ -1947,6 +1997,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, misc_parameters); void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + void *misc_c_3 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_3); + void *misc_v_3 = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_3); struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_dissector *dissector = rule->match.dissector; u16 addr_type = 0; @@ -1976,6 +2030,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, BIT(FLOW_DISSECTOR_KEY_CT) | BIT(FLOW_DISSECTOR_KEY_ENC_IP) | BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | + BIT(FLOW_DISSECTOR_KEY_ICMP) | BIT(FLOW_DISSECTOR_KEY_MPLS))) { NL_SET_ERR_MSG_MOD(extack, "Unsupported key"); netdev_dbg(priv->netdev, "Unsupported key used: 0x%x\n", @@ -2295,6 +2350,58 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, if (match.mask->flags) *match_level = MLX5_MATCH_L4; } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) { + struct flow_match_icmp match; + + flow_rule_match_icmp(rule, &match); + switch (ip_proto) { + case IPPROTO_ICMP: + if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & + MLX5_FLEX_PROTO_ICMP)) + return -EOPNOTSUPP; + MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_type, + match.mask->type); + MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_type, + match.key->type); + MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_code, + match.mask->code); + MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_code, + match.key->code); + break; + case IPPROTO_ICMPV6: + if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & + MLX5_FLEX_PROTO_ICMPV6)) + return -EOPNOTSUPP; + MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_type, + match.mask->type); + MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_type, + match.key->type); + MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_code, + match.mask->code); + MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_code, + match.key->code); + break; + default: + NL_SET_ERR_MSG_MOD(extack, + "Code and type matching only with ICMP and ICMPv6"); + netdev_err(priv->netdev, + "Code and type matching only with ICMP and ICMPv6\n"); + return -EINVAL; + } + if (match.mask->code || match.mask->type) { + *match_level = MLX5_MATCH_L4; + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3; + } + } + /* Currently supported only for MPLS over UDP */ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS) && + !netif_is_bareudp(filter_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on MPLS is supported only for MPLS over UDP"); + netdev_err(priv->netdev, + "Matching on MPLS is supported only for MPLS over UDP\n"); + return -EOPNOTSUPP; + } return 0; } @@ -2899,6 +3006,37 @@ static int is_action_keys_supported(const struct flow_action_entry *act, return 0; } +static bool modify_tuple_supported(bool modify_tuple, bool ct_clear, + bool ct_flow, struct netlink_ext_ack *extack, + struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec) +{ + if (!modify_tuple || ct_clear) + return true; + + if (ct_flow) { + NL_SET_ERR_MSG_MOD(extack, + "can't offload tuple modification with non-clear ct()"); + netdev_info(priv->netdev, + "can't offload tuple modification with non-clear ct()"); + return false; + } + + /* Add
ct_state=-trk match so it will be offloaded for non ct flows + * (or after clear action), as otherwise, since the tuple is changed, + * we can't restore ct state + */ + if (mlx5_tc_ct_add_no_trk_match(spec)) { + NL_SET_ERR_MSG_MOD(extack, + "can't offload tuple modification with ct matches and no ct(clear) action"); + netdev_info(priv->netdev, + "can't offload tuple modification with ct matches and no ct(clear) action"); + return false; + } + + return true; +} + static bool modify_header_match_supported(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct flow_action *flow_action, @@ -2937,18 +3075,9 @@ static bool modify_header_match_supported(struct mlx5e_priv *priv, return err; } - /* Add ct_state=-trk match so it will be offloaded for non ct flows - * (or after clear action), as otherwise, since the tuple is changed, - * we can't restore ct state - */ - if (!ct_clear && modify_tuple && - mlx5_tc_ct_add_no_trk_match(spec)) { - NL_SET_ERR_MSG_MOD(extack, - "can't offload tuple modify header with ct matches"); - netdev_info(priv->netdev, - "can't offload tuple modify header with ct matches"); + if (!modify_tuple_supported(modify_tuple, ct_clear, ct_flow, extack, + priv, spec)) return false; - } ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol); if (modify_ip_header && ip_proto != IPPROTO_TCP && @@ -2979,7 +3108,8 @@ static bool actions_match_supported(struct mlx5e_priv *priv, actions = flow->attr->action; if (mlx5e_is_eswitch_flow(flow)) { - if (flow->attr->esw_attr->split_count && ct_flow) { + if (flow->attr->esw_attr->split_count && ct_flow && + !MLX5_CAP_GEN(flow->attr->esw_attr->in_mdev, reg_c_preserve)) { /* All registers used by ct are cleared when using * split rules. */ @@ -3017,6 +3147,13 @@ static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) return (fsystem_guid == psystem_guid); } +static bool same_vf_reps(struct mlx5e_priv *priv, + struct net_device *out_dev) +{ + return mlx5e_eswitch_vf_rep(priv->netdev) && + priv->netdev == out_dev; +} + static int add_vlan_rewrite_action(struct mlx5e_priv *priv, int namespace, const struct flow_action_entry *act, struct mlx5e_tc_flow_parse_attr *parse_attr, @@ -3526,6 +3663,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, bool ft_flow = mlx5e_is_ft_flow(flow); const struct flow_action_entry *act; struct mlx5_esw_flow_attr *esw_attr; + struct mlx5_sample_attr sample = {}; bool encap = false, decap = false; u32 action = attr->action; int err, i, if_count = 0; @@ -3702,6 +3840,12 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, return -EOPNOTSUPP; } + if (same_vf_reps(priv, out_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "can't forward from a VF to itself"); + return -EOPNOTSUPP; + } + out_priv = netdev_priv(out_dev); rpriv = out_priv->ppriv; esw_attr->dests[esw_attr->out_count].rep = rpriv->rep; @@ -3774,11 +3918,27 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, attr->dest_chain = act->chain_index; break; case FLOW_ACTION_CT: + if (flow_flag_test(flow, SAMPLE)) { + NL_SET_ERR_MSG_MOD(extack, "Sample action with connection tracking is not supported"); + return -EOPNOTSUPP; + } err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, act, extack); if (err) return err; flow_flag_set(flow, CT); + esw_attr->split_count = esw_attr->out_count; + break; + case FLOW_ACTION_SAMPLE: + if (flow_flag_test(flow, CT)) { + NL_SET_ERR_MSG_MOD(extack, "Sample action with connection tracking is not supported"); + return -EOPNOTSUPP; + } + sample.rate = act->sample.rate; + 
sample.group_num = act->sample.psample_group->group_num; + if (act->sample.truncate) + sample.trunc_size = act->sample.trunc_size; + flow_flag_set(flow, SAMPLE); break; default: NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported"); @@ -3841,11 +4001,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { - NL_SET_ERR_MSG_MOD(extack, - "Mirroring goto chain rules isn't supported"); - return -EOPNOTSUPP; - } attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } @@ -3863,6 +4018,16 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, return -EOPNOTSUPP; } + /* Allocate sample attribute only when there is a sample action and + * no errors after parsing. + */ + if (flow_flag_test(flow, SAMPLE)) { + esw_attr->sample = kzalloc(sizeof(*esw_attr->sample), GFP_KERNEL); + if (!esw_attr->sample) + return -ENOMEM; + *esw_attr->sample = sample; + } + return 0; } @@ -4265,6 +4430,11 @@ int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow; int err = 0; + if (!mlx5_esw_hold(priv->mdev)) + return -EAGAIN; + + mlx5_esw_get(priv->mdev); + rcu_read_lock(); flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params); if (flow) { @@ -4302,11 +4472,14 @@ rcu_unlock: if (err) goto err_free; + mlx5_esw_release(priv->mdev); return 0; err_free: mlx5e_flow_put(priv, flow); out: + mlx5_esw_put(priv->mdev); + mlx5_esw_release(priv->mdev); return err; } @@ -4346,6 +4519,7 @@ int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, trace_mlx5e_delete_flower(f); mlx5e_flow_put(priv, flow); + mlx5_esw_put(priv->mdev); return 0; errout: @@ -4445,7 +4619,8 @@ static int apply_police_params(struct mlx5e_priv *priv, u64 rate, */ if (rate) { rate = (rate * BITS_PER_BYTE) + 500000; - rate_mbps = max_t(u64, do_div(rate, 1000000), 1); + do_div(rate, 1000000); + rate_mbps = max_t(u32, rate, 1); } err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps); @@ -4480,6 +4655,10 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, flow_action_for_each(i, act, flow_action) { switch (act->id) { case FLOW_ACTION_POLICE: + if (act->police.rate_pkt_ps) { + NL_SET_ERR_MSG_MOD(extack, "QoS offload doesn't support packets per second"); + return -EOPNOTSUPP; + } err = apply_police_params(priv, act->police.rate_bytes_ps, extack); if (err) return err; @@ -4614,6 +4793,7 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { struct mlx5e_tc_table *tc = &priv->fs.tc; struct mlx5_core_dev *dev = priv->mdev; + struct mapping_ctx *chains_mapping; struct mlx5_chains_attr attr = {}; int err; @@ -4628,15 +4808,22 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key); - if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) { + chains_mapping = mapping_create(sizeof(struct mlx5_mapped_obj), + MLX5E_TC_TABLE_CHAIN_TAG_MASK, true); + if (IS_ERR(chains_mapping)) { + err = PTR_ERR(chains_mapping); + goto err_mapping; + } + tc->mapping = chains_mapping; + + if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED | MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED; - attr.max_restore_tag = MLX5E_TC_TABLE_CHAIN_TAG_MASK; - } attr.ns = MLX5_FLOW_NAMESPACE_KERNEL; attr.max_ft_sz = mlx5e_tc_nic_get_ft_size(dev); attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS; - attr.default_ft = priv->fs.vlan.ft.t; + attr.default_ft = mlx5e_vlan_get_flowtable(priv->fs.vlan); + attr.mapping = chains_mapping; tc->chains
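An aside on the apply_police_params() hunk above: in the kernel, do_div(n, base) divides the 64-bit lvalue n in place (n becomes the quotient) and returns the 32-bit remainder, which is why the removed line stored the remainder in rate_mbps rather than the Mbit/s value. A hedged sketch of the corrected conversion (bytes_ps_to_mbps() is an illustrative name, not the driver's):

	#include <linux/bits.h>		/* BITS_PER_BYTE */
	#include <linux/kernel.h>	/* max_t() */
	#include <asm/div64.h>		/* do_div() */

	static u32 bytes_ps_to_mbps(u64 rate_bytes_ps)
	{
		/* bits/s, plus 500000 so the division rounds to the nearest Mbit */
		u64 rate = rate_bytes_ps * BITS_PER_BYTE + 500000;

		do_div(rate, 1000000);		/* rate now holds the quotient */
		return max_t(u32, rate, 1);	/* never program a zero rate */
	}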
= mlx5_chains_create(dev, &attr); if (IS_ERR(tc->chains)) { @@ -4646,10 +4833,6 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) tc->ct = mlx5_tc_ct_init(priv, tc->chains, &priv->fs.tc.mod_hdr, MLX5_FLOW_NAMESPACE_KERNEL); - if (IS_ERR(tc->ct)) { - err = PTR_ERR(tc->ct); - goto err_ct; - } tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event; err = register_netdevice_notifier_dev_net(priv->netdev, @@ -4665,9 +4848,10 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) err_reg: mlx5_tc_ct_clean(tc->ct); -err_ct: mlx5_chains_destroy(tc->chains); err_chains: + mapping_destroy(chains_mapping); +err_mapping: rhashtable_destroy(&tc->ht); return err; } @@ -4702,6 +4886,7 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) mutex_destroy(&tc->t_lock); mlx5_tc_ct_clean(tc->ct); + mapping_destroy(tc->mapping); mlx5_chains_destroy(tc->chains); } @@ -4724,8 +4909,10 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht) esw_chains(esw), &esw->offloads.mod_hdr, MLX5_FLOW_NAMESPACE_FDB); - if (IS_ERR(uplink_priv->ct_priv)) - goto err_ct; + +#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) + uplink_priv->esw_psample = mlx5_esw_sample_init(netdev_priv(priv->netdev)); +#endif mapping = mapping_create(sizeof(struct tunnel_match_key), TUNNEL_INFO_BITS_MASK, true); @@ -4764,8 +4951,10 @@ err_ht_init: err_enc_opts_mapping: mapping_destroy(uplink_priv->tunnel_mapping); err_tun_mapping: +#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) + mlx5_esw_sample_cleanup(uplink_priv->esw_psample); +#endif mlx5_tc_ct_clean(uplink_priv->ct_priv); -err_ct: netdev_warn(priv->netdev, "Failed to initialize tc (eswitch), err: %d", err); return err; @@ -4783,6 +4972,9 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht) mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); mapping_destroy(uplink_priv->tunnel_mapping); +#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE) + mlx5_esw_sample_cleanup(uplink_priv->esw_psample); +#endif mlx5_tc_ct_clean(uplink_priv->ct_priv); } @@ -4838,9 +5030,17 @@ static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv, int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { - unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(NIC_OFFLOAD); + unsigned long flags = MLX5_TC_FLAG(INGRESS); struct mlx5e_priv *priv = cb_priv; + if (!priv->netdev || !netif_device_present(priv->netdev)) + return -EOPNOTSUPP; + + if (mlx5e_is_uplink_rep(priv)) + flags |= MLX5_TC_FLAG(ESW_OFFLOAD); + else + flags |= MLX5_TC_FLAG(NIC_OFFLOAD); + switch (type) { case TC_SETUP_CLSFLOWER: return mlx5e_setup_tc_cls_flower(priv, type_data, flags); @@ -4856,6 +5056,7 @@ bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, u32 chain = 0, chain_tag, reg_b, zone_restore_id; struct mlx5e_priv *priv = netdev_priv(skb->dev); struct mlx5e_tc_table *tc = &priv->fs.tc; + struct mlx5_mapped_obj mapped_obj; struct tc_skb_ext *tc_skb_ext; int err; @@ -4863,7 +5064,7 @@ bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, chain_tag = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK; - err = mlx5_get_chain_for_tag(nic_chains(priv), chain_tag, &chain); + err = mapping_find(tc->mapping, chain_tag, &mapped_obj); if (err) { netdev_dbg(priv->netdev, "Couldn't find chain for chain tag: %d, err: %d\n", @@ -4871,7 +5072,8 @@ bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, return false; } - if (chain) { + if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) { + chain = mapped_obj.chain; tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT); if (WARN_ON(!tc_skb_ext)) return false; @@ -4884,6 +5086,9 @@ bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, if 
(!mlx5e_tc_ct_restore_flow(tc->ct, skb, zone_restore_id)) return false; + } else { + netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type); + return false; } #endif /* CONFIG_NET_TC_SKB_EXT */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 89003ae7775a..25c091795bcd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -79,6 +79,7 @@ struct mlx5_flow_attr { u8 inner_match_level; u8 outer_match_level; u8 ip_version; + u8 tun_ip_version; u32 flags; union { struct mlx5_esw_flow_attr esw_attr[0]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index bdbffe484fce..8ba62671f5f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -133,6 +133,8 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, /* Sync with mlx5e_update_num_tc_x_num_ch - avoid refetching. */ num_tc_x_num_ch = READ_ONCE(priv->num_tc_x_num_ch); if (unlikely(dev->real_num_tx_queues > num_tc_x_num_ch)) { + struct mlx5e_ptp *ptp_channel; + /* Order maj_id before defcls - pairs with mlx5e_htb_root_add. */ u16 htb_maj_id = smp_load_acquire(&priv->htb.maj_id); @@ -142,10 +144,11 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, return txq_ix; } - if (unlikely(priv->channels.port_ptp)) - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && - mlx5e_use_ptpsq(skb)) - return mlx5e_select_ptpsq(dev, skb); + ptp_channel = READ_ONCE(priv->channels.ptp); + if (unlikely(ptp_channel) && + test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state) && + mlx5e_use_ptpsq(skb)) + return mlx5e_select_ptpsq(dev, skb); txq_ix = netdev_pick_tx(dev, skb, NULL); /* Fix netdev_pick_tx() not to choose ptp_channel and HTB txqs. 
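The apply_police_params() hunk above deserves a worked example: the kernel's do_div(n, base) macro divides n in place and returns the remainder, so the old rate_mbps = max_t(u64, do_div(rate, 1000000), 1) picked up the remainder instead of the Mbps quotient. Below is a minimal userspace sketch of the pitfall; demo_do_div() is a hypothetical stand-in for the kernel macro, not mlx5 code.

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the kernel's do_div(): the quotient is left in *n and
 * the REMAINDER is returned.
 */
static uint32_t demo_do_div(uint64_t *n, uint32_t base)
{
	uint32_t rem = (uint32_t)(*n % base);

	*n /= base;
	return rem;
}

int main(void)
{
	/* 1 Gbit/s as computed in apply_police_params():
	 * 125000000 bytes/s * 8 + 500000 (rounding) = 1000500000
	 */
	uint64_t rate = 125000000ULL * 8 + 500000;
	uint64_t buggy, fixed;

	buggy = demo_do_div(&rate, 1000000);	/* remainder: 500000 */
	fixed = rate;				/* quotient: 1000 Mbps */
	printf("buggy rate_mbps=%llu, fixed rate_mbps=%llu\n",
	       (unsigned long long)buggy, (unsigned long long)fixed);
	return 0;
}

The fixed hunk calls do_div() for its in-place quotient and only then clamps with max_t(u32, rate, 1), so a sub-1 Mbit/s policer still programs a non-zero rate.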
@@ -576,7 +579,7 @@ static void mlx5e_tx_mpwqe_session_start(struct mlx5e_txqsq *sq, pi = mlx5e_txqsq_get_next_pi(sq, MLX5E_TX_MPW_MAX_WQEBBS); wqe = MLX5E_TX_FETCH_WQE(sq, pi); - prefetchw(wqe->data); + net_prefetchw(wqe->data); *session = (struct mlx5e_tx_mpwqe) { .wqe = wqe, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index d54da3797c30..833be29170a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -36,6 +36,7 @@ #include "en/xdp.h" #include "en/xsk/rx.h" #include "en/xsk/tx.h" +#include "en_accel/ktls_txrx.h" static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c) { @@ -171,6 +172,10 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) */ clear_bit(MLX5E_SQ_STATE_PENDING_XSK_TX, &c->async_icosq.state); + /* Keep after async ICOSQ CQ poll */ + if (unlikely(mlx5e_ktls_rx_pending_resync_list(c, budget))) + busy |= mlx5e_ktls_rx_handle_resync_list(c, budget); + busy |= INDIRECT_CALL_2(rq->post_wqes, mlx5e_post_rx_mpwqes, mlx5e_post_rx_wqes, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 174dfbc996c6..77c0ca655975 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -271,7 +271,7 @@ static void init_eq_buf(struct mlx5_eq *eq) struct mlx5_eqe *eqe; int i; - for (i = 0; i < eq->nent; i++) { + for (i = 0; i < eq_get_size(eq); i++) { eqe = get_eqe(eq, i); eqe->owner = MLX5_EQE_OWNER_INIT_VAL; } @@ -281,8 +281,10 @@ static int create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, struct mlx5_eq_param *param) { + u8 log_eq_size = order_base_2(param->nent + MLX5_NUM_SPARE_EQE); struct mlx5_cq_table *cq_table = &eq->cq_table; u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; + u8 log_eq_stride = ilog2(MLX5_EQE_SIZE); struct mlx5_priv *priv = &dev->priv; u8 vecidx = param->irq_index; __be64 *pas; @@ -297,16 +299,18 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, spin_lock_init(&cq_table->lock); INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC); - eq->nent = roundup_pow_of_two(param->nent + MLX5_NUM_SPARE_EQE); eq->cons_index = 0; - err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf); + + err = mlx5_frag_buf_alloc_node(dev, wq_get_byte_sz(log_eq_size, log_eq_stride), + &eq->frag_buf, dev->priv.numa_node); if (err) return err; + mlx5_init_fbc(eq->frag_buf.frags, log_eq_stride, log_eq_size, &eq->fbc); init_eq_buf(eq); inlen = MLX5_ST_SZ_BYTES(create_eq_in) + - MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->buf.npages; + MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->frag_buf.npages; in = kvzalloc(inlen, GFP_KERNEL); if (!in) { @@ -315,7 +319,7 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, } pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas); - mlx5_fill_page_array(&eq->buf, pas); + mlx5_fill_page_frag_array(&eq->frag_buf, pas); MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ); if (!param->mask[0] && MLX5_CAP_GEN(dev, log_max_uctx)) @@ -326,11 +330,11 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, param->mask[i]); eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry); - MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent)); + MLX5_SET(eqc, eqc, log_eq_size, eq->fbc.log_sz); MLX5_SET(eqc, eqc, uar_page, priv->uar->index); MLX5_SET(eqc, eqc, intr, vecidx); MLX5_SET(eqc, eqc, log_page_size, - eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + 
eq->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); if (err) @@ -356,7 +360,7 @@ err_in: kvfree(in); err_buf: - mlx5_buf_free(dev, &eq->buf); + mlx5_frag_buf_free(dev, &eq->frag_buf); return err; } @@ -413,7 +417,7 @@ static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) eq->eqn); synchronize_irq(eq->irqn); - mlx5_buf_free(dev, &eq->buf); + mlx5_frag_buf_free(dev, &eq->frag_buf); return err; } @@ -764,10 +768,11 @@ EXPORT_SYMBOL(mlx5_eq_destroy_generic); struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc) { u32 ci = eq->cons_index + cc; + u32 nent = eq_get_size(eq); struct mlx5_eqe *eqe; - eqe = get_eqe(eq, ci & (eq->nent - 1)); - eqe = ((eqe->owner & 1) ^ !!(ci & eq->nent)) ? NULL : eqe; + eqe = get_eqe(eq, ci & (nent - 1)); + eqe = ((eqe->owner & 1) ^ !!(ci & nent)) ? NULL : eqe; /* Make sure we read EQ entry contents after we've * checked the ownership bit. */ @@ -931,13 +936,24 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) mutex_unlock(&table->lock); } +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING +#define MLX5_MAX_ASYNC_EQS 4 +#else +#define MLX5_MAX_ASYNC_EQS 3 +#endif + int mlx5_eq_table_create(struct mlx5_core_dev *dev) { struct mlx5_eq_table *eq_table = dev->priv.eq_table; + int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? + MLX5_CAP_GEN(dev, max_num_eqs) : + 1 << MLX5_CAP_GEN(dev, log_max_eq); int err; eq_table->num_comp_eqs = - mlx5_irq_get_num_comp(eq_table->irq_table); + min_t(int, + mlx5_irq_get_num_comp(eq_table->irq_table), + num_eqs - MLX5_MAX_ASYNC_EQS); err = create_async_eqs(dev); if (err) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c index 3e19b1721303..0399a396d166 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c @@ -96,7 +96,7 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, } if (!vport->egress.acl) { - vport->egress.acl = esw_acl_table_create(esw, vport->vport, + vport->egress.acl = esw_acl_table_create(esw, vport, MLX5_FLOW_NAMESPACE_ESW_EGRESS, table_size); if (IS_ERR(vport->egress.acl)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c index 26b37a0f8762..505bf811984a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c @@ -148,7 +148,7 @@ static void esw_acl_egress_ofld_groups_destroy(struct mlx5_vport *vport) esw_acl_egress_vlan_grp_destroy(vport); } -static bool esw_acl_egress_needed(const struct mlx5_eswitch *esw, u16 vport_num) +static bool esw_acl_egress_needed(struct mlx5_eswitch *esw, u16 vport_num) { return mlx5_eswitch_is_vf_vport(esw, vport_num) || mlx5_esw_is_sf_vport(esw, vport_num); } @@ -171,7 +171,7 @@ int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport table_size++; if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) table_size++; - vport->egress.acl = esw_acl_table_create(esw, vport->vport, + vport->egress.acl = esw_acl_table_create(esw, vport, MLX5_FLOW_NAMESPACE_ESW_EGRESS, table_size); if (IS_ERR(vport->egress.acl)) { err = PTR_ERR(vport->egress.acl); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c index 4a369669e51e..45b839116212 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c @@ -6,14 +6,14 @@ #include "helper.h" struct mlx5_flow_table * -esw_acl_table_create(struct mlx5_eswitch *esw, u16 vport_num, int ns, int size) +esw_acl_table_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport, int ns, int size) { struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_core_dev *dev = esw->dev; struct mlx5_flow_namespace *root_ns; struct mlx5_flow_table *acl; int acl_supported; - int vport_index; + u16 vport_num; int err; acl_supported = (ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS) ? @@ -23,11 +23,11 @@ esw_acl_table_create(struct mlx5_eswitch *esw, u16 vport_num, int ns, int size) if (!acl_supported) return ERR_PTR(-EOPNOTSUPP); + vport_num = vport->vport; esw_debug(dev, "Create vport[%d] %s ACL table\n", vport_num, ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS ? "ingress" : "egress"); - vport_index = mlx5_eswitch_vport_num_to_index(esw, vport_num); - root_ns = mlx5_get_flow_vport_acl_namespace(dev, ns, vport_index); + root_ns = mlx5_get_flow_vport_acl_namespace(dev, ns, vport->index); if (!root_ns) { esw_warn(dev, "Failed to get E-Switch root namespace for vport (%d)\n", vport_num); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h index 8dc4cab66a71..a47063fab57e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h @@ -8,7 +8,7 @@ /* General acl helper functions */ struct mlx5_flow_table * -esw_acl_table_create(struct mlx5_eswitch *esw, u16 vport_num, int ns, int size); +esw_acl_table_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport, int ns, int size); /* Egress acl helper functions */ void esw_acl_egress_table_destroy(struct mlx5_vport *vport); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c index d64fad2823e7..f75b86abaf1c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c @@ -177,7 +177,7 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, } if (!vport->ingress.acl) { - vport->ingress.acl = esw_acl_table_create(esw, vport->vport, + vport->ingress.acl = esw_acl_table_create(esw, vport, MLX5_FLOW_NAMESPACE_ESW_INGRESS, table_size); if (IS_ERR(vport->ingress.acl)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c index 548c005ea633..39e948bc1204 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c @@ -7,7 +7,7 @@ #include "ofld.h" static bool -esw_acl_ingress_prio_tag_enabled(const struct mlx5_eswitch *esw, +esw_acl_ingress_prio_tag_enabled(struct mlx5_eswitch *esw, const struct mlx5_vport *vport) { return (MLX5_CAP_GEN(esw->dev, prio_tag_required) && @@ -255,7 +255,7 @@ int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw, if (esw_acl_ingress_prio_tag_enabled(esw, vport)) num_ftes++; - vport->ingress.acl = esw_acl_table_create(esw, vport->vport, + vport->ingress.acl = esw_acl_table_create(esw, vport, MLX5_FLOW_NAMESPACE_ESW_INGRESS, num_ftes); if (IS_ERR(vport->ingress.acl)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c index 
cb1e181f4c6a..1703384eca95 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -14,8 +14,7 @@ mlx5_esw_get_port_parent_id(struct mlx5_core_dev *dev, struct netdev_phys_item_i memcpy(ppid->id, &parent_id, sizeof(parent_id)); } -static bool -mlx5_esw_devlink_port_supported(const struct mlx5_eswitch *esw, u16 vport_num) +static bool mlx5_esw_devlink_port_supported(struct mlx5_eswitch *esw, u16 vport_num) { return vport_num == MLX5_VPORT_UPLINK || (mlx5_core_is_ecpf(esw->dev) && vport_num == MLX5_VPORT_PF) || @@ -120,11 +119,11 @@ struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u1 struct mlx5_vport *vport; vport = mlx5_eswitch_get_vport(esw, vport_num); - return vport->dl_port; + return IS_ERR(vport) ? ERR_CAST(vport) : vport->dl_port; } int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port, - u16 vport_num, u32 sfnum) + u16 vport_num, u32 controller, u32 sfnum) { struct mlx5_core_dev *dev = esw->dev; struct netdev_phys_item_id ppid = {}; @@ -142,7 +141,7 @@ int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_p mlx5_esw_get_port_parent_id(dev, &ppid); memcpy(dl_port->attrs.switch_id.id, &ppid.id[0], ppid.id_len); dl_port->attrs.switch_id.id_len = ppid.id_len; - devlink_port_attrs_pci_sf_set(dl_port, 0, pfnum, sfnum); + devlink_port_attrs_pci_sf_set(dl_port, controller, pfnum, sfnum, !!controller); devlink = priv_to_devlink(dev); dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, vport_num); err = devlink_port_register(devlink, dl_port, dl_port_index); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c index 6f6772bf61a2..3da7becc1069 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c @@ -248,7 +248,7 @@ err_mod_hdr_regc0: err_ethertype: kfree(rule); out: - kfree(rule_spec); + kvfree(rule_spec); return err; } @@ -328,7 +328,7 @@ static int mlx5_create_indir_recirc_group(struct mlx5_eswitch *esw, e->recirc_cnt = 0; out: - kfree(in); + kvfree(in); return err; } @@ -347,7 +347,7 @@ static int mlx5_create_indir_fwd_group(struct mlx5_eswitch *esw, spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) { - kfree(in); + kvfree(in); return -ENOMEM; } @@ -371,8 +371,8 @@ static int mlx5_create_indir_fwd_group(struct mlx5_eswitch *esw, } err_out: - kfree(spec); - kfree(in); + kvfree(spec); + kvfree(in); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h index cb9eafd1b4ee..21d56b49d14b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h @@ -30,13 +30,13 @@ mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr); #else /* indir API stubs */ -struct mlx5_esw_indir_table * +static inline struct mlx5_esw_indir_table * mlx5_esw_indir_table_init(void) { return NULL; } -void +static inline void mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir) { } @@ -57,7 +57,7 @@ mlx5_esw_indir_table_put(struct mlx5_eswitch *esw, { } -bool +static inline bool mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr, u16 vport_num, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c new file 
mode 100644 index 000000000000..d9041b16611d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c @@ -0,0 +1,510 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021 Mellanox Technologies Ltd */ + +#include <linux/etherdevice.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/vport.h> +#include <linux/mlx5/fs.h> +#include "esw/acl/lgcy.h" +#include "esw/legacy.h" +#include "mlx5_core.h" +#include "eswitch.h" +#include "fs_core.h" + +enum { + LEGACY_VEPA_PRIO = 0, + LEGACY_FDB_PRIO, +}; + +static int esw_create_legacy_vepa_table(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *fdb; + int err; + + root_ns = mlx5_get_fdb_sub_ns(dev, 0); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + return -EOPNOTSUPP; + } + + /* num FTE 2, num FG 2 */ + ft_attr.prio = LEGACY_VEPA_PRIO; + ft_attr.max_fte = 2; + ft_attr.autogroup.max_num_groups = 2; + fdb = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr); + if (IS_ERR(fdb)) { + err = PTR_ERR(fdb); + esw_warn(dev, "Failed to create VEPA FDB err %d\n", err); + return err; + } + esw->fdb_table.legacy.vepa_fdb = fdb; + + return 0; +} + +static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw) +{ + esw_debug(esw->dev, "Destroy FDB Table\n"); + if (!esw->fdb_table.legacy.fdb) + return; + + if (esw->fdb_table.legacy.promisc_grp) + mlx5_destroy_flow_group(esw->fdb_table.legacy.promisc_grp); + if (esw->fdb_table.legacy.allmulti_grp) + mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp); + if (esw->fdb_table.legacy.addr_grp) + mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp); + mlx5_destroy_flow_table(esw->fdb_table.legacy.fdb); + + esw->fdb_table.legacy.fdb = NULL; + esw->fdb_table.legacy.addr_grp = NULL; + esw->fdb_table.legacy.allmulti_grp = NULL; + esw->fdb_table.legacy.promisc_grp = NULL; + atomic64_set(&esw->user_count, 0); +} + +static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *fdb; + struct mlx5_flow_group *g; + void *match_criteria; + int table_size; + u32 *flow_group_in; + u8 *dmac; + int err = 0; + + esw_debug(dev, "Create FDB log_max_size(%d)\n", + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + + root_ns = mlx5_get_fdb_sub_ns(dev, 0); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + return -EOPNOTSUPP; + } + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + ft_attr.max_fte = table_size; + ft_attr.prio = LEGACY_FDB_PRIO; + fdb = mlx5_create_flow_table(root_ns, &ft_attr); + if (IS_ERR(fdb)) { + err = PTR_ERR(fdb); + esw_warn(dev, "Failed to create FDB Table err %d\n", err); + goto out; + } + esw->fdb_table.legacy.fdb = fdb; + + /* Addresses group : Full match unicast/multicast addresses */ + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, outer_headers.dmac_47_16); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + /* 
Preserve 2 entries for allmulti and promisc rules */ + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 3); + eth_broadcast_addr(dmac); + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create flow group err(%d)\n", err); + goto out; + } + esw->fdb_table.legacy.addr_grp = g; + + /* Allmulti group : One rule that forwards any mcast traffic */ + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 2); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 2); + eth_zero_addr(dmac); + dmac[0] = 0x01; + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create allmulti flow group err(%d)\n", err); + goto out; + } + esw->fdb_table.legacy.allmulti_grp = g; + + /* Promiscuous group : + * One rule that forwards all unmatched traffic from previous groups + */ + eth_zero_addr(dmac); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 1); + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create promisc flow group err(%d)\n", err); + goto out; + } + esw->fdb_table.legacy.promisc_grp = g; + +out: + if (err) + esw_destroy_legacy_fdb_table(esw); + + kvfree(flow_group_in); + return err; +} + +static void esw_destroy_legacy_vepa_table(struct mlx5_eswitch *esw) +{ + esw_debug(esw->dev, "Destroy VEPA Table\n"); + if (!esw->fdb_table.legacy.vepa_fdb) + return; + + mlx5_destroy_flow_table(esw->fdb_table.legacy.vepa_fdb); + esw->fdb_table.legacy.vepa_fdb = NULL; +} + +static int esw_create_legacy_table(struct mlx5_eswitch *esw) +{ + int err; + + memset(&esw->fdb_table.legacy, 0, sizeof(struct legacy_fdb)); + atomic64_set(&esw->user_count, 0); + + err = esw_create_legacy_vepa_table(esw); + if (err) + return err; + + err = esw_create_legacy_fdb_table(esw); + if (err) + esw_destroy_legacy_vepa_table(esw); + + return err; +} + +static void esw_cleanup_vepa_rules(struct mlx5_eswitch *esw) +{ + if (esw->fdb_table.legacy.vepa_uplink_rule) + mlx5_del_flow_rules(esw->fdb_table.legacy.vepa_uplink_rule); + + if (esw->fdb_table.legacy.vepa_star_rule) + mlx5_del_flow_rules(esw->fdb_table.legacy.vepa_star_rule); + + esw->fdb_table.legacy.vepa_uplink_rule = NULL; + esw->fdb_table.legacy.vepa_star_rule = NULL; +} + +static void esw_destroy_legacy_table(struct mlx5_eswitch *esw) +{ + esw_cleanup_vepa_rules(esw); + esw_destroy_legacy_fdb_table(esw); + esw_destroy_legacy_vepa_table(esw); +} + +#define MLX5_LEGACY_SRIOV_VPORT_EVENTS (MLX5_VPORT_UC_ADDR_CHANGE | \ + MLX5_VPORT_MC_ADDR_CHANGE | \ + MLX5_VPORT_PROMISC_CHANGE) + +int esw_legacy_enable(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + unsigned long i; + int ret; + + ret = esw_create_legacy_table(esw); + if (ret) + return ret; + + mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; + + ret = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_LEGACY_SRIOV_VPORT_EVENTS); + if (ret) + esw_destroy_legacy_table(esw); + return ret; +} + +void 
esw_legacy_disable(struct mlx5_eswitch *esw) +{ + struct esw_mc_addr *mc_promisc; + + mlx5_eswitch_disable_pf_vf_vports(esw); + + mc_promisc = &esw->mc_promisc; + if (mc_promisc->uplink_rule) + mlx5_del_flow_rules(mc_promisc->uplink_rule); + + esw_destroy_legacy_table(esw); +} + +static int _mlx5_eswitch_set_vepa_locked(struct mlx5_eswitch *esw, + u8 setting) +{ + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_spec *spec; + int err = 0; + void *misc; + + if (!setting) { + esw_cleanup_vepa_rules(esw); + return 0; + } + + if (esw->fdb_table.legacy.vepa_uplink_rule) + return 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + /* Uplink rule forwards uplink traffic to FDB */ + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = esw->fdb_table.legacy.fdb; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, spec, + &flow_act, &dest, 1); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + goto out; + } else { + esw->fdb_table.legacy.vepa_uplink_rule = flow_rule; + } + + /* Star rule to forward all traffic to uplink vport */ + memset(&dest, 0, sizeof(dest)); + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport.num = MLX5_VPORT_UPLINK; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, NULL, + &flow_act, &dest, 1); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + goto out; + } else { + esw->fdb_table.legacy.vepa_star_rule = flow_rule; + } + +out: + kvfree(spec); + if (err) + esw_cleanup_vepa_rules(esw); + return err; +} + +int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting) +{ + int err = 0; + + if (!esw) + return -EOPNOTSUPP; + + if (!mlx5_esw_allowed(esw)) + return -EPERM; + + mutex_lock(&esw->state_lock); + if (esw->mode != MLX5_ESWITCH_LEGACY) { + err = -EOPNOTSUPP; + goto out; + } + + err = _mlx5_eswitch_set_vepa_locked(esw, setting); + +out: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting) +{ + if (!esw) + return -EOPNOTSUPP; + + if (!mlx5_esw_allowed(esw)) + return -EPERM; + + if (esw->mode != MLX5_ESWITCH_LEGACY) + return -EOPNOTSUPP; + + *setting = esw->fdb_table.legacy.vepa_uplink_rule ? 
1 : 0; + return 0; +} + +int esw_legacy_vport_acl_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + int ret; + + /* Only non-manager vports need ACL in legacy mode */ + if (mlx5_esw_is_manager_vport(esw, vport->vport)) + return 0; + + ret = esw_acl_ingress_lgcy_setup(esw, vport); + if (ret) + goto ingress_err; + + ret = esw_acl_egress_lgcy_setup(esw, vport); + if (ret) + goto egress_err; + + return 0; + +egress_err: + esw_acl_ingress_lgcy_cleanup(esw, vport); +ingress_err: + return ret; +} + +void esw_legacy_vport_acl_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + if (mlx5_esw_is_manager_vport(esw, vport->vport)) + return; + + esw_acl_egress_lgcy_cleanup(esw, vport); + esw_acl_ingress_lgcy_cleanup(esw, vport); +} + +int mlx5_esw_query_vport_drop_stats(struct mlx5_core_dev *dev, + struct mlx5_vport *vport, + struct mlx5_vport_drop_stats *stats) +{ + u64 rx_discard_vport_down, tx_discard_vport_down; + struct mlx5_eswitch *esw = dev->priv.eswitch; + u64 bytes = 0; + int err = 0; + + if (esw->mode != MLX5_ESWITCH_LEGACY) + return 0; + + mutex_lock(&esw->state_lock); + if (!vport->enabled) + goto unlock; + + if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_counter)) + mlx5_fc_query(dev, vport->egress.legacy.drop_counter, + &stats->rx_dropped, &bytes); + + if (vport->ingress.legacy.drop_counter) + mlx5_fc_query(dev, vport->ingress.legacy.drop_counter, + &stats->tx_dropped, &bytes); + + if (!MLX5_CAP_GEN(dev, receive_discard_vport_down) && + !MLX5_CAP_GEN(dev, transmit_discard_vport_down)) + goto unlock; + + err = mlx5_query_vport_down_stats(dev, vport->vport, 1, + &rx_discard_vport_down, + &tx_discard_vport_down); + if (err) + goto unlock; + + if (MLX5_CAP_GEN(dev, receive_discard_vport_down)) + stats->rx_dropped += rx_discard_vport_down; + if (MLX5_CAP_GEN(dev, transmit_discard_vport_down)) + stats->tx_dropped += tx_discard_vport_down; + +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + u16 vport, u16 vlan, u8 qos) +{ + u8 set_flags = 0; + int err = 0; + + if (!mlx5_esw_allowed(esw)) + return -EPERM; + + if (vlan || qos) + set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT; + + mutex_lock(&esw->state_lock); + if (esw->mode != MLX5_ESWITCH_LEGACY) { + if (!vlan) + goto unlock; /* compatibility with libvirt */ + + err = -EOPNOTSUPP; + goto unlock; + } + + err = __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags); + +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, + u16 vport, bool spoofchk) +{ + struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); + bool pschk; + int err = 0; + + if (!mlx5_esw_allowed(esw)) + return -EPERM; + if (IS_ERR(evport)) + return PTR_ERR(evport); + + mutex_lock(&esw->state_lock); + if (esw->mode != MLX5_ESWITCH_LEGACY) { + err = -EOPNOTSUPP; + goto unlock; + } + pschk = evport->info.spoofchk; + evport->info.spoofchk = spoofchk; + if (pschk && !is_valid_ether_addr(evport->info.mac)) + mlx5_core_warn(esw->dev, + "Spoofchk is set while MAC is invalid, vport(%d)\n", + evport->vport); + if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) + err = esw_acl_ingress_lgcy_setup(esw, evport); + if (err) + evport->info.spoofchk = pschk; + +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, + u16 vport, bool setting) +{ + struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); + int err = 0; + + if 
(!mlx5_esw_allowed(esw)) + return -EPERM; + if (IS_ERR(evport)) + return PTR_ERR(evport); + + mutex_lock(&esw->state_lock); + if (esw->mode != MLX5_ESWITCH_LEGACY) { + err = -EOPNOTSUPP; + goto unlock; + } + evport->info.trusted = setting; + if (evport->enabled) + esw_vport_change_handle_locked(evport); + +unlock: + mutex_unlock(&esw->state_lock); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.h new file mode 100644 index 000000000000..e0820bb72b57 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies Ltd */ + +#ifndef __MLX5_ESW_LEGACY_H__ +#define __MLX5_ESW_LEGACY_H__ + +#define MLX5_LEGACY_SRIOV_VPORT_EVENTS (MLX5_VPORT_UC_ADDR_CHANGE | \ + MLX5_VPORT_MC_ADDR_CHANGE | \ + MLX5_VPORT_PROMISC_CHANGE) + +struct mlx5_eswitch; + +int esw_legacy_enable(struct mlx5_eswitch *esw); +void esw_legacy_disable(struct mlx5_eswitch *esw); + +int esw_legacy_vport_acl_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +void esw_legacy_vport_acl_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); + +int mlx5_esw_query_vport_drop_stats(struct mlx5_core_dev *dev, + struct mlx5_vport *vport, + struct mlx5_vport_drop_stats *stats); +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c new file mode 100644 index 000000000000..794012c5c476 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c @@ -0,0 +1,585 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021 Mellanox Technologies. */ + +#include <linux/skbuff.h> +#include <net/psample.h> +#include "en/mapping.h" +#include "esw/sample.h" +#include "eswitch.h" +#include "en_tc.h" +#include "fs_core.h" + +#define MLX5_ESW_VPORT_TBL_SIZE_SAMPLE (64 * 1024) + +static const struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_sample_ns = { + .max_fte = MLX5_ESW_VPORT_TBL_SIZE_SAMPLE, + .max_num_groups = 0, /* default num of groups */ + .flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | MLX5_FLOW_TABLE_TUNNEL_EN_DECAP, +}; + +struct mlx5_esw_psample { + struct mlx5e_priv *priv; + struct mlx5_flow_table *termtbl; + struct mlx5_flow_handle *termtbl_rule; + DECLARE_HASHTABLE(hashtbl, 8); + struct mutex ht_lock; /* protect hashtbl */ + DECLARE_HASHTABLE(restore_hashtbl, 8); + struct mutex restore_lock; /* protect restore_hashtbl */ +}; + +struct mlx5_sampler { + struct hlist_node hlist; + u32 sampler_id; + u32 sample_ratio; + u32 sample_table_id; + u32 default_table_id; + int count; +}; + +struct mlx5_sample_flow { + struct mlx5_sampler *sampler; + struct mlx5_sample_restore *restore; + struct mlx5_flow_attr *pre_attr; + struct mlx5_flow_handle *pre_rule; + struct mlx5_flow_handle *rule; +}; + +struct mlx5_sample_restore { + struct hlist_node hlist; + struct mlx5_modify_hdr *modify_hdr; + struct mlx5_flow_handle *rule; + u32 obj_id; + int count; +}; + +static int +sampler_termtbl_create(struct mlx5_esw_psample *esw_psample) +{ + struct mlx5_core_dev *dev = esw_psample->priv->mdev; + struct mlx5_eswitch *esw = dev->priv.eswitch; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_act act = {}; + int err; + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, termination_table)) { + mlx5_core_warn(dev, "termination table is not supported\n"); + 
return -EOPNOTSUPP; + } + + root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!root_ns) { + mlx5_core_warn(dev, "failed to get FDB flow namespace\n"); + return -EOPNOTSUPP; + } + + ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION | MLX5_FLOW_TABLE_UNMANAGED; + ft_attr.autogroup.max_num_groups = 1; + ft_attr.prio = FDB_SLOW_PATH; + ft_attr.max_fte = 1; + ft_attr.level = 1; + esw_psample->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr); + if (IS_ERR(esw_psample->termtbl)) { + err = PTR_ERR(esw_psample->termtbl); + mlx5_core_warn(dev, "failed to create termtbl, err: %d\n", err); + return err; + } + + act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dest.vport.num = esw->manager_vport; + esw_psample->termtbl_rule = mlx5_add_flow_rules(esw_psample->termtbl, NULL, &act, &dest, 1); + if (IS_ERR(esw_psample->termtbl_rule)) { + err = PTR_ERR(esw_psample->termtbl_rule); + mlx5_core_warn(dev, "failed to create termtbl rule, err: %d\n", err); + mlx5_destroy_flow_table(esw_psample->termtbl); + return err; + } + + return 0; +} + +static void +sampler_termtbl_destroy(struct mlx5_esw_psample *esw_psample) +{ + mlx5_del_flow_rules(esw_psample->termtbl_rule); + mlx5_destroy_flow_table(esw_psample->termtbl); +} + +static int +sampler_obj_create(struct mlx5_core_dev *mdev, struct mlx5_sampler *sampler) +{ + u32 in[MLX5_ST_SZ_DW(create_sampler_obj_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + u64 general_obj_types; + void *obj; + int err; + + general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types); + if (!(general_obj_types & MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_SAMPLER)) + return -EOPNOTSUPP; + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, ignore_flow_level)) + return -EOPNOTSUPP; + + obj = MLX5_ADDR_OF(create_sampler_obj_in, in, sampler_object); + MLX5_SET(sampler_obj, obj, table_type, FS_FT_FDB); + MLX5_SET(sampler_obj, obj, ignore_flow_level, 1); + MLX5_SET(sampler_obj, obj, level, 1); + MLX5_SET(sampler_obj, obj, sample_ratio, sampler->sample_ratio); + MLX5_SET(sampler_obj, obj, sample_table_id, sampler->sample_table_id); + MLX5_SET(sampler_obj, obj, default_table_id, sampler->default_table_id); + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_SAMPLER); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (!err) + sampler->sampler_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + return err; +} + +static void +sampler_obj_destroy(struct mlx5_core_dev *mdev, u32 sampler_id) +{ + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_SAMPLER); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, sampler_id); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +static u32 +sampler_hash(u32 sample_ratio, u32 default_table_id) +{ + return jhash_2words(sample_ratio, default_table_id, 0); +} + +static int +sampler_cmp(u32 sample_ratio1, u32 default_table_id1, u32 sample_ratio2, u32 default_table_id2) +{ + return sample_ratio1 != sample_ratio2 || default_table_id1 != default_table_id2; +} + +static struct mlx5_sampler * +sampler_get(struct mlx5_esw_psample *esw_psample, u32 sample_ratio, u32 default_table_id) +{ + struct mlx5_sampler *sampler; + u32 hash_key; + int err; + + mutex_lock(&esw_psample->ht_lock); + hash_key = 
sampler_hash(sample_ratio, default_table_id); + hash_for_each_possible(esw_psample->hashtbl, sampler, hlist, hash_key) + if (!sampler_cmp(sampler->sample_ratio, sampler->default_table_id, + sample_ratio, default_table_id)) + goto add_ref; + + sampler = kzalloc(sizeof(*sampler), GFP_KERNEL); + if (!sampler) { + err = -ENOMEM; + goto err_alloc; + } + + sampler->sample_table_id = esw_psample->termtbl->id; + sampler->default_table_id = default_table_id; + sampler->sample_ratio = sample_ratio; + + err = sampler_obj_create(esw_psample->priv->mdev, sampler); + if (err) + goto err_create; + + hash_add(esw_psample->hashtbl, &sampler->hlist, hash_key); + +add_ref: + sampler->count++; + mutex_unlock(&esw_psample->ht_lock); + return sampler; + +err_create: + kfree(sampler); +err_alloc: + mutex_unlock(&esw_psample->ht_lock); + return ERR_PTR(err); +} + +static void +sampler_put(struct mlx5_esw_psample *esw_psample, struct mlx5_sampler *sampler) +{ + mutex_lock(&esw_psample->ht_lock); + if (--sampler->count == 0) { + hash_del(&sampler->hlist); + sampler_obj_destroy(esw_psample->priv->mdev, sampler->sampler_id); + kfree(sampler); + } + mutex_unlock(&esw_psample->ht_lock); +} + +static struct mlx5_modify_hdr * +sample_metadata_rule_get(struct mlx5_core_dev *mdev, u32 obj_id) +{ + struct mlx5e_tc_mod_hdr_acts mod_acts = {}; + struct mlx5_modify_hdr *modify_hdr; + int err; + + err = mlx5e_tc_match_to_reg_set(mdev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB, + CHAIN_TO_REG, obj_id); + if (err) + goto err_set_regc0; + + modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_FDB, + mod_acts.num_actions, + mod_acts.actions); + if (IS_ERR(modify_hdr)) { + err = PTR_ERR(modify_hdr); + goto err_modify_hdr; + } + + dealloc_mod_hdr_actions(&mod_acts); + return modify_hdr; + +err_modify_hdr: + dealloc_mod_hdr_actions(&mod_acts); +err_set_regc0: + return ERR_PTR(err); +} + +static struct mlx5_sample_restore * +sample_restore_get(struct mlx5_esw_psample *esw_psample, u32 obj_id) +{ + struct mlx5_core_dev *mdev = esw_psample->priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; + struct mlx5_sample_restore *restore; + struct mlx5_modify_hdr *modify_hdr; + int err; + + mutex_lock(&esw_psample->restore_lock); + hash_for_each_possible(esw_psample->restore_hashtbl, restore, hlist, obj_id) + if (restore->obj_id == obj_id) + goto add_ref; + + restore = kzalloc(sizeof(*restore), GFP_KERNEL); + if (!restore) { + err = -ENOMEM; + goto err_alloc; + } + restore->obj_id = obj_id; + + modify_hdr = sample_metadata_rule_get(mdev, obj_id); + if (IS_ERR(modify_hdr)) { + err = PTR_ERR(modify_hdr); + goto err_modify_hdr; + } + restore->modify_hdr = modify_hdr; + + restore->rule = esw_add_restore_rule(esw, obj_id); + if (IS_ERR(restore->rule)) { + err = PTR_ERR(restore->rule); + goto err_restore; + } + + hash_add(esw_psample->restore_hashtbl, &restore->hlist, obj_id); +add_ref: + restore->count++; + mutex_unlock(&esw_psample->restore_lock); + return restore; + +err_restore: + mlx5_modify_header_dealloc(mdev, restore->modify_hdr); +err_modify_hdr: + kfree(restore); +err_alloc: + mutex_unlock(&esw_psample->restore_lock); + return ERR_PTR(err); +} + +static void +sample_restore_put(struct mlx5_esw_psample *esw_psample, struct mlx5_sample_restore *restore) +{ + mutex_lock(&esw_psample->restore_lock); + if (--restore->count == 0) + hash_del(&restore->hlist); + mutex_unlock(&esw_psample->restore_lock); + + if (!restore->count) { + mlx5_del_flow_rules(restore->rule); + mlx5_modify_header_dealloc(esw_psample->priv->mdev, 
restore->modify_hdr); + kfree(restore); + } +} + +void mlx5_esw_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj) +{ + u32 trunc_size = mapped_obj->sample.trunc_size; + struct psample_group psample_group = {}; + struct psample_metadata md = {}; + + md.trunc_size = trunc_size ? min(trunc_size, skb->len) : skb->len; + md.in_ifindex = skb->dev->ifindex; + psample_group.group_num = mapped_obj->sample.group_id; + psample_group.net = &init_net; + skb_push(skb, skb->mac_len); + + psample_sample_packet(&psample_group, skb, mapped_obj->sample.rate, &md); +} + +/* For the following typical flow table: + * + * +-------------------------------+ + * + original flow table + + * +-------------------------------+ + * + original match + + * +-------------------------------+ + * + sample action + other actions + + * +-------------------------------+ + * + * We translate the tc filter with sample action to the following HW model: + * + * +---------------------+ + * + original flow table + + * +---------------------+ + * + original match + + * +---------------------+ + * | + * v + * +------------------------------------------------+ + * + Flow Sampler Object + + * +------------------------------------------------+ + * + sample ratio + + * +------------------------------------------------+ + * + sample table id | default table id + + * +------------------------------------------------+ + * | | + * v v + * +-----------------------------+ +----------------------------------------+ + * + sample table + + default table per <vport, chain, prio> + + * +-----------------------------+ +----------------------------------------+ + * + forward to management vport + + original match + + * +-----------------------------+ +----------------------------------------+ + * + other actions + + * +----------------------------------------+ + */ +struct mlx5_flow_handle * +mlx5_esw_sample_offload(struct mlx5_esw_psample *esw_psample, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_vport_tbl_attr per_vport_tbl_attr; + struct mlx5_esw_flow_attr *pre_esw_attr; + struct mlx5_mapped_obj restore_obj = {}; + struct mlx5_sample_flow *sample_flow; + struct mlx5_sample_attr *sample_attr; + struct mlx5_flow_table *default_tbl; + struct mlx5_flow_attr *pre_attr; + struct mlx5_eswitch *esw; + u32 obj_id; + int err; + + if (IS_ERR_OR_NULL(esw_psample)) + return ERR_PTR(-EOPNOTSUPP); + + /* If slow path flag is set, e.g. when the neigh is invalid for encap, + * don't offload sample action. + */ + esw = esw_psample->priv->mdev->priv.eswitch; + if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) + return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); + + sample_flow = kzalloc(sizeof(*sample_flow), GFP_KERNEL); + if (!sample_flow) + return ERR_PTR(-ENOMEM); + esw_attr->sample->sample_flow = sample_flow; + + /* Allocate default table per vport, chain and prio. Otherwise, there is + * only one default table for the same sampler object. Rules with different + * prio and chain may overlap. For CT sample action, per vport default + * table is needed to restore the metadata. 
+ */ + per_vport_tbl_attr.chain = attr->chain; + per_vport_tbl_attr.prio = attr->prio; + per_vport_tbl_attr.vport = esw_attr->in_rep->vport; + per_vport_tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns; + default_tbl = mlx5_esw_vporttbl_get(esw, &per_vport_tbl_attr); + if (IS_ERR(default_tbl)) { + err = PTR_ERR(default_tbl); + goto err_default_tbl; + } + + /* Perform the original matches on the default table. + * Offload all actions except the sample action. + */ + esw_attr->sample->sample_default_tbl = default_tbl; + /* When offloading sample and encap action, if there is no valid + * neigh data struct, a slow path rule is offloaded first. Source + * port metadata match is set at that time. A per vport table is + * already allocated. No need to match it again. So clear the source + * port metadata match. + */ + mlx5_eswitch_clear_rule_source_port(esw, spec); + sample_flow->rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); + if (IS_ERR(sample_flow->rule)) { + err = PTR_ERR(sample_flow->rule); + goto err_offload_rule; + } + + /* Create sampler object. */ + sample_flow->sampler = sampler_get(esw_psample, esw_attr->sample->rate, default_tbl->id); + if (IS_ERR(sample_flow->sampler)) { + err = PTR_ERR(sample_flow->sampler); + goto err_sampler; + } + + /* Create an id mapping reg_c0 value to sample object. */ + restore_obj.type = MLX5_MAPPED_OBJ_SAMPLE; + restore_obj.sample.group_id = esw_attr->sample->group_num; + restore_obj.sample.rate = esw_attr->sample->rate; + restore_obj.sample.trunc_size = esw_attr->sample->trunc_size; + err = mapping_add(esw->offloads.reg_c0_obj_pool, &restore_obj, &obj_id); + if (err) + goto err_obj_id; + esw_attr->sample->restore_obj_id = obj_id; + + /* Create sample restore context. */ + sample_flow->restore = sample_restore_get(esw_psample, obj_id); + if (IS_ERR(sample_flow->restore)) { + err = PTR_ERR(sample_flow->restore); + goto err_sample_restore; + } + + /* Perform the original matches on the original table. Offload the + * sample action. The destination is the sampler object. + */ + pre_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB); + if (!pre_attr) { + err = -ENOMEM; + goto err_alloc_flow_attr; + } + sample_attr = kzalloc(sizeof(*sample_attr), GFP_KERNEL); + if (!sample_attr) { + err = -ENOMEM; + goto err_alloc_sample_attr; + } + pre_esw_attr = pre_attr->esw_attr; + pre_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + pre_attr->modify_hdr = sample_flow->restore->modify_hdr; + pre_attr->flags = MLX5_ESW_ATTR_FLAG_SAMPLE; + pre_attr->chain = attr->chain; + pre_attr->prio = attr->prio; + pre_esw_attr->sample = sample_attr; + pre_esw_attr->sample->sampler_id = sample_flow->sampler->sampler_id; + pre_esw_attr->in_mdev = esw_attr->in_mdev; + pre_esw_attr->in_rep = esw_attr->in_rep; + sample_flow->pre_rule = mlx5_eswitch_add_offloaded_rule(esw, spec, pre_attr); + if (IS_ERR(sample_flow->pre_rule)) { + err = PTR_ERR(sample_flow->pre_rule); + goto err_pre_offload_rule; + } + sample_flow->pre_attr = pre_attr; + + return sample_flow->rule; + +err_pre_offload_rule: + kfree(sample_attr); +err_alloc_sample_attr: + kfree(pre_attr); +err_alloc_flow_attr: + sample_restore_put(esw_psample, sample_flow->restore); +err_sample_restore: + mapping_remove(esw->offloads.reg_c0_obj_pool, obj_id); +err_obj_id: + sampler_put(esw_psample, sample_flow->sampler); +err_sampler: + /* For sample offload, rule is added in default_tbl. 
No need to call + * mlx5_esw_chains_put_table() + */ + attr->prio = 0; + attr->chain = 0; + mlx5_eswitch_del_offloaded_rule(esw, sample_flow->rule, attr); +err_offload_rule: + mlx5_esw_vporttbl_put(esw, &per_vport_tbl_attr); +err_default_tbl: + return ERR_PTR(err); +} + +void +mlx5_esw_sample_unoffload(struct mlx5_esw_psample *esw_psample, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_sample_flow *sample_flow; + struct mlx5_vport_tbl_attr tbl_attr; + struct mlx5_flow_attr *pre_attr; + struct mlx5_eswitch *esw; + + if (IS_ERR_OR_NULL(esw_psample)) + return; + + /* If slow path flag is set, sample action is not offloaded. + * No need to delete sample rule. + */ + esw = esw_psample->priv->mdev->priv.eswitch; + if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) { + mlx5_eswitch_del_offloaded_rule(esw, rule, attr); + return; + } + + sample_flow = esw_attr->sample->sample_flow; + pre_attr = sample_flow->pre_attr; + memset(pre_attr, 0, sizeof(*pre_attr)); + esw = esw_psample->priv->mdev->priv.eswitch; + mlx5_eswitch_del_offloaded_rule(esw, sample_flow->pre_rule, pre_attr); + mlx5_eswitch_del_offloaded_rule(esw, sample_flow->rule, attr); + + sample_restore_put(esw_psample, sample_flow->restore); + mapping_remove(esw->offloads.reg_c0_obj_pool, esw_attr->sample->restore_obj_id); + sampler_put(esw_psample, sample_flow->sampler); + tbl_attr.chain = attr->chain; + tbl_attr.prio = attr->prio; + tbl_attr.vport = esw_attr->in_rep->vport; + tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns; + mlx5_esw_vporttbl_put(esw, &tbl_attr); + + kfree(pre_attr->esw_attr->sample); + kfree(pre_attr); + kfree(sample_flow); +} + +struct mlx5_esw_psample * +mlx5_esw_sample_init(struct mlx5e_priv *priv) +{ + struct mlx5_esw_psample *esw_psample; + int err; + + esw_psample = kzalloc(sizeof(*esw_psample), GFP_KERNEL); + if (!esw_psample) + return ERR_PTR(-ENOMEM); + esw_psample->priv = priv; + err = sampler_termtbl_create(esw_psample); + if (err) + goto err_termtbl; + + mutex_init(&esw_psample->ht_lock); + mutex_init(&esw_psample->restore_lock); + + return esw_psample; + +err_termtbl: + kfree(esw_psample); + return ERR_PTR(err); +} + +void +mlx5_esw_sample_cleanup(struct mlx5_esw_psample *esw_psample) +{ + if (IS_ERR_OR_NULL(esw_psample)) + return; + + mutex_destroy(&esw_psample->restore_lock); + mutex_destroy(&esw_psample->ht_lock); + sampler_termtbl_destroy(esw_psample); + kfree(esw_psample); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h new file mode 100644 index 000000000000..2a3f4be10030 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. 
*/ + +#ifndef __MLX5_EN_TC_SAMPLE_H__ +#define __MLX5_EN_TC_SAMPLE_H__ + +#include "en.h" +#include "eswitch.h" + +struct mlx5e_priv; +struct mlx5_flow_attr; +struct mlx5_esw_psample; + +struct mlx5_sample_attr { + u32 group_num; + u32 rate; + u32 trunc_size; + u32 restore_obj_id; + u32 sampler_id; + struct mlx5_flow_table *sample_default_tbl; + struct mlx5_sample_flow *sample_flow; +}; + +void mlx5_esw_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj); + +struct mlx5_flow_handle * +mlx5_esw_sample_offload(struct mlx5_esw_psample *sample_priv, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr); + +void +mlx5_esw_sample_unoffload(struct mlx5_esw_psample *sample_priv, + struct mlx5_flow_handle *rule, + struct mlx5_flow_attr *attr); + +struct mlx5_esw_psample * +mlx5_esw_sample_init(struct mlx5e_priv *priv); + +void +mlx5_esw_sample_cleanup(struct mlx5_esw_psample *esw_psample); + +#endif /* __MLX5_EN_TC_SAMPLE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c new file mode 100644 index 000000000000..9e72118f2e4c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021 Mellanox Technologies. + +#include "eswitch.h" + +/* This struct is used as a key to the hash table and we need it to be packed + * so hash result is consistent + */ +struct mlx5_vport_key { + u32 chain; + u16 prio; + u16 vport; + u16 vhca_id; + const struct esw_vport_tbl_namespace *vport_ns; +} __packed; + +struct mlx5_vport_table { + struct hlist_node hlist; + struct mlx5_flow_table *fdb; + u32 num_rules; + struct mlx5_vport_key key; +}; + +static struct mlx5_flow_table * +esw_vport_tbl_create(struct mlx5_eswitch *esw, struct mlx5_flow_namespace *ns, + const struct esw_vport_tbl_namespace *vport_ns) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_table *fdb; + + if (vport_ns->max_num_groups) + ft_attr.autogroup.max_num_groups = vport_ns->max_num_groups; + else + ft_attr.autogroup.max_num_groups = esw->params.large_group_num; + ft_attr.max_fte = vport_ns->max_fte; + ft_attr.prio = FDB_PER_VPORT; + ft_attr.flags = vport_ns->flags; + fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(fdb)) { + esw_warn(esw->dev, "Failed to create per vport FDB Table err %ld\n", + PTR_ERR(fdb)); + } + + return fdb; +} + +static u32 flow_attr_to_vport_key(struct mlx5_eswitch *esw, + struct mlx5_vport_tbl_attr *attr, + struct mlx5_vport_key *key) +{ + key->vport = attr->vport; + key->chain = attr->chain; + key->prio = attr->prio; + key->vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); + key->vport_ns = attr->vport_ns; + return jhash(key, sizeof(*key), 0); +} + +/* caller must hold vports.lock */ +static struct mlx5_vport_table * +esw_vport_tbl_lookup(struct mlx5_eswitch *esw, struct mlx5_vport_key *skey, u32 key) +{ + struct mlx5_vport_table *e; + + hash_for_each_possible(esw->fdb_table.offloads.vports.table, e, hlist, key) + if (!memcmp(&e->key, skey, sizeof(*skey))) + return e; + + return NULL; +} + +struct mlx5_flow_table * +mlx5_esw_vporttbl_get(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr) +{ + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *fdb; + struct mlx5_vport_table *e; + struct mlx5_vport_key skey; + u32 hkey; + + mutex_lock(&esw->fdb_table.offloads.vports.lock); + hkey = flow_attr_to_vport_key(esw, attr, &skey); + e = 
esw_vport_tbl_lookup(esw, &skey, hkey); + if (e) { + e->num_rules++; + goto out; + } + + e = kzalloc(sizeof(*e), GFP_KERNEL); + if (!e) { + fdb = ERR_PTR(-ENOMEM); + goto err_alloc; + } + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!ns) { + esw_warn(dev, "Failed to get FDB namespace\n"); + fdb = ERR_PTR(-ENOENT); + goto err_ns; + } + + fdb = esw_vport_tbl_create(esw, ns, attr->vport_ns); + if (IS_ERR(fdb)) + goto err_ns; + + e->fdb = fdb; + e->num_rules = 1; + e->key = skey; + hash_add(esw->fdb_table.offloads.vports.table, &e->hlist, hkey); +out: + mutex_unlock(&esw->fdb_table.offloads.vports.lock); + return e->fdb; + +err_ns: + kfree(e); +err_alloc: + mutex_unlock(&esw->fdb_table.offloads.vports.lock); + return fdb; +} + +void +mlx5_esw_vporttbl_put(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr) +{ + struct mlx5_vport_table *e; + struct mlx5_vport_key key; + u32 hkey; + + mutex_lock(&esw->fdb_table.offloads.vports.lock); + hkey = flow_attr_to_vport_key(esw, attr, &key); + e = esw_vport_tbl_lookup(esw, &key, hkey); + if (!e || --e->num_rules) + goto out; + + hash_del(&e->hlist); + mlx5_destroy_flow_table(e->fdb); + kfree(e); +out: + mutex_unlock(&esw->fdb_table.offloads.vports.lock); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index aba17835465b..570f2280823c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -36,6 +36,7 @@ #include <linux/mlx5/vport.h> #include <linux/mlx5/fs.h> #include "esw/acl/lgcy.h" +#include "esw/legacy.h" #include "mlx5_core.h" #include "lib/eq.h" #include "eswitch.h" @@ -61,9 +62,6 @@ struct vport_addr { bool mc_promisc; }; -static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw); -static void esw_cleanup_vepa_rules(struct mlx5_eswitch *esw); - static int mlx5_eswitch_check(const struct mlx5_core_dev *dev) { if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) @@ -90,20 +88,17 @@ struct mlx5_eswitch *mlx5_devlink_eswitch_get(struct devlink *devlink) struct mlx5_vport *__must_check mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num) { - u16 idx; + struct mlx5_vport *vport; if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager)) return ERR_PTR(-EPERM); - idx = mlx5_eswitch_vport_num_to_index(esw, vport_num); - - if (idx > esw->total_vports - 1) { - esw_debug(esw->dev, "vport out of range: num(0x%x), idx(0x%x)\n", - vport_num, idx); + vport = xa_load(&esw->vports, vport_num); + if (!vport) { + esw_debug(esw->dev, "vport out of range: num(0x%x)\n", vport_num); return ERR_PTR(-EINVAL); } - - return &esw->vports[idx]; + return vport; } static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, @@ -278,224 +273,6 @@ esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u16 vport) return __esw_fdb_set_vport_rule(esw, vport, true, mac_c, mac_v); } -enum { - LEGACY_VEPA_PRIO = 0, - LEGACY_FDB_PRIO, -}; - -static int esw_create_legacy_vepa_table(struct mlx5_eswitch *esw) -{ - struct mlx5_flow_table_attr ft_attr = {}; - struct mlx5_core_dev *dev = esw->dev; - struct mlx5_flow_namespace *root_ns; - struct mlx5_flow_table *fdb; - int err; - - root_ns = mlx5_get_fdb_sub_ns(dev, 0); - if (!root_ns) { - esw_warn(dev, "Failed to get FDB flow namespace\n"); - return -EOPNOTSUPP; - } - - /* num FTE 2, num FG 2 */ - ft_attr.prio = LEGACY_VEPA_PRIO; - ft_attr.max_fte = 2; - ft_attr.autogroup.max_num_groups = 2; - fdb = 
mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr); - if (IS_ERR(fdb)) { - err = PTR_ERR(fdb); - esw_warn(dev, "Failed to create VEPA FDB err %d\n", err); - return err; - } - esw->fdb_table.legacy.vepa_fdb = fdb; - - return 0; -} - -static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) -{ - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - struct mlx5_flow_table_attr ft_attr = {}; - struct mlx5_core_dev *dev = esw->dev; - struct mlx5_flow_namespace *root_ns; - struct mlx5_flow_table *fdb; - struct mlx5_flow_group *g; - void *match_criteria; - int table_size; - u32 *flow_group_in; - u8 *dmac; - int err = 0; - - esw_debug(dev, "Create FDB log_max_size(%d)\n", - MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); - - root_ns = mlx5_get_fdb_sub_ns(dev, 0); - if (!root_ns) { - esw_warn(dev, "Failed to get FDB flow namespace\n"); - return -EOPNOTSUPP; - } - - flow_group_in = kvzalloc(inlen, GFP_KERNEL); - if (!flow_group_in) - return -ENOMEM; - - table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); - ft_attr.max_fte = table_size; - ft_attr.prio = LEGACY_FDB_PRIO; - fdb = mlx5_create_flow_table(root_ns, &ft_attr); - if (IS_ERR(fdb)) { - err = PTR_ERR(fdb); - esw_warn(dev, "Failed to create FDB Table err %d\n", err); - goto out; - } - esw->fdb_table.legacy.fdb = fdb; - - /* Addresses group : Full match unicast/multicast addresses */ - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_OUTER_HEADERS); - match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); - dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, outer_headers.dmac_47_16); - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); - /* Preserve 2 entries for allmulti and promisc rules*/ - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 3); - eth_broadcast_addr(dmac); - g = mlx5_create_flow_group(fdb, flow_group_in); - if (IS_ERR(g)) { - err = PTR_ERR(g); - esw_warn(dev, "Failed to create flow group err(%d)\n", err); - goto out; - } - esw->fdb_table.legacy.addr_grp = g; - - /* Allmulti group : One rule that forwards any mcast traffic */ - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_OUTER_HEADERS); - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 2); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 2); - eth_zero_addr(dmac); - dmac[0] = 0x01; - g = mlx5_create_flow_group(fdb, flow_group_in); - if (IS_ERR(g)) { - err = PTR_ERR(g); - esw_warn(dev, "Failed to create allmulti flow group err(%d)\n", err); - goto out; - } - esw->fdb_table.legacy.allmulti_grp = g; - - /* Promiscuous group : - * One rule that forward all unmatched traffic from previous groups - */ - eth_zero_addr(dmac); - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_MISC_PARAMETERS); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port); - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 1); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 1); - g = mlx5_create_flow_group(fdb, flow_group_in); - if (IS_ERR(g)) { - err = PTR_ERR(g); - esw_warn(dev, "Failed to create promisc flow group err(%d)\n", err); - goto out; - } - esw->fdb_table.legacy.promisc_grp = g; - -out: - if (err) - esw_destroy_legacy_fdb_table(esw); - - kvfree(flow_group_in); - return err; -} - -static void esw_destroy_legacy_vepa_table(struct mlx5_eswitch *esw) -{ - 
esw_debug(esw->dev, "Destroy VEPA Table\n"); - if (!esw->fdb_table.legacy.vepa_fdb) - return; - - mlx5_destroy_flow_table(esw->fdb_table.legacy.vepa_fdb); - esw->fdb_table.legacy.vepa_fdb = NULL; -} - -static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw) -{ - esw_debug(esw->dev, "Destroy FDB Table\n"); - if (!esw->fdb_table.legacy.fdb) - return; - - if (esw->fdb_table.legacy.promisc_grp) - mlx5_destroy_flow_group(esw->fdb_table.legacy.promisc_grp); - if (esw->fdb_table.legacy.allmulti_grp) - mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp); - if (esw->fdb_table.legacy.addr_grp) - mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp); - mlx5_destroy_flow_table(esw->fdb_table.legacy.fdb); - - esw->fdb_table.legacy.fdb = NULL; - esw->fdb_table.legacy.addr_grp = NULL; - esw->fdb_table.legacy.allmulti_grp = NULL; - esw->fdb_table.legacy.promisc_grp = NULL; -} - -static int esw_create_legacy_table(struct mlx5_eswitch *esw) -{ - int err; - - memset(&esw->fdb_table.legacy, 0, sizeof(struct legacy_fdb)); - - err = esw_create_legacy_vepa_table(esw); - if (err) - return err; - - err = esw_create_legacy_fdb_table(esw); - if (err) - esw_destroy_legacy_vepa_table(esw); - - return err; -} - -static void esw_destroy_legacy_table(struct mlx5_eswitch *esw) -{ - esw_cleanup_vepa_rules(esw); - esw_destroy_legacy_fdb_table(esw); - esw_destroy_legacy_vepa_table(esw); -} - -#define MLX5_LEGACY_SRIOV_VPORT_EVENTS (MLX5_VPORT_UC_ADDR_CHANGE | \ - MLX5_VPORT_MC_ADDR_CHANGE | \ - MLX5_VPORT_PROMISC_CHANGE) - -static int esw_legacy_enable(struct mlx5_eswitch *esw) -{ - struct mlx5_vport *vport; - int ret, i; - - ret = esw_create_legacy_table(esw); - if (ret) - return ret; - - mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) - vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; - - ret = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_LEGACY_SRIOV_VPORT_EVENTS); - if (ret) - esw_destroy_legacy_table(esw); - return ret; -} - -static void esw_legacy_disable(struct mlx5_eswitch *esw) -{ - struct esw_mc_addr *mc_promisc; - - mlx5_eswitch_disable_pf_vf_vports(esw); - - mc_promisc = &esw->mc_promisc; - if (mc_promisc->uplink_rule) - mlx5_del_flow_rules(mc_promisc->uplink_rule); - - esw_destroy_legacy_table(esw); -} - /* E-Switch vport UC/MC lists management */ typedef int (*vport_addr_action)(struct mlx5_eswitch *esw, struct vport_addr *vaddr); @@ -565,9 +342,10 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw, { u8 *mac = vaddr->node.addr; struct mlx5_vport *vport; - u16 i, vport_num; + unsigned long i; + u16 vport_num; - mlx5_esw_for_all_vports(esw, i, vport) { + mlx5_esw_for_each_vport(esw, i, vport) { struct hlist_head *vport_hash = vport->mc_list; struct vport_addr *iter_vaddr = l2addr_hash_find(vport_hash, @@ -917,7 +695,7 @@ static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, (promisc_all || promisc_mc)); } -static void esw_vport_change_handle_locked(struct mlx5_vport *vport) +void esw_vport_change_handle_locked(struct mlx5_vport *vport) { struct mlx5_core_dev *dev = vport->dev; struct mlx5_eswitch *esw = dev->priv.eswitch; @@ -1141,6 +919,8 @@ int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, struct mlx5_vport *vport; vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return PTR_ERR(vport); if (!vport->qos.enabled) return -EOPNOTSUPP; @@ -1166,56 +946,20 @@ static void node_guid_gen_from_mac(u64 *node_guid, const u8 *mac) ((u8 *)node_guid)[0] = mac[5]; } -static int esw_vport_create_legacy_acl_tables(struct 
mlx5_eswitch *esw, - struct mlx5_vport *vport) -{ - int ret; - - /* Only non manager vports need ACL in legacy mode */ - if (mlx5_esw_is_manager_vport(esw, vport->vport)) - return 0; - - ret = esw_acl_ingress_lgcy_setup(esw, vport); - if (ret) - goto ingress_err; - - ret = esw_acl_egress_lgcy_setup(esw, vport); - if (ret) - goto egress_err; - - return 0; - -egress_err: - esw_acl_ingress_lgcy_cleanup(esw, vport); -ingress_err: - return ret; -} - static int esw_vport_setup_acl(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { if (esw->mode == MLX5_ESWITCH_LEGACY) - return esw_vport_create_legacy_acl_tables(esw, vport); + return esw_legacy_vport_acl_setup(esw, vport); else return esw_vport_create_offloads_acl_tables(esw, vport); } -static void esw_vport_destroy_legacy_acl_tables(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) - -{ - if (mlx5_esw_is_manager_vport(esw, vport->vport)) - return; - - esw_acl_egress_lgcy_cleanup(esw, vport); - esw_acl_ingress_lgcy_cleanup(esw, vport); -} - static void esw_vport_cleanup_acl(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { if (esw->mode == MLX5_ESWITCH_LEGACY) - esw_vport_destroy_legacy_acl_tables(esw, vport); + esw_legacy_vport_acl_cleanup(esw, vport); else esw_vport_destroy_offloads_acl_tables(esw, vport); } @@ -1231,7 +975,7 @@ static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) return err; /* Attach vport to the eswitch rate limiter */ - esw_vport_enable_qos(esw, vport, vport->info.max_rate, vport->qos.bw_share); + esw_vport_enable_qos(esw, vport, vport->qos.max_rate, vport->qos.bw_share); if (mlx5_esw_is_manager_vport(esw, vport_num)) return 0; @@ -1279,6 +1023,8 @@ int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num, int ret; vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return PTR_ERR(vport); mutex_lock(&esw->state_lock); WARN_ON(vport->enabled); @@ -1326,6 +1072,8 @@ void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, u16 vport_num) struct mlx5_vport *vport; vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return; mutex_lock(&esw->state_lock); if (!vport->enabled) @@ -1382,15 +1130,9 @@ const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) { int outlen = MLX5_ST_SZ_BYTES(query_esw_functions_out); u32 in[MLX5_ST_SZ_DW(query_esw_functions_in)] = {}; - u16 max_sf_vports; u32 *out; int err; - max_sf_vports = mlx5_sf_max_functions(dev); - /* Device interface is array of 64-bits */ - if (max_sf_vports) - outlen += DIV_ROUND_UP(max_sf_vports, BITS_PER_TYPE(__be64)) * sizeof(__be64); - out = kvzalloc(outlen, GFP_KERNEL); if (!out) return ERR_PTR(-ENOMEM); @@ -1431,7 +1173,7 @@ static void mlx5_eswitch_event_handlers_unregister(struct mlx5_eswitch *esw) static void mlx5_eswitch_clear_vf_vports_info(struct mlx5_eswitch *esw) { struct mlx5_vport *vport; - int i; + unsigned long i; mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) { memset(&vport->qos, 0, sizeof(vport->qos)); @@ -1441,8 +1183,6 @@ static void mlx5_eswitch_clear_vf_vports_info(struct mlx5_eswitch *esw) } /* Public E-Switch API */ -#define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev)) - int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num, enum mlx5_eswitch_vport_event enabled_events) { @@ -1471,20 +1211,25 @@ void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num) void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs) { - int i; + struct mlx5_vport *vport; + unsigned long i; - 
mlx5_esw_for_each_vf_vport_num_reverse(esw, i, num_vfs) - mlx5_eswitch_unload_vport(esw, i); + mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) { + if (!vport->enabled) + continue; + mlx5_eswitch_unload_vport(esw, vport->vport); + } } int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs, enum mlx5_eswitch_vport_event enabled_events) { + struct mlx5_vport *vport; + unsigned long i; int err; - int i; - mlx5_esw_for_each_vf_vport_num(esw, i, num_vfs) { - err = mlx5_eswitch_load_vport(esw, i, enabled_events); + mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) { + err = mlx5_eswitch_load_vport(esw, vport->vport, enabled_events); if (err) goto vf_err; } @@ -1492,7 +1237,7 @@ int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs, return 0; vf_err: - mlx5_eswitch_unload_vf_vports(esw, i - 1); + mlx5_eswitch_unload_vf_vports(esw, num_vfs); return err; } @@ -1625,6 +1370,47 @@ static void mlx5_esw_mode_change_notify(struct mlx5_eswitch *esw, u16 mode) blocking_notifier_call_chain(&esw->n_head, 0, &info); } +static int mlx5_esw_acls_ns_init(struct mlx5_eswitch *esw) +{ + struct mlx5_core_dev *dev = esw->dev; + int total_vports; + int err; + + total_vports = mlx5_eswitch_get_total_vports(dev); + + if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) { + err = mlx5_fs_egress_acls_init(dev, total_vports); + if (err) + return err; + } else { + esw_warn(dev, "egress ACL is not supported by FW\n"); + } + + if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) { + err = mlx5_fs_ingress_acls_init(dev, total_vports); + if (err) + goto err; + } else { + esw_warn(dev, "ingress ACL is not supported by FW\n"); + } + return 0; + +err: + if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) + mlx5_fs_egress_acls_cleanup(dev); + return err; +} + +static void mlx5_esw_acls_ns_cleanup(struct mlx5_eswitch *esw) +{ + struct mlx5_core_dev *dev = esw->dev; + + if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) + mlx5_fs_ingress_acls_cleanup(dev); + if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) + mlx5_fs_egress_acls_cleanup(dev); +} + /** * mlx5_eswitch_enable_locked - Enable eswitch * @esw: Pointer to eswitch @@ -1653,14 +1439,12 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs) return -EOPNOTSUPP; } - if (!MLX5_CAP_ESW_INGRESS_ACL(esw->dev, ft_support)) - esw_warn(esw->dev, "ingress ACL is not supported by FW\n"); - - if (!MLX5_CAP_ESW_EGRESS_ACL(esw->dev, ft_support)) - esw_warn(esw->dev, "engress ACL is not supported by FW\n"); - mlx5_eswitch_get_devlink_param(esw); + err = mlx5_esw_acls_ns_init(esw); + if (err) + return err; + mlx5_eswitch_update_num_of_vfs(esw, num_vfs); esw_create_tsar(esw); @@ -1696,6 +1480,7 @@ abort: mlx5_rescan_drivers(esw->dev); esw_destroy_tsar(esw); + mlx5_esw_acls_ns_cleanup(esw); return err; } @@ -1711,10 +1496,10 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { int ret; - if (!ESW_ALLOWED(esw)) + if (!mlx5_esw_allowed(esw)) return 0; - mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); if (esw->mode == MLX5_ESWITCH_NONE) { ret = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, num_vfs); } else { @@ -1726,7 +1511,7 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) if (!ret) esw->esw_funcs.num_vfs = num_vfs; } - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return ret; } @@ -1764,6 +1549,7 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf) mlx5_rescan_drivers(esw->dev); esw_destroy_tsar(esw); + mlx5_esw_acls_ns_cleanup(esw); if (clear_vf)
mlx5_eswitch_clear_vf_vports_info(esw); @@ -1771,33 +1557,170 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf) void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) { - if (!ESW_ALLOWED(esw)) + if (!mlx5_esw_allowed(esw)) return; - mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); mlx5_eswitch_disable_locked(esw, clear_vf); esw->esw_funcs.num_vfs = 0; - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); +} + +static int mlx5_query_hca_cap_host_pf(struct mlx5_core_dev *dev, void *out) +{ + u16 opmod = (MLX5_CAP_GENERAL << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01); + u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)] = {}; + + MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); + MLX5_SET(query_hca_cap_in, in, op_mod, opmod); + MLX5_SET(query_hca_cap_in, in, function_id, MLX5_VPORT_PF); + MLX5_SET(query_hca_cap_in, in, other_function, true); + return mlx5_cmd_exec_inout(dev, query_hca_cap, in, out); +} + +int mlx5_esw_sf_max_hpf_functions(struct mlx5_core_dev *dev, u16 *max_sfs, u16 *sf_base_id) + +{ + int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + void *query_ctx; + void *hca_caps; + int err; + + if (!mlx5_core_is_ecpf(dev)) { + *max_sfs = 0; + return 0; + } + + query_ctx = kzalloc(query_out_sz, GFP_KERNEL); + if (!query_ctx) + return -ENOMEM; + + err = mlx5_query_hca_cap_host_pf(dev, query_ctx); + if (err) + goto out_free; + + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); + *max_sfs = MLX5_GET(cmd_hca_cap, hca_caps, max_num_sf); + *sf_base_id = MLX5_GET(cmd_hca_cap, hca_caps, sf_base_id); + +out_free: + kfree(query_ctx); + return err; +} + +static int mlx5_esw_vport_alloc(struct mlx5_eswitch *esw, struct mlx5_core_dev *dev, + int index, u16 vport_num) +{ + struct mlx5_vport *vport; + int err; + + vport = kzalloc(sizeof(*vport), GFP_KERNEL); + if (!vport) + return -ENOMEM; + + vport->dev = esw->dev; + vport->vport = vport_num; + vport->index = index; + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; + INIT_WORK(&vport->vport_change_handler, esw_vport_change_handler); + err = xa_insert(&esw->vports, vport_num, vport, GFP_KERNEL); + if (err) + goto insert_err; + + esw->total_vports++; + return 0; + +insert_err: + kfree(vport); + return err; +} + +static void mlx5_esw_vport_free(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + xa_erase(&esw->vports, vport->vport); + kfree(vport); +} + +static void mlx5_esw_vports_cleanup(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + unsigned long i; + + mlx5_esw_for_each_vport(esw, i, vport) + mlx5_esw_vport_free(esw, vport); + xa_destroy(&esw->vports); +} + +static int mlx5_esw_vports_init(struct mlx5_eswitch *esw) +{ + struct mlx5_core_dev *dev = esw->dev; + u16 max_host_pf_sfs; + u16 base_sf_num; + int idx = 0; + int err; + int i; + + xa_init(&esw->vports); + + err = mlx5_esw_vport_alloc(esw, dev, idx, MLX5_VPORT_PF); + if (err) + goto err; + if (esw->first_host_vport == MLX5_VPORT_PF) + xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_HOST_FN); + idx++; + + for (i = 0; i < mlx5_core_max_vfs(dev); i++) { + err = mlx5_esw_vport_alloc(esw, dev, idx, idx); + if (err) + goto err; + xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_VF); + xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_HOST_FN); + idx++; + } + base_sf_num = mlx5_sf_start_function_id(dev); + for (i = 0; i < mlx5_sf_max_functions(dev); i++) { + err = mlx5_esw_vport_alloc(esw, dev, idx, base_sf_num + i); + if (err) + goto err; + xa_set_mark(&esw->vports, base_sf_num + i, MLX5_ESW_VPT_SF); + 
idx++; + } + + err = mlx5_esw_sf_max_hpf_functions(dev, &max_host_pf_sfs, &base_sf_num); + if (err) + goto err; + for (i = 0; i < max_host_pf_sfs; i++) { + err = mlx5_esw_vport_alloc(esw, dev, idx, base_sf_num + i); + if (err) + goto err; + xa_set_mark(&esw->vports, base_sf_num + i, MLX5_ESW_VPT_SF); + idx++; + } + + if (mlx5_ecpf_vport_exists(dev)) { + err = mlx5_esw_vport_alloc(esw, dev, idx, MLX5_VPORT_ECPF); + if (err) + goto err; + idx++; + } + err = mlx5_esw_vport_alloc(esw, dev, idx, MLX5_VPORT_UPLINK); + if (err) + goto err; + return 0; + +err: + mlx5_esw_vports_cleanup(esw); + return err; } int mlx5_eswitch_init(struct mlx5_core_dev *dev) { struct mlx5_eswitch *esw; - struct mlx5_vport *vport; - int total_vports; - int err, i; + int err; if (!MLX5_VPORT_MANAGER(dev)) return 0; - total_vports = mlx5_eswitch_get_total_vports(dev); - - esw_info(dev, - "Total vports %d, per vport: max uc(%d) max mc(%d)\n", - total_vports, - MLX5_MAX_UC_PER_VPORT(dev), - MLX5_MAX_MC_PER_VPORT(dev)); - esw = kzalloc(sizeof(*esw), GFP_KERNEL); if (!esw) return -ENOMEM; @@ -1812,18 +1735,13 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) goto abort; } - esw->vports = kcalloc(total_vports, sizeof(struct mlx5_vport), - GFP_KERNEL); - if (!esw->vports) { - err = -ENOMEM; + err = mlx5_esw_vports_init(esw); + if (err) goto abort; - } - - esw->total_vports = total_vports; err = esw_offloads_init_reps(esw); if (err) - goto abort; + goto reps_err; mutex_init(&esw->offloads.encap_tbl_lock); hash_init(esw->offloads.encap_tbl); @@ -1834,15 +1752,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) ida_init(&esw->offloads.vport_metadata_ida); xa_init_flags(&esw->offloads.vhca_map, XA_FLAGS_ALLOC); mutex_init(&esw->state_lock); - mutex_init(&esw->mode_lock); - - mlx5_esw_for_all_vports(esw, i, vport) { - vport->vport = mlx5_eswitch_index_to_vport_num(esw, i); - vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; - vport->dev = dev; - INIT_WORK(&vport->vport_change_handler, - esw_vport_change_handler); - } + init_rwsem(&esw->mode_lock); esw->enabled_vports = 0; esw->mode = MLX5_ESWITCH_NONE; @@ -1850,12 +1760,19 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) dev->priv.eswitch = esw; BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head); + + esw_info(dev, + "Total vports %d, per vport: max uc(%d) max mc(%d)\n", + esw->total_vports, + MLX5_MAX_UC_PER_VPORT(dev), + MLX5_MAX_MC_PER_VPORT(dev)); return 0; + +reps_err: + mlx5_esw_vports_cleanup(esw); abort: if (esw->work_queue) destroy_workqueue(esw->work_queue); - esw_offloads_cleanup_reps(esw); - kfree(esw->vports); kfree(esw); return err; } @@ -1869,8 +1786,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); - esw_offloads_cleanup_reps(esw); - mutex_destroy(&esw->mode_lock); mutex_destroy(&esw->state_lock); WARN_ON(!xa_empty(&esw->offloads.vhca_map)); xa_destroy(&esw->offloads.vhca_map); @@ -1878,7 +1793,8 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) mlx5e_mod_hdr_tbl_destroy(&esw->offloads.mod_hdr); mutex_destroy(&esw->offloads.encap_tbl_lock); mutex_destroy(&esw->offloads.decap_tbl_lock); - kfree(esw->vports); + esw_offloads_cleanup_reps(esw); + mlx5_esw_vports_cleanup(esw); kfree(esw); } @@ -1937,8 +1853,29 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, return err; } +static bool mlx5_esw_check_port_type(struct mlx5_eswitch *esw, u16 vport_num, xa_mark_t mark) +{ + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return 
false; + + return xa_get_mark(&esw->vports, vport_num, mark); +} + +bool mlx5_eswitch_is_vf_vport(struct mlx5_eswitch *esw, u16 vport_num) +{ + return mlx5_esw_check_port_type(esw, vport_num, MLX5_ESW_VPT_VF); +} + +bool mlx5_esw_is_sf_vport(struct mlx5_eswitch *esw, u16 vport_num) +{ + return mlx5_esw_check_port_type(esw, vport_num, MLX5_ESW_VPT_SF); +} + static bool -is_port_function_supported(const struct mlx5_eswitch *esw, u16 vport_num) +is_port_function_supported(struct mlx5_eswitch *esw, u16 vport_num) { return vport_num == MLX5_VPORT_PF || mlx5_eswitch_is_vf_vport(esw, vport_num) || @@ -2023,7 +1960,7 @@ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, int other_vport = 1; int err = 0; - if (!ESW_ALLOWED(esw)) + if (!mlx5_esw_allowed(esw)) return -EPERM; if (IS_ERR(evport)) return PTR_ERR(evport); @@ -2034,6 +1971,10 @@ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, vport = 0; } mutex_lock(&esw->state_lock); + if (esw->mode != MLX5_ESWITCH_LEGACY) { + err = -EOPNOTSUPP; + goto unlock; + } err = mlx5_modify_vport_admin_state(esw->dev, opmod, vport, other_vport, link_state); if (err) { @@ -2067,8 +2008,8 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, ivi->qos = evport->info.qos; ivi->spoofchk = evport->info.spoofchk; ivi->trusted = evport->info.trusted; - ivi->min_tx_rate = evport->info.min_rate; - ivi->max_tx_rate = evport->info.max_rate; + ivi->min_tx_rate = evport->qos.min_rate; + ivi->max_tx_rate = evport->qos.max_rate; mutex_unlock(&esw->state_lock); return 0; @@ -2101,196 +2042,17 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, return err; } -int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, - u16 vport, u16 vlan, u8 qos) -{ - u8 set_flags = 0; - int err; - - if (!ESW_ALLOWED(esw)) - return -EPERM; - - if (vlan || qos) - set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT; - - mutex_lock(&esw->state_lock); - err = __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags); - mutex_unlock(&esw->state_lock); - - return err; -} - -int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, - u16 vport, bool spoofchk) -{ - struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); - bool pschk; - int err = 0; - - if (!ESW_ALLOWED(esw)) - return -EPERM; - if (IS_ERR(evport)) - return PTR_ERR(evport); - - mutex_lock(&esw->state_lock); - pschk = evport->info.spoofchk; - evport->info.spoofchk = spoofchk; - if (pschk && !is_valid_ether_addr(evport->info.mac)) - mlx5_core_warn(esw->dev, - "Spoofchk in set while MAC is invalid, vport(%d)\n", - evport->vport); - if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) - err = esw_acl_ingress_lgcy_setup(esw, evport); - if (err) - evport->info.spoofchk = pschk; - mutex_unlock(&esw->state_lock); - - return err; -} - -static void esw_cleanup_vepa_rules(struct mlx5_eswitch *esw) -{ - if (esw->fdb_table.legacy.vepa_uplink_rule) - mlx5_del_flow_rules(esw->fdb_table.legacy.vepa_uplink_rule); - - if (esw->fdb_table.legacy.vepa_star_rule) - mlx5_del_flow_rules(esw->fdb_table.legacy.vepa_star_rule); - - esw->fdb_table.legacy.vepa_uplink_rule = NULL; - esw->fdb_table.legacy.vepa_star_rule = NULL; -} - -static int _mlx5_eswitch_set_vepa_locked(struct mlx5_eswitch *esw, - u8 setting) -{ - struct mlx5_flow_destination dest = {}; - struct mlx5_flow_act flow_act = {}; - struct mlx5_flow_handle *flow_rule; - struct mlx5_flow_spec *spec; - int err = 0; - void *misc; - - if (!setting) { - esw_cleanup_vepa_rules(esw); - return 0; - } - - if (esw->fdb_table.legacy.vepa_uplink_rule) - return 0; - 
- spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) - return -ENOMEM; - - /* Uplink rule forward uplink traffic to FDB */ - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK); - - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); - - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; - dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest.ft = esw->fdb_table.legacy.fdb; - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, spec, - &flow_act, &dest, 1); - if (IS_ERR(flow_rule)) { - err = PTR_ERR(flow_rule); - goto out; - } else { - esw->fdb_table.legacy.vepa_uplink_rule = flow_rule; - } - - /* Star rule to forward all traffic to uplink vport */ - memset(&dest, 0, sizeof(dest)); - dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest.vport.num = MLX5_VPORT_UPLINK; - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - flow_rule = mlx5_add_flow_rules(esw->fdb_table.legacy.vepa_fdb, NULL, - &flow_act, &dest, 1); - if (IS_ERR(flow_rule)) { - err = PTR_ERR(flow_rule); - goto out; - } else { - esw->fdb_table.legacy.vepa_star_rule = flow_rule; - } - -out: - kvfree(spec); - if (err) - esw_cleanup_vepa_rules(esw); - return err; -} - -int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting) -{ - int err = 0; - - if (!esw) - return -EOPNOTSUPP; - - if (!ESW_ALLOWED(esw)) - return -EPERM; - - mutex_lock(&esw->state_lock); - if (esw->mode != MLX5_ESWITCH_LEGACY) { - err = -EOPNOTSUPP; - goto out; - } - - err = _mlx5_eswitch_set_vepa_locked(esw, setting); - -out: - mutex_unlock(&esw->state_lock); - return err; -} - -int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting) -{ - if (!esw) - return -EOPNOTSUPP; - - if (!ESW_ALLOWED(esw)) - return -EPERM; - - if (esw->mode != MLX5_ESWITCH_LEGACY) - return -EOPNOTSUPP; - - *setting = esw->fdb_table.legacy.vepa_uplink_rule ? 
1 : 0; - return 0; -} - -int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, - u16 vport, bool setting) -{ - struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); - - if (!ESW_ALLOWED(esw)) - return -EPERM; - if (IS_ERR(evport)) - return PTR_ERR(evport); - - mutex_lock(&esw->state_lock); - evport->info.trusted = setting; - if (evport->enabled) - esw_vport_change_handle_locked(evport); - mutex_unlock(&esw->state_lock); - - return 0; -} - static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw) { u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); struct mlx5_vport *evport; u32 max_guarantee = 0; - int i; + unsigned long i; - mlx5_esw_for_all_vports(esw, i, evport) { - if (!evport->enabled || evport->info.min_rate < max_guarantee) + mlx5_esw_for_each_vport(esw, i, evport) { + if (!evport->enabled || evport->qos.min_rate < max_guarantee) continue; - max_guarantee = evport->info.min_rate; + max_guarantee = evport->qos.min_rate; } if (max_guarantee) @@ -2305,15 +2067,15 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw) struct mlx5_vport *evport; u32 vport_max_rate; u32 vport_min_rate; + unsigned long i; u32 bw_share; int err; - int i; - mlx5_esw_for_all_vports(esw, i, evport) { + mlx5_esw_for_each_vport(esw, i, evport) { if (!evport->enabled) continue; - vport_min_rate = evport->info.min_rate; - vport_max_rate = evport->info.max_rate; + vport_min_rate = evport->qos.min_rate; + vport_max_rate = evport->qos.max_rate; bw_share = 0; if (divider) @@ -2345,7 +2107,7 @@ int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport, bool max_rate_supported; int err = 0; - if (!ESW_ALLOWED(esw)) + if (!mlx5_esw_allowed(esw)) return -EPERM; if (IS_ERR(evport)) return PTR_ERR(evport); @@ -2360,68 +2122,24 @@ int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport, mutex_lock(&esw->state_lock); - if (min_rate == evport->info.min_rate) + if (min_rate == evport->qos.min_rate) goto set_max_rate; - previous_min_rate = evport->info.min_rate; - evport->info.min_rate = min_rate; + previous_min_rate = evport->qos.min_rate; + evport->qos.min_rate = min_rate; err = normalize_vports_min_rate(esw); if (err) { - evport->info.min_rate = previous_min_rate; + evport->qos.min_rate = previous_min_rate; goto unlock; } set_max_rate: - if (max_rate == evport->info.max_rate) + if (max_rate == evport->qos.max_rate) goto unlock; err = esw_vport_qos_config(esw, evport, max_rate, evport->qos.bw_share); if (!err) - evport->info.max_rate = max_rate; - -unlock: - mutex_unlock(&esw->state_lock); - return err; -} - -static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev, - struct mlx5_vport *vport, - struct mlx5_vport_drop_stats *stats) -{ - struct mlx5_eswitch *esw = dev->priv.eswitch; - u64 rx_discard_vport_down, tx_discard_vport_down; - u64 bytes = 0; - int err = 0; - - if (esw->mode != MLX5_ESWITCH_LEGACY) - return 0; - - mutex_lock(&esw->state_lock); - if (!vport->enabled) - goto unlock; - - if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_counter)) - mlx5_fc_query(dev, vport->egress.legacy.drop_counter, - &stats->rx_dropped, &bytes); - - if (vport->ingress.legacy.drop_counter) - mlx5_fc_query(dev, vport->ingress.legacy.drop_counter, - &stats->tx_dropped, &bytes); - - if (!MLX5_CAP_GEN(dev, receive_discard_vport_down) && - !MLX5_CAP_GEN(dev, transmit_discard_vport_down)) - goto unlock; - - err = mlx5_query_vport_down_stats(dev, vport->vport, 1, - &rx_discard_vport_down, - &tx_discard_vport_down); - if (err) - goto unlock; - - if 
(MLX5_CAP_GEN(dev, receive_discard_vport_down)) - stats->rx_dropped += rx_discard_vport_down; - if (MLX5_CAP_GEN(dev, transmit_discard_vport_down)) - stats->tx_dropped += tx_discard_vport_down; + evport->qos.max_rate = max_rate; unlock: mutex_unlock(&esw->state_lock); return err; } @@ -2495,7 +2213,7 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, vf_stats->broadcast = MLX5_GET_CTR(out, received_eth_broadcast.packets); - err = mlx5_eswitch_query_vport_drop_stats(esw->dev, vport, &stats); + err = mlx5_esw_query_vport_drop_stats(esw->dev, vport, &stats); if (err) goto free_out; vf_stats->rx_dropped = stats.rx_dropped; @@ -2510,7 +2228,7 @@ u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev) { struct mlx5_eswitch *esw = dev->priv.eswitch; - return ESW_ALLOWED(esw) ? esw->mode : MLX5_ESWITCH_NONE; + return mlx5_esw_allowed(esw) ? esw->mode : MLX5_ESWITCH_NONE; } EXPORT_SYMBOL_GPL(mlx5_eswitch_mode); @@ -2520,7 +2238,7 @@ mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev) { struct mlx5_eswitch *esw; esw = dev->priv.eswitch; - return ESW_ALLOWED(esw) ? esw->offloads.encap : + return mlx5_esw_allowed(esw) ? esw->offloads.encap : DEVLINK_ESWITCH_ENCAP_MODE_NONE; } EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode); @@ -2552,3 +2270,110 @@ void mlx5_esw_event_notifier_unregister(struct mlx5_eswitch *esw, struct notifie { blocking_notifier_chain_unregister(&esw->n_head, nb); } + +/** + * mlx5_esw_hold() - Try to take a read lock on esw mode lock. + * @mdev: mlx5 core device. + * + * Should be called by eswitch resource users. + * + * Return: true on success, false otherwise. + */ +bool mlx5_esw_hold(struct mlx5_core_dev *mdev) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + + /* e.g. VF doesn't have an eswitch, so nothing to do */ + if (!mlx5_esw_allowed(esw)) + return true; + + if (down_read_trylock(&esw->mode_lock) != 0) + return true; + + return false; +} + +/** + * mlx5_esw_release() - Release a read lock on esw mode lock. + * @mdev: mlx5 core device. + */ +void mlx5_esw_release(struct mlx5_core_dev *mdev) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + + if (mlx5_esw_allowed(esw)) + up_read(&esw->mode_lock); +} + +/** + * mlx5_esw_get() - Increase esw user count. + * @mdev: mlx5 core device. + */ +void mlx5_esw_get(struct mlx5_core_dev *mdev) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + + if (mlx5_esw_allowed(esw)) + atomic64_inc(&esw->user_count); +} + +/** + * mlx5_esw_put() - Decrease esw user count. + * @mdev: mlx5 core device. + */ +void mlx5_esw_put(struct mlx5_core_dev *mdev) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + + if (mlx5_esw_allowed(esw)) + atomic64_dec_if_positive(&esw->user_count); +} + +/** + * mlx5_esw_try_lock() - Take a write lock on esw mode lock. + * @esw: eswitch device. + * + * Should be called by the esw mode change routine. + * + * Return: + * * >= 0 - the current esw mode, if successfully locked and the user count is 0. + * * -EBUSY - the user count is not 0. + * * -EINVAL - a mode switch is in progress or the lock is already held. + */ +int mlx5_esw_try_lock(struct mlx5_eswitch *esw) +{ + if (down_write_trylock(&esw->mode_lock) == 0) + return -EINVAL; + + if (atomic64_read(&esw->user_count) > 0) { + up_write(&esw->mode_lock); + return -EBUSY; + } + + return esw->mode; +} + +/** + * mlx5_esw_unlock() - Release write lock on esw mode lock. + * @esw: eswitch device.
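+ *
+ * Pairs with a successful mlx5_esw_try_lock(). A minimal caller sketch,
+ * illustrative only and not taken from this patch:
+ *
+ *	mode = mlx5_esw_try_lock(esw);
+ *	if (mode < 0)
+ *		return mode;
+ *	... perform the mode change ...
+ *	mlx5_esw_unlock(esw);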
+ */ +void mlx5_esw_unlock(struct mlx5_eswitch *esw) +{ + up_write(&esw->mode_lock); +} + +/** + * mlx5_eswitch_get_total_vports - Get total vports of the eswitch + * + * @dev: Pointer to core device + * + * mlx5_eswitch_get_total_vports returns total number of eswitch vports. + */ +u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev) +{ + struct mlx5_eswitch *esw; + + esw = dev->priv.eswitch; + return mlx5_esw_allowed(esw) ? esw->total_vports : 0; +} +EXPORT_SYMBOL_GPL(mlx5_eswitch_get_total_vports); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index fdf5c8c05c1b..64ccb2bc0b58 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -46,6 +46,24 @@ #include "lib/fs_chains.h" #include "sf/sf.h" #include "en/tc_ct.h" +#include "esw/sample.h" + +enum mlx5_mapped_obj_type { + MLX5_MAPPED_OBJ_CHAIN, + MLX5_MAPPED_OBJ_SAMPLE, +}; + +struct mlx5_mapped_obj { + enum mlx5_mapped_obj_type type; + union { + u32 chain; + struct { + u32 group_id; + u32 rate; + u32 trunc_size; + } sample; + }; +}; #ifdef CONFIG_MLX5_ESWITCH @@ -118,13 +136,11 @@ struct mlx5_vport_drop_stats { struct mlx5_vport_info { u8 mac[ETH_ALEN]; u16 vlan; - u8 qos; u64 node_guid; int link_state; - u32 min_rate; - u32 max_rate; - bool spoofchk; - bool trusted; + u8 qos; + u8 spoofchk: 1; + u8 trusted: 1; }; /* Vport context events */ @@ -136,7 +152,6 @@ enum mlx5_eswitch_vport_event { struct mlx5_vport { struct mlx5_core_dev *dev; - int vport; struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE]; struct hlist_head mc_list[MLX5_L2_ADDR_HASH_SIZE]; struct mlx5_flow_handle *promisc_rule; @@ -154,10 +169,14 @@ struct mlx5_vport { bool enabled; u32 esw_tsar_ix; u32 bw_share; + u32 min_rate; + u32 max_rate; } qos; + u16 vport; bool enabled; enum mlx5_eswitch_vport_event enabled_events; + int index; struct devlink_port *dl_port; }; @@ -206,10 +225,11 @@ struct mlx5_esw_offload { struct mlx5_flow_table *ft_offloads_restore; struct mlx5_flow_group *restore_group; struct mlx5_modify_hdr *restore_copy_hdr_id; + struct mapping_ctx *reg_c0_obj_pool; struct mlx5_flow_table *ft_offloads; struct mlx5_flow_group *vport_rx_group; - struct mlx5_eswitch_rep *vport_reps; + struct xarray vport_reps; struct list_head peer_flows; struct mutex peer_mutex; struct mutex encap_tbl_lock; /* protects encap_tbl */ @@ -259,7 +279,7 @@ struct mlx5_eswitch { struct esw_mc_addr mc_promisc; /* end of legacy */ struct workqueue_struct *work_queue; - struct mlx5_vport *vports; + struct xarray vports; u32 flags; int total_vports; int enabled_vports; @@ -271,7 +291,8 @@ struct mlx5_eswitch { /* Protects eswitch mode change that occurs via one or more * user commands, i.e. sriov state change, devlink commands. 
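 * Taken for write on mode change (see mlx5_eswitch_enable()/disable() and
 * mlx5_esw_try_lock()/mlx5_esw_unlock()) and for read by eswitch resource
 * users via mlx5_esw_hold()/mlx5_esw_release(); user_count tracks the
 * active users.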
*/ - struct mutex mode_lock; + struct rw_semaphore mode_lock; + atomic64_t user_count; struct { bool enabled; @@ -294,6 +315,8 @@ int esw_offloads_enable(struct mlx5_eswitch *esw); void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw); int esw_offloads_init_reps(struct mlx5_eswitch *esw); +bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw); +int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable); u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw); void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata); @@ -356,6 +379,9 @@ void mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw, struct mlx5_termtbl_handle *tt); +void +mlx5_eswitch_clear_rule_source_port(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec); + struct mlx5_flow_handle * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, @@ -403,6 +429,7 @@ enum { MLX5_ESW_ATTR_FLAG_SLOW_PATH = BIT(1), MLX5_ESW_ATTR_FLAG_NO_IN_PORT = BIT(2), MLX5_ESW_ATTR_FLAG_SRC_REWRITE = BIT(3), + MLX5_ESW_ATTR_FLAG_SAMPLE = BIT(4), }; struct mlx5_esw_flow_attr { @@ -427,6 +454,7 @@ struct mlx5_esw_flow_attr { } dests[MLX5_MAX_FLOW_FWD_VPORTS]; struct mlx5_rx_tun_attr *rx_tun_attr; struct mlx5_pkt_reformat *decap_pkt_reformat; + struct mlx5_sample_attr *sample; }; int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, @@ -494,6 +522,11 @@ const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev); #define esw_debug(dev, format, ...) \ mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) +static inline bool mlx5_esw_allowed(const struct mlx5_eswitch *esw) +{ + return esw && MLX5_ESWITCH_MANAGER(esw->dev); +} + /* The returned number is valid only when the dev is eswitch manager. 
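 * (i.e. when MLX5_ESWITCH_MANAGER(dev) holds; cf. mlx5_esw_allowed() above).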
*/ static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev) { @@ -513,94 +546,11 @@ static inline u16 mlx5_eswitch_first_host_vport_num(struct mlx5_core_dev *dev) MLX5_VPORT_PF : MLX5_VPORT_FIRST_VF; } -static inline int mlx5_esw_sf_start_idx(const struct mlx5_eswitch *esw) -{ - /* PF and VF vports indices start from 0 to max_vfs */ - return MLX5_VPORT_PF_PLACEHOLDER + mlx5_core_max_vfs(esw->dev); -} - -static inline int mlx5_esw_sf_end_idx(const struct mlx5_eswitch *esw) -{ - return mlx5_esw_sf_start_idx(esw) + mlx5_sf_max_functions(esw->dev); -} - -static inline int -mlx5_esw_sf_vport_num_to_index(const struct mlx5_eswitch *esw, u16 vport_num) -{ - return vport_num - mlx5_sf_start_function_id(esw->dev) + - MLX5_VPORT_PF_PLACEHOLDER + mlx5_core_max_vfs(esw->dev); -} - -static inline u16 -mlx5_esw_sf_vport_index_to_num(const struct mlx5_eswitch *esw, int idx) -{ - return mlx5_sf_start_function_id(esw->dev) + idx - - (MLX5_VPORT_PF_PLACEHOLDER + mlx5_core_max_vfs(esw->dev)); -} - -static inline bool -mlx5_esw_is_sf_vport(const struct mlx5_eswitch *esw, u16 vport_num) -{ - return mlx5_sf_supported(esw->dev) && - vport_num >= mlx5_sf_start_function_id(esw->dev) && - (vport_num < (mlx5_sf_start_function_id(esw->dev) + - mlx5_sf_max_functions(esw->dev))); -} - static inline bool mlx5_eswitch_is_funcs_handler(const struct mlx5_core_dev *dev) { return mlx5_core_is_ecpf_esw_manager(dev); } -static inline int mlx5_eswitch_uplink_idx(struct mlx5_eswitch *esw) -{ - /* Uplink always locate at the last element of the array.*/ - return esw->total_vports - 1; -} - -static inline int mlx5_eswitch_ecpf_idx(struct mlx5_eswitch *esw) -{ - return esw->total_vports - 2; -} - -static inline int mlx5_eswitch_vport_num_to_index(struct mlx5_eswitch *esw, - u16 vport_num) -{ - if (vport_num == MLX5_VPORT_ECPF) { - if (!mlx5_ecpf_vport_exists(esw->dev)) - esw_warn(esw->dev, "ECPF vport doesn't exist!\n"); - return mlx5_eswitch_ecpf_idx(esw); - } - - if (vport_num == MLX5_VPORT_UPLINK) - return mlx5_eswitch_uplink_idx(esw); - - if (mlx5_esw_is_sf_vport(esw, vport_num)) - return mlx5_esw_sf_vport_num_to_index(esw, vport_num); - - /* PF and VF vports start from 0 to max_vfs */ - return vport_num; -} - -static inline u16 mlx5_eswitch_index_to_vport_num(struct mlx5_eswitch *esw, - int index) -{ - if (index == mlx5_eswitch_ecpf_idx(esw) && - mlx5_ecpf_vport_exists(esw->dev)) - return MLX5_VPORT_ECPF; - - if (index == mlx5_eswitch_uplink_idx(esw)) - return MLX5_VPORT_UPLINK; - - /* SF vports indices are after VFs and before ECPF */ - if (mlx5_sf_supported(esw->dev) && - index > mlx5_core_max_vfs(esw->dev)) - return mlx5_esw_sf_vport_index_to_num(esw, index); - - /* PF and VF vports start from 0 to max_vfs */ - return index; -} - static inline unsigned int mlx5_esw_vport_to_devlink_port_index(const struct mlx5_core_dev *dev, u16 vport_num) @@ -617,82 +567,42 @@ mlx5_esw_devlink_port_index_to_vport_num(unsigned int dl_port_index) /* TODO: This mlx5e_tc function shouldn't be called by eswitch */ void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); -/* The vport getter/iterator are only valid after esw->total_vports - * and vport->vport are initialized in mlx5_eswitch_init. +/* Each mark identifies eswitch vport type. + * MLX5_ESW_VPT_HOST_FN is used to identify both PF and VF ports using + * a single mark. + * MLX5_ESW_VPT_VF identifies a SRIOV VF vport. + * MLX5_ESW_VPT_SF identifies SF vport. 
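+ *
+ * A sketch of walking only marked vports with the iterators defined below,
+ * illustrative only and not taken from this patch:
+ *
+ *	struct mlx5_vport *vport;
+ *	unsigned long i;
+ *
+ *	mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs)
+ *		esw_debug(esw->dev, "vf vport num 0x%x\n", vport->vport);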
*/ -#define mlx5_esw_for_all_vports(esw, i, vport) \ - for ((i) = MLX5_VPORT_PF; \ - (vport) = &(esw)->vports[i], \ - (i) < (esw)->total_vports; (i)++) - -#define mlx5_esw_for_all_vports_reverse(esw, i, vport) \ - for ((i) = (esw)->total_vports - 1; \ - (vport) = &(esw)->vports[i], \ - (i) >= MLX5_VPORT_PF; (i)--) - -#define mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs) \ - for ((i) = MLX5_VPORT_FIRST_VF; \ - (vport) = &(esw)->vports[(i)], \ - (i) <= (nvfs); (i)++) - -#define mlx5_esw_for_each_vf_vport_reverse(esw, i, vport, nvfs) \ - for ((i) = (nvfs); \ - (vport) = &(esw)->vports[(i)], \ - (i) >= MLX5_VPORT_FIRST_VF; (i)--) - -/* The rep getter/iterator are only valid after esw->total_vports - * and vport->vport are initialized in mlx5_eswitch_init. +#define MLX5_ESW_VPT_HOST_FN XA_MARK_0 +#define MLX5_ESW_VPT_VF XA_MARK_1 +#define MLX5_ESW_VPT_SF XA_MARK_2 + +/* The vport iterator is valid only after vports are initialized in mlx5_eswitch_init. + * Borrowed the idea from xa_for_each_marked() but with support for a desired last element. */ -#define mlx5_esw_for_all_reps(esw, i, rep) \ - for ((i) = MLX5_VPORT_PF; \ - (rep) = &(esw)->offloads.vport_reps[i], \ - (i) < (esw)->total_vports; (i)++) - -#define mlx5_esw_for_each_vf_rep(esw, i, rep, nvfs) \ - for ((i) = MLX5_VPORT_FIRST_VF; \ - (rep) = &(esw)->offloads.vport_reps[i], \ - (i) <= (nvfs); (i)++) - -#define mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, nvfs) \ - for ((i) = (nvfs); \ - (rep) = &(esw)->offloads.vport_reps[i], \ - (i) >= MLX5_VPORT_FIRST_VF; (i)--) - -#define mlx5_esw_for_each_vf_vport_num(esw, vport, nvfs) \ - for ((vport) = MLX5_VPORT_FIRST_VF; (vport) <= (nvfs); (vport)++) - -#define mlx5_esw_for_each_vf_vport_num_reverse(esw, vport, nvfs) \ - for ((vport) = (nvfs); (vport) >= MLX5_VPORT_FIRST_VF; (vport)--) - -/* Includes host PF (vport 0) if it's not esw manager.
*/ -#define mlx5_esw_for_each_host_func_rep(esw, i, rep, nvfs) \ - for ((i) = (esw)->first_host_vport; \ - (rep) = &(esw)->offloads.vport_reps[i], \ - (i) <= (nvfs); (i)++) - -#define mlx5_esw_for_each_host_func_rep_reverse(esw, i, rep, nvfs) \ - for ((i) = (nvfs); \ - (rep) = &(esw)->offloads.vport_reps[i], \ - (i) >= (esw)->first_host_vport; (i)--) - -#define mlx5_esw_for_each_host_func_vport(esw, vport, nvfs) \ - for ((vport) = (esw)->first_host_vport; \ - (vport) <= (nvfs); (vport)++) - -#define mlx5_esw_for_each_host_func_vport_reverse(esw, vport, nvfs) \ - for ((vport) = (nvfs); \ - (vport) >= (esw)->first_host_vport; (vport)--) - -#define mlx5_esw_for_each_sf_rep(esw, i, rep) \ - for ((i) = mlx5_esw_sf_start_idx(esw); \ - (rep) = &(esw)->offloads.vport_reps[(i)], \ - (i) < mlx5_esw_sf_end_idx(esw); (i++)) + +#define mlx5_esw_for_each_vport(esw, index, vport) \ + xa_for_each(&((esw)->vports), index, vport) + +#define mlx5_esw_for_each_entry_marked(xa, index, entry, last, filter) \ + for (index = 0, entry = xa_find(xa, &index, last, filter); \ + entry; entry = xa_find_after(xa, &index, last, filter)) + +#define mlx5_esw_for_each_vport_marked(esw, index, vport, last, filter) \ + mlx5_esw_for_each_entry_marked(&((esw)->vports), index, vport, last, filter) + +#define mlx5_esw_for_each_vf_vport(esw, index, vport, last) \ + mlx5_esw_for_each_vport_marked(esw, index, vport, last, MLX5_ESW_VPT_VF) + +#define mlx5_esw_for_each_host_func_vport(esw, index, vport, last) \ + mlx5_esw_for_each_vport_marked(esw, index, vport, last, MLX5_ESW_VPT_HOST_FN) struct mlx5_eswitch *mlx5_devlink_eswitch_get(struct devlink *devlink); struct mlx5_vport *__must_check mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num); -bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num); +bool mlx5_eswitch_is_vf_vport(struct mlx5_eswitch *esw, u16 vport_num); +bool mlx5_esw_is_sf_vport(struct mlx5_eswitch *esw, u16 vport_num); int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data); @@ -712,13 +622,26 @@ void esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw, struct mlx5_vport *vport); -int mlx5_esw_vport_tbl_get(struct mlx5_eswitch *esw); -void mlx5_esw_vport_tbl_put(struct mlx5_eswitch *esw); +struct esw_vport_tbl_namespace { + int max_fte; + int max_num_groups; + u32 flags; +}; + +struct mlx5_vport_tbl_attr { + u16 chain; + u16 prio; + u16 vport; + const struct esw_vport_tbl_namespace *vport_ns; +}; + +struct mlx5_flow_table * +mlx5_esw_vporttbl_get(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr); +void +mlx5_esw_vporttbl_put(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr); struct mlx5_flow_handle * esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag); -u32 -esw_get_max_restore_tag(struct mlx5_eswitch *esw); int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num); void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num); @@ -739,12 +662,13 @@ void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vpo struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num); int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port, - u16 vport_num, u32 sfnum); + u16 vport_num, u32 controller, u32 sfnum); void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num); int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_port *dl_port, - u16 vport_num, u32 sfnum); + u16 
vport_num, u32 controller, u32 sfnum); void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num); +int mlx5_esw_sf_max_hpf_functions(struct mlx5_core_dev *dev, u16 *max_sfs, u16 *sf_base_id); int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num); void mlx5_esw_vport_vhca_id_clear(struct mlx5_eswitch *esw, u16 vport_num); @@ -761,6 +685,18 @@ struct mlx5_esw_event_info { int mlx5_esw_event_notifier_register(struct mlx5_eswitch *esw, struct notifier_block *n); void mlx5_esw_event_notifier_unregister(struct mlx5_eswitch *esw, struct notifier_block *n); + +bool mlx5_esw_hold(struct mlx5_core_dev *dev); +void mlx5_esw_release(struct mlx5_core_dev *dev); +void mlx5_esw_get(struct mlx5_core_dev *dev); +void mlx5_esw_put(struct mlx5_core_dev *dev); +int mlx5_esw_try_lock(struct mlx5_eswitch *esw); +void mlx5_esw_unlock(struct mlx5_eswitch *esw); + +void esw_vport_change_handle_locked(struct mlx5_vport *vport); + +bool mlx5_esw_offloads_controller_valid(const struct mlx5_eswitch *esw, u32 controller); + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } @@ -781,6 +717,13 @@ esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag) { return ERR_PTR(-EOPNOTSUPP); } + +static inline unsigned int +mlx5_esw_vport_to_devlink_port_index(const struct mlx5_core_dev *dev, + u16 vport_num) +{ + return vport_num; +} #endif /* CONFIG_MLX5_ESWITCH */ #endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 94cb0217b4f3..db1e74280e57 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -40,7 +40,6 @@ #include "eswitch.h" #include "esw/indir_table.h" #include "esw/acl/ofld.h" -#include "esw/indir_table.h" #include "rdma.h" #include "en.h" #include "fs_core.h" @@ -48,6 +47,17 @@ #include "lib/eq.h" #include "lib/fs_chains.h" #include "en_tc.h" +#include "en/mapping.h" + +#define mlx5_esw_for_each_rep(esw, i, rep) \ + xa_for_each(&((esw)->offloads.vport_reps), i, rep) + +#define mlx5_esw_for_each_sf_rep(esw, i, rep) \ + xa_for_each_marked(&((esw)->offloads.vport_reps), i, rep, MLX5_ESW_VPT_SF) + +#define mlx5_esw_for_each_vf_rep(esw, index, rep) \ + mlx5_esw_for_each_entry_marked(&((esw)->offloads.vport_reps), index, \ + rep, (esw)->esw_funcs.num_vfs, MLX5_ESW_VPT_VF) /* There are two match-all miss flows, one for unicast dst mac and * one for multicast. 
@@ -55,192 +65,19 @@ #define MLX5_ESW_MISS_FLOWS (2) #define UPLINK_REP_INDEX 0 -/* Per vport tables */ - -#define MLX5_ESW_VPORT_TABLE_SIZE 128 - -/* This struct is used as a key to the hash table and we need it to be packed - * so hash result is consistent - */ -struct mlx5_vport_key { - u32 chain; - u16 prio; - u16 vport; - u16 vhca_id; -} __packed; - -struct mlx5_vport_tbl_attr { - u16 chain; - u16 prio; - u16 vport; -}; - -struct mlx5_vport_table { - struct hlist_node hlist; - struct mlx5_flow_table *fdb; - u32 num_rules; - struct mlx5_vport_key key; -}; - +#define MLX5_ESW_VPORT_TBL_SIZE 128 #define MLX5_ESW_VPORT_TBL_NUM_GROUPS 4 -static struct mlx5_flow_table * -esw_vport_tbl_create(struct mlx5_eswitch *esw, struct mlx5_flow_namespace *ns) -{ - struct mlx5_flow_table_attr ft_attr = {}; - struct mlx5_flow_table *fdb; - - ft_attr.autogroup.max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS; - ft_attr.max_fte = MLX5_ESW_VPORT_TABLE_SIZE; - ft_attr.prio = FDB_PER_VPORT; - fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); - if (IS_ERR(fdb)) { - esw_warn(esw->dev, "Failed to create per vport FDB Table err %ld\n", - PTR_ERR(fdb)); - } - - return fdb; -} - -static u32 flow_attr_to_vport_key(struct mlx5_eswitch *esw, - struct mlx5_vport_tbl_attr *attr, - struct mlx5_vport_key *key) -{ - key->vport = attr->vport; - key->chain = attr->chain; - key->prio = attr->prio; - key->vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); - return jhash(key, sizeof(*key), 0); -} - -/* caller must hold vports.lock */ -static struct mlx5_vport_table * -esw_vport_tbl_lookup(struct mlx5_eswitch *esw, struct mlx5_vport_key *skey, u32 key) -{ - struct mlx5_vport_table *e; - - hash_for_each_possible(esw->fdb_table.offloads.vports.table, e, hlist, key) - if (!memcmp(&e->key, skey, sizeof(*skey))) - return e; - - return NULL; -} - -static void -esw_vport_tbl_put(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr) -{ - struct mlx5_vport_table *e; - struct mlx5_vport_key key; - u32 hkey; - - mutex_lock(&esw->fdb_table.offloads.vports.lock); - hkey = flow_attr_to_vport_key(esw, attr, &key); - e = esw_vport_tbl_lookup(esw, &key, hkey); - if (!e || --e->num_rules) - goto out; - - hash_del(&e->hlist); - mlx5_destroy_flow_table(e->fdb); - kfree(e); -out: - mutex_unlock(&esw->fdb_table.offloads.vports.lock); -} - -static struct mlx5_flow_table * -esw_vport_tbl_get(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr) -{ - struct mlx5_core_dev *dev = esw->dev; - struct mlx5_flow_namespace *ns; - struct mlx5_flow_table *fdb; - struct mlx5_vport_table *e; - struct mlx5_vport_key skey; - u32 hkey; - - mutex_lock(&esw->fdb_table.offloads.vports.lock); - hkey = flow_attr_to_vport_key(esw, attr, &skey); - e = esw_vport_tbl_lookup(esw, &skey, hkey); - if (e) { - e->num_rules++; - goto out; - } - - e = kzalloc(sizeof(*e), GFP_KERNEL); - if (!e) { - fdb = ERR_PTR(-ENOMEM); - goto err_alloc; - } - - ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); - if (!ns) { - esw_warn(dev, "Failed to get FDB namespace\n"); - fdb = ERR_PTR(-ENOENT); - goto err_ns; - } - - fdb = esw_vport_tbl_create(esw, ns); - if (IS_ERR(fdb)) - goto err_ns; - - e->fdb = fdb; - e->num_rules = 1; - e->key = skey; - hash_add(esw->fdb_table.offloads.vports.table, &e->hlist, hkey); -out: - mutex_unlock(&esw->fdb_table.offloads.vports.lock); - return e->fdb; - -err_ns: - kfree(e); -err_alloc: - mutex_unlock(&esw->fdb_table.offloads.vports.lock); - return fdb; -} - -int mlx5_esw_vport_tbl_get(struct mlx5_eswitch *esw) -{ - struct mlx5_vport_tbl_attr 
attr; - struct mlx5_flow_table *fdb; - struct mlx5_vport *vport; - int i; - - attr.chain = 0; - attr.prio = 1; - mlx5_esw_for_all_vports(esw, i, vport) { - attr.vport = vport->vport; - fdb = esw_vport_tbl_get(esw, &attr); - if (IS_ERR(fdb)) - goto out; - } - return 0; - -out: - mlx5_esw_vport_tbl_put(esw); - return PTR_ERR(fdb); -} - -void mlx5_esw_vport_tbl_put(struct mlx5_eswitch *esw) -{ - struct mlx5_vport_tbl_attr attr; - struct mlx5_vport *vport; - int i; - - attr.chain = 0; - attr.prio = 1; - mlx5_esw_for_all_vports(esw, i, vport) { - attr.vport = vport->vport; - esw_vport_tbl_put(esw, &attr); - } -} - -/* End: Per vport tables */ +static const struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_mirror_ns = { + .max_fte = MLX5_ESW_VPORT_TBL_SIZE, + .max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS, + .flags = 0, +}; static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw, u16 vport_num) { - int idx = mlx5_eswitch_vport_num_to_index(esw, vport_num); - - WARN_ON(idx > esw->total_vports - 1); - return &esw->offloads.vport_reps[idx]; + return xa_load(&esw->offloads.vport_reps, vport_num); } static void @@ -256,6 +93,26 @@ mlx5_eswitch_set_rule_flow_source(struct mlx5_eswitch *esw, MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; } +/* Actually only the upper 16 bits of reg c0 need to be cleared, but the lower 16 bits + * are not needed as well in the following process. So clear them all for simplicity. + */ +void +mlx5_eswitch_clear_rule_source_port(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec) +{ + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + void *misc2; + + misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, 0); + + misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, 0); + + if (!memchr_inv(misc2, 0, MLX5_ST_SZ_BYTES(fte_match_set_misc2))) + spec->match_criteria_enable &= ~MLX5_MATCH_MISC_PARAMETERS_2; + } +} + static void mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, @@ -327,6 +184,19 @@ esw_cleanup_decap_indir(struct mlx5_eswitch *esw, } static int +esw_setup_sampler_dest(struct mlx5_flow_destination *dest, + struct mlx5_flow_act *flow_act, + struct mlx5_esw_flow_attr *esw_attr, + int i) +{ + flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER; + dest[i].sampler_id = esw_attr->sample->sampler_id; + + return 0; +} + +static int esw_setup_ft_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, struct mlx5_eswitch *esw, @@ -537,6 +407,14 @@ esw_setup_vport_dests(struct mlx5_flow_destination *dest, struct mlx5_flow_act * return i; } +static bool +esw_src_port_rewrite_supported(struct mlx5_eswitch *esw) +{ + return MLX5_CAP_GEN(esw->dev, reg_c_preserve) && + mlx5_eswitch_vport_match_metadata_enabled(esw) && + MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level); +} + static int esw_setup_dests(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, @@ -550,11 +428,13 @@ esw_setup_dests(struct mlx5_flow_destination *dest, int err = 0; if (!mlx5_eswitch_termtbl_required(esw, attr, flow_act, spec) && - MLX5_CAP_GEN(esw_attr->in_mdev, reg_c_preserve) && - mlx5_eswitch_vport_match_metadata_enabled(esw)) + esw_src_port_rewrite_supported(esw)) attr->flags |= MLX5_ESW_ATTR_FLAG_SRC_REWRITE; - if (attr->dest_ft) { + if (attr->flags & MLX5_ESW_ATTR_FLAG_SAMPLE) { + 
esw_setup_sampler_dest(dest, flow_act, esw_attr, *i); + (*i)++; + } else if (attr->dest_ft) { esw_setup_ft_dest(dest, flow_act, esw, attr, spec, *i); (*i)++; } else if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) { @@ -657,12 +537,16 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) flow_act.modify_hdr = attr->modify_hdr; - if (split) { + /* esw_attr->sample is allocated only when there is a sample action */ + if (esw_attr->sample && esw_attr->sample->sample_default_tbl) { + fdb = esw_attr->sample->sample_default_tbl; + } else if (split) { fwd_attr.chain = attr->chain; fwd_attr.prio = attr->prio; fwd_attr.vport = esw_attr->in_rep->vport; + fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns; - fdb = esw_vport_tbl_get(esw, &fwd_attr); + fdb = mlx5_esw_vporttbl_get(esw, &fwd_attr); } else { if (attr->chain || attr->prio) fdb = mlx5_chains_get_table(chains, attr->chain, @@ -694,7 +578,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, err_add_rule: if (split) - esw_vport_tbl_put(esw, &fwd_attr); + mlx5_esw_vporttbl_put(esw, &fwd_attr); else if (attr->chain || attr->prio) mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); err_esw_get: @@ -727,7 +611,8 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, fwd_attr.chain = attr->chain; fwd_attr.prio = attr->prio; fwd_attr.vport = esw_attr->in_rep->vport; - fwd_fdb = esw_vport_tbl_get(esw, &fwd_attr); + fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns; + fwd_fdb = mlx5_esw_vporttbl_get(esw, &fwd_attr); if (IS_ERR(fwd_fdb)) { rule = ERR_CAST(fwd_fdb); goto err_get_fwd; @@ -772,7 +657,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, return rule; err_chain_src_rewrite: esw_put_dest_tables_loop(esw, attr, 0, i); - esw_vport_tbl_put(esw, &fwd_attr); + mlx5_esw_vporttbl_put(esw, &fwd_attr); err_get_fwd: mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); err_get_fast: @@ -807,15 +692,16 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, fwd_attr.chain = attr->chain; fwd_attr.prio = attr->prio; fwd_attr.vport = esw_attr->in_rep->vport; + fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns; } if (fwd_rule) { - esw_vport_tbl_put(esw, &fwd_attr); + mlx5_esw_vporttbl_put(esw, &fwd_attr); mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); esw_put_dest_tables_loop(esw, attr, 0, esw_attr->split_count); } else { if (split) - esw_vport_tbl_put(esw, &fwd_attr); + mlx5_esw_vporttbl_put(esw, &fwd_attr); else if (attr->chain || attr->prio) mlx5_chains_put_table(chains, attr->chain, attr->prio, 0); esw_cleanup_dests(esw, attr); @@ -841,10 +727,11 @@ mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw, static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val) { struct mlx5_eswitch_rep *rep; - int i, err = 0; + unsigned long i; + int err = 0; esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? 
"pop" : "none"); - mlx5_esw_for_each_host_func_rep(esw, i, rep, esw->esw_funcs.num_vfs) { + mlx5_esw_for_each_host_func_vport(esw, i, rep, esw->esw_funcs.num_vfs) { if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED) continue; @@ -1036,7 +923,8 @@ out: } struct mlx5_flow_handle * -mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, u16 vport, +mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw, + struct mlx5_eswitch_rep *rep, u32 sqn) { struct mlx5_flow_act flow_act = {0}; @@ -1054,21 +942,30 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, u16 vport, misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn); /* source vport is the esw manager */ - MLX5_SET(fte_match_set_misc, misc, source_port, esw->manager_vport); + MLX5_SET(fte_match_set_misc, misc, source_port, rep->esw->manager_vport); + if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch)) + MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(rep->esw->dev, vhca_id)); misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn); MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch)) + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest.vport.num = vport; + dest.vport.num = rep->vport; + dest.vport.vhca_id = MLX5_CAP_GEN(rep->esw->dev, vhca_id); + dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + flow_rule = mlx5_add_flow_rules(on_esw->fdb_table.offloads.slow_fdb, spec, &flow_act, &dest, 1); if (IS_ERR(flow_rule)) - esw_warn(esw->dev, "FDB: Failed to add send to vport rule err %ld\n", PTR_ERR(flow_rule)); + esw_warn(on_esw->dev, "FDB: Failed to add send to vport rule err %ld\n", + PTR_ERR(flow_rule)); out: kvfree(spec); return flow_rule; @@ -1083,13 +980,13 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule) static void mlx5_eswitch_del_send_to_vport_meta_rules(struct mlx5_eswitch *esw) { struct mlx5_flow_handle **flows = esw->fdb_table.offloads.send_to_vport_meta_rules; - int i = 0, num_vfs = esw->esw_funcs.num_vfs, vport_num; + int i = 0, num_vfs = esw->esw_funcs.num_vfs; if (!num_vfs || !flows) return; - mlx5_esw_for_each_vf_vport_num(esw, vport_num, num_vfs) - mlx5_del_flow_rules(flows[i++]); + for (i = 0; i < num_vfs; i++) + mlx5_del_flow_rules(flows[i]); kvfree(flows); } @@ -1097,12 +994,15 @@ static void mlx5_eswitch_del_send_to_vport_meta_rules(struct mlx5_eswitch *esw) static int mlx5_eswitch_add_send_to_vport_meta_rules(struct mlx5_eswitch *esw) { - int num_vfs, vport_num, rule_idx = 0, err = 0; struct mlx5_flow_destination dest = {}; struct mlx5_flow_act flow_act = {0}; + int num_vfs, rule_idx = 0, err = 0; struct mlx5_flow_handle *flow_rule; struct mlx5_flow_handle **flows; struct mlx5_flow_spec *spec; + struct mlx5_vport *vport; + unsigned long i; + u16 vport_num; num_vfs = esw->esw_funcs.num_vfs; flows = kvzalloc(num_vfs * sizeof(*flows), GFP_KERNEL); @@ -1126,7 +1026,8 @@ mlx5_eswitch_add_send_to_vport_meta_rules(struct mlx5_eswitch *esw) dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - mlx5_esw_for_each_vf_vport_num(esw, vport_num, 
num_vfs) { + mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) { + vport_num = vport->vport; MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num)); dest.vport.num = vport_num; @@ -1268,12 +1169,14 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, struct mlx5_flow_destination dest = {}; struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_handle **flows; - struct mlx5_flow_handle *flow; - struct mlx5_flow_spec *spec; /* total vports is the same for both e-switches */ int nvports = esw->total_vports; + struct mlx5_flow_handle *flow; + struct mlx5_flow_spec *spec; + struct mlx5_vport *vport; + unsigned long i; void *misc; - int err, i; + int err; spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) @@ -1292,6 +1195,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, misc_parameters); if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch, spec, MLX5_VPORT_PF); @@ -1301,10 +1205,11 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, err = PTR_ERR(flow); goto add_pf_flow_err; } - flows[MLX5_VPORT_PF] = flow; + flows[vport->index] = flow; } if (mlx5_ecpf_vport_exists(esw->dev)) { + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_ECPF); flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, &flow_act, &dest, 1); @@ -1312,13 +1217,13 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, err = PTR_ERR(flow); goto add_ecpf_flow_err; } - flows[mlx5_eswitch_ecpf_idx(esw)] = flow; + flows[vport->index] = flow; } - mlx5_esw_for_each_vf_vport_num(esw, i, mlx5_core_max_vfs(esw->dev)) { + mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) { esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch, - spec, i); + spec, vport->vport); flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, &flow_act, &dest, 1); @@ -1326,7 +1231,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, err = PTR_ERR(flow); goto add_vf_flow_err; } - flows[i] = flow; + flows[vport->index] = flow; } esw->fdb_table.offloads.peer_miss_rules = flows; @@ -1335,15 +1240,20 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, return 0; add_vf_flow_err: - nvports = --i; - mlx5_esw_for_each_vf_vport_num_reverse(esw, i, nvports) - mlx5_del_flow_rules(flows[i]); - - if (mlx5_ecpf_vport_exists(esw->dev)) - mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]); + mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) { + if (!flows[vport->index]) + continue; + mlx5_del_flow_rules(flows[vport->index]); + } + if (mlx5_ecpf_vport_exists(esw->dev)) { + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); + mlx5_del_flow_rules(flows[vport->index]); + } add_ecpf_flow_err: - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) - mlx5_del_flow_rules(flows[MLX5_VPORT_PF]); + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); + mlx5_del_flow_rules(flows[vport->index]); + } add_pf_flow_err: esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err); kvfree(flows); @@ -1355,20 +1265,23 @@ alloc_flows_err: static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw) { struct mlx5_flow_handle **flows; - int i; + struct mlx5_vport *vport; + unsigned long i; flows = 
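/*
 * Peer-miss bookkeeping: the flows array is still sized by
 * esw->total_vports, but each rule's slot is now vport->index (the
 * vport's position in the eswitch xarray) instead of hard-coded values
 * such as MLX5_VPORT_PF or mlx5_eswitch_ecpf_idx(). Since the array is
 * zero-initialized, error unwinding becomes uniform (sketch):
 *
 *   mlx5_esw_for_each_vf_vport(esw, i, vport, max_vfs) {
 *           if (!flows[vport->index])
 *                   continue;
 *           mlx5_del_flow_rules(flows[vport->index]);
 *   }
 */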
esw->fdb_table.offloads.peer_miss_rules; - mlx5_esw_for_each_vf_vport_num_reverse(esw, i, - mlx5_core_max_vfs(esw->dev)) - mlx5_del_flow_rules(flows[i]); - - if (mlx5_ecpf_vport_exists(esw->dev)) - mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]); + mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) + mlx5_del_flow_rules(flows[vport->index]); - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) - mlx5_del_flow_rules(flows[MLX5_VPORT_PF]); + if (mlx5_ecpf_vport_exists(esw->dev)) { + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); + mlx5_del_flow_rules(flows[vport->index]); + } + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); + mlx5_del_flow_rules(flows[vport->index]); + } kvfree(flows); } @@ -1446,14 +1359,14 @@ esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag) if (!mlx5_eswitch_reg_c1_loopback_supported(esw)) return ERR_PTR(-EOPNOTSUPP); - spec = kzalloc(sizeof(*spec), GFP_KERNEL); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) return ERR_PTR(-ENOMEM); misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, - ESW_CHAIN_TAG_METADATA_MASK); + ESW_REG_C0_USER_DATA_METADATA_MASK); misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, tag); @@ -1469,7 +1382,7 @@ esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag) dest.ft = esw->offloads.ft_offloads; flow_rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); - kfree(spec); + kvfree(spec); if (IS_ERR(flow_rule)) esw_warn(esw->dev, @@ -1479,12 +1392,6 @@ esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag) return flow_rule; } -u32 -esw_get_max_restore_tag(struct mlx5_eswitch *esw) -{ - return ESW_CHAIN_TAG_METADATA_MASK; -} - #define MAX_PF_SQ 256 #define MAX_SQ_NVPORTS 32 @@ -1514,6 +1421,44 @@ static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw, } #if IS_ENABLED(CONFIG_MLX5_CLS_ACT) +static void esw_vport_tbl_put(struct mlx5_eswitch *esw) +{ + struct mlx5_vport_tbl_attr attr; + struct mlx5_vport *vport; + unsigned long i; + + attr.chain = 0; + attr.prio = 1; + mlx5_esw_for_each_vport(esw, i, vport) { + attr.vport = vport->vport; + attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns; + mlx5_esw_vporttbl_put(esw, &attr); + } +} + +static int esw_vport_tbl_get(struct mlx5_eswitch *esw) +{ + struct mlx5_vport_tbl_attr attr; + struct mlx5_flow_table *fdb; + struct mlx5_vport *vport; + unsigned long i; + + attr.chain = 0; + attr.prio = 1; + mlx5_esw_for_each_vport(esw, i, vport) { + attr.vport = vport->vport; + attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns; + fdb = mlx5_esw_vporttbl_get(esw, &attr); + if (IS_ERR(fdb)) + goto out; + } + return 0; + +out: + esw_vport_tbl_put(esw); + return PTR_ERR(fdb); +} + #define fdb_modify_header_fwd_to_table_supported(esw) \ (MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table)) static void esw_init_chains_offload_flags(struct mlx5_eswitch *esw, u32 *flags) @@ -1563,7 +1508,7 @@ esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb) attr.max_ft_sz = fdb_max; attr.max_grp_num = esw->params.large_group_num; attr.default_ft = miss_fdb; - attr.max_restore_tag = esw_get_max_restore_tag(esw); + attr.mapping = esw->offloads.reg_c0_obj_pool; chains = mlx5_chains_create(dev, &attr); if (IS_ERR(chains)) { @@ -1591,7 +1536,7 @@ esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb) /* Open level 1 for 
split fdb rules now if prios isn't supported */ if (!mlx5_chains_prios_supported(chains)) { - err = mlx5_esw_vport_tbl_get(esw); + err = esw_vport_tbl_get(esw); if (err) goto level_1_err; } @@ -1615,7 +1560,7 @@ static void esw_chains_destroy(struct mlx5_eswitch *esw, struct mlx5_fs_chains *chains) { if (!mlx5_chains_prios_supported(chains)) - mlx5_esw_vport_tbl_put(esw); + esw_vport_tbl_put(esw); mlx5_chains_put_table(chains, 0, 1, 0); mlx5_chains_put_table(chains, mlx5_chains_get_nf_ft_chain(chains), 1, 0); mlx5_chains_destroy(chains); @@ -1702,6 +1647,12 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw) MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn); MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) { + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_eswitch_owner_vhca_id); + MLX5_SET(create_flow_group_in, flow_group_in, + source_eswitch_owner_vhca_id_valid, 1); + } ix = esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ; MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); @@ -1715,36 +1666,40 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw) } esw->fdb_table.offloads.send_to_vport_grp = g; - /* meta send to vport */ - memset(flow_group_in, 0, inlen); - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_MISC_PARAMETERS_2); - - match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + if (esw_src_port_rewrite_supported(esw)) { + /* meta send to vport */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); - MLX5_SET(fte_match_param, match_criteria, - misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask()); - MLX5_SET(fte_match_param, match_criteria, - misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); - num_vfs = esw->esw_funcs.num_vfs; - if (num_vfs) { - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + num_vfs - 1); - ix += num_vfs; + MLX5_SET(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + MLX5_SET(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK); - g = mlx5_create_flow_group(fdb, flow_group_in); - if (IS_ERR(g)) { - err = PTR_ERR(g); - esw_warn(dev, "Failed to create send-to-vport meta flow group err(%d)\n", - err); - goto send_vport_meta_err; + num_vfs = esw->esw_funcs.num_vfs; + if (num_vfs) { + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); + MLX5_SET(create_flow_group_in, flow_group_in, + end_flow_index, ix + num_vfs - 1); + ix += num_vfs; + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create send-to-vport meta flow group err(%d)\n", + err); + goto send_vport_meta_err; + } + esw->fdb_table.offloads.send_to_vport_meta_grp = g; + + err = mlx5_eswitch_add_send_to_vport_meta_rules(esw); + if (err) + goto meta_rule_err; } - esw->fdb_table.offloads.send_to_vport_meta_grp = g; - - err = mlx5_eswitch_add_send_to_vport_meta_rules(esw); - if (err) - goto meta_rule_err; } if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) { @@ -1854,6 +1809,7 @@ static void 
esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) /* Holds true only as long as DMFS is the default */ mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns, MLX5_FLOW_STEERING_MODE_DMFS); + atomic64_set(&esw->user_count, 0); } static int esw_create_offloads_table(struct mlx5_eswitch *esw) @@ -1977,12 +1933,12 @@ out: return flow_rule; } - -static int mlx5_eswitch_inline_mode_get(const struct mlx5_eswitch *esw, u8 *mode) +static int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode) { u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2; struct mlx5_core_dev *dev = esw->dev; - int vport; + struct mlx5_vport *vport; + unsigned long i; if (!MLX5_CAP_GEN(dev, vport_group_manager)) return -EOPNOTSUPP; @@ -2003,8 +1959,8 @@ static int mlx5_eswitch_inline_mode_get(const struct mlx5_eswitch *esw, u8 *mode query_vports: mlx5_query_nic_vport_min_inline(dev, esw->first_host_vport, &prev_mlx5_mode); - mlx5_esw_for_each_host_func_vport(esw, vport, esw->esw_funcs.num_vfs) { - mlx5_query_nic_vport_min_inline(dev, vport, &mlx5_mode); + mlx5_esw_for_each_host_func_vport(esw, i, vport, esw->esw_funcs.num_vfs) { + mlx5_query_nic_vport_min_inline(dev, vport->vport, &mlx5_mode); if (prev_mlx5_mode != mlx5_mode) return -EINVAL; prev_mlx5_mode = mlx5_mode; @@ -2056,7 +2012,7 @@ static int esw_create_restore_table(struct mlx5_eswitch *esw) goto out_free; } - ft_attr.max_fte = 1 << ESW_CHAIN_TAG_METADATA_BITS; + ft_attr.max_fte = 1 << ESW_REG_C0_USER_DATA_METADATA_BITS; ft = mlx5_create_flow_table(ns, &ft_attr); if (IS_ERR(ft)) { err = PTR_ERR(ft); @@ -2071,7 +2027,7 @@ static int esw_create_restore_table(struct mlx5_eswitch *esw) misc_parameters_2); MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, - ESW_CHAIN_TAG_METADATA_MASK); + ESW_REG_C0_USER_DATA_METADATA_MASK); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ft_attr.max_fte - 1); @@ -2147,34 +2103,82 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, return err; } -void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw) +static void mlx5_esw_offloads_rep_mark_set(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, + xa_mark_t mark) { - kfree(esw->offloads.vport_reps); + bool mark_set; + + /* Copy the mark from vport to its rep */ + mark_set = xa_get_mark(&esw->vports, rep->vport, mark); + if (mark_set) + xa_set_mark(&esw->offloads.vport_reps, rep->vport, mark); } -int esw_offloads_init_reps(struct mlx5_eswitch *esw) +static int mlx5_esw_offloads_rep_init(struct mlx5_eswitch *esw, const struct mlx5_vport *vport) { - int total_vports = esw->total_vports; struct mlx5_eswitch_rep *rep; - int vport_index; - u8 rep_type; + int rep_type; + int err; - esw->offloads.vport_reps = kcalloc(total_vports, - sizeof(struct mlx5_eswitch_rep), - GFP_KERNEL); - if (!esw->offloads.vport_reps) + rep = kzalloc(sizeof(*rep), GFP_KERNEL); + if (!rep) return -ENOMEM; - mlx5_esw_for_all_reps(esw, vport_index, rep) { - rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport_index); - rep->vport_index = vport_index; + rep->vport = vport->vport; + rep->vport_index = vport->index; + for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) + atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED); - for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) - atomic_set(&rep->rep_data[rep_type].state, - REP_UNREGISTERED); - } + err = xa_insert(&esw->offloads.vport_reps, rep->vport, rep, GFP_KERNEL); + if (err) + goto insert_err; + + 
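/*
 * The representors now live in an xarray as well. An xarray entry can
 * carry up to three marks (XA_MARK_0..XA_MARK_2); the eswitch uses them
 * to tag vports by type, and copying the vport's marks onto the rep
 * entry lets the rep iterators filter by type with no extra
 * bookkeeping. Assuming the MLX5_ESW_VPT_* values are xa_mark_t tags,
 * as in this series, a typed walk is simply:
 *
 *   struct mlx5_eswitch_rep *rep;
 *   unsigned long i;
 *
 *   xa_for_each_marked(&esw->offloads.vport_reps, i, rep,
 *                      MLX5_ESW_VPT_VF)
 *           do_something(rep);
 */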
mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_HOST_FN); + mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_VF); + mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_SF); + return 0; + +insert_err: + kfree(rep); + return err; +} + +static void mlx5_esw_offloads_rep_cleanup(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep) +{ + xa_erase(&esw->offloads.vport_reps, rep->vport); + kfree(rep); +} + +void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw) +{ + struct mlx5_eswitch_rep *rep; + unsigned long i; + + mlx5_esw_for_each_rep(esw, i, rep) + mlx5_esw_offloads_rep_cleanup(esw, rep); + xa_destroy(&esw->offloads.vport_reps); +} + +int esw_offloads_init_reps(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + unsigned long i; + int err; + + xa_init(&esw->offloads.vport_reps); + mlx5_esw_for_each_vport(esw, i, vport) { + err = mlx5_esw_offloads_rep_init(esw, vport); + if (err) + goto err; + } return 0; + +err: + esw_offloads_cleanup_reps(esw); + return err; } static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, @@ -2188,7 +2192,7 @@ static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, static void __unload_reps_sf_vport(struct mlx5_eswitch *esw, u8 rep_type) { struct mlx5_eswitch_rep *rep; - int i; + unsigned long i; mlx5_esw_for_each_sf_rep(esw, i, rep) __esw_offloads_unload_rep(esw, rep, rep_type); @@ -2197,11 +2201,11 @@ static void __unload_reps_sf_vport(struct mlx5_eswitch *esw, u8 rep_type) static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type) { struct mlx5_eswitch_rep *rep; - int i; + unsigned long i; __unload_reps_sf_vport(esw, rep_type); - mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, esw->esw_funcs.num_vfs) + mlx5_esw_for_each_vf_rep(esw, i, rep) __esw_offloads_unload_rep(esw, rep, rep_type); if (mlx5_ecpf_vport_exists(esw->dev)) { @@ -2259,9 +2263,11 @@ int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num) if (esw->mode != MLX5_ESWITCH_OFFLOADS) return 0; - err = mlx5_esw_offloads_devlink_port_register(esw, vport_num); - if (err) - return err; + if (vport_num != MLX5_VPORT_UPLINK) { + err = mlx5_esw_offloads_devlink_port_register(esw, vport_num); + if (err) + return err; + } err = mlx5_esw_offloads_rep_load(esw, vport_num); if (err) @@ -2269,7 +2275,8 @@ int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num) return err; load_err: - mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); + if (vport_num != MLX5_VPORT_UPLINK) + mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); return err; } @@ -2279,7 +2286,9 @@ void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num) return; mlx5_esw_offloads_rep_unload(esw, vport_num); - mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); + + if (vport_num != MLX5_VPORT_UPLINK) + mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); } #define ESW_OFFLOADS_DEVCOM_PAIR (0) @@ -2288,13 +2297,8 @@ void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num) static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw, struct mlx5_eswitch *peer_esw) { - int err; - - err = esw_add_fdb_peer_miss_rules(esw, peer_esw->dev); - if (err) - return err; - return 0; + return esw_add_fdb_peer_miss_rules(esw, peer_esw->dev); } static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw) @@ -2419,8 +2423,7 @@ static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS); } -static bool -esw_check_vport_match_metadata_supported(const struct mlx5_eswitch 
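/*
 * The recurring "int i" to "unsigned long i" conversions in this patch
 * follow from the xarray API: xa_for_each(xa, index, entry) requires an
 * unsigned long index lvalue. The old _reverse iterator variants also
 * disappear, since tearing down independent per-vport objects does not
 * depend on order. Minimal pattern:
 *
 *   struct mlx5_vport *vport;
 *   unsigned long i;
 *
 *   xa_for_each(&esw->vports, i, vport)
 *           cleanup_one(vport);
 */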
*esw) +bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw) { if (!MLX5_CAP_ESW(esw->dev, esw_uplink_ingress_acl)) return false; @@ -2489,25 +2492,25 @@ static void esw_offloads_vport_metadata_cleanup(struct mlx5_eswitch *esw, static void esw_offloads_metadata_uninit(struct mlx5_eswitch *esw) { struct mlx5_vport *vport; - int i; + unsigned long i; if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) return; - mlx5_esw_for_all_vports_reverse(esw, i, vport) + mlx5_esw_for_each_vport(esw, i, vport) esw_offloads_vport_metadata_cleanup(esw, vport); } static int esw_offloads_metadata_init(struct mlx5_eswitch *esw) { struct mlx5_vport *vport; + unsigned long i; int err; - int i; if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) return 0; - mlx5_esw_for_all_vports(esw, i, vport) { + mlx5_esw_for_each_vport(esw, i, vport) { err = esw_offloads_vport_metadata_setup(esw, vport); if (err) goto metadata_err; @@ -2520,6 +2523,28 @@ metadata_err: return err; } +int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable) +{ + int err = 0; + + down_write(&esw->mode_lock); + if (esw->mode != MLX5_ESWITCH_NONE) { + err = -EBUSY; + goto done; + } + if (!mlx5_esw_vport_match_metadata_supported(esw)) { + err = -EOPNOTSUPP; + goto done; + } + if (enable) + esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; + else + esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; +done: + up_write(&esw->mode_lock); + return err; +} + int esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw, struct mlx5_vport *vport) @@ -2554,6 +2579,9 @@ static int esw_create_uplink_offloads_acl_tables(struct mlx5_eswitch *esw) struct mlx5_vport *vport; vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); + if (IS_ERR(vport)) + return PTR_ERR(vport); + return esw_vport_create_offloads_acl_tables(esw, vport); } @@ -2562,6 +2590,9 @@ static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw) struct mlx5_vport *vport; vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); + if (IS_ERR(vport)) + return; + esw_vport_destroy_offloads_acl_tables(esw, vport); } @@ -2573,6 +2604,7 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw) memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb)); mutex_init(&esw->fdb_table.offloads.vports.lock); hash_init(esw->fdb_table.offloads.vports.table); + atomic64_set(&esw->user_count, 0); indir = mlx5_esw_indir_table_init(); if (IS_ERR(indir)) { @@ -2715,10 +2747,25 @@ static int mlx5_esw_host_number_init(struct mlx5_eswitch *esw) return 0; } +bool mlx5_esw_offloads_controller_valid(const struct mlx5_eswitch *esw, u32 controller) +{ + /* Local controller is always valid */ + if (controller == 0) + return true; + + if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) + return false; + + /* External host number starts with zero in device */ + return (controller == esw->offloads.host_number + 1); +} + int esw_offloads_enable(struct mlx5_eswitch *esw) { + struct mapping_ctx *reg_c0_obj_pool; struct mlx5_vport *vport; - int err, i; + unsigned long i; + int err; if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat) && MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, decap)) @@ -2733,9 +2780,6 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) if (err) goto err_metadata; - if (esw_check_vport_match_metadata_supported(esw)) - esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; - err = esw_offloads_metadata_init(esw); if (err) goto err_metadata; @@ -2744,6 +2788,15 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) if (err) goto err_vport_metadata; + 
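/*
 * reg_c0 restore tags are allocated from a single shared mapping
 * context below instead of a private pool inside fs_chains. A feature
 * that needs a tag stores a struct mlx5_mapped_obj and receives an id
 * that fits ESW_REG_C0_USER_DATA_METADATA_MASK (sketch):
 *
 *   struct mlx5_mapped_obj obj = {};
 *   u32 tag;
 *
 *   obj.type = MLX5_MAPPED_OBJ_CHAIN;
 *   obj.chain = chain;
 *   err = mapping_add(esw->offloads.reg_c0_obj_pool, &obj, &tag);
 *
 * On miss the datapath reads the tag back with mapping_find() and
 * dispatches on obj.type, so chain ids and other objects can share the
 * same register space.
 */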
reg_c0_obj_pool = mapping_create(sizeof(struct mlx5_mapped_obj), + ESW_REG_C0_USER_DATA_METADATA_MASK, + true); + if (IS_ERR(reg_c0_obj_pool)) { + err = PTR_ERR(reg_c0_obj_pool); + goto err_pool; + } + esw->offloads.reg_c0_obj_pool = reg_c0_obj_pool; + err = esw_offloads_steering_init(esw); if (err) goto err_steering_init; @@ -2770,11 +2823,12 @@ err_vports: err_uplink: esw_offloads_steering_cleanup(esw); err_steering_init: + mapping_destroy(reg_c0_obj_pool); +err_pool: esw_set_passing_vport_metadata(esw, false); err_vport_metadata: esw_offloads_metadata_uninit(esw); err_metadata: - esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; mlx5_rdma_disable_roce(esw->dev); mutex_destroy(&esw->offloads.termtbl_mutex); return err; @@ -2808,8 +2862,8 @@ void esw_offloads_disable(struct mlx5_eswitch *esw) esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK); esw_set_passing_vport_metadata(esw, false); esw_offloads_steering_cleanup(esw); + mapping_destroy(esw->offloads.reg_c0_obj_pool); esw_offloads_metadata_uninit(esw); - esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; mlx5_rdma_disable_roce(esw->dev); mutex_destroy(&esw->offloads.termtbl_mutex); esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; @@ -2914,8 +2968,14 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, if (esw_mode_from_devlink(mode, &mlx5_mode)) return -EINVAL; - mutex_lock(&esw->mode_lock); - cur_mlx5_mode = esw->mode; + err = mlx5_esw_try_lock(esw); + if (err < 0) { + NL_SET_ERR_MSG_MOD(extack, "Can't change mode, E-Switch is busy"); + return err; + } + cur_mlx5_mode = err; + err = 0; + if (cur_mlx5_mode == mlx5_mode) goto unlock; @@ -2927,7 +2987,7 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, err = -EINVAL; unlock: - mutex_unlock(&esw->mode_lock); + mlx5_esw_unlock(esw); return err; } @@ -2940,14 +3000,45 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) if (IS_ERR(esw)) return PTR_ERR(esw); - mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) goto unlock; err = esw_mode_to_devlink(esw->mode, mode); unlock: - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); + return err; +} + +static int mlx5_esw_vports_inline_set(struct mlx5_eswitch *esw, u8 mlx5_mode, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_vport *vport; + u16 err_vport_num = 0; + unsigned long i; + int err = 0; + + mlx5_esw_for_each_host_func_vport(esw, i, vport, esw->esw_funcs.num_vfs) { + err = mlx5_modify_nic_vport_min_inline(dev, vport->vport, mlx5_mode); + if (err) { + err_vport_num = vport->vport; + NL_SET_ERR_MSG_MOD(extack, + "Failed to set min inline on vport"); + goto revert_inline_mode; + } + } + return 0; + +revert_inline_mode: + mlx5_esw_for_each_host_func_vport(esw, i, vport, esw->esw_funcs.num_vfs) { + if (vport->vport == err_vport_num) + break; + mlx5_modify_nic_vport_min_inline(dev, + vport->vport, + esw->offloads.inline_mode); + } return err; } @@ -2955,15 +3046,15 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); - int err, vport, num_vport; struct mlx5_eswitch *esw; u8 mlx5_mode; + int err; esw = mlx5_devlink_eswitch_get(devlink); if (IS_ERR(esw)) return PTR_ERR(esw); - mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) goto out; @@ -2992,27 +3083,16 @@ int 
mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, if (err) goto out; - mlx5_esw_for_each_host_func_vport(esw, vport, esw->esw_funcs.num_vfs) { - err = mlx5_modify_nic_vport_min_inline(dev, vport, mlx5_mode); - if (err) { - NL_SET_ERR_MSG_MOD(extack, - "Failed to set min inline on vport"); - goto revert_inline_mode; - } - } + err = mlx5_esw_vports_inline_set(esw, mlx5_mode, extack); + if (err) + goto out; esw->offloads.inline_mode = mlx5_mode; - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return 0; -revert_inline_mode: - num_vport = --vport; - mlx5_esw_for_each_host_func_vport_reverse(esw, vport, num_vport) - mlx5_modify_nic_vport_min_inline(dev, - vport, - esw->offloads.inline_mode); out: - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return err; } @@ -3025,14 +3105,14 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode) if (IS_ERR(esw)) return PTR_ERR(esw); - mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) goto unlock; err = esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); unlock: - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return err; } @@ -3048,7 +3128,7 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, if (IS_ERR(esw)) return PTR_ERR(esw); - mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) goto unlock; @@ -3094,7 +3174,7 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, } unlock: - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return err; } @@ -3109,14 +3189,14 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, return PTR_ERR(esw); - mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) goto unlock; *encap = esw->offloads.encap; unlock: - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return 0; } @@ -3141,11 +3221,12 @@ void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, { struct mlx5_eswitch_rep_data *rep_data; struct mlx5_eswitch_rep *rep; - int i; + unsigned long i; esw->offloads.rep_ops[rep_type] = ops; - mlx5_esw_for_all_reps(esw, i, rep) { - if (likely(mlx5_eswitch_vport_has_rep(esw, i))) { + mlx5_esw_for_each_rep(esw, i, rep) { + if (likely(mlx5_eswitch_vport_has_rep(esw, rep->vport))) { + rep->esw = esw; rep_data = &rep->rep_data[rep_type]; atomic_set(&rep_data->state, REP_REGISTERED); } @@ -3156,12 +3237,12 @@ EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps); void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type) { struct mlx5_eswitch_rep *rep; - int i; + unsigned long i; if (esw->mode == MLX5_ESWITCH_OFFLOADS) __unload_reps_all_vport(esw, rep_type); - mlx5_esw_for_all_reps(esw, i, rep) + mlx5_esw_for_each_rep(esw, i, rep) atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED); } EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps); @@ -3202,12 +3283,6 @@ struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw, } EXPORT_SYMBOL(mlx5_eswitch_vport_rep); -bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num) -{ - return vport_num >= MLX5_VPORT_FIRST_VF && - vport_num <= esw->dev->priv.sriov.max_vfs; -} - bool mlx5_eswitch_reg_c1_loopback_enabled(const struct mlx5_eswitch *esw) { return !!(esw->flags & MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED); @@ -3233,7 +3308,7 @@ u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, 
EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match); int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_port *dl_port, - u16 vport_num, u32 sfnum) + u16 vport_num, u32 controller, u32 sfnum) { int err; @@ -3241,7 +3316,7 @@ int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_p if (err) return err; - err = mlx5_esw_devlink_sf_port_register(esw, dl_port, vport_num, sfnum); + err = mlx5_esw_devlink_sf_port_register(esw, dl_port, vport_num, controller, sfnum); if (err) goto devlink_err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index ec679560a95d..a81ece94f599 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -83,14 +83,16 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, ft_attr.autogroup.max_num_groups = 1; tt->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr); if (IS_ERR(tt->termtbl)) { - esw_warn(dev, "Failed to create termination table\n"); + esw_warn(dev, "Failed to create termination table (error %d)\n", + IS_ERR(tt->termtbl)); return -EOPNOTSUPP; } tt->rule = mlx5_add_flow_rules(tt->termtbl, NULL, flow_act, &tt->dest, 1); if (IS_ERR(tt->rule)) { - esw_warn(dev, "Failed to create termination table rule\n"); + esw_warn(dev, "Failed to create termination table rule (error %d)\n", + IS_ERR(tt->rule)); goto add_flow_err; } return 0; @@ -140,10 +142,9 @@ mlx5_eswitch_termtbl_get_create(struct mlx5_eswitch *esw, memcpy(&tt->flow_act, flow_act, sizeof(*flow_act)); err = mlx5_eswitch_termtbl_create(esw->dev, tt, flow_act); - if (err) { - esw_warn(esw->dev, "Failed to create termination table\n"); + if (err) goto tt_create_err; - } + hash_add(esw->offloads.termtbl_tbl, &tt->termtbl_hlist, hash_key); tt_add_ref: tt->ref_count++; @@ -282,7 +283,8 @@ mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw, tt = mlx5_eswitch_termtbl_get_create(esw, &term_tbl_act, &dest[i], attr); if (IS_ERR(tt)) { - esw_warn(esw->dev, "Failed to create termination table\n"); + esw_warn(esw->dev, "Failed to get termination table (error %d)\n", + IS_ERR(tt)); goto revert_changes; } attr->dests[num_vport_dests].termtbl = tt; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c index 80da50e12915..bd66ab2af5b5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -575,6 +575,7 @@ static int mlx5_fpga_conn_create_qp(struct mlx5_fpga_conn *conn, MLX5_SET(qpc, qpc, log_sq_size, ilog2(conn->qp.sq.size)); MLX5_SET(qpc, qpc, cqn_snd, conn->cq.mcq.cqn); MLX5_SET(qpc, qpc, cqn_rcv, conn->cq.mcq.cqn); + MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(mdev)); MLX5_SET64(qpc, qpc, dbr_addr, conn->qp.wq_ctrl.db.dma); if (MLX5_CAP_GEN(mdev, cqe_version) == 1) MLX5_SET(qpc, qpc, user_index, 0xFFFFFF); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c index 22bee4990232..0bba92cf5dc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c @@ -707,7 +707,7 @@ static void *mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev, } if (accel_xfrm->attrs.action == MLX5_ACCEL_ESP_ACTION_DECRYPT) { - err = ida_simple_get(&fipsec->halloc, 1, 0, GFP_KERNEL); + err = 
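/*
 * ida_simple_get()/ida_simple_remove() are deprecated wrappers; the one
 * subtlety when converting is that ida_simple_get() takes an exclusive
 * "end" while ida_alloc_range() takes an inclusive "max":
 *
 *   id = ida_simple_get(ida, start, end, gfp);        old, [start, end)
 *   id = ida_alloc_range(ida, start, end - 1, gfp);   new, inclusive max
 *   id = ida_alloc_min(ida, start, gfp);              no upper bound
 *   ida_free(ida, id);                      replaces ida_simple_remove()
 *
 * The call here passed end == 0, which ida_simple_get() treats as "no
 * limit", so ida_alloc_min() is the direct equivalent.
 */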
ida_alloc_min(&fipsec->halloc, 1, GFP_KERNEL); if (err < 0) { context = ERR_PTR(err); goto exists; @@ -758,7 +758,7 @@ delete_hash: unlock_hash: mutex_unlock(&fipsec->sa_hash_lock); if (accel_xfrm->attrs.action == MLX5_ACCEL_ESP_ACTION_DECRYPT) - ida_simple_remove(&fipsec->halloc, sa_ctx->sa_handle); + ida_free(&fipsec->halloc, sa_ctx->sa_handle); exists: mutex_unlock(&fpga_xfrm->lock); kfree(sa_ctx); @@ -850,9 +850,9 @@ mlx5_fpga_ipsec_release_sa_ctx(struct mlx5_fpga_ipsec_sa_ctx *sa_ctx) return; } - if (sa_ctx->fpga_xfrm->accel_xfrm.attrs.action & + if (sa_ctx->fpga_xfrm->accel_xfrm.attrs.action == MLX5_ACCEL_ESP_ACTION_DECRYPT) - ida_simple_remove(&fipsec->halloc, sa_ctx->sa_handle); + ida_free(&fipsec->halloc, sa_ctx->sa_handle); mutex_lock(&fipsec->sa_hash_lock); WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, &sa_ctx->hash, @@ -1085,6 +1085,7 @@ static int fpga_ipsec_fs_create_fte(struct mlx5_flow_root_namespace *ns, rule->ctx = mlx5_fpga_ipsec_fs_create_sa_ctx(dev, fte, is_egress); if (IS_ERR(rule->ctx)) { int err = PTR_ERR(rule->ctx); + kfree(rule); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 66ad599bd488..f74d2c834037 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -105,7 +105,7 @@ #define ETHTOOL_PRIO_NUM_LEVELS 1 #define ETHTOOL_NUM_PRIOS 11 #define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS) -/* Promiscuous, Vlan, mac, ttc, inner ttc, {aRFS/accel and esp/esp_err} */ +/* Promiscuous, Vlan, mac, ttc, inner ttc, {UDP/ANY/aRFS/accel/{esp, esp_err}} */ #define KERNEL_NIC_PRIO_NUM_LEVELS 7 #define KERNEL_NIC_NUM_PRIOS 1 /* One more level for tc */ @@ -590,7 +590,7 @@ static void del_sw_fte(struct fs_node *node) &fte->hash, rhash_fte); WARN_ON(err); - ida_simple_remove(&fg->fte_allocator, fte->index - fg->start_index); + ida_free(&fg->fte_allocator, fte->index - fg->start_index); kmem_cache_free(steering->ftes_cache, fte); } @@ -640,7 +640,7 @@ static int insert_fte(struct mlx5_flow_group *fg, struct fs_fte *fte) int index; int ret; - index = ida_simple_get(&fg->fte_allocator, 0, fg->max_ftes, GFP_KERNEL); + index = ida_alloc_max(&fg->fte_allocator, fg->max_ftes - 1, GFP_KERNEL); if (index < 0) return index; @@ -656,7 +656,7 @@ static int insert_fte(struct mlx5_flow_group *fg, struct fs_fte *fte) return 0; err_ida_remove: - ida_simple_remove(&fg->fte_allocator, index); + ida_free(&fg->fte_allocator, index); return ret; } @@ -2229,17 +2229,21 @@ struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_d { struct mlx5_flow_steering *steering = dev->priv.steering; - if (!steering || vport >= mlx5_eswitch_get_total_vports(dev)) + if (!steering) return NULL; switch (type) { case MLX5_FLOW_NAMESPACE_ESW_EGRESS: + if (vport >= steering->esw_egress_acl_vports) + return NULL; if (steering->esw_egress_root_ns && steering->esw_egress_root_ns[vport]) return &steering->esw_egress_root_ns[vport]->ns; else return NULL; case MLX5_FLOW_NAMESPACE_ESW_INGRESS: + if (vport >= steering->esw_ingress_acl_vports) + return NULL; if (steering->esw_ingress_root_ns && steering->esw_ingress_root_ns[vport]) return &steering->esw_ingress_root_ns[vport]->ns; @@ -2395,14 +2399,12 @@ static int init_root_tree(struct mlx5_flow_steering *steering, struct init_tree_node *init_node, struct fs_node *fs_parent_node) { - int i; - struct mlx5_flow_namespace *fs_ns; int err; + int i; - fs_get_obj(fs_ns, fs_parent_node); for (i = 
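/*
 * fs_get_obj() is container_of() in disguise (see its definition in
 * fs_core.h), so the removed sequence in init_root_tree()
 *
 *   fs_get_obj(fs_ns, fs_parent_node);                  node to ns
 *   init_root_tree_recursive(..., &fs_ns->node, ...);   and back again
 *
 * only round-tripped the pointer; passing fs_parent_node through
 * directly is equivalent and drops the intermediate variable.
 */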
0; i < init_node->ar_size; i++) { err = init_root_tree_recursive(steering, &init_node->children[i], - &fs_ns->node, + fs_parent_node, init_node, i); if (err) return err; @@ -2573,43 +2575,11 @@ static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns) clean_tree(&root_ns->ns.node); } -static void cleanup_egress_acls_root_ns(struct mlx5_core_dev *dev) -{ - struct mlx5_flow_steering *steering = dev->priv.steering; - int i; - - if (!steering->esw_egress_root_ns) - return; - - for (i = 0; i < mlx5_eswitch_get_total_vports(dev); i++) - cleanup_root_ns(steering->esw_egress_root_ns[i]); - - kfree(steering->esw_egress_root_ns); - steering->esw_egress_root_ns = NULL; -} - -static void cleanup_ingress_acls_root_ns(struct mlx5_core_dev *dev) -{ - struct mlx5_flow_steering *steering = dev->priv.steering; - int i; - - if (!steering->esw_ingress_root_ns) - return; - - for (i = 0; i < mlx5_eswitch_get_total_vports(dev); i++) - cleanup_root_ns(steering->esw_ingress_root_ns[i]); - - kfree(steering->esw_ingress_root_ns); - steering->esw_ingress_root_ns = NULL; -} - void mlx5_cleanup_fs(struct mlx5_core_dev *dev) { struct mlx5_flow_steering *steering = dev->priv.steering; cleanup_root_ns(steering->root_ns); - cleanup_egress_acls_root_ns(dev); - cleanup_ingress_acls_root_ns(dev); cleanup_root_ns(steering->fdb_root_ns); steering->fdb_root_ns = NULL; kfree(steering->fdb_sub_ns); @@ -2854,10 +2824,9 @@ static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering, int vpo return PTR_ERR_OR_ZERO(prio); } -static int init_egress_acls_root_ns(struct mlx5_core_dev *dev) +int mlx5_fs_egress_acls_init(struct mlx5_core_dev *dev, int total_vports) { struct mlx5_flow_steering *steering = dev->priv.steering; - int total_vports = mlx5_eswitch_get_total_vports(dev); int err; int i; @@ -2873,7 +2842,7 @@ static int init_egress_acls_root_ns(struct mlx5_core_dev *dev) if (err) goto cleanup_root_ns; } - + steering->esw_egress_acl_vports = total_vports; return 0; cleanup_root_ns: @@ -2884,10 +2853,24 @@ cleanup_root_ns: return err; } -static int init_ingress_acls_root_ns(struct mlx5_core_dev *dev) +void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + int i; + + if (!steering->esw_egress_root_ns) + return; + + for (i = 0; i < steering->esw_egress_acl_vports; i++) + cleanup_root_ns(steering->esw_egress_root_ns[i]); + + kfree(steering->esw_egress_root_ns); + steering->esw_egress_root_ns = NULL; +} + +int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports) { struct mlx5_flow_steering *steering = dev->priv.steering; - int total_vports = mlx5_eswitch_get_total_vports(dev); int err; int i; @@ -2903,7 +2886,7 @@ static int init_ingress_acls_root_ns(struct mlx5_core_dev *dev) if (err) goto cleanup_root_ns; } - + steering->esw_ingress_acl_vports = total_vports; return 0; cleanup_root_ns: @@ -2914,6 +2897,21 @@ cleanup_root_ns: return err; } +void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + int i; + + if (!steering->esw_ingress_root_ns) + return; + + for (i = 0; i < steering->esw_ingress_acl_vports; i++) + cleanup_root_ns(steering->esw_ingress_root_ns[i]); + + kfree(steering->esw_ingress_root_ns); + steering->esw_ingress_root_ns = NULL; +} + static int init_egress_root_ns(struct mlx5_flow_steering *steering) { int err; @@ -2976,16 +2974,6 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) if (err) goto err; } - if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) 
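/*
 * The egress/ingress ACL root namespaces are no longer created
 * unconditionally at mlx5_init_fs() time; the eswitch, which knows the
 * real vport count, now drives their lifetime:
 *
 *   err = mlx5_fs_egress_acls_init(dev, total_vports);
 *   ...
 *   mlx5_fs_egress_acls_cleanup(dev);
 *
 * and mlx5_get_flow_vport_acl_namespace() bounds-checks the requested
 * vport against steering->esw_{egress,ingress}_acl_vports accordingly.
 */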
{ - err = init_egress_acls_root_ns(dev); - if (err) - goto err; - } - if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) { - err = init_ingress_acls_root_ns(dev); - if (err) - goto err; - } } if (MLX5_CAP_FLOWTABLE_SNIFFER_RX(dev, ft_support)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index b24a9849c45e..e577a2c424af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -129,6 +129,8 @@ struct mlx5_flow_steering { struct mlx5_flow_root_namespace *rdma_rx_root_ns; struct mlx5_flow_root_namespace *rdma_tx_root_ns; struct mlx5_flow_root_namespace *egress_root_ns; + int esw_egress_acl_vports; + int esw_ingress_acl_vports; }; struct fs_node { @@ -287,6 +289,11 @@ int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns, int mlx5_init_fs(struct mlx5_core_dev *dev); void mlx5_cleanup_fs(struct mlx5_core_dev *dev); +int mlx5_fs_egress_acls_init(struct mlx5_core_dev *dev, int total_vports); +void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev); +int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports); +void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev); + #define fs_get_obj(v, _node) {v = container_of((_node), typeof(*v), node); } #define fs_list_for_each_entry(pos, root) \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index f43caefd07a1..18e5aec14641 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -497,13 +497,13 @@ static struct mlx5_fc_bulk *mlx5_fc_bulk_create(struct mlx5_core_dev *dev) alloc_bitmask = MLX5_CAP_GEN(dev, flow_counter_bulk_alloc); bulk_len = alloc_bitmask > 0 ? 
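/*
 * The counter bulk and its bitmask can be large (bulk_len scales with
 * the flow_counter_bulk_alloc capability), so the allocations below
 * move to the kvmalloc family, which falls back to vmalloc when
 * contiguous pages are scarce. The pairing rule: memory obtained with
 * kvzalloc()/kvcalloc() must be released with kvfree(), never kfree().
 *
 *   p = kvzalloc(size, GFP_KERNEL);
 *   if (!p)
 *           return -ENOMEM;
 *   ...
 *   kvfree(p);
 */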
MLX5_FC_BULK_NUM_FCS(alloc_bitmask) : 1; - bulk = kzalloc(sizeof(*bulk) + bulk_len * sizeof(struct mlx5_fc), - GFP_KERNEL); + bulk = kvzalloc(sizeof(*bulk) + bulk_len * sizeof(struct mlx5_fc), + GFP_KERNEL); if (!bulk) goto err_alloc_bulk; - bulk->bitmask = kcalloc(BITS_TO_LONGS(bulk_len), sizeof(unsigned long), - GFP_KERNEL); + bulk->bitmask = kvcalloc(BITS_TO_LONGS(bulk_len), sizeof(unsigned long), + GFP_KERNEL); if (!bulk->bitmask) goto err_alloc_bitmask; @@ -521,9 +521,9 @@ static struct mlx5_fc_bulk *mlx5_fc_bulk_create(struct mlx5_core_dev *dev) return bulk; err_mlx5_cmd_bulk_alloc: - kfree(bulk->bitmask); + kvfree(bulk->bitmask); err_alloc_bitmask: - kfree(bulk); + kvfree(bulk); err_alloc_bulk: return ERR_PTR(err); } @@ -537,8 +537,8 @@ mlx5_fc_bulk_destroy(struct mlx5_core_dev *dev, struct mlx5_fc_bulk *bulk) } mlx5_cmd_fc_free(dev, bulk->base_id); - kfree(bulk->bitmask); - kfree(bulk); + kvfree(bulk->bitmask); + kvfree(bulk); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index f9042e147c7f..d5d57630015f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -104,7 +104,7 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev) if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) { complete(&fw_reset->done); } else { - mlx5_load_one(dev, false); + mlx5_load_one(dev); devlink_remote_reload_actions_performed(priv_to_devlink(dev), 0, BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE)); @@ -119,7 +119,7 @@ static void mlx5_sync_reset_reload_work(struct work_struct *work) int err; mlx5_enter_error_state(dev, true); - mlx5_unload_one(dev, false); + mlx5_unload_one(dev); err = mlx5_health_wait_pci_up(dev); if (err) mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); @@ -199,16 +199,11 @@ static void mlx5_fw_live_patch_event(struct work_struct *work) struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, fw_live_patch_work); struct mlx5_core_dev *dev = fw_reset->dev; - struct mlx5_fw_tracer *tracer; mlx5_core_info(dev, "Live patch updated firmware version: %d.%d.%d\n", fw_rev_maj(dev), fw_rev_min(dev), fw_rev_sub(dev)); - tracer = dev->tracer; - if (IS_ERR_OR_NULL(tracer)) - return; - - if (mlx5_fw_tracer_reload(tracer)) + if (mlx5_fw_tracer_reload(dev->tracer)) mlx5_core_err(dev, "Failed to reload FW tracer\n"); } @@ -342,7 +337,7 @@ static void mlx5_sync_reset_now_event(struct work_struct *work) } mlx5_enter_error_state(dev, true); - mlx5_unload_one(dev, false); + mlx5_unload_one(dev); done: fw_reset->ret = err; mlx5_fw_reset_complete_reload(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 0c32c485eb58..9ff163c5bcde 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -335,12 +335,12 @@ static int mlx5_health_try_recover(struct mlx5_core_dev *dev) return -EIO; } mlx5_core_err(dev, "starting health recovery flow\n"); - mlx5_recover_device(dev); - if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state) || - mlx5_health_check_fatal_sensors(dev)) { + if (mlx5_recover_device(dev) || mlx5_health_check_fatal_sensors(dev)) { mlx5_core_err(dev, "health recovery failed\n"); return -EIO; } + + mlx5_core_info(dev, "health recovery succeeded\n"); return 0; } diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 1eeca45cfcdf..7d7ed025db0d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -33,6 +33,7 @@ #include <rdma/ib_verbs.h> #include <linux/mlx5/fs.h> #include "en.h" +#include "en/params.h" #include "ipoib.h" #define IB_DEFAULT_Q_KEY 0xb1b @@ -233,6 +234,7 @@ int mlx5i_create_underlay_qp(struct mlx5e_priv *priv) } qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(priv->mdev)); MLX5_SET(qpc, qpc, st, MLX5_QP_ST_UD); MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, @@ -371,6 +373,7 @@ static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv) static int mlx5i_init_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; + u16 max_nch = priv->max_nch; int err; mlx5e_create_q_counters(priv); @@ -385,7 +388,7 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) if (err) goto err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv, priv->direct_tir); + err = mlx5e_create_direct_rqts(priv, priv->direct_tir, max_nch); if (err) goto err_destroy_indirect_rqts; @@ -393,7 +396,7 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) if (err) goto err_destroy_direct_rqts; - err = mlx5e_create_direct_tirs(priv, priv->direct_tir); + err = mlx5e_create_direct_tirs(priv, priv->direct_tir, max_nch); if (err) goto err_destroy_indirect_tirs; @@ -404,11 +407,11 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) return 0; err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv, priv->direct_tir); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); err_destroy_indirect_tirs: mlx5e_destroy_indirect_tirs(priv); err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv, priv->direct_tir); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); err_destroy_indirect_rqts: mlx5e_destroy_rqt(priv, &priv->indir_rqt); err_close_drop_rq: @@ -420,10 +423,12 @@ err_destroy_q_counters: static void mlx5i_cleanup_rx(struct mlx5e_priv *priv) { + u16 max_nch = priv->max_nch; + mlx5i_destroy_flow_steering(priv); - mlx5e_destroy_direct_tirs(priv, priv->direct_tir); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch); mlx5e_destroy_indirect_tirs(priv); - mlx5e_destroy_direct_rqts(priv, priv->direct_tir); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch); mlx5e_destroy_rqt(priv, &priv->indir_rqt); mlx5e_close_drop_rq(&priv->drop_rq); mlx5e_destroy_q_counters(priv); @@ -468,6 +473,7 @@ static const struct mlx5e_profile mlx5i_nic_profile = { .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), .stats_grps = mlx5i_stats_grps, .stats_grps_num = mlx5i_stats_grps_num, + .rx_ptp_support = false, }; /* mlx5i netdev NDos */ @@ -475,28 +481,19 @@ static const struct mlx5e_profile mlx5i_nic_profile = { static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu) { struct mlx5e_priv *priv = mlx5i_epriv(netdev); - struct mlx5e_channels new_channels = {}; - struct mlx5e_params *params; + struct mlx5e_params new_params; int err = 0; mutex_lock(&priv->state_lock); - params = &priv->channels.params; - - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - params->sw_mtu = new_mtu; - netdev->mtu = params->sw_mtu; - goto out; - } - - new_channels.params = *params; - new_channels.params.sw_mtu = new_mtu; + new_params = priv->channels.params; + new_params.sw_mtu = new_mtu; - err = mlx5e_safe_switch_channels(priv, 
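/*
 * MTU changes now go through the generic parameter-switch helper: the
 * caller fills a candidate struct mlx5e_params and the helper decides
 * whether the channels must actually be recreated (the trailing bool
 * requests a reset when applicable). Typical shape, as used below:
 *
 *   struct mlx5e_params new_params = priv->channels.params;
 *
 *   new_params.sw_mtu = new_mtu;
 *   err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true);
 *
 * This also absorbs the old special case for a device that is not yet
 * opened.
 */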
&new_channels, NULL, NULL); + err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true); if (err) goto out; - netdev->mtu = new_channels.params.sw_mtu; + netdev->mtu = new_params.sw_mtu; out: mutex_unlock(&priv->state_lock); @@ -694,6 +691,7 @@ static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev) static void mlx5_rdma_netdev_free(struct net_device *netdev) { struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; struct mlx5i_priv *ipriv = priv->ppriv; const struct mlx5e_profile *profile = priv->profile; @@ -702,13 +700,13 @@ static void mlx5_rdma_netdev_free(struct net_device *netdev) if (!ipriv->sub_interface) { mlx5i_pkey_qpn_ht_cleanup(netdev); - mlx5e_destroy_mdev_resources(priv->mdev); + mlx5e_destroy_mdev_resources(mdev); } } static bool mlx5_is_sub_interface(struct mlx5_core_dev *mdev) { - return mdev->mlx5e_res.pdn != 0; + return mdev->mlx5e_res.hw_objs.pdn != 0; } static const struct mlx5e_profile *mlx5_get_profile(struct mlx5_core_dev *mdev) @@ -718,7 +716,7 @@ static const struct mlx5e_profile *mlx5_get_profile(struct mlx5_core_dev *mdev) return &mlx5i_nic_profile; } -static int mlx5_rdma_setup_rn(struct ib_device *ibdev, u8 port_num, +static int mlx5_rdma_setup_rn(struct ib_device *ibdev, u32 port_num, struct net_device *netdev, void *param) { struct mlx5_core_dev *mdev = (struct mlx5_core_dev *)param; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c index 3d0a18a0bed4..18ee21b06a00 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -350,6 +350,7 @@ static const struct mlx5e_profile mlx5i_pkey_nic_profile = { .rx_handlers = &mlx5i_rx_handlers, .max_tc = MLX5I_MAX_NUM_TC, .rq_groups = MLX5E_NUM_RQ_GROUPS(REGULAR), + .rx_ptp_support = false, }; const struct mlx5e_profile *mlx5i_pkey_get_profile(void) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 83a05371e2aa..b8748390335f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -603,8 +603,6 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) if (err) mlx5_core_err(dev, "Failed to init multipath lag err=%d\n", err); - - return; } /* Must be called with intf_mutex held */ @@ -768,7 +766,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, spin_lock(&lag_lock); ldev = mlx5_lag_dev_get(dev); - if (ldev && __mlx5_lag_is_roce(ldev)) { + if (ldev && __mlx5_lag_is_active(ldev)) { num_ports = MLX5_MAX_PORTS; mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev; mdev[MLX5_LAG_P2] = ldev->pf[MLX5_LAG_P2].dev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c index 88e58ac902de..2c41a6920264 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -35,7 +35,7 @@ bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev) } /** - * Set lag port affinity + * mlx5_lag_set_port_affinity * * @ldev: lag device * @port: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index b0e129d0f6d8..ce696d523493 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -495,15 +495,15 @@ static int mlx5_perout_configure(struct 
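/*
 * In the hunk below, ptp_find_pin() is consulted before the enable flag
 * is examined, so an unconfigured or out-of-range pin is rejected on
 * the disable path as well as the enable path. The helper returns the
 * pin index, or a negative value when the requested function is not
 * assigned to any pin:
 *
 *   pin = ptp_find_pin(clock->ptp, PTP_PF_PEROUT, rq->perout.index);
 *   if (pin < 0)
 *           return -EBUSY;
 */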
ptp_clock_info *ptp, return -EINVAL; field_select = MLX5_MTPPS_FS_ENABLE; + pin = ptp_find_pin(clock->ptp, PTP_PF_PEROUT, rq->perout.index); + if (pin < 0) + return -EBUSY; + if (on) { bool rt_mode = mlx5_real_time_mode(mdev); u32 nsec; s64 sec; - pin = ptp_find_pin(clock->ptp, PTP_PF_PEROUT, rq->perout.index); - if (pin < 0) - return -EBUSY; - pin_mode = MLX5_PIN_MODE_OUT; pattern = MLX5_OUT_PATTERN_PERIODIC; ts.tv_sec = rq->perout.period.sec; @@ -645,16 +645,19 @@ static int mlx5_get_pps_pin_mode(struct mlx5_clock *clock, u8 pin) return PTP_PF_NONE; } -static int mlx5_init_pin_config(struct mlx5_clock *clock) +static void mlx5_init_pin_config(struct mlx5_clock *clock) { int i; + if (!clock->ptp_info.n_pins) + return; + clock->ptp_info.pin_config = kcalloc(clock->ptp_info.n_pins, sizeof(*clock->ptp_info.pin_config), GFP_KERNEL); if (!clock->ptp_info.pin_config) - return -ENOMEM; + return; clock->ptp_info.enable = mlx5_ptp_enable; clock->ptp_info.verify = mlx5_ptp_verify; clock->ptp_info.pps = 1; @@ -667,8 +670,6 @@ static int mlx5_init_pin_config(struct mlx5_clock *clock) clock->ptp_info.pin_config[i].func = mlx5_get_pps_pin_mode(clock, i); clock->ptp_info.pin_config[i].chan = 0; } - - return 0; } static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev) @@ -859,6 +860,17 @@ static void mlx5_init_timer_clock(struct mlx5_core_dev *mdev) } } +static void mlx5_init_pps(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock *clock = &mdev->clock; + + if (!MLX5_PPS_CAP(mdev)) + return; + + mlx5_get_pps_caps(mdev); + mlx5_init_pin_config(clock); +} + void mlx5_init_clock(struct mlx5_core_dev *mdev) { struct mlx5_clock *clock = &mdev->clock; @@ -876,10 +888,7 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev) clock->ptp_info = mlx5_ptp_clock_info; /* Initialize 1PPS data structures */ - if (MLX5_PPS_CAP(mdev)) - mlx5_get_pps_caps(mdev); - if (clock->ptp_info.n_pins) - mlx5_init_pin_config(clock); + mlx5_init_pps(mdev); clock->ptp = ptp_clock_register(&clock->ptp_info, &mdev->pdev->dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h index a12c7da618a7..ceae6bc378e0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h @@ -105,4 +105,15 @@ static inline ktime_t mlx5_real_time_cyc2time(struct mlx5_clock *clock, } #endif +static inline cqe_ts_to_ns mlx5_rq_ts_translator(struct mlx5_core_dev *mdev) +{ + return mlx5_is_real_time_rq(mdev) ? mlx5_real_time_cyc2time : + mlx5_timecounter_cyc2time; +} + +static inline cqe_ts_to_ns mlx5_sq_ts_translator(struct mlx5_core_dev *mdev) +{ + return mlx5_is_real_time_sq(mdev) ? 
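/*
 * These translator helpers let a queue choose the CQE-timestamp
 * conversion once at setup instead of testing the real-time mode per
 * packet. A consumer would typically cache the returned function
 * pointer (sketch; the field names are illustrative):
 *
 *   rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev);
 *   ...
 *   ns = rq->ptp_cyc2time(rq->clock, get_cqe_ts(cqe));
 */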
mlx5_real_time_cyc2time : + mlx5_timecounter_cyc2time; +} #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c index 57eb91bcbca7..e995f8378df7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c @@ -46,7 +46,7 @@ int mlx5_create_encryption_key(struct mlx5_core_dev *mdev, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY); - MLX5_SET(encryption_key_obj, obj, pd, mdev->mlx5e_res.pdn); + MLX5_SET(encryption_key_obj, obj, pd, mdev->mlx5e_res.hw_objs.pdn); err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); if (!err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index 81f2cc4ca1da..f607a3858ef5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -22,15 +22,15 @@ struct mlx5_cq_table { }; struct mlx5_eq { + struct mlx5_frag_buf_ctrl fbc; + struct mlx5_frag_buf frag_buf; struct mlx5_core_dev *dev; struct mlx5_cq_table cq_table; __be32 __iomem *doorbell; u32 cons_index; - struct mlx5_frag_buf buf; unsigned int vecidx; unsigned int irqn; u8 eqn; - int nent; struct mlx5_rsc_debug *dbg; }; @@ -47,16 +47,21 @@ struct mlx5_eq_comp { struct list_head list; }; +static inline u32 eq_get_size(struct mlx5_eq *eq) +{ + return eq->fbc.sz_m1 + 1; +} + static inline struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry) { - return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE); + return mlx5_frag_buf_get_wqe(&eq->fbc, entry); } static inline struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq) { - struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1)); + struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & eq->fbc.sz_m1); - return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe; + return (eqe->owner ^ (eq->cons_index >> eq->fbc.log_sz)) & 1 ? NULL : eqe; } static inline void eq_update_ci(struct mlx5_eq *eq, int arm) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c index 381325b4a863..00ef10a1a9f8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -7,15 +7,11 @@ #include "lib/fs_chains.h" #include "en/mapping.h" -#include "mlx5_core.h" #include "fs_core.h" -#include "eswitch.h" -#include "en.h" #include "en_tc.h" #define chains_lock(chains) ((chains)->lock) #define chains_ht(chains) ((chains)->chains_ht) -#define chains_mapping(chains) ((chains)->chains_mapping) #define prios_ht(chains) ((chains)->prios_ht) #define ft_pool_left(chains) ((chains)->ft_left) #define tc_default_ft(chains) ((chains)->tc_default_ft) @@ -300,7 +296,7 @@ create_chain_restore(struct fs_chain *chain) !mlx5_chains_prios_supported(chains)) return 0; - err = mapping_add(chains_mapping(chains), &chain->chain, &index); + err = mlx5_chains_get_chain_mapping(chains, chain->chain, &index); if (err) return err; if (index == MLX5_FS_DEFAULT_FLOW_TAG) { @@ -310,10 +306,8 @@ create_chain_restore(struct fs_chain *chain) * * This case isn't possible with MLX5_FS_DEFAULT_FLOW_TAG = 0. 
*/ - err = mapping_add(chains_mapping(chains), - &chain->chain, &index); - mapping_remove(chains_mapping(chains), - MLX5_FS_DEFAULT_FLOW_TAG); + err = mlx5_chains_get_chain_mapping(chains, chain->chain, &index); + mapping_remove(chains->chains_mapping, MLX5_FS_DEFAULT_FLOW_TAG); if (err) return err; } @@ -361,7 +355,7 @@ err_mod_hdr: mlx5_del_flow_rules(chain->restore_rule); err_rule: /* Datapath can't find this mapping, so we can safely remove it */ - mapping_remove(chains_mapping(chains), chain->id); + mapping_remove(chains->chains_mapping, chain->id); return err; } @@ -376,7 +370,7 @@ static void destroy_chain_restore(struct fs_chain *chain) mlx5_del_flow_rules(chain->restore_rule); mlx5_modify_header_dealloc(chains->dev, chain->miss_modify_hdr); - mapping_remove(chains_mapping(chains), chain->id); + mapping_remove(chains->chains_mapping, chain->id); } static struct fs_chain * @@ -797,7 +791,6 @@ static struct mlx5_fs_chains * mlx5_chains_init(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr) { struct mlx5_fs_chains *chains_priv; - struct mapping_ctx *mapping; u32 max_flow_counter; int err; @@ -816,6 +809,7 @@ mlx5_chains_init(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr) chains_priv->flags = attr->flags; chains_priv->ns = attr->ns; chains_priv->group_num = attr->max_grp_num; + chains_priv->chains_mapping = attr->mapping; tc_default_ft(chains_priv) = tc_end_ft(chains_priv) = attr->default_ft; mlx5_core_info(dev, "Supported tc offload range - chains: %u, prios: %u\n", @@ -832,20 +826,10 @@ mlx5_chains_init(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr) if (err) goto init_prios_ht_err; - mapping = mapping_create(sizeof(u32), attr->max_restore_tag, - true); - if (IS_ERR(mapping)) { - err = PTR_ERR(mapping); - goto mapping_err; - } - chains_mapping(chains_priv) = mapping; - mutex_init(&chains_lock(chains_priv)); return chains_priv; -mapping_err: - rhashtable_destroy(&prios_ht(chains_priv)); init_prios_ht_err: rhashtable_destroy(&chains_ht(chains_priv)); init_chains_ht_err: @@ -857,7 +841,6 @@ static void mlx5_chains_cleanup(struct mlx5_fs_chains *chains) { mutex_destroy(&chains_lock(chains)); - mapping_destroy(chains_mapping(chains)); rhashtable_destroy(&prios_ht(chains)); rhashtable_destroy(&chains_ht(chains)); @@ -884,25 +867,18 @@ int mlx5_chains_get_chain_mapping(struct mlx5_fs_chains *chains, u32 chain, u32 *chain_mapping) { - return mapping_add(chains_mapping(chains), &chain, chain_mapping); + struct mapping_ctx *ctx = chains->chains_mapping; + struct mlx5_mapped_obj mapped_obj = {}; + + mapped_obj.type = MLX5_MAPPED_OBJ_CHAIN; + mapped_obj.chain = chain; + return mapping_add(ctx, &mapped_obj, chain_mapping); } int mlx5_chains_put_chain_mapping(struct mlx5_fs_chains *chains, u32 chain_mapping) { - return mapping_remove(chains_mapping(chains), chain_mapping); -} - -int mlx5_get_chain_for_tag(struct mlx5_fs_chains *chains, u32 tag, - u32 *chain) -{ - int err; + struct mapping_ctx *ctx = chains->chains_mapping; - err = mapping_find(chains_mapping(chains), tag, chain); - if (err) { - mlx5_core_warn(chains->dev, "Can't find chain for tag: %d\n", tag); - return -ENOENT; - } - - return 0; + return mapping_remove(ctx, chain_mapping); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h index 6d5be31b05dd..e96f345e7dae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h @@ -7,6 +7,7 @@ #include 
<linux/mlx5/fs.h> struct mlx5_fs_chains; +struct mlx5_mapped_obj; enum mlx5_chains_flags { MLX5_CHAINS_AND_PRIOS_SUPPORTED = BIT(0), @@ -20,7 +21,7 @@ struct mlx5_chains_attr { u32 max_ft_sz; u32 max_grp_num; struct mlx5_flow_table *default_ft; - u32 max_restore_tag; + struct mapping_ctx *mapping; }; #if IS_ENABLED(CONFIG_MLX5_CLS_ACT) @@ -63,9 +64,6 @@ struct mlx5_fs_chains * mlx5_chains_create(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr); void mlx5_chains_destroy(struct mlx5_fs_chains *chains); -int -mlx5_get_chain_for_tag(struct mlx5_fs_chains *chains, u32 tag, u32 *chain); - void mlx5_chains_set_end_ft(struct mlx5_fs_chains *chains, struct mlx5_flow_table *ft); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c index a68738c8f4bc..96ffc0a0e670 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c @@ -55,10 +55,6 @@ void mlx5_cleanup_reserved_gids(struct mlx5_core_dev *dev) int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count) { - if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { - mlx5_core_err(dev, "Cannot reserve GIDs when interfaces are up\n"); - return -EPERM; - } if (dev->roce.reserved_gids.start < count) { mlx5_core_warn(dev, "GID table exhausted attempting to reserve %d more GIDs\n", count); @@ -79,7 +75,6 @@ int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count) void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count) { - WARN(test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state), "Unreserving GIDs when interfaces are up"); WARN(count > dev->roce.reserved_gids.count, "Unreserving %u GIDs when only %u reserved", count, dev->roce.reserved_gids.count); @@ -93,12 +88,12 @@ void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count) int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index) { int end = dev->roce.reserved_gids.start + - dev->roce.reserved_gids.count; + dev->roce.reserved_gids.count - 1; int index = 0; - index = ida_simple_get(&dev->roce.reserved_gids.ida, - dev->roce.reserved_gids.start, end, - GFP_KERNEL); + index = ida_alloc_range(&dev->roce.reserved_gids.ida, + dev->roce.reserved_gids.start, end, + GFP_KERNEL); if (index < 0) return index; @@ -110,7 +105,7 @@ int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index) void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index) { mlx5_core_dbg(dev, "Freeing reserved GID %u\n", gid_index); - ida_simple_remove(&dev->roce.reserved_gids.ida, gid_index); + ida_free(&dev->roce.reserved_gids.ida, gid_index); } unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev) @@ -142,10 +137,10 @@ int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index, } ether_addr_copy(addr_mac, mac); - MLX5_SET_RA(in_addr, roce_version, roce_version); - MLX5_SET_RA(in_addr, roce_l3_type, roce_l3_type); memcpy(addr_l3_addr, gid, gidsz); } + MLX5_SET_RA(in_addr, roce_version, roce_version); + MLX5_SET_RA(in_addr, roce_l3_type, roce_l3_type); if (MLX5_CAP_GEN(dev, num_vhca_ports) > 0) MLX5_SET(set_roce_address_in, in, vhca_port_num, port_num); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h index d046db7bb047..2f536c5d30b1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -95,4 +95,13 @@ static inline struct 
net *mlx5_core_net(struct mlx5_core_dev *dev) return devlink_net(priv_to_devlink(dev)); } +static inline void mlx5_uplink_netdev_set(struct mlx5_core_dev *mdev, struct net_device *netdev) +{ + mdev->mlx5e_res.uplink_netdev = netdev; +} + +static inline struct net_device *mlx5_uplink_netdev_get(struct mlx5_core_dev *mdev) +{ + return mdev->mlx5e_res.uplink_netdev; +} #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index c568896cfb23..c114365eb126 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -571,6 +571,10 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) mlx5_vhca_state_cap_handle(dev, set_hca_cap); + if (MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix)) + MLX5_SET(cmd_hca_cap, set_hca_cap, num_total_dynamic_vf_msix, + MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix)); + return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE); } @@ -1235,7 +1239,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev) mlx5_put_uars_page(dev, dev->priv.uar); } -int mlx5_load_one(struct mlx5_core_dev *dev, bool boot) +int mlx5_init_one(struct mlx5_core_dev *dev) { int err = 0; @@ -1247,16 +1251,14 @@ int mlx5_load_one(struct mlx5_core_dev *dev, bool boot) /* remove any previous indication of internal error */ dev->state = MLX5_DEVICE_STATE_UP; - err = mlx5_function_setup(dev, boot); + err = mlx5_function_setup(dev, true); if (err) goto err_function; - if (boot) { - err = mlx5_init_once(dev); - if (err) { - mlx5_core_err(dev, "sw objs init failed\n"); - goto function_teardown; - } + err = mlx5_init_once(dev); + if (err) { + mlx5_core_err(dev, "sw objs init failed\n"); + goto function_teardown; } err = mlx5_load(dev); @@ -1265,16 +1267,11 @@ int mlx5_load_one(struct mlx5_core_dev *dev, bool boot) set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); - if (boot) { - err = mlx5_devlink_register(priv_to_devlink(dev), dev->device); - if (err) - goto err_devlink_reg; - - err = mlx5_register_device(dev); - } else { - err = mlx5_attach_device(dev); - } + err = mlx5_devlink_register(priv_to_devlink(dev), dev->device); + if (err) + goto err_devlink_reg; + err = mlx5_register_device(dev); if (err) goto err_register; @@ -1282,16 +1279,14 @@ int mlx5_load_one(struct mlx5_core_dev *dev, bool boot) return 0; err_register: - if (boot) - mlx5_devlink_unregister(priv_to_devlink(dev)); + mlx5_devlink_unregister(priv_to_devlink(dev)); err_devlink_reg: clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); mlx5_unload(dev); err_load: - if (boot) - mlx5_cleanup_once(dev); + mlx5_cleanup_once(dev); function_teardown: - mlx5_function_teardown(dev, boot); + mlx5_function_teardown(dev, true); err_function: dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; out: @@ -1299,33 +1294,84 @@ out: return err; } -void mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup) +void mlx5_uninit_one(struct mlx5_core_dev *dev) { mutex_lock(&dev->intf_state_mutex); - if (cleanup) { - mlx5_unregister_device(dev); - mlx5_devlink_unregister(priv_to_devlink(dev)); - } else { - mlx5_detach_device(dev); - } + mlx5_unregister_device(dev); + mlx5_devlink_unregister(priv_to_devlink(dev)); if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { mlx5_core_warn(dev, "%s: interface is down, NOP\n", __func__); - if (cleanup) - mlx5_cleanup_once(dev); + mlx5_cleanup_once(dev); goto out; } clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); + mlx5_unload(dev); + mlx5_cleanup_once(dev); + 
mlx5_function_teardown(dev, true); +out: + mutex_unlock(&dev->intf_state_mutex); +} +int mlx5_load_one(struct mlx5_core_dev *dev) +{ + int err = 0; + + mutex_lock(&dev->intf_state_mutex); + if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { + mlx5_core_warn(dev, "interface is up, NOP\n"); + goto out; + } + /* remove any previous indication of internal error */ + dev->state = MLX5_DEVICE_STATE_UP; + + err = mlx5_function_setup(dev, false); + if (err) + goto err_function; + + err = mlx5_load(dev); + if (err) + goto err_load; + + set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); + + err = mlx5_attach_device(dev); + if (err) + goto err_attach; + + mutex_unlock(&dev->intf_state_mutex); + return 0; + +err_attach: + clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); mlx5_unload(dev); +err_load: + mlx5_function_teardown(dev, false); +err_function: + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; +out: + mutex_unlock(&dev->intf_state_mutex); + return err; +} - if (cleanup) - mlx5_cleanup_once(dev); +void mlx5_unload_one(struct mlx5_core_dev *dev) +{ + mutex_lock(&dev->intf_state_mutex); - mlx5_function_teardown(dev, cleanup); + mlx5_detach_device(dev); + + if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { + mlx5_core_warn(dev, "%s: interface is down, NOP\n", + __func__); + goto out; + } + + clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); + mlx5_unload(dev); + mlx5_function_teardown(dev, false); out: mutex_unlock(&dev->intf_state_mutex); } @@ -1397,7 +1443,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev) mutex_destroy(&dev->intf_state_mutex); } -static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) +static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id) { struct mlx5_core_dev *dev; struct devlink *devlink; @@ -1433,11 +1479,11 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) goto pci_init_err; } - err = mlx5_load_one(dev, true); + err = mlx5_init_one(dev); if (err) { - mlx5_core_err(dev, "mlx5_load_one failed with error code %d\n", + mlx5_core_err(dev, "mlx5_init_one failed with error code %d\n", err); - goto err_load_one; + goto err_init_one; } err = mlx5_crdump_enable(dev); @@ -1449,7 +1495,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) devlink_reload_enable(devlink); return 0; -err_load_one: +err_init_one: mlx5_pci_close(dev); pci_init_err: mlx5_mdev_uninit(dev); @@ -1469,7 +1515,7 @@ static void remove_one(struct pci_dev *pdev) devlink_reload_disable(devlink); mlx5_crdump_disable(dev); mlx5_drain_health_wq(dev); - mlx5_unload_one(dev, true); + mlx5_uninit_one(dev); mlx5_pci_close(dev); mlx5_mdev_uninit(dev); mlx5_adev_idx_free(dev->priv.adev_idx); @@ -1485,7 +1531,7 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, mlx5_enter_error_state(dev, false); mlx5_error_sw_reset(dev); - mlx5_unload_one(dev, false); + mlx5_unload_one(dev); mlx5_drain_health_wq(dev); mlx5_pci_disable_device(dev); @@ -1555,7 +1601,7 @@ static void mlx5_pci_resume(struct pci_dev *pdev) mlx5_core_info(dev, "%s was called\n", __func__); - err = mlx5_load_one(dev, false); + err = mlx5_load_one(dev); if (err) mlx5_core_err(dev, "%s: mlx5_load_one failed with error code: %d\n", __func__, err); @@ -1627,7 +1673,7 @@ static void shutdown(struct pci_dev *pdev) mlx5_core_info(dev, "Shutdown was called\n"); err = mlx5_try_fast_unload(dev); if (err) - mlx5_unload_one(dev, false); + mlx5_unload_one(dev); mlx5_pci_disable_device(dev); } @@ -1635,7 +1681,7 @@ static int 
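The main.c hunks above split the old boolean-driven mlx5_load_one(dev, boot)/mlx5_unload_one(dev, cleanup) pair into explicit entry points: mlx5_init_one()/mlx5_uninit_one() carry the one-time software-object setup plus devlink and device registration, while mlx5_load_one()/mlx5_unload_one() only bring the function up or down. A rough sketch of the intended pairing, using hypothetical caller names (the real users are probe/remove, suspend/resume and the recovery path elsewhere in this patch); error handling elided:

/* Hypothetical callers illustrating which half of the API each path uses. */
static int my_probe(struct mlx5_core_dev *dev)
{
	return mlx5_init_one(dev);	/* full setup: sw objs + load + register */
}

static void my_remove(struct mlx5_core_dev *dev)
{
	mlx5_uninit_one(dev);		/* unregister + unload + sw objs teardown */
}

static int my_suspend(struct mlx5_core_dev *dev)
{
	mlx5_unload_one(dev);		/* detach + unload only; sw objs survive */
	return 0;
}

static int my_resume(struct mlx5_core_dev *dev)
{
	return mlx5_load_one(dev);	/* light setup + load + attach */
}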
mlx5_suspend(struct pci_dev *pdev, pm_message_t state) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - mlx5_unload_one(dev, false); + mlx5_unload_one(dev); return 0; } @@ -1644,7 +1690,7 @@ static int mlx5_resume(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - return mlx5_load_one(dev, false); + return mlx5_load_one(dev); } static const struct pci_device_id mlx5_core_pci_table[] = { @@ -1676,26 +1722,31 @@ MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table); void mlx5_disable_device(struct mlx5_core_dev *dev) { mlx5_error_sw_reset(dev); - mlx5_unload_one(dev, false); + mlx5_unload_one(dev); } -void mlx5_recover_device(struct mlx5_core_dev *dev) +int mlx5_recover_device(struct mlx5_core_dev *dev) { + int ret = -EIO; + mlx5_pci_disable_device(dev); if (mlx5_pci_slot_reset(dev->pdev) == PCI_ERS_RESULT_RECOVERED) - mlx5_pci_resume(dev->pdev); + ret = mlx5_load_one(dev); + return ret; } static struct pci_driver mlx5_core_driver = { .name = KBUILD_MODNAME, .id_table = mlx5_core_pci_table, - .probe = init_one, + .probe = probe_one, .remove = remove_one, .suspend = mlx5_suspend, .resume = mlx5_resume, .shutdown = shutdown, .err_handler = &mlx5_err_handler, .sriov_configure = mlx5_core_sriov_configure, + .sriov_get_vf_total_msix = mlx5_sriov_get_vf_total_msix, + .sriov_set_msix_vec_count = mlx5_core_sriov_set_msix_vec_count, }; static void mlx5_core_verify_params(void) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index efe403c7e354..a22b706eebd3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -134,12 +134,13 @@ void mlx5_error_sw_reset(struct mlx5_core_dev *dev); u32 mlx5_health_check_fatal_sensors(struct mlx5_core_dev *dev); int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev); void mlx5_disable_device(struct mlx5_core_dev *dev); -void mlx5_recover_device(struct mlx5_core_dev *dev); +int mlx5_recover_device(struct mlx5_core_dev *dev); int mlx5_sriov_init(struct mlx5_core_dev *dev); void mlx5_sriov_cleanup(struct mlx5_core_dev *dev); int mlx5_sriov_attach(struct mlx5_core_dev *dev); void mlx5_sriov_detach(struct mlx5_core_dev *dev); int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs); +int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count); int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id); int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id); int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, @@ -174,6 +175,11 @@ int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx, struct notifier_block *nb); int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx, struct notifier_block *nb); + +int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int devfn, + int msix_vec_count); +int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs); + struct cpumask * mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx); struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *table); @@ -267,10 +273,18 @@ static inline bool mlx5_core_is_sf(const struct mlx5_core_dev *dev) int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx); void mlx5_mdev_uninit(struct mlx5_core_dev *dev); -void mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup); -int mlx5_load_one(struct mlx5_core_dev *dev, bool boot); +int mlx5_init_one(struct mlx5_core_dev *dev); +void mlx5_uninit_one(struct 
mlx5_core_dev *dev); +void mlx5_unload_one(struct mlx5_core_dev *dev); +int mlx5_load_one(struct mlx5_core_dev *dev); int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out); void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work); +static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + + return MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix); +} #endif /* __MLX5_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index c0656d4782e1..110c0837f95b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -61,7 +61,7 @@ struct fw_page { u32 function; unsigned long bitmask; struct list_head list; - unsigned free_count; + unsigned int free_count; }; enum { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index a61e09aff152..1f907df5b3a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -61,6 +61,79 @@ static struct mlx5_irq *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx) return &irq_table->irq[vecidx]; } +/** + * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors + * to be assigned to each VF. + * @dev: PF to work on + * @num_vfs: Number of enabled VFs + */ +int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs) +{ + int num_vf_msix, min_msix, max_msix; + + num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix); + if (!num_vf_msix) + return 0; + + min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size); + max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size); + + /* Limit maximum number of MSI-X vectors so the default configuration + * has some available in the pool. This will allow the user to increase + * the number of vectors in a VF without having to first size-down other + * VFs.
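The default-sizing rule that closes this comment, return max(min(num_vf_msix / num_vfs, max_msix / 2), min_msix), splits the dynamic vector pool evenly across VFs, caps each VF at half the per-VF maximum so the pool keeps spare capacity, and never drops below the device minimum. A worked example with invented capability values (not taken from any real device):

/* Hypothetical caps: num_vf_msix = 256 (pool), min_msix = 2, max_msix = 32.
 *
 *   num_vfs = 4:   min(256 / 4, 32 / 2)   = min(64, 16) = 16 vectors per VF
 *   num_vfs = 64:  min(256 / 64, 32 / 2)  = min(4, 16)  = 4 vectors per VF
 *   num_vfs = 200: min(256 / 200, 32 / 2) = min(1, 16)  = 1, raised to min_msix = 2
 *
 * With 4 VFs the max_msix / 2 cap binds, leaving 256 - 4 * 16 = 192 vectors
 * unassigned in the pool for later per-VF increases.
 */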
+ */ + return max(min(num_vf_msix / num_vfs, max_msix / 2), min_msix); +} + +/** + * mlx5_set_msix_vec_count - Set dynamically allocated MSI-X on the VF + * @dev: PF to work on + * @function_id: Internal PCI VF function ID + * @msix_vec_count: Number of MSI-X vectors to set + */ +int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id, + int msix_vec_count) +{ + int sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + int num_vf_msix, min_msix, max_msix; + void *hca_cap, *cap; + int ret; + + num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix); + if (!num_vf_msix) + return 0; + + if (!MLX5_CAP_GEN(dev, vport_group_manager) || !mlx5_core_is_pf(dev)) + return -EOPNOTSUPP; + + min_msix = MLX5_CAP_GEN(dev, min_dynamic_vf_msix_table_size); + max_msix = MLX5_CAP_GEN(dev, max_dynamic_vf_msix_table_size); + + if (msix_vec_count < min_msix) + return -EINVAL; + + if (msix_vec_count > max_msix) + return -EOVERFLOW; + + hca_cap = kzalloc(sz, GFP_KERNEL); + if (!hca_cap) + return -ENOMEM; + + cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability); + MLX5_SET(cmd_hca_cap, cap, dynamic_msix_table_size, msix_vec_count); + + MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP); + MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1); + MLX5_SET(set_hca_cap_in, hca_cap, function_id, function_id); + + MLX5_SET(set_hca_cap_in, hca_cap, op_mod, + MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1); + ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap); + kfree(hca_cap); + return ret; +} + int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx, struct notifier_block *nb) { @@ -94,7 +167,6 @@ static void irq_set_name(char *name, int vecidx) snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx - MLX5_IRQ_VEC_COMP_BASE); - return; } static int request_irqs(struct mlx5_core_dev *dev, int nvec) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 4bb219565c58..1ef2b6a848c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -353,69 +353,123 @@ static void mlx5_sfp_eeprom_params_set(u16 *i2c_addr, int *page_num, u16 *offset *offset -= MLX5_EEPROM_PAGE_LENGTH; } -int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, - u16 offset, u16 size, u8 *data) +static int mlx5_query_mcia(struct mlx5_core_dev *dev, + struct mlx5_module_eeprom_query_params *params, u8 *data) { - int module_num, status, err, page_num = 0; u32 in[MLX5_ST_SZ_DW(mcia_reg)] = {}; u32 out[MLX5_ST_SZ_DW(mcia_reg)]; - u16 i2c_addr = 0; - u8 module_id; + int status, err; void *ptr; + u16 size; - err = mlx5_query_module_num(dev, &module_num); + size = min_t(int, params->size, MLX5_EEPROM_MAX_BYTES); + + MLX5_SET(mcia_reg, in, l, 0); + MLX5_SET(mcia_reg, in, size, size); + MLX5_SET(mcia_reg, in, module, params->module_number); + MLX5_SET(mcia_reg, in, device_address, params->offset); + MLX5_SET(mcia_reg, in, page_number, params->page); + MLX5_SET(mcia_reg, in, i2c_device_address, params->i2c_address); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MCIA, 0, 0); if (err) return err; - err = mlx5_query_module_id(dev, module_num, &module_id); + status = MLX5_GET(mcia_reg, out, status); + if (status) { + mlx5_core_err(dev, "query_mcia_reg failed: status: 0x%x\n", + status); + return -EIO; + } + + ptr = MLX5_ADDR_OF(mcia_reg, out, dword_0); + memcpy(data, ptr, size); + + return size; +} + +int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, + u16 offset, u16 size, u8
*data) +{ + struct mlx5_module_eeprom_query_params query = {0}; + u8 module_id; + int err; + + err = mlx5_query_module_num(dev, &query.module_number); + if (err) + return err; + + err = mlx5_query_module_id(dev, query.module_number, &module_id); if (err) return err; switch (module_id) { case MLX5_MODULE_ID_SFP: - mlx5_sfp_eeprom_params_set(&i2c_addr, &page_num, &offset); + mlx5_sfp_eeprom_params_set(&query.i2c_address, &query.page, &query.offset); break; case MLX5_MODULE_ID_QSFP: case MLX5_MODULE_ID_QSFP_PLUS: case MLX5_MODULE_ID_QSFP28: - mlx5_qsfp_eeprom_params_set(&i2c_addr, &page_num, &offset); + mlx5_qsfp_eeprom_params_set(&query.i2c_address, &query.page, &query.offset); break; default: mlx5_core_err(dev, "Module ID not recognized: 0x%x\n", module_id); return -EINVAL; } - if (offset + size > MLX5_EEPROM_PAGE_LENGTH) + if (query.offset + size > MLX5_EEPROM_PAGE_LENGTH) /* Cross pages read, read until offset 256 in low page */ size -= offset + size - MLX5_EEPROM_PAGE_LENGTH; - size = min_t(int, size, MLX5_EEPROM_MAX_BYTES); + query.size = size; - MLX5_SET(mcia_reg, in, l, 0); - MLX5_SET(mcia_reg, in, module, module_num); - MLX5_SET(mcia_reg, in, i2c_device_address, i2c_addr); - MLX5_SET(mcia_reg, in, page_number, page_num); - MLX5_SET(mcia_reg, in, device_address, offset); - MLX5_SET(mcia_reg, in, size, size); + return mlx5_query_mcia(dev, &query, data); +} +EXPORT_SYMBOL_GPL(mlx5_query_module_eeprom); - err = mlx5_core_access_reg(dev, in, sizeof(in), out, - sizeof(out), MLX5_REG_MCIA, 0, 0); +int mlx5_query_module_eeprom_by_page(struct mlx5_core_dev *dev, + struct mlx5_module_eeprom_query_params *params, + u8 *data) +{ + u8 module_id; + int err; + + err = mlx5_query_module_num(dev, &params->module_number); if (err) return err; - status = MLX5_GET(mcia_reg, out, status); - if (status) { - mlx5_core_err(dev, "query_mcia_reg failed: status: 0x%x\n", - status); - return -EIO; + err = mlx5_query_module_id(dev, params->module_number, &module_id); + if (err) + return err; + + switch (module_id) { + case MLX5_MODULE_ID_SFP: + if (params->page > 0) + return -EINVAL; + break; + case MLX5_MODULE_ID_QSFP: + case MLX5_MODULE_ID_QSFP28: + case MLX5_MODULE_ID_QSFP_PLUS: + if (params->page > 3) + return -EINVAL; + break; + case MLX5_MODULE_ID_DSFP: + break; + default: + mlx5_core_err(dev, "Module ID not recognized: 0x%x\n", module_id); + return -EINVAL; } - ptr = MLX5_ADDR_OF(mcia_reg, out, dword_0); - memcpy(data, ptr, size); + if (params->i2c_address != MLX5_I2C_ADDR_HIGH && + params->i2c_address != MLX5_I2C_ADDR_LOW) { + mlx5_core_err(dev, "I2C address not recognized: 0x%x\n", params->i2c_address); + return -EINVAL; + } - return size; + return mlx5_query_mcia(dev, params, data); } -EXPORT_SYMBOL_GPL(mlx5_query_module_eeprom); +EXPORT_SYMBOL_GPL(mlx5_query_module_eeprom_by_page); static int mlx5_query_port_pvlc(struct mlx5_core_dev *dev, u32 *pvlc, int pvlc_size, u8 local_port) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c index 8e0dddc6383f..441b5453acae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c @@ -180,5 +180,4 @@ del_roce_addr: mlx5_rdma_del_roce_addr(dev); disable_roce: mlx5_nic_vport_disable_roce(dev); - return; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c index 99039c47ef33..7161220afe30 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c @@ -117,6
+117,9 @@ static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table, bool empty_found = false; int i; + lockdep_assert_held(&table->rl_lock); + WARN_ON(!table->rl_entry); + for (i = 0; i < table->max_size; i++) { if (dedicated) { if (!table->rl_entry[i].refcount) @@ -172,38 +175,103 @@ bool mlx5_rl_are_equal(struct mlx5_rate_limit *rl_0, } EXPORT_SYMBOL(mlx5_rl_are_equal); +static int mlx5_rl_table_get(struct mlx5_rl_table *table) +{ + int i; + + lockdep_assert_held(&table->rl_lock); + + if (table->rl_entry) { + table->refcount++; + return 0; + } + + table->rl_entry = kcalloc(table->max_size, sizeof(struct mlx5_rl_entry), + GFP_KERNEL); + if (!table->rl_entry) + return -ENOMEM; + + /* The index represents the index in HW rate limit table + * Index 0 is reserved for unlimited rate + */ + for (i = 0; i < table->max_size; i++) + table->rl_entry[i].index = i + 1; + + table->refcount++; + return 0; +} + +static void mlx5_rl_table_put(struct mlx5_rl_table *table) +{ + lockdep_assert_held(&table->rl_lock); + if (--table->refcount) + return; + + kfree(table->rl_entry); + table->rl_entry = NULL; +} + +static void mlx5_rl_table_free(struct mlx5_core_dev *dev, struct mlx5_rl_table *table) +{ + int i; + + if (!table->rl_entry) + return; + + /* Clear all configured rates */ + for (i = 0; i < table->max_size; i++) + if (table->rl_entry[i].refcount) + mlx5_set_pp_rate_limit_cmd(dev, &table->rl_entry[i], false); + kfree(table->rl_entry); +} + +static void mlx5_rl_entry_get(struct mlx5_rl_entry *entry) +{ + entry->refcount++; +} + +static void +mlx5_rl_entry_put(struct mlx5_core_dev *dev, struct mlx5_rl_entry *entry) +{ + entry->refcount--; + if (!entry->refcount) + mlx5_set_pp_rate_limit_cmd(dev, entry, false); +} + int mlx5_rl_add_rate_raw(struct mlx5_core_dev *dev, void *rl_in, u16 uid, bool dedicated_entry, u16 *index) { struct mlx5_rl_table *table = &dev->priv.rl_table; struct mlx5_rl_entry *entry; - int err = 0; u32 rate; + int err; - rate = MLX5_GET(set_pp_rate_limit_context, rl_in, rate_limit); - mutex_lock(&table->rl_lock); + if (!table->max_size) + return -EOPNOTSUPP; + rate = MLX5_GET(set_pp_rate_limit_context, rl_in, rate_limit); if (!rate || !mlx5_rl_is_in_range(dev, rate)) { mlx5_core_err(dev, "Invalid rate: %u, should be %u to %u\n", rate, table->min_rate, table->max_rate); - err = -EINVAL; - goto out; + return -EINVAL; } + mutex_lock(&table->rl_lock); + err = mlx5_rl_table_get(table); + if (err) + goto out; + entry = find_rl_entry(table, rl_in, uid, dedicated_entry); if (!entry) { mlx5_core_err(dev, "Max number of %u rates reached\n", table->max_size); err = -ENOSPC; - goto out; + goto rl_err; } - if (entry->refcount) { - /* rate already configured */ - entry->refcount++; - } else { + if (!entry->refcount) { + /* new rate limit */ memcpy(entry->rl_raw, rl_in, sizeof(entry->rl_raw)); entry->uid = uid; - /* new rate limit */ err = mlx5_set_pp_rate_limit_cmd(dev, entry, true); if (err) { mlx5_core_err( @@ -214,14 +282,18 @@ int mlx5_rl_add_rate_raw(struct mlx5_core_dev *dev, void *rl_in, u16 uid, burst_upper_bound), MLX5_GET(set_pp_rate_limit_context, rl_in, typical_packet_size)); - goto out; + goto rl_err; } - entry->refcount = 1; entry->dedicated = dedicated_entry; } + mlx5_rl_entry_get(entry); *index = entry->index; + mutex_unlock(&table->rl_lock); + return 0; +rl_err: + mlx5_rl_table_put(table); out: mutex_unlock(&table->rl_lock); return err; @@ -235,10 +307,8 @@ void mlx5_rl_remove_rate_raw(struct mlx5_core_dev *dev, u16 index) mutex_lock(&table->rl_lock); entry = 
&table->rl_entry[index - 1]; - entry->refcount--; - if (!entry->refcount) - /* need to remove rate */ - mlx5_set_pp_rate_limit_cmd(dev, entry, false); + mlx5_rl_entry_put(dev, entry); + mlx5_rl_table_put(table); mutex_unlock(&table->rl_lock); } EXPORT_SYMBOL(mlx5_rl_remove_rate_raw); @@ -286,12 +356,8 @@ void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, struct mlx5_rate_limit *rl) rl->rate, rl->max_burst_sz, rl->typical_pkt_sz); goto out; } - - entry->refcount--; - if (!entry->refcount) - /* need to remove rate */ - mlx5_set_pp_rate_limit_cmd(dev, entry, false); - + mlx5_rl_entry_put(dev, entry); + mlx5_rl_table_put(table); out: mutex_unlock(&table->rl_lock); } @@ -300,31 +366,19 @@ EXPORT_SYMBOL(mlx5_rl_remove_rate); int mlx5_init_rl_table(struct mlx5_core_dev *dev) { struct mlx5_rl_table *table = &dev->priv.rl_table; - int i; - mutex_init(&table->rl_lock); if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, packet_pacing)) { table->max_size = 0; return 0; } + mutex_init(&table->rl_lock); + /* First entry is reserved for unlimited rate */ table->max_size = MLX5_CAP_QOS(dev, packet_pacing_rate_table_size) - 1; table->max_rate = MLX5_CAP_QOS(dev, packet_pacing_max_rate); table->min_rate = MLX5_CAP_QOS(dev, packet_pacing_min_rate); - table->rl_entry = kcalloc(table->max_size, sizeof(struct mlx5_rl_entry), - GFP_KERNEL); - if (!table->rl_entry) - return -ENOMEM; - - /* The index represents the index in HW rate limit table - * Index 0 is reserved for unlimited rate - */ - for (i = 0; i < table->max_size; i++) - table->rl_entry[i].index = i + 1; - - /* Index 0 is reserved */ mlx5_core_info(dev, "Rate limit: %u rates are supported, range: %uMbps to %uMbps\n", table->max_size, table->min_rate >> 10, @@ -336,13 +390,10 @@ int mlx5_init_rl_table(struct mlx5_core_dev *dev) void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev) { struct mlx5_rl_table *table = &dev->priv.rl_table; - int i; - /* Clear all configured rates */ - for (i = 0; i < table->max_size; i++) - if (table->rl_entry[i].refcount) - mlx5_set_pp_rate_limit_cmd(dev, &table->rl_entry[i], - false); + if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, packet_pacing)) + return; - kfree(dev->priv.rl_table.rl_entry); + mlx5_rl_table_free(dev, table); + mutex_destroy(&table->rl_lock); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c index b265f27b2166..6a0c6f965ad1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c @@ -148,9 +148,19 @@ mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_ struct mlx5_sf_dev_table *table = container_of(nb, struct mlx5_sf_dev_table, nb); const struct mlx5_vhca_state_event *event = data; struct mlx5_sf_dev *sf_dev; + u16 max_functions; u16 sf_index; + u16 base_id; + + max_functions = mlx5_sf_max_functions(table->dev); + if (!max_functions) + return 0; + + base_id = MLX5_CAP_GEN(table->dev, sf_base_id); + if (event->function_id < base_id || event->function_id >= (base_id + max_functions)) + return 0; - sf_index = event->function_id - MLX5_CAP_GEN(table->dev, sf_base_id); + sf_index = event->function_id - base_id; sf_dev = xa_load(&table->devices, sf_index); switch (event->new_vhca_state) { case MLX5_VHCA_STATE_ALLOCATED: @@ -181,15 +191,13 @@ static int mlx5_sf_dev_vhca_arm_all(struct mlx5_sf_dev_table *table) u16 max_functions; u16 function_id; int err = 0; - bool ecpu; int i; max_functions = mlx5_sf_max_functions(dev); function_id = 
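The rl.c changes above move the rate-limit entry array from unconditional allocation in mlx5_init_rl_table() to lazy allocation on the first mlx5_rl_add_rate_raw() and release when the last rate goes away, tracked by a table-level refcount under rl_lock. The same get/put pattern in miniature, as a userspace sketch with invented names (lazy_table is not a driver type):

#include <stdlib.h>

/* Hypothetical refcounted, lazily allocated table; callers must already
 * hold whatever lock protects it (rl_lock in the driver).
 */
struct lazy_table {
	long *entries;
	unsigned int refcount;
	size_t max_size;
};

static int lazy_table_get(struct lazy_table *t)
{
	if (t->entries) {
		t->refcount++;		/* already allocated: just take a ref */
		return 0;
	}
	t->entries = calloc(t->max_size, sizeof(*t->entries));
	if (!t->entries)
		return -1;		/* the driver returns -ENOMEM here */
	t->refcount = 1;
	return 0;
}

static void lazy_table_put(struct lazy_table *t)
{
	if (--t->refcount)
		return;			/* other rates still reference the table */
	free(t->entries);		/* last user gone: drop the memory */
	t->entries = NULL;
}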
MLX5_CAP_GEN(dev, sf_base_id); - ecpu = mlx5_read_embedded_cpu(dev); /* Arm the vhca context as the vhca event notifier */ for (i = 0; i < max_functions; i++) { - err = mlx5_vhca_event_arm(dev, function_id, ecpu); + err = mlx5_vhca_event_arm(dev, function_id); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h index 4de02902aef1..149fd9e698cf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h @@ -47,7 +47,7 @@ static inline void mlx5_sf_driver_unregister(void) static inline bool mlx5_sf_dev_allocated(const struct mlx5_core_dev *dev) { - return 0; + return false; } #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c index c4bf555c25ea..42c8ee03fe3e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c @@ -41,14 +41,15 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia goto remap_err; } - err = mlx5_load_one(mdev, true); + err = mlx5_init_one(mdev); if (err) { - mlx5_core_warn(mdev, "mlx5_load_one err=%d\n", err); - goto load_one_err; + mlx5_core_warn(mdev, "mlx5_init_one err=%d\n", err); + goto init_one_err; } + devlink_reload_enable(devlink); return 0; -load_one_err: +init_one_err: iounmap(mdev->iseg); remap_err: mlx5_mdev_uninit(mdev); @@ -63,7 +64,8 @@ static void mlx5_sf_dev_remove(struct auxiliary_device *adev) struct devlink *devlink; devlink = priv_to_devlink(sf_dev->mdev); - mlx5_unload_one(sf_dev->mdev, true); + devlink_reload_disable(devlink); + mlx5_uninit_one(sf_dev->mdev); iounmap(sf_dev->mdev->iseg); mlx5_mdev_uninit(sf_dev->mdev); mlx5_devlink_free(devlink); @@ -73,7 +75,7 @@ static void mlx5_sf_dev_shutdown(struct auxiliary_device *adev) { struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev); - mlx5_unload_one(sf_dev->mdev, false); + mlx5_unload_one(sf_dev->mdev); } static const struct auxiliary_device_id mlx5_sf_dev_id_table[] = { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c index c2ba41bb7a70..a8e73c9ed1ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c @@ -12,6 +12,7 @@ struct mlx5_sf { struct devlink_port dl_port; unsigned int port_index; + u32 controller; u16 id; u16 hw_fn_id; u16 hw_state; @@ -58,7 +59,8 @@ static void mlx5_sf_id_erase(struct mlx5_sf_table *table, struct mlx5_sf *sf) } static struct mlx5_sf * -mlx5_sf_alloc(struct mlx5_sf_table *table, u32 sfnum, struct netlink_ext_ack *extack) +mlx5_sf_alloc(struct mlx5_sf_table *table, struct mlx5_eswitch *esw, + u32 controller, u32 sfnum, struct netlink_ext_ack *extack) { unsigned int dl_port_index; struct mlx5_sf *sf; @@ -66,7 +68,12 @@ mlx5_sf_alloc(struct mlx5_sf_table *table, u32 sfnum, struct netlink_ext_ack *ex int id_err; int err; - id_err = mlx5_sf_hw_table_sf_alloc(table->dev, sfnum); + if (!mlx5_esw_offloads_controller_valid(esw, controller)) { + NL_SET_ERR_MSG_MOD(extack, "Invalid controller number"); + return ERR_PTR(-EINVAL); + } + + id_err = mlx5_sf_hw_table_sf_alloc(table->dev, controller, sfnum); if (id_err < 0) { err = id_err; goto id_err; @@ -78,11 +85,12 @@ mlx5_sf_alloc(struct mlx5_sf_table *table, u32 sfnum, struct netlink_ext_ack *ex goto alloc_err; } sf->id = id_err; - 
hw_fn_id = mlx5_sf_sw_to_hw_id(table->dev, sf->id); + hw_fn_id = mlx5_sf_sw_to_hw_id(table->dev, controller, sf->id); dl_port_index = mlx5_esw_vport_to_devlink_port_index(table->dev, hw_fn_id); sf->port_index = dl_port_index; sf->hw_fn_id = hw_fn_id; sf->hw_state = MLX5_VHCA_STATE_ALLOCATED; + sf->controller = controller; err = mlx5_sf_id_insert(table, sf); if (err) @@ -93,7 +101,7 @@ mlx5_sf_alloc(struct mlx5_sf_table *table, u32 sfnum, struct netlink_ext_ack *ex insert_err: kfree(sf); alloc_err: - mlx5_sf_hw_table_sf_free(table->dev, id_err); + mlx5_sf_hw_table_sf_free(table->dev, controller, id_err); id_err: if (err == -EEXIST) NL_SET_ERR_MSG_MOD(extack, "SF already exist. Choose different sfnum"); @@ -103,7 +111,7 @@ id_err: static void mlx5_sf_free(struct mlx5_sf_table *table, struct mlx5_sf *sf) { mlx5_sf_id_erase(table, sf); - mlx5_sf_hw_table_sf_free(table->dev, sf->id); + mlx5_sf_hw_table_sf_free(table->dev, sf->controller, sf->id); kfree(sf); } @@ -270,15 +278,14 @@ static int mlx5_sf_add(struct mlx5_core_dev *dev, struct mlx5_sf_table *table, { struct mlx5_eswitch *esw = dev->priv.eswitch; struct mlx5_sf *sf; - u16 hw_fn_id; int err; - sf = mlx5_sf_alloc(table, new_attr->sfnum, extack); + sf = mlx5_sf_alloc(table, esw, new_attr->controller, new_attr->sfnum, extack); if (IS_ERR(sf)) return PTR_ERR(sf); - hw_fn_id = mlx5_sf_sw_to_hw_id(dev, sf->id); - err = mlx5_esw_offloads_sf_vport_enable(esw, &sf->dl_port, hw_fn_id, new_attr->sfnum); + err = mlx5_esw_offloads_sf_vport_enable(esw, &sf->dl_port, sf->hw_fn_id, + new_attr->controller, new_attr->sfnum); if (err) goto esw_err; *new_port_index = sf->port_index; @@ -307,7 +314,8 @@ mlx5_sf_new_check_attr(struct mlx5_core_dev *dev, const struct devlink_port_new_ "User must provide unique sfnum. Driver does not support auto assignment"); return -EOPNOTSUPP; } - if (new_attr->controller_valid && new_attr->controller) { + if (new_attr->controller_valid && new_attr->controller && + !mlx5_core_is_ecpf_esw_manager(dev)) { NL_SET_ERR_MSG_MOD(extack, "External controller is unsupported"); return -EOPNOTSUPP; } @@ -353,10 +361,10 @@ static void mlx5_sf_dealloc(struct mlx5_sf_table *table, struct mlx5_sf *sf) * firmware gives confirmation that it is detached by the driver. 
*/ mlx5_cmd_sf_disable_hca(table->dev, sf->hw_fn_id); - mlx5_sf_hw_table_sf_deferred_free(table->dev, sf->id); + mlx5_sf_hw_table_sf_deferred_free(table->dev, sf->controller, sf->id); kfree(sf); } else { - mlx5_sf_hw_table_sf_deferred_free(table->dev, sf->id); + mlx5_sf_hw_table_sf_deferred_free(table->dev, sf->controller, sf->id); kfree(sf); } } @@ -438,9 +446,6 @@ sf_err: static void mlx5_sf_table_enable(struct mlx5_sf_table *table) { - if (!mlx5_sf_max_functions(table->dev)) - return; - init_completion(&table->disable_complete); refcount_set(&table->refcount, 1); } @@ -463,9 +468,6 @@ static void mlx5_sf_deactivate_all(struct mlx5_sf_table *table) static void mlx5_sf_table_disable(struct mlx5_sf_table *table) { - if (!mlx5_sf_max_functions(table->dev)) - return; - if (!refcount_read(&table->refcount)) return; @@ -492,14 +494,15 @@ static int mlx5_sf_esw_event(struct notifier_block *nb, unsigned long event, voi break; default: break; - }; + } return 0; } static bool mlx5_sf_table_supported(const struct mlx5_core_dev *dev) { - return dev->priv.eswitch && MLX5_ESWITCH_MANAGER(dev) && mlx5_sf_supported(dev); + return dev->priv.eswitch && MLX5_ESWITCH_MANAGER(dev) && + mlx5_sf_hw_table_supported(dev); } int mlx5_sf_table_init(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c index 58b6be0b03d7..ef5f892aafad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c @@ -5,8 +5,10 @@ #include "priv.h" #include "sf.h" #include "mlx5_ifc_vhca_event.h" -#include "vhca_event.h" #include "ecpf.h" +#include "vhca_event.h" +#include "mlx5_core.h" +#include "eswitch.h" struct mlx5_sf_hw { u32 usr_sfnum; @@ -14,164 +16,276 @@ struct mlx5_sf_hw { u8 pending_delete: 1; }; +struct mlx5_sf_hwc_table { + struct mlx5_sf_hw *sfs; + int max_fn; + u16 start_fn_id; +}; + +enum mlx5_sf_hwc_index { + MLX5_SF_HWC_LOCAL, + MLX5_SF_HWC_EXTERNAL, + MLX5_SF_HWC_MAX, +}; + struct mlx5_sf_hw_table { struct mlx5_core_dev *dev; - struct mlx5_sf_hw *sfs; - int max_local_functions; - u8 ecpu: 1; struct mutex table_lock; /* Serializes sf deletion and vhca state change handler. 
*/ struct notifier_block vhca_nb; + struct mlx5_sf_hwc_table hwc[MLX5_SF_HWC_MAX]; }; -u16 mlx5_sf_sw_to_hw_id(const struct mlx5_core_dev *dev, u16 sw_id) +static struct mlx5_sf_hwc_table * +mlx5_sf_controller_to_hwc(struct mlx5_core_dev *dev, u32 controller) { - return sw_id + mlx5_sf_start_function_id(dev); + int idx = !!controller; + + return &dev->priv.sf_hw_table->hwc[idx]; } -static u16 mlx5_sf_hw_to_sw_id(const struct mlx5_core_dev *dev, u16 hw_id) +u16 mlx5_sf_sw_to_hw_id(struct mlx5_core_dev *dev, u32 controller, u16 sw_id) { - return hw_id - mlx5_sf_start_function_id(dev); + struct mlx5_sf_hwc_table *hwc; + + hwc = mlx5_sf_controller_to_hwc(dev, controller); + return hwc->start_fn_id + sw_id; } -int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 usr_sfnum) +static u16 mlx5_sf_hw_to_sw_id(struct mlx5_sf_hwc_table *hwc, u16 hw_id) +{ + return hw_id - hwc->start_fn_id; +} + +static struct mlx5_sf_hwc_table * +mlx5_sf_table_fn_to_hwc(struct mlx5_sf_hw_table *table, u16 fn_id) { - struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table; - int sw_id = -ENOSPC; - u16 hw_fn_id; - int err; int i; - if (!table->max_local_functions) - return -EOPNOTSUPP; + for (i = 0; i < ARRAY_SIZE(table->hwc); i++) { + if (table->hwc[i].max_fn && + fn_id >= table->hwc[i].start_fn_id && + fn_id < (table->hwc[i].start_fn_id + table->hwc[i].max_fn)) + return &table->hwc[i]; + } + return NULL; +} + +static int mlx5_sf_hw_table_id_alloc(struct mlx5_sf_hw_table *table, u32 controller, + u32 usr_sfnum) +{ + struct mlx5_sf_hwc_table *hwc; + int i; + + hwc = mlx5_sf_controller_to_hwc(table->dev, controller); + if (!hwc->sfs) + return -ENOSPC; - mutex_lock(&table->table_lock); /* Check if sf with same sfnum already exists or not. */ - for (i = 0; i < table->max_local_functions; i++) { - if (table->sfs[i].allocated && table->sfs[i].usr_sfnum == usr_sfnum) { - err = -EEXIST; - goto exist_err; - } + for (i = 0; i < hwc->max_fn; i++) { + if (hwc->sfs[i].allocated && hwc->sfs[i].usr_sfnum == usr_sfnum) + return -EEXIST; } - /* Find the free entry and allocate the entry from the array */ - for (i = 0; i < table->max_local_functions; i++) { - if (!table->sfs[i].allocated) { - table->sfs[i].usr_sfnum = usr_sfnum; - table->sfs[i].allocated = true; - sw_id = i; - break; + for (i = 0; i < hwc->max_fn; i++) { + if (!hwc->sfs[i].allocated) { + hwc->sfs[i].usr_sfnum = usr_sfnum; + hwc->sfs[i].allocated = true; + return i; } } - if (sw_id == -ENOSPC) { - err = -ENOSPC; - goto err; + return -ENOSPC; +} + +static void mlx5_sf_hw_table_id_free(struct mlx5_sf_hw_table *table, u32 controller, int id) +{ + struct mlx5_sf_hwc_table *hwc; + + hwc = mlx5_sf_controller_to_hwc(table->dev, controller); + hwc->sfs[id].allocated = false; + hwc->sfs[id].pending_delete = false; +} + +int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 controller, u32 usr_sfnum) +{ + struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table; + u16 hw_fn_id; + int sw_id; + int err; + + if (!table) + return -EOPNOTSUPP; + + mutex_lock(&table->table_lock); + sw_id = mlx5_sf_hw_table_id_alloc(table, controller, usr_sfnum); + if (sw_id < 0) { + err = sw_id; + goto exist_err; } - hw_fn_id = mlx5_sf_sw_to_hw_id(table->dev, sw_id); - err = mlx5_cmd_alloc_sf(table->dev, hw_fn_id); + hw_fn_id = mlx5_sf_sw_to_hw_id(dev, controller, sw_id); + err = mlx5_cmd_alloc_sf(dev, hw_fn_id); if (err) goto err; - err = mlx5_modify_vhca_sw_id(dev, hw_fn_id, table->ecpu, usr_sfnum); + err = mlx5_modify_vhca_sw_id(dev, hw_fn_id, usr_sfnum); if (err) goto vhca_err; + if 
(controller) { + /* If this SF is for external controller, SF manager + * needs to arm firmware to receive the events. + */ + err = mlx5_vhca_event_arm(dev, hw_fn_id); + if (err) + goto vhca_err; + } + mutex_unlock(&table->table_lock); return sw_id; vhca_err: - mlx5_cmd_dealloc_sf(table->dev, hw_fn_id); + mlx5_cmd_dealloc_sf(dev, hw_fn_id); err: - table->sfs[i].allocated = false; + mlx5_sf_hw_table_id_free(table, controller, sw_id); exist_err: mutex_unlock(&table->table_lock); return err; } -static void _mlx5_sf_hw_id_free(struct mlx5_core_dev *dev, u16 id) +void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u32 controller, u16 id) { struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table; u16 hw_fn_id; - hw_fn_id = mlx5_sf_sw_to_hw_id(table->dev, id); - mlx5_cmd_dealloc_sf(table->dev, hw_fn_id); - table->sfs[id].allocated = false; - table->sfs[id].pending_delete = false; + mutex_lock(&table->table_lock); + hw_fn_id = mlx5_sf_sw_to_hw_id(dev, controller, id); + mlx5_cmd_dealloc_sf(dev, hw_fn_id); + mlx5_sf_hw_table_id_free(table, controller, id); + mutex_unlock(&table->table_lock); } -void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u16 id) +static void mlx5_sf_hw_table_hwc_sf_free(struct mlx5_core_dev *dev, + struct mlx5_sf_hwc_table *hwc, int idx) { - struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table; - - mutex_lock(&table->table_lock); - _mlx5_sf_hw_id_free(dev, id); - mutex_unlock(&table->table_lock); + mlx5_cmd_dealloc_sf(dev, hwc->start_fn_id + idx); + hwc->sfs[idx].allocated = false; + hwc->sfs[idx].pending_delete = false; } -void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u16 id) +void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u32 controller, u16 id) { struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table; u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {}; + struct mlx5_sf_hwc_table *hwc; u16 hw_fn_id; u8 state; int err; - hw_fn_id = mlx5_sf_sw_to_hw_id(dev, id); + hw_fn_id = mlx5_sf_sw_to_hw_id(dev, controller, id); + hwc = mlx5_sf_controller_to_hwc(dev, controller); mutex_lock(&table->table_lock); - err = mlx5_cmd_query_vhca_state(dev, hw_fn_id, table->ecpu, out, sizeof(out)); + err = mlx5_cmd_query_vhca_state(dev, hw_fn_id, out, sizeof(out)); if (err) goto err; state = MLX5_GET(query_vhca_state_out, out, vhca_state_context.vhca_state); if (state == MLX5_VHCA_STATE_ALLOCATED) { - mlx5_cmd_dealloc_sf(table->dev, hw_fn_id); - table->sfs[id].allocated = false; + mlx5_cmd_dealloc_sf(dev, hw_fn_id); + hwc->sfs[id].allocated = false; } else { - table->sfs[id].pending_delete = true; + hwc->sfs[id].pending_delete = true; } err: mutex_unlock(&table->table_lock); } -static void mlx5_sf_hw_dealloc_all(struct mlx5_sf_hw_table *table) +static void mlx5_sf_hw_table_hwc_dealloc_all(struct mlx5_core_dev *dev, + struct mlx5_sf_hwc_table *hwc) { int i; - for (i = 0; i < table->max_local_functions; i++) { - if (table->sfs[i].allocated) - _mlx5_sf_hw_id_free(table->dev, i); + for (i = 0; i < hwc->max_fn; i++) { + if (hwc->sfs[i].allocated) + mlx5_sf_hw_table_hwc_sf_free(dev, hwc, i); } } +static void mlx5_sf_hw_table_dealloc_all(struct mlx5_sf_hw_table *table) +{ + mlx5_sf_hw_table_hwc_dealloc_all(table->dev, &table->hwc[MLX5_SF_HWC_EXTERNAL]); + mlx5_sf_hw_table_hwc_dealloc_all(table->dev, &table->hwc[MLX5_SF_HWC_LOCAL]); +} + +static int mlx5_sf_hw_table_hwc_init(struct mlx5_sf_hwc_table *hwc, u16 max_fn, u16 base_id) +{ + struct mlx5_sf_hw *sfs; + + if (!max_fn) + return 0; + + sfs = kcalloc(max_fn, sizeof(*sfs), GFP_KERNEL); + if (!sfs) + 
return -ENOMEM; + + hwc->sfs = sfs; + hwc->max_fn = max_fn; + hwc->start_fn_id = base_id; + return 0; +} + +static void mlx5_sf_hw_table_hwc_cleanup(struct mlx5_sf_hwc_table *hwc) +{ + kfree(hwc->sfs); +} + int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev) { struct mlx5_sf_hw_table *table; - struct mlx5_sf_hw *sfs; - int max_functions; + u16 max_ext_fn = 0; + u16 ext_base_id; + u16 max_fn = 0; + u16 base_id; + int err; + + if (!mlx5_vhca_event_supported(dev)) + return 0; + + if (mlx5_sf_supported(dev)) + max_fn = mlx5_sf_max_functions(dev); + + err = mlx5_esw_sf_max_hpf_functions(dev, &max_ext_fn, &ext_base_id); + if (err) + return err; - if (!mlx5_sf_supported(dev) || !mlx5_vhca_event_supported(dev)) + if (!max_fn && !max_ext_fn) return 0; - max_functions = mlx5_sf_max_functions(dev); table = kzalloc(sizeof(*table), GFP_KERNEL); if (!table) return -ENOMEM; - sfs = kcalloc(max_functions, sizeof(*sfs), GFP_KERNEL); - if (!sfs) - goto table_err; - mutex_init(&table->table_lock); table->dev = dev; - table->sfs = sfs; - table->max_local_functions = max_functions; - table->ecpu = mlx5_read_embedded_cpu(dev); dev->priv.sf_hw_table = table; - mlx5_core_dbg(dev, "SF HW table: max sfs = %d\n", max_functions); + + base_id = mlx5_sf_start_function_id(dev); + err = mlx5_sf_hw_table_hwc_init(&table->hwc[MLX5_SF_HWC_LOCAL], max_fn, base_id); + if (err) + goto table_err; + + err = mlx5_sf_hw_table_hwc_init(&table->hwc[MLX5_SF_HWC_EXTERNAL], + max_ext_fn, ext_base_id); + if (err) + goto ext_err; + + mlx5_core_dbg(dev, "SF HW table: max sfs = %d, ext sfs = %d\n", max_fn, max_ext_fn); return 0; +ext_err: + mlx5_sf_hw_table_hwc_cleanup(&table->hwc[MLX5_SF_HWC_LOCAL]); table_err: + mutex_destroy(&table->table_lock); kfree(table); - return -ENOMEM; + return err; } void mlx5_sf_hw_table_cleanup(struct mlx5_core_dev *dev) @@ -182,7 +296,8 @@ void mlx5_sf_hw_table_cleanup(struct mlx5_core_dev *dev) return; mutex_destroy(&table->table_lock); - kfree(table->sfs); + mlx5_sf_hw_table_hwc_cleanup(&table->hwc[MLX5_SF_HWC_EXTERNAL]); + mlx5_sf_hw_table_hwc_cleanup(&table->hwc[MLX5_SF_HWC_LOCAL]); kfree(table); } @@ -190,21 +305,26 @@ static int mlx5_sf_hw_vhca_event(struct notifier_block *nb, unsigned long opcode { struct mlx5_sf_hw_table *table = container_of(nb, struct mlx5_sf_hw_table, vhca_nb); const struct mlx5_vhca_state_event *event = data; + struct mlx5_sf_hwc_table *hwc; struct mlx5_sf_hw *sf_hw; u16 sw_id; if (event->new_vhca_state != MLX5_VHCA_STATE_ALLOCATED) return 0; - sw_id = mlx5_sf_hw_to_sw_id(table->dev, event->function_id); - sf_hw = &table->sfs[sw_id]; + hwc = mlx5_sf_table_fn_to_hwc(table, event->function_id); + if (!hwc) + return 0; + + sw_id = mlx5_sf_hw_to_sw_id(hwc, event->function_id); + sf_hw = &hwc->sfs[sw_id]; mutex_lock(&table->table_lock); /* SF driver notified through firmware that SF is finally detached. * Hence recycle the sf hardware id for reuse. 
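With the split into per-controller ranges above (hwc[MLX5_SF_HWC_LOCAL] and hwc[MLX5_SF_HWC_EXTERNAL]), an SF's software id is just an offset inside its controller's function-id range, so mlx5_sf_sw_to_hw_id() and mlx5_sf_hw_to_sw_id() translate by adding or subtracting that range's start_fn_id. A worked example with invented base ids and sizes (illustration only, not real capability values):

/* Invented ranges:
 *   local:    start_fn_id = 100,  max_fn = 64 -> hw ids 100..163
 *   external: start_fn_id = 1000, max_fn = 32 -> hw ids 1000..1031
 *
 * sw_to_hw: controller 0, sw_id 5 -> 100 + 5  = 105
 *           controller 1, sw_id 5 -> 1000 + 5 = 1005
 * hw_to_sw: mlx5_sf_table_fn_to_hwc() first finds the range whose
 *           [start_fn_id, start_fn_id + max_fn) interval contains the
 *           function id, then subtracts: fn_id 1005 -> external range,
 *           sw_id 1005 - 1000 = 5.
 */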
*/ if (sf_hw->allocated && sf_hw->pending_delete) - _mlx5_sf_hw_id_free(table->dev, sw_id); + mlx5_sf_hw_table_hwc_sf_free(table->dev, hwc, sw_id); mutex_unlock(&table->table_lock); return 0; } @@ -217,7 +337,7 @@ int mlx5_sf_hw_table_create(struct mlx5_core_dev *dev) return 0; table->vhca_nb.notifier_call = mlx5_sf_hw_vhca_event; - return mlx5_vhca_event_notifier_register(table->dev, &table->vhca_nb); + return mlx5_vhca_event_notifier_register(dev, &table->vhca_nb); } void mlx5_sf_hw_table_destroy(struct mlx5_core_dev *dev) @@ -227,7 +347,12 @@ void mlx5_sf_hw_table_destroy(struct mlx5_core_dev *dev) if (!table) return; - mlx5_vhca_event_notifier_unregister(table->dev, &table->vhca_nb); + mlx5_vhca_event_notifier_unregister(dev, &table->vhca_nb); /* Dealloc SFs whose firmware event has been missed. */ - mlx5_sf_hw_dealloc_all(table); + mlx5_sf_hw_table_dealloc_all(table); +} + +bool mlx5_sf_hw_table_supported(const struct mlx5_core_dev *dev) +{ + return !!dev->priv.sf_hw_table; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/mlx5_ifc_vhca_event.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/mlx5_ifc_vhca_event.h index 1daf5a122ba3..4fc870140d71 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/mlx5_ifc_vhca_event.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/mlx5_ifc_vhca_event.h @@ -20,7 +20,7 @@ struct mlx5_ifc_vhca_state_context_bits { u8 sw_function_id[0x20]; - u8 reserved_at_40[0x80]; + u8 reserved_at_40[0x40]; }; struct mlx5_ifc_query_vhca_state_out_bits { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h index cb02a51d0986..7114f3fc335f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h @@ -12,10 +12,11 @@ int mlx5_cmd_dealloc_sf(struct mlx5_core_dev *dev, u16 function_id); int mlx5_cmd_sf_enable_hca(struct mlx5_core_dev *dev, u16 func_id); int mlx5_cmd_sf_disable_hca(struct mlx5_core_dev *dev, u16 func_id); -u16 mlx5_sf_sw_to_hw_id(const struct mlx5_core_dev *dev, u16 sw_id); +u16 mlx5_sf_sw_to_hw_id(struct mlx5_core_dev *dev, u32 controller, u16 sw_id); -int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 usr_sfnum); -void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u16 id); -void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u16 id); +int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 controller, u32 usr_sfnum); +void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u32 controller, u16 id); +void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u32 controller, u16 id); +bool mlx5_sf_hw_table_supported(const struct mlx5_core_dev *dev); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.c index af2f2dd9db25..28b14b05086f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.c @@ -19,52 +19,51 @@ struct mlx5_vhca_event_work { struct mlx5_vhca_state_event event; }; -int mlx5_cmd_query_vhca_state(struct mlx5_core_dev *dev, u16 function_id, - bool ecpu, u32 *out, u32 outlen) +int mlx5_cmd_query_vhca_state(struct mlx5_core_dev *dev, u16 function_id, u32 *out, u32 outlen) { u32 in[MLX5_ST_SZ_DW(query_vhca_state_in)] = {}; MLX5_SET(query_vhca_state_in, in, opcode, MLX5_CMD_OP_QUERY_VHCA_STATE); MLX5_SET(query_vhca_state_in, in, function_id, function_id); - MLX5_SET(query_vhca_state_in, in, embedded_cpu_function, ecpu); + 
MLX5_SET(query_vhca_state_in, in, embedded_cpu_function, 0); return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } static int mlx5_cmd_modify_vhca_state(struct mlx5_core_dev *dev, u16 function_id, - bool ecpu, u32 *in, u32 inlen) + u32 *in, u32 inlen) { u32 out[MLX5_ST_SZ_DW(modify_vhca_state_out)] = {}; MLX5_SET(modify_vhca_state_in, in, opcode, MLX5_CMD_OP_MODIFY_VHCA_STATE); MLX5_SET(modify_vhca_state_in, in, function_id, function_id); - MLX5_SET(modify_vhca_state_in, in, embedded_cpu_function, ecpu); + MLX5_SET(modify_vhca_state_in, in, embedded_cpu_function, 0); return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } -int mlx5_modify_vhca_sw_id(struct mlx5_core_dev *dev, u16 function_id, bool ecpu, u32 sw_fn_id) +int mlx5_modify_vhca_sw_id(struct mlx5_core_dev *dev, u16 function_id, u32 sw_fn_id) { u32 out[MLX5_ST_SZ_DW(modify_vhca_state_out)] = {}; u32 in[MLX5_ST_SZ_DW(modify_vhca_state_in)] = {}; MLX5_SET(modify_vhca_state_in, in, opcode, MLX5_CMD_OP_MODIFY_VHCA_STATE); MLX5_SET(modify_vhca_state_in, in, function_id, function_id); - MLX5_SET(modify_vhca_state_in, in, embedded_cpu_function, ecpu); + MLX5_SET(modify_vhca_state_in, in, embedded_cpu_function, 0); MLX5_SET(modify_vhca_state_in, in, vhca_state_field_select.sw_function_id, 1); MLX5_SET(modify_vhca_state_in, in, vhca_state_context.sw_function_id, sw_fn_id); return mlx5_cmd_exec_inout(dev, modify_vhca_state, in, out); } -int mlx5_vhca_event_arm(struct mlx5_core_dev *dev, u16 function_id, bool ecpu) +int mlx5_vhca_event_arm(struct mlx5_core_dev *dev, u16 function_id) { u32 in[MLX5_ST_SZ_DW(modify_vhca_state_in)] = {}; MLX5_SET(modify_vhca_state_in, in, vhca_state_context.arm_change_event, 1); MLX5_SET(modify_vhca_state_in, in, vhca_state_field_select.arm_change_event, 1); - return mlx5_cmd_modify_vhca_state(dev, function_id, ecpu, in, sizeof(in)); + return mlx5_cmd_modify_vhca_state(dev, function_id, in, sizeof(in)); } static void @@ -73,7 +72,7 @@ mlx5_vhca_event_notify(struct mlx5_core_dev *dev, struct mlx5_vhca_state_event * u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {}; int err; - err = mlx5_cmd_query_vhca_state(dev, event->function_id, event->ecpu, out, sizeof(out)); + err = mlx5_cmd_query_vhca_state(dev, event->function_id, out, sizeof(out)); if (err) return; @@ -82,7 +81,7 @@ mlx5_vhca_event_notify(struct mlx5_core_dev *dev, struct mlx5_vhca_state_event * event->new_vhca_state = MLX5_GET(query_vhca_state_out, out, vhca_state_context.vhca_state); - mlx5_vhca_event_arm(dev, event->function_id, event->ecpu); + mlx5_vhca_event_arm(dev, event->function_id); blocking_notifier_call_chain(&dev->priv.vhca_state_notifier->n_head, 0, event); } @@ -94,6 +93,7 @@ static void mlx5_vhca_state_work_handler(struct work_struct *_work) struct mlx5_core_dev *dev = notifier->dev; mlx5_vhca_event_notify(dev, &work->event); + kfree(work); } static int @@ -110,7 +110,6 @@ mlx5_vhca_state_change_notifier(struct notifier_block *nb, unsigned long type, v INIT_WORK(&work->work, &mlx5_vhca_state_work_handler); work->notifier = notifier; work->event.function_id = be16_to_cpu(eqe->data.vhca_state.function_id); - work->event.ecpu = be16_to_cpu(eqe->data.vhca_state.ec_function); mlx5_events_work_enqueue(notifier->dev, &work->work); return NOTIFY_OK; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.h index 1fe1ec6f4d4b..013cdfe90616 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.h @@ 
-10,7 +10,6 @@ struct mlx5_vhca_state_event { u16 function_id; u16 sw_function_id; u8 new_vhca_state; - bool ecpu; }; static inline bool mlx5_vhca_event_supported(const struct mlx5_core_dev *dev) @@ -25,10 +24,10 @@ void mlx5_vhca_event_start(struct mlx5_core_dev *dev); void mlx5_vhca_event_stop(struct mlx5_core_dev *dev); int mlx5_vhca_event_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb); void mlx5_vhca_event_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb); -int mlx5_modify_vhca_sw_id(struct mlx5_core_dev *dev, u16 function_id, bool ecpu, u32 sw_fn_id); -int mlx5_vhca_event_arm(struct mlx5_core_dev *dev, u16 function_id, bool ecpu); +int mlx5_modify_vhca_sw_id(struct mlx5_core_dev *dev, u16 function_id, u32 sw_fn_id); +int mlx5_vhca_event_arm(struct mlx5_core_dev *dev, u16 function_id); int mlx5_cmd_query_vhca_state(struct mlx5_core_dev *dev, u16 function_id, - bool ecpu, u32 *out, u32 outlen); + u32 *out, u32 outlen); #else static inline void mlx5_vhca_state_cap_handle(struct mlx5_core_dev *dev, void *set_hca_cap) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 3094d20297a9..2338989d4403 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -71,8 +71,7 @@ static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf) static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; - int err; - int vf; + int err, vf, num_msix_count; if (!MLX5_ESWITCH_MANAGER(dev)) goto enable_vfs_hca; @@ -85,12 +84,22 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) } enable_vfs_hca: + num_msix_count = mlx5_get_default_msix_vec_count(dev, num_vfs); for (vf = 0; vf < num_vfs; vf++) { err = mlx5_core_enable_hca(dev, vf + 1); if (err) { mlx5_core_warn(dev, "failed to enable VF %d (%d)\n", vf, err); continue; } + + err = mlx5_set_msix_vec_count(dev, vf + 1, num_msix_count); + if (err) { + mlx5_core_warn(dev, + "failed to set MSI-X vector counts VF %d, err %d\n", + vf, err); + continue; + } + sriov->vfs_ctx[vf].enabled = 1; if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) { err = sriov_restore_guids(dev, vf); @@ -178,6 +187,41 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) return err ? err : num_vfs; } +int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count) +{ + struct pci_dev *pf = pci_physfn(vf); + struct mlx5_core_sriov *sriov; + struct mlx5_core_dev *dev; + int num_vf_msix, id; + + dev = pci_get_drvdata(pf); + num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix); + if (!num_vf_msix) + return -EOPNOTSUPP; + + if (!msix_vec_count) + msix_vec_count = + mlx5_get_default_msix_vec_count(dev, pci_num_vf(pf)); + + sriov = &dev->priv.sriov; + + /* Reversed translation of PCI VF function number to the internal + * function_id, which exists in the name of virtfn symlink. 
+ */ + for (id = 0; id < pci_num_vf(pf); id++) { + if (!sriov->vfs_ctx[id].enabled) + continue; + + if (vf->devfn == pci_iov_virtfn_devfn(pf, id)) + break; + } + + if (id == pci_num_vf(pf) || !sriov->vfs_ctx[id].enabled) + return -EINVAL; + + return mlx5_set_msix_vec_count(dev, id + 1, msix_vec_count); +} + int mlx5_sriov_attach(struct mlx5_core_dev *dev) { if (!mlx5_core_is_pf(dev) || !pci_num_vf(dev->pdev)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index 28a7971cac6a..949879cf2092 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -313,8 +313,8 @@ static int dr_action_handle_cs_recalc(struct mlx5dr_domain *dmn, * table, since there is an *assumption* that in such case FW * will recalculate the CS. */ - if (dest_action->dest_tbl.is_fw_tbl) { - *final_icm_addr = dest_action->dest_tbl.fw_tbl.rx_icm_addr; + if (dest_action->dest_tbl->is_fw_tbl) { + *final_icm_addr = dest_action->dest_tbl->fw_tbl.rx_icm_addr; } else { mlx5dr_dbg(dmn, "Destination FT should be terminating when modify TTL is used\n"); @@ -326,8 +326,8 @@ static int dr_action_handle_cs_recalc(struct mlx5dr_domain *dmn, /* If destination is vport we will get the FW flow table * that recalculates the CS and forwards to the vport. */ - ret = mlx5dr_domain_cache_get_recalc_cs_ft_addr(dest_action->vport.dmn, - dest_action->vport.caps->num, + ret = mlx5dr_domain_cache_get_recalc_cs_ft_addr(dest_action->vport->dmn, + dest_action->vport->caps->num, final_icm_addr); if (ret) { mlx5dr_err(dmn, "Failed to get FW cs recalc flow table\n"); @@ -369,6 +369,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, action_domain = dr_action_get_action_domain(dmn->type, nic_dmn->ste_type); for (i = 0; i < num_actions; i++) { + struct mlx5dr_action_dest_tbl *dest_tbl; struct mlx5dr_action *action; int max_actions_type = 1; u32 action_type; @@ -382,37 +383,38 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, break; case DR_ACTION_TYP_FT: dest_action = action; - if (!action->dest_tbl.is_fw_tbl) { - if (action->dest_tbl.tbl->dmn != dmn) { + dest_tbl = action->dest_tbl; + if (!dest_tbl->is_fw_tbl) { + if (dest_tbl->tbl->dmn != dmn) { mlx5dr_err(dmn, "Destination table belongs to a different domain\n"); goto out_invalid_arg; } - if (action->dest_tbl.tbl->level <= matcher->tbl->level) { + if (dest_tbl->tbl->level <= matcher->tbl->level) { mlx5_core_warn_once(dmn->mdev, "Connecting table to a lower/same level destination table\n"); mlx5dr_dbg(dmn, "Connecting table at level %d to a destination table at level %d\n", matcher->tbl->level, - action->dest_tbl.tbl->level); + dest_tbl->tbl->level); } attr.final_icm_addr = rx_rule ? 
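The loop above recovers a VF's index from its PCI devfn by probing each enabled slot with pci_iov_virtfn_devfn(); the matching index plus one is the firmware function_id passed to mlx5_set_msix_vec_count(). A standalone sketch of the same reverse lookup, where fake_virtfn_devfn() and its offset are made up for illustration:

#include <stdio.h>

#define NUM_VF 4

static int fake_virtfn_devfn(int id)
{
        return 8 + id;          /* assumed offset/stride of a hypothetical PF */
}

static int vf_id_from_devfn(int devfn, const int *enabled)
{
        int id;

        for (id = 0; id < NUM_VF; id++) {
                if (!enabled[id])
                        continue;
                if (devfn == fake_virtfn_devfn(id))
                        return id;      /* firmware function_id is id + 1 */
        }
        return -1;
}

int main(void)
{
        int enabled[NUM_VF] = { 1, 1, 0, 1 };

        printf("vf index = %d\n", vf_id_from_devfn(11, enabled));
        return 0;
}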
- action->dest_tbl.tbl->rx.s_anchor->chunk->icm_addr : - action->dest_tbl.tbl->tx.s_anchor->chunk->icm_addr; + dest_tbl->tbl->rx.s_anchor->chunk->icm_addr : + dest_tbl->tbl->tx.s_anchor->chunk->icm_addr; } else { struct mlx5dr_cmd_query_flow_table_details output; int ret; /* get the relevant addresses */ - if (!action->dest_tbl.fw_tbl.rx_icm_addr) { + if (!action->dest_tbl->fw_tbl.rx_icm_addr) { ret = mlx5dr_cmd_query_flow_table(dmn->mdev, - action->dest_tbl.fw_tbl.type, - action->dest_tbl.fw_tbl.id, + dest_tbl->fw_tbl.type, + dest_tbl->fw_tbl.id, &output); if (!ret) { - action->dest_tbl.fw_tbl.tx_icm_addr = + dest_tbl->fw_tbl.tx_icm_addr = output.sw_owner_icm_root_1; - action->dest_tbl.fw_tbl.rx_icm_addr = + dest_tbl->fw_tbl.rx_icm_addr = output.sw_owner_icm_root_0; } else { mlx5dr_err(dmn, @@ -422,50 +424,50 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, } } attr.final_icm_addr = rx_rule ? - action->dest_tbl.fw_tbl.rx_icm_addr : - action->dest_tbl.fw_tbl.tx_icm_addr; + dest_tbl->fw_tbl.rx_icm_addr : + dest_tbl->fw_tbl.tx_icm_addr; } break; case DR_ACTION_TYP_QP: mlx5dr_info(dmn, "Domain doesn't support QP\n"); goto out_invalid_arg; case DR_ACTION_TYP_CTR: - attr.ctr_id = action->ctr.ctr_id + - action->ctr.offeset; + attr.ctr_id = action->ctr->ctr_id + + action->ctr->offeset; break; case DR_ACTION_TYP_TAG: - attr.flow_tag = action->flow_tag; + attr.flow_tag = action->flow_tag->flow_tag; break; case DR_ACTION_TYP_TNL_L2_TO_L2: break; case DR_ACTION_TYP_TNL_L3_TO_L2: - attr.decap_index = action->rewrite.index; - attr.decap_actions = action->rewrite.num_of_actions; + attr.decap_index = action->rewrite->index; + attr.decap_actions = action->rewrite->num_of_actions; attr.decap_with_vlan = attr.decap_actions == WITH_VLAN_NUM_HW_ACTIONS; break; case DR_ACTION_TYP_MODIFY_HDR: - attr.modify_index = action->rewrite.index; - attr.modify_actions = action->rewrite.num_of_actions; - recalc_cs_required = action->rewrite.modify_ttl && + attr.modify_index = action->rewrite->index; + attr.modify_actions = action->rewrite->num_of_actions; + recalc_cs_required = action->rewrite->modify_ttl && !mlx5dr_ste_supp_ttl_cs_recalc(&dmn->info.caps); break; case DR_ACTION_TYP_L2_TO_TNL_L2: case DR_ACTION_TYP_L2_TO_TNL_L3: - attr.reformat_size = action->reformat.reformat_size; - attr.reformat_id = action->reformat.reformat_id; + attr.reformat_size = action->reformat->reformat_size; + attr.reformat_id = action->reformat->reformat_id; break; case DR_ACTION_TYP_VPORT: - attr.hit_gvmi = action->vport.caps->vhca_gvmi; + attr.hit_gvmi = action->vport->caps->vhca_gvmi; dest_action = action; if (rx_rule) { /* Loopback on WIRE vport is not supported */ - if (action->vport.caps->num == WIRE_PORT) + if (action->vport->caps->num == WIRE_PORT) goto out_invalid_arg; - attr.final_icm_addr = action->vport.caps->icm_address_rx; + attr.final_icm_addr = action->vport->caps->icm_address_rx; } else { - attr.final_icm_addr = action->vport.caps->icm_address_tx; + attr.final_icm_addr = action->vport->caps->icm_address_tx; } break; case DR_ACTION_TYP_POP_VLAN: @@ -477,7 +479,7 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, if (attr.vlans.count == MLX5DR_MAX_VLANS) return -EINVAL; - attr.vlans.headers[attr.vlans.count++] = action->push_vlan.vlan_hdr; + attr.vlans.headers[attr.vlans.count++] = action->push_vlan->vlan_hdr; break; default: goto out_invalid_arg; @@ -530,17 +532,37 @@ out_invalid_arg: return -EINVAL; } +static unsigned int action_size[DR_ACTION_TYP_MAX] = { + [DR_ACTION_TYP_TNL_L2_TO_L2] = 
sizeof(struct mlx5dr_action_reformat), + [DR_ACTION_TYP_L2_TO_TNL_L2] = sizeof(struct mlx5dr_action_reformat), + [DR_ACTION_TYP_TNL_L3_TO_L2] = sizeof(struct mlx5dr_action_rewrite), + [DR_ACTION_TYP_L2_TO_TNL_L3] = sizeof(struct mlx5dr_action_reformat), + [DR_ACTION_TYP_FT] = sizeof(struct mlx5dr_action_dest_tbl), + [DR_ACTION_TYP_CTR] = sizeof(struct mlx5dr_action_ctr), + [DR_ACTION_TYP_TAG] = sizeof(struct mlx5dr_action_flow_tag), + [DR_ACTION_TYP_MODIFY_HDR] = sizeof(struct mlx5dr_action_rewrite), + [DR_ACTION_TYP_VPORT] = sizeof(struct mlx5dr_action_vport), + [DR_ACTION_TYP_PUSH_VLAN] = sizeof(struct mlx5dr_action_push_vlan), +}; + static struct mlx5dr_action * dr_action_create_generic(enum mlx5dr_action_type action_type) { struct mlx5dr_action *action; + int extra_size; + + if (action_type < DR_ACTION_TYP_MAX) + extra_size = action_size[action_type]; + else + return NULL; - action = kzalloc(sizeof(*action), GFP_KERNEL); + action = kzalloc(sizeof(*action) + extra_size, GFP_KERNEL); if (!action) return NULL; action->action_type = action_type; refcount_set(&action->refcount, 1); + action->data = action + 1; return action; } @@ -559,10 +581,10 @@ mlx5dr_action_create_dest_table_num(struct mlx5dr_domain *dmn, u32 table_num) if (!action) return NULL; - action->dest_tbl.is_fw_tbl = true; - action->dest_tbl.fw_tbl.dmn = dmn; - action->dest_tbl.fw_tbl.id = table_num; - action->dest_tbl.fw_tbl.type = FS_FT_FDB; + action->dest_tbl->is_fw_tbl = true; + action->dest_tbl->fw_tbl.dmn = dmn; + action->dest_tbl->fw_tbl.id = table_num; + action->dest_tbl->fw_tbl.type = FS_FT_FDB; refcount_inc(&dmn->refcount); return action; @@ -579,7 +601,7 @@ mlx5dr_action_create_dest_table(struct mlx5dr_table *tbl) if (!action) goto dec_ref; - action->dest_tbl.tbl = tbl; + action->dest_tbl->tbl = tbl; return action; @@ -624,12 +646,12 @@ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, case DR_ACTION_TYP_VPORT: hw_dests[i].vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID; hw_dests[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - hw_dests[i].vport.num = dest_action->vport.caps->num; - hw_dests[i].vport.vhca_id = dest_action->vport.caps->vhca_gvmi; + hw_dests[i].vport.num = dest_action->vport->caps->num; + hw_dests[i].vport.vhca_id = dest_action->vport->caps->vhca_gvmi; if (reformat_action) { reformat_req = true; hw_dests[i].vport.reformat_id = - reformat_action->reformat.reformat_id; + reformat_action->reformat->reformat_id; ref_actions[num_of_ref++] = reformat_action; hw_dests[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID; } @@ -637,10 +659,10 @@ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, case DR_ACTION_TYP_FT: hw_dests[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - if (dest_action->dest_tbl.is_fw_tbl) - hw_dests[i].ft_id = dest_action->dest_tbl.fw_tbl.id; + if (dest_action->dest_tbl->is_fw_tbl) + hw_dests[i].ft_id = dest_action->dest_tbl->fw_tbl.id; else - hw_dests[i].ft_id = dest_action->dest_tbl.tbl->table_id; + hw_dests[i].ft_id = dest_action->dest_tbl->tbl->table_id; break; default: @@ -657,8 +679,8 @@ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, hw_dests, num_of_dests, reformat_req, - &action->dest_tbl.fw_tbl.id, - &action->dest_tbl.fw_tbl.group_id); + &action->dest_tbl->fw_tbl.id, + &action->dest_tbl->fw_tbl.group_id); if (ret) goto free_action; @@ -667,11 +689,11 @@ mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, for (i = 0; i < num_of_ref; i++) refcount_inc(&ref_actions[i]->refcount); - action->dest_tbl.is_fw_tbl = true; - 
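dr_action_create_generic() above switches the per-type action state from embedded unions to a single allocation with type-sized trailing storage, with action->data pointing just past the common header. A compact userspace model of that idiom (struct names here are hypothetical):

#include <stdio.h>
#include <stdlib.h>

struct hdr {
        int type;
        void *data;             /* points into the same allocation */
};

struct ctr_data {
        unsigned int ctr_id;
};

static struct hdr *alloc_with_trailer(int type, size_t extra)
{
        struct hdr *h = calloc(1, sizeof(*h) + extra);

        if (!h)
                return NULL;
        h->type = type;
        h->data = h + 1;        /* trailing bytes, valid due to the single calloc */
        return h;
}

int main(void)
{
        struct hdr *h = alloc_with_trailer(1, sizeof(struct ctr_data));
        struct ctr_data *ctr;

        if (!h)
                return 1;
        ctr = h->data;
        ctr->ctr_id = 7;
        printf("type=%d ctr_id=%u\n", h->type, ctr->ctr_id);
        free(h);                /* one free releases header and trailer together */
        return 0;
}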
action->dest_tbl.fw_tbl.dmn = dmn; - action->dest_tbl.fw_tbl.type = FS_FT_FDB; - action->dest_tbl.fw_tbl.ref_actions = ref_actions; - action->dest_tbl.fw_tbl.num_of_ref_actions = num_of_ref; + action->dest_tbl->is_fw_tbl = true; + action->dest_tbl->fw_tbl.dmn = dmn; + action->dest_tbl->fw_tbl.type = FS_FT_FDB; + action->dest_tbl->fw_tbl.ref_actions = ref_actions; + action->dest_tbl->fw_tbl.num_of_ref_actions = num_of_ref; kfree(hw_dests); @@ -696,10 +718,10 @@ mlx5dr_action_create_dest_flow_fw_table(struct mlx5dr_domain *dmn, if (!action) return NULL; - action->dest_tbl.is_fw_tbl = 1; - action->dest_tbl.fw_tbl.type = ft->type; - action->dest_tbl.fw_tbl.id = ft->id; - action->dest_tbl.fw_tbl.dmn = dmn; + action->dest_tbl->is_fw_tbl = 1; + action->dest_tbl->fw_tbl.type = ft->type; + action->dest_tbl->fw_tbl.id = ft->id; + action->dest_tbl->fw_tbl.dmn = dmn; refcount_inc(&dmn->refcount); @@ -715,7 +737,7 @@ mlx5dr_action_create_flow_counter(u32 counter_id) if (!action) return NULL; - action->ctr.ctr_id = counter_id; + action->ctr->ctr_id = counter_id; return action; } @@ -728,7 +750,7 @@ struct mlx5dr_action *mlx5dr_action_create_tag(u32 tag_value) if (!action) return NULL; - action->flow_tag = tag_value & 0xffffff; + action->flow_tag->flow_tag = tag_value & 0xffffff; return action; } @@ -794,8 +816,8 @@ dr_action_create_reformat_action(struct mlx5dr_domain *dmn, if (ret) return ret; - action->reformat.reformat_id = reformat_id; - action->reformat.reformat_size = data_sz; + action->reformat->reformat_id = reformat_id; + action->reformat->reformat_size = data_sz; return 0; } case DR_ACTION_TYP_TNL_L2_TO_L2: @@ -811,28 +833,28 @@ dr_action_create_reformat_action(struct mlx5dr_domain *dmn, data, data_sz, hw_actions, ACTION_CACHE_LINE_SIZE, - &action->rewrite.num_of_actions); + &action->rewrite->num_of_actions); if (ret) { mlx5dr_dbg(dmn, "Failed creating decap l3 action list\n"); return ret; } - action->rewrite.chunk = mlx5dr_icm_alloc_chunk(dmn->action_icm_pool, - DR_CHUNK_SIZE_8); - if (!action->rewrite.chunk) { + action->rewrite->chunk = mlx5dr_icm_alloc_chunk(dmn->action_icm_pool, + DR_CHUNK_SIZE_8); + if (!action->rewrite->chunk) { mlx5dr_dbg(dmn, "Failed allocating modify header chunk\n"); return -ENOMEM; } - action->rewrite.data = (void *)hw_actions; - action->rewrite.index = (action->rewrite.chunk->icm_addr - + action->rewrite->data = (void *)hw_actions; + action->rewrite->index = (action->rewrite->chunk->icm_addr - dmn->info.caps.hdr_modify_icm_addr) / ACTION_CACHE_LINE_SIZE; ret = mlx5dr_send_postsend_action(dmn, action); if (ret) { mlx5dr_dbg(dmn, "Writing decap l3 actions to ICM failed\n"); - mlx5dr_icm_free_chunk(action->rewrite.chunk); + mlx5dr_icm_free_chunk(action->rewrite->chunk); return ret; } return 0; @@ -867,7 +889,7 @@ struct mlx5dr_action *mlx5dr_action_create_push_vlan(struct mlx5dr_domain *dmn, if (!action) return NULL; - action->push_vlan.vlan_hdr = vlan_hdr_h; + action->push_vlan->vlan_hdr = vlan_hdr_h; return action; } @@ -898,7 +920,7 @@ mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn, if (!action) goto dec_ref; - action->reformat.dmn = dmn; + action->reformat->dmn = dmn; ret = dr_action_create_reformat_action(dmn, data_sz, @@ -1104,17 +1126,17 @@ dr_action_modify_check_set_field_limitation(struct mlx5dr_action *action, const __be64 *sw_action) { u16 sw_field = MLX5_GET(set_action_in, sw_action, field); - struct mlx5dr_domain *dmn = action->rewrite.dmn; + struct mlx5dr_domain *dmn = action->rewrite->dmn; if (sw_field == 
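The rewrite index computed above is simply the chunk's distance from the start of the modify-header ICM area, counted in action cache lines (ACTION_CACHE_LINE_SIZE is 64 in this driver). A self-contained version of the arithmetic, with made-up addresses:

#include <stdio.h>
#include <stdint.h>

#define ACTION_CACHE_LINE_SIZE 64

static uint32_t rewrite_index(uint64_t chunk_icm_addr, uint64_t hdr_modify_icm_addr)
{
        return (uint32_t)((chunk_icm_addr - hdr_modify_icm_addr) /
                          ACTION_CACHE_LINE_SIZE);
}

int main(void)
{
        /* a chunk three cache lines into the area -> index 3 */
        printf("%u\n", rewrite_index(0x100000 + 3 * 64, 0x100000));
        return 0;
}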
MLX5_ACTION_IN_FIELD_METADATA_REG_A) { - action->rewrite.allow_rx = 0; + action->rewrite->allow_rx = 0; if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_TX) { mlx5dr_dbg(dmn, "Unsupported field %d for RX/FDB set action\n", sw_field); return -EINVAL; } } else if (sw_field == MLX5_ACTION_IN_FIELD_METADATA_REG_B) { - action->rewrite.allow_tx = 0; + action->rewrite->allow_tx = 0; if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_RX) { mlx5dr_dbg(dmn, "Unsupported field %d for TX/FDB set action\n", sw_field); @@ -1122,7 +1144,7 @@ dr_action_modify_check_set_field_limitation(struct mlx5dr_action *action, } } - if (!action->rewrite.allow_rx && !action->rewrite.allow_tx) { + if (!action->rewrite->allow_rx && !action->rewrite->allow_tx) { mlx5dr_dbg(dmn, "Modify SET actions not supported on both RX and TX\n"); return -EINVAL; } @@ -1135,7 +1157,7 @@ dr_action_modify_check_add_field_limitation(struct mlx5dr_action *action, const __be64 *sw_action) { u16 sw_field = MLX5_GET(set_action_in, sw_action, field); - struct mlx5dr_domain *dmn = action->rewrite.dmn; + struct mlx5dr_domain *dmn = action->rewrite->dmn; if (sw_field != MLX5_ACTION_IN_FIELD_OUT_IP_TTL && sw_field != MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT && @@ -1153,7 +1175,7 @@ static int dr_action_modify_check_copy_field_limitation(struct mlx5dr_action *action, const __be64 *sw_action) { - struct mlx5dr_domain *dmn = action->rewrite.dmn; + struct mlx5dr_domain *dmn = action->rewrite->dmn; u16 sw_fields[2]; int i; @@ -1162,14 +1184,14 @@ dr_action_modify_check_copy_field_limitation(struct mlx5dr_action *action, for (i = 0; i < 2; i++) { if (sw_fields[i] == MLX5_ACTION_IN_FIELD_METADATA_REG_A) { - action->rewrite.allow_rx = 0; + action->rewrite->allow_rx = 0; if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_TX) { mlx5dr_dbg(dmn, "Unsupported field %d for RX/FDB set action\n", sw_fields[i]); return -EINVAL; } } else if (sw_fields[i] == MLX5_ACTION_IN_FIELD_METADATA_REG_B) { - action->rewrite.allow_tx = 0; + action->rewrite->allow_tx = 0; if (dmn->type != MLX5DR_DOMAIN_TYPE_NIC_RX) { mlx5dr_dbg(dmn, "Unsupported field %d for TX/FDB set action\n", sw_fields[i]); @@ -1178,7 +1200,7 @@ dr_action_modify_check_copy_field_limitation(struct mlx5dr_action *action, } } - if (!action->rewrite.allow_rx && !action->rewrite.allow_tx) { + if (!action->rewrite->allow_rx && !action->rewrite->allow_tx) { mlx5dr_dbg(dmn, "Modify copy actions not supported on both RX and TX\n"); return -EINVAL; } @@ -1190,7 +1212,7 @@ static int dr_action_modify_check_field_limitation(struct mlx5dr_action *action, const __be64 *sw_action) { - struct mlx5dr_domain *dmn = action->rewrite.dmn; + struct mlx5dr_domain *dmn = action->rewrite->dmn; u8 action_type; int ret; @@ -1239,7 +1261,7 @@ static int dr_actions_convert_modify_header(struct mlx5dr_action *action, { const struct mlx5dr_ste_action_modify_field *hw_dst_action_info; const struct mlx5dr_ste_action_modify_field *hw_src_action_info; - struct mlx5dr_domain *dmn = action->rewrite.dmn; + struct mlx5dr_domain *dmn = action->rewrite->dmn; int ret, i, hw_idx = 0; __be64 *sw_action; __be64 hw_action; @@ -1249,8 +1271,8 @@ static int dr_actions_convert_modify_header(struct mlx5dr_action *action, *modify_ttl = false; - action->rewrite.allow_rx = 1; - action->rewrite.allow_tx = 1; + action->rewrite->allow_rx = 1; + action->rewrite->allow_tx = 1; for (i = 0; i < num_sw_actions; i++) { sw_action = &sw_actions[i]; @@ -1358,13 +1380,13 @@ static int dr_action_create_modify_action(struct mlx5dr_domain *dmn, if (ret) goto free_hw_actions; - action->rewrite.chunk = 
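The REG_A/REG_B checks above start from "allowed on both sides" and clear one direction per restricted metadata register; if neither direction survives, the action list is rejected. The narrowing, modeled on its own:

#include <stdbool.h>
#include <stdio.h>

enum field { FIELD_REG_A, FIELD_REG_B, FIELD_OTHER };

static bool actions_supported(const enum field *fields, int n)
{
        bool allow_rx = true, allow_tx = true;
        int i;

        for (i = 0; i < n; i++) {
                if (fields[i] == FIELD_REG_A)           /* TX-only register */
                        allow_rx = false;
                else if (fields[i] == FIELD_REG_B)      /* RX-only register */
                        allow_tx = false;
        }
        return allow_rx || allow_tx;
}

int main(void)
{
        enum field bad[] = { FIELD_REG_A, FIELD_REG_B };

        printf("%d\n", actions_supported(bad, 2));      /* 0: neither side left */
        return 0;
}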
chunk; - action->rewrite.modify_ttl = modify_ttl; - action->rewrite.data = (u8 *)hw_actions; - action->rewrite.num_of_actions = num_hw_actions; - action->rewrite.index = (chunk->icm_addr - - dmn->info.caps.hdr_modify_icm_addr) / - ACTION_CACHE_LINE_SIZE; + action->rewrite->chunk = chunk; + action->rewrite->modify_ttl = modify_ttl; + action->rewrite->data = (u8 *)hw_actions; + action->rewrite->num_of_actions = num_hw_actions; + action->rewrite->index = (chunk->icm_addr - + dmn->info.caps.hdr_modify_icm_addr) / + ACTION_CACHE_LINE_SIZE; ret = mlx5dr_send_postsend_action(dmn, action); if (ret) @@ -1399,7 +1421,7 @@ mlx5dr_action_create_modify_header(struct mlx5dr_domain *dmn, if (!action) goto dec_ref; - action->rewrite.dmn = dmn; + action->rewrite->dmn = dmn; ret = dr_action_create_modify_action(dmn, actions_sz, @@ -1451,8 +1473,8 @@ mlx5dr_action_create_dest_vport(struct mlx5dr_domain *dmn, if (!action) return NULL; - action->vport.dmn = vport_dmn; - action->vport.caps = vport_cap; + action->vport->dmn = vport_dmn; + action->vport->caps = vport_cap; return action; } @@ -1464,44 +1486,44 @@ int mlx5dr_action_destroy(struct mlx5dr_action *action) switch (action->action_type) { case DR_ACTION_TYP_FT: - if (action->dest_tbl.is_fw_tbl) - refcount_dec(&action->dest_tbl.fw_tbl.dmn->refcount); + if (action->dest_tbl->is_fw_tbl) + refcount_dec(&action->dest_tbl->fw_tbl.dmn->refcount); else - refcount_dec(&action->dest_tbl.tbl->refcount); + refcount_dec(&action->dest_tbl->tbl->refcount); - if (action->dest_tbl.is_fw_tbl && - action->dest_tbl.fw_tbl.num_of_ref_actions) { + if (action->dest_tbl->is_fw_tbl && + action->dest_tbl->fw_tbl.num_of_ref_actions) { struct mlx5dr_action **ref_actions; int i; - ref_actions = action->dest_tbl.fw_tbl.ref_actions; - for (i = 0; i < action->dest_tbl.fw_tbl.num_of_ref_actions; i++) + ref_actions = action->dest_tbl->fw_tbl.ref_actions; + for (i = 0; i < action->dest_tbl->fw_tbl.num_of_ref_actions; i++) refcount_dec(&ref_actions[i]->refcount); kfree(ref_actions); - mlx5dr_fw_destroy_md_tbl(action->dest_tbl.fw_tbl.dmn, - action->dest_tbl.fw_tbl.id, - action->dest_tbl.fw_tbl.group_id); + mlx5dr_fw_destroy_md_tbl(action->dest_tbl->fw_tbl.dmn, + action->dest_tbl->fw_tbl.id, + action->dest_tbl->fw_tbl.group_id); } break; case DR_ACTION_TYP_TNL_L2_TO_L2: - refcount_dec(&action->reformat.dmn->refcount); + refcount_dec(&action->reformat->dmn->refcount); break; case DR_ACTION_TYP_TNL_L3_TO_L2: - mlx5dr_icm_free_chunk(action->rewrite.chunk); - refcount_dec(&action->reformat.dmn->refcount); + mlx5dr_icm_free_chunk(action->rewrite->chunk); + refcount_dec(&action->rewrite->dmn->refcount); break; case DR_ACTION_TYP_L2_TO_TNL_L2: case DR_ACTION_TYP_L2_TO_TNL_L3: - mlx5dr_cmd_destroy_reformat_ctx((action->reformat.dmn)->mdev, - action->reformat.reformat_id); - refcount_dec(&action->reformat.dmn->refcount); + mlx5dr_cmd_destroy_reformat_ctx((action->reformat->dmn)->mdev, + action->reformat->reformat_id); + refcount_dec(&action->reformat->dmn->refcount); break; case DR_ACTION_TYP_MODIFY_HDR: - mlx5dr_icm_free_chunk(action->rewrite.chunk); - kfree(action->rewrite.data); - refcount_dec(&action->rewrite.dmn->refcount); + mlx5dr_icm_free_chunk(action->rewrite->chunk); + kfree(action->rewrite->data); + refcount_dec(&action->rewrite->dmn->refcount); break; default: break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c index 30b0136b5bc7..5970cb8fc0c0 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c @@ -85,15 +85,53 @@ int mlx5dr_cmd_query_esw_caps(struct mlx5_core_dev *mdev, return 0; } +static int dr_cmd_query_nic_vport_roce_en(struct mlx5_core_dev *mdev, + u16 vport, bool *roce_en) +{ + u32 out[MLX5_ST_SZ_DW(query_nic_vport_context_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {}; + int err; + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + MLX5_SET(query_nic_vport_context_in, in, other_vport, !!vport); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + *roce_en = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.roce_en); + return 0; +} + int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev, struct mlx5dr_cmd_caps *caps) { + bool roce_en; + int err; + caps->prio_tag_required = MLX5_CAP_GEN(mdev, prio_tag_required); caps->eswitch_manager = MLX5_CAP_GEN(mdev, eswitch_manager); caps->gvmi = MLX5_CAP_GEN(mdev, vhca_id); caps->flex_protocols = MLX5_CAP_GEN(mdev, flex_parser_protocols); caps->sw_format_ver = MLX5_CAP_GEN(mdev, steering_format_version); + if (MLX5_CAP_GEN(mdev, roce)) { + err = dr_cmd_query_nic_vport_roce_en(mdev, 0, &roce_en); + if (err) + return err; + + caps->roce_caps.roce_en = roce_en; + caps->roce_caps.fl_rc_qp_when_roce_disabled = + MLX5_CAP_ROCE(mdev, fl_rc_qp_when_roce_disabled); + caps->roce_caps.fl_rc_qp_when_roce_enabled = + MLX5_CAP_ROCE(mdev, fl_rc_qp_when_roce_enabled); + } + + caps->isolate_vl_tc = MLX5_CAP_GEN(mdev, isolate_vl_tc_new); + if (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED) { caps->flex_parser_id_icmp_dw0 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw0); caps->flex_parser_id_icmp_dw1 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw1); @@ -106,6 +144,34 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev, MLX5_CAP_GEN(mdev, flex_parser_id_icmpv6_dw1); } + if (caps->flex_protocols & MLX5_FLEX_PARSER_GENEVE_TLV_OPTION_0_ENABLED) + caps->flex_parser_id_geneve_tlv_option_0 = + MLX5_CAP_GEN(mdev, flex_parser_id_geneve_tlv_option_0); + + if (caps->flex_protocols & MLX5_FLEX_PARSER_MPLS_OVER_GRE_ENABLED) + caps->flex_parser_id_mpls_over_gre = + MLX5_CAP_GEN(mdev, flex_parser_id_outer_first_mpls_over_gre); + + if (caps->flex_protocols & mlx5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED) + caps->flex_parser_id_mpls_over_udp = + MLX5_CAP_GEN(mdev, flex_parser_id_outer_first_mpls_over_udp_label); + + if (caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_DW_0_ENABLED) + caps->flex_parser_id_gtpu_dw_0 = + MLX5_CAP_GEN(mdev, flex_parser_id_gtpu_dw_0); + + if (caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_TEID_ENABLED) + caps->flex_parser_id_gtpu_teid = + MLX5_CAP_GEN(mdev, flex_parser_id_gtpu_teid); + + if (caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_DW_2_ENABLED) + caps->flex_parser_id_gtpu_dw_2 = + MLX5_CAP_GEN(mdev, flex_parser_id_gtpu_dw_2); + + if (caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_FIRST_EXT_DW_0_ENABLED) + caps->flex_parser_id_gtpu_first_ext_dw_0 = + MLX5_CAP_GEN(mdev, flex_parser_id_gtpu_first_ext_dw_0); + caps->nic_rx_drop_address = MLX5_CAP64_FLOWTABLE(mdev, sw_steering_nic_rx_action_drop_icm_address); caps->nic_tx_drop_address = @@ -287,7 +353,7 @@ int mlx5dr_cmd_create_empty_flow_group(struct mlx5_core_dev *mdev, u32 *in; int err; - in = kzalloc(inlen, GFP_KERNEL); + in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; @@ -302,7 
+368,7 @@ int mlx5dr_cmd_create_empty_flow_group(struct mlx5_core_dev *mdev, *group_id = MLX5_GET(create_flow_group_out, out, group_id); out: - kfree(in); + kvfree(in); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c index 15673cd10039..6f6191d1d5a6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c @@ -92,15 +92,17 @@ static bool dr_mask_is_tnl_gre_set(struct mlx5dr_match_misc *misc) misc->gre_k_present || misc->gre_s_present); } -#define DR_MASK_IS_OUTER_MPLS_OVER_GRE_UDP_SET(_misc2, gre_udp) ( \ - (_misc2).outer_first_mpls_over_##gre_udp##_label || \ - (_misc2).outer_first_mpls_over_##gre_udp##_exp || \ - (_misc2).outer_first_mpls_over_##gre_udp##_s_bos || \ - (_misc2).outer_first_mpls_over_##gre_udp##_ttl) - -#define DR_MASK_IS_TNL_MPLS_SET(_misc2) ( \ - DR_MASK_IS_OUTER_MPLS_OVER_GRE_UDP_SET((_misc2), gre) || \ - DR_MASK_IS_OUTER_MPLS_OVER_GRE_UDP_SET((_misc2), udp)) +#define DR_MASK_IS_OUTER_MPLS_OVER_GRE_SET(_misc) (\ + (_misc)->outer_first_mpls_over_gre_label || \ + (_misc)->outer_first_mpls_over_gre_exp || \ + (_misc)->outer_first_mpls_over_gre_s_bos || \ + (_misc)->outer_first_mpls_over_gre_ttl) + +#define DR_MASK_IS_OUTER_MPLS_OVER_UDP_SET(_misc) (\ + (_misc)->outer_first_mpls_over_udp_label || \ + (_misc)->outer_first_mpls_over_udp_exp || \ + (_misc)->outer_first_mpls_over_udp_s_bos || \ + (_misc)->outer_first_mpls_over_udp_ttl) static bool dr_mask_is_vxlan_gpe_set(struct mlx5dr_match_misc3 *misc3) @@ -133,6 +135,11 @@ static bool dr_mask_is_tnl_geneve_set(struct mlx5dr_match_misc *misc) misc->geneve_opt_len; } +static bool dr_mask_is_tnl_geneve_tlv_opt(struct mlx5dr_match_misc3 *misc3) +{ + return misc3->geneve_tlv_option_0_data; +} + static bool dr_matcher_supp_tnl_geneve(struct mlx5dr_cmd_caps *caps) { @@ -148,6 +155,109 @@ dr_mask_is_tnl_geneve(struct mlx5dr_match_param *mask, dr_matcher_supp_tnl_geneve(&dmn->info.caps); } +static bool dr_mask_is_tnl_gtpu_set(struct mlx5dr_match_misc3 *misc3) +{ + return misc3->gtpu_msg_flags || misc3->gtpu_msg_type || misc3->gtpu_teid; +} + +static bool dr_matcher_supp_tnl_gtpu(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_ENABLED; +} + +static bool dr_mask_is_tnl_gtpu(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return dr_mask_is_tnl_gtpu_set(&mask->misc3) && + dr_matcher_supp_tnl_gtpu(&dmn->info.caps); +} + +static int dr_matcher_supp_tnl_gtpu_dw_0(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_DW_0_ENABLED; +} + +static bool dr_mask_is_tnl_gtpu_dw_0(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return mask->misc3.gtpu_dw_0 && + dr_matcher_supp_tnl_gtpu_dw_0(&dmn->info.caps); +} + +static int dr_matcher_supp_tnl_gtpu_teid(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_TEID_ENABLED; +} + +static bool dr_mask_is_tnl_gtpu_teid(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return mask->misc3.gtpu_teid && + dr_matcher_supp_tnl_gtpu_teid(&dmn->info.caps); +} + +static int dr_matcher_supp_tnl_gtpu_dw_2(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_DW_2_ENABLED; +} + +static bool dr_mask_is_tnl_gtpu_dw_2(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return mask->misc3.gtpu_dw_2 && + 
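The dr_matcher_supp_*() helpers in this hunk all reduce to testing one bit of the firmware flex_protocols capability word (and the earlier kzalloc/kfree to kvzalloc/kvfree switch lets the potentially large flow-group inbox fall back to vmalloc). A trivial standalone form of the bit test, with placeholder bit positions rather than the PRM-defined ones:

#include <stdio.h>

#define FAKE_PARSER_GTPU_ENABLED        (1u << 11)
#define FAKE_PARSER_GTPU_TEID_ENABLED   (1u << 19)

static int parser_supported(unsigned int flex_protocols, unsigned int bit)
{
        return !!(flex_protocols & bit);
}

int main(void)
{
        unsigned int caps = FAKE_PARSER_GTPU_ENABLED;

        printf("gtpu=%d teid=%d\n",
               parser_supported(caps, FAKE_PARSER_GTPU_ENABLED),
               parser_supported(caps, FAKE_PARSER_GTPU_TEID_ENABLED));
        return 0;
}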
dr_matcher_supp_tnl_gtpu_dw_2(&dmn->info.caps); +} + +static int dr_matcher_supp_tnl_gtpu_first_ext(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_GTPU_FIRST_EXT_DW_0_ENABLED; +} + +static bool dr_mask_is_tnl_gtpu_first_ext(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return mask->misc3.gtpu_first_ext_dw_0 && + dr_matcher_supp_tnl_gtpu_first_ext(&dmn->info.caps); +} + +static bool dr_mask_is_tnl_gtpu_flex_parser_0(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + struct mlx5dr_cmd_caps *caps = &dmn->info.caps; + + return (dr_is_flex_parser_0_id(caps->flex_parser_id_gtpu_dw_0) && + dr_mask_is_tnl_gtpu_dw_0(mask, dmn)) || + (dr_is_flex_parser_0_id(caps->flex_parser_id_gtpu_teid) && + dr_mask_is_tnl_gtpu_teid(mask, dmn)) || + (dr_is_flex_parser_0_id(caps->flex_parser_id_gtpu_dw_2) && + dr_mask_is_tnl_gtpu_dw_2(mask, dmn)) || + (dr_is_flex_parser_0_id(caps->flex_parser_id_gtpu_first_ext_dw_0) && + dr_mask_is_tnl_gtpu_first_ext(mask, dmn)); +} + +static bool dr_mask_is_tnl_gtpu_flex_parser_1(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + struct mlx5dr_cmd_caps *caps = &dmn->info.caps; + + return (dr_is_flex_parser_1_id(caps->flex_parser_id_gtpu_dw_0) && + dr_mask_is_tnl_gtpu_dw_0(mask, dmn)) || + (dr_is_flex_parser_1_id(caps->flex_parser_id_gtpu_teid) && + dr_mask_is_tnl_gtpu_teid(mask, dmn)) || + (dr_is_flex_parser_1_id(caps->flex_parser_id_gtpu_dw_2) && + dr_mask_is_tnl_gtpu_dw_2(mask, dmn)) || + (dr_is_flex_parser_1_id(caps->flex_parser_id_gtpu_first_ext_dw_0) && + dr_mask_is_tnl_gtpu_first_ext(mask, dmn)); +} + +static bool dr_mask_is_tnl_gtpu_any(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return dr_mask_is_tnl_gtpu_flex_parser_0(mask, dmn) || + dr_mask_is_tnl_gtpu_flex_parser_1(mask, dmn) || + dr_mask_is_tnl_gtpu(mask, dmn); +} + static int dr_matcher_supp_icmp_v4(struct mlx5dr_cmd_caps *caps) { return (caps->sw_format_ver == MLX5_STEERING_FORMAT_CONNECTX_6DX) || @@ -199,6 +309,65 @@ static bool dr_mask_is_gvmi_or_qpn_set(struct mlx5dr_match_misc *misc) return (misc->source_sqn || misc->source_port); } +static bool dr_mask_is_flex_parser_id_0_3_set(u32 flex_parser_id, + u32 flex_parser_value) +{ + if (flex_parser_id) + return flex_parser_id <= DR_STE_MAX_FLEX_0_ID; + + /* Using flex_parser 0 means that id is zero, thus value must be set. 
*/ + return flex_parser_value; +} + +static bool dr_mask_is_flex_parser_0_3_set(struct mlx5dr_match_misc4 *misc4) +{ + return (dr_mask_is_flex_parser_id_0_3_set(misc4->prog_sample_field_id_0, + misc4->prog_sample_field_value_0) || + dr_mask_is_flex_parser_id_0_3_set(misc4->prog_sample_field_id_1, + misc4->prog_sample_field_value_1) || + dr_mask_is_flex_parser_id_0_3_set(misc4->prog_sample_field_id_2, + misc4->prog_sample_field_value_2) || + dr_mask_is_flex_parser_id_0_3_set(misc4->prog_sample_field_id_3, + misc4->prog_sample_field_value_3)); +} + +static bool dr_mask_is_flex_parser_id_4_7_set(u32 flex_parser_id) +{ + return flex_parser_id > DR_STE_MAX_FLEX_0_ID && + flex_parser_id <= DR_STE_MAX_FLEX_1_ID; +} + +static bool dr_mask_is_flex_parser_4_7_set(struct mlx5dr_match_misc4 *misc4) +{ + return (dr_mask_is_flex_parser_id_4_7_set(misc4->prog_sample_field_id_0) || + dr_mask_is_flex_parser_id_4_7_set(misc4->prog_sample_field_id_1) || + dr_mask_is_flex_parser_id_4_7_set(misc4->prog_sample_field_id_2) || + dr_mask_is_flex_parser_id_4_7_set(misc4->prog_sample_field_id_3)); +} + +static int dr_matcher_supp_tnl_mpls_over_gre(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_MPLS_OVER_GRE_ENABLED; +} + +static bool dr_mask_is_tnl_mpls_over_gre(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return DR_MASK_IS_OUTER_MPLS_OVER_GRE_SET(&mask->misc2) && + dr_matcher_supp_tnl_mpls_over_gre(&dmn->info.caps); +} + +static int dr_matcher_supp_tnl_mpls_over_udp(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & mlx5_FLEX_PARSER_MPLS_OVER_UDP_ENABLED; +} + +static bool dr_mask_is_tnl_mpls_over_udp(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + return DR_MASK_IS_OUTER_MPLS_OVER_UDP_SET(&mask->misc2) && + dr_matcher_supp_tnl_mpls_over_udp(&dmn->info.caps); +} int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher, struct mlx5dr_matcher_rx_tx *nic_matcher, enum mlx5dr_ipv outer_ipv, @@ -251,6 +420,9 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC3) mask.misc3 = matcher->mask.misc3; + if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC4) + mask.misc4 = matcher->mask.misc4; + ret = mlx5dr_ste_build_pre_check(dmn, matcher->match_criteria, &matcher->mask, NULL); if (ret) @@ -321,9 +493,28 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, if (dr_mask_is_tnl_vxlan_gpe(&mask, dmn)) mlx5dr_ste_build_tnl_vxlan_gpe(ste_ctx, &sb[idx++], &mask, inner, rx); - else if (dr_mask_is_tnl_geneve(&mask, dmn)) + else if (dr_mask_is_tnl_geneve(&mask, dmn)) { mlx5dr_ste_build_tnl_geneve(ste_ctx, &sb[idx++], &mask, inner, rx); + if (dr_mask_is_tnl_geneve_tlv_opt(&mask.misc3)) + mlx5dr_ste_build_tnl_geneve_tlv_opt(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); + } else if (dr_mask_is_tnl_gtpu_any(&mask, dmn)) { + if (dr_mask_is_tnl_gtpu_flex_parser_0(&mask, dmn)) + mlx5dr_ste_build_tnl_gtpu_flex_parser_0(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); + + if (dr_mask_is_tnl_gtpu_flex_parser_1(&mask, dmn)) + mlx5dr_ste_build_tnl_gtpu_flex_parser_1(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); + + if (dr_mask_is_tnl_gtpu(&mask, dmn)) + mlx5dr_ste_build_tnl_gtpu(ste_ctx, &sb[idx++], + &mask, inner, rx); + } if (DR_MASK_IS_ETH_L4_MISC_SET(mask.misc3, outer)) mlx5dr_ste_build_eth_l4_misc(ste_ctx, &sb[idx++], @@ -333,17 +524,20 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher 
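The id range checks above encode the split that parsers 0-3 live in the FLEX_PARSER_0 STE and parsers 4-7 in FLEX_PARSER_1; per the comments later in this diff, DR_STE_MAX_FLEX_0_ID is 3 and DR_STE_MAX_FLEX_1_ID is 7. The classification as a standalone function:

#include <stdio.h>

#define MAX_FLEX_0_ID 3
#define MAX_FLEX_1_ID 7

static const char *flex_parser_ste(unsigned int parser_id)
{
        if (parser_id <= MAX_FLEX_0_ID)
                return "FLEX_PARSER_0";
        if (parser_id <= MAX_FLEX_1_ID)
                return "FLEX_PARSER_1";
        return "invalid";
}

int main(void)
{
        unsigned int id;

        for (id = 0; id < 9; id++)
                printf("parser %u -> %s\n", id, flex_parser_ste(id));
        return 0;
}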
*matcher, mlx5dr_ste_build_mpls(ste_ctx, &sb[idx++], &mask, inner, rx); - if (DR_MASK_IS_TNL_MPLS_SET(mask.misc2)) - mlx5dr_ste_build_tnl_mpls(ste_ctx, &sb[idx++], - &mask, inner, rx); + if (dr_mask_is_tnl_mpls_over_gre(&mask, dmn)) + mlx5dr_ste_build_tnl_mpls_over_gre(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); + else if (dr_mask_is_tnl_mpls_over_udp(&mask, dmn)) + mlx5dr_ste_build_tnl_mpls_over_udp(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); + + if (dr_mask_is_icmp(&mask, dmn)) + mlx5dr_ste_build_icmp(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); - if (dr_mask_is_icmp(&mask, dmn)) { - ret = mlx5dr_ste_build_icmp(ste_ctx, &sb[idx++], - &mask, &dmn->info.caps, - inner, rx); - if (ret) - return ret; - } if (dr_mask_is_tnl_gre_set(&mask.misc)) mlx5dr_ste_build_tnl_gre(ste_ctx, &sb[idx++], &mask, inner, rx); @@ -404,10 +598,26 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, mlx5dr_ste_build_mpls(ste_ctx, &sb[idx++], &mask, inner, rx); - if (DR_MASK_IS_TNL_MPLS_SET(mask.misc2)) - mlx5dr_ste_build_tnl_mpls(ste_ctx, &sb[idx++], - &mask, inner, rx); + if (dr_mask_is_tnl_mpls_over_gre(&mask, dmn)) + mlx5dr_ste_build_tnl_mpls_over_gre(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); + else if (dr_mask_is_tnl_mpls_over_udp(&mask, dmn)) + mlx5dr_ste_build_tnl_mpls_over_udp(ste_ctx, &sb[idx++], + &mask, &dmn->info.caps, + inner, rx); } + + if (matcher->match_criteria & DR_MATCHER_CRITERIA_MISC4) { + if (dr_mask_is_flex_parser_0_3_set(&mask.misc4)) + mlx5dr_ste_build_flex_parser_0(ste_ctx, &sb[idx++], + &mask, false, rx); + + if (dr_mask_is_flex_parser_4_7_set(&mask.misc4)) + mlx5dr_ste_build_flex_parser_1(ste_ctx, &sb[idx++], + &mask, false, rx); + } + /* Empty matcher, takes all */ if (matcher->match_criteria == DR_MATCHER_CRITERIA_EMPTY) mlx5dr_ste_build_empty_always_hit(&sb[idx++], rx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c index b337d6626bff..43356fad53de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@ -952,6 +952,17 @@ static bool dr_rule_verify(struct mlx5dr_matcher *matcher, return false; } } + + if (match_criteria & DR_MATCHER_CRITERIA_MISC4) { + s_idx = offsetof(struct mlx5dr_match_param, misc4); + e_idx = min(s_idx + sizeof(param->misc4), value_size); + + if (!dr_rule_cmp_value_to_mask(mask_p, param_p, s_idx, e_idx)) { + mlx5dr_err(matcher->tbl->dmn, + "Rule misc4 parameters contains a value not specified by mask\n"); + return false; + } + } return true; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c index 83c4c877d558..12cf323a5943 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -32,6 +32,7 @@ struct dr_qp_rtr_attr { u8 min_rnr_timer; u8 sgid_index; u16 udp_src_port; + u8 fl:1; }; struct dr_qp_rts_attr { @@ -45,6 +46,7 @@ struct dr_qp_init_attr { u32 pdn; u32 max_send_wr; struct mlx5_uars_page *uar; + u8 isolate_vl_tc:1; }; static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64) @@ -157,6 +159,7 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev, qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, 
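The misc4 window added to dr_rule_verify() reuses dr_rule_cmp_value_to_mask(), whose job is to reject any value byte that carries bits outside the mask. A byte-wise model of that containment test, assuming that semantic:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static bool value_within_mask(const uint8_t *mask, const uint8_t *value,
                              size_t s_idx, size_t e_idx)
{
        size_t i;

        for (i = s_idx; i < e_idx; i++)
                if (value[i] & ~mask[i])
                        return false;   /* value sets a bit the mask doesn't cover */
        return true;
}

int main(void)
{
        uint8_t mask[4]  = { 0xff, 0x0f, 0x00, 0xff };
        uint8_t value[4] = { 0x12, 0x1f, 0x00, 0x34 };

        printf("%d\n", value_within_mask(mask, value, 0, 4));   /* 0: byte 1 overflows */
        return 0;
}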
isolate_vl_tc, attr->isolate_vl_tc); MLX5_SET(qpc, qpc, pd, attr->pdn); MLX5_SET(qpc, qpc, uar_page, attr->uar->index); MLX5_SET(qpc, qpc, log_page_size, @@ -169,6 +172,7 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev, MLX5_SET(qpc, qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt)); MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ); MLX5_SET(qpc, qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt)); + MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(mdev)); MLX5_SET64(qpc, qpc, dbr_addr, dr_qp->wq_ctrl.db.dma); if (MLX5_CAP_GEN(mdev, cqe_version) == 1) MLX5_SET(qpc, qpc, user_index, 0xFFFFFF); @@ -212,7 +216,7 @@ static void dr_destroy_qp(struct mlx5_core_dev *mdev, static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl) { dma_wmb(); - *dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xfffff); + *dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xffff); /* After wmb() the hw aware of new work */ wmb(); @@ -222,7 +226,7 @@ static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl) static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr, u32 rkey, struct dr_data_seg *data_seg, - u32 opcode, int nreq) + u32 opcode, bool notify_hw) { struct mlx5_wqe_raddr_seg *wq_raddr; struct mlx5_wqe_ctrl_seg *wq_ctrl; @@ -254,16 +258,16 @@ static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr, dr_qp->sq.wqe_head[idx] = dr_qp->sq.pc++; - if (nreq) + if (notify_hw) dr_cmd_notify_hw(dr_qp, wq_ctrl); } static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info) { dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey, - &send_info->write, MLX5_OPCODE_RDMA_WRITE, 0); + &send_info->write, MLX5_OPCODE_RDMA_WRITE, false); dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey, - &send_info->read, MLX5_OPCODE_RDMA_READ, 1); + &send_info->read, MLX5_OPCODE_RDMA_READ, true); } /** @@ -405,7 +409,7 @@ static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn, alloc_size = *num_stes * DR_STE_SIZE; } - *data = kzalloc(alloc_size, GFP_KERNEL); + *data = kvzalloc(alloc_size, GFP_KERNEL); if (!*data) return -ENOMEM; @@ -504,7 +508,7 @@ int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn, } out_free: - kfree(data); + kvfree(data); return ret; } @@ -561,7 +565,7 @@ int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn, } out_free: - kfree(data); + kvfree(data); return ret; } @@ -571,12 +575,12 @@ int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn, struct postsend_info send_info = {}; int ret; - send_info.write.addr = (uintptr_t)action->rewrite.data; - send_info.write.length = action->rewrite.num_of_actions * + send_info.write.addr = (uintptr_t)action->rewrite->data; + send_info.write.length = action->rewrite->num_of_actions * DR_MODIFY_ACTION_SIZE; send_info.write.lkey = 0; - send_info.remote_addr = action->rewrite.chunk->mr_addr; - send_info.rkey = action->rewrite.chunk->rkey; + send_info.remote_addr = action->rewrite->chunk->mr_addr; + send_info.rkey = action->rewrite->chunk->rkey; ret = dr_postsend_icm_data(dmn, &send_info); @@ -649,6 +653,7 @@ static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev, attr->udp_src_port); MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num); + MLX5_SET(qpc, qpc, primary_address_path.fl, attr->fl); MLX5_SET(qpc, qpc, min_rnr_nak, 1); MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP); @@ -657,6 +662,19 @@ static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev, return mlx5_cmd_exec_in(mdev, init2rtr_qp, in); } +static bool 
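The doorbell fix above (0xfffff to 0xffff) matters because the SQ doorbell record holds a 16-bit producer counter; masking with 20 bits leaks stale high bits once sq.pc passes 64K. A two-line illustration:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint32_t pc = 0x12345;                          /* producer counter past 64K */

        printf("wrong: 0x%05x\n", pc & 0xfffff);        /* 0x12345: 20 bits leak through */
        printf("right: 0x%04x\n", pc & 0xffff);         /* 0x2345: proper 16-bit wrap */
        return 0;
}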
dr_send_allow_fl(struct mlx5dr_cmd_caps *caps) +{ + /* Check whether RC RoCE QP creation with force loopback is allowed. + * There are two separate capability bits for this: + * - force loopback when RoCE is enabled + * - force loopback when RoCE is disabled + */ + return ((caps->roce_caps.roce_en && + caps->roce_caps.fl_rc_qp_when_roce_enabled) || + (!caps->roce_caps.roce_en && + caps->roce_caps.fl_rc_qp_when_roce_disabled)); +} + static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn) { struct mlx5dr_qp *dr_qp = dmn->send_ring->qp; @@ -675,17 +693,26 @@ static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn) } /* RTR */ - ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index, &rtr_attr.dgid_attr); - if (ret) - return ret; - rtr_attr.mtu = mtu; rtr_attr.qp_num = dr_qp->qpn; rtr_attr.min_rnr_timer = 12; rtr_attr.port_num = port; - rtr_attr.sgid_index = gid_index; rtr_attr.udp_src_port = dmn->info.caps.roce_min_src_udp; + /* If QP creation with force loopback is allowed, then there + * is no need for GID index when creating the QP. + * Otherwise we query GID attributes and use GID index. + */ + rtr_attr.fl = dr_send_allow_fl(&dmn->info.caps); + if (!rtr_attr.fl) { + ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index, + &rtr_attr.dgid_attr); + if (ret) + return ret; + + rtr_attr.sgid_index = gid_index; + } + ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr); if (ret) { mlx5dr_err(dmn, "Failed modify QP init2rtr\n"); @@ -899,6 +926,11 @@ int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn) init_attr.pdn = dmn->pdn; init_attr.uar = dmn->uar; init_attr.max_send_wr = QUEUE_SIZE; + + /* Isolated VL is applicable only if force loopback is supported */ + if (dr_send_allow_fl(&dmn->info.caps)) + init_attr.isolate_vl_tc = dmn->info.caps.isolate_vl_tc; + spin_lock_init(&dmn->send_ring->lock); dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c index f49abc7a4b9b..9b1529137cba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -852,6 +852,35 @@ static void dr_ste_copy_mask_misc3(char *mask, struct mlx5dr_match_misc3 *spec) spec->icmpv4_code = MLX5_GET(fte_match_set_misc3, mask, icmp_code); spec->icmpv6_type = MLX5_GET(fte_match_set_misc3, mask, icmpv6_type); spec->icmpv6_code = MLX5_GET(fte_match_set_misc3, mask, icmpv6_code); + spec->geneve_tlv_option_0_data = + MLX5_GET(fte_match_set_misc3, mask, geneve_tlv_option_0_data); + spec->gtpu_msg_flags = MLX5_GET(fte_match_set_misc3, mask, gtpu_msg_flags); + spec->gtpu_msg_type = MLX5_GET(fte_match_set_misc3, mask, gtpu_msg_type); + spec->gtpu_teid = MLX5_GET(fte_match_set_misc3, mask, gtpu_teid); + spec->gtpu_dw_0 = MLX5_GET(fte_match_set_misc3, mask, gtpu_dw_0); + spec->gtpu_dw_2 = MLX5_GET(fte_match_set_misc3, mask, gtpu_dw_2); + spec->gtpu_first_ext_dw_0 = + MLX5_GET(fte_match_set_misc3, mask, gtpu_first_ext_dw_0); +} + +static void dr_ste_copy_mask_misc4(char *mask, struct mlx5dr_match_misc4 *spec) +{ + spec->prog_sample_field_id_0 = + MLX5_GET(fte_match_set_misc4, mask, prog_sample_field_id_0); + spec->prog_sample_field_value_0 = + MLX5_GET(fte_match_set_misc4, mask, prog_sample_field_value_0); + spec->prog_sample_field_id_1 = + MLX5_GET(fte_match_set_misc4, mask, prog_sample_field_id_1); + spec->prog_sample_field_value_1 = + MLX5_GET(fte_match_set_misc4, mask, prog_sample_field_value_1); + 
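dr_send_allow_fl() is a small truth table over two capability bits: force-loopback RC QPs are allowed when the bit matching the current RoCE state is set. The same predicate, standalone:

#include <stdbool.h>
#include <stdio.h>

struct roce_caps {
        bool roce_en;
        bool fl_rc_qp_when_roce_enabled;
        bool fl_rc_qp_when_roce_disabled;
};

static bool allow_force_loopback(const struct roce_caps *c)
{
        return (c->roce_en && c->fl_rc_qp_when_roce_enabled) ||
               (!c->roce_en && c->fl_rc_qp_when_roce_disabled);
}

int main(void)
{
        struct roce_caps c = { .roce_en = false,
                               .fl_rc_qp_when_roce_disabled = true };

        printf("%d\n", allow_force_loopback(&c));       /* 1 */
        return 0;
}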
spec->prog_sample_field_id_2 = + MLX5_GET(fte_match_set_misc4, mask, prog_sample_field_id_2); + spec->prog_sample_field_value_2 = + MLX5_GET(fte_match_set_misc4, mask, prog_sample_field_value_2); + spec->prog_sample_field_id_3 = + MLX5_GET(fte_match_set_misc4, mask, prog_sample_field_id_3); + spec->prog_sample_field_value_3 = + MLX5_GET(fte_match_set_misc4, mask, prog_sample_field_value_3); } void mlx5dr_ste_copy_param(u8 match_criteria, @@ -925,6 +954,20 @@ void mlx5dr_ste_copy_param(u8 match_criteria, } dr_ste_copy_mask_misc3(buff, &set_param->misc3); } + + param_location += sizeof(struct mlx5dr_match_misc3); + + if (match_criteria & DR_MATCHER_CRITERIA_MISC4) { + if (mask->match_sz < param_location + + sizeof(struct mlx5dr_match_misc4)) { + memcpy(tail_param, data + param_location, + mask->match_sz - param_location); + buff = tail_param; + } else { + buff = data + param_location; + } + dr_ste_copy_mask_misc4(buff, &set_param->misc4); + } } void mlx5dr_ste_build_eth_l2_src_dst(struct mlx5dr_ste_ctx *ste_ctx, @@ -1051,26 +1094,40 @@ void mlx5dr_ste_build_tnl_gre(struct mlx5dr_ste_ctx *ste_ctx, ste_ctx->build_tnl_gre_init(sb, mask); } -void mlx5dr_ste_build_tnl_mpls(struct mlx5dr_ste_ctx *ste_ctx, - struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, - bool inner, bool rx) +void mlx5dr_ste_build_tnl_mpls_over_gre(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + sb->caps = caps; + return ste_ctx->build_tnl_mpls_over_gre_init(sb, mask); +} + +void mlx5dr_ste_build_tnl_mpls_over_udp(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) { sb->rx = rx; sb->inner = inner; - ste_ctx->build_tnl_mpls_init(sb, mask); + sb->caps = caps; + return ste_ctx->build_tnl_mpls_over_udp_init(sb, mask); } -int mlx5dr_ste_build_icmp(struct mlx5dr_ste_ctx *ste_ctx, - struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, - struct mlx5dr_cmd_caps *caps, - bool inner, bool rx) +void mlx5dr_ste_build_icmp(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) { sb->rx = rx; sb->inner = inner; sb->caps = caps; - return ste_ctx->build_icmp_init(sb, mask); + ste_ctx->build_icmp_init(sb, mask); } void mlx5dr_ste_build_general_purpose(struct mlx5dr_ste_ctx *ste_ctx, @@ -1113,6 +1170,52 @@ void mlx5dr_ste_build_tnl_geneve(struct mlx5dr_ste_ctx *ste_ctx, ste_ctx->build_tnl_geneve_init(sb, mask); } +void mlx5dr_ste_build_tnl_geneve_tlv_opt(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) +{ + sb->rx = rx; + sb->caps = caps; + sb->inner = inner; + ste_ctx->build_tnl_geneve_tlv_opt_init(sb, mask); +} + +void mlx5dr_ste_build_tnl_gtpu(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_tnl_gtpu_init(sb, mask); +} + +void mlx5dr_ste_build_tnl_gtpu_flex_parser_0(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) +{ + sb->rx = rx; + sb->caps = caps; + sb->inner = inner; + ste_ctx->build_tnl_gtpu_flex_parser_0_init(sb, mask); +} + +void 
mlx5dr_ste_build_tnl_gtpu_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) +{ + sb->rx = rx; + sb->caps = caps; + sb->inner = inner; + ste_ctx->build_tnl_gtpu_flex_parser_1_init(sb, mask); +} + void mlx5dr_ste_build_register_0(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, @@ -1148,6 +1251,26 @@ void mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_ctx *ste_ctx, ste_ctx->build_src_gvmi_qpn_init(sb, mask); } +void mlx5dr_ste_build_flex_parser_0(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_flex_parser_0_init(sb, mask); +} + +void mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) +{ + sb->rx = rx; + sb->inner = inner; + ste_ctx->build_flex_parser_1_init(sb, mask); +} + static struct mlx5dr_ste_ctx *mlx5dr_ste_ctx_arr[] = { [MLX5_STEERING_FORMAT_CONNECTX_5] = &ste_ctx_v0, [MLX5_STEERING_FORMAT_CONNECTX_6DX] = &ste_ctx_v1, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h index 06bcb0ee8f96..992b591bf0c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h @@ -62,6 +62,13 @@ in_out##_first_mpls_ttl); \ } while (0) +#define DR_STE_SET_FLEX_PARSER_FIELD(tag, fname, caps, spec) do { \ + u8 parser_id = (caps)->flex_parser_id_##fname; \ + u8 *parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id); \ + *(__be32 *)parser_ptr = cpu_to_be32((spec)->fname);\ + (spec)->fname = 0;\ +} while (0) + #define DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(_misc) (\ (_misc)->outer_first_mpls_over_gre_label || \ (_misc)->outer_first_mpls_over_gre_exp || \ @@ -86,8 +93,22 @@ enum dr_ste_action_modify_type_l4 { DR_STE_ACTION_MDFY_TYPE_L4_UDP = 0x2, }; +enum { + HDR_MPLS_OFFSET_LABEL = 12, + HDR_MPLS_OFFSET_EXP = 9, + HDR_MPLS_OFFSET_S_BOS = 8, + HDR_MPLS_OFFSET_TTL = 0, +}; + u16 mlx5dr_ste_conv_bit_to_byte_mask(u8 *bit_mask); +static inline u8 * +dr_ste_calc_flex_parser_offset(u8 *tag, u8 parser_id) +{ + /* Calculate tag byte offset based on flex parser id */ + return tag + 4 * (3 - (parser_id % 4)); +} + #define DR_STE_CTX_BUILDER(fname) \ ((*build_##fname##_init)(struct mlx5dr_ste_build *sb, \ struct mlx5dr_match_param *mask)) @@ -106,14 +127,22 @@ struct mlx5dr_ste_ctx { void DR_STE_CTX_BUILDER(mpls); void DR_STE_CTX_BUILDER(tnl_gre); void DR_STE_CTX_BUILDER(tnl_mpls); - int DR_STE_CTX_BUILDER(icmp); + void DR_STE_CTX_BUILDER(tnl_mpls_over_gre); + void DR_STE_CTX_BUILDER(tnl_mpls_over_udp); + void DR_STE_CTX_BUILDER(icmp); void DR_STE_CTX_BUILDER(general_purpose); void DR_STE_CTX_BUILDER(eth_l4_misc); void DR_STE_CTX_BUILDER(tnl_vxlan_gpe); void DR_STE_CTX_BUILDER(tnl_geneve); + void DR_STE_CTX_BUILDER(tnl_geneve_tlv_opt); void DR_STE_CTX_BUILDER(register_0); void DR_STE_CTX_BUILDER(register_1); void DR_STE_CTX_BUILDER(src_gvmi_qpn); + void DR_STE_CTX_BUILDER(flex_parser_0); + void DR_STE_CTX_BUILDER(flex_parser_1); + void DR_STE_CTX_BUILDER(tnl_gtpu); + void DR_STE_CTX_BUILDER(tnl_gtpu_flex_parser_0); + void DR_STE_CTX_BUILDER(tnl_gtpu_flex_parser_1); /* Getters and Setters */ void (*ste_init)(u8 *hw_ste_p, u16 lu_type, diff --git 
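dr_ste_calc_flex_parser_offset() relies on the four parser dwords of a tag being laid out high-to-low, so parser (id % 4) lands at byte 4 * (3 - id % 4). Printing the full mapping makes the layout obvious:

#include <stdio.h>

static unsigned int flex_parser_byte_offset(unsigned int parser_id)
{
        return 4 * (3 - (parser_id % 4));
}

int main(void)
{
        unsigned int id;

        for (id = 0; id < 8; id++)      /* 0->12, 1->8, 2->4, 3->0, then repeats */
                printf("parser %u -> tag byte %u\n", id, flex_parser_byte_offset(id));
        return 0;
}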
a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c index 9ec079247c4b..0757a4e8540e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c @@ -331,7 +331,7 @@ static void dr_ste_v0_set_tx_push_vlan(u8 *hw_ste_p, u32 vlan_hdr, MLX5_SET(ste_sx_transmit, hw_ste_p, action_type, DR_STE_ACTION_TYPE_PUSH_VLAN); MLX5_SET(ste_sx_transmit, hw_ste_p, encap_pointer_vlan_data, vlan_hdr); - /* Due to HW limitation we need to set this bit, otherwise reforamt + + /* Due to HW limitation we need to set this bit, otherwise reformat + * push vlan will not work. */ if (go_back) @@ -1248,32 +1248,29 @@ dr_ste_v0_build_tnl_mpls_tag(struct mlx5dr_match_param *value, u8 *tag) { struct mlx5dr_match_misc2 *misc_2 = &value->misc2; + u32 mpls_hdr; if (DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(misc_2)) { - DR_STE_SET_TAG(flex_parser_0, tag, parser_3_label, - misc_2, outer_first_mpls_over_gre_label); - - DR_STE_SET_TAG(flex_parser_0, tag, parser_3_exp, - misc_2, outer_first_mpls_over_gre_exp); - - DR_STE_SET_TAG(flex_parser_0, tag, parser_3_s_bos, - misc_2, outer_first_mpls_over_gre_s_bos); - - DR_STE_SET_TAG(flex_parser_0, tag, parser_3_ttl, - misc_2, outer_first_mpls_over_gre_ttl); + mpls_hdr = misc_2->outer_first_mpls_over_gre_label << HDR_MPLS_OFFSET_LABEL; + misc_2->outer_first_mpls_over_gre_label = 0; + mpls_hdr |= misc_2->outer_first_mpls_over_gre_exp << HDR_MPLS_OFFSET_EXP; + misc_2->outer_first_mpls_over_gre_exp = 0; + mpls_hdr |= misc_2->outer_first_mpls_over_gre_s_bos << HDR_MPLS_OFFSET_S_BOS; + misc_2->outer_first_mpls_over_gre_s_bos = 0; + mpls_hdr |= misc_2->outer_first_mpls_over_gre_ttl << HDR_MPLS_OFFSET_TTL; + misc_2->outer_first_mpls_over_gre_ttl = 0; } else { - DR_STE_SET_TAG(flex_parser_0, tag, parser_3_label, - misc_2, outer_first_mpls_over_udp_label); - - DR_STE_SET_TAG(flex_parser_0, tag, parser_3_exp, - misc_2, outer_first_mpls_over_udp_exp); - - DR_STE_SET_TAG(flex_parser_0, tag, parser_3_s_bos, - misc_2, outer_first_mpls_over_udp_s_bos); - - DR_STE_SET_TAG(flex_parser_0, tag, parser_3_ttl, - misc_2, outer_first_mpls_over_udp_ttl); + mpls_hdr = misc_2->outer_first_mpls_over_udp_label << HDR_MPLS_OFFSET_LABEL; + misc_2->outer_first_mpls_over_udp_label = 0; + mpls_hdr |= misc_2->outer_first_mpls_over_udp_exp << HDR_MPLS_OFFSET_EXP; + misc_2->outer_first_mpls_over_udp_exp = 0; + mpls_hdr |= misc_2->outer_first_mpls_over_udp_s_bos << HDR_MPLS_OFFSET_S_BOS; + misc_2->outer_first_mpls_over_udp_s_bos = 0; + mpls_hdr |= misc_2->outer_first_mpls_over_udp_ttl << HDR_MPLS_OFFSET_TTL; + misc_2->outer_first_mpls_over_udp_ttl = 0; } + + MLX5_SET(ste_flex_parser_0, tag, flex_parser_3, mpls_hdr); return 0; } @@ -1288,6 +1285,91 @@ dr_ste_v0_build_tnl_mpls_init(struct mlx5dr_ste_build *sb, sb->ste_build_tag_func = &dr_ste_v0_build_tnl_mpls_tag; } +static int +dr_ste_v0_build_tnl_mpls_over_udp_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + u8 *parser_ptr; + u8 parser_id; + u32 mpls_hdr; + + mpls_hdr = misc2->outer_first_mpls_over_udp_label << HDR_MPLS_OFFSET_LABEL; + misc2->outer_first_mpls_over_udp_label = 0; + mpls_hdr |= misc2->outer_first_mpls_over_udp_exp << HDR_MPLS_OFFSET_EXP; + misc2->outer_first_mpls_over_udp_exp = 0; + mpls_hdr |= misc2->outer_first_mpls_over_udp_s_bos << HDR_MPLS_OFFSET_S_BOS; + misc2->outer_first_mpls_over_udp_s_bos = 0; + mpls_hdr |= 
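The HDR_MPLS_OFFSET_* shifts above reassemble a standard MPLS label stack entry: label in bits 31:12, EXP/TC in bits 11:9, bottom-of-stack in bit 8, TTL in bits 7:0. A self-contained packer with one example entry:

#include <stdio.h>
#include <stdint.h>

static uint32_t mpls_lse(uint32_t label, uint32_t exp, uint32_t s, uint32_t ttl)
{
        return (label << 12) | (exp << 9) | (s << 8) | ttl;
}

int main(void)
{
        /* label 16, exp 0, bottom of stack, TTL 64 -> 0x00010140 */
        printf("0x%08x\n", mpls_lse(16, 0, 1, 64));
        return 0;
}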
misc2->outer_first_mpls_over_udp_ttl << HDR_MPLS_OFFSET_TTL; + misc2->outer_first_mpls_over_udp_ttl = 0; + + parser_id = sb->caps->flex_parser_id_mpls_over_udp; + parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id); + *(__be32 *)parser_ptr = cpu_to_be32(mpls_hdr); + + return 0; +} + +static void +dr_ste_v0_build_tnl_mpls_over_udp_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_tnl_mpls_over_udp_tag(mask, sb, sb->bit_mask); + /* STEs with lookup type FLEX_PARSER_{0/1} includes + * flex parsers_{0-3}/{4-7} respectively. + */ + sb->lu_type = sb->caps->flex_parser_id_mpls_over_udp > DR_STE_MAX_FLEX_0_ID ? + DR_STE_V0_LU_TYPE_FLEX_PARSER_1 : + DR_STE_V0_LU_TYPE_FLEX_PARSER_0; + + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_tnl_mpls_over_udp_tag; +} + +static int +dr_ste_v0_build_tnl_mpls_over_gre_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + u8 *parser_ptr; + u8 parser_id; + u32 mpls_hdr; + + mpls_hdr = misc2->outer_first_mpls_over_gre_label << HDR_MPLS_OFFSET_LABEL; + misc2->outer_first_mpls_over_gre_label = 0; + mpls_hdr |= misc2->outer_first_mpls_over_gre_exp << HDR_MPLS_OFFSET_EXP; + misc2->outer_first_mpls_over_gre_exp = 0; + mpls_hdr |= misc2->outer_first_mpls_over_gre_s_bos << HDR_MPLS_OFFSET_S_BOS; + misc2->outer_first_mpls_over_gre_s_bos = 0; + mpls_hdr |= misc2->outer_first_mpls_over_gre_ttl << HDR_MPLS_OFFSET_TTL; + misc2->outer_first_mpls_over_gre_ttl = 0; + + parser_id = sb->caps->flex_parser_id_mpls_over_gre; + parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id); + *(__be32 *)parser_ptr = cpu_to_be32(mpls_hdr); + + return 0; +} + +static void +dr_ste_v0_build_tnl_mpls_over_gre_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_tnl_mpls_over_gre_tag(mask, sb, sb->bit_mask); + + /* STEs with lookup type FLEX_PARSER_{0/1} includes + * flex parsers_{0-3}/{4-7} respectively. + */ + sb->lu_type = sb->caps->flex_parser_id_mpls_over_gre > DR_STE_MAX_FLEX_0_ID ? 
+ DR_STE_V0_LU_TYPE_FLEX_PARSER_1 : + DR_STE_V0_LU_TYPE_FLEX_PARSER_0; + + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_tnl_mpls_over_gre_tag; +} + #define ICMP_TYPE_OFFSET_FIRST_DW 24 #define ICMP_CODE_OFFSET_FIRST_DW 16 @@ -1300,9 +1382,11 @@ dr_ste_v0_build_icmp_tag(struct mlx5dr_match_param *value, u32 *icmp_header_data; int dw0_location; int dw1_location; + u8 *parser_ptr; u8 *icmp_type; u8 *icmp_code; bool is_ipv4; + u32 icmp_hdr; is_ipv4 = DR_MASK_IS_ICMPV4_SET(misc_3); if (is_ipv4) { @@ -1319,47 +1403,40 @@ dr_ste_v0_build_icmp_tag(struct mlx5dr_match_param *value, dw1_location = sb->caps->flex_parser_id_icmpv6_dw1; } - switch (dw0_location) { - case 4: - MLX5_SET(ste_flex_parser_1, tag, flex_parser_4, - (*icmp_type << ICMP_TYPE_OFFSET_FIRST_DW) | - (*icmp_code << ICMP_TYPE_OFFSET_FIRST_DW)); - - *icmp_type = 0; - *icmp_code = 0; - break; - default: - return -EINVAL; - } + parser_ptr = dr_ste_calc_flex_parser_offset(tag, dw0_location); + icmp_hdr = (*icmp_type << ICMP_TYPE_OFFSET_FIRST_DW) | + (*icmp_code << ICMP_CODE_OFFSET_FIRST_DW); + *(__be32 *)parser_ptr = cpu_to_be32(icmp_hdr); + *icmp_code = 0; + *icmp_type = 0; - switch (dw1_location) { - case 5: - MLX5_SET(ste_flex_parser_1, tag, flex_parser_5, - *icmp_header_data); - *icmp_header_data = 0; - break; - default: - return -EINVAL; - } + parser_ptr = dr_ste_calc_flex_parser_offset(tag, dw1_location); + *(__be32 *)parser_ptr = cpu_to_be32(*icmp_header_data); + *icmp_header_data = 0; return 0; } -static int +static void dr_ste_v0_build_icmp_init(struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask) { - int ret; + u8 parser_id; + bool is_ipv4; - ret = dr_ste_v0_build_icmp_tag(mask, sb, sb->bit_mask); - if (ret) - return ret; + dr_ste_v0_build_icmp_tag(mask, sb, sb->bit_mask); - sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_1; + /* STEs with lookup type FLEX_PARSER_{0/1} includes + * flex parsers_{0-3}/{4-7} respectively. + */ + is_ipv4 = DR_MASK_IS_ICMPV4_SET(&mask->misc3); + parser_id = is_ipv4 ? sb->caps->flex_parser_id_icmp_dw0 : + sb->caps->flex_parser_id_icmpv6_dw0; + sb->lu_type = parser_id > DR_STE_MAX_FLEX_0_ID ? 
+ DR_STE_V0_LU_TYPE_FLEX_PARSER_1 : + DR_STE_V0_LU_TYPE_FLEX_PARSER_0; sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); sb->ste_build_tag_func = &dr_ste_v0_build_icmp_tag; - - return 0; } static int @@ -1595,6 +1672,185 @@ dr_ste_v0_build_src_gvmi_qpn_init(struct mlx5dr_ste_build *sb, sb->ste_build_tag_func = &dr_ste_v0_build_src_gvmi_qpn_tag; } +static void dr_ste_v0_set_flex_parser(u32 *misc4_field_id, + u32 *misc4_field_value, + bool *parser_is_used, + u8 *tag) +{ + u32 id = *misc4_field_id; + u8 *parser_ptr; + + if (parser_is_used[id]) + return; + + parser_is_used[id] = true; + parser_ptr = dr_ste_calc_flex_parser_offset(tag, id); + + *(__be32 *)parser_ptr = cpu_to_be32(*misc4_field_value); + *misc4_field_id = 0; + *misc4_field_value = 0; +} + +static int dr_ste_v0_build_flex_parser_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc4 *misc_4_mask = &value->misc4; + bool parser_is_used[DR_NUM_OF_FLEX_PARSERS] = {}; + + dr_ste_v0_set_flex_parser(&misc_4_mask->prog_sample_field_id_0, + &misc_4_mask->prog_sample_field_value_0, + parser_is_used, tag); + + dr_ste_v0_set_flex_parser(&misc_4_mask->prog_sample_field_id_1, + &misc_4_mask->prog_sample_field_value_1, + parser_is_used, tag); + + dr_ste_v0_set_flex_parser(&misc_4_mask->prog_sample_field_id_2, + &misc_4_mask->prog_sample_field_value_2, + parser_is_used, tag); + + dr_ste_v0_set_flex_parser(&misc_4_mask->prog_sample_field_id_3, + &misc_4_mask->prog_sample_field_value_3, + parser_is_used, tag); + + return 0; +} + +static void dr_ste_v0_build_flex_parser_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_0; + dr_ste_v0_build_flex_parser_tag(mask, sb, sb->bit_mask); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tag; +} + +static void dr_ste_v0_build_flex_parser_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_1; + dr_ste_v0_build_flex_parser_tag(mask, sb, sb->bit_mask); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tag; +} + +static int +dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + u8 parser_id = sb->caps->flex_parser_id_geneve_tlv_option_0; + u8 *parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id); + + MLX5_SET(ste_flex_parser_0, parser_ptr, flex_parser_3, + misc3->geneve_tlv_option_0_data); + misc3->geneve_tlv_option_0_data = 0; + + return 0; +} + +static void +dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_tag(mask, sb, sb->bit_mask); + + /* STEs with lookup type FLEX_PARSER_{0/1} includes + * flex parsers_{0-3}/{4-7} respectively. + */ + sb->lu_type = sb->caps->flex_parser_id_geneve_tlv_option_0 > 3 ? 
+ DR_STE_V0_LU_TYPE_FLEX_PARSER_1 : + DR_STE_V0_LU_TYPE_FLEX_PARSER_0; + + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_tag; +} + +static int dr_ste_v0_build_flex_parser_tnl_gtpu_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + uint8_t *tag) +{ + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + + DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, + gtpu_msg_flags, misc3, + gtpu_msg_flags); + DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, + gtpu_msg_type, misc3, + gtpu_msg_type); + DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, + gtpu_teid, misc3, + gtpu_teid); + + return 0; +} + +static void dr_ste_v0_build_flex_parser_tnl_gtpu_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_flex_parser_tnl_gtpu_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_TNL_HEADER; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_flex_parser_tnl_gtpu_tag; +} + +static int +dr_ste_v0_build_tnl_gtpu_flex_parser_0_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + uint8_t *tag) +{ + if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_dw_0)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3); + if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_teid)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_teid, sb->caps, &value->misc3); + if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_dw_2)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_2, sb->caps, &value->misc3); + if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_first_ext_dw_0)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_first_ext_dw_0, sb->caps, &value->misc3); + return 0; +} + +static void +dr_ste_v0_build_tnl_gtpu_flex_parser_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_tnl_gtpu_flex_parser_0_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_0; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_tnl_gtpu_flex_parser_0_tag; +} + +static int +dr_ste_v0_build_tnl_gtpu_flex_parser_1_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + uint8_t *tag) +{ + if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_dw_0)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3); + if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_teid)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_teid, sb->caps, &value->misc3); + if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_dw_2)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_2, sb->caps, &value->misc3); + if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_first_ext_dw_0)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_first_ext_dw_0, sb->caps, &value->misc3); + return 0; +} + +static void +dr_ste_v0_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v0_build_tnl_gtpu_flex_parser_1_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V0_LU_TYPE_FLEX_PARSER_1; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v0_build_tnl_gtpu_flex_parser_1_tag; +} + struct mlx5dr_ste_ctx ste_ctx_v0 = { /* Builders */ .build_eth_l2_src_dst_init = &dr_ste_v0_build_eth_l2_src_dst_init, @@ -1609,14 +1865,22 @@ struct mlx5dr_ste_ctx ste_ctx_v0 = { .build_mpls_init = &dr_ste_v0_build_mpls_init, 
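The v0 builders above all funnel through two pieces of shared arithmetic from dr_ste.h: dr_ste_calc_flex_parser_offset(), which maps a flex parser id to a dword slot inside the STE tag, and the HDR_MPLS_OFFSET_* constants, which pack an MPLS label stack entry (RFC 3032). A standalone sketch of that math with worked values (plain user-space C, not driver code; the local enum mirrors the one added to dr_ste.h):

#include <assert.h>
#include <stdint.h>

enum {
	HDR_MPLS_OFFSET_LABEL = 12,
	HDR_MPLS_OFFSET_EXP = 9,
	HDR_MPLS_OFFSET_S_BOS = 8,
	HDR_MPLS_OFFSET_TTL = 0,
};

/* Parser ids 0-3 land in the FLEX_PARSER_0 STE and ids 4-7 in
 * FLEX_PARSER_1; within each STE the four parser dwords are stored in
 * reverse order, same as dr_ste_calc_flex_parser_offset().
 */
static unsigned int flex_parser_byte_offset(unsigned int parser_id)
{
	return 4 * (3 - (parser_id % 4));
}

/* MPLS label stack entry: label[31:12], exp[11:9], s_bos[8], ttl[7:0]. */
static uint32_t mpls_hdr_pack(uint32_t label, uint32_t exp,
			      uint32_t s_bos, uint32_t ttl)
{
	return label << HDR_MPLS_OFFSET_LABEL |
	       exp << HDR_MPLS_OFFSET_EXP |
	       s_bos << HDR_MPLS_OFFSET_S_BOS |
	       ttl << HDR_MPLS_OFFSET_TTL;
}

int main(void)
{
	/* ids 0..3 -> byte offsets 12, 8, 4, 0; ids 4..7 repeat. */
	assert(flex_parser_byte_offset(0) == 12);
	assert(flex_parser_byte_offset(3) == 0);
	assert(flex_parser_byte_offset(5) == 8);

	/* label 16, exp 0, bottom of stack, ttl 64 -> 0x00010140 */
	assert(mpls_hdr_pack(16, 0, 1, 64) == 0x10140);
	return 0;
}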
.build_tnl_gre_init = &dr_ste_v0_build_tnl_gre_init, .build_tnl_mpls_init = &dr_ste_v0_build_tnl_mpls_init, + .build_tnl_mpls_over_udp_init = &dr_ste_v0_build_tnl_mpls_over_udp_init, + .build_tnl_mpls_over_gre_init = &dr_ste_v0_build_tnl_mpls_over_gre_init, .build_icmp_init = &dr_ste_v0_build_icmp_init, .build_general_purpose_init = &dr_ste_v0_build_general_purpose_init, .build_eth_l4_misc_init = &dr_ste_v0_build_eth_l4_misc_init, .build_tnl_vxlan_gpe_init = &dr_ste_v0_build_flex_parser_tnl_vxlan_gpe_init, .build_tnl_geneve_init = &dr_ste_v0_build_flex_parser_tnl_geneve_init, + .build_tnl_geneve_tlv_opt_init = &dr_ste_v0_build_flex_parser_tnl_geneve_tlv_opt_init, .build_register_0_init = &dr_ste_v0_build_register_0_init, .build_register_1_init = &dr_ste_v0_build_register_1_init, .build_src_gvmi_qpn_init = &dr_ste_v0_build_src_gvmi_qpn_init, + .build_flex_parser_0_init = &dr_ste_v0_build_flex_parser_0_init, + .build_flex_parser_1_init = &dr_ste_v0_build_flex_parser_1_init, + .build_tnl_gtpu_init = &dr_ste_v0_build_flex_parser_tnl_gtpu_init, + .build_tnl_gtpu_flex_parser_0_init = &dr_ste_v0_build_tnl_gtpu_flex_parser_0_init, + .build_tnl_gtpu_flex_parser_1_init = &dr_ste_v0_build_tnl_gtpu_flex_parser_1_init, /* Getters and Setters */ .ste_init = &dr_ste_v0_init, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c index 4088d6e51508..054c2e2b6554 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c @@ -264,8 +264,8 @@ static void dr_ste_v1_set_miss_addr(u8 *hw_ste_p, u64 miss_addr) static u64 dr_ste_v1_get_miss_addr(u8 *hw_ste_p) { u64 index = - (MLX5_GET(ste_match_bwc_v1, hw_ste_p, miss_address_31_6) | - MLX5_GET(ste_match_bwc_v1, hw_ste_p, miss_address_39_32) << 26); + ((u64)MLX5_GET(ste_match_bwc_v1, hw_ste_p, miss_address_31_6) | + ((u64)MLX5_GET(ste_match_bwc_v1, hw_ste_p, miss_address_39_32)) << 26); return index << 6; } @@ -437,21 +437,6 @@ static void dr_ste_v1_set_rx_decap(u8 *hw_ste_p, u8 *s_action) dr_ste_v1_set_reparse(hw_ste_p); } -static void dr_ste_v1_set_rx_decap_l3(u8 *hw_ste_p, - u8 *s_action, - u16 decap_actions, - u32 decap_index) -{ - MLX5_SET(ste_single_action_modify_list_v1, s_action, action_id, - DR_STE_V1_ACTION_ID_MODIFY_LIST); - MLX5_SET(ste_single_action_modify_list_v1, s_action, num_of_modify_actions, - decap_actions); - MLX5_SET(ste_single_action_modify_list_v1, s_action, modify_actions_ptr, - decap_index); - - dr_ste_v1_set_reparse(hw_ste_p); -} - static void dr_ste_v1_set_rewrite_actions(u8 *hw_ste_p, u8 *s_action, u16 num_of_actions, @@ -571,9 +556,6 @@ static void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, bool allow_ctr = true; if (action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2]) { - dr_ste_v1_set_rx_decap_l3(last_ste, action, - attr->decap_actions, - attr->decap_index); dr_ste_v1_set_rewrite_actions(last_ste, action, attr->decap_actions, attr->decap_index); @@ -1324,6 +1306,88 @@ static void dr_ste_v1_build_tnl_mpls_init(struct mlx5dr_ste_build *sb, sb->ste_build_tag_func = &dr_ste_v1_build_tnl_mpls_tag; } +static int dr_ste_v1_build_tnl_mpls_over_udp_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + u8 *parser_ptr; + u8 parser_id; + u32 mpls_hdr; + + mpls_hdr = misc2->outer_first_mpls_over_udp_label << HDR_MPLS_OFFSET_LABEL; + misc2->outer_first_mpls_over_udp_label = 0; + mpls_hdr |= 
misc2->outer_first_mpls_over_udp_exp << HDR_MPLS_OFFSET_EXP; + misc2->outer_first_mpls_over_udp_exp = 0; + mpls_hdr |= misc2->outer_first_mpls_over_udp_s_bos << HDR_MPLS_OFFSET_S_BOS; + misc2->outer_first_mpls_over_udp_s_bos = 0; + mpls_hdr |= misc2->outer_first_mpls_over_udp_ttl << HDR_MPLS_OFFSET_TTL; + misc2->outer_first_mpls_over_udp_ttl = 0; + + parser_id = sb->caps->flex_parser_id_mpls_over_udp; + parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id); + *(__be32 *)parser_ptr = cpu_to_be32(mpls_hdr); + + return 0; +} + +static void dr_ste_v1_build_tnl_mpls_over_udp_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_tnl_mpls_over_udp_tag(mask, sb, sb->bit_mask); + + /* STEs with lookup type FLEX_PARSER_{0/1} includes + * flex parsers_{0-3}/{4-7} respectively. + */ + sb->lu_type = sb->caps->flex_parser_id_mpls_over_udp > DR_STE_MAX_FLEX_0_ID ? + DR_STE_V1_LU_TYPE_FLEX_PARSER_1 : + DR_STE_V1_LU_TYPE_FLEX_PARSER_0; + + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_tnl_mpls_over_udp_tag; +} + +static int dr_ste_v1_build_tnl_mpls_over_gre_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + u8 *parser_ptr; + u8 parser_id; + u32 mpls_hdr; + + mpls_hdr = misc2->outer_first_mpls_over_gre_label << HDR_MPLS_OFFSET_LABEL; + misc2->outer_first_mpls_over_gre_label = 0; + mpls_hdr |= misc2->outer_first_mpls_over_gre_exp << HDR_MPLS_OFFSET_EXP; + misc2->outer_first_mpls_over_gre_exp = 0; + mpls_hdr |= misc2->outer_first_mpls_over_gre_s_bos << HDR_MPLS_OFFSET_S_BOS; + misc2->outer_first_mpls_over_gre_s_bos = 0; + mpls_hdr |= misc2->outer_first_mpls_over_gre_ttl << HDR_MPLS_OFFSET_TTL; + misc2->outer_first_mpls_over_gre_ttl = 0; + + parser_id = sb->caps->flex_parser_id_mpls_over_gre; + parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id); + *(__be32 *)parser_ptr = cpu_to_be32(mpls_hdr); + + return 0; +} + +static void dr_ste_v1_build_tnl_mpls_over_gre_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_tnl_mpls_over_gre_tag(mask, sb, sb->bit_mask); + + /* STEs with lookup type FLEX_PARSER_{0/1} includes + * flex parsers_{0-3}/{4-7} respectively. + */ + sb->lu_type = sb->caps->flex_parser_id_mpls_over_gre > DR_STE_MAX_FLEX_0_ID ? 
+ DR_STE_V1_LU_TYPE_FLEX_PARSER_1 : + DR_STE_V1_LU_TYPE_FLEX_PARSER_0; + + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_tnl_mpls_over_gre_tag; +} + static int dr_ste_v1_build_icmp_tag(struct mlx5dr_match_param *value, struct mlx5dr_ste_build *sb, u8 *tag) @@ -1355,16 +1419,14 @@ static int dr_ste_v1_build_icmp_tag(struct mlx5dr_match_param *value, return 0; } -static int dr_ste_v1_build_icmp_init(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask) +static void dr_ste_v1_build_icmp_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) { dr_ste_v1_build_icmp_tag(mask, sb, sb->bit_mask); sb->lu_type = DR_STE_V1_LU_TYPE_ETHL4_MISC_O; sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); sb->ste_build_tag_func = &dr_ste_v1_build_icmp_tag; - - return 0; } static int dr_ste_v1_build_general_purpose_tag(struct mlx5dr_match_param *value, @@ -1532,6 +1594,7 @@ static void dr_ste_v1_build_src_gvmi_qpn_bit_mask(struct mlx5dr_match_param *val DR_STE_SET_ONES(src_gvmi_qp_v1, bit_mask, source_gvmi, misc_mask, source_port); DR_STE_SET_ONES(src_gvmi_qp_v1, bit_mask, source_qp, misc_mask, source_sqn); + misc_mask->source_eswitch_owner_vhca_id = 0; } static int dr_ste_v1_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, @@ -1588,6 +1651,179 @@ static void dr_ste_v1_build_src_gvmi_qpn_init(struct mlx5dr_ste_build *sb, sb->ste_build_tag_func = &dr_ste_v1_build_src_gvmi_qpn_tag; } +static void dr_ste_v1_set_flex_parser(u32 *misc4_field_id, + u32 *misc4_field_value, + bool *parser_is_used, + u8 *tag) +{ + u32 id = *misc4_field_id; + u8 *parser_ptr; + + if (parser_is_used[id]) + return; + + parser_is_used[id] = true; + parser_ptr = dr_ste_calc_flex_parser_offset(tag, id); + + *(__be32 *)parser_ptr = cpu_to_be32(*misc4_field_value); + *misc4_field_id = 0; + *misc4_field_value = 0; +} + +static int dr_ste_v1_build_felx_parser_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc4 *misc_4_mask = &value->misc4; + bool parser_is_used[DR_NUM_OF_FLEX_PARSERS] = {}; + + dr_ste_v1_set_flex_parser(&misc_4_mask->prog_sample_field_id_0, + &misc_4_mask->prog_sample_field_value_0, + parser_is_used, tag); + + dr_ste_v1_set_flex_parser(&misc_4_mask->prog_sample_field_id_1, + &misc_4_mask->prog_sample_field_value_1, + parser_is_used, tag); + + dr_ste_v1_set_flex_parser(&misc_4_mask->prog_sample_field_id_2, + &misc_4_mask->prog_sample_field_value_2, + parser_is_used, tag); + + dr_ste_v1_set_flex_parser(&misc_4_mask->prog_sample_field_id_3, + &misc_4_mask->prog_sample_field_value_3, + parser_is_used, tag); + + return 0; +} + +static void dr_ste_v1_build_flex_parser_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_0; + dr_ste_v1_build_felx_parser_tag(mask, sb, sb->bit_mask); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_felx_parser_tag; +} + +static void dr_ste_v1_build_flex_parser_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_1; + dr_ste_v1_build_felx_parser_tag(mask, sb, sb->bit_mask); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_felx_parser_tag; +} + +static int +dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct 
mlx5dr_match_misc3 *misc3 = &value->misc3; + u8 parser_id = sb->caps->flex_parser_id_geneve_tlv_option_0; + u8 *parser_ptr = dr_ste_calc_flex_parser_offset(tag, parser_id); + + MLX5_SET(ste_flex_parser_0, parser_ptr, flex_parser_3, + misc3->geneve_tlv_option_0_data); + misc3->geneve_tlv_option_0_data = 0; + + return 0; +} + +static void +dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_tag(mask, sb, sb->bit_mask); + + /* STEs with lookup type FLEX_PARSER_{0/1} includes + * flex parsers_{0-3}/{4-7} respectively. + */ + sb->lu_type = sb->caps->flex_parser_id_geneve_tlv_option_0 > 3 ? + DR_STE_V1_LU_TYPE_FLEX_PARSER_1 : + DR_STE_V1_LU_TYPE_FLEX_PARSER_0; + + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_tag; +} + +static int dr_ste_v1_build_flex_parser_tnl_gtpu_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + uint8_t *tag) +{ + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + + DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, gtpu_msg_flags, misc3, gtpu_msg_flags); + DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, gtpu_msg_type, misc3, gtpu_msg_type); + DR_STE_SET_TAG(flex_parser_tnl_gtpu, tag, gtpu_teid, misc3, gtpu_teid); + + return 0; +} + +static void dr_ste_v1_build_flex_parser_tnl_gtpu_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_flex_parser_tnl_gtpu_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_gtpu_tag; +} + +static int +dr_ste_v1_build_tnl_gtpu_flex_parser_0_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + uint8_t *tag) +{ + if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_dw_0)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3); + if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_teid)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_teid, sb->caps, &value->misc3); + if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_dw_2)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_2, sb->caps, &value->misc3); + if (dr_is_flex_parser_0_id(sb->caps->flex_parser_id_gtpu_first_ext_dw_0)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_first_ext_dw_0, sb->caps, &value->misc3); + return 0; +} + +static void +dr_ste_v1_build_tnl_gtpu_flex_parser_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_tnl_gtpu_flex_parser_0_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_0; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_tnl_gtpu_flex_parser_0_tag; +} + +static int +dr_ste_v1_build_tnl_gtpu_flex_parser_1_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + uint8_t *tag) +{ + if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_dw_0)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_0, sb->caps, &value->misc3); + if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_teid)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_teid, sb->caps, &value->misc3); + if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_dw_2)) + DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_dw_2, sb->caps, &value->misc3); + if (dr_is_flex_parser_1_id(sb->caps->flex_parser_id_gtpu_first_ext_dw_0)) + 
DR_STE_SET_FLEX_PARSER_FIELD(tag, gtpu_first_ext_dw_0, sb->caps, &value->misc3); + return 0; +} + +static void +dr_ste_v1_build_tnl_gtpu_flex_parser_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_tnl_gtpu_flex_parser_1_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_1; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_tnl_gtpu_flex_parser_1_tag; +} + struct mlx5dr_ste_ctx ste_ctx_v1 = { /* Builders */ .build_eth_l2_src_dst_init = &dr_ste_v1_build_eth_l2_src_dst_init, @@ -1602,14 +1838,23 @@ struct mlx5dr_ste_ctx ste_ctx_v1 = { .build_mpls_init = &dr_ste_v1_build_mpls_init, .build_tnl_gre_init = &dr_ste_v1_build_tnl_gre_init, .build_tnl_mpls_init = &dr_ste_v1_build_tnl_mpls_init, + .build_tnl_mpls_over_udp_init = &dr_ste_v1_build_tnl_mpls_over_udp_init, + .build_tnl_mpls_over_gre_init = &dr_ste_v1_build_tnl_mpls_over_gre_init, .build_icmp_init = &dr_ste_v1_build_icmp_init, .build_general_purpose_init = &dr_ste_v1_build_general_purpose_init, .build_eth_l4_misc_init = &dr_ste_v1_build_eth_l4_misc_init, .build_tnl_vxlan_gpe_init = &dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init, .build_tnl_geneve_init = &dr_ste_v1_build_flex_parser_tnl_geneve_init, + .build_tnl_geneve_tlv_opt_init = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init, .build_register_0_init = &dr_ste_v1_build_register_0_init, .build_register_1_init = &dr_ste_v1_build_register_1_init, .build_src_gvmi_qpn_init = &dr_ste_v1_build_src_gvmi_qpn_init, + .build_flex_parser_0_init = &dr_ste_v1_build_flex_parser_0_init, + .build_flex_parser_1_init = &dr_ste_v1_build_flex_parser_1_init, + .build_tnl_gtpu_init = &dr_ste_v1_build_flex_parser_tnl_gtpu_init, + .build_tnl_gtpu_flex_parser_0_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_0_init, + .build_tnl_gtpu_flex_parser_1_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_1_init, + /* Getters and Setters */ .ste_init = &dr_ste_v1_init, .set_next_lu_type = &dr_ste_v1_set_next_lu_type, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c index b599b6beb5b9..30ae3cda6d2e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c @@ -29,7 +29,7 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, last_htbl = tbl->rx.s_anchor; tbl->rx.default_icm_addr = action ? - action->dest_tbl.tbl->rx.s_anchor->chunk->icm_addr : + action->dest_tbl->tbl->rx.s_anchor->chunk->icm_addr : tbl->rx.nic_dmn->default_icm_addr; info.type = CONNECT_MISS; @@ -53,7 +53,7 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, last_htbl = tbl->tx.s_anchor; tbl->tx.default_icm_addr = action ? 
- action->dest_tbl.tbl->tx.s_anchor->chunk->icm_addr : + action->dest_tbl->tbl->tx.s_anchor->chunk->icm_addr : tbl->tx.nic_dmn->default_icm_addr; info.type = CONNECT_MISS; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index 4af0e4e6a13c..67460c42a99b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -12,17 +12,30 @@ #include "mlx5_ifc_dr.h" #include "mlx5dr.h" -#define DR_RULE_MAX_STES 17 +#define DR_RULE_MAX_STES 18 #define DR_ACTION_MAX_STES 5 #define WIRE_PORT 0xFFFF #define DR_STE_SVLAN 0x1 #define DR_STE_CVLAN 0x2 #define DR_SZ_MATCH_PARAM (MLX5_ST_SZ_DW_MATCH_PARAM * 4) +#define DR_NUM_OF_FLEX_PARSERS 8 +#define DR_STE_MAX_FLEX_0_ID 3 +#define DR_STE_MAX_FLEX_1_ID 7 #define mlx5dr_err(dmn, arg...) mlx5_core_err((dmn)->mdev, ##arg) #define mlx5dr_info(dmn, arg...) mlx5_core_info((dmn)->mdev, ##arg) #define mlx5dr_dbg(dmn, arg...) mlx5_core_dbg((dmn)->mdev, ##arg) +static inline bool dr_is_flex_parser_0_id(u8 parser_id) +{ + return parser_id <= DR_STE_MAX_FLEX_0_ID; +} + +static inline bool dr_is_flex_parser_1_id(u8 parser_id) +{ + return parser_id > DR_STE_MAX_FLEX_0_ID; +} + enum mlx5dr_icm_chunk_size { DR_CHUNK_SIZE_1, DR_CHUNK_SIZE_MIN = DR_CHUNK_SIZE_1, /* keep updated when changing */ @@ -87,7 +100,8 @@ enum mlx5dr_matcher_criteria { DR_MATCHER_CRITERIA_INNER = 1 << 2, DR_MATCHER_CRITERIA_MISC2 = 1 << 3, DR_MATCHER_CRITERIA_MISC3 = 1 << 4, - DR_MATCHER_CRITERIA_MAX = 1 << 5, + DR_MATCHER_CRITERIA_MISC4 = 1 << 5, + DR_MATCHER_CRITERIA_MAX = 1 << 6, }; enum mlx5dr_action_type { @@ -389,11 +403,21 @@ void mlx5dr_ste_build_tnl_mpls(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, bool inner, bool rx); -int mlx5dr_ste_build_icmp(struct mlx5dr_ste_ctx *ste_ctx, - struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, - struct mlx5dr_cmd_caps *caps, - bool inner, bool rx); +void mlx5dr_ste_build_tnl_mpls_over_gre(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_mpls_over_udp(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); +void mlx5dr_ste_build_icmp(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); void mlx5dr_ste_build_tnl_vxlan_gpe(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, @@ -402,6 +426,25 @@ void mlx5dr_ste_build_tnl_geneve(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, bool inner, bool rx); +void mlx5dr_ste_build_tnl_geneve_tlv_opt(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_gtpu(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_gtpu_flex_parser_0(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_gtpu_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx, + struct 
mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); void mlx5dr_ste_build_general_purpose(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, @@ -419,6 +462,14 @@ void mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_match_param *mask, struct mlx5dr_domain *dmn, bool inner, bool rx); +void mlx5dr_ste_build_flex_parser_0(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +void mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); void mlx5dr_ste_build_empty_always_hit(struct mlx5dr_ste_build *sb, bool rx); /* Actions utils */ @@ -646,7 +697,24 @@ struct mlx5dr_match_misc3 { u8 icmpv6_type; u8 icmpv4_code; u8 icmpv4_type; - u8 reserved_auto3[0x1c]; + u32 geneve_tlv_option_0_data; + u8 gtpu_msg_flags; + u8 gtpu_msg_type; + u32 gtpu_teid; + u32 gtpu_dw_2; + u32 gtpu_first_ext_dw_0; + u32 gtpu_dw_0; +}; + +struct mlx5dr_match_misc4 { + u32 prog_sample_field_value_0; + u32 prog_sample_field_id_0; + u32 prog_sample_field_value_1; + u32 prog_sample_field_id_1; + u32 prog_sample_field_value_2; + u32 prog_sample_field_id_2; + u32 prog_sample_field_value_3; + u32 prog_sample_field_id_3; }; struct mlx5dr_match_param { @@ -655,6 +723,7 @@ struct mlx5dr_match_param { struct mlx5dr_match_spec inner; struct mlx5dr_match_misc2 misc2; struct mlx5dr_match_misc3 misc3; + struct mlx5dr_match_misc4 misc4; }; #define DR_MASK_IS_ICMPV4_SET(_misc3) ((_misc3)->icmpv4_type || \ @@ -678,6 +747,12 @@ struct mlx5dr_cmd_vport_cap { u32 num; }; +struct mlx5dr_roce_cap { + u8 roce_en:1; + u8 fl_rc_qp_when_roce_disabled:1; + u8 fl_rc_qp_when_roce_enabled:1; +}; + struct mlx5dr_cmd_caps { u16 gvmi; u64 nic_rx_drop_address; @@ -692,6 +767,13 @@ struct mlx5dr_cmd_caps { u8 flex_parser_id_icmp_dw1; u8 flex_parser_id_icmpv6_dw0; u8 flex_parser_id_icmpv6_dw1; + u8 flex_parser_id_geneve_tlv_option_0; + u8 flex_parser_id_mpls_over_gre; + u8 flex_parser_id_mpls_over_udp; + u8 flex_parser_id_gtpu_dw_0; + u8 flex_parser_id_gtpu_teid; + u8 flex_parser_id_gtpu_dw_2; + u8 flex_parser_id_gtpu_first_ext_dw_0; u8 max_ft_level; u16 roce_min_src_udp; u8 num_esw_ports; @@ -707,6 +789,8 @@ struct mlx5dr_cmd_caps { struct mlx5dr_esw_caps esw_caps; struct mlx5dr_cmd_vport_cap *vports_caps; bool prio_tag_required; + struct mlx5dr_roce_cap roce_caps; + u8 isolate_vl_tc:1; }; struct mlx5dr_domain_rx_tx { @@ -806,53 +890,71 @@ struct mlx5dr_ste_action_modify_field { u8 l4_type; }; +struct mlx5dr_action_rewrite { + struct mlx5dr_domain *dmn; + struct mlx5dr_icm_chunk *chunk; + u8 *data; + u16 num_of_actions; + u32 index; + u8 allow_rx:1; + u8 allow_tx:1; + u8 modify_ttl:1; +}; + +struct mlx5dr_action_reformat { + struct mlx5dr_domain *dmn; + u32 reformat_id; + u32 reformat_size; +}; + +struct mlx5dr_action_dest_tbl { + u8 is_fw_tbl:1; + union { + struct mlx5dr_table *tbl; + struct { + struct mlx5dr_domain *dmn; + u32 id; + u32 group_id; + enum fs_flow_table_type type; + u64 rx_icm_addr; + u64 tx_icm_addr; + struct mlx5dr_action **ref_actions; + u32 num_of_ref_actions; + } fw_tbl; + }; +}; + +struct mlx5dr_action_ctr { + u32 ctr_id; + u32 offeset; +}; + +struct mlx5dr_action_vport { + struct mlx5dr_domain *dmn; + struct mlx5dr_cmd_vport_cap *caps; +}; + +struct mlx5dr_action_push_vlan { + u32 vlan_hdr; /* tpid_pcp_dei_vid */ +}; + +struct 
mlx5dr_action_flow_tag { + u32 flow_tag; +}; + struct mlx5dr_action { enum mlx5dr_action_type action_type; refcount_t refcount; + union { - struct { - struct mlx5dr_domain *dmn; - struct mlx5dr_icm_chunk *chunk; - u8 *data; - u16 num_of_actions; - u32 index; - u8 allow_rx:1; - u8 allow_tx:1; - u8 modify_ttl:1; - } rewrite; - struct { - struct mlx5dr_domain *dmn; - u32 reformat_id; - u32 reformat_size; - } reformat; - struct { - u8 is_fw_tbl:1; - union { - struct mlx5dr_table *tbl; - struct { - struct mlx5dr_domain *dmn; - u32 id; - u32 group_id; - enum fs_flow_table_type type; - u64 rx_icm_addr; - u64 tx_icm_addr; - struct mlx5dr_action **ref_actions; - u32 num_of_ref_actions; - } fw_tbl; - }; - } dest_tbl; - struct { - u32 ctr_id; - u32 offeset; - } ctr; - struct { - struct mlx5dr_domain *dmn; - struct mlx5dr_cmd_vport_cap *caps; - } vport; - struct { - u32 vlan_hdr; /* tpid_pcp_dei_vid */ - } push_vlan; - u32 flow_tag; + void *data; + struct mlx5dr_action_rewrite *rewrite; + struct mlx5dr_action_reformat *reformat; + struct mlx5dr_action_dest_tbl *dest_tbl; + struct mlx5dr_action_ctr *ctr; + struct mlx5dr_action_vport *vport; + struct mlx5dr_action_push_vlan *push_vlan; + struct mlx5dr_action_flow_tag *flow_tag; }; }; @@ -1063,6 +1165,7 @@ struct mlx5dr_cmd_qp_create_attr { u32 sq_wqe_cnt; u32 rq_wqe_cnt; u32 rq_wqe_shift; + u8 isolate_vl_tc:1; }; int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h index 83df6df6b459..9643ee647f57 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr.h @@ -434,10 +434,7 @@ struct mlx5_ifc_ste_gre_bits { }; struct mlx5_ifc_ste_flex_parser_0_bits { - u8 parser_3_label[0x14]; - u8 parser_3_exp[0x3]; - u8 parser_3_s_bos[0x1]; - u8 parser_3_ttl[0x8]; + u8 flex_parser_3[0x20]; u8 flex_parser_2[0x20]; @@ -488,6 +485,17 @@ struct mlx5_ifc_ste_flex_parser_tnl_geneve_bits { u8 reserved_at_40[0x40]; }; +struct mlx5_ifc_ste_flex_parser_tnl_gtpu_bits { + u8 reserved_at_0[0x5]; + u8 gtpu_msg_flags[0x3]; + u8 gtpu_msg_type[0x8]; + u8 reserved_at_10[0x10]; + + u8 gtpu_teid[0x20]; + + u8 reserved_at_40[0x40]; +}; + struct mlx5_ifc_ste_general_purpose_bits { u8 general_purpose_lookup_field[0x20]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index e05c5c0f3ae1..457ad42eaa2a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -1151,20 +1151,6 @@ u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev) } EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid); -/** - * mlx5_eswitch_get_total_vports - Get total vports of the eswitch - * - * @dev: Pointer to core device - * - * mlx5_eswitch_get_total_vports returns total number of vports for - * the eswitch. 
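The mlx5dr_action rework above (matched by the dest_tbl.tbl -> dest_tbl->tbl fixups in dr_table.c) moves each action's type-specific state out of one large embedded union and behind a union of pointers, so the base struct stays small and only the payload a given action type needs is allocated. A hedged sketch of the resulting pattern for a destination-table action (illustrative only; the driver's real constructors live in dr_action.c and differ in detail):

	struct mlx5dr_action *action;

	action = kzalloc(sizeof(*action), GFP_KERNEL);
	if (!action)
		return NULL;

	refcount_set(&action->refcount, 1);
	action->action_type = DR_ACTION_TYP_FT; /* illustrative type */

	/* Type-specific payload is now a separate allocation. */
	action->dest_tbl = kzalloc(sizeof(*action->dest_tbl), GFP_KERNEL);
	if (!action->dest_tbl) {
		kfree(action);
		return NULL;
	}
	action->dest_tbl->is_fw_tbl = false;
	action->dest_tbl->tbl = tbl; /* caller-provided mlx5dr_table */

	return action;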
- */ -u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev) -{ - return MLX5_SPECIAL_VPORTS(dev) + mlx5_core_max_vfs(dev) + mlx5_sf_max_functions(dev); -} -EXPORT_SYMBOL_GPL(mlx5_eswitch_get_total_vports); - int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out) { u16 opmod = (MLX5_CAP_GENERAL << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c index 01f075fac276..3091dd014650 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -34,11 +34,6 @@ #include "wq.h" #include "mlx5_core.h" -static u32 wq_get_byte_sz(u8 log_sz, u8 log_stride) -{ - return ((u32)1 << log_sz) << log_stride; -} - int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *wqc, struct mlx5_wq_cyc *wq, struct mlx5_wq_ctrl *wq_ctrl) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 52fdc34251ba..7e9a7cb31720 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1728,7 +1728,7 @@ static int mlxsw_core_health_fw_fatal_dump(struct devlink_health_reporter *repor return err; event_id = mlxsw_reg_mfde_event_id_get(mfde_pl); - err = devlink_fmsg_u8_pair_put(fmsg, "id", event_id); + err = devlink_fmsg_u32_pair_put(fmsg, "id", event_id); if (err) return err; switch (event_id) { @@ -1806,6 +1806,10 @@ static int mlxsw_core_health_fw_fatal_dump(struct devlink_health_reporter *repor err = devlink_fmsg_u8_pair_put(fmsg, "log_irisc_id", val); if (err) return err; + val = mlxsw_reg_mfde_log_ip_get(mfde_pl); + err = devlink_fmsg_u64_pair_put(fmsg, "log_ip", val); + if (err) + return err; } else if (event_id == MLXSW_REG_MFDE_EVENT_ID_KVD_IM_STOP) { val = mlxsw_reg_mfde_pipes_mask_get(mfde_pl); err = devlink_fmsg_u32_pair_put(fmsg, "pipes_mask", val); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 8af7d9d03475..80712dc803d0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -58,6 +58,25 @@ struct mlxsw_tx_info { bool is_emad; }; +struct mlxsw_rx_md_info { + u32 cookie_index; + u32 latency; + u32 tx_congestion; + union { + /* Valid when 'tx_port_valid' is set. */ + u16 tx_sys_port; + u16 tx_lag_id; + }; + u8 tx_lag_port_index; /* Valid when 'tx_port_is_lag' is set. 
*/ + u8 tx_tc; + u8 latency_valid:1, + tx_congestion_valid:1, + tx_tc_valid:1, + tx_port_valid:1, + tx_port_is_lag:1, + unused:3; +}; + bool mlxsw_core_skb_transmit_busy(struct mlxsw_core *mlxsw_core, const struct mlxsw_tx_info *tx_info); int mlxsw_core_skb_transmit(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, @@ -515,7 +534,7 @@ enum mlxsw_devlink_param_id { struct mlxsw_skb_cb { union { struct mlxsw_tx_info tx_info; - u32 cookie_index; /* Only used during receive */ + struct mlxsw_rx_md_info rx_md_info; }; }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index 4d699fe98cb6..78d9c0196f2b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -2007,3 +2007,134 @@ int mlxsw_afa_block_append_l4port(struct mlxsw_afa_block *block, bool is_dport, return 0; } EXPORT_SYMBOL(mlxsw_afa_block_append_l4port); + +/* Mirror Sampler Action + * --------------------- + * The SAMPLER_ACTION is used to mirror packets with a probability (sampling). + */ + +#define MLXSW_AFA_SAMPLER_CODE 0x13 +#define MLXSW_AFA_SAMPLER_SIZE 1 + +/* afa_sampler_mirror_agent + * Mirror (SPAN) agent. + */ +MLXSW_ITEM32(afa, sampler, mirror_agent, 0x04, 0, 3); + +#define MLXSW_AFA_SAMPLER_RATE_MAX (BIT(24) - 1) + +/* afa_sampler_mirror_probability_rate + * Mirroring probability. + * Valid values are 1 to 2^24 - 1 + */ +MLXSW_ITEM32(afa, sampler, mirror_probability_rate, 0x08, 0, 24); + +static void mlxsw_afa_sampler_pack(char *payload, u8 mirror_agent, u32 rate) +{ + mlxsw_afa_sampler_mirror_agent_set(payload, mirror_agent); + mlxsw_afa_sampler_mirror_probability_rate_set(payload, rate); +} + +struct mlxsw_afa_sampler { + struct mlxsw_afa_resource resource; + int span_id; + u8 local_port; + bool ingress; +}; + +static void mlxsw_afa_sampler_destroy(struct mlxsw_afa_block *block, + struct mlxsw_afa_sampler *sampler) +{ + mlxsw_afa_resource_del(&sampler->resource); + block->afa->ops->sampler_del(block->afa->ops_priv, sampler->local_port, + sampler->span_id, sampler->ingress); + kfree(sampler); +} + +static void mlxsw_afa_sampler_destructor(struct mlxsw_afa_block *block, + struct mlxsw_afa_resource *resource) +{ + struct mlxsw_afa_sampler *sampler; + + sampler = container_of(resource, struct mlxsw_afa_sampler, resource); + mlxsw_afa_sampler_destroy(block, sampler); +} + +static struct mlxsw_afa_sampler * +mlxsw_afa_sampler_create(struct mlxsw_afa_block *block, u8 local_port, + struct psample_group *psample_group, u32 rate, + u32 trunc_size, bool truncate, bool ingress, + struct netlink_ext_ack *extack) +{ + struct mlxsw_afa_sampler *sampler; + int err; + + sampler = kzalloc(sizeof(*sampler), GFP_KERNEL); + if (!sampler) + return ERR_PTR(-ENOMEM); + + err = block->afa->ops->sampler_add(block->afa->ops_priv, local_port, + psample_group, rate, trunc_size, + truncate, ingress, &sampler->span_id, + extack); + if (err) + goto err_sampler_add; + + sampler->ingress = ingress; + sampler->local_port = local_port; + sampler->resource.destructor = mlxsw_afa_sampler_destructor; + mlxsw_afa_resource_add(block, &sampler->resource); + return sampler; + +err_sampler_add: + kfree(sampler); + return ERR_PTR(err); +} + +static int +mlxsw_afa_block_append_allocated_sampler(struct mlxsw_afa_block *block, + u8 mirror_agent, u32 rate) +{ + char *act = mlxsw_afa_block_append_action(block, MLXSW_AFA_SAMPLER_CODE, + MLXSW_AFA_SAMPLER_SIZE); + + if (IS_ERR(act)) + return 
PTR_ERR(act); + mlxsw_afa_sampler_pack(act, mirror_agent, rate); + return 0; +} + +int mlxsw_afa_block_append_sampler(struct mlxsw_afa_block *block, u8 local_port, + struct psample_group *psample_group, + u32 rate, u32 trunc_size, bool truncate, + bool ingress, + struct netlink_ext_ack *extack) +{ + struct mlxsw_afa_sampler *sampler; + int err; + + if (rate > MLXSW_AFA_SAMPLER_RATE_MAX) { + NL_SET_ERR_MSG_MOD(extack, "Sampling rate is too high"); + return -EINVAL; + } + + sampler = mlxsw_afa_sampler_create(block, local_port, psample_group, + rate, trunc_size, truncate, ingress, + extack); + if (IS_ERR(sampler)) + return PTR_ERR(sampler); + + err = mlxsw_afa_block_append_allocated_sampler(block, sampler->span_id, + rate); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Cannot append sampler action"); + goto err_append_allocated_sampler; + } + + return 0; + +err_append_allocated_sampler: + mlxsw_afa_sampler_destroy(block, sampler); + return err; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_sampler); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h index b652497b1002..b65bf98eb5ab 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h @@ -30,6 +30,12 @@ struct mlxsw_afa_ops { u16 *p_policer_index, struct netlink_ext_ack *extack); void (*policer_del)(void *priv, u16 policer_index); + int (*sampler_add)(void *priv, u8 local_port, + struct psample_group *psample_group, u32 rate, + u32 trunc_size, bool truncate, bool ingress, + int *p_span_id, struct netlink_ext_ack *extack); + void (*sampler_del)(void *priv, u8 local_port, int span_id, + bool ingress); bool dummy_first_set; }; @@ -92,5 +98,10 @@ int mlxsw_afa_block_append_police(struct mlxsw_afa_block *block, u32 fa_index, u64 rate_bytes_ps, u32 burst, u16 *p_policer_index, struct netlink_ext_ack *extack); +int mlxsw_afa_block_append_sampler(struct mlxsw_afa_block *block, u8 local_port, + struct psample_group *psample_group, + u32 rate, u32 trunc_size, bool truncate, + bool ingress, + struct netlink_ext_ack *extack); #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index bf85ce9835d7..dfea14399607 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -19,7 +19,6 @@ #define MLXSW_THERMAL_ASIC_TEMP_NORM 75000 /* 75C */ #define MLXSW_THERMAL_ASIC_TEMP_HIGH 85000 /* 85C */ #define MLXSW_THERMAL_ASIC_TEMP_HOT 105000 /* 105C */ -#define MLXSW_THERMAL_ASIC_TEMP_CRIT 140000 /* 140C */ #define MLXSW_THERMAL_HYSTERESIS_TEMP 5000 /* 5C */ #define MLXSW_THERMAL_MODULE_TEMP_SHIFT (MLXSW_THERMAL_HYSTERESIS_TEMP * 2) #define MLXSW_THERMAL_ZONE_MAX_NAME 16 @@ -45,7 +44,6 @@ enum mlxsw_thermal_trips { MLXSW_THERMAL_TEMP_TRIP_NORM, MLXSW_THERMAL_TEMP_TRIP_HIGH, MLXSW_THERMAL_TEMP_TRIP_HOT, - MLXSW_THERMAL_TEMP_TRIP_CRIT, }; struct mlxsw_thermal_trip { @@ -75,16 +73,9 @@ static const struct mlxsw_thermal_trip default_thermal_trips[] = { { /* Warning */ .type = THERMAL_TRIP_HOT, .temp = MLXSW_THERMAL_ASIC_TEMP_HOT, - .hyst = MLXSW_THERMAL_HYSTERESIS_TEMP, .min_state = MLXSW_THERMAL_MAX_STATE, .max_state = MLXSW_THERMAL_MAX_STATE, }, - { /* Critical - soft poweroff */ - .type = THERMAL_TRIP_CRITICAL, - .temp = MLXSW_THERMAL_ASIC_TEMP_CRIT, - .min_state = MLXSW_THERMAL_MAX_STATE, - .max_state = MLXSW_THERMAL_MAX_STATE, - } }; #define MLXSW_THERMAL_NUM_TRIPS 
ARRAY_SIZE(default_thermal_trips) @@ -141,7 +132,7 @@ static int mlxsw_get_cooling_device_idx(struct mlxsw_thermal *thermal, /* Allow mlxsw thermal zone binding to an external cooling device */ for (i = 0; i < ARRAY_SIZE(mlxsw_thermal_external_allowed_cdev); i++) { if (strnstr(cdev->type, mlxsw_thermal_external_allowed_cdev[i], - sizeof(cdev->type))) + strlen(cdev->type))) return 0; } @@ -154,7 +145,6 @@ mlxsw_thermal_module_trips_reset(struct mlxsw_thermal_module *tz) tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = 0; tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = 0; tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = 0; - tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = 0; } static int @@ -183,11 +173,10 @@ mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core, } /* According to the system thermal requirements, the thermal zones are - * defined with four trip points. The critical and emergency + * defined with three trip points. The critical and emergency * temperature thresholds, provided by QSFP module are set as "active" - * and "hot" trip points, "normal" and "critical" trip points are - * derived from "active" and "hot" by subtracting or adding double - * hysteresis value. + * and "hot" trip points, "normal" trip point is derived from "active" + * by subtracting double hysteresis value. */ if (crit_temp >= MLXSW_THERMAL_MODULE_TEMP_SHIFT) tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp - @@ -196,8 +185,6 @@ mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core, tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp; tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = crit_temp; tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = emerg_temp; - tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = emerg_temp + - MLXSW_THERMAL_MODULE_TEMP_SHIFT; return 0; } @@ -210,7 +197,7 @@ static void mlxsw_thermal_tz_score_update(struct mlxsw_thermal *thermal, struct mlxsw_thermal_trip *trip = trips; unsigned int score, delta, i, shift = 1; - /* Calculate thermal zone score, if temperature is above the critical + /* Calculate thermal zone score, if temperature is above the hot * threshold score is set to MLXSW_THERMAL_TEMP_SCORE_MAX. 
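Stepping back to the mirror sampler action added in core_acl_flex_actions.c above: the probability rate is encoded in a 24-bit field, which is where the MLXSW_AFA_SAMPLER_RATE_MAX = BIT(24) - 1 bound and the -EINVAL path come from. A hedged caller-side sketch (block, psample_group, rate, trunc_size, truncate and extack are assumed to come from the flow-action offload path):

	int err;

	/* A rate of N asks the ASIC to mirror roughly 1 of every N packets. */
	err = mlxsw_afa_block_append_sampler(block, local_port, psample_group,
					     rate, trunc_size, truncate,
					     true /* ingress */, extack);
	if (err)
		return err; /* e.g. -EINVAL when rate > BIT(24) - 1 */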
*/ score = MLXSW_THERMAL_TEMP_SCORE_MAX; @@ -333,8 +320,7 @@ static int mlxsw_thermal_set_trip_temp(struct thermal_zone_device *tzdev, { struct mlxsw_thermal *thermal = tzdev->devdata; - if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS || - temp > MLXSW_THERMAL_ASIC_TEMP_CRIT) + if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS) return -EINVAL; thermal->trips[trip].temp = temp; @@ -502,8 +488,7 @@ mlxsw_thermal_module_trip_temp_set(struct thermal_zone_device *tzdev, { struct mlxsw_thermal_module *tz = tzdev->devdata; - if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS || - temp > tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp) + if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS) return -EINVAL; tz->trips[trip].temp = temp; diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index d0052537e627..8e8456811384 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -540,6 +540,55 @@ static void mlxsw_pci_cqe_sdq_handle(struct mlxsw_pci *mlxsw_pci, spin_unlock(&q->lock); } +static void mlxsw_pci_cqe_rdq_md_tx_port_init(struct sk_buff *skb, + const char *cqe) +{ + struct mlxsw_skb_cb *cb = mlxsw_skb_cb(skb); + + if (mlxsw_pci_cqe2_tx_lag_get(cqe)) { + cb->rx_md_info.tx_port_is_lag = true; + cb->rx_md_info.tx_lag_id = mlxsw_pci_cqe2_tx_lag_id_get(cqe); + cb->rx_md_info.tx_lag_port_index = + mlxsw_pci_cqe2_tx_lag_subport_get(cqe); + } else { + cb->rx_md_info.tx_port_is_lag = false; + cb->rx_md_info.tx_sys_port = + mlxsw_pci_cqe2_tx_system_port_get(cqe); + } + + if (cb->rx_md_info.tx_sys_port != MLXSW_PCI_CQE2_TX_PORT_MULTI_PORT && + cb->rx_md_info.tx_sys_port != MLXSW_PCI_CQE2_TX_PORT_INVALID) + cb->rx_md_info.tx_port_valid = 1; + else + cb->rx_md_info.tx_port_valid = 0; +} + +static void mlxsw_pci_cqe_rdq_md_init(struct sk_buff *skb, const char *cqe) +{ + struct mlxsw_skb_cb *cb = mlxsw_skb_cb(skb); + + cb->rx_md_info.tx_congestion = mlxsw_pci_cqe2_mirror_cong_get(cqe); + if (cb->rx_md_info.tx_congestion != MLXSW_PCI_CQE2_MIRROR_CONG_INVALID) + cb->rx_md_info.tx_congestion_valid = 1; + else + cb->rx_md_info.tx_congestion_valid = 0; + cb->rx_md_info.tx_congestion <<= MLXSW_PCI_CQE2_MIRROR_CONG_SHIFT; + + cb->rx_md_info.latency = mlxsw_pci_cqe2_mirror_latency_get(cqe); + if (cb->rx_md_info.latency != MLXSW_PCI_CQE2_MIRROR_LATENCY_INVALID) + cb->rx_md_info.latency_valid = 1; + else + cb->rx_md_info.latency_valid = 0; + + cb->rx_md_info.tx_tc = mlxsw_pci_cqe2_mirror_tclass_get(cqe); + if (cb->rx_md_info.tx_tc != MLXSW_PCI_CQE2_MIRROR_TCLASS_INVALID) + cb->rx_md_info.tx_tc_valid = 1; + else + cb->rx_md_info.tx_tc_valid = 0; + + mlxsw_pci_cqe_rdq_md_tx_port_init(skb, cqe); +} + static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, struct mlxsw_pci_queue *q, u16 consumer_counter_limit, @@ -581,11 +630,15 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, if (mlxsw_pci->max_cqe_ver >= MLXSW_PCI_CQE_V2) cookie_index = mlxsw_pci_cqe2_user_def_val_orig_pkt_len_get(cqe); - mlxsw_skb_cb(skb)->cookie_index = cookie_index; + mlxsw_skb_cb(skb)->rx_md_info.cookie_index = cookie_index; } else if (rx_info.trap_id >= MLXSW_TRAP_ID_MIRROR_SESSION0 && rx_info.trap_id <= MLXSW_TRAP_ID_MIRROR_SESSION7 && mlxsw_pci->max_cqe_ver >= MLXSW_PCI_CQE_V2) { rx_info.mirror_reason = mlxsw_pci_cqe2_mirror_reason_get(cqe); + mlxsw_pci_cqe_rdq_md_init(skb, cqe); + } else if (rx_info.trap_id == MLXSW_TRAP_ID_PKT_SAMPLE && + mlxsw_pci->max_cqe_ver >= MLXSW_PCI_CQE_V2) { + mlxsw_pci_cqe_rdq_md_tx_port_init(skb, cqe); } 
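mlxsw_pci_cqe_rdq_md_init() above fills mlxsw_skb_cb(skb)->rx_md_info and pairs every field with its own validity bit, because the CQE encodes "not available" as per-field sentinel values. A sketch of the consuming side (example_dump_rx_md() is illustrative; the real consumers are the mirror and sample trap handlers):

static void example_dump_rx_md(const struct mlxsw_rx_md_info *md)
{
	if (md->tx_port_valid) {
		if (md->tx_port_is_lag)
			pr_info("tx: LAG %u, port index %u\n",
				md->tx_lag_id, md->tx_lag_port_index);
		else
			pr_info("tx: system port %u\n", md->tx_sys_port);
	}
	if (md->tx_tc_valid)
		pr_info("tx traffic class: %u\n", md->tx_tc);
	if (md->tx_congestion_valid)
		pr_info("egress congestion: %u bytes\n", md->tx_congestion);
	if (md->latency_valid)
		pr_info("latency: %u (MOGCR.mirror_latency_units)\n",
			md->latency);
}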
byte_count = mlxsw_pci_cqe_byte_count_get(cqe); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index a2c1fbd3e0d1..7b531228d6c0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -173,6 +173,15 @@ MLXSW_ITEM32(pci, cqe, wqe_counter, 0x04, 16, 16); */ MLXSW_ITEM32(pci, cqe, byte_count, 0x04, 0, 14); +#define MLXSW_PCI_CQE2_MIRROR_CONG_INVALID 0xFFFF + +/* pci_cqe_mirror_cong_high + * Congestion level in units of 8KB of the egress traffic class of the original + * packet that does mirroring to the CPU. Value of 0xFFFF means that the + * congestion level is invalid. + */ +MLXSW_ITEM32(pci, cqe2, mirror_cong_high, 0x08, 16, 4); + /* pci_cqe_trap_id * Trap ID that captured the packet. */ @@ -208,6 +217,59 @@ MLXSW_ITEM32(pci, cqe0, dqn, 0x0C, 1, 5); MLXSW_ITEM32(pci, cqe12, dqn, 0x0C, 1, 6); mlxsw_pci_cqe_item_helpers(dqn, 0, 12, 12); +#define MLXSW_PCI_CQE2_MIRROR_TCLASS_INVALID 0x1F + +/* pci_cqe_mirror_tclass + * The egress traffic class of the original packet that does mirroring to the + * CPU. Value of 0x1F means that the traffic class is invalid. + */ +MLXSW_ITEM32(pci, cqe2, mirror_tclass, 0x10, 27, 5); + +/* pci_cqe_tx_lag + * The Tx port of a packet that is mirrored / sampled to the CPU is a LAG. + */ +MLXSW_ITEM32(pci, cqe2, tx_lag, 0x10, 24, 1); + +/* pci_cqe_tx_lag_subport + * The port index within the LAG of a packet that is mirrored / sampled to the + * CPU. Reserved when tx_lag is 0. + */ +MLXSW_ITEM32(pci, cqe2, tx_lag_subport, 0x10, 16, 8); + +#define MLXSW_PCI_CQE2_TX_PORT_MULTI_PORT 0xFFFE +#define MLXSW_PCI_CQE2_TX_PORT_INVALID 0xFFFF + +/* pci_cqe_tx_lag_id + * The Tx LAG ID of the original packet that is mirrored / sampled to the CPU. + * Value of 0xFFFE means multi-port. Value fo 0xFFFF means that the Tx LAG ID + * is invalid. Reserved when tx_lag is 0. + */ +MLXSW_ITEM32(pci, cqe2, tx_lag_id, 0x10, 0, 16); + +/* pci_cqe_tx_system_port + * The Tx port of the original packet that is mirrored / sampled to the CPU. + * Value of 0xFFFE means multi-port. Value fo 0xFFFF means that the Tx port is + * invalid. Reserved when tx_lag is 1. + */ +MLXSW_ITEM32(pci, cqe2, tx_system_port, 0x10, 0, 16); + +/* pci_cqe_mirror_cong_low + * Congestion level in units of 8KB of the egress traffic class of the original + * packet that does mirroring to the CPU. Value of 0xFFFF means that the + * congestion level is invalid. + */ +MLXSW_ITEM32(pci, cqe2, mirror_cong_low, 0x14, 20, 12); + +#define MLXSW_PCI_CQE2_MIRROR_CONG_SHIFT 13 /* Units of 8KB. */ + +static inline u16 mlxsw_pci_cqe2_mirror_cong_get(const char *cqe) +{ + u16 cong_high = mlxsw_pci_cqe2_mirror_cong_high_get(cqe); + u16 cong_low = mlxsw_pci_cqe2_mirror_cong_low_get(cqe); + + return cong_high << 12 | cong_low; +} + /* pci_cqe_user_def_val_orig_pkt_len * When trap_id is an ACL: User defined value from policy engine action. */ @@ -218,6 +280,15 @@ MLXSW_ITEM32(pci, cqe2, user_def_val_orig_pkt_len, 0x14, 0, 20); */ MLXSW_ITEM32(pci, cqe2, mirror_reason, 0x18, 24, 8); +#define MLXSW_PCI_CQE2_MIRROR_LATENCY_INVALID 0xFFFFFF + +/* pci_cqe_mirror_latency + * End-to-end latency of the original packet that does mirroring to the CPU. + * Value of 0xFFFFFF means that the latency is invalid. Units are according to + * MOGCR.mirror_latency_units. + */ +MLXSW_ITEM32(pci, cqe2, mirror_latency, 0x1C, 8, 24); + /* pci_cqe_owner * Ownership bit. 
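The 16-bit congestion level is split across two CQEv2 dwords and reassembled by mlxsw_pci_cqe2_mirror_cong_get() above as high << 12 | low; 0xFFFF is the "invalid" sentinel, and the value is in 8 KB units, hence the shift by MLXSW_PCI_CQE2_MIRROR_CONG_SHIFT (13) in mlxsw_pci_cqe_rdq_md_init(). A standalone check of that arithmetic (plain user-space C, not driver code):

#include <assert.h>
#include <stdint.h>

static uint32_t mirror_cong_bytes(uint16_t cong_high4, uint16_t cong_low12)
{
	uint16_t cong = cong_high4 << 12 | cong_low12;

	if (cong == 0xFFFF)
		return 0; /* sentinel: congestion level not available */
	return (uint32_t)cong << 13; /* 8 KB units -> bytes */
}

int main(void)
{
	assert(mirror_cong_bytes(0x1, 0x000) == 1u << 25); /* 4096 * 8 KB */
	assert(mirror_cong_bytes(0xf, 0xfff) == 0);        /* invalid */
	return 0;
}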
*/ diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index c4adc7f740d3..900b4bf5bb5b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -842,6 +842,14 @@ MLXSW_ITEM32(reg, spvid, local_port, 0x00, 16, 8); */ MLXSW_ITEM32(reg, spvid, sub_port, 0x00, 8, 8); +/* reg_spvid_egr_et_set + * When VLAN is pushed at ingress (for untagged packets or for + * QinQ push mode) then the EtherType is decided at the egress port. + * Reserved when Spectrum-1. + * Access: RW + */ +MLXSW_ITEM32(reg, spvid, egr_et_set, 0x04, 24, 1); + /* reg_spvid_et_vlan * EtherType used for when VLAN is pushed at ingress (for untagged * packets or for QinQ push mode). @@ -849,6 +857,7 @@ MLXSW_ITEM32(reg, spvid, sub_port, 0x00, 8, 8); * 1: ether_type1 * 2: ether_type2 - Reserved when Spectrum-1, supported by Spectrum-2 * Ethertype IDs are configured by SVER. + * Reserved when egr_et_set = 1. * Access: RW */ MLXSW_ITEM32(reg, spvid, et_vlan, 0x04, 16, 2); @@ -2079,6 +2088,41 @@ static inline void mlxsw_reg_spvc_pack(char *payload, u8 local_port, bool et1, mlxsw_reg_spvc_et0_set(payload, et0); } +/* SPEVET - Switch Port Egress VLAN EtherType + * ------------------------------------------ + * The switch port egress VLAN EtherType configures which EtherType to push at + * egress for packets incoming through a local port for which 'SPVID.egr_et_set' + * is set. + */ +#define MLXSW_REG_SPEVET_ID 0x202A +#define MLXSW_REG_SPEVET_LEN 0x08 + +MLXSW_REG_DEFINE(spevet, MLXSW_REG_SPEVET_ID, MLXSW_REG_SPEVET_LEN); + +/* reg_spevet_local_port + * Egress Local port number. + * Not supported to CPU port. + * Access: Index + */ +MLXSW_ITEM32(reg, spevet, local_port, 0x00, 16, 8); + +/* reg_spevet_et_vlan + * Egress EtherType VLAN to push when SPVID.egr_et_set field set for the packet: + * 0: ether_type0 - (default) + * 1: ether_type1 + * 2: ether_type2 + * Access: RW + */ +MLXSW_ITEM32(reg, spevet, et_vlan, 0x04, 16, 2); + +static inline void mlxsw_reg_spevet_pack(char *payload, u8 local_port, + u8 et_vlan) +{ + MLXSW_REG_ZERO(spevet, payload); + mlxsw_reg_spevet_local_port_set(payload, local_port); + mlxsw_reg_spevet_et_vlan_set(payload, et_vlan); +} + /* CWTP - Congetion WRED ECN TClass Profile * ---------------------------------------- * Configures the profiles for queues of egress port and traffic class @@ -5637,7 +5681,7 @@ static inline void mlxsw_reg_pspa_pack(char *payload, u8 swid, u8 local_port) MLXSW_REG_DEFINE(pmaos, MLXSW_REG_PMAOS_ID, MLXSW_REG_PMAOS_LEN); -/* reg_slot_index +/* reg_pmaos_slot_index * Slot index. * Access: Index */ @@ -8086,6 +8130,60 @@ mlxsw_reg_rtdp_ipip4_pack(char *payload, u16 irif, mlxsw_reg_rtdp_ipip_expected_gre_key_set(payload, expected_gre_key); } +/* RATRAD - Router Adjacency Table Activity Dump Register + * ------------------------------------------------------ + * The RATRAD register is used to dump and optionally clear activity bits of + * router adjacency table entries. + */ +#define MLXSW_REG_RATRAD_ID 0x8022 +#define MLXSW_REG_RATRAD_LEN 0x210 + +MLXSW_REG_DEFINE(ratrad, MLXSW_REG_RATRAD_ID, MLXSW_REG_RATRAD_LEN); + +enum { + /* Read activity */ + MLXSW_REG_RATRAD_OP_READ_ACTIVITY, + /* Read and clear activity */ + MLXSW_REG_RATRAD_OP_READ_CLEAR_ACTIVITY, +}; + +/* reg_ratrad_op + * Access: Operation + */ +MLXSW_ITEM32(reg, ratrad, op, 0x00, 30, 2); + +/* reg_ratrad_ecmp_size + * ecmp_size is the amount of sequential entries from adjacency_index. 
Valid + * ranges: + * Spectrum-1: 32-64, 512, 1024, 2048, 4096 + * Spectrum-2/3: 32-128, 256, 512, 1024, 2048, 4096 + * Access: Index + */ +MLXSW_ITEM32(reg, ratrad, ecmp_size, 0x00, 0, 13); + +/* reg_ratrad_adjacency_index + * Index into the adjacency table. + * Access: Index + */ +MLXSW_ITEM32(reg, ratrad, adjacency_index, 0x04, 0, 24); + +/* reg_ratrad_activity_vector + * Activity bit per adjacency index. + * Bits higher than ecmp_size are reserved. + * Access: RO + */ +MLXSW_ITEM_BIT_ARRAY(reg, ratrad, activity_vector, 0x10, 0x200, 1); + +static inline void mlxsw_reg_ratrad_pack(char *payload, u32 adjacency_index, + u16 ecmp_size) +{ + MLXSW_REG_ZERO(ratrad, payload); + mlxsw_reg_ratrad_op_set(payload, + MLXSW_REG_RATRAD_OP_READ_CLEAR_ACTIVITY); + mlxsw_reg_ratrad_ecmp_size_set(payload, ecmp_size); + mlxsw_reg_ratrad_adjacency_index_set(payload, adjacency_index); +} + /* RIGR-V2 - Router Interface Group Register Version 2 * --------------------------------------------------- * The RIGR_V2 register is used to add, remove and query egress interface list @@ -9925,15 +10023,28 @@ MLXSW_ITEM32(reg, mpar, enable, 0x04, 31, 1); */ MLXSW_ITEM32(reg, mpar, pa_id, 0x04, 0, 4); +#define MLXSW_REG_MPAR_RATE_MAX 3500000000UL + +/* reg_mpar_probability_rate + * Sampling rate. + * Valid values are: 1 to 3.5*10^9 + * Value of 1 means "sample all". Default is 1. + * Reserved when Spectrum-1. + * Access: RW + */ +MLXSW_ITEM32(reg, mpar, probability_rate, 0x08, 0, 32); + static inline void mlxsw_reg_mpar_pack(char *payload, u8 local_port, enum mlxsw_reg_mpar_i_e i_e, - bool enable, u8 pa_id) + bool enable, u8 pa_id, + u32 probability_rate) { MLXSW_REG_ZERO(mpar, payload); mlxsw_reg_mpar_local_port_set(payload, local_port); mlxsw_reg_mpar_enable_set(payload, enable); mlxsw_reg_mpar_i_e_set(payload, i_e); mlxsw_reg_mpar_pa_id_set(payload, pa_id); + mlxsw_reg_mpar_probability_rate_set(payload, probability_rate); } /* MGIR - Management General Information Register @@ -10577,6 +10688,8 @@ MLXSW_ITEM32(reg, mpagr, trigger, 0x00, 0, 4); */ MLXSW_ITEM32(reg, mpagr, pa_id, 0x04, 0, 4); +#define MLXSW_REG_MPAGR_RATE_MAX 3500000000UL + /* reg_mpagr_probability_rate * Sampling rate. * Valid values are: 1 to 3.5*10^9 @@ -10919,7 +11032,7 @@ MLXSW_REG_DEFINE(mfde, MLXSW_REG_MFDE_ID, MLXSW_REG_MFDE_LEN); * Which irisc triggered the event * Access: RO */ -MLXSW_ITEM32(reg, mfde, irisc_id, 0x00, 8, 4); +MLXSW_ITEM32(reg, mfde, irisc_id, 0x00, 24, 8); enum mlxsw_reg_mfde_event_id { MLXSW_REG_MFDE_EVENT_ID_CRSPACE_TO = 1, @@ -10930,7 +11043,7 @@ enum mlxsw_reg_mfde_event_id { /* reg_mfde_event_id * Access: RO */ -MLXSW_ITEM32(reg, mfde, event_id, 0x00, 0, 8); +MLXSW_ITEM32(reg, mfde, event_id, 0x00, 0, 16); enum mlxsw_reg_mfde_method { MLXSW_REG_MFDE_METHOD_QUERY, @@ -10979,6 +11092,13 @@ MLXSW_ITEM32(reg, mfde, log_address, 0x10, 0, 32); */ MLXSW_ITEM32(reg, mfde, log_id, 0x14, 0, 4); +/* reg_mfde_log_ip + * IP (instruction pointer) that triggered the timeout. + * Valid in case event_id == MLXSW_REG_MFDE_EVENT_ID_CRSPACE_TO + * Access: RO + */ +MLXSW_ITEM64(reg, mfde, log_ip, 0x18, 0, 64); + /* reg_mfde_pipes_mask * Bit per kvh pipe. 
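 * Presumably relevant in case event_id == MLXSW_REG_MFDE_EVENT_ID_KVD_IM_STOP,
 * by analogy with the event_id qualifier on reg_mfde_log_ip above.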
* Access: RO @@ -11995,6 +12115,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(sfmr), MLXSW_REG(spvmlr), MLXSW_REG(spvc), + MLXSW_REG(spevet), MLXSW_REG(cwtp), MLXSW_REG(cwtpm), MLXSW_REG(pgcr), @@ -12047,6 +12168,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(rtar), MLXSW_REG(ratr), MLXSW_REG(rtdp), + MLXSW_REG(ratrad), MLXSW_REG(rdpm), MLXSW_REG(ricnt), MLXSW_REG(rrcr), diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 1650d9852b5b..bca0354482cb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -23,6 +23,8 @@ #include <linux/netlink.h> #include <linux/jhash.h> #include <linux/log2.h> +#include <linux/refcount.h> +#include <linux/rhashtable.h> #include <net/switchdev.h> #include <net/pkt_cls.h> #include <net/netevent.h> @@ -45,7 +47,7 @@ #define MLXSW_SP1_FWREV_MAJOR 13 #define MLXSW_SP1_FWREV_MINOR 2008 -#define MLXSW_SP1_FWREV_SUBMINOR 2018 +#define MLXSW_SP1_FWREV_SUBMINOR 2406 #define MLXSW_SP1_FWREV_CAN_RESET_MINOR 1702 static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = { @@ -62,7 +64,7 @@ static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = { #define MLXSW_SP2_FWREV_MAJOR 29 #define MLXSW_SP2_FWREV_MINOR 2008 -#define MLXSW_SP2_FWREV_SUBMINOR 2018 +#define MLXSW_SP2_FWREV_SUBMINOR 2406 static const struct mlxsw_fw_rev mlxsw_sp2_fw_rev = { .major = MLXSW_SP2_FWREV_MAJOR, @@ -77,7 +79,7 @@ static const struct mlxsw_fw_rev mlxsw_sp2_fw_rev = { #define MLXSW_SP3_FWREV_MAJOR 30 #define MLXSW_SP3_FWREV_MINOR 2008 -#define MLXSW_SP3_FWREV_SUBMINOR 2018 +#define MLXSW_SP3_FWREV_SUBMINOR 2406 static const struct mlxsw_fw_rev mlxsw_sp3_fw_rev = { .major = MLXSW_SP3_FWREV_MAJOR, @@ -400,6 +402,22 @@ int mlxsw_sp_ethtype_to_sver_type(u16 ethtype, u8 *p_sver_type) return 0; } +int mlxsw_sp_port_egress_ethtype_set(struct mlxsw_sp_port *mlxsw_sp_port, + u16 ethtype) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char spevet_pl[MLXSW_REG_SPEVET_LEN]; + u8 sver_type; + int err; + + err = mlxsw_sp_ethtype_to_sver_type(ethtype, &sver_type); + if (err) + return err; + + mlxsw_reg_spevet_pack(spevet_pl, mlxsw_sp_port->local_port, sver_type); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spevet), spevet_pl); +} + static int __mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, u16 ethtype) { @@ -2212,32 +2230,6 @@ void mlxsw_sp_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, mlxsw_sp->ptp_ops->receive(mlxsw_sp, skb, local_port); } -void mlxsw_sp_sample_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, - u8 local_port) -{ - struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port]; - struct mlxsw_sp_port_sample *sample; - u32 size; - - if (unlikely(!mlxsw_sp_port)) { - dev_warn_ratelimited(mlxsw_sp->bus_info->dev, "Port %d: sample skb received for non-existent port\n", - local_port); - goto out; - } - - rcu_read_lock(); - sample = rcu_dereference(mlxsw_sp_port->sample); - if (!sample) - goto out_unlock; - size = sample->truncate ? 
sample->trunc_size : skb->len; - psample_sample_packet(sample->psample_group, skb, size, - mlxsw_sp_port->dev->ifindex, 0, sample->rate); -out_unlock: - rcu_read_unlock(); -out: - consume_skb(skb); -} - #define MLXSW_SP_RXL_NO_MARK(_trap_id, _action, _trap_group, _is_ctrl) \ MLXSW_RXL(mlxsw_sp_rx_listener_no_mark_func, _trap_id, _action, \ _is_ctrl, SP_##_trap_group, DISCARD) @@ -2576,6 +2568,147 @@ static const struct mlxsw_sp_ptp_ops mlxsw_sp2_ptp_ops = { .get_stats = mlxsw_sp2_get_stats, }; +struct mlxsw_sp_sample_trigger_node { + struct mlxsw_sp_sample_trigger trigger; + struct mlxsw_sp_sample_params params; + struct rhash_head ht_node; + struct rcu_head rcu; + refcount_t refcount; +}; + +static const struct rhashtable_params mlxsw_sp_sample_trigger_ht_params = { + .key_offset = offsetof(struct mlxsw_sp_sample_trigger_node, trigger), + .head_offset = offsetof(struct mlxsw_sp_sample_trigger_node, ht_node), + .key_len = sizeof(struct mlxsw_sp_sample_trigger), + .automatic_shrinking = true, +}; + +static void +mlxsw_sp_sample_trigger_key_init(struct mlxsw_sp_sample_trigger *key, + const struct mlxsw_sp_sample_trigger *trigger) +{ + memset(key, 0, sizeof(*key)); + key->type = trigger->type; + key->local_port = trigger->local_port; +} + +/* RCU read lock must be held */ +struct mlxsw_sp_sample_params * +mlxsw_sp_sample_trigger_params_lookup(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_sample_trigger *trigger) +{ + struct mlxsw_sp_sample_trigger_node *trigger_node; + struct mlxsw_sp_sample_trigger key; + + mlxsw_sp_sample_trigger_key_init(&key, trigger); + trigger_node = rhashtable_lookup(&mlxsw_sp->sample_trigger_ht, &key, + mlxsw_sp_sample_trigger_ht_params); + if (!trigger_node) + return NULL; + + return &trigger_node->params; +} + +static int +mlxsw_sp_sample_trigger_node_init(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_sample_trigger *trigger, + const struct mlxsw_sp_sample_params *params) +{ + struct mlxsw_sp_sample_trigger_node *trigger_node; + int err; + + trigger_node = kzalloc(sizeof(*trigger_node), GFP_KERNEL); + if (!trigger_node) + return -ENOMEM; + + trigger_node->trigger = *trigger; + trigger_node->params = *params; + refcount_set(&trigger_node->refcount, 1); + + err = rhashtable_insert_fast(&mlxsw_sp->sample_trigger_ht, + &trigger_node->ht_node, + mlxsw_sp_sample_trigger_ht_params); + if (err) + goto err_rhashtable_insert; + + return 0; + +err_rhashtable_insert: + kfree(trigger_node); + return err; +} + +static void +mlxsw_sp_sample_trigger_node_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_sample_trigger_node *trigger_node) +{ + rhashtable_remove_fast(&mlxsw_sp->sample_trigger_ht, + &trigger_node->ht_node, + mlxsw_sp_sample_trigger_ht_params); + kfree_rcu(trigger_node, rcu); +} + +int +mlxsw_sp_sample_trigger_params_set(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_sample_trigger *trigger, + const struct mlxsw_sp_sample_params *params, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_sample_trigger_node *trigger_node; + struct mlxsw_sp_sample_trigger key; + + ASSERT_RTNL(); + + mlxsw_sp_sample_trigger_key_init(&key, trigger); + + trigger_node = rhashtable_lookup_fast(&mlxsw_sp->sample_trigger_ht, + &key, + mlxsw_sp_sample_trigger_ht_params); + if (!trigger_node) + return mlxsw_sp_sample_trigger_node_init(mlxsw_sp, &key, + params); + + if (trigger_node->trigger.local_port) { + NL_SET_ERR_MSG_MOD(extack, "Sampling already enabled on port"); + return -EINVAL; + } + + if (trigger_node->params.psample_group != params->psample_group || + 
trigger_node->params.truncate != params->truncate || + trigger_node->params.rate != params->rate || + trigger_node->params.trunc_size != params->trunc_size) { + NL_SET_ERR_MSG_MOD(extack, "Sampling parameters do not match for an existing sampling trigger"); + return -EINVAL; + } + + refcount_inc(&trigger_node->refcount); + + return 0; +} + +void +mlxsw_sp_sample_trigger_params_unset(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_sample_trigger *trigger) +{ + struct mlxsw_sp_sample_trigger_node *trigger_node; + struct mlxsw_sp_sample_trigger key; + + ASSERT_RTNL(); + + mlxsw_sp_sample_trigger_key_init(&key, trigger); + + trigger_node = rhashtable_lookup_fast(&mlxsw_sp->sample_trigger_ht, + &key, + mlxsw_sp_sample_trigger_ht_params); + if (!trigger_node) + return; + + if (!refcount_dec_and_test(&trigger_node->refcount)) + return; + + mlxsw_sp_sample_trigger_node_fini(mlxsw_sp, trigger_node); +} + static int mlxsw_sp_netdevice_event(struct notifier_block *unused, unsigned long event, void *ptr); @@ -2730,6 +2863,13 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_port_module_info_init; } + err = rhashtable_init(&mlxsw_sp->sample_trigger_ht, + &mlxsw_sp_sample_trigger_ht_params); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to init sampling trigger hashtable\n"); + goto err_sample_trigger_init; + } + err = mlxsw_sp_ports_create(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to create ports\n"); @@ -2739,6 +2879,8 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, return 0; err_ports_create: + rhashtable_destroy(&mlxsw_sp->sample_trigger_ht); +err_sample_trigger_init: mlxsw_sp_port_module_info_fini(mlxsw_sp); err_port_module_info_init: mlxsw_sp_dpipe_fini(mlxsw_sp); @@ -2788,6 +2930,7 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core, { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + mlxsw_sp->switchdev_ops = &mlxsw_sp1_switchdev_ops; mlxsw_sp->kvdl_ops = &mlxsw_sp1_kvdl_ops; mlxsw_sp->afa_ops = &mlxsw_sp1_act_afa_ops; mlxsw_sp->afk_ops = &mlxsw_sp1_afk_ops; @@ -2796,7 +2939,6 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->acl_tcam_ops = &mlxsw_sp1_acl_tcam_ops; mlxsw_sp->nve_ops_arr = mlxsw_sp1_nve_ops_arr; mlxsw_sp->mac_mask = mlxsw_sp1_mac_mask; - mlxsw_sp->rif_ops_arr = mlxsw_sp1_rif_ops_arr; mlxsw_sp->sb_vals = &mlxsw_sp1_sb_vals; mlxsw_sp->sb_ops = &mlxsw_sp1_sb_ops; mlxsw_sp->port_type_speed_ops = &mlxsw_sp1_port_type_speed_ops; @@ -2804,6 +2946,8 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->span_ops = &mlxsw_sp1_span_ops; mlxsw_sp->policer_core_ops = &mlxsw_sp1_policer_core_ops; mlxsw_sp->trap_ops = &mlxsw_sp1_trap_ops; + mlxsw_sp->mall_ops = &mlxsw_sp1_mall_ops; + mlxsw_sp->router_ops = &mlxsw_sp1_router_ops; mlxsw_sp->listeners = mlxsw_sp1_listener; mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp1_listener); mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP1; @@ -2817,6 +2961,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core, { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + mlxsw_sp->switchdev_ops = &mlxsw_sp2_switchdev_ops; mlxsw_sp->kvdl_ops = &mlxsw_sp2_kvdl_ops; mlxsw_sp->afa_ops = &mlxsw_sp2_act_afa_ops; mlxsw_sp->afk_ops = &mlxsw_sp2_afk_ops; @@ -2825,7 +2970,6 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->acl_tcam_ops = &mlxsw_sp2_acl_tcam_ops; mlxsw_sp->nve_ops_arr = mlxsw_sp2_nve_ops_arr; mlxsw_sp->mac_mask = mlxsw_sp2_mac_mask; - mlxsw_sp->rif_ops_arr = mlxsw_sp2_rif_ops_arr; 
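For orientation, the init hunks here drop the per-RIF ops array in favor of wider per-ASIC function tables (switchdev_ops, mall_ops, router_ops): each generation's init routine wires up the variant-specific table once, and common code then calls through the pointers without branching on the chip type. The following minimal, self-contained C sketch illustrates that dispatch pattern only; struct chip, struct mall_ops and the gen1/gen2 handlers are hypothetical stand-ins, not the driver's actual symbols:

#include <stdio.h>

struct chip;

/* Variant-specific matchall sampling hooks, one table per ASIC generation. */
struct mall_ops {
	int (*sample_add)(struct chip *chip, unsigned int rate);
	void (*sample_del)(struct chip *chip);
};

struct chip {
	const struct mall_ops *mall_ops;	/* wired once at init time */
};

/* First-generation variant: programs a dedicated per-port sampler. */
static int gen1_sample_add(struct chip *chip, unsigned int rate)
{
	(void)chip;
	printf("gen1: port sampler enabled, rate 1:%u\n", rate);
	return 0;
}

static void gen1_sample_del(struct chip *chip)
{
	(void)chip;
	printf("gen1: port sampler disabled\n");
}

static const struct mall_ops gen1_mall_ops = {
	.sample_add = gen1_sample_add,
	.sample_del = gen1_sample_del,
};

/* Later generations: mirror to the CPU with a probabilistic trigger. */
static int gen2_sample_add(struct chip *chip, unsigned int rate)
{
	(void)chip;
	printf("gen2: mirror agent bound, probability rate %u\n", rate);
	return 0;
}

static void gen2_sample_del(struct chip *chip)
{
	(void)chip;
	printf("gen2: mirror agent unbound\n");
}

static const struct mall_ops gen2_mall_ops = {
	.sample_add = gen2_sample_add,
	.sample_del = gen2_sample_del,
};

int main(void)
{
	struct chip chip = { .mall_ops = &gen2_mall_ops };

	/* Common code dispatches through the table, never on the chip type. */
	chip.mall_ops->sample_add(&chip, 100);
	chip.mall_ops->sample_del(&chip);
	return 0;
}

In the driver itself the two variants genuinely diverge: the Spectrum-1 matchall hooks program the per-port sampler and reject rates above MLXSW_REG_MPSC_RATE_MAX, while the Spectrum-2 hooks bind a SPAN agent with a probability rate, as the spectrum_matchall.c hunks later in this diff show.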
mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals; mlxsw_sp->sb_ops = &mlxsw_sp2_sb_ops; mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops; @@ -2833,6 +2977,8 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->span_ops = &mlxsw_sp2_span_ops; mlxsw_sp->policer_core_ops = &mlxsw_sp2_policer_core_ops; mlxsw_sp->trap_ops = &mlxsw_sp2_trap_ops; + mlxsw_sp->mall_ops = &mlxsw_sp2_mall_ops; + mlxsw_sp->router_ops = &mlxsw_sp2_router_ops; mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP2; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack); @@ -2844,6 +2990,7 @@ static int mlxsw_sp3_init(struct mlxsw_core *mlxsw_core, { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + mlxsw_sp->switchdev_ops = &mlxsw_sp2_switchdev_ops; mlxsw_sp->kvdl_ops = &mlxsw_sp2_kvdl_ops; mlxsw_sp->afa_ops = &mlxsw_sp2_act_afa_ops; mlxsw_sp->afk_ops = &mlxsw_sp2_afk_ops; @@ -2852,7 +2999,6 @@ static int mlxsw_sp3_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->acl_tcam_ops = &mlxsw_sp2_acl_tcam_ops; mlxsw_sp->nve_ops_arr = mlxsw_sp2_nve_ops_arr; mlxsw_sp->mac_mask = mlxsw_sp2_mac_mask; - mlxsw_sp->rif_ops_arr = mlxsw_sp2_rif_ops_arr; mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals; mlxsw_sp->sb_ops = &mlxsw_sp3_sb_ops; mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops; @@ -2860,6 +3006,8 @@ static int mlxsw_sp3_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->span_ops = &mlxsw_sp3_span_ops; mlxsw_sp->policer_core_ops = &mlxsw_sp2_policer_core_ops; mlxsw_sp->trap_ops = &mlxsw_sp2_trap_ops; + mlxsw_sp->mall_ops = &mlxsw_sp2_mall_ops; + mlxsw_sp->router_ops = &mlxsw_sp2_router_ops; mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack); @@ -2870,6 +3018,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); mlxsw_sp_ports_remove(mlxsw_sp); + rhashtable_destroy(&mlxsw_sp->sample_trigger_ht); mlxsw_sp_port_module_info_fini(mlxsw_sp); mlxsw_sp_dpipe_fini(mlxsw_sp); unregister_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp), @@ -4283,7 +4432,7 @@ static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev, if (br_vlan_enabled(br_dev)) { br_vlan_get_proto(br_dev, &proto); if (proto == ETH_P_8021AD) { - NL_SET_ERR_MSG_MOD(extack, "Uppers are not supported on top of an 802.1ad bridge"); + NL_SET_ERR_MSG_MOD(extack, "Upper devices are not supported on top of an 802.1ad bridge"); return -EOPNOTSUPP; } } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index d9d9e1f488f9..f99db88ee884 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -16,11 +16,13 @@ #include <linux/in6.h> #include <linux/notifier.h> #include <linux/net_namespace.h> +#include <linux/spinlock.h> #include <net/psample.h> #include <net/pkt_cls.h> #include <net/red.h> #include <net/vxlan.h> #include <net/flow_offload.h> +#include <net/inet_ecn.h> #include "port.h" #include "core.h" @@ -86,10 +88,15 @@ enum mlxsw_sp_rif_type { MLXSW_SP_RIF_TYPE_MAX, }; -struct mlxsw_sp_rif_ops; +struct mlxsw_sp_router_ops; -extern const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[]; -extern const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[]; +extern const struct mlxsw_sp_router_ops mlxsw_sp1_router_ops; +extern const struct mlxsw_sp_router_ops mlxsw_sp2_router_ops; + +struct mlxsw_sp_switchdev_ops; + +extern const struct mlxsw_sp_switchdev_ops 
mlxsw_sp1_switchdev_ops; +extern const struct mlxsw_sp_switchdev_ops mlxsw_sp2_switchdev_ops; enum mlxsw_sp_fid_type { MLXSW_SP_FID_TYPE_8021Q, @@ -133,6 +140,7 @@ struct mlxsw_sp_ptp_state; struct mlxsw_sp_ptp_ops; struct mlxsw_sp_span_ops; struct mlxsw_sp_qdisc_state; +struct mlxsw_sp_mall_entry; struct mlxsw_sp_port_mapping { u8 module; @@ -148,6 +156,7 @@ struct mlxsw_sp { const unsigned char *mac_mask; struct mlxsw_sp_upper *lags; struct mlxsw_sp_port_mapping **port_mapping; + struct rhashtable sample_trigger_ht; struct mlxsw_sp_sb *sb; struct mlxsw_sp_bridge *bridge; struct mlxsw_sp_router *router; @@ -164,6 +173,7 @@ struct mlxsw_sp { struct mlxsw_sp_counter_pool *counter_pool; struct mlxsw_sp_span *span; struct mlxsw_sp_trap *trap; + const struct mlxsw_sp_switchdev_ops *switchdev_ops; const struct mlxsw_sp_kvdl_ops *kvdl_ops; const struct mlxsw_afa_ops *afa_ops; const struct mlxsw_afk_ops *afk_ops; @@ -171,7 +181,6 @@ struct mlxsw_sp { const struct mlxsw_sp_acl_rulei_ops *acl_rulei_ops; const struct mlxsw_sp_acl_tcam_ops *acl_tcam_ops; const struct mlxsw_sp_nve_ops **nve_ops_arr; - const struct mlxsw_sp_rif_ops **rif_ops_arr; const struct mlxsw_sp_sb_vals *sb_vals; const struct mlxsw_sp_sb_ops *sb_ops; const struct mlxsw_sp_port_type_speed_ops *port_type_speed_ops; @@ -179,6 +188,8 @@ struct mlxsw_sp { const struct mlxsw_sp_span_ops *span_ops; const struct mlxsw_sp_policer_core_ops *policer_core_ops; const struct mlxsw_sp_trap_ops *trap_ops; + const struct mlxsw_sp_mall_ops *mall_ops; + const struct mlxsw_sp_router_ops *router_ops; const struct mlxsw_listener *listeners; size_t listeners_count; u32 lowest_shaper_bs; @@ -232,7 +243,18 @@ struct mlxsw_sp_port_pcpu_stats { u32 tx_dropped; }; -struct mlxsw_sp_port_sample { +enum mlxsw_sp_sample_trigger_type { + MLXSW_SP_SAMPLE_TRIGGER_TYPE_INGRESS, + MLXSW_SP_SAMPLE_TRIGGER_TYPE_EGRESS, + MLXSW_SP_SAMPLE_TRIGGER_TYPE_POLICY_ENGINE, +}; + +struct mlxsw_sp_sample_trigger { + enum mlxsw_sp_sample_trigger_type type; + u8 local_port; /* Reserved when trigger type is not ingress / egress. 
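+ * For the policy engine trigger type the field stays cleared, so all such
+ * triggers map to the same mlxsw_sp_sample_trigger_key_init() key and hence
+ * share a single set of sampling parameters.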
*/ +}; + +struct mlxsw_sp_sample_params { struct psample_group *psample_group; u32 trunc_size; u32 rate; @@ -302,7 +324,6 @@ struct mlxsw_sp_port { struct mlxsw_sp_port_xstats xstats; struct delayed_work update_dw; } periodic_hw_stats; - struct mlxsw_sp_port_sample __rcu *sample; struct list_head vlans_list; struct mlxsw_sp_port_vlan *default_vlan; struct mlxsw_sp_qdisc_state *qdisc; @@ -347,6 +368,20 @@ struct mlxsw_sp_port_type_speed_ops { u32 (*ptys_proto_cap_masked_get)(u32 eth_proto_cap); }; +static inline u8 mlxsw_sp_tunnel_ecn_decap(u8 outer_ecn, u8 inner_ecn, + bool *trap_en) +{ + bool set_ce = false; + + *trap_en = !!__INET_ECN_decapsulate(outer_ecn, inner_ecn, &set_ce); + if (set_ce) + return INET_ECN_CE; + else if (outer_ecn == INET_ECN_ECT_1 && inner_ecn == INET_ECN_ECT_0) + return INET_ECN_ECT_1; + else + return inner_ecn; +} + static inline struct net_device * mlxsw_sp_bridge_vxlan_dev_find(struct net_device *br_dev) { @@ -531,6 +566,17 @@ void mlxsw_sp_hdroom_bufs_reset_sizes(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_hdroom *hdroom); int mlxsw_sp_hdroom_configure(struct mlxsw_sp_port *mlxsw_sp_port, const struct mlxsw_sp_hdroom *hdroom); +struct mlxsw_sp_sample_params * +mlxsw_sp_sample_trigger_params_lookup(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_sample_trigger *trigger); +int +mlxsw_sp_sample_trigger_params_set(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_sample_trigger *trigger, + const struct mlxsw_sp_sample_params *params, + struct netlink_ext_ack *extack); +void +mlxsw_sp_sample_trigger_params_unset(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_sample_trigger *trigger); extern const struct mlxsw_sp_sb_vals mlxsw_sp1_sb_vals; extern const struct mlxsw_sp_sb_vals mlxsw_sp2_sb_vals; @@ -568,8 +614,6 @@ void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb, u8 local_port, void *priv); void mlxsw_sp_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, u8 local_port); -void mlxsw_sp_sample_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, - u8 local_port); int mlxsw_sp_port_speed_get(struct mlxsw_sp_port *mlxsw_sp_port, u32 *speed); int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index, @@ -586,6 +630,8 @@ int mlxsw_sp_port_vp_mode_set(struct mlxsw_sp_port *mlxsw_sp_port, bool enable); int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, bool learn_enable); int mlxsw_sp_ethtype_to_sver_type(u16 ethtype, u8 *p_sver_type); +int mlxsw_sp_port_egress_ethtype_set(struct mlxsw_sp_port *mlxsw_sp_port, + u16 ethtype); int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, u16 ethtype); struct mlxsw_sp_port_vlan * @@ -924,6 +970,12 @@ int mlxsw_sp_acl_rulei_act_count(struct mlxsw_sp *mlxsw_sp, int mlxsw_sp_acl_rulei_act_fid_set(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, u16 fid, struct netlink_ext_ack *extack); +int mlxsw_sp_acl_rulei_act_sample(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + struct mlxsw_sp_flow_block *block, + struct psample_group *psample_group, u32 rate, + u32 trunc_size, bool truncate, + struct netlink_ext_ack *extack); struct mlxsw_sp_acl_rule; @@ -1033,6 +1085,19 @@ extern const struct mlxsw_afk_ops mlxsw_sp1_afk_ops; extern const struct mlxsw_afk_ops mlxsw_sp2_afk_ops; /* spectrum_matchall.c */ +struct mlxsw_sp_mall_ops { + int (*sample_add)(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_mall_entry *mall_entry, + struct 
netlink_ext_ack *extack); + void (*sample_del)(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_mall_entry *mall_entry); +}; + +extern const struct mlxsw_sp_mall_ops mlxsw_sp1_mall_ops; +extern const struct mlxsw_sp_mall_ops mlxsw_sp2_mall_ops; + enum mlxsw_sp_mall_action_type { MLXSW_SP_MALL_ACTION_TYPE_MIRROR, MLXSW_SP_MALL_ACTION_TYPE_SAMPLE, @@ -1048,6 +1113,11 @@ struct mlxsw_sp_mall_trap_entry { int span_id; }; +struct mlxsw_sp_mall_sample_entry { + struct mlxsw_sp_sample_params params; + int span_id; /* Relevant for Spectrum-2 onwards. */ +}; + struct mlxsw_sp_mall_entry { struct list_head list; unsigned long cookie; @@ -1057,7 +1127,7 @@ struct mlxsw_sp_mall_entry { union { struct mlxsw_sp_mall_mirror_entry mirror; struct mlxsw_sp_mall_trap_entry trap; - struct mlxsw_sp_port_sample sample; + struct mlxsw_sp_mall_sample_entry sample; }; struct rcu_head rcu; }; @@ -1068,7 +1138,8 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp *mlxsw_sp, void mlxsw_sp_mall_destroy(struct mlxsw_sp_flow_block *block, struct tc_cls_matchall_offload *f); int mlxsw_sp_mall_port_bind(struct mlxsw_sp_flow_block *block, - struct mlxsw_sp_port *mlxsw_sp_port); + struct mlxsw_sp_port *mlxsw_sp_port, + struct netlink_ext_ack *extack); void mlxsw_sp_mall_port_unbind(struct mlxsw_sp_flow_block *block, struct mlxsw_sp_port *mlxsw_sp_port); int mlxsw_sp_mall_prio_get(struct mlxsw_sp_flow_block *block, u32 chain_index, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 8cfa03a75374..67cedfa76f78 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -688,6 +688,31 @@ int mlxsw_sp_acl_rulei_act_fid_set(struct mlxsw_sp *mlxsw_sp, return mlxsw_afa_block_append_fid_set(rulei->act_block, fid, extack); } +int mlxsw_sp_acl_rulei_act_sample(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + struct mlxsw_sp_flow_block *block, + struct psample_group *psample_group, u32 rate, + u32 trunc_size, bool truncate, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_flow_block_binding *binding; + struct mlxsw_sp_port *mlxsw_sp_port; + + if (!list_is_singular(&block->binding_list)) { + NL_SET_ERR_MSG_MOD(extack, "Only a single sampling source is allowed"); + return -EOPNOTSUPP; + } + binding = list_first_entry(&block->binding_list, + struct mlxsw_sp_flow_block_binding, list); + mlxsw_sp_port = binding->mlxsw_sp_port; + + return mlxsw_afa_block_append_sampler(rulei->act_block, + mlxsw_sp_port->local_port, + psample_group, rate, trunc_size, + truncate, binding->ingress, + extack); +} + struct mlxsw_sp_acl_rule * mlxsw_sp_acl_rule_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_ruleset *ruleset, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c index 90372d1c28d4..c72aa38424dc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c @@ -192,6 +192,22 @@ static void mlxsw_sp_act_policer_del(void *priv, u16 policer_index) policer_index); } +static int mlxsw_sp1_act_sampler_add(void *priv, u8 local_port, + struct psample_group *psample_group, + u32 rate, u32 trunc_size, bool truncate, + bool ingress, int *p_span_id, + struct netlink_ext_ack *extack) +{ + NL_SET_ERR_MSG_MOD(extack, "Sampling action is not supported on Spectrum-1"); + return -EOPNOTSUPP; +} + +static 
void mlxsw_sp1_act_sampler_del(void *priv, u8 local_port, int span_id, + bool ingress) +{ + WARN_ON_ONCE(1); +} + const struct mlxsw_afa_ops mlxsw_sp1_act_afa_ops = { .kvdl_set_add = mlxsw_sp1_act_kvdl_set_add, .kvdl_set_del = mlxsw_sp_act_kvdl_set_del, @@ -204,8 +220,73 @@ const struct mlxsw_afa_ops mlxsw_sp1_act_afa_ops = { .mirror_del = mlxsw_sp_act_mirror_del, .policer_add = mlxsw_sp_act_policer_add, .policer_del = mlxsw_sp_act_policer_del, + .sampler_add = mlxsw_sp1_act_sampler_add, + .sampler_del = mlxsw_sp1_act_sampler_del, }; +static int mlxsw_sp2_act_sampler_add(void *priv, u8 local_port, + struct psample_group *psample_group, + u32 rate, u32 trunc_size, bool truncate, + bool ingress, int *p_span_id, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_span_agent_parms agent_parms = { + .session_id = MLXSW_SP_SPAN_SESSION_ID_SAMPLING, + }; + struct mlxsw_sp_sample_trigger trigger = { + .type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_POLICY_ENGINE, + }; + struct mlxsw_sp_sample_params params; + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp *mlxsw_sp = priv; + int err; + + params.psample_group = psample_group; + params.trunc_size = trunc_size; + params.rate = rate; + params.truncate = truncate; + err = mlxsw_sp_sample_trigger_params_set(mlxsw_sp, &trigger, &params, + extack); + if (err) + return err; + + err = mlxsw_sp_span_agent_get(mlxsw_sp, p_span_id, &agent_parms); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to get SPAN agent"); + goto err_span_agent_get; + } + + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + err = mlxsw_sp_span_analyzed_port_get(mlxsw_sp_port, ingress); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to get analyzed port"); + goto err_analyzed_port_get; + } + + return 0; + +err_analyzed_port_get: + mlxsw_sp_span_agent_put(mlxsw_sp, *p_span_id); +err_span_agent_get: + mlxsw_sp_sample_trigger_params_unset(mlxsw_sp, &trigger); + return err; +} + +static void mlxsw_sp2_act_sampler_del(void *priv, u8 local_port, int span_id, + bool ingress) +{ + struct mlxsw_sp_sample_trigger trigger = { + .type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_POLICY_ENGINE, + }; + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + mlxsw_sp_span_analyzed_port_put(mlxsw_sp_port, ingress); + mlxsw_sp_span_agent_put(mlxsw_sp, span_id); + mlxsw_sp_sample_trigger_params_unset(mlxsw_sp, &trigger); +} + const struct mlxsw_afa_ops mlxsw_sp2_act_afa_ops = { .kvdl_set_add = mlxsw_sp2_act_kvdl_set_add, .kvdl_set_del = mlxsw_sp_act_kvdl_set_del, @@ -218,6 +299,8 @@ const struct mlxsw_afa_ops mlxsw_sp2_act_afa_ops = { .mirror_del = mlxsw_sp_act_mirror_del, .policer_add = mlxsw_sp_act_policer_add, .policer_del = mlxsw_sp_act_policer_del, + .sampler_add = mlxsw_sp2_act_sampler_add, + .sampler_del = mlxsw_sp2_act_sampler_del, .dummy_first_set = true, }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index ed81d4fa48ac..1a2fef2a5379 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -912,9 +912,8 @@ static u64 mlxsw_sp_dpipe_table_adj_size(struct mlxsw_sp *mlxsw_sp) u64 size = 0; mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) - if (mlxsw_sp_nexthop_offload(nh) && - !mlxsw_sp_nexthop_group_has_ipip(nh) && - !mlxsw_sp_nexthop_is_discard(nh)) + if (mlxsw_sp_nexthop_is_forward(nh) && + !mlxsw_sp_nexthop_group_has_ipip(nh)) size++; return size; } @@ -1105,9 +1104,8 @@ start_again: nh_skip =
nh_count; nh_count = 0; mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) { - if (!mlxsw_sp_nexthop_offload(nh) || - mlxsw_sp_nexthop_group_has_ipip(nh) || - mlxsw_sp_nexthop_is_discard(nh)) + if (!mlxsw_sp_nexthop_is_forward(nh) || + mlxsw_sp_nexthop_group_has_ipip(nh)) continue; if (nh_count < nh_skip) @@ -1180,6 +1178,7 @@ out: static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable) { + char ratr_pl[MLXSW_REG_RATR_LEN]; struct mlxsw_sp *mlxsw_sp = priv; struct mlxsw_sp_nexthop *nh; u32 adj_hash_index = 0; @@ -1187,9 +1186,8 @@ static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable) u32 adj_size = 0; mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) { - if (!mlxsw_sp_nexthop_offload(nh) || - mlxsw_sp_nexthop_group_has_ipip(nh) || - mlxsw_sp_nexthop_is_discard(nh)) + if (!mlxsw_sp_nexthop_is_forward(nh) || + mlxsw_sp_nexthop_group_has_ipip(nh)) continue; mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_size, @@ -1198,8 +1196,9 @@ static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable) mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); else mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); - mlxsw_sp_nexthop_update(mlxsw_sp, - adj_index + adj_hash_index, nh); + mlxsw_sp_nexthop_eth_update(mlxsw_sp, + adj_index + adj_hash_index, nh, + true, ratr_pl); } return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c index 0bd64169bf81..c8061beed6db 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c @@ -1059,6 +1059,131 @@ mlxsw_sp_get_ts_info(struct net_device *netdev, struct ethtool_ts_info *info) return mlxsw_sp->ptp_ops->get_ts_info(mlxsw_sp, info); } +static void +mlxsw_sp_get_eth_phy_stats(struct net_device *dev, + struct ethtool_eth_phy_stats *phy_stats) +{ + char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; + + if (mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_IEEE_8023_CNT, + 0, ppcnt_pl)) + return; + + phy_stats->SymbolErrorDuringCarrier = + mlxsw_reg_ppcnt_a_symbol_error_during_carrier_get(ppcnt_pl); +} + +static void +mlxsw_sp_get_eth_mac_stats(struct net_device *dev, + struct ethtool_eth_mac_stats *mac_stats) +{ + char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; + + if (mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_IEEE_8023_CNT, + 0, ppcnt_pl)) + return; + + mac_stats->FramesTransmittedOK = + mlxsw_reg_ppcnt_a_frames_transmitted_ok_get(ppcnt_pl); + mac_stats->FramesReceivedOK = + mlxsw_reg_ppcnt_a_frames_received_ok_get(ppcnt_pl); + mac_stats->FrameCheckSequenceErrors = + mlxsw_reg_ppcnt_a_frame_check_sequence_errors_get(ppcnt_pl); + mac_stats->AlignmentErrors = + mlxsw_reg_ppcnt_a_alignment_errors_get(ppcnt_pl); + mac_stats->OctetsTransmittedOK = + mlxsw_reg_ppcnt_a_octets_transmitted_ok_get(ppcnt_pl); + mac_stats->OctetsReceivedOK = + mlxsw_reg_ppcnt_a_octets_received_ok_get(ppcnt_pl); + mac_stats->MulticastFramesXmittedOK = + mlxsw_reg_ppcnt_a_multicast_frames_xmitted_ok_get(ppcnt_pl); + mac_stats->BroadcastFramesXmittedOK = + mlxsw_reg_ppcnt_a_broadcast_frames_xmitted_ok_get(ppcnt_pl); + mac_stats->MulticastFramesReceivedOK = + mlxsw_reg_ppcnt_a_multicast_frames_received_ok_get(ppcnt_pl); + mac_stats->BroadcastFramesReceivedOK = + mlxsw_reg_ppcnt_a_broadcast_frames_received_ok_get(ppcnt_pl); + mac_stats->InRangeLengthErrors = + mlxsw_reg_ppcnt_a_in_range_length_errors_get(ppcnt_pl); + mac_stats->OutOfRangeLengthField = + mlxsw_reg_ppcnt_a_out_of_range_length_field_get(ppcnt_pl); + 
mac_stats->FrameTooLongErrors = + mlxsw_reg_ppcnt_a_frame_too_long_errors_get(ppcnt_pl); +} + +static void +mlxsw_sp_get_eth_ctrl_stats(struct net_device *dev, + struct ethtool_eth_ctrl_stats *ctrl_stats) +{ + char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; + + if (mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_IEEE_8023_CNT, + 0, ppcnt_pl)) + return; + + ctrl_stats->MACControlFramesTransmitted = + mlxsw_reg_ppcnt_a_mac_control_frames_transmitted_get(ppcnt_pl); + ctrl_stats->MACControlFramesReceived = + mlxsw_reg_ppcnt_a_mac_control_frames_received_get(ppcnt_pl); + ctrl_stats->UnsupportedOpcodesReceived = + mlxsw_reg_ppcnt_a_unsupported_opcodes_received_get(ppcnt_pl); +} + +static const struct ethtool_rmon_hist_range mlxsw_rmon_ranges[] = { + { 0, 64 }, + { 65, 127 }, + { 128, 255 }, + { 256, 511 }, + { 512, 1023 }, + { 1024, 1518 }, + { 1519, 2047 }, + { 2048, 4095 }, + { 4096, 8191 }, + { 8192, 10239 }, + {} +}; + +static void +mlxsw_sp_get_rmon_stats(struct net_device *dev, + struct ethtool_rmon_stats *rmon, + const struct ethtool_rmon_hist_range **ranges) +{ + char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; + + if (mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_RFC_2819_CNT, + 0, ppcnt_pl)) + return; + + rmon->undersize_pkts = + mlxsw_reg_ppcnt_ether_stats_undersize_pkts_get(ppcnt_pl); + rmon->oversize_pkts = + mlxsw_reg_ppcnt_ether_stats_oversize_pkts_get(ppcnt_pl); + rmon->fragments = + mlxsw_reg_ppcnt_ether_stats_fragments_get(ppcnt_pl); + + rmon->hist[0] = mlxsw_reg_ppcnt_ether_stats_pkts64octets_get(ppcnt_pl); + rmon->hist[1] = + mlxsw_reg_ppcnt_ether_stats_pkts65to127octets_get(ppcnt_pl); + rmon->hist[2] = + mlxsw_reg_ppcnt_ether_stats_pkts128to255octets_get(ppcnt_pl); + rmon->hist[3] = + mlxsw_reg_ppcnt_ether_stats_pkts256to511octets_get(ppcnt_pl); + rmon->hist[4] = + mlxsw_reg_ppcnt_ether_stats_pkts512to1023octets_get(ppcnt_pl); + rmon->hist[5] = + mlxsw_reg_ppcnt_ether_stats_pkts1024to1518octets_get(ppcnt_pl); + rmon->hist[6] = + mlxsw_reg_ppcnt_ether_stats_pkts1519to2047octets_get(ppcnt_pl); + rmon->hist[7] = + mlxsw_reg_ppcnt_ether_stats_pkts2048to4095octets_get(ppcnt_pl); + rmon->hist[8] = + mlxsw_reg_ppcnt_ether_stats_pkts4096to8191octets_get(ppcnt_pl); + rmon->hist[9] = + mlxsw_reg_ppcnt_ether_stats_pkts8192to10239octets_get(ppcnt_pl); + + *ranges = mlxsw_rmon_ranges; +} + const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { .cap_link_lanes_supported = true, .get_drvinfo = mlxsw_sp_port_get_drvinfo, @@ -1075,6 +1200,10 @@ const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { .get_module_info = mlxsw_sp_get_module_info, .get_module_eeprom = mlxsw_sp_get_module_eeprom, .get_ts_info = mlxsw_sp_get_ts_info, + .get_eth_phy_stats = mlxsw_sp_get_eth_phy_stats, + .get_eth_mac_stats = mlxsw_sp_get_eth_mac_stats, + .get_eth_ctrl_stats = mlxsw_sp_get_eth_ctrl_stats, + .get_rmon_stats = mlxsw_sp_get_rmon_stats, }; struct mlxsw_sp1_port_link_mode { @@ -1230,16 +1359,22 @@ mlxsw_sp1_from_ptys_link_mode(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, u32 ptys_eth_proto, struct ethtool_link_ksettings *cmd) { + struct mlxsw_sp1_port_link_mode link; int i; - cmd->link_mode = -1; + cmd->base.speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; + cmd->lanes = 0; if (!carrier_ok) return; for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) { - if (ptys_eth_proto & mlxsw_sp1_port_link_mode[i].mask) - cmd->link_mode = mlxsw_sp1_port_link_mode[i].mask_ethtool; + if (ptys_eth_proto & mlxsw_sp1_port_link_mode[i].mask) { + link = mlxsw_sp1_port_link_mode[i]; + ethtool_params_from_link_mode(cmd, + 
link.mask_ethtool); + } } } @@ -1672,7 +1807,9 @@ mlxsw_sp2_from_ptys_link_mode(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, struct mlxsw_sp2_port_link_mode link; int i; - cmd->link_mode = -1; + cmd->base.speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; + cmd->lanes = 0; if (!carrier_ok) return; @@ -1680,7 +1817,8 @@ mlxsw_sp2_from_ptys_link_mode(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { if (ptys_eth_proto & mlxsw_sp2_port_link_mode[i].mask) { link = mlxsw_sp2_port_link_mode[i]; - cmd->link_mode = link.mask_ethtool[1]; + ethtool_params_from_link_mode(cmd, + link.mask_ethtool[1]); } } } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c index 0456cda33808..9e50c823a354 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c @@ -71,7 +71,7 @@ static int mlxsw_sp_flow_block_bind(struct mlxsw_sp *mlxsw_sp, return -EOPNOTSUPP; } - err = mlxsw_sp_mall_port_bind(block, mlxsw_sp_port); + err = mlxsw_sp_mall_port_bind(block, mlxsw_sp_port, extack); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 41855e58564b..be3791ca6069 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -24,6 +24,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, const struct flow_action_entry *act; int mirror_act_count = 0; int police_act_count = 0; + int sample_act_count = 0; int err, i; if (!flow_action_has_entries(flow_action)) @@ -190,6 +191,11 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, return -EOPNOTSUPP; } + if (act->police.rate_pkt_ps) { + NL_SET_ERR_MSG_MOD(extack, "QoS offload does not support packets per second"); + return -EOPNOTSUPP; + } + /* The kernel might adjust the requested burst size so * that it is not exactly a power of two. Re-adjust it
Re-adjust it * here since the hardware only supports burst sizes @@ -204,6 +210,23 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, return err; break; } + case FLOW_ACTION_SAMPLE: { + if (sample_act_count++) { + NL_SET_ERR_MSG_MOD(extack, "Multiple sample actions per rule are not supported"); + return -EOPNOTSUPP; + } + + err = mlxsw_sp_acl_rulei_act_sample(mlxsw_sp, rulei, + block, + act->sample.psample_group, + act->sample.rate, + act->sample.trunc_size, + act->sample.truncate, + extack); + if (err) + return err; + break; + } default: NL_SET_ERR_MSG_MOD(extack, "Unsupported action"); dev_err(mlxsw_sp->bus_info->dev, "Unsupported action\n"); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c index 6ccca39bae84..5facabd86882 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@ -127,14 +127,16 @@ bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr) static int mlxsw_sp_ipip_nexthop_update_gre4(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_ipip_entry *ipip_entry) + struct mlxsw_sp_ipip_entry *ipip_entry, + bool force, char *ratr_pl) { u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb); __be32 daddr4 = mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev); - char ratr_pl[MLXSW_REG_RATR_LEN]; + enum mlxsw_reg_ratr_op op; - mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, - true, MLXSW_REG_RATR_TYPE_IPIP, + op = force ? MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY : + MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY; + mlxsw_reg_ratr_pack(ratr_pl, op, true, MLXSW_REG_RATR_TYPE_IPIP, adj_index, rif_index); mlxsw_reg_ratr_ipip4_entry_pack(ratr_pl, be32_to_cpu(daddr4)); @@ -335,12 +337,11 @@ static int mlxsw_sp_ipip_ecn_decap_init_one(struct mlxsw_sp *mlxsw_sp, u8 inner_ecn, u8 outer_ecn) { char tidem_pl[MLXSW_REG_TIDEM_LEN]; - bool trap_en, set_ce = false; u8 new_inner_ecn; + bool trap_en; - trap_en = __INET_ECN_decapsulate(outer_ecn, inner_ecn, &set_ce); - new_inner_ecn = set_ce ? INET_ECN_CE : inner_ecn; - + new_inner_ecn = mlxsw_sp_tunnel_ecn_decap(outer_ecn, inner_ecn, + &trap_en); mlxsw_reg_tidem_pack(tidem_pl, outer_ecn, inner_ecn, new_inner_ecn, trap_en, trap_en ? MLXSW_TRAP_ID_DECAP_ECN0 : 0); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tidem), tidem_pl); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h index 87bef9880e5e..f0837b42d1d6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h @@ -40,7 +40,8 @@ struct mlxsw_sp_ipip_ops { enum mlxsw_sp_l3proto ul_proto; /* Underlay. 
*/ int (*nexthop_update)(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_ipip_entry *ipip_entry); + struct mlxsw_sp_ipip_entry *ipip_entry, + bool force, char *ratr_pl); bool (*can_offload)(const struct mlxsw_sp *mlxsw_sp, const struct net_device *ol_dev); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c index f30599ad6019..07b371cd9818 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c @@ -24,7 +24,8 @@ mlxsw_sp_mall_entry_find(struct mlxsw_sp_flow_block *block, unsigned long cookie static int mlxsw_sp_mall_port_mirror_add(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_mall_entry *mall_entry) + struct mlxsw_sp_mall_entry *mall_entry, + struct netlink_ext_ack *extack) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_span_agent_parms agent_parms = {}; @@ -33,28 +34,35 @@ mlxsw_sp_mall_port_mirror_add(struct mlxsw_sp_port *mlxsw_sp_port, int err; if (!mall_entry->mirror.to_dev) { - netdev_err(mlxsw_sp_port->dev, "Could not find requested device\n"); + NL_SET_ERR_MSG(extack, "Could not find requested device"); return -EINVAL; } agent_parms.to_dev = mall_entry->mirror.to_dev; err = mlxsw_sp_span_agent_get(mlxsw_sp, &mall_entry->mirror.span_id, &agent_parms); - if (err) + if (err) { + NL_SET_ERR_MSG(extack, "Failed to get SPAN agent"); return err; + } err = mlxsw_sp_span_analyzed_port_get(mlxsw_sp_port, mall_entry->ingress); - if (err) + if (err) { + NL_SET_ERR_MSG(extack, "Failed to get analyzed port"); goto err_analyzed_port_get; + } trigger = mall_entry->ingress ? MLXSW_SP_SPAN_TRIGGER_INGRESS : MLXSW_SP_SPAN_TRIGGER_EGRESS; parms.span_id = mall_entry->mirror.span_id; + parms.probability_rate = 1; err = mlxsw_sp_span_agent_bind(mlxsw_sp, trigger, mlxsw_sp_port, &parms); - if (err) + if (err) { + NL_SET_ERR_MSG(extack, "Failed to bind SPAN agent"); goto err_agent_bind; + } return 0; @@ -93,46 +101,64 @@ static int mlxsw_sp_mall_port_sample_set(struct mlxsw_sp_port *mlxsw_sp_port, static int mlxsw_sp_mall_port_sample_add(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_mall_entry *mall_entry) + struct mlxsw_sp_mall_entry *mall_entry, + struct netlink_ext_ack *extack) { + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_sample_trigger trigger; int err; - if (rtnl_dereference(mlxsw_sp_port->sample)) { - netdev_err(mlxsw_sp_port->dev, "sample already active\n"); - return -EEXIST; - } - rcu_assign_pointer(mlxsw_sp_port->sample, &mall_entry->sample); + if (mall_entry->ingress) + trigger.type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_INGRESS; + else + trigger.type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_EGRESS; + trigger.local_port = mlxsw_sp_port->local_port; + err = mlxsw_sp_sample_trigger_params_set(mlxsw_sp, &trigger, + &mall_entry->sample.params, + extack); + if (err) + return err; - err = mlxsw_sp_mall_port_sample_set(mlxsw_sp_port, true, - mall_entry->sample.rate); + err = mlxsw_sp->mall_ops->sample_add(mlxsw_sp, mlxsw_sp_port, + mall_entry, extack); if (err) goto err_port_sample_set; return 0; err_port_sample_set: - RCU_INIT_POINTER(mlxsw_sp_port->sample, NULL); + mlxsw_sp_sample_trigger_params_unset(mlxsw_sp, &trigger); return err; } static void -mlxsw_sp_mall_port_sample_del(struct mlxsw_sp_port *mlxsw_sp_port) +mlxsw_sp_mall_port_sample_del(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_mall_entry *mall_entry) { - if (!mlxsw_sp_port->sample) - return; + struct mlxsw_sp 
*mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_sample_trigger trigger; - mlxsw_sp_mall_port_sample_set(mlxsw_sp_port, false, 1); - RCU_INIT_POINTER(mlxsw_sp_port->sample, NULL); + if (mall_entry->ingress) + trigger.type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_INGRESS; + else + trigger.type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_EGRESS; + trigger.local_port = mlxsw_sp_port->local_port; + + mlxsw_sp->mall_ops->sample_del(mlxsw_sp, mlxsw_sp_port, mall_entry); + mlxsw_sp_sample_trigger_params_unset(mlxsw_sp, &trigger); } static int mlxsw_sp_mall_port_rule_add(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_mall_entry *mall_entry) + struct mlxsw_sp_mall_entry *mall_entry, + struct netlink_ext_ack *extack) { switch (mall_entry->type) { case MLXSW_SP_MALL_ACTION_TYPE_MIRROR: - return mlxsw_sp_mall_port_mirror_add(mlxsw_sp_port, mall_entry); + return mlxsw_sp_mall_port_mirror_add(mlxsw_sp_port, mall_entry, + extack); case MLXSW_SP_MALL_ACTION_TYPE_SAMPLE: - return mlxsw_sp_mall_port_sample_add(mlxsw_sp_port, mall_entry); + return mlxsw_sp_mall_port_sample_add(mlxsw_sp_port, mall_entry, + extack); default: WARN_ON(1); return -EINVAL; @@ -148,7 +174,7 @@ mlxsw_sp_mall_port_rule_del(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_mall_port_mirror_del(mlxsw_sp_port, mall_entry); break; case MLXSW_SP_MALL_ACTION_TYPE_SAMPLE: - mlxsw_sp_mall_port_sample_del(mlxsw_sp_port); + mlxsw_sp_mall_port_sample_del(mlxsw_sp_port, mall_entry); break; default: WARN_ON(1); @@ -212,6 +238,11 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp *mlxsw_sp, flower_prio_valid = true; } + if (protocol != htons(ETH_P_ALL)) { + NL_SET_ERR_MSG(f->common.extack, "matchall rules only supported with 'all' protocol"); + return -EOPNOTSUPP; + } + mall_entry = kzalloc(sizeof(*mall_entry), GFP_KERNEL); if (!mall_entry) return -ENOMEM; @@ -219,54 +250,41 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp *mlxsw_sp, mall_entry->priority = f->common.prio; mall_entry->ingress = mlxsw_sp_flow_block_is_ingress_bound(block); + if (flower_prio_valid && mall_entry->ingress && + mall_entry->priority >= flower_min_prio) { + NL_SET_ERR_MSG(f->common.extack, "Failed to add behind existing flower rules"); + err = -EOPNOTSUPP; + goto errout; + } + if (flower_prio_valid && !mall_entry->ingress && + mall_entry->priority <= flower_max_prio) { + NL_SET_ERR_MSG(f->common.extack, "Failed to add in front of existing flower rules"); + err = -EOPNOTSUPP; + goto errout; + } + act = &f->rule->action.entries[0]; - if (act->id == FLOW_ACTION_MIRRED && protocol == htons(ETH_P_ALL)) { - if (flower_prio_valid && mall_entry->ingress && - mall_entry->priority >= flower_min_prio) { - NL_SET_ERR_MSG(f->common.extack, "Failed to add behind existing flower rules"); - err = -EOPNOTSUPP; - goto errout; - } - if (flower_prio_valid && !mall_entry->ingress && - mall_entry->priority <= flower_max_prio) { - NL_SET_ERR_MSG(f->common.extack, "Failed to add in front of existing flower rules"); - err = -EOPNOTSUPP; - goto errout; - } + switch (act->id) { + case FLOW_ACTION_MIRRED: mall_entry->type = MLXSW_SP_MALL_ACTION_TYPE_MIRROR; mall_entry->mirror.to_dev = act->dev; - } else if (act->id == FLOW_ACTION_SAMPLE && - protocol == htons(ETH_P_ALL)) { - if (!mall_entry->ingress) { - NL_SET_ERR_MSG(f->common.extack, "Sample is not supported on egress"); - err = -EOPNOTSUPP; - goto errout; - } - if (flower_prio_valid && - mall_entry->priority >= flower_min_prio) { - NL_SET_ERR_MSG(f->common.extack, "Failed to add behind existing flower rules"); - err = -EOPNOTSUPP; - goto errout; - } - if 
(act->sample.rate > MLXSW_REG_MPSC_RATE_MAX) { - NL_SET_ERR_MSG(f->common.extack, "Sample rate not supported"); - err = -EOPNOTSUPP; - goto errout; - } + break; + case FLOW_ACTION_SAMPLE: mall_entry->type = MLXSW_SP_MALL_ACTION_TYPE_SAMPLE; - mall_entry->sample.psample_group = act->sample.psample_group; - mall_entry->sample.truncate = act->sample.truncate; - mall_entry->sample.trunc_size = act->sample.trunc_size; - mall_entry->sample.rate = act->sample.rate; - } else { + mall_entry->sample.params.psample_group = act->sample.psample_group; + mall_entry->sample.params.truncate = act->sample.truncate; + mall_entry->sample.params.trunc_size = act->sample.trunc_size; + mall_entry->sample.params.rate = act->sample.rate; + break; + default: err = -EOPNOTSUPP; goto errout; } list_for_each_entry(binding, &block->binding_list, list) { err = mlxsw_sp_mall_port_rule_add(binding->mlxsw_sp_port, - mall_entry); + mall_entry, f->common.extack); if (err) goto rollback; } @@ -314,13 +332,15 @@ void mlxsw_sp_mall_destroy(struct mlxsw_sp_flow_block *block, } int mlxsw_sp_mall_port_bind(struct mlxsw_sp_flow_block *block, - struct mlxsw_sp_port *mlxsw_sp_port) + struct mlxsw_sp_port *mlxsw_sp_port, + struct netlink_ext_ack *extack) { struct mlxsw_sp_mall_entry *mall_entry; int err; list_for_each_entry(mall_entry, &block->mall.list, list) { - err = mlxsw_sp_mall_port_rule_add(mlxsw_sp_port, mall_entry); + err = mlxsw_sp_mall_port_rule_add(mlxsw_sp_port, mall_entry, + extack); if (err) goto rollback; } @@ -355,3 +375,104 @@ int mlxsw_sp_mall_prio_get(struct mlxsw_sp_flow_block *block, u32 chain_index, *p_max_prio = block->mall.max_prio; return 0; } + +static int mlxsw_sp1_mall_sample_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_mall_entry *mall_entry, + struct netlink_ext_ack *extack) +{ + u32 rate = mall_entry->sample.params.rate; + + if (!mall_entry->ingress) { + NL_SET_ERR_MSG(extack, "Sampling is not supported on egress"); + return -EOPNOTSUPP; + } + + if (rate > MLXSW_REG_MPSC_RATE_MAX) { + NL_SET_ERR_MSG(extack, "Unsupported sampling rate"); + return -EOPNOTSUPP; + } + + return mlxsw_sp_mall_port_sample_set(mlxsw_sp_port, true, rate); +} + +static void mlxsw_sp1_mall_sample_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_mall_entry *mall_entry) +{ + mlxsw_sp_mall_port_sample_set(mlxsw_sp_port, false, 1); +} + +const struct mlxsw_sp_mall_ops mlxsw_sp1_mall_ops = { + .sample_add = mlxsw_sp1_mall_sample_add, + .sample_del = mlxsw_sp1_mall_sample_del, +}; + +static int mlxsw_sp2_mall_sample_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_mall_entry *mall_entry, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_span_trigger_parms trigger_parms = {}; + struct mlxsw_sp_span_agent_parms agent_parms = { + .to_dev = NULL, /* Mirror to CPU. */ + .session_id = MLXSW_SP_SPAN_SESSION_ID_SAMPLING, + }; + u32 rate = mall_entry->sample.params.rate; + enum mlxsw_sp_span_trigger span_trigger; + int err; + + err = mlxsw_sp_span_agent_get(mlxsw_sp, &mall_entry->sample.span_id, + &agent_parms); + if (err) { + NL_SET_ERR_MSG(extack, "Failed to get SPAN agent"); + return err; + } + + err = mlxsw_sp_span_analyzed_port_get(mlxsw_sp_port, + mall_entry->ingress); + if (err) { + NL_SET_ERR_MSG(extack, "Failed to get analyzed port"); + goto err_analyzed_port_get; + } + + span_trigger = mall_entry->ingress ? 
MLXSW_SP_SPAN_TRIGGER_INGRESS : + MLXSW_SP_SPAN_TRIGGER_EGRESS; + trigger_parms.span_id = mall_entry->sample.span_id; + trigger_parms.probability_rate = rate; + err = mlxsw_sp_span_agent_bind(mlxsw_sp, span_trigger, mlxsw_sp_port, + &trigger_parms); + if (err) { + NL_SET_ERR_MSG(extack, "Failed to bind SPAN agent"); + goto err_agent_bind; + } + + return 0; + +err_agent_bind: + mlxsw_sp_span_analyzed_port_put(mlxsw_sp_port, mall_entry->ingress); +err_analyzed_port_get: + mlxsw_sp_span_agent_put(mlxsw_sp, mall_entry->sample.span_id); + return err; +} + +static void mlxsw_sp2_mall_sample_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_mall_entry *mall_entry) +{ + struct mlxsw_sp_span_trigger_parms trigger_parms = {}; + enum mlxsw_sp_span_trigger span_trigger; + + span_trigger = mall_entry->ingress ? MLXSW_SP_SPAN_TRIGGER_INGRESS : + MLXSW_SP_SPAN_TRIGGER_EGRESS; + trigger_parms.span_id = mall_entry->sample.span_id; + mlxsw_sp_span_agent_unbind(mlxsw_sp, span_trigger, mlxsw_sp_port, + &trigger_parms); + mlxsw_sp_span_analyzed_port_put(mlxsw_sp_port, mall_entry->ingress); + mlxsw_sp_span_agent_put(mlxsw_sp, mall_entry->sample.span_id); +} + +const struct mlxsw_sp_mall_ops mlxsw_sp2_mall_ops = { + .sample_add = mlxsw_sp2_mall_sample_add, + .sample_del = mlxsw_sp2_mall_sample_del, +}; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c index e5ec595593f4..9eba8fa684ae 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c @@ -909,12 +909,11 @@ static int __mlxsw_sp_nve_ecn_decap_init(struct mlxsw_sp *mlxsw_sp, u8 inner_ecn, u8 outer_ecn) { char tndem_pl[MLXSW_REG_TNDEM_LEN]; - bool trap_en, set_ce = false; u8 new_inner_ecn; + bool trap_en; - trap_en = !!__INET_ECN_decapsulate(outer_ecn, inner_ecn, &set_ce); - new_inner_ecn = set_ce ? INET_ECN_CE : inner_ecn; - + new_inner_ecn = mlxsw_sp_tunnel_ecn_decap(outer_ecn, inner_ecn, + &trap_en); mlxsw_reg_tndem_pack(tndem_pl, outer_ecn, inner_ecn, new_inner_ecn, trap_en, trap_en ? 
MLXSW_TRAP_ID_DECAP_ECN0 : 0); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tndem), tndem_pl); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h index 2796d3659979..d8104fc6c900 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h @@ -18,7 +18,6 @@ struct mlxsw_sp_nve_config { u32 ul_tb_id; enum mlxsw_sp_l3proto ul_proto; union mlxsw_sp_l3addr ul_sip; - u16 ethertype; }; struct mlxsw_sp_nve { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c index 3e2bb22e9ca6..b84bb4b65098 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c @@ -113,7 +113,6 @@ static void mlxsw_sp_nve_vxlan_config(const struct mlxsw_sp_nve *nve, config->ul_proto = MLXSW_SP_L3_PROTO_IPV4; config->ul_sip.addr4 = cfg->saddr.sin.sin_addr.s_addr; config->udp_dport = cfg->dst_port; - config->ethertype = params->ethertype; } static int __mlxsw_sp_nve_parsing_set(struct mlxsw_sp *mlxsw_sp, @@ -318,20 +317,14 @@ static bool mlxsw_sp2_nve_vxlan_learning_set(struct mlxsw_sp *mlxsw_sp, } static int -mlxsw_sp2_nve_decap_ethertype_set(struct mlxsw_sp *mlxsw_sp, u16 ethertype) +mlxsw_sp2_nve_decap_ethertype_set(struct mlxsw_sp *mlxsw_sp) { char spvid_pl[MLXSW_REG_SPVID_LEN] = {}; - u8 sver_type; - int err; mlxsw_reg_spvid_tport_set(spvid_pl, true); mlxsw_reg_spvid_local_port_set(spvid_pl, MLXSW_REG_TUNNEL_PORT_NVE); - err = mlxsw_sp_ethtype_to_sver_type(ethertype, &sver_type); - if (err) - return err; - - mlxsw_reg_spvid_et_vlan_set(spvid_pl, sver_type); + mlxsw_reg_spvid_egr_et_set_set(spvid_pl, true); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvid), spvid_pl); } @@ -367,7 +360,7 @@ mlxsw_sp2_nve_vxlan_config_set(struct mlxsw_sp *mlxsw_sp, if (err) goto err_spvtr_write; - err = mlxsw_sp2_nve_decap_ethertype_set(mlxsw_sp, config->ethertype); + err = mlxsw_sp2_nve_decap_ethertype_set(mlxsw_sp); if (err) goto err_decap_ethertype_set; @@ -392,8 +385,6 @@ static void mlxsw_sp2_nve_vxlan_config_clear(struct mlxsw_sp *mlxsw_sp) char spvtr_pl[MLXSW_REG_SPVTR_LEN]; char tngcr_pl[MLXSW_REG_TNGCR_LEN]; - /* Set default EtherType */ - mlxsw_sp2_nve_decap_ethertype_set(mlxsw_sp, ETH_P_8021Q); mlxsw_reg_spvtr_pack(spvtr_pl, true, MLXSW_REG_TUNNEL_PORT_NVE, MLXSW_REG_SPVTR_IPVID_MODE_IEEE_COMPLIANT_PVID); mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvtr), spvtr_pl); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index fd672c6c9133..04672eb5c7f3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -29,7 +29,6 @@ struct mlxsw_sp_qdisc; struct mlxsw_sp_qdisc_ops { enum mlxsw_sp_qdisc_type type; int (*check_params)(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params); int (*replace)(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params); @@ -48,11 +47,14 @@ struct mlxsw_sp_qdisc_ops { */ void (*unoffload)(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params); + struct mlxsw_sp_qdisc *(*find_class)(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + u32 parent); + unsigned int num_classes; }; struct mlxsw_sp_qdisc { u32 handle; - u8 tclass_num; + int tclass_num; u8 prio_bitmap; union { struct 
red_stats red; @@ -66,11 +68,13 @@ struct mlxsw_sp_qdisc { } stats_base; struct mlxsw_sp_qdisc_ops *ops; + struct mlxsw_sp_qdisc *parent; + struct mlxsw_sp_qdisc *qdiscs; + unsigned int num_classes; }; struct mlxsw_sp_qdisc_state { struct mlxsw_sp_qdisc root_qdisc; - struct mlxsw_sp_qdisc tclass_qdiscs[IEEE_8021QAZ_MAX_TCS]; /* When a PRIO or ETS are added, the invisible FIFOs in their bands are * created first. When notifications for these FIFOs arrive, it is not @@ -85,15 +89,55 @@ struct mlxsw_sp_qdisc_state { */ u32 future_handle; bool future_fifos[IEEE_8021QAZ_MAX_TCS]; + struct mutex lock; /* Protects qdisc state. */ }; static bool -mlxsw_sp_qdisc_compare(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, u32 handle, - enum mlxsw_sp_qdisc_type type) +mlxsw_sp_qdisc_compare(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, u32 handle) +{ + return mlxsw_sp_qdisc->ops && mlxsw_sp_qdisc->handle == handle; +} + +static struct mlxsw_sp_qdisc * +mlxsw_sp_qdisc_walk(struct mlxsw_sp_qdisc *qdisc, + struct mlxsw_sp_qdisc *(*pre)(struct mlxsw_sp_qdisc *, + void *), + void *data) +{ + struct mlxsw_sp_qdisc *tmp; + unsigned int i; + + if (pre) { + tmp = pre(qdisc, data); + if (tmp) + return tmp; + } + + if (qdisc->ops) { + for (i = 0; i < qdisc->num_classes; i++) { + tmp = &qdisc->qdiscs[i]; + if (qdisc->ops) { + tmp = mlxsw_sp_qdisc_walk(tmp, pre, data); + if (tmp) + return tmp; + } + } + } + + return NULL; +} + +static struct mlxsw_sp_qdisc * +mlxsw_sp_qdisc_walk_cb_find(struct mlxsw_sp_qdisc *qdisc, void *data) { - return mlxsw_sp_qdisc && mlxsw_sp_qdisc->ops && - mlxsw_sp_qdisc->ops->type == type && - mlxsw_sp_qdisc->handle == handle; + u32 parent = *(u32 *)data; + + if (qdisc->ops && TC_H_MAJ(qdisc->handle) == TC_H_MAJ(parent)) { + if (qdisc->ops->find_class) + return qdisc->ops->find_class(qdisc, parent); + } + + return NULL; } static struct mlxsw_sp_qdisc * @@ -101,39 +145,46 @@ mlxsw_sp_qdisc_find(struct mlxsw_sp_port *mlxsw_sp_port, u32 parent, bool root_only) { struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; - int tclass, child_index; + if (!qdisc_state) + return NULL; if (parent == TC_H_ROOT) return &qdisc_state->root_qdisc; - - if (root_only || !qdisc_state || - !qdisc_state->root_qdisc.ops || - TC_H_MAJ(parent) != qdisc_state->root_qdisc.handle || - TC_H_MIN(parent) > IEEE_8021QAZ_MAX_TCS) + if (root_only) return NULL; + return mlxsw_sp_qdisc_walk(&qdisc_state->root_qdisc, + mlxsw_sp_qdisc_walk_cb_find, &parent); +} - child_index = TC_H_MIN(parent); - tclass = MLXSW_SP_PRIO_CHILD_TO_TCLASS(child_index); - return &qdisc_state->tclass_qdiscs[tclass]; +static struct mlxsw_sp_qdisc * +mlxsw_sp_qdisc_walk_cb_find_by_handle(struct mlxsw_sp_qdisc *qdisc, void *data) +{ + u32 handle = *(u32 *)data; + + if (qdisc->ops && qdisc->handle == handle) + return qdisc; + return NULL; } static struct mlxsw_sp_qdisc * mlxsw_sp_qdisc_find_by_handle(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle) { struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; - int i; - - if (qdisc_state->root_qdisc.handle == handle) - return &qdisc_state->root_qdisc; - if (qdisc_state->root_qdisc.handle == TC_H_UNSPEC) + if (!qdisc_state) return NULL; + return mlxsw_sp_qdisc_walk(&qdisc_state->root_qdisc, + mlxsw_sp_qdisc_walk_cb_find_by_handle, + &handle); +} - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) - if (qdisc_state->tclass_qdiscs[i].handle == handle) - return &qdisc_state->tclass_qdiscs[i]; +static void +mlxsw_sp_qdisc_reduce_parent_backlog(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + struct mlxsw_sp_qdisc *tmp; - 
return NULL; + for (tmp = mlxsw_sp_qdisc->parent; tmp; tmp = tmp->parent) + tmp->stats_base.backlog -= mlxsw_sp_qdisc->stats_base.backlog; } static int @@ -157,32 +208,48 @@ mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port, err_hdroom = mlxsw_sp_hdroom_configure(mlxsw_sp_port, &hdroom); } - if (mlxsw_sp_qdisc->ops && mlxsw_sp_qdisc->ops->destroy) + if (!mlxsw_sp_qdisc->ops) + return 0; + + mlxsw_sp_qdisc_reduce_parent_backlog(mlxsw_sp_qdisc); + if (mlxsw_sp_qdisc->ops->destroy) err = mlxsw_sp_qdisc->ops->destroy(mlxsw_sp_port, mlxsw_sp_qdisc); + if (mlxsw_sp_qdisc->ops->clean_stats) + mlxsw_sp_qdisc->ops->clean_stats(mlxsw_sp_port, mlxsw_sp_qdisc); mlxsw_sp_qdisc->handle = TC_H_UNSPEC; mlxsw_sp_qdisc->ops = NULL; - + mlxsw_sp_qdisc->num_classes = 0; + kfree(mlxsw_sp_qdisc->qdiscs); + mlxsw_sp_qdisc->qdiscs = NULL; return err_hdroom ?: err; } -static int -mlxsw_sp_qdisc_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, - struct mlxsw_sp_qdisc_ops *ops, void *params) +static int mlxsw_sp_qdisc_create(struct mlxsw_sp_port *mlxsw_sp_port, + u32 handle, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct mlxsw_sp_qdisc_ops *ops, void *params) { struct mlxsw_sp_qdisc *root_qdisc = &mlxsw_sp_port->qdisc->root_qdisc; struct mlxsw_sp_hdroom orig_hdroom; + unsigned int i; int err; - if (mlxsw_sp_qdisc->ops && mlxsw_sp_qdisc->ops->type != ops->type) - /* In case this location contained a different qdisc of the - * same type we can override the old qdisc configuration. - * Otherwise, we need to remove the old qdisc before setting the - * new one. - */ - mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); + err = ops->check_params(mlxsw_sp_port, params); + if (err) + return err; + + if (ops->num_classes) { + mlxsw_sp_qdisc->qdiscs = kcalloc(ops->num_classes, + sizeof(*mlxsw_sp_qdisc->qdiscs), + GFP_KERNEL); + if (!mlxsw_sp_qdisc->qdiscs) + return -ENOMEM; + + for (i = 0; i < ops->num_classes; i++) + mlxsw_sp_qdisc->qdiscs[i].parent = mlxsw_sp_qdisc; + } orig_hdroom = *mlxsw_sp_port->hdroom; if (root_qdisc == mlxsw_sp_qdisc) { @@ -198,20 +265,46 @@ mlxsw_sp_qdisc_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, goto err_hdroom_configure; } - err = ops->check_params(mlxsw_sp_port, mlxsw_sp_qdisc, params); + mlxsw_sp_qdisc->num_classes = ops->num_classes; + mlxsw_sp_qdisc->ops = ops; + mlxsw_sp_qdisc->handle = handle; + err = ops->replace(mlxsw_sp_port, handle, mlxsw_sp_qdisc, params); if (err) - goto err_bad_param; + goto err_replace; + + return 0; + +err_replace: + mlxsw_sp_qdisc->handle = TC_H_UNSPEC; + mlxsw_sp_qdisc->ops = NULL; + mlxsw_sp_qdisc->num_classes = 0; + mlxsw_sp_hdroom_configure(mlxsw_sp_port, &orig_hdroom); +err_hdroom_configure: + kfree(mlxsw_sp_qdisc->qdiscs); + mlxsw_sp_qdisc->qdiscs = NULL; + return err; +} + +static int +mlxsw_sp_qdisc_change(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params) +{ + struct mlxsw_sp_qdisc_ops *ops = mlxsw_sp_qdisc->ops; + int err; + + err = ops->check_params(mlxsw_sp_port, params); + if (err) + goto unoffload; err = ops->replace(mlxsw_sp_port, handle, mlxsw_sp_qdisc, params); if (err) - goto err_config; + goto unoffload; /* Check if the Qdisc changed. That includes a situation where an * invisible Qdisc replaces another one, or is being added for the * first time. 
*/ - if (mlxsw_sp_qdisc->handle != handle || handle == TC_H_UNSPEC) { - mlxsw_sp_qdisc->ops = ops; + if (mlxsw_sp_qdisc->handle != handle) { if (ops->clean_stats) ops->clean_stats(mlxsw_sp_port, mlxsw_sp_qdisc); } @@ -219,11 +312,8 @@ mlxsw_sp_qdisc_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, mlxsw_sp_qdisc->handle = handle; return 0; -err_bad_param: -err_config: - mlxsw_sp_hdroom_configure(mlxsw_sp_port, &orig_hdroom); -err_hdroom_configure: - if (mlxsw_sp_qdisc->handle == handle && ops->unoffload) +unoffload: + if (ops->unoffload) ops->unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, params); mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); @@ -231,6 +321,27 @@ err_hdroom_configure: } static int +mlxsw_sp_qdisc_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct mlxsw_sp_qdisc_ops *ops, void *params) +{ + if (mlxsw_sp_qdisc->ops && mlxsw_sp_qdisc->ops->type != ops->type) + /* In case this location contained a different qdisc of the + * same type we can override the old qdisc configuration. + * Otherwise, we need to remove the old qdisc before setting the + * new one. + */ + mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); + + if (!mlxsw_sp_qdisc->ops) + return mlxsw_sp_qdisc_create(mlxsw_sp_port, handle, + mlxsw_sp_qdisc, ops, params); + else + return mlxsw_sp_qdisc_change(mlxsw_sp_port, handle, + mlxsw_sp_qdisc, params); +} + +static int mlxsw_sp_qdisc_get_stats(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, struct tc_qopt_offload_stats *stats_ptr) @@ -295,7 +406,7 @@ mlxsw_sp_qdisc_collect_tc_stats(struct mlxsw_sp_port *mlxsw_sp_port, u64 *p_tx_bytes, u64 *p_tx_packets, u64 *p_drops, u64 *p_backlog) { - u8 tclass_num = mlxsw_sp_qdisc->tclass_num; + int tclass_num = mlxsw_sp_qdisc->tclass_num; struct mlxsw_sp_port_xstats *xstats; u64 tx_bytes, tx_packets; @@ -395,7 +506,7 @@ static void mlxsw_sp_setup_tc_qdisc_red_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) { - u8 tclass_num = mlxsw_sp_qdisc->tclass_num; + int tclass_num = mlxsw_sp_qdisc->tclass_num; struct mlxsw_sp_qdisc_stats *stats_base; struct mlxsw_sp_port_xstats *xstats; struct red_stats *red_base; @@ -421,20 +532,12 @@ static int mlxsw_sp_qdisc_red_destroy(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) { - struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; - struct mlxsw_sp_qdisc *root_qdisc = &qdisc_state->root_qdisc; - - if (root_qdisc != mlxsw_sp_qdisc) - root_qdisc->stats_base.backlog -= - mlxsw_sp_qdisc->stats_base.backlog; - return mlxsw_sp_tclass_congestion_disable(mlxsw_sp_port, mlxsw_sp_qdisc->tclass_num); } static int mlxsw_sp_qdisc_red_check_params(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; @@ -467,7 +570,7 @@ mlxsw_sp_qdisc_red_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct tc_red_qopt_offload_params *p = params; - u8 tclass_num = mlxsw_sp_qdisc->tclass_num; + int tclass_num = mlxsw_sp_qdisc->tclass_num; u32 min, max; u64 prob; @@ -512,7 +615,7 @@ mlxsw_sp_qdisc_get_red_xstats(struct mlxsw_sp_port *mlxsw_sp_port, void *xstats_ptr) { struct red_stats *xstats_base = &mlxsw_sp_qdisc->xstats_base.red; - u8 tclass_num = mlxsw_sp_qdisc->tclass_num; + int tclass_num = mlxsw_sp_qdisc->tclass_num; struct mlxsw_sp_port_xstats *xstats; struct red_stats *res = xstats_ptr; 
int early_drops, pdrops; @@ -536,7 +639,7 @@ mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, struct tc_qopt_offload_stats *stats_ptr) { - u8 tclass_num = mlxsw_sp_qdisc->tclass_num; + int tclass_num = mlxsw_sp_qdisc->tclass_num; struct mlxsw_sp_qdisc_stats *stats_base; struct mlxsw_sp_port_xstats *xstats; u64 overlimits; @@ -553,6 +656,13 @@ mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port, return 0; } +static struct mlxsw_sp_qdisc * +mlxsw_sp_qdisc_leaf_find_class(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + u32 parent) +{ + return NULL; +} + #define MLXSW_SP_PORT_DEFAULT_TCLASS 0 static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_red = { @@ -564,10 +674,11 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_red = { .get_stats = mlxsw_sp_qdisc_get_red_stats, .get_xstats = mlxsw_sp_qdisc_get_red_xstats, .clean_stats = mlxsw_sp_setup_tc_qdisc_red_clean_stats, + .find_class = mlxsw_sp_qdisc_leaf_find_class, }; -int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, - struct tc_red_qopt_offload *p) +static int __mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_red_qopt_offload *p) { struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; @@ -581,8 +692,7 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, &mlxsw_sp_qdisc_ops_red, &p->set); - if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle, - MLXSW_SP_QDISC_RED)) + if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle)) return -EOPNOTSUPP; switch (p->command) { @@ -599,6 +709,18 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, } } +int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_red_qopt_offload *p) +{ + int err; + + mutex_lock(&mlxsw_sp_port->qdisc->lock); + err = __mlxsw_sp_setup_tc_red(mlxsw_sp_port, p); + mutex_unlock(&mlxsw_sp_port->qdisc->lock); + + return err; +} + static void mlxsw_sp_setup_tc_qdisc_leaf_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) @@ -622,13 +744,6 @@ static int mlxsw_sp_qdisc_tbf_destroy(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) { - struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; - struct mlxsw_sp_qdisc *root_qdisc = &qdisc_state->root_qdisc; - - if (root_qdisc != mlxsw_sp_qdisc) - root_qdisc->stats_base.backlog -= - mlxsw_sp_qdisc->stats_base.backlog; - return mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, MLXSW_REG_QEEC_HR_SUBGROUP, mlxsw_sp_qdisc->tclass_num, 0, @@ -678,7 +793,6 @@ mlxsw_sp_qdisc_tbf_rate_kbps(struct tc_tbf_qopt_offload_replace_params *p) static int mlxsw_sp_qdisc_tbf_check_params(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params) { struct tc_tbf_qopt_offload_replace_params *p = params; @@ -766,10 +880,11 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_tbf = { .destroy = mlxsw_sp_qdisc_tbf_destroy, .get_stats = mlxsw_sp_qdisc_get_tbf_stats, .clean_stats = mlxsw_sp_setup_tc_qdisc_leaf_clean_stats, + .find_class = mlxsw_sp_qdisc_leaf_find_class, }; -int mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port, - struct tc_tbf_qopt_offload *p) +static int __mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_tbf_qopt_offload *p) { struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; @@ -783,8 +898,7 @@ int mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port, &mlxsw_sp_qdisc_ops_tbf, &p->replace_params); - if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle, - MLXSW_SP_QDISC_TBF)) + if 
(!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle)) return -EOPNOTSUPP; switch (p->command) { @@ -798,22 +912,20 @@ int mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port, } } -static int -mlxsw_sp_qdisc_fifo_destroy(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +int mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_tbf_qopt_offload *p) { - struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; - struct mlxsw_sp_qdisc *root_qdisc = &qdisc_state->root_qdisc; + int err; - if (root_qdisc != mlxsw_sp_qdisc) - root_qdisc->stats_base.backlog -= - mlxsw_sp_qdisc->stats_base.backlog; - return 0; + mutex_lock(&mlxsw_sp_port->qdisc->lock); + err = __mlxsw_sp_setup_tc_tbf(mlxsw_sp_port, p); + mutex_unlock(&mlxsw_sp_port->qdisc->lock); + + return err; } static int mlxsw_sp_qdisc_fifo_check_params(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params) { return 0; @@ -841,25 +953,18 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_fifo = { .type = MLXSW_SP_QDISC_FIFO, .check_params = mlxsw_sp_qdisc_fifo_check_params, .replace = mlxsw_sp_qdisc_fifo_replace, - .destroy = mlxsw_sp_qdisc_fifo_destroy, .get_stats = mlxsw_sp_qdisc_get_fifo_stats, .clean_stats = mlxsw_sp_setup_tc_qdisc_leaf_clean_stats, }; -int mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port, - struct tc_fifo_qopt_offload *p) +static int __mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_fifo_qopt_offload *p) { struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; - int tclass, child_index; + unsigned int band; u32 parent_handle; - /* Invisible FIFOs are tracked in future_handle and future_fifos. Make - * sure that not more than one qdisc is created for a port at a time. - * RTNL is a simple proxy for that. 
- */ - ASSERT_RTNL(); - mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false); if (!mlxsw_sp_qdisc && p->handle == TC_H_UNSPEC) { parent_handle = TC_H_MAJ(p->parent); @@ -872,13 +977,12 @@ int mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port, qdisc_state->future_handle = parent_handle; } - child_index = TC_H_MIN(p->parent); - tclass = MLXSW_SP_PRIO_CHILD_TO_TCLASS(child_index); - if (tclass < IEEE_8021QAZ_MAX_TCS) { + band = TC_H_MIN(p->parent) - 1; + if (band < IEEE_8021QAZ_MAX_TCS) { if (p->command == TC_FIFO_REPLACE) - qdisc_state->future_fifos[tclass] = true; + qdisc_state->future_fifos[band] = true; else if (p->command == TC_FIFO_DESTROY) - qdisc_state->future_fifos[tclass] = false; + qdisc_state->future_fifos[band] = false; } } if (!mlxsw_sp_qdisc) @@ -890,16 +994,12 @@ int mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port, &mlxsw_sp_qdisc_ops_fifo, NULL); } - if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle, - MLXSW_SP_QDISC_FIFO)) + if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle)) return -EOPNOTSUPP; switch (p->command) { case TC_FIFO_DESTROY: - if (p->handle == mlxsw_sp_qdisc->handle) - return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, - mlxsw_sp_qdisc); - return 0; + return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); case TC_FIFO_STATS: return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc, &p->stats); @@ -910,21 +1010,32 @@ int mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port, return -EOPNOTSUPP; } -static int -__mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port) +int mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_fifo_qopt_offload *p) +{ + int err; + + mutex_lock(&mlxsw_sp_port->qdisc->lock); + err = __mlxsw_sp_setup_tc_fifo(mlxsw_sp_port, p); + mutex_unlock(&mlxsw_sp_port->qdisc->lock); + + return err; +} + +static int __mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) { - struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; int i; - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + for (i = 0; i < mlxsw_sp_qdisc->num_classes; i++) { mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, MLXSW_SP_PORT_DEFAULT_TCLASS); mlxsw_sp_port_ets_set(mlxsw_sp_port, MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, false, 0); mlxsw_sp_qdisc_destroy(mlxsw_sp_port, - &qdisc_state->tclass_qdiscs[i]); - qdisc_state->tclass_qdiscs[i].prio_bitmap = 0; + &mlxsw_sp_qdisc->qdiscs[i]); + mlxsw_sp_qdisc->qdiscs[i].prio_bitmap = 0; } return 0; @@ -934,7 +1045,7 @@ static int mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) { - return __mlxsw_sp_qdisc_ets_destroy(mlxsw_sp_port); + return __mlxsw_sp_qdisc_ets_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); } static int @@ -948,7 +1059,6 @@ __mlxsw_sp_qdisc_ets_check_params(unsigned int nbands) static int mlxsw_sp_qdisc_prio_check_params(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params) { struct tc_prio_qopt_offload_params *p = params; @@ -957,8 +1067,9 @@ mlxsw_sp_qdisc_prio_check_params(struct mlxsw_sp_port *mlxsw_sp_port, } static int -__mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, - unsigned int nbands, +__mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + u32 handle, unsigned int nbands, const unsigned int *quanta, const unsigned int *weights, const u8 *priomap) @@ -971,7 +1082,7 @@ __mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port 
*mlxsw_sp_port, u32 handle, for (band = 0; band < nbands; band++) { tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band); - child_qdisc = &qdisc_state->tclass_qdiscs[tclass]; + child_qdisc = &mlxsw_sp_qdisc->qdiscs[band]; old_priomap = child_qdisc->prio_bitmap; child_qdisc->prio_bitmap = 0; @@ -993,6 +1104,9 @@ __mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, return err; } } + + child_qdisc->tclass_num = tclass; + if (old_priomap != child_qdisc->prio_bitmap && child_qdisc->ops && child_qdisc->ops->clean_stats) { backlog = child_qdisc->stats_base.backlog; @@ -1002,7 +1116,7 @@ __mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, } if (handle == qdisc_state->future_handle && - qdisc_state->future_fifos[tclass]) { + qdisc_state->future_fifos[band]) { err = mlxsw_sp_qdisc_replace(mlxsw_sp_port, TC_H_UNSPEC, child_qdisc, &mlxsw_sp_qdisc_ops_fifo, @@ -1013,7 +1127,7 @@ __mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, } for (; band < IEEE_8021QAZ_MAX_TCS; band++) { tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band); - child_qdisc = &qdisc_state->tclass_qdiscs[tclass]; + child_qdisc = &mlxsw_sp_qdisc->qdiscs[band]; child_qdisc->prio_bitmap = 0; mlxsw_sp_qdisc_destroy(mlxsw_sp_port, child_qdisc); mlxsw_sp_port_ets_set(mlxsw_sp_port, @@ -1034,8 +1148,9 @@ mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, struct tc_prio_qopt_offload_params *p = params; unsigned int zeroes[TCQ_ETS_MAX_BANDS] = {0}; - return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, handle, p->bands, - zeroes, zeroes, p->priomap); + return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, mlxsw_sp_qdisc, + handle, p->bands, zeroes, + zeroes, p->priomap); } static void @@ -1066,7 +1181,6 @@ mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, struct tc_qopt_offload_stats *stats_ptr) { - struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; struct mlxsw_sp_qdisc *tc_qdisc; u64 tx_packets = 0; u64 tx_bytes = 0; @@ -1074,8 +1188,8 @@ mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port, u64 drops = 0; int i; - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - tc_qdisc = &qdisc_state->tclass_qdiscs[i]; + for (i = 0; i < mlxsw_sp_qdisc->num_classes; i++) { + tc_qdisc = &mlxsw_sp_qdisc->qdiscs[i]; mlxsw_sp_qdisc_collect_tc_stats(mlxsw_sp_port, tc_qdisc, &tx_bytes, &tx_packets, &drops, &backlog); @@ -1112,6 +1226,18 @@ mlxsw_sp_setup_tc_qdisc_prio_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_qdisc->stats_base.backlog = 0; } +static struct mlxsw_sp_qdisc * +mlxsw_sp_qdisc_prio_find_class(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + u32 parent) +{ + int child_index = TC_H_MIN(parent); + int band = child_index - 1; + + if (band < 0 || band >= mlxsw_sp_qdisc->num_classes) + return NULL; + return &mlxsw_sp_qdisc->qdiscs[band]; +} + static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = { .type = MLXSW_SP_QDISC_PRIO, .check_params = mlxsw_sp_qdisc_prio_check_params, @@ -1120,11 +1246,12 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = { .destroy = mlxsw_sp_qdisc_prio_destroy, .get_stats = mlxsw_sp_qdisc_get_prio_stats, .clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats, + .find_class = mlxsw_sp_qdisc_prio_find_class, + .num_classes = IEEE_8021QAZ_MAX_TCS, }; static int mlxsw_sp_qdisc_ets_check_params(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params) { struct tc_ets_qopt_offload_replace_params *p = params; @@ -1139,8 
+1266,9 @@ mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, { struct tc_ets_qopt_offload_replace_params *p = params; - return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, handle, p->bands, - p->quanta, p->weights, p->priomap); + return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, mlxsw_sp_qdisc, + handle, p->bands, p->quanta, + p->weights, p->priomap); } static void @@ -1158,7 +1286,7 @@ static int mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) { - return __mlxsw_sp_qdisc_ets_destroy(mlxsw_sp_port); + return __mlxsw_sp_qdisc_ets_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); } static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_ets = { @@ -1169,6 +1297,8 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_ets = { .destroy = mlxsw_sp_qdisc_ets_destroy, .get_stats = mlxsw_sp_qdisc_get_prio_stats, .clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats, + .find_class = mlxsw_sp_qdisc_prio_find_class, + .num_classes = IEEE_8021QAZ_MAX_TCS, }; /* Linux allows linking of Qdiscs to arbitrary classes (so long as the resulting @@ -1201,12 +1331,10 @@ __mlxsw_sp_qdisc_ets_graft(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, u8 band, u32 child_handle) { - struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc; - int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(band); struct mlxsw_sp_qdisc *old_qdisc; - if (band < IEEE_8021QAZ_MAX_TCS && - qdisc_state->tclass_qdiscs[tclass_num].handle == child_handle) + if (band < mlxsw_sp_qdisc->num_classes && + mlxsw_sp_qdisc->qdiscs[band].handle == child_handle) return 0; if (!child_handle) { @@ -1224,8 +1352,10 @@ __mlxsw_sp_qdisc_ets_graft(struct mlxsw_sp_port *mlxsw_sp_port, if (old_qdisc) mlxsw_sp_qdisc_destroy(mlxsw_sp_port, old_qdisc); - mlxsw_sp_qdisc_destroy(mlxsw_sp_port, - &qdisc_state->tclass_qdiscs[tclass_num]); + mlxsw_sp_qdisc = mlxsw_sp_qdisc->ops->find_class(mlxsw_sp_qdisc, band); + if (!WARN_ON(!mlxsw_sp_qdisc)) + mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); + return -EOPNOTSUPP; } @@ -1238,8 +1368,8 @@ mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port, p->band, p->child_handle); } -int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, - struct tc_prio_qopt_offload *p) +static int __mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_prio_qopt_offload *p) { struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; @@ -1253,8 +1383,7 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, &mlxsw_sp_qdisc_ops_prio, &p->replace_params); - if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle, - MLXSW_SP_QDISC_PRIO)) + if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle)) return -EOPNOTSUPP; switch (p->command) { @@ -1271,8 +1400,20 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, } } -int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port, - struct tc_ets_qopt_offload *p) +int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_prio_qopt_offload *p) +{ + int err; + + mutex_lock(&mlxsw_sp_port->qdisc->lock); + err = __mlxsw_sp_setup_tc_prio(mlxsw_sp_port, p); + mutex_unlock(&mlxsw_sp_port->qdisc->lock); + + return err; +} + +static int __mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_ets_qopt_offload *p) { struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; @@ -1286,8 +1427,7 @@ int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port, &mlxsw_sp_qdisc_ops_ets, &p->replace_params); - if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle, 
- MLXSW_SP_QDISC_ETS)) + if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle)) return -EOPNOTSUPP; switch (p->command) { @@ -1305,6 +1445,18 @@ int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port, } } +int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_ets_qopt_offload *p) +{ + int err; + + mutex_lock(&mlxsw_sp_port->qdisc->lock); + err = __mlxsw_sp_setup_tc_ets(mlxsw_sp_port, p); + mutex_unlock(&mlxsw_sp_port->qdisc->lock); + + return err; +} + struct mlxsw_sp_qevent_block { struct list_head binding_list; struct list_head mall_entry_list; @@ -1341,6 +1493,7 @@ static int mlxsw_sp_qevent_span_configure(struct mlxsw_sp *mlxsw_sp, goto err_analyzed_port_get; trigger_parms.span_id = span_id; + trigger_parms.probability_rate = 1; err = mlxsw_sp_span_agent_bind(mlxsw_sp, qevent_binding->span_trigger, mlxsw_sp_port, &trigger_parms); if (err) @@ -1404,7 +1557,9 @@ static int mlxsw_sp_qevent_trap_configure(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_mall_entry *mall_entry, struct mlxsw_sp_qevent_binding *qevent_binding) { - struct mlxsw_sp_span_agent_parms agent_parms = {}; + struct mlxsw_sp_span_agent_parms agent_parms = { + .session_id = MLXSW_SP_SPAN_SESSION_ID_BUFFER, + }; int err; err = mlxsw_sp_trap_group_policer_hw_id_get(mlxsw_sp, @@ -1831,22 +1986,20 @@ int mlxsw_sp_setup_tc_block_qevent_early_drop(struct mlxsw_sp_port *mlxsw_sp_por int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port) { struct mlxsw_sp_qdisc_state *qdisc_state; - int i; qdisc_state = kzalloc(sizeof(*qdisc_state), GFP_KERNEL); if (!qdisc_state) return -ENOMEM; + mutex_init(&qdisc_state->lock); qdisc_state->root_qdisc.prio_bitmap = 0xff; qdisc_state->root_qdisc.tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS; - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) - qdisc_state->tclass_qdiscs[i].tclass_num = i; - mlxsw_sp_port->qdisc = qdisc_state; return 0; } void mlxsw_sp_tc_qdisc_fini(struct mlxsw_sp_port *mlxsw_sp_port) { + mutex_destroy(&mlxsw_sp_port->qdisc->lock); kfree(mlxsw_sp_port->qdisc); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index eda99d82766a..41259c0004d1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -113,6 +113,10 @@ struct mlxsw_sp_rif_ops { void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac); }; +struct mlxsw_sp_router_ops { + int (*init)(struct mlxsw_sp *mlxsw_sp); +}; + static struct mlxsw_sp_rif * mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, const struct net_device *dev); @@ -2662,6 +2666,10 @@ static void mlxsw_sp_router_neigh_event_work(struct work_struct *work) goto out; } + if (neigh_entry->connected && entry_connected && + !memcmp(neigh_entry->ha, ha, ETH_ALEN)) + goto out; + memcpy(neigh_entry->ha, ha, ETH_ALEN); mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected); mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected, @@ -2842,6 +2850,15 @@ enum mlxsw_sp_nexthop_type { MLXSW_SP_NEXTHOP_TYPE_IPIP, }; +enum mlxsw_sp_nexthop_action { + /* Nexthop forwards packets to an egress RIF */ + MLXSW_SP_NEXTHOP_ACTION_FORWARD, + /* Nexthop discards packets */ + MLXSW_SP_NEXTHOP_ACTION_DISCARD, + /* Nexthop traps packets */ + MLXSW_SP_NEXTHOP_ACTION_TRAP, +}; + struct mlxsw_sp_nexthop_key { struct fib_nh *fib_nh; }; @@ -2862,16 +2879,16 @@ struct mlxsw_sp_nexthop { int norm_nh_weight; int num_adj_entries; struct mlxsw_sp_rif *rif; - u8 should_offload:1, /* set 
indicates this neigh is connected and - * should be put to KVD linear area of this group. + u8 should_offload:1, /* set indicates this nexthop should be written + * to the adjacency table. */ - offloaded:1, /* set in case the neigh is actually put into - * KVD linear area of this group. + offloaded:1, /* set indicates this nexthop was written to the + * adjacency table. */ - update:1, /* set indicates that MAC of this neigh should be - * updated in HW + update:1; /* set indicates this nexthop should be updated in the + * adjacency table (f.e., its MAC changed). */ - discard:1; /* nexthop is programmed to discard packets */ + enum mlxsw_sp_nexthop_action action; enum mlxsw_sp_nexthop_type type; union { struct mlxsw_sp_neigh_entry *neigh_entry; @@ -2894,7 +2911,9 @@ struct mlxsw_sp_nexthop_group_info { u16 count; int sum_norm_weight; u8 adj_index_valid:1, - gateway:1; /* routes using the group use a gateway */ + gateway:1, /* routes using the group use a gateway */ + is_resilient:1; + struct list_head list; /* member in nh_res_grp_list */ struct mlxsw_sp_nexthop nexthops[0]; #define nh_rif nexthops[0].rif }; @@ -2979,14 +2998,15 @@ struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router, return list_next_entry(nh, router_list_node); } -bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh) +bool mlxsw_sp_nexthop_is_forward(const struct mlxsw_sp_nexthop *nh) { - return nh->offloaded; + return nh->offloaded && nh->action == MLXSW_SP_NEXTHOP_ACTION_FORWARD; } unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh) { - if (!nh->offloaded) + if (nh->type != MLXSW_SP_NEXTHOP_TYPE_ETH || + !mlxsw_sp_nexthop_is_forward(nh)) return NULL; return nh->neigh_entry->ha; } @@ -3036,11 +3056,6 @@ bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh) return false; } -bool mlxsw_sp_nexthop_is_discard(const struct mlxsw_sp_nexthop *nh) -{ - return nh->discard; -} - static const struct rhashtable_params mlxsw_sp_nexthop_group_vr_ht_params = { .key_offset = offsetof(struct mlxsw_sp_nexthop_group_vr_entry, key), .head_offset = offsetof(struct mlxsw_sp_nexthop_group_vr_entry, ht_node), @@ -3403,20 +3418,38 @@ err_mass_update_vr: return err; } -static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_nexthop *nh) +static int __mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, + u32 adj_index, + struct mlxsw_sp_nexthop *nh, + bool force, char *ratr_pl) { struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry; - char ratr_pl[MLXSW_REG_RATR_LEN]; + enum mlxsw_reg_ratr_op op; + u16 rif_index; - mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, - true, MLXSW_REG_RATR_TYPE_ETHERNET, - adj_index, nh->rif->rif_index); - if (nh->discard) + rif_index = nh->rif ? nh->rif->rif_index : + mlxsw_sp->router->lb_rif_index; + op = force ? 
MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY : + MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY; + mlxsw_reg_ratr_pack(ratr_pl, op, true, MLXSW_REG_RATR_TYPE_ETHERNET, + adj_index, rif_index); + switch (nh->action) { + case MLXSW_SP_NEXTHOP_ACTION_FORWARD: + mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha); + break; + case MLXSW_SP_NEXTHOP_ACTION_DISCARD: mlxsw_reg_ratr_trap_action_set(ratr_pl, MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS); - else - mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha); + break; + case MLXSW_SP_NEXTHOP_ACTION_TRAP: + mlxsw_reg_ratr_trap_action_set(ratr_pl, + MLXSW_REG_RATR_TRAP_ACTION_TRAP); + mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0); + break; + default: + WARN_ON_ONCE(1); + return -EINVAL; + } if (nh->counter_valid) mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true); else @@ -3425,15 +3458,17 @@ static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl); } -int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_nexthop *nh) +int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_nexthop *nh, bool force, + char *ratr_pl) { int i; for (i = 0; i < nh->num_adj_entries; i++) { int err; - err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh); + err = __mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index + i, + nh, force, ratr_pl); if (err) return err; } @@ -3443,17 +3478,20 @@ int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_nexthop *nh) + struct mlxsw_sp_nexthop *nh, + bool force, char *ratr_pl) { const struct mlxsw_sp_ipip_ops *ipip_ops; ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt]; - return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry); + return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry, + force, ratr_pl); } static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_nexthop *nh) + struct mlxsw_sp_nexthop *nh, bool force, + char *ratr_pl) { int i; @@ -3461,7 +3499,7 @@ static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp, int err; err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i, - nh); + nh, force, ratr_pl); if (err) return err; } @@ -3469,11 +3507,29 @@ static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp, return 0; } +static int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_nexthop *nh, bool force, + char *ratr_pl) +{ + /* When action is discard or trap, the nexthop must be + * programmed as an Ethernet nexthop. 
+ */ + if (nh->type == MLXSW_SP_NEXTHOP_TYPE_ETH || + nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD || + nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP) + return mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index, nh, + force, ratr_pl); + else + return mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index, nh, + force, ratr_pl); +} + static int mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group_info *nhgi, bool reallocate) { + char ratr_pl[MLXSW_REG_RATR_LEN]; u32 adj_index = nhgi->adj_index; /* base */ struct mlxsw_sp_nexthop *nh; int i; @@ -3489,16 +3545,8 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp, if (nh->update || reallocate) { int err = 0; - switch (nh->type) { - case MLXSW_SP_NEXTHOP_TYPE_ETH: - err = mlxsw_sp_nexthop_update - (mlxsw_sp, adj_index, nh); - break; - case MLXSW_SP_NEXTHOP_TYPE_IPIP: - err = mlxsw_sp_nexthop_ipip_update - (mlxsw_sp, adj_index, nh); - break; - } + err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh, + true, ratr_pl); if (err) return err; nh->update = 0; @@ -3524,34 +3572,69 @@ mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp, return 0; } -static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size) +struct mlxsw_sp_adj_grp_size_range { + u16 start; /* Inclusive */ + u16 end; /* Inclusive */ +}; + +/* Ordered by range start value */ +static const struct mlxsw_sp_adj_grp_size_range +mlxsw_sp1_adj_grp_size_ranges[] = { + { .start = 1, .end = 64 }, + { .start = 512, .end = 512 }, + { .start = 1024, .end = 1024 }, + { .start = 2048, .end = 2048 }, + { .start = 4096, .end = 4096 }, +}; + +/* Ordered by range start value */ +static const struct mlxsw_sp_adj_grp_size_range +mlxsw_sp2_adj_grp_size_ranges[] = { + { .start = 1, .end = 128 }, + { .start = 256, .end = 256 }, + { .start = 512, .end = 512 }, + { .start = 1024, .end = 1024 }, + { .start = 2048, .end = 2048 }, + { .start = 4096, .end = 4096 }, +}; + +static void mlxsw_sp_adj_grp_size_round_up(const struct mlxsw_sp *mlxsw_sp, + u16 *p_adj_grp_size) { - /* Valid sizes for an adjacency group are: - * 1-64, 512, 1024, 2048 and 4096. 
- */ - if (*p_adj_grp_size <= 64) - return; - else if (*p_adj_grp_size <= 512) - *p_adj_grp_size = 512; - else if (*p_adj_grp_size <= 1024) - *p_adj_grp_size = 1024; - else if (*p_adj_grp_size <= 2048) - *p_adj_grp_size = 2048; - else - *p_adj_grp_size = 4096; + int i; + + for (i = 0; i < mlxsw_sp->router->adj_grp_size_ranges_count; i++) { + const struct mlxsw_sp_adj_grp_size_range *size_range; + + size_range = &mlxsw_sp->router->adj_grp_size_ranges[i]; + + if (*p_adj_grp_size >= size_range->start && + *p_adj_grp_size <= size_range->end) + return; + + if (*p_adj_grp_size <= size_range->end) { + *p_adj_grp_size = size_range->end; + return; + } + } } -static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size, +static void mlxsw_sp_adj_grp_size_round_down(const struct mlxsw_sp *mlxsw_sp, + u16 *p_adj_grp_size, unsigned int alloc_size) { - if (alloc_size >= 4096) - *p_adj_grp_size = 4096; - else if (alloc_size >= 2048) - *p_adj_grp_size = 2048; - else if (alloc_size >= 1024) - *p_adj_grp_size = 1024; - else if (alloc_size >= 512) - *p_adj_grp_size = 512; + int i; + + for (i = mlxsw_sp->router->adj_grp_size_ranges_count - 1; i >= 0; i--) { + const struct mlxsw_sp_adj_grp_size_range *size_range; + + size_range = &mlxsw_sp->router->adj_grp_size_ranges[i]; + + if (alloc_size >= size_range->end) { + *p_adj_grp_size = size_range->end; + return; + } + } } static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp, @@ -3563,7 +3646,7 @@ static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp, /* Round up the requested group size to the next size supported * by the device and make sure the request can be satisfied. */ - mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size); + mlxsw_sp_adj_grp_size_round_up(mlxsw_sp, p_adj_grp_size); err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, *p_adj_grp_size, &alloc_size); @@ -3573,7 +3656,7 @@ static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp, * entries than requested. Try to use as much of them as * possible. */ - mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size); + mlxsw_sp_adj_grp_size_round_down(mlxsw_sp, p_adj_grp_size, alloc_size); return 0; } @@ -3681,9 +3764,29 @@ mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp *mlxsw_sp, } static void +mlxsw_sp_nexthop_bucket_offload_refresh(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nexthop *nh, + u16 bucket_index) +{ + struct mlxsw_sp_nexthop_group *nh_grp = nh->nhgi->nh_grp; + bool offload = false, trap = false; + + if (nh->offloaded) { + if (nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP) + trap = true; + else + offload = true; + } + nexthop_bucket_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id, + bucket_index, offload, trap); +} + +static void mlxsw_sp_nexthop_obj_group_offload_refresh(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) { + int i; + /* Do not update the flags if the nexthop group is being destroyed * since: * 1. The nexthop objects is being deleted, in which case the flags are @@ -3697,6 +3800,18 @@ mlxsw_sp_nexthop_obj_group_offload_refresh(struct mlxsw_sp *mlxsw_sp, nexthop_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id, nh_grp->nhgi->adj_index_valid, false); + + /* Update flags of individual nexthop buckets in case of a resilient + * nexthop group. 
+ */ + if (!nh_grp->nhgi->is_resilient) + return; + + for (i = 0; i < nh_grp->nhgi->count; i++) { + struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i]; + + mlxsw_sp_nexthop_bucket_offload_refresh(mlxsw_sp, nh, i); + } } static void @@ -3750,6 +3865,10 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n"); goto set_trap; } + /* Flags of individual nexthop buckets might need to be + * updated. + */ + mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp); return 0; } mlxsw_sp_nexthop_group_normalize(nhgi); @@ -3832,10 +3951,15 @@ set_trap: static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh, bool removing) { - if (!removing) + if (!removing) { + nh->action = MLXSW_SP_NEXTHOP_ACTION_FORWARD; nh->should_offload = 1; - else + } else if (nh->nhgi->is_resilient) { + nh->action = MLXSW_SP_NEXTHOP_ACTION_TRAP; + nh->should_offload = 1; + } else { nh->should_offload = 0; + } nh->update = 1; } @@ -4250,6 +4374,85 @@ static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, } } +static void +mlxsw_sp_nh_grp_activity_get(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nexthop_group *nh_grp, + unsigned long *activity) +{ + char *ratrad_pl; + int i, err; + + ratrad_pl = kmalloc(MLXSW_REG_RATRAD_LEN, GFP_KERNEL); + if (!ratrad_pl) + return; + + mlxsw_reg_ratrad_pack(ratrad_pl, nh_grp->nhgi->adj_index, + nh_grp->nhgi->count); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ratrad), ratrad_pl); + if (err) + goto out; + + for (i = 0; i < nh_grp->nhgi->count; i++) { + if (!mlxsw_reg_ratrad_activity_vector_get(ratrad_pl, i)) + continue; + bitmap_set(activity, i, 1); + } + +out: + kfree(ratrad_pl); +} + +#define MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL 1000 /* ms */ + +static void +mlxsw_sp_nh_grp_activity_update(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nexthop_group *nh_grp) +{ + unsigned long *activity; + + activity = bitmap_zalloc(nh_grp->nhgi->count, GFP_KERNEL); + if (!activity) + return; + + mlxsw_sp_nh_grp_activity_get(mlxsw_sp, nh_grp, activity); + nexthop_res_grp_activity_update(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id, + nh_grp->nhgi->count, activity); + + bitmap_free(activity); +} + +static void +mlxsw_sp_nh_grp_activity_work_schedule(struct mlxsw_sp *mlxsw_sp) +{ + unsigned int interval = MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL; + + mlxsw_core_schedule_dw(&mlxsw_sp->router->nh_grp_activity_dw, + msecs_to_jiffies(interval)); +} + +static void mlxsw_sp_nh_grp_activity_work(struct work_struct *work) +{ + struct mlxsw_sp_nexthop_group_info *nhgi; + struct mlxsw_sp_router *router; + bool reschedule = false; + + router = container_of(work, struct mlxsw_sp_router, + nh_grp_activity_dw.work); + + mutex_lock(&router->lock); + + list_for_each_entry(nhgi, &router->nh_res_grp_list, list) { + mlxsw_sp_nh_grp_activity_update(router->mlxsw_sp, nhgi->nh_grp); + reschedule = true; + } + + mutex_unlock(&router->lock); + + if (!reschedule) + return; + mlxsw_sp_nh_grp_activity_work_schedule(router->mlxsw_sp); +} + static int mlxsw_sp_nexthop_obj_single_validate(struct mlxsw_sp *mlxsw_sp, const struct nh_notifier_single_info *nh, @@ -4268,6 +4471,29 @@ mlxsw_sp_nexthop_obj_single_validate(struct mlxsw_sp *mlxsw_sp, } static int +mlxsw_sp_nexthop_obj_group_entry_validate(struct mlxsw_sp *mlxsw_sp, + const struct nh_notifier_single_info *nh, + struct netlink_ext_ack *extack) +{ + int err; + + err = mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, nh, extack); + if (err) + return 
err; + + /* Device only nexthops with an IPIP device are programmed as + * encapsulating adjacency entries. + */ + if (!nh->gw_family && !nh->is_reject && + !mlxsw_sp_netdev_ipip_type(mlxsw_sp, nh->dev, NULL)) { + NL_SET_ERR_MSG_MOD(extack, "Nexthop group entry does not have a gateway"); + return -EINVAL; + } + + return 0; +} + +static int mlxsw_sp_nexthop_obj_group_validate(struct mlxsw_sp *mlxsw_sp, const struct nh_notifier_grp_info *nh_grp, struct netlink_ext_ack *extack) @@ -4284,21 +4510,83 @@ mlxsw_sp_nexthop_obj_group_validate(struct mlxsw_sp *mlxsw_sp, int err; nh = &nh_grp->nh_entries[i].nh; - err = mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, nh, - extack); + err = mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh, + extack); if (err) return err; + } - /* Device only nexthops with an IPIP device are programmed as - * encapsulating adjacency entries. - */ - if (!nh->gw_family && !nh->is_reject && - !mlxsw_sp_netdev_ipip_type(mlxsw_sp, nh->dev, NULL)) { - NL_SET_ERR_MSG_MOD(extack, "Nexthop group entry does not have a gateway"); - return -EINVAL; + return 0; +} + +static int +mlxsw_sp_nexthop_obj_res_group_size_validate(struct mlxsw_sp *mlxsw_sp, + const struct nh_notifier_res_table_info *nh_res_table, + struct netlink_ext_ack *extack) +{ + unsigned int alloc_size; + bool valid_size = false; + int err, i; + + if (nh_res_table->num_nh_buckets < 32) { + NL_SET_ERR_MSG_MOD(extack, "Minimum number of buckets is 32"); + return -EINVAL; + } + + for (i = 0; i < mlxsw_sp->router->adj_grp_size_ranges_count; i++) { + const struct mlxsw_sp_adj_grp_size_range *size_range; + + size_range = &mlxsw_sp->router->adj_grp_size_ranges[i]; + + if (nh_res_table->num_nh_buckets >= size_range->start && + nh_res_table->num_nh_buckets <= size_range->end) { + valid_size = true; + break; } } + if (!valid_size) { + NL_SET_ERR_MSG_MOD(extack, "Invalid number of buckets"); + return -EINVAL; + } + + err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp, + MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, + nh_res_table->num_nh_buckets, + &alloc_size); + if (err || nh_res_table->num_nh_buckets != alloc_size) { + NL_SET_ERR_MSG_MOD(extack, "Number of buckets does not fit allocation size of any KVDL partition"); + return -EINVAL; + } + + return 0; +} + +static int +mlxsw_sp_nexthop_obj_res_group_validate(struct mlxsw_sp *mlxsw_sp, + const struct nh_notifier_res_table_info *nh_res_table, + struct netlink_ext_ack *extack) +{ + int err; + u16 i; + + err = mlxsw_sp_nexthop_obj_res_group_size_validate(mlxsw_sp, + nh_res_table, + extack); + if (err) + return err; + + for (i = 0; i < nh_res_table->num_nh_buckets; i++) { + const struct nh_notifier_single_info *nh; + int err; + + nh = &nh_res_table->nhs[i]; + err = mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh, + extack); + if (err) + return err; + } + return 0; } @@ -4306,7 +4594,11 @@ static int mlxsw_sp_nexthop_obj_validate(struct mlxsw_sp *mlxsw_sp, unsigned long event, struct nh_notifier_info *info) { - if (event != NEXTHOP_EVENT_REPLACE) + struct nh_notifier_single_info *nh; + + if (event != NEXTHOP_EVENT_REPLACE && + event != NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE && + event != NEXTHOP_EVENT_BUCKET_REPLACE) return 0; switch (info->type) { @@ -4317,6 +4609,14 @@ static int mlxsw_sp_nexthop_obj_validate(struct mlxsw_sp *mlxsw_sp, return mlxsw_sp_nexthop_obj_group_validate(mlxsw_sp, info->nh_grp, info->extack); + case NH_NOTIFIER_INFO_TYPE_RES_TABLE: + return mlxsw_sp_nexthop_obj_res_group_validate(mlxsw_sp, + info->nh_res_table, + info->extack); + case 
NH_NOTIFIER_INFO_TYPE_RES_BUCKET: + nh = &info->nh_res_bucket->new_nh; + return mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh, + info->extack); default: NL_SET_ERR_MSG_MOD(info->extack, "Unsupported nexthop type"); return -EOPNOTSUPP; @@ -4334,6 +4634,7 @@ static bool mlxsw_sp_nexthop_obj_is_gateway(struct mlxsw_sp *mlxsw_sp, return info->nh->gw_family || info->nh->is_reject || mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL); case NH_NOTIFIER_INFO_TYPE_GRP: + case NH_NOTIFIER_INFO_TYPE_RES_TABLE: /* Already validated earlier. */ return true; default: @@ -4346,7 +4647,7 @@ static void mlxsw_sp_nexthop_obj_blackhole_init(struct mlxsw_sp *mlxsw_sp, { u16 lb_rif_index = mlxsw_sp->router->lb_rif_index; - nh->discard = 1; + nh->action = MLXSW_SP_NEXTHOP_ACTION_DISCARD; nh->should_offload = 1; /* While nexthops that discard packets do not forward packets * via an egress RIF, they still need to be programmed using a @@ -4398,6 +4699,15 @@ mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp, if (nh_obj->is_reject) mlxsw_sp_nexthop_obj_blackhole_init(mlxsw_sp, nh); + /* In a resilient nexthop group, all the nexthops must be written to + * the adjacency table. Even if they do not have a valid neighbour or + * RIF. + */ + if (nh_grp->nhgi->is_resilient && !nh->should_offload) { + nh->action = MLXSW_SP_NEXTHOP_ACTION_TRAP; + nh->should_offload = 1; + } + return 0; err_type_init: @@ -4409,11 +4719,12 @@ err_type_init: static void mlxsw_sp_nexthop_obj_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { - if (nh->discard) + if (nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD) mlxsw_sp_nexthop_obj_blackhole_fini(mlxsw_sp, nh); mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); list_del(&nh->router_list_node); mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); + nh->should_offload = 0; } static int @@ -4423,6 +4734,7 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_nexthop_group_info *nhgi; struct mlxsw_sp_nexthop *nh; + bool is_resilient = false; unsigned int nhs; int err, i; @@ -4433,6 +4745,10 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp, case NH_NOTIFIER_INFO_TYPE_GRP: nhs = info->nh_grp->num_nh; break; + case NH_NOTIFIER_INFO_TYPE_RES_TABLE: + nhs = info->nh_res_table->num_nh_buckets; + is_resilient = true; + break; default: return -EINVAL; } @@ -4443,6 +4759,7 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp, nh_grp->nhgi = nhgi; nhgi->nh_grp = nh_grp; nhgi->gateway = mlxsw_sp_nexthop_obj_is_gateway(mlxsw_sp, info); + nhgi->is_resilient = is_resilient; nhgi->count = nhs; for (i = 0; i < nhgi->count; i++) { struct nh_notifier_single_info *nh_obj; @@ -4458,6 +4775,10 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp, nh_obj = &info->nh_grp->nh_entries[i].nh; weight = info->nh_grp->nh_entries[i].weight; break; + case NH_NOTIFIER_INFO_TYPE_RES_TABLE: + nh_obj = &info->nh_res_table->nhs[i]; + weight = 1; + break; default: err = -EINVAL; goto err_nexthop_obj_init; @@ -4473,6 +4794,15 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp, goto err_group_refresh; } + /* Add resilient nexthop groups to a list so that the activity of their + * nexthop buckets will be periodically queried and cleared. 
+ */ + if (nhgi->is_resilient) { + if (list_empty(&mlxsw_sp->router->nh_res_grp_list)) + mlxsw_sp_nh_grp_activity_work_schedule(mlxsw_sp); + list_add(&nhgi->list, &mlxsw_sp->router->nh_res_grp_list); + } + return 0; err_group_refresh: @@ -4491,8 +4821,15 @@ mlxsw_sp_nexthop_obj_group_info_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) { struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi; + struct mlxsw_sp_router *router = mlxsw_sp->router; int i; + if (nhgi->is_resilient) { + list_del(&nhgi->list); + if (list_empty(&mlxsw_sp->router->nh_res_grp_list)) + cancel_delayed_work(&router->nh_grp_activity_dw); + } + for (i = nhgi->count - 1; i >= 0; i--) { struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i]; @@ -4685,6 +5022,136 @@ static void mlxsw_sp_nexthop_obj_del(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp); } +static int mlxsw_sp_nexthop_obj_bucket_query(struct mlxsw_sp *mlxsw_sp, + u32 adj_index, char *ratr_pl) +{ + MLXSW_REG_ZERO(ratr, ratr_pl); + mlxsw_reg_ratr_op_set(ratr_pl, MLXSW_REG_RATR_OP_QUERY_READ); + mlxsw_reg_ratr_adjacency_index_low_set(ratr_pl, adj_index); + mlxsw_reg_ratr_adjacency_index_high_set(ratr_pl, adj_index >> 16); + + return mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl); +} + +static int mlxsw_sp_nexthop_obj_bucket_compare(char *ratr_pl, char *ratr_pl_new) +{ + /* Clear the opcode and activity on both the old and new payload as + * they are irrelevant for the comparison. + */ + mlxsw_reg_ratr_op_set(ratr_pl, MLXSW_REG_RATR_OP_QUERY_READ); + mlxsw_reg_ratr_a_set(ratr_pl, 0); + mlxsw_reg_ratr_op_set(ratr_pl_new, MLXSW_REG_RATR_OP_QUERY_READ); + mlxsw_reg_ratr_a_set(ratr_pl_new, 0); + + /* If the contents of the adjacency entry are consistent with the + * replacement request, then replacement was successful. + */ + if (!memcmp(ratr_pl, ratr_pl_new, MLXSW_REG_RATR_LEN)) + return 0; + + return -EINVAL; +} + +static int +mlxsw_sp_nexthop_obj_bucket_adj_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh, + struct nh_notifier_info *info) +{ + u16 bucket_index = info->nh_res_bucket->bucket_index; + struct netlink_ext_ack *extack = info->extack; + bool force = info->nh_res_bucket->force; + char ratr_pl_new[MLXSW_REG_RATR_LEN]; + char ratr_pl[MLXSW_REG_RATR_LEN]; + u32 adj_index; + int err; + + /* No point in trying an atomic replacement if the idle timer interval + * is smaller than the interval in which we query and clear activity. + */ + if (!force && info->nh_res_bucket->idle_timer_ms < + MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL) + force = true; + + adj_index = nh->nhgi->adj_index + bucket_index; + err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh, force, ratr_pl); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to overwrite nexthop bucket"); + return err; + } + + if (!force) { + err = mlxsw_sp_nexthop_obj_bucket_query(mlxsw_sp, adj_index, + ratr_pl_new); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to query nexthop bucket state after replacement. 
State might be inconsistent"); + return err; + } + + err = mlxsw_sp_nexthop_obj_bucket_compare(ratr_pl, ratr_pl_new); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Nexthop bucket was not replaced because it was active during replacement"); + return err; + } + } + + nh->update = 0; + nh->offloaded = 1; + mlxsw_sp_nexthop_bucket_offload_refresh(mlxsw_sp, nh, bucket_index); + + return 0; +} + +static int mlxsw_sp_nexthop_obj_bucket_replace(struct mlxsw_sp *mlxsw_sp, + struct nh_notifier_info *info) +{ + u16 bucket_index = info->nh_res_bucket->bucket_index; + struct netlink_ext_ack *extack = info->extack; + struct mlxsw_sp_nexthop_group_info *nhgi; + struct nh_notifier_single_info *nh_obj; + struct mlxsw_sp_nexthop_group *nh_grp; + struct mlxsw_sp_nexthop *nh; + int err; + + nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id); + if (!nh_grp) { + NL_SET_ERR_MSG_MOD(extack, "Nexthop group was not found"); + return -EINVAL; + } + + nhgi = nh_grp->nhgi; + + if (bucket_index >= nhgi->count) { + NL_SET_ERR_MSG_MOD(extack, "Nexthop bucket index out of range"); + return -EINVAL; + } + + nh = &nhgi->nexthops[bucket_index]; + mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh); + + nh_obj = &info->nh_res_bucket->new_nh; + err = mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, 1); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to initialize nexthop object for nexthop bucket replacement"); + goto err_nexthop_obj_init; + } + + err = mlxsw_sp_nexthop_obj_bucket_adj_update(mlxsw_sp, nh, info); + if (err) + goto err_nexthop_obj_bucket_adj_update; + + return 0; + +err_nexthop_obj_bucket_adj_update: + mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh); +err_nexthop_obj_init: + nh_obj = &info->nh_res_bucket->old_nh; + mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, 1); + /* The old adjacency entry was not overwritten */ + nh->update = 0; + nh->offloaded = 1; + return err; +} + static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb, unsigned long event, void *ptr) { @@ -4699,8 +5166,6 @@ static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb, mutex_lock(&router->lock); - ASSERT_RTNL(); - switch (event) { case NEXTHOP_EVENT_REPLACE: err = mlxsw_sp_nexthop_obj_new(router->mlxsw_sp, info); @@ -4708,6 +5173,10 @@ static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb, case NEXTHOP_EVENT_DEL: mlxsw_sp_nexthop_obj_del(router->mlxsw_sp, info); break; + case NEXTHOP_EVENT_BUCKET_REPLACE: + err = mlxsw_sp_nexthop_obj_bucket_replace(router->mlxsw_sp, + info); + break; default: break; } @@ -7667,7 +8136,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, int i, err; type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev); - ops = mlxsw_sp->rif_ops_arr[type]; + ops = mlxsw_sp->router->rif_ops_arr[type]; vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? 
@@ -7667,7 +8136,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, int i, err; type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev); - ops = mlxsw_sp->rif_ops_arr[type]; + ops = mlxsw_sp->router->rif_ops_arr[type]; vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack); if (IS_ERR(vr)) @@ -8865,7 +9334,7 @@ static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_ipip_lb_ops = { .deconfigure = mlxsw_sp1_rif_ipip_lb_deconfigure, }; -const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = { +static const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = { [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops, [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp_rif_vlan_emu_ops, [MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops, @@ -9050,7 +9519,7 @@ static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_ipip_lb_ops = { .deconfigure = mlxsw_sp2_rif_ipip_lb_deconfigure, }; -const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = { +static const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = { [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops, [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp_rif_vlan_emu_ops, [MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops, @@ -9302,6 +9771,36 @@ static void mlxsw_sp_lb_rif_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_sp_router_ul_rif_put(mlxsw_sp, mlxsw_sp->router->lb_rif_index); } +static int mlxsw_sp1_router_init(struct mlxsw_sp *mlxsw_sp) +{ + size_t size_ranges_count = ARRAY_SIZE(mlxsw_sp1_adj_grp_size_ranges); + + mlxsw_sp->router->rif_ops_arr = mlxsw_sp1_rif_ops_arr; + mlxsw_sp->router->adj_grp_size_ranges = mlxsw_sp1_adj_grp_size_ranges; + mlxsw_sp->router->adj_grp_size_ranges_count = size_ranges_count; + + return 0; +} + +const struct mlxsw_sp_router_ops mlxsw_sp1_router_ops = { + .init = mlxsw_sp1_router_init, +}; + +static int mlxsw_sp2_router_init(struct mlxsw_sp *mlxsw_sp) +{ + size_t size_ranges_count = ARRAY_SIZE(mlxsw_sp2_adj_grp_size_ranges); + + mlxsw_sp->router->rif_ops_arr = mlxsw_sp2_rif_ops_arr; + mlxsw_sp->router->adj_grp_size_ranges = mlxsw_sp2_adj_grp_size_ranges; + mlxsw_sp->router->adj_grp_size_ranges_count = size_ranges_count; + + return 0; +} + +const struct mlxsw_sp_router_ops mlxsw_sp2_router_ops = { + .init = mlxsw_sp2_router_init, +}; + int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, struct netlink_ext_ack *extack) { @@ -9315,6 +9814,10 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, mlxsw_sp->router = router; router->mlxsw_sp = mlxsw_sp; + err = mlxsw_sp->router_ops->init(mlxsw_sp); + if (err) + goto err_router_ops_init; + err = mlxsw_sp_router_xm_init(mlxsw_sp); if (err) goto err_xm_init; @@ -9328,6 +9831,10 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, if (err) goto err_ll_op_ctx_init; + INIT_LIST_HEAD(&mlxsw_sp->router->nh_res_grp_list); + INIT_DELAYED_WORK(&mlxsw_sp->router->nh_grp_activity_dw, + mlxsw_sp_nh_grp_activity_work); + INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list); err = __mlxsw_sp_router_init(mlxsw_sp); if (err) @@ -9451,10 +9958,12 @@ err_ipips_init: err_rifs_init: __mlxsw_sp_router_fini(mlxsw_sp); err_router_init: + cancel_delayed_work_sync(&mlxsw_sp->router->nh_grp_activity_dw); mlxsw_sp_router_ll_op_ctx_fini(router); err_ll_op_ctx_init: mlxsw_sp_router_xm_fini(mlxsw_sp); err_xm_init: +err_router_ops_init: mutex_destroy(&mlxsw_sp->router->lock); kfree(mlxsw_sp->router); return err; @@ -9481,6 +9990,7 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_sp_ipips_fini(mlxsw_sp); mlxsw_sp_rifs_fini(mlxsw_sp); __mlxsw_sp_router_fini(mlxsw_sp); + cancel_delayed_work_sync(&mlxsw_sp->router->nh_grp_activity_dw); mlxsw_sp_router_ll_op_ctx_fini(mlxsw_sp->router); mlxsw_sp_router_xm_fini(mlxsw_sp); mutex_destroy(&mlxsw_sp->router->lock); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 2875ee8ec537..be7708a375e1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -78,6 +78,10 @@ struct mlxsw_sp_router { struct mlxsw_sp_fib_entry_op_ctx *ll_op_ctx; u16 lb_rif_index; struct mlxsw_sp_router_xm *xm; + const struct mlxsw_sp_adj_grp_size_range *adj_grp_size_ranges; + size_t adj_grp_size_ranges_count; + struct delayed_work nh_grp_activity_dw; + struct list_head nh_res_grp_list; }; struct mlxsw_sp_fib_entry_priv { @@ -195,20 +199,20 @@ mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_ipip_entry *except); struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router, struct mlxsw_sp_nexthop *nh); -bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh); +bool mlxsw_sp_nexthop_is_forward(const struct mlxsw_sp_nexthop *nh); unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh); int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index, u32 *p_adj_size, u32 *p_adj_hash_index); struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh); bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh); -bool mlxsw_sp_nexthop_is_discard(const struct mlxsw_sp_nexthop *nh); #define mlxsw_sp_nexthop_for_each(nh, router) \ for (nh = mlxsw_sp_nexthop_next(router, NULL); nh; \ nh = mlxsw_sp_nexthop_next(router, nh)) int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh, u64 *p_counter); -int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_nexthop *nh); +int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_nexthop *nh, bool force, + char *ratr_pl); void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh); void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c index 1892cea05ee7..3398cc01e5ec 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -186,6 +186,7 @@ mlxsw_sp_span_entry_phys_configure(struct mlxsw_sp_span_entry *span_entry, /* Create a new port analyzer entry for local_port.
*/ mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true, MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH); + mlxsw_reg_mpat_session_id_set(mpat_pl, sparms.session_id); mlxsw_reg_mpat_pide_set(mpat_pl, sparms.policer_enable); mlxsw_reg_mpat_pid_set(mpat_pl, sparms.policer_id); @@ -203,6 +204,7 @@ mlxsw_sp_span_entry_deconfigure_common(struct mlxsw_sp_span_entry *span_entry, int pa_id = span_entry->id; mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false, span_type); + mlxsw_reg_mpat_session_id_set(mpat_pl, span_entry->parms.session_id); mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl); } @@ -938,7 +940,8 @@ mlxsw_sp_span_entry_find_by_parms(struct mlxsw_sp *mlxsw_sp, if (refcount_read(&curr->ref_count) && curr->to_dev == to_dev && curr->parms.policer_enable == sparms->policer_enable && - curr->parms.policer_id == sparms->policer_id) + curr->parms.policer_id == sparms->policer_id && + curr->parms.session_id == sparms->session_id) return curr; } return NULL; @@ -1085,6 +1088,7 @@ int mlxsw_sp_span_agent_get(struct mlxsw_sp *mlxsw_sp, int *p_span_id, sparms.policer_id = parms->policer_id; sparms.policer_enable = parms->policer_enable; + sparms.session_id = parms->session_id; span_entry = mlxsw_sp_span_entry_get(mlxsw_sp, to_dev, ops, sparms); if (!span_entry) return -ENOBUFS; @@ -1227,8 +1231,12 @@ __mlxsw_sp_span_trigger_port_bind(struct mlxsw_sp_span *span, return -EINVAL; } + if (trigger_entry->parms.probability_rate > MLXSW_REG_MPAR_RATE_MAX) + return -EINVAL; + mlxsw_reg_mpar_pack(mpar_pl, trigger_entry->local_port, i_e, enable, - trigger_entry->parms.span_id); + trigger_entry->parms.span_id, + trigger_entry->parms.probability_rate); return mlxsw_reg_write(span->mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl); } @@ -1362,8 +1370,11 @@ mlxsw_sp2_span_trigger_global_bind(struct mlxsw_sp_span_trigger_entry * return -EINVAL; } + if (trigger_entry->parms.probability_rate > MLXSW_REG_MPAGR_RATE_MAX) + return -EINVAL; + mlxsw_reg_mpagr_pack(mpagr_pl, trigger, trigger_entry->parms.span_id, - 1); + trigger_entry->parms.probability_rate); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpagr), mpagr_pl); } @@ -1561,7 +1572,9 @@ int mlxsw_sp_span_agent_bind(struct mlxsw_sp *mlxsw_sp, trigger, mlxsw_sp_port); if (trigger_entry) { - if (trigger_entry->parms.span_id != parms->span_id) + if (trigger_entry->parms.span_id != parms->span_id || + trigger_entry->parms.probability_rate != + parms->probability_rate) return -EINVAL; refcount_inc(&trigger_entry->ref_count); goto out; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h index aa1cd409c0e2..efaefd1ae863 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h @@ -13,6 +13,19 @@ struct mlxsw_sp; struct mlxsw_sp_port; +/* SPAN session identifiers that correspond to MLXSW_TRAP_ID_MIRROR_SESSION<i> + * trap identifiers. The session identifier is an attribute of the SPAN agent, + * which determines the trap identifier of packets that are mirrored to the + * CPU. Packets that are trapped to the CPU for the same logical reason (e.g., + * buffer drops) should use the same session identifier. + */ +enum mlxsw_sp_span_session_id { + MLXSW_SP_SPAN_SESSION_ID_BUFFER, + MLXSW_SP_SPAN_SESSION_ID_SAMPLING, + + __MLXSW_SP_SPAN_SESSION_ID_MAX = 8, +}; + struct mlxsw_sp_span_parms { struct mlxsw_sp_port *dest_port; /* NULL for unoffloaded SPAN. 
*/ unsigned int ttl; @@ -23,6 +36,7 @@ struct mlxsw_sp_span_parms { u16 vid; u16 policer_id; bool policer_enable; + enum mlxsw_sp_span_session_id session_id; }; enum mlxsw_sp_span_trigger { @@ -35,12 +49,14 @@ enum mlxsw_sp_span_trigger { struct mlxsw_sp_span_trigger_parms { int span_id; + u32 probability_rate; }; struct mlxsw_sp_span_agent_parms { const struct net_device *to_dev; u16 policer_id; bool policer_enable; + enum mlxsw_sp_span_session_id session_id; }; struct mlxsw_sp_span_entry_ops; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 23b7e8d6386b..eeccd586e781 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -98,6 +98,10 @@ struct mlxsw_sp_bridge_ops { const struct mlxsw_sp_fid *fid); }; +struct mlxsw_sp_switchdev_ops { + void (*init)(struct mlxsw_sp *mlxsw_sp); +}; + static int mlxsw_sp_bridge_port_fdb_flush(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_bridge_port *bridge_port, @@ -2296,7 +2300,7 @@ mlxsw_sp_bridge_8021ad_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device, vid, ETH_P_8021AD, extack); } -static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021ad_ops = { +static const struct mlxsw_sp_bridge_ops mlxsw_sp1_bridge_8021ad_ops = { .port_join = mlxsw_sp_bridge_8021ad_port_join, .port_leave = mlxsw_sp_bridge_8021ad_port_leave, .vxlan_join = mlxsw_sp_bridge_8021ad_vxlan_join, @@ -2305,6 +2309,53 @@ static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021ad_ops = { .fid_vid = mlxsw_sp_bridge_8021q_fid_vid, }; +static int +mlxsw_sp2_bridge_8021ad_port_join(struct mlxsw_sp_bridge_device *bridge_device, + struct mlxsw_sp_bridge_port *bridge_port, + struct mlxsw_sp_port *mlxsw_sp_port, + struct netlink_ext_ack *extack) +{ + int err; + + /* The EtherType of decapsulated packets is determined at the egress + * port to allow 802.1d and 802.1ad bridges with VXLAN devices to + * co-exist. 
+ */ + err = mlxsw_sp_port_egress_ethtype_set(mlxsw_sp_port, ETH_P_8021AD); + if (err) + return err; + + err = mlxsw_sp_bridge_8021ad_port_join(bridge_device, bridge_port, + mlxsw_sp_port, extack); + if (err) + goto err_bridge_8021ad_port_join; + + return 0; + +err_bridge_8021ad_port_join: + mlxsw_sp_port_egress_ethtype_set(mlxsw_sp_port, ETH_P_8021Q); + return err; +} + +static void +mlxsw_sp2_bridge_8021ad_port_leave(struct mlxsw_sp_bridge_device *bridge_device, + struct mlxsw_sp_bridge_port *bridge_port, + struct mlxsw_sp_port *mlxsw_sp_port) +{ + mlxsw_sp_bridge_8021ad_port_leave(bridge_device, bridge_port, + mlxsw_sp_port); + mlxsw_sp_port_egress_ethtype_set(mlxsw_sp_port, ETH_P_8021Q); +} + +static const struct mlxsw_sp_bridge_ops mlxsw_sp2_bridge_8021ad_ops = { + .port_join = mlxsw_sp2_bridge_8021ad_port_join, + .port_leave = mlxsw_sp2_bridge_8021ad_port_leave, + .vxlan_join = mlxsw_sp_bridge_8021ad_vxlan_join, + .fid_get = mlxsw_sp_bridge_8021q_fid_get, + .fid_lookup = mlxsw_sp_bridge_8021q_fid_lookup, + .fid_vid = mlxsw_sp_bridge_8021q_fid_vid, +}; + int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *brport_dev, struct net_device *br_dev, @@ -2865,7 +2916,8 @@ mlxsw_sp_switchdev_bridge_nve_fdb_event(struct mlxsw_sp_switchdev_event_work * return; if (switchdev_work->event == SWITCHDEV_FDB_ADD_TO_DEVICE && - !switchdev_work->fdb_info.added_by_user) + (!switchdev_work->fdb_info.added_by_user || + switchdev_work->fdb_info.is_local)) return; if (!netif_running(dev)) @@ -2920,7 +2972,7 @@ static void mlxsw_sp_switchdev_bridge_fdb_event_work(struct work_struct *work) switch (switchdev_work->event) { case SWITCHDEV_FDB_ADD_TO_DEVICE: fdb_info = &switchdev_work->fdb_info; - if (!fdb_info->added_by_user) + if (!fdb_info->added_by_user || fdb_info->is_local) break; err = mlxsw_sp_port_fdb_set(mlxsw_sp_port, fdb_info, true); if (err) @@ -3535,6 +3587,24 @@ static void mlxsw_sp_fdb_fini(struct mlxsw_sp *mlxsw_sp) unregister_switchdev_notifier(&mlxsw_sp_switchdev_notifier); } +static void mlxsw_sp1_switchdev_init(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp->bridge->bridge_8021ad_ops = &mlxsw_sp1_bridge_8021ad_ops; +} + +const struct mlxsw_sp_switchdev_ops mlxsw_sp1_switchdev_ops = { + .init = mlxsw_sp1_switchdev_init, +}; + +static void mlxsw_sp2_switchdev_init(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp->bridge->bridge_8021ad_ops = &mlxsw_sp2_bridge_8021ad_ops; +} + +const struct mlxsw_sp_switchdev_ops mlxsw_sp2_switchdev_ops = { + .init = mlxsw_sp2_switchdev_init, +}; + int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp) { struct mlxsw_sp_bridge *bridge; @@ -3549,7 +3619,8 @@ int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp) bridge->bridge_8021q_ops = &mlxsw_sp_bridge_8021q_ops; bridge->bridge_8021d_ops = &mlxsw_sp_bridge_8021d_ops; - bridge->bridge_8021ad_ops = &mlxsw_sp_bridge_8021ad_ops; + + mlxsw_sp->switchdev_ops->init(mlxsw_sp); return mlxsw_sp_fdb_init(mlxsw_sp); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c index 4ef12e3e021a..26d01adbedad 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c @@ -49,8 +49,14 @@ enum { #define MLXSW_SP_TRAP_METADATA DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT enum { + /* Packet was mirrored from ingress. */ + MLXSW_SP_MIRROR_REASON_INGRESS = 1, + /* Packet was mirrored from policy engine. */ + MLXSW_SP_MIRROR_REASON_POLICY_ENGINE = 2, /* Packet was early dropped. 
*/ MLXSW_SP_MIRROR_REASON_INGRESS_WRED = 9, + /* Packet was mirrored from egress. */ + MLXSW_SP_MIRROR_REASON_EGRESS = 14, }; static int mlxsw_sp_rx_listener(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, @@ -106,7 +112,7 @@ static void mlxsw_sp_rx_drop_listener(struct sk_buff *skb, u8 local_port, static void mlxsw_sp_rx_acl_drop_listener(struct sk_buff *skb, u8 local_port, void *trap_ctx) { - u32 cookie_index = mlxsw_skb_cb(skb)->cookie_index; + u32 cookie_index = mlxsw_skb_cb(skb)->rx_md_info.cookie_index; const struct flow_action_cookie *fa_cookie; struct devlink_port *in_devlink_port; struct mlxsw_sp_port *mlxsw_sp_port; @@ -202,21 +208,175 @@ static void mlxsw_sp_rx_ptp_listener(struct sk_buff *skb, u8 local_port, mlxsw_sp_ptp_receive(mlxsw_sp, skb, local_port); } +static struct mlxsw_sp_port * +mlxsw_sp_sample_tx_port_get(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_rx_md_info *rx_md_info) +{ + u8 local_port; + + if (!rx_md_info->tx_port_valid) + return NULL; + + if (rx_md_info->tx_port_is_lag) + local_port = mlxsw_core_lag_mapping_get(mlxsw_sp->core, + rx_md_info->tx_lag_id, + rx_md_info->tx_lag_port_index); + else + local_port = rx_md_info->tx_sys_port; + + if (local_port >= mlxsw_core_max_ports(mlxsw_sp->core)) + return NULL; + + return mlxsw_sp->ports[local_port]; +} + +/* The latency units are determined according to MOGCR.mirror_latency_units. It + * defaults to 64 nanoseconds. + */ +#define MLXSW_SP_MIRROR_LATENCY_SHIFT 6 + +static void mlxsw_sp_psample_md_init(struct mlxsw_sp *mlxsw_sp, + struct psample_metadata *md, + struct sk_buff *skb, int in_ifindex, + bool truncate, u32 trunc_size) +{ + struct mlxsw_rx_md_info *rx_md_info = &mlxsw_skb_cb(skb)->rx_md_info; + struct mlxsw_sp_port *mlxsw_sp_port; + + md->trunc_size = truncate ? trunc_size : skb->len; + md->in_ifindex = in_ifindex; + mlxsw_sp_port = mlxsw_sp_sample_tx_port_get(mlxsw_sp, rx_md_info); + md->out_ifindex = mlxsw_sp_port && mlxsw_sp_port->dev ? + mlxsw_sp_port->dev->ifindex : 0; + md->out_tc_valid = rx_md_info->tx_tc_valid; + md->out_tc = rx_md_info->tx_tc; + md->out_tc_occ_valid = rx_md_info->tx_congestion_valid; + md->out_tc_occ = rx_md_info->tx_congestion; + md->latency_valid = rx_md_info->latency_valid; + md->latency = rx_md_info->latency; + md->latency <<= MLXSW_SP_MIRROR_LATENCY_SHIFT; +} + static void mlxsw_sp_rx_sample_listener(struct sk_buff *skb, u8 local_port, void *trap_ctx) { struct mlxsw_sp *mlxsw_sp = devlink_trap_ctx_priv(trap_ctx); + struct mlxsw_sp_sample_trigger trigger; + struct mlxsw_sp_sample_params *params; + struct mlxsw_sp_port *mlxsw_sp_port; + struct psample_metadata md = {}; + int err; + + err = __mlxsw_sp_rx_no_mark_listener(skb, local_port, trap_ctx); + if (err) + return; + + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + if (!mlxsw_sp_port) + goto out; + + trigger.type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_INGRESS; + trigger.local_port = local_port; + params = mlxsw_sp_sample_trigger_params_lookup(mlxsw_sp, &trigger); + if (!params) + goto out; + + /* The psample module expects skb->data to point to the start of the + * Ethernet header. 
+ */ + skb_push(skb, ETH_HLEN); + mlxsw_sp_psample_md_init(mlxsw_sp, &md, skb, + mlxsw_sp_port->dev->ifindex, params->truncate, + params->trunc_size); + psample_sample_packet(params->psample_group, skb, params->rate, &md); +out: + consume_skb(skb); +} + +static void mlxsw_sp_rx_sample_tx_listener(struct sk_buff *skb, u8 local_port, + void *trap_ctx) +{ + struct mlxsw_rx_md_info *rx_md_info = &mlxsw_skb_cb(skb)->rx_md_info; + struct mlxsw_sp *mlxsw_sp = devlink_trap_ctx_priv(trap_ctx); + struct mlxsw_sp_port *mlxsw_sp_port, *mlxsw_sp_port_tx; + struct mlxsw_sp_sample_trigger trigger; + struct mlxsw_sp_sample_params *params; + struct psample_metadata md = {}; + int err; + + /* Locally generated packets are not reported from the policy engine + * trigger, so do not report them from the egress trigger as well. + */ + if (local_port == MLXSW_PORT_CPU_PORT) + goto out; + + err = __mlxsw_sp_rx_no_mark_listener(skb, local_port, trap_ctx); + if (err) + return; + + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + if (!mlxsw_sp_port) + goto out; + + /* Packet was sampled from Tx, so we need to retrieve the sample + * parameters based on the Tx port and not the Rx port. + */ + mlxsw_sp_port_tx = mlxsw_sp_sample_tx_port_get(mlxsw_sp, rx_md_info); + if (!mlxsw_sp_port_tx) + goto out; + + trigger.type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_EGRESS; + trigger.local_port = mlxsw_sp_port_tx->local_port; + params = mlxsw_sp_sample_trigger_params_lookup(mlxsw_sp, &trigger); + if (!params) + goto out; + + /* The psample module expects skb->data to point to the start of the + * Ethernet header. + */ + skb_push(skb, ETH_HLEN); + mlxsw_sp_psample_md_init(mlxsw_sp, &md, skb, + mlxsw_sp_port->dev->ifindex, params->truncate, + params->trunc_size); + psample_sample_packet(params->psample_group, skb, params->rate, &md); +out: + consume_skb(skb); +} + +static void mlxsw_sp_rx_sample_acl_listener(struct sk_buff *skb, u8 local_port, + void *trap_ctx) +{ + struct mlxsw_sp *mlxsw_sp = devlink_trap_ctx_priv(trap_ctx); + struct mlxsw_sp_sample_trigger trigger = { + .type = MLXSW_SP_SAMPLE_TRIGGER_TYPE_POLICY_ENGINE, + }; + struct mlxsw_sp_sample_params *params; + struct mlxsw_sp_port *mlxsw_sp_port; + struct psample_metadata md = {}; int err; err = __mlxsw_sp_rx_no_mark_listener(skb, local_port, trap_ctx); if (err) return; - /* The sample handler expects skb->data to point to the start of the + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + if (!mlxsw_sp_port) + goto out; + + params = mlxsw_sp_sample_trigger_params_lookup(mlxsw_sp, &trigger); + if (!params) + goto out; + + /* The psample module expects skb->data to point to the start of the * Ethernet header. 
*/ skb_push(skb, ETH_HLEN); - mlxsw_sp_sample_receive(mlxsw_sp, skb, local_port); + mlxsw_sp_psample_md_init(mlxsw_sp, &md, skb, + mlxsw_sp_port->dev->ifindex, params->truncate, + params->trunc_size); + psample_sample_packet(params->psample_group, skb, params->rate, &md); +out: + consume_skb(skb); } #define MLXSW_SP_TRAP_DROP(_id, _group_id) \ @@ -464,11 +624,6 @@ static const struct mlxsw_sp_trap_group_item mlxsw_sp_trap_group_items_arr[] = { .priority = 2, }, { - .group = DEVLINK_TRAP_GROUP_GENERIC(ACL_SAMPLE, 0), - .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_PKT_SAMPLE, - .priority = 0, - }, - { .group = DEVLINK_TRAP_GROUP_GENERIC(ACL_TRAP, 18), .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_FLOW_LOGGING, .priority = 4, @@ -993,14 +1148,6 @@ static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = { }, }, { - .trap = MLXSW_SP_TRAP_CONTROL(FLOW_ACTION_SAMPLE, ACL_SAMPLE, - MIRROR), - .listeners_arr = { - MLXSW_RXL(mlxsw_sp_rx_sample_listener, PKT_SAMPLE, - MIRROR_TO_CPU, false, SP_PKT_SAMPLE, DISCARD), - }, - }, - { .trap = MLXSW_SP_TRAP_CONTROL(FLOW_ACTION_TRAP, ACL_TRAP, TRAP), .listeners_arr = { MLXSW_SP_RXL_NO_MARK(ACL0, FLOW_LOGGING, TRAP_TO_CPU, @@ -1709,10 +1856,23 @@ int mlxsw_sp_trap_group_policer_hw_id_get(struct mlxsw_sp *mlxsw_sp, u16 id, static const struct mlxsw_sp_trap_group_item mlxsw_sp1_trap_group_items_arr[] = { + { + .group = DEVLINK_TRAP_GROUP_GENERIC(ACL_SAMPLE, 0), + .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_PKT_SAMPLE, + .priority = 0, + }, }; static const struct mlxsw_sp_trap_item mlxsw_sp1_trap_items_arr[] = { + { + .trap = MLXSW_SP_TRAP_CONTROL(FLOW_ACTION_SAMPLE, ACL_SAMPLE, + MIRROR), + .listeners_arr = { + MLXSW_RXL(mlxsw_sp_rx_sample_listener, PKT_SAMPLE, + MIRROR_TO_CPU, false, SP_PKT_SAMPLE, DISCARD), + }, + }, }; static int @@ -1749,6 +1909,12 @@ mlxsw_sp2_trap_group_items_arr[] = { .priority = 0, .fixed_policer = true, }, + { + .group = DEVLINK_TRAP_GROUP_GENERIC(ACL_SAMPLE, 0), + .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_PKT_SAMPLE, + .priority = 0, + .fixed_policer = true, + }, }; static const struct mlxsw_sp_trap_item @@ -1760,6 +1926,21 @@ mlxsw_sp2_trap_items_arr[] = { }, .is_source = true, }, + { + .trap = MLXSW_SP_TRAP_CONTROL(FLOW_ACTION_SAMPLE, ACL_SAMPLE, + MIRROR), + .listeners_arr = { + MLXSW_RXL_MIRROR(mlxsw_sp_rx_sample_listener, 1, + SP_PKT_SAMPLE, + MLXSW_SP_MIRROR_REASON_INGRESS), + MLXSW_RXL_MIRROR(mlxsw_sp_rx_sample_tx_listener, 1, + SP_PKT_SAMPLE, + MLXSW_SP_MIRROR_REASON_EGRESS), + MLXSW_RXL_MIRROR(mlxsw_sp_rx_sample_acl_listener, 1, + SP_PKT_SAMPLE, + MLXSW_SP_MIRROR_REASON_POLICY_ENGINE), + }, + }, }; static int diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c index 2feed6ce19d3..13eef6e9bd2d 100644 --- a/drivers/net/ethernet/micrel/ks8851_common.c +++ b/drivers/net/ethernet/micrel/ks8851_common.c @@ -193,11 +193,10 @@ static void ks8851_read_mac_addr(struct net_device *dev) static void ks8851_init_mac(struct ks8851_net *ks, struct device_node *np) { struct net_device *dev = ks->netdev; - const u8 *mac_addr; + int ret; - mac_addr = of_get_mac_address(np); - if (!IS_ERR(mac_addr)) { - ether_addr_copy(dev->dev_addr, mac_addr); + ret = of_get_mac_address(np, dev->dev_addr); + if (!ret) { ks8851_write_mac_addr(dev); return; } diff --git a/drivers/net/ethernet/microchip/encx24j600.c b/drivers/net/ethernet/microchip/encx24j600.c index 2c0dcd7acf3f..3658c4ae3c37 100644 --- a/drivers/net/ethernet/microchip/encx24j600.c +++ b/drivers/net/ethernet/microchip/encx24j600.c @@ 
-222,7 +222,6 @@ static int encx24j600_wait_for_autoneg(struct encx24j600_priv *priv) unsigned long timeout = jiffies + msecs_to_jiffies(2000); u16 phstat1; u16 estat; - int ret = 0; phstat1 = encx24j600_read_phy(priv, PHSTAT1); while ((phstat1 & ANDONE) == 0) { @@ -258,7 +257,7 @@ static int encx24j600_wait_for_autoneg(struct encx24j600_priv *priv) encx24j600_write_reg(priv, MACLCON, 0x370f); } - return ret; + return 0; } /* Access the PHY to determine link status */ @@ -1118,17 +1117,7 @@ static struct spi_driver encx24j600_spi_net_driver = { .id_table = encx24j600_spi_id_table, }; -static int __init encx24j600_init(void) -{ - return spi_register_driver(&encx24j600_spi_net_driver); -} -module_init(encx24j600_init); - -static void encx24j600_exit(void) -{ - spi_unregister_driver(&encx24j600_spi_net_driver); -} -module_exit(encx24j600_exit); +module_spi_driver(encx24j600_spi_net_driver); MODULE_DESCRIPTION(DRV_NAME " ethernet driver"); MODULE_AUTHOR("Jon Ringle <jringle@gridpoint.com>"); diff --git a/drivers/net/ethernet/microchip/lan743x_ethtool.c b/drivers/net/ethernet/microchip/lan743x_ethtool.c index c5de8f46cdd3..91a755efe2e6 100644 --- a/drivers/net/ethernet/microchip/lan743x_ethtool.c +++ b/drivers/net/ethernet/microchip/lan743x_ethtool.c @@ -730,8 +730,8 @@ static int lan743x_ethtool_get_eee(struct net_device *netdev, static int lan743x_ethtool_set_eee(struct net_device *netdev, struct ethtool_eee *eee) { - struct lan743x_adapter *adapter = netdev_priv(netdev); - struct phy_device *phydev = NULL; + struct lan743x_adapter *adapter; + struct phy_device *phydev; u32 buf = 0; int ret = 0; diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 1c3e204d727c..dae10328c6cf 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -885,8 +885,8 @@ static int lan743x_mac_set_mtu(struct lan743x_adapter *adapter, int new_mtu) } mac_rx &= ~(MAC_RX_MAX_SIZE_MASK_); - mac_rx |= (((new_mtu + ETH_HLEN + 4) << MAC_RX_MAX_SIZE_SHIFT_) & - MAC_RX_MAX_SIZE_MASK_); + mac_rx |= (((new_mtu + ETH_HLEN + ETH_FCS_LEN) + << MAC_RX_MAX_SIZE_SHIFT_) & MAC_RX_MAX_SIZE_MASK_); lan743x_csr_write(adapter, MAC_RX, mac_rx); if (enabled) { @@ -1944,7 +1944,7 @@ static int lan743x_rx_init_ring_element(struct lan743x_rx *rx, int index) struct sk_buff *skb; dma_addr_t dma_ptr; - buffer_length = netdev->mtu + ETH_HLEN + 4 + RX_HEAD_PADDING; + buffer_length = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + RX_HEAD_PADDING; descriptor = &rx->ring_cpu_ptr[index]; buffer_info = &rx->buffer_info[index]; @@ -2040,7 +2040,7 @@ lan743x_rx_trim_skb(struct sk_buff *skb, int frame_length) dev_kfree_skb_irq(skb); return NULL; } - frame_length = max_t(int, 0, frame_length - RX_HEAD_PADDING - 4); + frame_length = max_t(int, 0, frame_length - ETH_FCS_LEN); if (skb->len > frame_length) { skb->tail -= skb->len - frame_length; skb->len = frame_length; @@ -2771,7 +2771,6 @@ static int lan743x_pcidev_probe(struct pci_dev *pdev, { struct lan743x_adapter *adapter = NULL; struct net_device *netdev = NULL; - const void *mac_addr; int ret = -ENODEV; netdev = devm_alloc_etherdev(&pdev->dev, @@ -2788,9 +2787,7 @@ static int lan743x_pcidev_probe(struct pci_dev *pdev, NETIF_MSG_IFDOWN | NETIF_MSG_TX_QUEUED; netdev->max_mtu = LAN743X_MAX_FRAME_SIZE; - mac_addr = of_get_mac_address(pdev->dev.of_node); - if (!IS_ERR(mac_addr)) - ether_addr_copy(adapter->mac_address, mac_addr); + of_get_mac_address(pdev->dev.of_node, adapter->mac_address); ret = 
lan743x_pci_init(adapter, pdev); if (ret) @@ -3004,7 +3001,7 @@ static int lan743x_pm_suspend(struct device *dev) lan743x_pm_set_wol(adapter); /* Host sets PME_En, put D3hot */ - return pci_prepare_to_sleep(pdev);; + return pci_prepare_to_sleep(pdev); } static int lan743x_pm_resume(struct device *dev) diff --git a/drivers/net/ethernet/microsoft/Kconfig b/drivers/net/ethernet/microsoft/Kconfig new file mode 100644 index 000000000000..fe4e7a7d9c0b --- /dev/null +++ b/drivers/net/ethernet/microsoft/Kconfig @@ -0,0 +1,29 @@ +# +# Microsoft Azure network device configuration +# + +config NET_VENDOR_MICROSOFT + bool "Microsoft Network Devices" + default y + help + If you have a network (Ethernet) device belonging to this class, say Y. + + Note that the answer to this question doesn't directly affect the + kernel: saying N will just cause the configurator to skip the + question about Microsoft network devices. If you say Y, you will be + asked for your specific device in the following question. + +if NET_VENDOR_MICROSOFT + +config MICROSOFT_MANA + tristate "Microsoft Azure Network Adapter (MANA) support" + depends on PCI_MSI && X86_64 + depends on PCI_HYPERV + help + This driver supports Microsoft Azure Network Adapter (MANA). + So far, the driver is only supported on X86_64. + + To compile this driver as a module, choose M here. + The module will be called mana. + +endif #NET_VENDOR_MICROSOFT diff --git a/drivers/net/ethernet/microsoft/Makefile b/drivers/net/ethernet/microsoft/Makefile new file mode 100644 index 000000000000..d2ddc218135f --- /dev/null +++ b/drivers/net/ethernet/microsoft/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the Microsoft Azure network device driver. +# + +obj-$(CONFIG_MICROSOFT_MANA) += mana/ diff --git a/drivers/net/ethernet/microsoft/mana/Makefile b/drivers/net/ethernet/microsoft/mana/Makefile new file mode 100644 index 000000000000..0edd5bb685f3 --- /dev/null +++ b/drivers/net/ethernet/microsoft/mana/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +# +# Makefile for the Microsoft Azure Network Adapter driver + +obj-$(CONFIG_MICROSOFT_MANA) += mana.o +mana-objs := gdma_main.o shm_channel.o hw_channel.o mana_en.o mana_ethtool.o diff --git a/drivers/net/ethernet/microsoft/mana/gdma.h b/drivers/net/ethernet/microsoft/mana/gdma.h new file mode 100644 index 000000000000..33e53d32e891 --- /dev/null +++ b/drivers/net/ethernet/microsoft/mana/gdma.h @@ -0,0 +1,673 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +/* Copyright (c) 2021, Microsoft Corporation. */ + +#ifndef _GDMA_H +#define _GDMA_H + +#include <linux/dma-mapping.h> +#include <linux/netdevice.h> + +#include "shm_channel.h" + +/* Structures labeled with "HW DATA" are exchanged with the hardware. All of + * them are naturally aligned and hence don't need __packed. 
+ */ + +enum gdma_request_type { + GDMA_VERIFY_VF_DRIVER_VERSION = 1, + GDMA_QUERY_MAX_RESOURCES = 2, + GDMA_LIST_DEVICES = 3, + GDMA_REGISTER_DEVICE = 4, + GDMA_DEREGISTER_DEVICE = 5, + GDMA_GENERATE_TEST_EQE = 10, + GDMA_CREATE_QUEUE = 12, + GDMA_DISABLE_QUEUE = 13, + GDMA_CREATE_DMA_REGION = 25, + GDMA_DMA_REGION_ADD_PAGES = 26, + GDMA_DESTROY_DMA_REGION = 27, +}; + +enum gdma_queue_type { + GDMA_INVALID_QUEUE, + GDMA_SQ, + GDMA_RQ, + GDMA_CQ, + GDMA_EQ, +}; + +enum gdma_work_request_flags { + GDMA_WR_NONE = 0, + GDMA_WR_OOB_IN_SGL = BIT(0), + GDMA_WR_PAD_BY_SGE0 = BIT(1), +}; + +enum gdma_eqe_type { + GDMA_EQE_COMPLETION = 3, + GDMA_EQE_TEST_EVENT = 64, + GDMA_EQE_HWC_INIT_EQ_ID_DB = 129, + GDMA_EQE_HWC_INIT_DATA = 130, + GDMA_EQE_HWC_INIT_DONE = 131, +}; + +enum { + GDMA_DEVICE_NONE = 0, + GDMA_DEVICE_HWC = 1, + GDMA_DEVICE_MANA = 2, +}; + +struct gdma_resource { + /* Protect the bitmap */ + spinlock_t lock; + + /* The bitmap size in bits. */ + u32 size; + + /* The bitmap tracks the resources. */ + unsigned long *map; +}; + +union gdma_doorbell_entry { + u64 as_uint64; + + struct { + u64 id : 24; + u64 reserved : 8; + u64 tail_ptr : 31; + u64 arm : 1; + } cq; + + struct { + u64 id : 24; + u64 wqe_cnt : 8; + u64 tail_ptr : 32; + } rq; + + struct { + u64 id : 24; + u64 reserved : 8; + u64 tail_ptr : 32; + } sq; + + struct { + u64 id : 16; + u64 reserved : 16; + u64 tail_ptr : 31; + u64 arm : 1; + } eq; +}; /* HW DATA */ + +struct gdma_msg_hdr { + u32 hdr_type; + u32 msg_type; + u16 msg_version; + u16 hwc_msg_id; + u32 msg_size; +}; /* HW DATA */ + +struct gdma_dev_id { + union { + struct { + u16 type; + u16 instance; + }; + + u32 as_uint32; + }; +}; /* HW DATA */ + +struct gdma_req_hdr { + struct gdma_msg_hdr req; + struct gdma_msg_hdr resp; /* The expected response */ + struct gdma_dev_id dev_id; + u32 activity_id; +}; /* HW DATA */ + +struct gdma_resp_hdr { + struct gdma_msg_hdr response; + struct gdma_dev_id dev_id; + u32 activity_id; + u32 status; + u32 reserved; +}; /* HW DATA */ + +struct gdma_general_req { + struct gdma_req_hdr hdr; +}; /* HW DATA */ + +#define GDMA_MESSAGE_V1 1 + +struct gdma_general_resp { + struct gdma_resp_hdr hdr; +}; /* HW DATA */ + +#define GDMA_STANDARD_HEADER_TYPE 0 + +static inline void mana_gd_init_req_hdr(struct gdma_req_hdr *hdr, u32 code, + u32 req_size, u32 resp_size) +{ + hdr->req.hdr_type = GDMA_STANDARD_HEADER_TYPE; + hdr->req.msg_type = code; + hdr->req.msg_version = GDMA_MESSAGE_V1; + hdr->req.msg_size = req_size; + + hdr->resp.hdr_type = GDMA_STANDARD_HEADER_TYPE; + hdr->resp.msg_type = code; + hdr->resp.msg_version = GDMA_MESSAGE_V1; + hdr->resp.msg_size = resp_size; +} + +/* The 16-byte struct is part of the GDMA work queue entry (WQE). 
*/ +struct gdma_sge { + u64 address; + u32 mem_key; + u32 size; +}; /* HW DATA */ + +struct gdma_wqe_request { + struct gdma_sge *sgl; + u32 num_sge; + + u32 inline_oob_size; + const void *inline_oob_data; + + u32 flags; + u32 client_data_unit; +}; + +enum gdma_page_type { + GDMA_PAGE_TYPE_4K, +}; + +#define GDMA_INVALID_DMA_REGION 0 + +struct gdma_mem_info { + struct device *dev; + + dma_addr_t dma_handle; + void *virt_addr; + u64 length; + + /* Allocated by the PF driver */ + u64 gdma_region; +}; + +#define REGISTER_ATB_MST_MKEY_LOWER_SIZE 8 + +struct gdma_dev { + struct gdma_context *gdma_context; + + struct gdma_dev_id dev_id; + + u32 pdid; + u32 doorbell; + u32 gpa_mkey; + + /* GDMA driver specific pointer */ + void *driver_data; +}; + +#define MINIMUM_SUPPORTED_PAGE_SIZE PAGE_SIZE + +#define GDMA_CQE_SIZE 64 +#define GDMA_EQE_SIZE 16 +#define GDMA_MAX_SQE_SIZE 512 +#define GDMA_MAX_RQE_SIZE 256 + +#define GDMA_COMP_DATA_SIZE 0x3C + +#define GDMA_EVENT_DATA_SIZE 0xC + +/* The WQE size must be a multiple of the Basic Unit, which is 32 bytes. */ +#define GDMA_WQE_BU_SIZE 32 + +#define INVALID_PDID UINT_MAX +#define INVALID_DOORBELL UINT_MAX +#define INVALID_MEM_KEY UINT_MAX +#define INVALID_QUEUE_ID UINT_MAX +#define INVALID_PCI_MSIX_INDEX UINT_MAX + +struct gdma_comp { + u32 cqe_data[GDMA_COMP_DATA_SIZE / 4]; + u32 wq_num; + bool is_sq; +}; + +struct gdma_event { + u32 details[GDMA_EVENT_DATA_SIZE / 4]; + u8 type; +}; + +struct gdma_queue; + +#define CQE_POLLING_BUFFER 512 +struct mana_eq { + struct gdma_queue *eq; + struct gdma_comp cqe_poll[CQE_POLLING_BUFFER]; +}; + +typedef void gdma_eq_callback(void *context, struct gdma_queue *q, + struct gdma_event *e); + +typedef void gdma_cq_callback(void *context, struct gdma_queue *q); + +/* The 'head' is the producer index. For SQ/RQ, when the driver posts a WQE + * (Note: the WQE size must be a multiple of the 32-byte Basic Unit), the + * driver increases the 'head' in BUs rather than in bytes, and notifies + * the HW of the updated head. For EQ/CQ, the driver uses the 'head' to track + * the HW head, and increases the 'head' by 1 for every processed EQE/CQE. + * + * The 'tail' is the consumer index for SQ/RQ. After the CQE of the SQ/RQ is + * processed, the driver increases the 'tail' to indicate that WQEs have + * been consumed by the HW, so the driver can post new WQEs into the SQ/RQ. + * + * The driver doesn't use the 'tail' for EQ/CQ, because the driver ensures + * that the EQ/CQ is big enough so they can't overflow, and the driver uses + * the owner bits mechanism to detect if the queue has become empty. + */ +struct gdma_queue { + struct gdma_dev *gdma_dev; + + enum gdma_queue_type type; + u32 id; + + struct gdma_mem_info mem_info; + + void *queue_mem_ptr; + u32 queue_size; + + bool monitor_avl_buf; + + u32 head; + u32 tail; + + /* Extra fields specific to EQ/CQ. */ + union { + struct { + bool disable_needed; + + gdma_eq_callback *callback; + void *context; + + unsigned int msix_index; + + u32 log2_throttle_limit; + + /* NAPI data */ + struct napi_struct napi; + int work_done; + int budget; + } eq; + + struct { + gdma_cq_callback *callback; + void *context; + + struct gdma_queue *parent; /* For CQ/EQ relationship */ + } cq; + }; +}; + +struct gdma_queue_spec { + enum gdma_queue_type type; + bool monitor_avl_buf; + unsigned int queue_size; + + /* Extra fields specific to EQ/CQ. */ + union { + struct { + gdma_eq_callback *callback; + void *context; + + unsigned long log2_throttle_limit; + + /* Only used by the MANA device. 
+ */ + struct net_device *ndev; + } eq; + + struct { + gdma_cq_callback *callback; + void *context; + + struct gdma_queue *parent_eq; + + } cq; + }; +}; + +struct gdma_irq_context { + void (*handler)(void *arg); + void *arg; +}; + +struct gdma_context { + struct device *dev; + + /* Per-vPort max number of queues */ + unsigned int max_num_queues; + unsigned int max_num_msix; + unsigned int num_msix_usable; + struct gdma_resource msix_resource; + struct gdma_irq_context *irq_contexts; + + /* This maps a CQ index to the queue structure. */ + unsigned int max_num_cqs; + struct gdma_queue **cq_table; + + /* Protect eq_test_event and test_event_eq_id */ + struct mutex eq_test_event_mutex; + struct completion eq_test_event; + u32 test_event_eq_id; + + void __iomem *bar0_va; + void __iomem *shm_base; + void __iomem *db_page_base; + u32 db_page_size; + + /* Shared memory channel (used to bootstrap HWC) */ + struct shm_channel shm_channel; + + /* Hardware communication channel (HWC) */ + struct gdma_dev hwc; + + /* Azure network adapter */ + struct gdma_dev mana; +}; + +#define MAX_NUM_GDMA_DEVICES 4 + +static inline bool mana_gd_is_mana(struct gdma_dev *gd) +{ + return gd->dev_id.type == GDMA_DEVICE_MANA; +} + +static inline bool mana_gd_is_hwc(struct gdma_dev *gd) +{ + return gd->dev_id.type == GDMA_DEVICE_HWC; +} + +u8 *mana_gd_get_wqe_ptr(const struct gdma_queue *wq, u32 wqe_offset); +u32 mana_gd_wq_avail_space(struct gdma_queue *wq); + +int mana_gd_test_eq(struct gdma_context *gc, struct gdma_queue *eq); + +int mana_gd_create_hwc_queue(struct gdma_dev *gd, + const struct gdma_queue_spec *spec, + struct gdma_queue **queue_ptr); + +int mana_gd_create_mana_eq(struct gdma_dev *gd, + const struct gdma_queue_spec *spec, + struct gdma_queue **queue_ptr); + +int mana_gd_create_mana_wq_cq(struct gdma_dev *gd, + const struct gdma_queue_spec *spec, + struct gdma_queue **queue_ptr); + +void mana_gd_destroy_queue(struct gdma_context *gc, struct gdma_queue *queue); + +int mana_gd_poll_cq(struct gdma_queue *cq, struct gdma_comp *comp, int num_cqe); + +void mana_gd_arm_cq(struct gdma_queue *cq); + +struct gdma_wqe { + u32 reserved :24; + u32 last_vbytes :8; + + union { + u32 flags; + + struct { + u32 num_sge :8; + u32 inline_oob_size_div4:3; + u32 client_oob_in_sgl :1; + u32 reserved1 :4; + u32 client_data_unit :14; + u32 reserved2 :2; + }; + }; +}; /* HW DATA */ + +#define INLINE_OOB_SMALL_SIZE 8 +#define INLINE_OOB_LARGE_SIZE 24 + +#define MAX_TX_WQE_SIZE 512 +#define MAX_RX_WQE_SIZE 256 + +struct gdma_cqe { + u32 cqe_data[GDMA_COMP_DATA_SIZE / 4]; + + union { + u32 as_uint32; + + struct { + u32 wq_num : 24; + u32 is_sq : 1; + u32 reserved : 4; + u32 owner_bits : 3; + }; + } cqe_info; +}; /* HW DATA */ + +#define GDMA_CQE_OWNER_BITS 3 + +#define GDMA_CQE_OWNER_MASK ((1 << GDMA_CQE_OWNER_BITS) - 1) + +#define SET_ARM_BIT 1 + +#define GDMA_EQE_OWNER_BITS 3 + +union gdma_eqe_info { + u32 as_uint32; + + struct { + u32 type : 8; + u32 reserved1 : 8; + u32 client_id : 2; + u32 reserved2 : 11; + u32 owner_bits : 3; + }; +}; /* HW DATA */ + +#define GDMA_EQE_OWNER_MASK ((1 << GDMA_EQE_OWNER_BITS) - 1) +#define INITIALIZED_OWNER_BIT(log2_num_entries) (1UL << (log2_num_entries)) + +struct gdma_eqe { + u32 details[GDMA_EVENT_DATA_SIZE / 4]; + u32 eqe_info; +}; /* HW DATA */ + +#define GDMA_REG_DB_PAGE_OFFSET 8 +#define GDMA_REG_DB_PAGE_SIZE 0x10 +#define GDMA_REG_SHM_OFFSET 0x18 + +struct gdma_posted_wqe_info { + u32 wqe_size_in_bu; +}; +
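The owner-bits machinery defined above (GDMA_EQE_OWNER_BITS, GDMA_EQE_OWNER_MASK, INITIALIZED_OWNER_BIT) can be exercised in isolation. Below is a minimal user-space sketch, not driver code, that mirrors the arithmetic mana_gd_process_eq_events() performs to decide whether the next EQE slot is stale, freshly written, or evidence that the producer lapped the consumer; all names in it are invented for illustration.

#include <assert.h>
#include <stdint.h>

#define OWNER_BITS 3
#define OWNER_MASK ((1u << OWNER_BITS) - 1)

enum slot_state { SLOT_EMPTY, SLOT_NEW, SLOT_OVERFLOW };

/* 'head' counts every entry ever consumed, so head / num_entries is the
 * number of completed laps around the ring. The producer stamps each entry
 * with its own lap count modulo 2^OWNER_BITS; comparing the stamp against
 * the consumer's previous and current lap values classifies the slot.
 */
static enum slot_state classify(uint32_t head, uint32_t num_entries,
				uint32_t stamped_owner_bits)
{
	uint32_t old_bits = (head / num_entries - 1) & OWNER_MASK;
	uint32_t new_bits = (head / num_entries) & OWNER_MASK;

	if (stamped_owner_bits == old_bits)
		return SLOT_EMPTY;	/* not rewritten since last lap */
	if (stamped_owner_bits == new_bits)
		return SLOT_NEW;	/* produced this lap */
	return SLOT_OVERFLOW;		/* consumer fell a lap behind */
}

int main(void)
{
	/* A 16-entry ring; head starts at 16, i.e. INITIALIZED_OWNER_BIT(4),
	 * matching how mana_gd_create_eq() seeds queue->head.
	 */
	assert(classify(16, 16, 0) == SLOT_EMPTY);
	assert(classify(16, 16, 1) == SLOT_NEW);
	assert(classify(16, 16, 2) == SLOT_OVERFLOW);
	return 0;
}

This is why the driver keeps no 'tail' for EQs and CQs: sizing the queue so it cannot overflow makes the generation stamp alone sufficient to detect emptiness.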
+/* GDMA_GENERATE_TEST_EQE */ +struct gdma_generate_test_event_req { + struct gdma_req_hdr hdr; + u32 queue_index; +}; /* HW DATA */ + +/* GDMA_VERIFY_VF_DRIVER_VERSION */ +enum { + GDMA_PROTOCOL_V1 = 1, + GDMA_PROTOCOL_FIRST = GDMA_PROTOCOL_V1, + GDMA_PROTOCOL_LAST = GDMA_PROTOCOL_V1, +}; + +struct gdma_verify_ver_req { + struct gdma_req_hdr hdr; + + /* Mandatory fields required for protocol establishment */ + u64 protocol_ver_min; + u64 protocol_ver_max; + u64 drv_cap_flags1; + u64 drv_cap_flags2; + u64 drv_cap_flags3; + u64 drv_cap_flags4; + + /* Advisory fields */ + u64 drv_ver; + u32 os_type; /* Linux = 0x10; Windows = 0x20; Other = 0x30 */ + u32 reserved; + u32 os_ver_major; + u32 os_ver_minor; + u32 os_ver_build; + u32 os_ver_platform; + u64 reserved_2; + u8 os_ver_str1[128]; + u8 os_ver_str2[128]; + u8 os_ver_str3[128]; + u8 os_ver_str4[128]; +}; /* HW DATA */ + +struct gdma_verify_ver_resp { + struct gdma_resp_hdr hdr; + u64 gdma_protocol_ver; + u64 pf_cap_flags1; + u64 pf_cap_flags2; + u64 pf_cap_flags3; + u64 pf_cap_flags4; +}; /* HW DATA */ + +/* GDMA_QUERY_MAX_RESOURCES */ +struct gdma_query_max_resources_resp { + struct gdma_resp_hdr hdr; + u32 status; + u32 max_sq; + u32 max_rq; + u32 max_cq; + u32 max_eq; + u32 max_db; + u32 max_mst; + u32 max_cq_mod_ctx; + u32 max_mod_cq; + u32 max_msix; +}; /* HW DATA */ + +/* GDMA_LIST_DEVICES */ +struct gdma_list_devices_resp { + struct gdma_resp_hdr hdr; + u32 num_of_devs; + u32 reserved; + struct gdma_dev_id devs[64]; +}; /* HW DATA */ + +/* GDMA_REGISTER_DEVICE */ +struct gdma_register_device_resp { + struct gdma_resp_hdr hdr; + u32 pdid; + u32 gpa_mkey; + u32 db_id; +}; /* HW DATA */ + +/* GDMA_CREATE_QUEUE */ +struct gdma_create_queue_req { + struct gdma_req_hdr hdr; + u32 type; + u32 reserved1; + u32 pdid; + u32 doorbell_id; + u64 gdma_region; + u32 reserved2; + u32 queue_size; + u32 log2_throttle_limit; + u32 eq_pci_msix_index; + u32 cq_mod_ctx_id; + u32 cq_parent_eq_id; + u8 rq_drop_on_overrun; + u8 rq_err_on_wqe_overflow; + u8 rq_chain_rec_wqes; + u8 sq_hw_db; + u32 reserved3; +}; /* HW DATA */ + +struct gdma_create_queue_resp { + struct gdma_resp_hdr hdr; + u32 queue_index; +}; /* HW DATA */ + +/* GDMA_DISABLE_QUEUE */ +struct gdma_disable_queue_req { + struct gdma_req_hdr hdr; + u32 type; + u32 queue_index; + u32 alloc_res_id_on_creation; +}; /* HW DATA */ + +/* GDMA_CREATE_DMA_REGION */ +struct gdma_create_dma_region_req { + struct gdma_req_hdr hdr; + + /* The total size of the DMA region */ + u64 length; + + /* The offset in the first page */ + u32 offset_in_page; + + /* enum gdma_page_type */ + u32 gdma_page_type; + + /* The total number of pages */ + u32 page_count; + + /* If page_addr_list_len is smaller than page_count, + * the remaining page addresses will be added via the + * message GDMA_DMA_REGION_ADD_PAGES.
+ */ + u32 page_addr_list_len; + u64 page_addr_list[]; +}; /* HW DATA */ + +struct gdma_create_dma_region_resp { + struct gdma_resp_hdr hdr; + u64 gdma_region; +}; /* HW DATA */ + +/* GDMA_DMA_REGION_ADD_PAGES */ +struct gdma_dma_region_add_pages_req { + struct gdma_req_hdr hdr; + + u64 gdma_region; + + u32 page_addr_list_len; + u32 reserved3; + + u64 page_addr_list[]; +}; /* HW DATA */ + +/* GDMA_DESTROY_DMA_REGION */ +struct gdma_destroy_dma_region_req { + struct gdma_req_hdr hdr; + + u64 gdma_region; +}; /* HW DATA */ + +int mana_gd_verify_vf_version(struct pci_dev *pdev); + +int mana_gd_register_device(struct gdma_dev *gd); +int mana_gd_deregister_device(struct gdma_dev *gd); + +int mana_gd_post_work_request(struct gdma_queue *wq, + const struct gdma_wqe_request *wqe_req, + struct gdma_posted_wqe_info *wqe_info); + +int mana_gd_post_and_ring(struct gdma_queue *queue, + const struct gdma_wqe_request *wqe, + struct gdma_posted_wqe_info *wqe_info); + +int mana_gd_alloc_res_map(u32 res_avail, struct gdma_resource *r); +void mana_gd_free_res_map(struct gdma_resource *r); + +void mana_gd_wq_ring_doorbell(struct gdma_context *gc, + struct gdma_queue *queue); + +int mana_gd_alloc_memory(struct gdma_context *gc, unsigned int length, + struct gdma_mem_info *gmi); + +void mana_gd_free_memory(struct gdma_mem_info *gmi); + +int mana_gd_send_request(struct gdma_context *gc, u32 req_len, const void *req, + u32 resp_len, void *resp); +#endif /* _GDMA_H */ diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c new file mode 100644 index 000000000000..2f87bf90f8ec --- /dev/null +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -0,0 +1,1415 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* Copyright (c) 2021, Microsoft Corporation. */ + +#include <linux/module.h> +#include <linux/pci.h> + +#include "mana.h" + +static u32 mana_gd_r32(struct gdma_context *g, u64 offset) +{ + return readl(g->bar0_va + offset); +} + +static u64 mana_gd_r64(struct gdma_context *g, u64 offset) +{ + return readq(g->bar0_va + offset); +} + +static void mana_gd_init_registers(struct pci_dev *pdev) +{ + struct gdma_context *gc = pci_get_drvdata(pdev); + + gc->db_page_size = mana_gd_r32(gc, GDMA_REG_DB_PAGE_SIZE) & 0xFFFF; + + gc->db_page_base = gc->bar0_va + + mana_gd_r64(gc, GDMA_REG_DB_PAGE_OFFSET); + + gc->shm_base = gc->bar0_va + mana_gd_r64(gc, GDMA_REG_SHM_OFFSET); +} + +static int mana_gd_query_max_resources(struct pci_dev *pdev) +{ + struct gdma_context *gc = pci_get_drvdata(pdev); + struct gdma_query_max_resources_resp resp = {}; + struct gdma_general_req req = {}; + int err; + + mana_gd_init_req_hdr(&req.hdr, GDMA_QUERY_MAX_RESOURCES, + sizeof(req), sizeof(resp)); + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.hdr.status) { + dev_err(gc->dev, "Failed to query resource info: %d, 0x%x\n", + err, resp.hdr.status); + return err ? 
err : -EPROTO; + } + + if (gc->num_msix_usable > resp.max_msix) + gc->num_msix_usable = resp.max_msix; + + if (gc->num_msix_usable <= 1) + return -ENOSPC; + + gc->max_num_queues = num_online_cpus(); + if (gc->max_num_queues > MANA_MAX_NUM_QUEUES) + gc->max_num_queues = MANA_MAX_NUM_QUEUES; + + if (gc->max_num_queues > resp.max_eq) + gc->max_num_queues = resp.max_eq; + + if (gc->max_num_queues > resp.max_cq) + gc->max_num_queues = resp.max_cq; + + if (gc->max_num_queues > resp.max_sq) + gc->max_num_queues = resp.max_sq; + + if (gc->max_num_queues > resp.max_rq) + gc->max_num_queues = resp.max_rq; + + return 0; +} + +static int mana_gd_detect_devices(struct pci_dev *pdev) +{ + struct gdma_context *gc = pci_get_drvdata(pdev); + struct gdma_list_devices_resp resp = {}; + struct gdma_general_req req = {}; + struct gdma_dev_id dev; + u32 i, max_num_devs; + u16 dev_type; + int err; + + mana_gd_init_req_hdr(&req.hdr, GDMA_LIST_DEVICES, sizeof(req), + sizeof(resp)); + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.hdr.status) { + dev_err(gc->dev, "Failed to detect devices: %d, 0x%x\n", err, + resp.hdr.status); + return err ? err : -EPROTO; + } + + max_num_devs = min_t(u32, MAX_NUM_GDMA_DEVICES, resp.num_of_devs); + + for (i = 0; i < max_num_devs; i++) { + dev = resp.devs[i]; + dev_type = dev.type; + + /* HWC is already detected in mana_hwc_create_channel(). */ + if (dev_type == GDMA_DEVICE_HWC) + continue; + + if (dev_type == GDMA_DEVICE_MANA) { + gc->mana.gdma_context = gc; + gc->mana.dev_id = dev; + } + } + + return gc->mana.dev_id.type == 0 ? -ENODEV : 0; +} + +int mana_gd_send_request(struct gdma_context *gc, u32 req_len, const void *req, + u32 resp_len, void *resp) +{ + struct hw_channel_context *hwc = gc->hwc.driver_data; + + return mana_hwc_send_request(hwc, req_len, req, resp_len, resp); +} + +int mana_gd_alloc_memory(struct gdma_context *gc, unsigned int length, + struct gdma_mem_info *gmi) +{ + dma_addr_t dma_handle; + void *buf; + + if (length < PAGE_SIZE || !is_power_of_2(length)) + return -EINVAL; + + gmi->dev = gc->dev; + buf = dma_alloc_coherent(gmi->dev, length, &dma_handle, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + gmi->dma_handle = dma_handle; + gmi->virt_addr = buf; + gmi->length = length; + + return 0; +} + +void mana_gd_free_memory(struct gdma_mem_info *gmi) +{ + dma_free_coherent(gmi->dev, gmi->length, gmi->virt_addr, + gmi->dma_handle); +}
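Note the contract mana_gd_alloc_memory() enforces above: a queue buffer must be at least one page long and a power of two in size. A small user-space sketch (illustrative only; PAGE_SZ and the helper name are invented) of rounding a desired EQE count up to an acceptable size:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SZ 4096u
#define EQE_SZ 16u	/* matches GDMA_EQE_SIZE */

/* Smallest power-of-two size, no less than one page, that holds
 * 'num_entries' EQEs, satisfying the checks in mana_gd_alloc_memory().
 */
static uint32_t queue_size_for(uint32_t num_entries)
{
	uint32_t bytes = num_entries * EQE_SZ;
	uint32_t size = PAGE_SZ;

	while (size < bytes)
		size <<= 1;
	return size;
}

int main(void)
{
	printf("%u\n", queue_size_for(1000)); /* 16000 B rounds up to 16384 */
	return 0;
}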
+static int mana_gd_create_hw_eq(struct gdma_context *gc, + struct gdma_queue *queue) +{ + struct gdma_create_queue_resp resp = {}; + struct gdma_create_queue_req req = {}; + int err; + + if (queue->type != GDMA_EQ) + return -EINVAL; + + mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_QUEUE, + sizeof(req), sizeof(resp)); + + req.hdr.dev_id = queue->gdma_dev->dev_id; + req.type = queue->type; + req.pdid = queue->gdma_dev->pdid; + req.doorbell_id = queue->gdma_dev->doorbell; + req.gdma_region = queue->mem_info.gdma_region; + req.queue_size = queue->queue_size; + req.log2_throttle_limit = queue->eq.log2_throttle_limit; + req.eq_pci_msix_index = queue->eq.msix_index; + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.hdr.status) { + dev_err(gc->dev, "Failed to create queue: %d, 0x%x\n", err, + resp.hdr.status); + return err ? err : -EPROTO; + } + + queue->id = resp.queue_index; + queue->eq.disable_needed = true; + queue->mem_info.gdma_region = GDMA_INVALID_DMA_REGION; + return 0; +} + +static int mana_gd_disable_queue(struct gdma_queue *queue) +{ + struct gdma_context *gc = queue->gdma_dev->gdma_context; + struct gdma_disable_queue_req req = {}; + struct gdma_general_resp resp = {}; + int err; + + WARN_ON(queue->type != GDMA_EQ); + + mana_gd_init_req_hdr(&req.hdr, GDMA_DISABLE_QUEUE, + sizeof(req), sizeof(resp)); + + req.hdr.dev_id = queue->gdma_dev->dev_id; + req.type = queue->type; + req.queue_index = queue->id; + req.alloc_res_id_on_creation = 1; + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.hdr.status) { + dev_err(gc->dev, "Failed to disable queue: %d, 0x%x\n", err, + resp.hdr.status); + return err ? err : -EPROTO; + } + + return 0; +} + +#define DOORBELL_OFFSET_SQ 0x0 +#define DOORBELL_OFFSET_RQ 0x400 +#define DOORBELL_OFFSET_CQ 0x800 +#define DOORBELL_OFFSET_EQ 0xFF8 + +static void mana_gd_ring_doorbell(struct gdma_context *gc, u32 db_index, + enum gdma_queue_type q_type, u32 qid, + u32 tail_ptr, u8 num_req) +{ + void __iomem *addr = gc->db_page_base + gc->db_page_size * db_index; + union gdma_doorbell_entry e = {}; + + switch (q_type) { + case GDMA_EQ: + e.eq.id = qid; + e.eq.tail_ptr = tail_ptr; + e.eq.arm = num_req; + + addr += DOORBELL_OFFSET_EQ; + break; + + case GDMA_CQ: + e.cq.id = qid; + e.cq.tail_ptr = tail_ptr; + e.cq.arm = num_req; + + addr += DOORBELL_OFFSET_CQ; + break; + + case GDMA_RQ: + e.rq.id = qid; + e.rq.tail_ptr = tail_ptr; + e.rq.wqe_cnt = num_req; + + addr += DOORBELL_OFFSET_RQ; + break; + + case GDMA_SQ: + e.sq.id = qid; + e.sq.tail_ptr = tail_ptr; + + addr += DOORBELL_OFFSET_SQ; + break; + + default: + WARN_ON(1); + return; + } + + /* Ensure all writes are done before ringing the doorbell */ + wmb(); + + writeq(e.as_uint64, addr); +} + +void mana_gd_wq_ring_doorbell(struct gdma_context *gc, struct gdma_queue *queue) +{ + mana_gd_ring_doorbell(gc, queue->gdma_dev->doorbell, queue->type, + queue->id, queue->head * GDMA_WQE_BU_SIZE, 1); +} + +void mana_gd_arm_cq(struct gdma_queue *cq) +{ + struct gdma_context *gc = cq->gdma_dev->gdma_context; + + u32 num_cqe = cq->queue_size / GDMA_CQE_SIZE; + + u32 head = cq->head % (num_cqe << GDMA_CQE_OWNER_BITS); + + mana_gd_ring_doorbell(gc, cq->gdma_dev->doorbell, cq->type, cq->id, + head, SET_ARM_BIT); +}
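The bitfield layout of union gdma_doorbell_entry determines the 64-bit value mana_gd_ring_doorbell() writes. Below is a user-space sketch that composes an EQ doorbell by hand; it assumes the usual little-endian bitfield allocation (id in bits 0-23, tail_ptr in bits 32-62, arm in bit 63) and the function name is invented.

#include <inttypes.h>
#include <stdio.h>

/* Build the same value the driver's e.eq bitfields would produce. */
static uint64_t eq_doorbell(uint32_t id, uint32_t tail_ptr, unsigned int arm)
{
	return (uint64_t)(id & 0xFFFFFFu) |
	       ((uint64_t)(tail_ptr & 0x7FFFFFFFu) << 32) |
	       ((uint64_t)(arm & 1u) << 63);
}

int main(void)
{
	/* EQ 5, tail pointer 100, arm bit set. */
	printf("0x%016" PRIx64 "\n", eq_doorbell(5, 100, 1));
	return 0;
}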
+static void mana_gd_process_eqe(struct gdma_queue *eq) +{ + u32 head = eq->head % (eq->queue_size / GDMA_EQE_SIZE); + struct gdma_context *gc = eq->gdma_dev->gdma_context; + struct gdma_eqe *eq_eqe_ptr = eq->queue_mem_ptr; + union gdma_eqe_info eqe_info; + enum gdma_eqe_type type; + struct gdma_event event; + struct gdma_queue *cq; + struct gdma_eqe *eqe; + u32 cq_id; + + eqe = &eq_eqe_ptr[head]; + eqe_info.as_uint32 = eqe->eqe_info; + type = eqe_info.type; + + switch (type) { + case GDMA_EQE_COMPLETION: + cq_id = eqe->details[0] & 0xFFFFFF; + if (WARN_ON_ONCE(cq_id >= gc->max_num_cqs)) + break; + + cq = gc->cq_table[cq_id]; + if (WARN_ON_ONCE(!cq || cq->type != GDMA_CQ || cq->id != cq_id)) + break; + + if (cq->cq.callback) + cq->cq.callback(cq->cq.context, cq); + + break; + + case GDMA_EQE_TEST_EVENT: + gc->test_event_eq_id = eq->id; + complete(&gc->eq_test_event); + break; + + case GDMA_EQE_HWC_INIT_EQ_ID_DB: + case GDMA_EQE_HWC_INIT_DATA: + case GDMA_EQE_HWC_INIT_DONE: + if (!eq->eq.callback) + break; + + event.type = type; + memcpy(&event.details, &eqe->details, GDMA_EVENT_DATA_SIZE); + eq->eq.callback(eq->eq.context, eq, &event); + break; + + default: + break; + } +} + +static void mana_gd_process_eq_events(void *arg) +{ + u32 owner_bits, new_bits, old_bits; + union gdma_eqe_info eqe_info; + struct gdma_eqe *eq_eqe_ptr; + struct gdma_queue *eq = arg; + struct gdma_context *gc; + struct gdma_eqe *eqe; + unsigned int arm_bit; + u32 head, num_eqe; + int i; + + gc = eq->gdma_dev->gdma_context; + + num_eqe = eq->queue_size / GDMA_EQE_SIZE; + eq_eqe_ptr = eq->queue_mem_ptr; + + /* Process up to 5 EQEs at a time, and update the HW head. */ + for (i = 0; i < 5; i++) { + eqe = &eq_eqe_ptr[eq->head % num_eqe]; + eqe_info.as_uint32 = eqe->eqe_info; + owner_bits = eqe_info.owner_bits; + + old_bits = (eq->head / num_eqe - 1) & GDMA_EQE_OWNER_MASK; + /* No more entries */ + if (owner_bits == old_bits) + break; + + new_bits = (eq->head / num_eqe) & GDMA_EQE_OWNER_MASK; + if (owner_bits != new_bits) { + dev_err(gc->dev, "EQ %d: overflow detected\n", eq->id); + break; + } + + mana_gd_process_eqe(eq); + + eq->head++; + } + + /* Always rearm the EQ for HWC. For MANA, rearm it when NAPI is done. */ + if (mana_gd_is_hwc(eq->gdma_dev)) { + arm_bit = SET_ARM_BIT; + } else if (eq->eq.work_done < eq->eq.budget && + napi_complete_done(&eq->eq.napi, eq->eq.work_done)) { + arm_bit = SET_ARM_BIT; + } else { + arm_bit = 0; + } + + head = eq->head % (num_eqe << GDMA_EQE_OWNER_BITS); + + mana_gd_ring_doorbell(gc, eq->gdma_dev->doorbell, eq->type, eq->id, + head, arm_bit); +} + +static int mana_poll(struct napi_struct *napi, int budget) +{ + struct gdma_queue *eq = container_of(napi, struct gdma_queue, eq.napi); + + eq->eq.work_done = 0; + eq->eq.budget = budget; + + mana_gd_process_eq_events(eq); + + return min(eq->eq.work_done, budget); +} + +static void mana_gd_schedule_napi(void *arg) +{ + struct gdma_queue *eq = arg; + struct napi_struct *napi; + + napi = &eq->eq.napi; + napi_schedule_irqoff(napi); +} + +static int mana_gd_register_irq(struct gdma_queue *queue, + const struct gdma_queue_spec *spec) +{ + struct gdma_dev *gd = queue->gdma_dev; + bool is_mana = mana_gd_is_mana(gd); + struct gdma_irq_context *gic; + struct gdma_context *gc; + struct gdma_resource *r; + unsigned int msi_index; + unsigned long flags; + int err; + + gc = gd->gdma_context; + r = &gc->msix_resource; + + spin_lock_irqsave(&r->lock, flags); + + msi_index = find_first_zero_bit(r->map, r->size); + if (msi_index >= r->size) { + err = -ENOSPC; + } else { + bitmap_set(r->map, msi_index, 1); + queue->eq.msix_index = msi_index; + err = 0; + } + + spin_unlock_irqrestore(&r->lock, flags); + + if (err) + return err; + + WARN_ON(msi_index >= gc->num_msix_usable); + + gic = &gc->irq_contexts[msi_index]; + + if (is_mana) { + netif_napi_add(spec->eq.ndev, &queue->eq.napi, mana_poll, + NAPI_POLL_WEIGHT); + napi_enable(&queue->eq.napi); + } + + WARN_ON(gic->handler || gic->arg); + + gic->arg = queue; + + if (is_mana) + gic->handler = mana_gd_schedule_napi; + else + gic->handler = mana_gd_process_eq_events; + + return 0; +}
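mana_gd_register_irq() above claims the lowest free MSI-X slot from the gdma_resource bitmap under its spinlock, and mana_gd_deregister_irq() below returns it. The same claim/release pattern in a self-contained user-space sketch (locking elided, all names invented):

#include <stdio.h>

#define NBITS 64			/* capacity of this toy map */

static unsigned long long map;		/* bit i set => slot i in use */

/* Lowest-free-bit claim, as find_first_zero_bit() + bitmap_set() do in
 * mana_gd_register_irq(); returns -1 when every slot is taken (the
 * driver returns -ENOSPC in that case).
 */
static int slot_claim(void)
{
	int i;

	for (i = 0; i < NBITS; i++) {
		if (!(map & (1ULL << i))) {
			map |= 1ULL << i;
			return i;
		}
	}
	return -1;
}

static void slot_release(int i)		/* the bitmap_clear() counterpart */
{
	map &= ~(1ULL << i);
}

int main(void)
{
	int a = slot_claim(), b = slot_claim();

	printf("claimed %d and %d\n", a, b);	/* 0 and 1 */
	slot_release(a);
	printf("reclaimed %d\n", slot_claim());	/* 0 again */
	return 0;
}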
+static void mana_gd_deregister_irq(struct gdma_queue *queue) +{ + struct gdma_dev *gd = queue->gdma_dev; + struct gdma_irq_context *gic; + struct gdma_context *gc; + struct gdma_resource *r; + unsigned int msix_index; + unsigned long flags; + + gc = gd->gdma_context; + r = &gc->msix_resource; + + /* At most num_online_cpus() + 1 interrupts are used. */ + msix_index = queue->eq.msix_index; + if (WARN_ON(msix_index >= gc->num_msix_usable)) + return; + + gic = &gc->irq_contexts[msix_index]; + gic->handler = NULL; + gic->arg = NULL; + + spin_lock_irqsave(&r->lock, flags); + bitmap_clear(r->map, msix_index, 1); + spin_unlock_irqrestore(&r->lock, flags); + + queue->eq.msix_index = INVALID_PCI_MSIX_INDEX; +} + +int mana_gd_test_eq(struct gdma_context *gc, struct gdma_queue *eq) +{ + struct gdma_generate_test_event_req req = {}; + struct gdma_general_resp resp = {}; + struct device *dev = gc->dev; + int err; + + mutex_lock(&gc->eq_test_event_mutex); + + init_completion(&gc->eq_test_event); + gc->test_event_eq_id = INVALID_QUEUE_ID; + + mana_gd_init_req_hdr(&req.hdr, GDMA_GENERATE_TEST_EQE, + sizeof(req), sizeof(resp)); + + req.hdr.dev_id = eq->gdma_dev->dev_id; + req.queue_index = eq->id; + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err) { + dev_err(dev, "test_eq failed: %d\n", err); + goto out; + } + + err = -EPROTO; + + if (resp.hdr.status) { + dev_err(dev, "test_eq failed: 0x%x\n", resp.hdr.status); + goto out; + } + + if (!wait_for_completion_timeout(&gc->eq_test_event, 30 * HZ)) { + dev_err(dev, "test_eq timed out on queue %d\n", eq->id); + goto out; + } + + if (eq->id != gc->test_event_eq_id) { + dev_err(dev, "test_eq got an event on wrong queue %d (%d)\n", + gc->test_event_eq_id, eq->id); + goto out; + } + + err = 0; +out: + mutex_unlock(&gc->eq_test_event_mutex); + return err; +} + +static void mana_gd_destroy_eq(struct gdma_context *gc, bool flush_events, + struct gdma_queue *queue) +{ + int err; + + if (flush_events) { + err = mana_gd_test_eq(gc, queue); + if (err) + dev_warn(gc->dev, "Failed to flush EQ: %d\n", err); + } + + mana_gd_deregister_irq(queue); + + if (mana_gd_is_mana(queue->gdma_dev)) { + napi_disable(&queue->eq.napi); + netif_napi_del(&queue->eq.napi); + } + + if (queue->eq.disable_needed) + mana_gd_disable_queue(queue); +} + +static int mana_gd_create_eq(struct gdma_dev *gd, + const struct gdma_queue_spec *spec, + bool create_hwq, struct gdma_queue *queue) +{ + struct gdma_context *gc = gd->gdma_context; + struct device *dev = gc->dev; + u32 log2_num_entries; + int err; + + queue->eq.msix_index = INVALID_PCI_MSIX_INDEX; + + log2_num_entries = ilog2(queue->queue_size / GDMA_EQE_SIZE); + + if (spec->eq.log2_throttle_limit > log2_num_entries) { + dev_err(dev, "EQ throttling limit (%lu) > maximum EQE (%u)\n", + spec->eq.log2_throttle_limit, log2_num_entries); + return -EINVAL; + } + + err = mana_gd_register_irq(queue, spec); + if (err) { + dev_err(dev, "Failed to register irq: %d\n", err); + return err; + } + + queue->eq.callback = spec->eq.callback; + queue->eq.context = spec->eq.context; + queue->head |= INITIALIZED_OWNER_BIT(log2_num_entries); + queue->eq.log2_throttle_limit = spec->eq.log2_throttle_limit ?: 1; + + if (create_hwq) { + err = mana_gd_create_hw_eq(gc, queue); + if (err) + goto out; + + err = mana_gd_test_eq(gc, queue); + if (err) + goto out; + } + + return 0; +out: + dev_err(dev, "Failed to create EQ: %d\n", err); + mana_gd_destroy_eq(gc, false, queue); + return err; +} + +static void mana_gd_create_cq(const struct gdma_queue_spec *spec, + struct gdma_queue *queue) +{ + u32 log2_num_entries = ilog2(spec->queue_size / GDMA_CQE_SIZE); + + queue->head |= INITIALIZED_OWNER_BIT(log2_num_entries); + queue->cq.parent = spec->cq.parent_eq; + queue->cq.context = spec->cq.context; + queue->cq.callback = spec->cq.callback; +} + +static void mana_gd_destroy_cq(struct gdma_context
*gc, + struct gdma_queue *queue) +{ + u32 id = queue->id; + + if (id >= gc->max_num_cqs) + return; + + if (!gc->cq_table[id]) + return; + + gc->cq_table[id] = NULL; +} + +int mana_gd_create_hwc_queue(struct gdma_dev *gd, + const struct gdma_queue_spec *spec, + struct gdma_queue **queue_ptr) +{ + struct gdma_context *gc = gd->gdma_context; + struct gdma_mem_info *gmi; + struct gdma_queue *queue; + int err; + + queue = kzalloc(sizeof(*queue), GFP_KERNEL); + if (!queue) + return -ENOMEM; + + gmi = &queue->mem_info; + err = mana_gd_alloc_memory(gc, spec->queue_size, gmi); + if (err) + goto free_q; + + queue->head = 0; + queue->tail = 0; + queue->queue_mem_ptr = gmi->virt_addr; + queue->queue_size = spec->queue_size; + queue->monitor_avl_buf = spec->monitor_avl_buf; + queue->type = spec->type; + queue->gdma_dev = gd; + + if (spec->type == GDMA_EQ) + err = mana_gd_create_eq(gd, spec, false, queue); + else if (spec->type == GDMA_CQ) + mana_gd_create_cq(spec, queue); + + if (err) + goto out; + + *queue_ptr = queue; + return 0; +out: + mana_gd_free_memory(gmi); +free_q: + kfree(queue); + return err; +} + +static void mana_gd_destroy_dma_region(struct gdma_context *gc, u64 gdma_region) +{ + struct gdma_destroy_dma_region_req req = {}; + struct gdma_general_resp resp = {}; + int err; + + if (gdma_region == GDMA_INVALID_DMA_REGION) + return; + + mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_DMA_REGION, sizeof(req), + sizeof(resp)); + req.gdma_region = gdma_region; + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.hdr.status) + dev_err(gc->dev, "Failed to destroy DMA region: %d, 0x%x\n", + err, resp.hdr.status); +} + +static int mana_gd_create_dma_region(struct gdma_dev *gd, + struct gdma_mem_info *gmi) +{ + unsigned int num_page = gmi->length / PAGE_SIZE; + struct gdma_create_dma_region_req *req = NULL; + struct gdma_create_dma_region_resp resp = {}; + struct gdma_context *gc = gd->gdma_context; + struct hw_channel_context *hwc; + u32 length = gmi->length; + u32 req_msg_size; + int err; + int i; + + if (length < PAGE_SIZE || !is_power_of_2(length)) + return -EINVAL; + + if (offset_in_page(gmi->virt_addr) != 0) + return -EINVAL; + + hwc = gc->hwc.driver_data; + req_msg_size = sizeof(*req) + num_page * sizeof(u64); + if (req_msg_size > hwc->max_req_msg_size) + return -EINVAL; + + req = kzalloc(req_msg_size, GFP_KERNEL); + if (!req) + return -ENOMEM; + + mana_gd_init_req_hdr(&req->hdr, GDMA_CREATE_DMA_REGION, + req_msg_size, sizeof(resp)); + req->length = length; + req->offset_in_page = 0; + req->gdma_page_type = GDMA_PAGE_TYPE_4K; + req->page_count = num_page; + req->page_addr_list_len = num_page; + + for (i = 0; i < num_page; i++) + req->page_addr_list[i] = gmi->dma_handle + i * PAGE_SIZE; + + err = mana_gd_send_request(gc, req_msg_size, req, sizeof(resp), &resp); + if (err) + goto out; + + if (resp.hdr.status || resp.gdma_region == GDMA_INVALID_DMA_REGION) { + dev_err(gc->dev, "Failed to create DMA region: 0x%x\n", + resp.hdr.status); + err = -EPROTO; + goto out; + } + + gmi->gdma_region = resp.gdma_region; +out: + kfree(req); + return err; +} + +int mana_gd_create_mana_eq(struct gdma_dev *gd, + const struct gdma_queue_spec *spec, + struct gdma_queue **queue_ptr) +{ + struct gdma_context *gc = gd->gdma_context; + struct gdma_mem_info *gmi; + struct gdma_queue *queue; + int err; + + if (spec->type != GDMA_EQ) + return -EINVAL; + + queue = kzalloc(sizeof(*queue), GFP_KERNEL); + if (!queue) + return -ENOMEM; + + gmi = &queue->mem_info; + err = 
mana_gd_alloc_memory(gc, spec->queue_size, gmi); + if (err) + goto free_q; + + err = mana_gd_create_dma_region(gd, gmi); + if (err) + goto out; + + queue->head = 0; + queue->tail = 0; + queue->queue_mem_ptr = gmi->virt_addr; + queue->queue_size = spec->queue_size; + queue->monitor_avl_buf = spec->monitor_avl_buf; + queue->type = spec->type; + queue->gdma_dev = gd; + + err = mana_gd_create_eq(gd, spec, true, queue); + if (err) + goto out; + + *queue_ptr = queue; + return 0; +out: + mana_gd_free_memory(gmi); +free_q: + kfree(queue); + return err; +} + +int mana_gd_create_mana_wq_cq(struct gdma_dev *gd, + const struct gdma_queue_spec *spec, + struct gdma_queue **queue_ptr) +{ + struct gdma_context *gc = gd->gdma_context; + struct gdma_mem_info *gmi; + struct gdma_queue *queue; + int err; + + if (spec->type != GDMA_CQ && spec->type != GDMA_SQ && + spec->type != GDMA_RQ) + return -EINVAL; + + queue = kzalloc(sizeof(*queue), GFP_KERNEL); + if (!queue) + return -ENOMEM; + + gmi = &queue->mem_info; + err = mana_gd_alloc_memory(gc, spec->queue_size, gmi); + if (err) + goto free_q; + + err = mana_gd_create_dma_region(gd, gmi); + if (err) + goto out; + + queue->head = 0; + queue->tail = 0; + queue->queue_mem_ptr = gmi->virt_addr; + queue->queue_size = spec->queue_size; + queue->monitor_avl_buf = spec->monitor_avl_buf; + queue->type = spec->type; + queue->gdma_dev = gd; + + if (spec->type == GDMA_CQ) + mana_gd_create_cq(spec, queue); + + *queue_ptr = queue; + return 0; +out: + mana_gd_free_memory(gmi); +free_q: + kfree(queue); + return err; +} + +void mana_gd_destroy_queue(struct gdma_context *gc, struct gdma_queue *queue) +{ + struct gdma_mem_info *gmi = &queue->mem_info; + + switch (queue->type) { + case GDMA_EQ: + mana_gd_destroy_eq(gc, queue->eq.disable_needed, queue); + break; + + case GDMA_CQ: + mana_gd_destroy_cq(gc, queue); + break; + + case GDMA_RQ: + break; + + case GDMA_SQ: + break; + + default: + dev_err(gc->dev, "Can't destroy unknown queue: type=%d\n", + queue->type); + return; + } + + mana_gd_destroy_dma_region(gc, gmi->gdma_region); + mana_gd_free_memory(gmi); + kfree(queue); +} + +int mana_gd_verify_vf_version(struct pci_dev *pdev) +{ + struct gdma_context *gc = pci_get_drvdata(pdev); + struct gdma_verify_ver_resp resp = {}; + struct gdma_verify_ver_req req = {}; + int err; + + mana_gd_init_req_hdr(&req.hdr, GDMA_VERIFY_VF_DRIVER_VERSION, + sizeof(req), sizeof(resp)); + + req.protocol_ver_min = GDMA_PROTOCOL_FIRST; + req.protocol_ver_max = GDMA_PROTOCOL_LAST; + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.hdr.status) { + dev_err(gc->dev, "VfVerifyVersionOutput: %d, status=0x%x\n", + err, resp.hdr.status); + return err ? err : -EPROTO; + } + + return 0; +} + +int mana_gd_register_device(struct gdma_dev *gd) +{ + struct gdma_context *gc = gd->gdma_context; + struct gdma_register_device_resp resp = {}; + struct gdma_general_req req = {}; + int err; + + gd->pdid = INVALID_PDID; + gd->doorbell = INVALID_DOORBELL; + gd->gpa_mkey = INVALID_MEM_KEY; + + mana_gd_init_req_hdr(&req.hdr, GDMA_REGISTER_DEVICE, sizeof(req), + sizeof(resp)); + + req.hdr.dev_id = gd->dev_id; + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.hdr.status) { + dev_err(gc->dev, "gdma_register_device_resp failed: %d, 0x%x\n", + err, resp.hdr.status); + return err ? 
err : -EPROTO; + } + + gd->pdid = resp.pdid; + gd->gpa_mkey = resp.gpa_mkey; + gd->doorbell = resp.db_id; + + return 0; +} + +int mana_gd_deregister_device(struct gdma_dev *gd) +{ + struct gdma_context *gc = gd->gdma_context; + struct gdma_general_resp resp = {}; + struct gdma_general_req req = {}; + int err; + + if (gd->pdid == INVALID_PDID) + return -EINVAL; + + mana_gd_init_req_hdr(&req.hdr, GDMA_DEREGISTER_DEVICE, sizeof(req), + sizeof(resp)); + + req.hdr.dev_id = gd->dev_id; + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.hdr.status) { + dev_err(gc->dev, "Failed to deregister device: %d, 0x%x\n", + err, resp.hdr.status); + if (!err) + err = -EPROTO; + } + + gd->pdid = INVALID_PDID; + gd->doorbell = INVALID_DOORBELL; + gd->gpa_mkey = INVALID_MEM_KEY; + + return err; +} + +u32 mana_gd_wq_avail_space(struct gdma_queue *wq) +{ + u32 used_space = (wq->head - wq->tail) * GDMA_WQE_BU_SIZE; + u32 wq_size = wq->queue_size; + + WARN_ON_ONCE(used_space > wq_size); + + return wq_size - used_space; +} + +u8 *mana_gd_get_wqe_ptr(const struct gdma_queue *wq, u32 wqe_offset) +{ + u32 offset = (wqe_offset * GDMA_WQE_BU_SIZE) & (wq->queue_size - 1); + + WARN_ON_ONCE((offset + GDMA_WQE_BU_SIZE) > wq->queue_size); + + return wq->queue_mem_ptr + offset; +} + +static u32 mana_gd_write_client_oob(const struct gdma_wqe_request *wqe_req, + enum gdma_queue_type q_type, + u32 client_oob_size, u32 sgl_data_size, + u8 *wqe_ptr) +{ + bool oob_in_sgl = !!(wqe_req->flags & GDMA_WR_OOB_IN_SGL); + bool pad_data = !!(wqe_req->flags & GDMA_WR_PAD_BY_SGE0); + struct gdma_wqe *header = (struct gdma_wqe *)wqe_ptr; + u8 *ptr; + + memset(header, 0, sizeof(struct gdma_wqe)); + header->num_sge = wqe_req->num_sge; + header->inline_oob_size_div4 = client_oob_size / sizeof(u32); + + if (oob_in_sgl) { + WARN_ON_ONCE(!pad_data || wqe_req->num_sge < 2); + + header->client_oob_in_sgl = 1; + + if (pad_data) + header->last_vbytes = wqe_req->sgl[0].size; + } + + if (q_type == GDMA_SQ) + header->client_data_unit = wqe_req->client_data_unit; + + /* The size of gdma_wqe + client_oob_size must be less than or equal + * to one Basic Unit (i.e. 32 bytes), so the pointer can't go beyond + * the queue memory buffer boundary. 
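+ * As a worked check (assuming INLINE_OOB_SMALL_SIZE is 8 and + * INLINE_OOB_LARGE_SIZE is 24, per their definitions in gdma.h): the + * 8-byte struct gdma_wqe plus the large inline OOB is exactly 32 bytes, + * one BU; with the small inline OOB it is 16 bytes.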
+ */ + ptr = wqe_ptr + sizeof(header); + + if (wqe_req->inline_oob_data && wqe_req->inline_oob_size > 0) { + memcpy(ptr, wqe_req->inline_oob_data, wqe_req->inline_oob_size); + + if (client_oob_size > wqe_req->inline_oob_size) + memset(ptr + wqe_req->inline_oob_size, 0, + client_oob_size - wqe_req->inline_oob_size); + } + + return sizeof(header) + client_oob_size; +} + +static void mana_gd_write_sgl(struct gdma_queue *wq, u8 *wqe_ptr, + const struct gdma_wqe_request *wqe_req) +{ + u32 sgl_size = sizeof(struct gdma_sge) * wqe_req->num_sge; + const u8 *address = (u8 *)wqe_req->sgl; + u8 *base_ptr, *end_ptr; + u32 size_to_end; + + base_ptr = wq->queue_mem_ptr; + end_ptr = base_ptr + wq->queue_size; + size_to_end = (u32)(end_ptr - wqe_ptr); + + if (size_to_end < sgl_size) { + memcpy(wqe_ptr, address, size_to_end); + + wqe_ptr = base_ptr; + address += size_to_end; + sgl_size -= size_to_end; + } + + memcpy(wqe_ptr, address, sgl_size); +} + +int mana_gd_post_work_request(struct gdma_queue *wq, + const struct gdma_wqe_request *wqe_req, + struct gdma_posted_wqe_info *wqe_info) +{ + u32 client_oob_size = wqe_req->inline_oob_size; + struct gdma_context *gc; + u32 sgl_data_size; + u32 max_wqe_size; + u32 wqe_size; + u8 *wqe_ptr; + + if (wqe_req->num_sge == 0) + return -EINVAL; + + if (wq->type == GDMA_RQ) { + if (client_oob_size != 0) + return -EINVAL; + + client_oob_size = INLINE_OOB_SMALL_SIZE; + + max_wqe_size = GDMA_MAX_RQE_SIZE; + } else { + if (client_oob_size != INLINE_OOB_SMALL_SIZE && + client_oob_size != INLINE_OOB_LARGE_SIZE) + return -EINVAL; + + max_wqe_size = GDMA_MAX_SQE_SIZE; + } + + sgl_data_size = sizeof(struct gdma_sge) * wqe_req->num_sge; + wqe_size = ALIGN(sizeof(struct gdma_wqe) + client_oob_size + + sgl_data_size, GDMA_WQE_BU_SIZE); + if (wqe_size > max_wqe_size) + return -EINVAL; + + if (wq->monitor_avl_buf && wqe_size > mana_gd_wq_avail_space(wq)) { + gc = wq->gdma_dev->gdma_context; + dev_err(gc->dev, "unsuccessful flow control!\n"); + return -ENOSPC; + } + + if (wqe_info) + wqe_info->wqe_size_in_bu = wqe_size / GDMA_WQE_BU_SIZE; + + wqe_ptr = mana_gd_get_wqe_ptr(wq, wq->head); + wqe_ptr += mana_gd_write_client_oob(wqe_req, wq->type, client_oob_size, + sgl_data_size, wqe_ptr); + if (wqe_ptr >= (u8 *)wq->queue_mem_ptr + wq->queue_size) + wqe_ptr -= wq->queue_size; + + mana_gd_write_sgl(wq, wqe_ptr, wqe_req); + + wq->head += wqe_size / GDMA_WQE_BU_SIZE; + + return 0; +} + +int mana_gd_post_and_ring(struct gdma_queue *queue, + const struct gdma_wqe_request *wqe_req, + struct gdma_posted_wqe_info *wqe_info) +{ + struct gdma_context *gc = queue->gdma_dev->gdma_context; + int err; + + err = mana_gd_post_work_request(queue, wqe_req, wqe_info); + if (err) + return err; + + mana_gd_wq_ring_doorbell(gc, queue); + + return 0; +} + +static int mana_gd_read_cqe(struct gdma_queue *cq, struct gdma_comp *comp) +{ + unsigned int num_cqe = cq->queue_size / sizeof(struct gdma_cqe); + struct gdma_cqe *cq_cqe = cq->queue_mem_ptr; + u32 owner_bits, new_bits, old_bits; + struct gdma_cqe *cqe; + + cqe = &cq_cqe[cq->head % num_cqe]; + owner_bits = cqe->cqe_info.owner_bits; + + old_bits = (cq->head / num_cqe - 1) & GDMA_CQE_OWNER_MASK; + /* Return 0 if no more entries. */ + if (owner_bits == old_bits) + return 0; + + new_bits = (cq->head / num_cqe) & GDMA_CQE_OWNER_MASK; + /* Return -1 if overflow detected. 
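+ * Worked example (assuming the 3-bit GDMA_CQE_OWNER_MASK from gdma.h): + * with num_cqe = 4 and head = 5, old_bits = 0 and new_bits = 1, so an + * owner_bits value of 0 means no new entry, 1 means a valid CQE, and any + * other value means the hardware has lapped the driver and CQEs were lost.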
*/ + if (owner_bits != new_bits) + return -1; + + comp->wq_num = cqe->cqe_info.wq_num; + comp->is_sq = cqe->cqe_info.is_sq; + memcpy(comp->cqe_data, cqe->cqe_data, GDMA_COMP_DATA_SIZE); + + return 1; +} + +int mana_gd_poll_cq(struct gdma_queue *cq, struct gdma_comp *comp, int num_cqe) +{ + int cqe_idx; + int ret; + + for (cqe_idx = 0; cqe_idx < num_cqe; cqe_idx++) { + ret = mana_gd_read_cqe(cq, &comp[cqe_idx]); + + if (ret < 0) { + cq->head -= cqe_idx; + return ret; + } + + if (ret == 0) + break; + + cq->head++; + } + + return cqe_idx; +} + +static irqreturn_t mana_gd_intr(int irq, void *arg) +{ + struct gdma_irq_context *gic = arg; + + if (gic->handler) + gic->handler(gic->arg); + + return IRQ_HANDLED; +} + +int mana_gd_alloc_res_map(u32 res_avail, struct gdma_resource *r) +{ + r->map = bitmap_zalloc(res_avail, GFP_KERNEL); + if (!r->map) + return -ENOMEM; + + r->size = res_avail; + spin_lock_init(&r->lock); + + return 0; +} + +void mana_gd_free_res_map(struct gdma_resource *r) +{ + bitmap_free(r->map); + r->map = NULL; + r->size = 0; +} + +static int mana_gd_setup_irqs(struct pci_dev *pdev) +{ + unsigned int max_queues_per_port = num_online_cpus(); + struct gdma_context *gc = pci_get_drvdata(pdev); + struct gdma_irq_context *gic; + unsigned int max_irqs; + int nvec, irq; + int err, i, j; + + if (max_queues_per_port > MANA_MAX_NUM_QUEUES) + max_queues_per_port = MANA_MAX_NUM_QUEUES; + + max_irqs = max_queues_per_port * MAX_PORTS_IN_MANA_DEV; + + /* Need 1 interrupt for the Hardware communication Channel (HWC) */ + max_irqs++; + + nvec = pci_alloc_irq_vectors(pdev, 2, max_irqs, PCI_IRQ_MSIX); + if (nvec < 0) + return nvec; + + gc->irq_contexts = kcalloc(nvec, sizeof(struct gdma_irq_context), + GFP_KERNEL); + if (!gc->irq_contexts) { + err = -ENOMEM; + goto free_irq_vector; + } + + for (i = 0; i < nvec; i++) { + gic = &gc->irq_contexts[i]; + gic->handler = NULL; + gic->arg = NULL; + + irq = pci_irq_vector(pdev, i); + if (irq < 0) { + err = irq; + goto free_irq; + } + + err = request_irq(irq, mana_gd_intr, 0, "mana_intr", gic); + if (err) + goto free_irq; + } + + err = mana_gd_alloc_res_map(nvec, &gc->msix_resource); + if (err) + goto free_irq; + + gc->max_num_msix = nvec; + gc->num_msix_usable = nvec; + + return 0; + +free_irq: + for (j = i - 1; j >= 0; j--) { + irq = pci_irq_vector(pdev, j); + gic = &gc->irq_contexts[j]; + free_irq(irq, gic); + } + + kfree(gc->irq_contexts); + gc->irq_contexts = NULL; +free_irq_vector: + pci_free_irq_vectors(pdev); + return err; +} + +static void mana_gd_remove_irqs(struct pci_dev *pdev) +{ + struct gdma_context *gc = pci_get_drvdata(pdev); + struct gdma_irq_context *gic; + int irq, i; + + if (gc->max_num_msix < 1) + return; + + mana_gd_free_res_map(&gc->msix_resource); + + for (i = 0; i < gc->max_num_msix; i++) { + irq = pci_irq_vector(pdev, i); + if (irq < 0) + continue; + + gic = &gc->irq_contexts[i]; + free_irq(irq, gic); + } + + pci_free_irq_vectors(pdev); + + gc->max_num_msix = 0; + gc->num_msix_usable = 0; + kfree(gc->irq_contexts); + gc->irq_contexts = NULL; +} + +static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct gdma_context *gc; + void __iomem *bar0_va; + int bar = 0; + int err; + + err = pci_enable_device(pdev); + if (err) + return -ENXIO; + + pci_set_master(pdev); + + err = pci_request_regions(pdev, "mana"); + if (err) + goto disable_dev; + + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) + goto release_region; + + err = -ENOMEM; + gc = vzalloc(sizeof(*gc)); + if (!gc) + goto 
release_region; + + bar0_va = pci_iomap(pdev, bar, 0); + if (!bar0_va) + goto free_gc; + + gc->bar0_va = bar0_va; + gc->dev = &pdev->dev; + + pci_set_drvdata(pdev, gc); + + mana_gd_init_registers(pdev); + + mana_smc_init(&gc->shm_channel, gc->dev, gc->shm_base); + + err = mana_gd_setup_irqs(pdev); + if (err) + goto unmap_bar; + + mutex_init(&gc->eq_test_event_mutex); + + err = mana_hwc_create_channel(gc); + if (err) + goto remove_irq; + + err = mana_gd_verify_vf_version(pdev); + if (err) + goto remove_irq; + + err = mana_gd_query_max_resources(pdev); + if (err) + goto remove_irq; + + err = mana_gd_detect_devices(pdev); + if (err) + goto remove_irq; + + err = mana_probe(&gc->mana); + if (err) + goto clean_up_gdma; + + return 0; + +clean_up_gdma: + mana_hwc_destroy_channel(gc); + vfree(gc->cq_table); + gc->cq_table = NULL; +remove_irq: + mana_gd_remove_irqs(pdev); +unmap_bar: + pci_iounmap(pdev, bar0_va); +free_gc: + vfree(gc); +release_region: + pci_release_regions(pdev); +disable_dev: + pci_clear_master(pdev); + pci_disable_device(pdev); + dev_err(&pdev->dev, "gdma probe failed: err = %d\n", err); + return err; +} + +static void mana_gd_remove(struct pci_dev *pdev) +{ + struct gdma_context *gc = pci_get_drvdata(pdev); + + mana_remove(&gc->mana); + + mana_hwc_destroy_channel(gc); + vfree(gc->cq_table); + gc->cq_table = NULL; + + mana_gd_remove_irqs(pdev); + + pci_iounmap(pdev, gc->bar0_va); + + vfree(gc); + + pci_release_regions(pdev); + pci_clear_master(pdev); + pci_disable_device(pdev); +} + +#ifndef PCI_VENDOR_ID_MICROSOFT +#define PCI_VENDOR_ID_MICROSOFT 0x1414 +#endif + +static const struct pci_device_id mana_id_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_MICROSOFT, 0x00BA) }, + { } +}; + +static struct pci_driver mana_driver = { + .name = "mana", + .id_table = mana_id_table, + .probe = mana_gd_probe, + .remove = mana_gd_remove, +}; + +module_pci_driver(mana_driver); + +MODULE_DEVICE_TABLE(pci, mana_id_table); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("Microsoft Azure Network Adapter driver"); diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c new file mode 100644 index 000000000000..1a923fd99990 --- /dev/null +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c @@ -0,0 +1,843 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* Copyright (c) 2021, Microsoft Corporation. 
*/ + +#include "gdma.h" +#include "hw_channel.h" + +static int mana_hwc_get_msg_index(struct hw_channel_context *hwc, u16 *msg_id) +{ + struct gdma_resource *r = &hwc->inflight_msg_res; + unsigned long flags; + u32 index; + + down(&hwc->sema); + + spin_lock_irqsave(&r->lock, flags); + + index = find_first_zero_bit(hwc->inflight_msg_res.map, + hwc->inflight_msg_res.size); + + bitmap_set(hwc->inflight_msg_res.map, index, 1); + + spin_unlock_irqrestore(&r->lock, flags); + + *msg_id = index; + + return 0; +} + +static void mana_hwc_put_msg_index(struct hw_channel_context *hwc, u16 msg_id) +{ + struct gdma_resource *r = &hwc->inflight_msg_res; + unsigned long flags; + + spin_lock_irqsave(&r->lock, flags); + bitmap_clear(hwc->inflight_msg_res.map, msg_id, 1); + spin_unlock_irqrestore(&r->lock, flags); + + up(&hwc->sema); +} + +static int mana_hwc_verify_resp_msg(const struct hwc_caller_ctx *caller_ctx, + const struct gdma_resp_hdr *resp_msg, + u32 resp_len) +{ + if (resp_len < sizeof(*resp_msg)) + return -EPROTO; + + if (resp_len > caller_ctx->output_buflen) + return -EPROTO; + + return 0; +} + +static void mana_hwc_handle_resp(struct hw_channel_context *hwc, u32 resp_len, + const struct gdma_resp_hdr *resp_msg) +{ + struct hwc_caller_ctx *ctx; + int err; + + if (!test_bit(resp_msg->response.hwc_msg_id, + hwc->inflight_msg_res.map)) { + dev_err(hwc->dev, "hwc_rx: invalid msg_id = %u\n", + resp_msg->response.hwc_msg_id); + return; + } + + ctx = hwc->caller_ctx + resp_msg->response.hwc_msg_id; + err = mana_hwc_verify_resp_msg(ctx, resp_msg, resp_len); + if (err) + goto out; + + ctx->status_code = resp_msg->status; + + memcpy(ctx->output_buf, resp_msg, resp_len); +out: + ctx->error = err; + complete(&ctx->comp_event); +} + +static int mana_hwc_post_rx_wqe(const struct hwc_wq *hwc_rxq, + struct hwc_work_request *req) +{ + struct device *dev = hwc_rxq->hwc->dev; + struct gdma_sge *sge; + int err; + + sge = &req->sge; + sge->address = (u64)req->buf_sge_addr; + sge->mem_key = hwc_rxq->msg_buf->gpa_mkey; + sge->size = req->buf_len; + + memset(&req->wqe_req, 0, sizeof(struct gdma_wqe_request)); + req->wqe_req.sgl = sge; + req->wqe_req.num_sge = 1; + req->wqe_req.client_data_unit = 0; + + err = mana_gd_post_and_ring(hwc_rxq->gdma_wq, &req->wqe_req, NULL); + if (err) + dev_err(dev, "Failed to post WQE on HWC RQ: %d\n", err); + return err; +} + +static void mana_hwc_init_event_handler(void *ctx, struct gdma_queue *q_self, + struct gdma_event *event) +{ + struct hw_channel_context *hwc = ctx; + struct gdma_dev *gd = hwc->gdma_dev; + union hwc_init_type_data type_data; + union hwc_init_eq_id_db eq_db; + u32 type, val; + + switch (event->type) { + case GDMA_EQE_HWC_INIT_EQ_ID_DB: + eq_db.as_uint32 = event->details[0]; + hwc->cq->gdma_eq->id = eq_db.eq_id; + gd->doorbell = eq_db.doorbell; + break; + + case GDMA_EQE_HWC_INIT_DATA: + type_data.as_uint32 = event->details[0]; + type = type_data.type; + val = type_data.value; + + switch (type) { + case HWC_INIT_DATA_CQID: + hwc->cq->gdma_cq->id = val; + break; + + case HWC_INIT_DATA_RQID: + hwc->rxq->gdma_wq->id = val; + break; + + case HWC_INIT_DATA_SQID: + hwc->txq->gdma_wq->id = val; + break; + + case HWC_INIT_DATA_QUEUE_DEPTH: + hwc->hwc_init_q_depth_max = (u16)val; + break; + + case HWC_INIT_DATA_MAX_REQUEST: + hwc->hwc_init_max_req_msg_size = val; + break; + + case HWC_INIT_DATA_MAX_RESPONSE: + hwc->hwc_init_max_resp_msg_size = val; + break; + + case HWC_INIT_DATA_MAX_NUM_CQS: + gd->gdma_context->max_num_cqs = val; + break; + + case HWC_INIT_DATA_PDID: + 
hwc->gdma_dev->pdid = val; + break; + + case HWC_INIT_DATA_GPA_MKEY: + hwc->rxq->msg_buf->gpa_mkey = val; + hwc->txq->msg_buf->gpa_mkey = val; + break; + } + + break; + + case GDMA_EQE_HWC_INIT_DONE: + complete(&hwc->hwc_init_eqe_comp); + break; + + default: + /* Ignore unknown events, which should never happen. */ + break; + } +} + +static void mana_hwc_rx_event_handler(void *ctx, u32 gdma_rxq_id, + const struct hwc_rx_oob *rx_oob) +{ + struct hw_channel_context *hwc = ctx; + struct hwc_wq *hwc_rxq = hwc->rxq; + struct hwc_work_request *rx_req; + struct gdma_resp_hdr *resp; + struct gdma_wqe *dma_oob; + struct gdma_queue *rq; + struct gdma_sge *sge; + u64 rq_base_addr; + u64 rx_req_idx; + u8 *wqe; + + if (WARN_ON_ONCE(hwc_rxq->gdma_wq->id != gdma_rxq_id)) + return; + + rq = hwc_rxq->gdma_wq; + wqe = mana_gd_get_wqe_ptr(rq, rx_oob->wqe_offset / GDMA_WQE_BU_SIZE); + dma_oob = (struct gdma_wqe *)wqe; + + sge = (struct gdma_sge *)(wqe + 8 + dma_oob->inline_oob_size_div4 * 4); + + /* Locate the RX work request, to recover its virtual address and to + * repost it. + */ + rq_base_addr = hwc_rxq->msg_buf->mem_info.dma_handle; + rx_req_idx = (sge->address - rq_base_addr) / hwc->max_req_msg_size; + + rx_req = &hwc_rxq->msg_buf->reqs[rx_req_idx]; + resp = (struct gdma_resp_hdr *)rx_req->buf_va; + + if (resp->response.hwc_msg_id >= hwc->num_inflight_msg) { + dev_err(hwc->dev, "HWC RX: wrong msg_id=%u\n", + resp->response.hwc_msg_id); + return; + } + + mana_hwc_handle_resp(hwc, rx_oob->tx_oob_data_size, resp); + + /* Do not use 'resp' beyond this point, because the buffer is reposted + * to the HW by the mana_hwc_post_rx_wqe() below. + */ + resp = NULL; + + mana_hwc_post_rx_wqe(hwc_rxq, rx_req); +} + +static void mana_hwc_tx_event_handler(void *ctx, u32 gdma_txq_id, + const struct hwc_rx_oob *rx_oob) +{ + struct hw_channel_context *hwc = ctx; + struct hwc_wq *hwc_txq = hwc->txq; + + WARN_ON_ONCE(!hwc_txq || hwc_txq->gdma_wq->id != gdma_txq_id); +} + +static int mana_hwc_create_gdma_wq(struct hw_channel_context *hwc, + enum gdma_queue_type type, u64 queue_size, + struct gdma_queue **queue) +{ + struct gdma_queue_spec spec = {}; + + if (type != GDMA_SQ && type != GDMA_RQ) + return -EINVAL; + + spec.type = type; + spec.monitor_avl_buf = false; + spec.queue_size = queue_size; + + return mana_gd_create_hwc_queue(hwc->gdma_dev, &spec, queue); +} + +static int mana_hwc_create_gdma_cq(struct hw_channel_context *hwc, + u64 queue_size, + void *ctx, gdma_cq_callback *cb, + struct gdma_queue *parent_eq, + struct gdma_queue **queue) +{ + struct gdma_queue_spec spec = {}; + + spec.type = GDMA_CQ; + spec.monitor_avl_buf = false; + spec.queue_size = queue_size; + spec.cq.context = ctx; + spec.cq.callback = cb; + spec.cq.parent_eq = parent_eq; + + return mana_gd_create_hwc_queue(hwc->gdma_dev, &spec, queue); +} + +static int mana_hwc_create_gdma_eq(struct hw_channel_context *hwc, + u64 queue_size, + void *ctx, gdma_eq_callback *cb, + struct gdma_queue **queue) +{ + struct gdma_queue_spec spec = {}; + + spec.type = GDMA_EQ; + spec.monitor_avl_buf = false; + spec.queue_size = queue_size; + spec.eq.context = ctx; + spec.eq.callback = cb; + spec.eq.log2_throttle_limit = DEFAULT_LOG2_THROTTLING_FOR_ERROR_EQ; + + return mana_gd_create_hwc_queue(hwc->gdma_dev, &spec, queue); +} + +static void mana_hwc_comp_event(void *ctx, struct gdma_queue *q_self) +{ + struct hwc_rx_oob comp_data = {}; + struct gdma_comp *completions; + struct hwc_cq *hwc_cq = ctx; + int comp_read, i; + + WARN_ON_ONCE(hwc_cq->gdma_cq != q_self); + + completions = hwc_cq->comp_buf; +
comp_read = mana_gd_poll_cq(q_self, completions, hwc_cq->queue_depth); + WARN_ON_ONCE(comp_read <= 0 || comp_read > hwc_cq->queue_depth); + + for (i = 0; i < comp_read; ++i) { + comp_data = *(struct hwc_rx_oob *)completions[i].cqe_data; + + if (completions[i].is_sq) + hwc_cq->tx_event_handler(hwc_cq->tx_event_ctx, + completions[i].wq_num, + &comp_data); + else + hwc_cq->rx_event_handler(hwc_cq->rx_event_ctx, + completions[i].wq_num, + &comp_data); + } + + mana_gd_arm_cq(q_self); +} + +static void mana_hwc_destroy_cq(struct gdma_context *gc, struct hwc_cq *hwc_cq) +{ + if (!hwc_cq) + return; + + kfree(hwc_cq->comp_buf); + + if (hwc_cq->gdma_cq) + mana_gd_destroy_queue(gc, hwc_cq->gdma_cq); + + if (hwc_cq->gdma_eq) + mana_gd_destroy_queue(gc, hwc_cq->gdma_eq); + + kfree(hwc_cq); +} + +static int mana_hwc_create_cq(struct hw_channel_context *hwc, u16 q_depth, + gdma_eq_callback *callback, void *ctx, + hwc_rx_event_handler_t *rx_ev_hdlr, + void *rx_ev_ctx, + hwc_tx_event_handler_t *tx_ev_hdlr, + void *tx_ev_ctx, struct hwc_cq **hwc_cq_ptr) +{ + struct gdma_queue *eq, *cq; + struct gdma_comp *comp_buf; + struct hwc_cq *hwc_cq; + u32 eq_size, cq_size; + int err; + + eq_size = roundup_pow_of_two(GDMA_EQE_SIZE * q_depth); + if (eq_size < MINIMUM_SUPPORTED_PAGE_SIZE) + eq_size = MINIMUM_SUPPORTED_PAGE_SIZE; + + cq_size = roundup_pow_of_two(GDMA_CQE_SIZE * q_depth); + if (cq_size < MINIMUM_SUPPORTED_PAGE_SIZE) + cq_size = MINIMUM_SUPPORTED_PAGE_SIZE; + + hwc_cq = kzalloc(sizeof(*hwc_cq), GFP_KERNEL); + if (!hwc_cq) + return -ENOMEM; + + err = mana_hwc_create_gdma_eq(hwc, eq_size, ctx, callback, &eq); + if (err) { + dev_err(hwc->dev, "Failed to create HWC EQ for RQ: %d\n", err); + goto out; + } + hwc_cq->gdma_eq = eq; + + err = mana_hwc_create_gdma_cq(hwc, cq_size, hwc_cq, mana_hwc_comp_event, + eq, &cq); + if (err) { + dev_err(hwc->dev, "Failed to create HWC CQ for RQ: %d\n", err); + goto out; + } + hwc_cq->gdma_cq = cq; + + comp_buf = kcalloc(q_depth, sizeof(struct gdma_comp), GFP_KERNEL); + if (!comp_buf) { + err = -ENOMEM; + goto out; + } + + hwc_cq->hwc = hwc; + hwc_cq->comp_buf = comp_buf; + hwc_cq->queue_depth = q_depth; + hwc_cq->rx_event_handler = rx_ev_hdlr; + hwc_cq->rx_event_ctx = rx_ev_ctx; + hwc_cq->tx_event_handler = tx_ev_hdlr; + hwc_cq->tx_event_ctx = tx_ev_ctx; + + *hwc_cq_ptr = hwc_cq; + return 0; +out: + mana_hwc_destroy_cq(hwc->gdma_dev->gdma_context, hwc_cq); + return err; +} + +static int mana_hwc_alloc_dma_buf(struct hw_channel_context *hwc, u16 q_depth, + u32 max_msg_size, + struct hwc_dma_buf **dma_buf_ptr) +{ + struct gdma_context *gc = hwc->gdma_dev->gdma_context; + struct hwc_work_request *hwc_wr; + struct hwc_dma_buf *dma_buf; + struct gdma_mem_info *gmi; + void *virt_addr; + u32 buf_size; + u8 *base_pa; + int err; + u16 i; + + dma_buf = kzalloc(sizeof(*dma_buf) + + q_depth * sizeof(struct hwc_work_request), + GFP_KERNEL); + if (!dma_buf) + return -ENOMEM; + + dma_buf->num_reqs = q_depth; + + buf_size = PAGE_ALIGN(q_depth * max_msg_size); + + gmi = &dma_buf->mem_info; + err = mana_gd_alloc_memory(gc, buf_size, gmi); + if (err) { + dev_err(hwc->dev, "Failed to allocate DMA buffer: %d\n", err); + goto out; + } + + virt_addr = dma_buf->mem_info.virt_addr; + base_pa = (u8 *)dma_buf->mem_info.dma_handle; + + for (i = 0; i < q_depth; i++) { + hwc_wr = &dma_buf->reqs[i]; + + hwc_wr->buf_va = virt_addr + i * max_msg_size; + hwc_wr->buf_sge_addr = base_pa + i * max_msg_size; + + hwc_wr->buf_len = max_msg_size; + } + + *dma_buf_ptr = dma_buf; + return 0; +out: + kfree(dma_buf); + 
return err; +} + +static void mana_hwc_dealloc_dma_buf(struct hw_channel_context *hwc, + struct hwc_dma_buf *dma_buf) +{ + if (!dma_buf) + return; + + mana_gd_free_memory(&dma_buf->mem_info); + + kfree(dma_buf); +} + +static void mana_hwc_destroy_wq(struct hw_channel_context *hwc, + struct hwc_wq *hwc_wq) +{ + if (!hwc_wq) + return; + + mana_hwc_dealloc_dma_buf(hwc, hwc_wq->msg_buf); + + if (hwc_wq->gdma_wq) + mana_gd_destroy_queue(hwc->gdma_dev->gdma_context, + hwc_wq->gdma_wq); + + kfree(hwc_wq); +} + +static int mana_hwc_create_wq(struct hw_channel_context *hwc, + enum gdma_queue_type q_type, u16 q_depth, + u32 max_msg_size, struct hwc_cq *hwc_cq, + struct hwc_wq **hwc_wq_ptr) +{ + struct gdma_queue *queue; + struct hwc_wq *hwc_wq; + u32 queue_size; + int err; + + WARN_ON(q_type != GDMA_SQ && q_type != GDMA_RQ); + + if (q_type == GDMA_RQ) + queue_size = roundup_pow_of_two(GDMA_MAX_RQE_SIZE * q_depth); + else + queue_size = roundup_pow_of_two(GDMA_MAX_SQE_SIZE * q_depth); + + if (queue_size < MINIMUM_SUPPORTED_PAGE_SIZE) + queue_size = MINIMUM_SUPPORTED_PAGE_SIZE; + + hwc_wq = kzalloc(sizeof(*hwc_wq), GFP_KERNEL); + if (!hwc_wq) + return -ENOMEM; + + err = mana_hwc_create_gdma_wq(hwc, q_type, queue_size, &queue); + if (err) + goto out; + + err = mana_hwc_alloc_dma_buf(hwc, q_depth, max_msg_size, + &hwc_wq->msg_buf); + if (err) + goto out; + + hwc_wq->hwc = hwc; + hwc_wq->gdma_wq = queue; + hwc_wq->queue_depth = q_depth; + hwc_wq->hwc_cq = hwc_cq; + + *hwc_wq_ptr = hwc_wq; + return 0; +out: + if (err) + mana_hwc_destroy_wq(hwc, hwc_wq); + return err; +} + +static int mana_hwc_post_tx_wqe(const struct hwc_wq *hwc_txq, + struct hwc_work_request *req, + u32 dest_virt_rq_id, u32 dest_virt_rcq_id, + bool dest_pf) +{ + struct device *dev = hwc_txq->hwc->dev; + struct hwc_tx_oob *tx_oob; + struct gdma_sge *sge; + int err; + + if (req->msg_size == 0 || req->msg_size > req->buf_len) { + dev_err(dev, "wrong msg_size: %u, buf_len: %u\n", + req->msg_size, req->buf_len); + return -EINVAL; + } + + tx_oob = &req->tx_oob; + + tx_oob->vrq_id = dest_virt_rq_id; + tx_oob->dest_vfid = 0; + tx_oob->vrcq_id = dest_virt_rcq_id; + tx_oob->vscq_id = hwc_txq->hwc_cq->gdma_cq->id; + tx_oob->loopback = false; + tx_oob->lso_override = false; + tx_oob->dest_pf = dest_pf; + tx_oob->vsq_id = hwc_txq->gdma_wq->id; + + sge = &req->sge; + sge->address = (u64)req->buf_sge_addr; + sge->mem_key = hwc_txq->msg_buf->gpa_mkey; + sge->size = req->msg_size; + + memset(&req->wqe_req, 0, sizeof(struct gdma_wqe_request)); + req->wqe_req.sgl = sge; + req->wqe_req.num_sge = 1; + req->wqe_req.inline_oob_size = sizeof(struct hwc_tx_oob); + req->wqe_req.inline_oob_data = tx_oob; + req->wqe_req.client_data_unit = 0; + + err = mana_gd_post_and_ring(hwc_txq->gdma_wq, &req->wqe_req, NULL); + if (err) + dev_err(dev, "Failed to post WQE on HWC SQ: %d\n", err); + return err; +} + +static int mana_hwc_init_inflight_msg(struct hw_channel_context *hwc, + u16 num_msg) +{ + int err; + + sema_init(&hwc->sema, num_msg); + + err = mana_gd_alloc_res_map(num_msg, &hwc->inflight_msg_res); + if (err) + dev_err(hwc->dev, "Failed to init inflight_msg_res: %d\n", err); + return err; +} + +static int mana_hwc_test_channel(struct hw_channel_context *hwc, u16 q_depth, + u32 max_req_msg_size, u32 max_resp_msg_size) +{ + struct gdma_context *gc = hwc->gdma_dev->gdma_context; + struct hwc_wq *hwc_rxq = hwc->rxq; + struct hwc_work_request *req; + struct hwc_caller_ctx *ctx; + int err; + int i; + + /* Post all WQEs on the RQ */ + for (i = 0; i < q_depth; i++) { + req 
= &hwc_rxq->msg_buf->reqs[i]; + err = mana_hwc_post_rx_wqe(hwc_rxq, req); + if (err) + return err; + } + + ctx = kzalloc(q_depth * sizeof(struct hwc_caller_ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + for (i = 0; i < q_depth; ++i) + init_completion(&ctx[i].comp_event); + + hwc->caller_ctx = ctx; + + return mana_gd_test_eq(gc, hwc->cq->gdma_eq); +} + +static int mana_hwc_establish_channel(struct gdma_context *gc, u16 *q_depth, + u32 *max_req_msg_size, + u32 *max_resp_msg_size) +{ + struct hw_channel_context *hwc = gc->hwc.driver_data; + struct gdma_queue *rq = hwc->rxq->gdma_wq; + struct gdma_queue *sq = hwc->txq->gdma_wq; + struct gdma_queue *eq = hwc->cq->gdma_eq; + struct gdma_queue *cq = hwc->cq->gdma_cq; + int err; + + init_completion(&hwc->hwc_init_eqe_comp); + + err = mana_smc_setup_hwc(&gc->shm_channel, false, + eq->mem_info.dma_handle, + cq->mem_info.dma_handle, + rq->mem_info.dma_handle, + sq->mem_info.dma_handle, + eq->eq.msix_index); + if (err) + return err; + + if (!wait_for_completion_timeout(&hwc->hwc_init_eqe_comp, 60 * HZ)) + return -ETIMEDOUT; + + *q_depth = hwc->hwc_init_q_depth_max; + *max_req_msg_size = hwc->hwc_init_max_req_msg_size; + *max_resp_msg_size = hwc->hwc_init_max_resp_msg_size; + + if (WARN_ON(cq->id >= gc->max_num_cqs)) + return -EPROTO; + + gc->cq_table = vzalloc(gc->max_num_cqs * sizeof(struct gdma_queue *)); + if (!gc->cq_table) + return -ENOMEM; + + gc->cq_table[cq->id] = cq; + + return 0; +} + +static int mana_hwc_init_queues(struct hw_channel_context *hwc, u16 q_depth, + u32 max_req_msg_size, u32 max_resp_msg_size) +{ + struct hwc_wq *hwc_rxq = NULL; + struct hwc_wq *hwc_txq = NULL; + struct hwc_cq *hwc_cq = NULL; + int err; + + err = mana_hwc_init_inflight_msg(hwc, q_depth); + if (err) + return err; + + /* CQ is shared by SQ and RQ, so CQ's queue depth is the sum of SQ + * queue depth and RQ queue depth. + */ + err = mana_hwc_create_cq(hwc, q_depth * 2, + mana_hwc_init_event_handler, hwc, + mana_hwc_rx_event_handler, hwc, + mana_hwc_tx_event_handler, hwc, &hwc_cq); + if (err) { + dev_err(hwc->dev, "Failed to create HWC CQ: %d\n", err); + goto out; + } + hwc->cq = hwc_cq; + + err = mana_hwc_create_wq(hwc, GDMA_RQ, q_depth, max_req_msg_size, + hwc_cq, &hwc_rxq); + if (err) { + dev_err(hwc->dev, "Failed to create HWC RQ: %d\n", err); + goto out; + } + hwc->rxq = hwc_rxq; + + err = mana_hwc_create_wq(hwc, GDMA_SQ, q_depth, max_resp_msg_size, + hwc_cq, &hwc_txq); + if (err) { + dev_err(hwc->dev, "Failed to create HWC SQ: %d\n", err); + goto out; + } + hwc->txq = hwc_txq; + + hwc->num_inflight_msg = q_depth; + hwc->max_req_msg_size = max_req_msg_size; + + return 0; +out: + if (hwc_txq) + mana_hwc_destroy_wq(hwc, hwc_txq); + + if (hwc_rxq) + mana_hwc_destroy_wq(hwc, hwc_rxq); + + if (hwc_cq) + mana_hwc_destroy_cq(hwc->gdma_dev->gdma_context, hwc_cq); + + mana_gd_free_res_map(&hwc->inflight_msg_res); + return err; +} + +int mana_hwc_create_channel(struct gdma_context *gc) +{ + u32 max_req_msg_size, max_resp_msg_size; + struct gdma_dev *gd = &gc->hwc; + struct hw_channel_context *hwc; + u16 q_depth_max; + int err; + + hwc = kzalloc(sizeof(*hwc), GFP_KERNEL); + if (!hwc) + return -ENOMEM; + + gd->gdma_context = gc; + gd->driver_data = hwc; + hwc->gdma_dev = gd; + hwc->dev = gc->dev; + + /* HWC's instance number is always 0. 
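+ * (dev_id is assumed here to be the {type, instance} union from gdma.h; + * zeroing as_uint32 below clears the instance field before the type is + * set to GDMA_DEVICE_HWC.)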
*/ + gd->dev_id.as_uint32 = 0; + gd->dev_id.type = GDMA_DEVICE_HWC; + + gd->pdid = INVALID_PDID; + gd->doorbell = INVALID_DOORBELL; + + err = mana_hwc_init_queues(hwc, HW_CHANNEL_VF_BOOTSTRAP_QUEUE_DEPTH, + HW_CHANNEL_MAX_REQUEST_SIZE, + HW_CHANNEL_MAX_RESPONSE_SIZE); + if (err) { + dev_err(hwc->dev, "Failed to initialize HWC: %d\n", err); + goto out; + } + + err = mana_hwc_establish_channel(gc, &q_depth_max, &max_req_msg_size, + &max_resp_msg_size); + if (err) { + dev_err(hwc->dev, "Failed to establish HWC: %d\n", err); + goto out; + } + + err = mana_hwc_test_channel(gc->hwc.driver_data, + HW_CHANNEL_VF_BOOTSTRAP_QUEUE_DEPTH, + max_req_msg_size, max_resp_msg_size); + if (err) { + dev_err(hwc->dev, "Failed to test HWC: %d\n", err); + goto out; + } + + return 0; +out: + kfree(hwc); + return err; +} + +void mana_hwc_destroy_channel(struct gdma_context *gc) +{ + struct hw_channel_context *hwc = gc->hwc.driver_data; + struct hwc_caller_ctx *ctx; + + mana_smc_teardown_hwc(&gc->shm_channel, false); + + ctx = hwc->caller_ctx; + kfree(ctx); + hwc->caller_ctx = NULL; + + mana_hwc_destroy_wq(hwc, hwc->txq); + hwc->txq = NULL; + + mana_hwc_destroy_wq(hwc, hwc->rxq); + hwc->rxq = NULL; + + mana_hwc_destroy_cq(hwc->gdma_dev->gdma_context, hwc->cq); + hwc->cq = NULL; + + mana_gd_free_res_map(&hwc->inflight_msg_res); + + hwc->num_inflight_msg = 0; + + if (hwc->gdma_dev->pdid != INVALID_PDID) { + hwc->gdma_dev->doorbell = INVALID_DOORBELL; + hwc->gdma_dev->pdid = INVALID_PDID; + } + + kfree(hwc); + gc->hwc.driver_data = NULL; + gc->hwc.gdma_context = NULL; +} + +int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len, + const void *req, u32 resp_len, void *resp) +{ + struct hwc_work_request *tx_wr; + struct hwc_wq *txq = hwc->txq; + struct gdma_req_hdr *req_msg; + struct hwc_caller_ctx *ctx; + u16 msg_id; + int err; + + mana_hwc_get_msg_index(hwc, &msg_id); + + tx_wr = &txq->msg_buf->reqs[msg_id]; + + if (req_len > tx_wr->buf_len) { + dev_err(hwc->dev, "HWC: req msg size: %d > %d\n", req_len, + tx_wr->buf_len); + err = -EINVAL; + goto out; + } + + ctx = hwc->caller_ctx + msg_id; + ctx->output_buf = resp; + ctx->output_buflen = resp_len; + + req_msg = (struct gdma_req_hdr *)tx_wr->buf_va; + if (req) + memcpy(req_msg, req, req_len); + + req_msg->req.hwc_msg_id = msg_id; + + tx_wr->msg_size = req_len; + + err = mana_hwc_post_tx_wqe(txq, tx_wr, 0, 0, false); + if (err) { + dev_err(hwc->dev, "HWC: Failed to post send WQE: %d\n", err); + goto out; + } + + if (!wait_for_completion_timeout(&ctx->comp_event, 30 * HZ)) { + dev_err(hwc->dev, "HWC: Request timed out!\n"); + err = -ETIMEDOUT; + goto out; + } + + if (ctx->error) { + err = ctx->error; + goto out; + } + + if (ctx->status_code) { + dev_err(hwc->dev, "HWC: Failed hw_channel req: 0x%x\n", + ctx->status_code); + err = -EPROTO; + goto out; + } +out: + mana_hwc_put_msg_index(hwc, msg_id); + return err; +} diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.h b/drivers/net/ethernet/microsoft/mana/hw_channel.h new file mode 100644 index 000000000000..31c6e83c454a --- /dev/null +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.h @@ -0,0 +1,190 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +/* Copyright (c) 2021, Microsoft Corporation. 
*/ + +#ifndef _HW_CHANNEL_H +#define _HW_CHANNEL_H + +#define DEFAULT_LOG2_THROTTLING_FOR_ERROR_EQ 4 + +#define HW_CHANNEL_MAX_REQUEST_SIZE 0x1000 +#define HW_CHANNEL_MAX_RESPONSE_SIZE 0x1000 + +#define HW_CHANNEL_VF_BOOTSTRAP_QUEUE_DEPTH 1 + +#define HWC_INIT_DATA_CQID 1 +#define HWC_INIT_DATA_RQID 2 +#define HWC_INIT_DATA_SQID 3 +#define HWC_INIT_DATA_QUEUE_DEPTH 4 +#define HWC_INIT_DATA_MAX_REQUEST 5 +#define HWC_INIT_DATA_MAX_RESPONSE 6 +#define HWC_INIT_DATA_MAX_NUM_CQS 7 +#define HWC_INIT_DATA_PDID 8 +#define HWC_INIT_DATA_GPA_MKEY 9 + +/* Structures labeled with "HW DATA" are exchanged with the hardware. All of + * them are naturally aligned and hence don't need __packed. + */ + +union hwc_init_eq_id_db { + u32 as_uint32; + + struct { + u32 eq_id : 16; + u32 doorbell : 16; + }; +}; /* HW DATA */ + +union hwc_init_type_data { + u32 as_uint32; + + struct { + u32 value : 24; + u32 type : 8; + }; +}; /* HW DATA */ + +struct hwc_rx_oob { + u32 type : 6; + u32 eom : 1; + u32 som : 1; + u32 vendor_err : 8; + u32 reserved1 : 16; + + u32 src_virt_wq : 24; + u32 src_vfid : 8; + + u32 reserved2; + + union { + u32 wqe_addr_low; + u32 wqe_offset; + }; + + u32 wqe_addr_high; + + u32 client_data_unit : 14; + u32 reserved3 : 18; + + u32 tx_oob_data_size; + + u32 chunk_offset : 21; + u32 reserved4 : 11; +}; /* HW DATA */ + +struct hwc_tx_oob { + u32 reserved1; + + u32 reserved2; + + u32 vrq_id : 24; + u32 dest_vfid : 8; + + u32 vrcq_id : 24; + u32 reserved3 : 8; + + u32 vscq_id : 24; + u32 loopback : 1; + u32 lso_override: 1; + u32 dest_pf : 1; + u32 reserved4 : 5; + + u32 vsq_id : 24; + u32 reserved5 : 8; +}; /* HW DATA */ + +struct hwc_work_request { + void *buf_va; + void *buf_sge_addr; + u32 buf_len; + u32 msg_size; + + struct gdma_wqe_request wqe_req; + struct hwc_tx_oob tx_oob; + + struct gdma_sge sge; +}; + +/* hwc_dma_buf represents the array of in-flight WQEs. + * mem_info, also known as the GDMA mapped memory, is partitioned and used + * by the in-flight WQEs. + * The number of WQEs is determined by the number of in-flight messages. 
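+ * Each work request owns a max_msg_size slice of mem_info, at byte offset + * i * max_msg_size, as laid out by mana_hwc_alloc_dma_buf().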
+ */ +struct hwc_dma_buf { + struct gdma_mem_info mem_info; + + u32 gpa_mkey; + + u32 num_reqs; + struct hwc_work_request reqs[]; +}; + +typedef void hwc_rx_event_handler_t(void *ctx, u32 gdma_rxq_id, + const struct hwc_rx_oob *rx_oob); + +typedef void hwc_tx_event_handler_t(void *ctx, u32 gdma_txq_id, + const struct hwc_rx_oob *rx_oob); + +struct hwc_cq { + struct hw_channel_context *hwc; + + struct gdma_queue *gdma_cq; + struct gdma_queue *gdma_eq; + struct gdma_comp *comp_buf; + u16 queue_depth; + + hwc_rx_event_handler_t *rx_event_handler; + void *rx_event_ctx; + + hwc_tx_event_handler_t *tx_event_handler; + void *tx_event_ctx; +}; + +struct hwc_wq { + struct hw_channel_context *hwc; + + struct gdma_queue *gdma_wq; + struct hwc_dma_buf *msg_buf; + u16 queue_depth; + + struct hwc_cq *hwc_cq; +}; + +struct hwc_caller_ctx { + struct completion comp_event; + void *output_buf; + u32 output_buflen; + + u32 error; /* Linux error code */ + u32 status_code; +}; + +struct hw_channel_context { + struct gdma_dev *gdma_dev; + struct device *dev; + + u16 num_inflight_msg; + u32 max_req_msg_size; + + u16 hwc_init_q_depth_max; + u32 hwc_init_max_req_msg_size; + u32 hwc_init_max_resp_msg_size; + + struct completion hwc_init_eqe_comp; + + struct hwc_wq *rxq; + struct hwc_wq *txq; + struct hwc_cq *cq; + + struct semaphore sema; + struct gdma_resource inflight_msg_res; + + struct hwc_caller_ctx *caller_ctx; +}; + +int mana_hwc_create_channel(struct gdma_context *gc); +void mana_hwc_destroy_channel(struct gdma_context *gc); + +int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len, + const void *req, u32 resp_len, void *resp); + +#endif /* _HW_CHANNEL_H */ diff --git a/drivers/net/ethernet/microsoft/mana/mana.h b/drivers/net/ethernet/microsoft/mana/mana.h new file mode 100644 index 000000000000..a2c3f826f022 --- /dev/null +++ b/drivers/net/ethernet/microsoft/mana/mana.h @@ -0,0 +1,533 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +/* Copyright (c) 2021, Microsoft Corporation. */ + +#ifndef _MANA_H +#define _MANA_H + +#include "gdma.h" +#include "hw_channel.h" + +/* Microsoft Azure Network Adapter (MANA)'s definitions + * + * Structures labeled with "HW DATA" are exchanged with the hardware. All of + * them are naturally aligned and hence don't need __packed. + */ + +/* MANA protocol version */ +#define MANA_MAJOR_VERSION 0 +#define MANA_MINOR_VERSION 1 +#define MANA_MICRO_VERSION 1 + +typedef u64 mana_handle_t; +#define INVALID_MANA_HANDLE ((mana_handle_t)-1) + +enum TRI_STATE { + TRI_STATE_UNKNOWN = -1, + TRI_STATE_FALSE = 0, + TRI_STATE_TRUE = 1 +}; + +/* Number of entries for hardware indirection table must be in power of 2 */ +#define MANA_INDIRECT_TABLE_SIZE 64 +#define MANA_INDIRECT_TABLE_MASK (MANA_INDIRECT_TABLE_SIZE - 1) + +/* The Toeplitz hash key's length in bytes: should be multiple of 8 */ +#define MANA_HASH_KEY_SIZE 40 + +#define COMP_ENTRY_SIZE 64 + +#define ADAPTER_MTU_SIZE 1500 +#define MAX_FRAME_SIZE (ADAPTER_MTU_SIZE + 14) + +#define RX_BUFFERS_PER_QUEUE 512 + +#define MAX_SEND_BUFFERS_PER_QUEUE 256 + +#define EQ_SIZE (8 * PAGE_SIZE) +#define LOG2_EQ_THROTTLE 3 + +#define MAX_PORTS_IN_MANA_DEV 16 + +struct mana_stats { + u64 packets; + u64 bytes; + struct u64_stats_sync syncp; +}; + +struct mana_txq { + struct gdma_queue *gdma_sq; + + union { + u32 gdma_txq_id; + struct { + u32 reserved1 : 10; + u32 vsq_frame : 14; + u32 reserved2 : 8; + }; + }; + + u16 vp_offset; + + struct net_device *ndev; + + /* The SKBs are sent to the HW and we are waiting for the CQEs. 
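+ * Each posted skb is queued on pending_skbs by the TX path, and is + * expected to be dequeued and freed on the TX completion path once its + * CQE arrives.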
*/ + struct sk_buff_head pending_skbs; + struct netdev_queue *net_txq; + + atomic_t pending_sends; + + struct mana_stats stats; +}; + +/* skb data and frags dma mappings */ +struct mana_skb_head { + dma_addr_t dma_handle[MAX_SKB_FRAGS + 1]; + + u32 size[MAX_SKB_FRAGS + 1]; +}; + +#define MANA_HEADROOM sizeof(struct mana_skb_head) + +enum mana_tx_pkt_format { + MANA_SHORT_PKT_FMT = 0, + MANA_LONG_PKT_FMT = 1, +}; + +struct mana_tx_short_oob { + u32 pkt_fmt : 2; + u32 is_outer_ipv4 : 1; + u32 is_outer_ipv6 : 1; + u32 comp_iphdr_csum : 1; + u32 comp_tcp_csum : 1; + u32 comp_udp_csum : 1; + u32 supress_txcqe_gen : 1; + u32 vcq_num : 24; + + u32 trans_off : 10; /* Transport header offset */ + u32 vsq_frame : 14; + u32 short_vp_offset : 8; +}; /* HW DATA */ + +struct mana_tx_long_oob { + u32 is_encap : 1; + u32 inner_is_ipv6 : 1; + u32 inner_tcp_opt : 1; + u32 inject_vlan_pri_tag : 1; + u32 reserved1 : 12; + u32 pcp : 3; /* 802.1Q */ + u32 dei : 1; /* 802.1Q */ + u32 vlan_id : 12; /* 802.1Q */ + + u32 inner_frame_offset : 10; + u32 inner_ip_rel_offset : 6; + u32 long_vp_offset : 12; + u32 reserved2 : 4; + + u32 reserved3; + u32 reserved4; +}; /* HW DATA */ + +struct mana_tx_oob { + struct mana_tx_short_oob s_oob; + struct mana_tx_long_oob l_oob; +}; /* HW DATA */ + +enum mana_cq_type { + MANA_CQ_TYPE_RX, + MANA_CQ_TYPE_TX, +}; + +enum mana_cqe_type { + CQE_INVALID = 0, + CQE_RX_OKAY = 1, + CQE_RX_COALESCED_4 = 2, + CQE_RX_OBJECT_FENCE = 3, + CQE_RX_TRUNCATED = 4, + + CQE_TX_OKAY = 32, + CQE_TX_SA_DROP = 33, + CQE_TX_MTU_DROP = 34, + CQE_TX_INVALID_OOB = 35, + CQE_TX_INVALID_ETH_TYPE = 36, + CQE_TX_HDR_PROCESSING_ERROR = 37, + CQE_TX_VF_DISABLED = 38, + CQE_TX_VPORT_IDX_OUT_OF_RANGE = 39, + CQE_TX_VPORT_DISABLED = 40, + CQE_TX_VLAN_TAGGING_VIOLATION = 41, +}; + +#define MANA_CQE_COMPLETION 1 + +struct mana_cqe_header { + u32 cqe_type : 6; + u32 client_type : 2; + u32 vendor_err : 24; +}; /* HW DATA */ + +/* NDIS HASH Types */ +#define NDIS_HASH_IPV4 BIT(0) +#define NDIS_HASH_TCP_IPV4 BIT(1) +#define NDIS_HASH_UDP_IPV4 BIT(2) +#define NDIS_HASH_IPV6 BIT(3) +#define NDIS_HASH_TCP_IPV6 BIT(4) +#define NDIS_HASH_UDP_IPV6 BIT(5) +#define NDIS_HASH_IPV6_EX BIT(6) +#define NDIS_HASH_TCP_IPV6_EX BIT(7) +#define NDIS_HASH_UDP_IPV6_EX BIT(8) + +#define MANA_HASH_L3 (NDIS_HASH_IPV4 | NDIS_HASH_IPV6 | NDIS_HASH_IPV6_EX) +#define MANA_HASH_L4 \ + (NDIS_HASH_TCP_IPV4 | NDIS_HASH_UDP_IPV4 | NDIS_HASH_TCP_IPV6 | \ + NDIS_HASH_UDP_IPV6 | NDIS_HASH_TCP_IPV6_EX | NDIS_HASH_UDP_IPV6_EX) + +struct mana_rxcomp_perpkt_info { + u32 pkt_len : 16; + u32 reserved1 : 16; + u32 reserved2; + u32 pkt_hash; +}; /* HW DATA */ + +#define MANA_RXCOMP_OOB_NUM_PPI 4 + +/* Receive completion OOB */ +struct mana_rxcomp_oob { + struct mana_cqe_header cqe_hdr; + + u32 rx_vlan_id : 12; + u32 rx_vlantag_present : 1; + u32 rx_outer_iphdr_csum_succeed : 1; + u32 rx_outer_iphdr_csum_fail : 1; + u32 reserved1 : 1; + u32 rx_hashtype : 9; + u32 rx_iphdr_csum_succeed : 1; + u32 rx_iphdr_csum_fail : 1; + u32 rx_tcp_csum_succeed : 1; + u32 rx_tcp_csum_fail : 1; + u32 rx_udp_csum_succeed : 1; + u32 rx_udp_csum_fail : 1; + u32 reserved2 : 1; + + struct mana_rxcomp_perpkt_info ppi[MANA_RXCOMP_OOB_NUM_PPI]; + + u32 rx_wqe_offset; +}; /* HW DATA */ + +struct mana_tx_comp_oob { + struct mana_cqe_header cqe_hdr; + + u32 tx_data_offset; + + u32 tx_sgl_offset : 5; + u32 tx_wqe_offset : 27; + + u32 reserved[12]; +}; /* HW DATA */ + +struct mana_rxq; + +struct mana_cq { + struct gdma_queue *gdma_cq; + + /* Cache the CQ id (used to verify if each CQE comes to 
the right CQ). */ + u32 gdma_id; + + /* Type of the CQ: TX or RX */ + enum mana_cq_type type; + + /* Pointer to the mana_rxq that is pushing RX CQEs to the queue. + * Non-NULL if and only if type is MANA_CQ_TYPE_RX. + */ + struct mana_rxq *rxq; + + /* Pointer to the mana_txq that is pushing TX CQEs to the queue. + * Non-NULL if and only if type is MANA_CQ_TYPE_TX. + */ + struct mana_txq *txq; + + /* Pointer to a buffer which the CQ handler can copy the CQEs into. */ + struct gdma_comp *gdma_comp_buf; +}; + +#define GDMA_MAX_RQE_SGES 15 + +struct mana_recv_buf_oob { + /* A valid GDMA work request representing the data buffer. */ + struct gdma_wqe_request wqe_req; + + void *buf_va; + dma_addr_t buf_dma_addr; + + /* SGL of the buffer going to be sent, as part of the work request. */ + u32 num_sge; + struct gdma_sge sgl[GDMA_MAX_RQE_SGES]; + + /* Required to store the result of mana_gd_post_work_request. + * gdma_posted_wqe_info.wqe_size_in_bu is required for progressing the + * work queue when the WQE is consumed. + */ + struct gdma_posted_wqe_info wqe_inf; +}; + +struct mana_rxq { + struct gdma_queue *gdma_rq; + /* Cache the gdma receive queue id */ + u32 gdma_id; + + /* Index of RQ in the vPort, not gdma receive queue id */ + u32 rxq_idx; + + u32 datasize; + + mana_handle_t rxobj; + + struct mana_cq rx_cq; + + struct net_device *ndev; + + /* Total number of receive buffers to be allocated */ + u32 num_rx_buf; + + u32 buf_index; + + struct mana_stats stats; + + /* MUST BE THE LAST MEMBER: + * Each receive buffer has an associated mana_recv_buf_oob. + */ + struct mana_recv_buf_oob rx_oobs[]; +}; + +struct mana_tx_qp { + struct mana_txq txq; + + struct mana_cq tx_cq; + + mana_handle_t tx_object; +}; + +struct mana_ethtool_stats { + u64 stop_queue; + u64 wake_queue; +}; + +struct mana_context { + struct gdma_dev *gdma_dev; + + u16 num_ports; + + struct net_device *ports[MAX_PORTS_IN_MANA_DEV]; +}; + +struct mana_port_context { + struct mana_context *ac; + struct net_device *ndev; + + u8 mac_addr[ETH_ALEN]; + + struct mana_eq *eqs; + + enum TRI_STATE rss_state; + + mana_handle_t default_rxobj; + bool tx_shortform_allowed; + u16 tx_vp_offset; + + struct mana_tx_qp *tx_qp; + + /* Indirection Table for RX & TX. The values are queue indexes */ + u32 indir_table[MANA_INDIRECT_TABLE_SIZE]; + + /* Indirection table containing RxObject Handles */ + mana_handle_t rxobj_table[MANA_INDIRECT_TABLE_SIZE]; + + /* Hash key used by the NIC */ + u8 hashkey[MANA_HASH_KEY_SIZE]; + + /* This points to an array of num_queues of RQ pointers. */ + struct mana_rxq **rxqs; + + /* Create num_queues EQs, SQs, SQ-CQs, RQs and RQ-CQs, respectively. 
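+ * Queue index i pairs tx_qp[i] (an SQ plus its SQ-CQ) with rxqs[i] (an RQ + * plus its RQ-CQ).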
*/ + unsigned int max_queues; + unsigned int num_queues; + + mana_handle_t port_handle; + + u16 port_idx; + + bool port_is_up; + bool port_st_save; /* Saved port state */ + + struct mana_ethtool_stats eth_stats; +}; + +int mana_config_rss(struct mana_port_context *ac, enum TRI_STATE rx, + bool update_hash, bool update_tab); + +int mana_alloc_queues(struct net_device *ndev); +int mana_attach(struct net_device *ndev); +int mana_detach(struct net_device *ndev, bool from_close); + +int mana_probe(struct gdma_dev *gd); +void mana_remove(struct gdma_dev *gd); + +extern const struct ethtool_ops mana_ethtool_ops; + +struct mana_obj_spec { + u32 queue_index; + u64 gdma_region; + u32 queue_size; + u32 attached_eq; + u32 modr_ctx_id; +}; + +enum mana_command_code { + MANA_QUERY_DEV_CONFIG = 0x20001, + MANA_QUERY_GF_STAT = 0x20002, + MANA_CONFIG_VPORT_TX = 0x20003, + MANA_CREATE_WQ_OBJ = 0x20004, + MANA_DESTROY_WQ_OBJ = 0x20005, + MANA_FENCE_RQ = 0x20006, + MANA_CONFIG_VPORT_RX = 0x20007, + MANA_QUERY_VPORT_CONFIG = 0x20008, +}; + +/* Query Device Configuration */ +struct mana_query_device_cfg_req { + struct gdma_req_hdr hdr; + + /* Driver Capability flags */ + u64 drv_cap_flags1; + u64 drv_cap_flags2; + u64 drv_cap_flags3; + u64 drv_cap_flags4; + + u32 proto_major_ver; + u32 proto_minor_ver; + u32 proto_micro_ver; + + u32 reserved; +}; /* HW DATA */ + +struct mana_query_device_cfg_resp { + struct gdma_resp_hdr hdr; + + u64 pf_cap_flags1; + u64 pf_cap_flags2; + u64 pf_cap_flags3; + u64 pf_cap_flags4; + + u16 max_num_vports; + u16 reserved; + u32 max_num_eqs; +}; /* HW DATA */ + +/* Query vPort Configuration */ +struct mana_query_vport_cfg_req { + struct gdma_req_hdr hdr; + u32 vport_index; +}; /* HW DATA */ + +struct mana_query_vport_cfg_resp { + struct gdma_resp_hdr hdr; + u32 max_num_sq; + u32 max_num_rq; + u32 num_indirection_ent; + u32 reserved1; + u8 mac_addr[6]; + u8 reserved2[2]; + mana_handle_t vport; +}; /* HW DATA */ + +/* Configure vPort */ +struct mana_config_vport_req { + struct gdma_req_hdr hdr; + mana_handle_t vport; + u32 pdid; + u32 doorbell_pageid; +}; /* HW DATA */ + +struct mana_config_vport_resp { + struct gdma_resp_hdr hdr; + u16 tx_vport_offset; + u8 short_form_allowed; + u8 reserved; +}; /* HW DATA */ + +/* Create WQ Object */ +struct mana_create_wqobj_req { + struct gdma_req_hdr hdr; + mana_handle_t vport; + u32 wq_type; + u32 reserved; + u64 wq_gdma_region; + u64 cq_gdma_region; + u32 wq_size; + u32 cq_size; + u32 cq_moderation_ctx_id; + u32 cq_parent_qid; +}; /* HW DATA */ + +struct mana_create_wqobj_resp { + struct gdma_resp_hdr hdr; + u32 wq_id; + u32 cq_id; + mana_handle_t wq_obj; +}; /* HW DATA */ + +/* Destroy WQ Object */ +struct mana_destroy_wqobj_req { + struct gdma_req_hdr hdr; + u32 wq_type; + u32 reserved; + mana_handle_t wq_obj_handle; +}; /* HW DATA */ + +struct mana_destroy_wqobj_resp { + struct gdma_resp_hdr hdr; +}; /* HW DATA */ + +/* Fence RQ */ +struct mana_fence_rq_req { + struct gdma_req_hdr hdr; + mana_handle_t wq_obj_handle; +}; /* HW DATA */ + +struct mana_fence_rq_resp { + struct gdma_resp_hdr hdr; +}; /* HW DATA */ + +/* Configure vPort Rx Steering */ +struct mana_cfg_rx_steer_req { + struct gdma_req_hdr hdr; + mana_handle_t vport; + u16 num_indir_entries; + u16 indir_tab_offset; + u32 rx_enable; + u32 rss_enable; + u8 update_default_rxobj; + u8 update_hashkey; + u8 update_indir_tab; + u8 reserved; + mana_handle_t default_rxobj; + u8 hashkey[MANA_HASH_KEY_SIZE]; +}; /* HW DATA */ + +struct mana_cfg_rx_steer_resp { + struct gdma_resp_hdr hdr; +}; /* 
HW DATA */ + +#define MANA_MAX_NUM_QUEUES 16 + +#define MANA_SHORT_VPORT_OFFSET_MAX ((1U << 8) - 1) + +struct mana_tx_package { + struct gdma_wqe_request wqe_req; + struct gdma_sge sgl_array[5]; + struct gdma_sge *sgl_ptr; + + struct mana_tx_oob tx_oob; + + struct gdma_posted_wqe_info wqe_info; +}; + +#endif /* _MANA_H */ diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c new file mode 100644 index 000000000000..04d067243457 --- /dev/null +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -0,0 +1,1895 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* Copyright (c) 2021, Microsoft Corporation. */ + +#include <linux/inetdevice.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/mm.h> + +#include <net/checksum.h> +#include <net/ip6_checksum.h> + +#include "mana.h" + +/* Microsoft Azure Network Adapter (MANA) functions */ + +static int mana_open(struct net_device *ndev) +{ + struct mana_port_context *apc = netdev_priv(ndev); + int err; + + err = mana_alloc_queues(ndev); + if (err) + return err; + + apc->port_is_up = true; + + /* Ensure port state updated before txq state */ + smp_wmb(); + + netif_carrier_on(ndev); + netif_tx_wake_all_queues(ndev); + + return 0; +} + +static int mana_close(struct net_device *ndev) +{ + struct mana_port_context *apc = netdev_priv(ndev); + + if (!apc->port_is_up) + return 0; + + return mana_detach(ndev, true); +} + +static bool mana_can_tx(struct gdma_queue *wq) +{ + return mana_gd_wq_avail_space(wq) >= MAX_TX_WQE_SIZE; +} + +static unsigned int mana_checksum_info(struct sk_buff *skb) +{ + if (skb->protocol == htons(ETH_P_IP)) { + struct iphdr *ip = ip_hdr(skb); + + if (ip->protocol == IPPROTO_TCP) + return IPPROTO_TCP; + + if (ip->protocol == IPPROTO_UDP) + return IPPROTO_UDP; + } else if (skb->protocol == htons(ETH_P_IPV6)) { + struct ipv6hdr *ip6 = ipv6_hdr(skb); + + if (ip6->nexthdr == IPPROTO_TCP) + return IPPROTO_TCP; + + if (ip6->nexthdr == IPPROTO_UDP) + return IPPROTO_UDP; + } + + /* No csum offloading */ + return 0; +} + +static int mana_map_skb(struct sk_buff *skb, struct mana_port_context *apc, + struct mana_tx_package *tp) +{ + struct mana_skb_head *ash = (struct mana_skb_head *)skb->head; + struct gdma_dev *gd = apc->ac->gdma_dev; + struct gdma_context *gc; + struct device *dev; + skb_frag_t *frag; + dma_addr_t da; + int i; + + gc = gd->gdma_context; + dev = gc->dev; + da = dma_map_single(dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE); + + if (dma_mapping_error(dev, da)) + return -ENOMEM; + + ash->dma_handle[0] = da; + ash->size[0] = skb_headlen(skb); + + tp->wqe_req.sgl[0].address = ash->dma_handle[0]; + tp->wqe_req.sgl[0].mem_key = gd->gpa_mkey; + tp->wqe_req.sgl[0].size = ash->size[0]; + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + frag = &skb_shinfo(skb)->frags[i]; + da = skb_frag_dma_map(dev, frag, 0, skb_frag_size(frag), + DMA_TO_DEVICE); + + if (dma_mapping_error(dev, da)) + goto frag_err; + + ash->dma_handle[i + 1] = da; + ash->size[i + 1] = skb_frag_size(frag); + + tp->wqe_req.sgl[i + 1].address = ash->dma_handle[i + 1]; + tp->wqe_req.sgl[i + 1].mem_key = gd->gpa_mkey; + tp->wqe_req.sgl[i + 1].size = ash->size[i + 1]; + } + + return 0; + +frag_err: + for (i = i - 1; i >= 0; i--) + dma_unmap_page(dev, ash->dma_handle[i + 1], ash->size[i + 1], + DMA_TO_DEVICE); + + dma_unmap_single(dev, ash->dma_handle[0], ash->size[0], DMA_TO_DEVICE); + + return -ENOMEM; +} + +static int mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) +{ 
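+ /* TX fast path, in outline: look up the per-queue SQ and CQ from the + * skb's queue mapping, build the short- or long-format TX OOB, translate + * GSO and checksum-offload state into OOB bits, DMA-map the skb, post the + * WQE and ring the doorbell. + */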
+ enum mana_tx_pkt_format pkt_fmt = MANA_SHORT_PKT_FMT; + struct mana_port_context *apc = netdev_priv(ndev); + u16 txq_idx = skb_get_queue_mapping(skb); + struct gdma_dev *gd = apc->ac->gdma_dev; + bool ipv4 = false, ipv6 = false; + struct mana_tx_package pkg = {}; + struct netdev_queue *net_txq; + struct mana_stats *tx_stats; + struct gdma_queue *gdma_sq; + unsigned int csum_type; + struct mana_txq *txq; + struct mana_cq *cq; + int err, len; + + if (unlikely(!apc->port_is_up)) + goto tx_drop; + + if (skb_cow_head(skb, MANA_HEADROOM)) + goto tx_drop_count; + + txq = &apc->tx_qp[txq_idx].txq; + gdma_sq = txq->gdma_sq; + cq = &apc->tx_qp[txq_idx].tx_cq; + + pkg.tx_oob.s_oob.vcq_num = cq->gdma_id; + pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame; + + if (txq->vp_offset > MANA_SHORT_VPORT_OFFSET_MAX) { + pkg.tx_oob.l_oob.long_vp_offset = txq->vp_offset; + pkt_fmt = MANA_LONG_PKT_FMT; + } else { + pkg.tx_oob.s_oob.short_vp_offset = txq->vp_offset; + } + + pkg.tx_oob.s_oob.pkt_fmt = pkt_fmt; + + if (pkt_fmt == MANA_SHORT_PKT_FMT) + pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_short_oob); + else + pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_oob); + + pkg.wqe_req.inline_oob_data = &pkg.tx_oob; + pkg.wqe_req.flags = 0; + pkg.wqe_req.client_data_unit = 0; + + pkg.wqe_req.num_sge = 1 + skb_shinfo(skb)->nr_frags; + WARN_ON_ONCE(pkg.wqe_req.num_sge > 30); + + if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) { + pkg.wqe_req.sgl = pkg.sgl_array; + } else { + pkg.sgl_ptr = kmalloc_array(pkg.wqe_req.num_sge, + sizeof(struct gdma_sge), + GFP_ATOMIC); + if (!pkg.sgl_ptr) + goto tx_drop_count; + + pkg.wqe_req.sgl = pkg.sgl_ptr; + } + + if (skb->protocol == htons(ETH_P_IP)) + ipv4 = true; + else if (skb->protocol == htons(ETH_P_IPV6)) + ipv6 = true; + + if (skb_is_gso(skb)) { + pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4; + pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6; + + pkg.tx_oob.s_oob.comp_iphdr_csum = 1; + pkg.tx_oob.s_oob.comp_tcp_csum = 1; + pkg.tx_oob.s_oob.trans_off = skb_transport_offset(skb); + + pkg.wqe_req.client_data_unit = skb_shinfo(skb)->gso_size; + pkg.wqe_req.flags = GDMA_WR_OOB_IN_SGL | GDMA_WR_PAD_BY_SGE0; + if (ipv4) { + ip_hdr(skb)->tot_len = 0; + ip_hdr(skb)->check = 0; + tcp_hdr(skb)->check = + ~csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, 0, + IPPROTO_TCP, 0); + } else { + ipv6_hdr(skb)->payload_len = 0; + tcp_hdr(skb)->check = + ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, 0, + IPPROTO_TCP, 0); + } + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { + csum_type = mana_checksum_info(skb); + + if (csum_type == IPPROTO_TCP) { + pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4; + pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6; + + pkg.tx_oob.s_oob.comp_tcp_csum = 1; + pkg.tx_oob.s_oob.trans_off = skb_transport_offset(skb); + + } else if (csum_type == IPPROTO_UDP) { + pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4; + pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6; + + pkg.tx_oob.s_oob.comp_udp_csum = 1; + } else { + /* Can't do offload of this type of checksum */ + if (skb_checksum_help(skb)) + goto free_sgl_ptr; + } + } + + if (mana_map_skb(skb, apc, &pkg)) + goto free_sgl_ptr; + + skb_queue_tail(&txq->pending_skbs, skb); + + len = skb->len; + net_txq = netdev_get_tx_queue(ndev, txq_idx); + + err = mana_gd_post_work_request(gdma_sq, &pkg.wqe_req, + (struct gdma_posted_wqe_info *)skb->cb); + if (!mana_can_tx(gdma_sq)) { + netif_tx_stop_queue(net_txq); + apc->eth_stats.stop_queue++; + } + + if (err) { + (void)skb_dequeue_tail(&txq->pending_skbs); + netdev_warn(ndev, "Failed to post TX 
OOB: %d\n", err); + err = NETDEV_TX_BUSY; + goto tx_busy; + } + + err = NETDEV_TX_OK; + atomic_inc(&txq->pending_sends); + + mana_gd_wq_ring_doorbell(gd->gdma_context, gdma_sq); + + /* skb may be freed after mana_gd_post_work_request. Do not use it. */ + skb = NULL; + + tx_stats = &txq->stats; + u64_stats_update_begin(&tx_stats->syncp); + tx_stats->packets++; + tx_stats->bytes += len; + u64_stats_update_end(&tx_stats->syncp); + +tx_busy: + if (netif_tx_queue_stopped(net_txq) && mana_can_tx(gdma_sq)) { + netif_tx_wake_queue(net_txq); + apc->eth_stats.wake_queue++; + } + + kfree(pkg.sgl_ptr); + return err; + +free_sgl_ptr: + kfree(pkg.sgl_ptr); +tx_drop_count: + ndev->stats.tx_dropped++; +tx_drop: + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +} + +static void mana_get_stats64(struct net_device *ndev, + struct rtnl_link_stats64 *st) +{ + struct mana_port_context *apc = netdev_priv(ndev); + unsigned int num_queues = apc->num_queues; + struct mana_stats *stats; + unsigned int start; + u64 packets, bytes; + int q; + + if (!apc->port_is_up) + return; + + netdev_stats_to_stats64(st, &ndev->stats); + + for (q = 0; q < num_queues; q++) { + stats = &apc->rxqs[q]->stats; + + do { + start = u64_stats_fetch_begin_irq(&stats->syncp); + packets = stats->packets; + bytes = stats->bytes; + } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + + st->rx_packets += packets; + st->rx_bytes += bytes; + } + + for (q = 0; q < num_queues; q++) { + stats = &apc->tx_qp[q].txq.stats; + + do { + start = u64_stats_fetch_begin_irq(&stats->syncp); + packets = stats->packets; + bytes = stats->bytes; + } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + + st->tx_packets += packets; + st->tx_bytes += bytes; + } +} + +static int mana_get_tx_queue(struct net_device *ndev, struct sk_buff *skb, + int old_q) +{ + struct mana_port_context *apc = netdev_priv(ndev); + u32 hash = skb_get_hash(skb); + struct sock *sk = skb->sk; + int txq; + + txq = apc->indir_table[hash & MANA_INDIRECT_TABLE_MASK]; + + if (txq != old_q && sk && sk_fullsock(sk) && + rcu_access_pointer(sk->sk_dst_cache)) + sk_tx_queue_set(sk, txq); + + return txq; +} + +static u16 mana_select_queue(struct net_device *ndev, struct sk_buff *skb, + struct net_device *sb_dev) +{ + int txq; + + if (ndev->real_num_tx_queues == 1) + return 0; + + txq = sk_tx_queue_get(skb->sk); + + if (txq < 0 || skb->ooo_okay || txq >= ndev->real_num_tx_queues) { + if (skb_rx_queue_recorded(skb)) + txq = skb_get_rx_queue(skb); + else + txq = mana_get_tx_queue(ndev, skb, txq); + } + + return txq; +} + +static const struct net_device_ops mana_devops = { + .ndo_open = mana_open, + .ndo_stop = mana_close, + .ndo_select_queue = mana_select_queue, + .ndo_start_xmit = mana_start_xmit, + .ndo_validate_addr = eth_validate_addr, + .ndo_get_stats64 = mana_get_stats64, +}; + +static void mana_cleanup_port_context(struct mana_port_context *apc) +{ + kfree(apc->rxqs); + apc->rxqs = NULL; +} + +static int mana_init_port_context(struct mana_port_context *apc) +{ + apc->rxqs = kcalloc(apc->num_queues, sizeof(struct mana_rxq *), + GFP_KERNEL); + + return !apc->rxqs ? 
-ENOMEM : 0; +} + +static int mana_send_request(struct mana_context *ac, void *in_buf, + u32 in_len, void *out_buf, u32 out_len) +{ + struct gdma_context *gc = ac->gdma_dev->gdma_context; + struct gdma_resp_hdr *resp = out_buf; + struct gdma_req_hdr *req = in_buf; + struct device *dev = gc->dev; + static atomic_t activity_id; + int err; + + req->dev_id = gc->mana.dev_id; + req->activity_id = atomic_inc_return(&activity_id); + + err = mana_gd_send_request(gc, in_len, in_buf, out_len, + out_buf); + if (err || resp->status) { + dev_err(dev, "Failed to send mana message: %d, 0x%x\n", + err, resp->status); + return err ? err : -EPROTO; + } + + if (req->dev_id.as_uint32 != resp->dev_id.as_uint32 || + req->activity_id != resp->activity_id) { + dev_err(dev, "Unexpected mana message response: %x,%x,%x,%x\n", + req->dev_id.as_uint32, resp->dev_id.as_uint32, + req->activity_id, resp->activity_id); + return -EPROTO; + } + + return 0; +} + +static int mana_verify_resp_hdr(const struct gdma_resp_hdr *resp_hdr, + const enum mana_command_code expected_code, + const u32 min_size) +{ + if (resp_hdr->response.msg_type != expected_code) + return -EPROTO; + + if (resp_hdr->response.msg_version < GDMA_MESSAGE_V1) + return -EPROTO; + + if (resp_hdr->response.msg_size < min_size) + return -EPROTO; + + return 0; +} + +static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver, + u32 proto_minor_ver, u32 proto_micro_ver, + u16 *max_num_vports) +{ + struct gdma_context *gc = ac->gdma_dev->gdma_context; + struct mana_query_device_cfg_resp resp = {}; + struct mana_query_device_cfg_req req = {}; + struct device *dev = gc->dev; + int err = 0; + + mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_DEV_CONFIG, + sizeof(req), sizeof(resp)); + req.proto_major_ver = proto_major_ver; + req.proto_minor_ver = proto_minor_ver; + req.proto_micro_ver = proto_micro_ver; + + err = mana_send_request(ac, &req, sizeof(req), &resp, sizeof(resp)); + if (err) { + dev_err(dev, "Failed to query config: %d", err); + return err; + } + + err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_DEV_CONFIG, + sizeof(resp)); + if (err || resp.hdr.status) { + dev_err(dev, "Invalid query result: %d, 0x%x\n", err, + resp.hdr.status); + if (!err) + err = -EPROTO; + return err; + } + + *max_num_vports = resp.max_num_vports; + + return 0; +} + +static int mana_query_vport_cfg(struct mana_port_context *apc, u32 vport_index, + u32 *max_sq, u32 *max_rq, u32 *num_indir_entry) +{ + struct mana_query_vport_cfg_resp resp = {}; + struct mana_query_vport_cfg_req req = {}; + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_VPORT_CONFIG, + sizeof(req), sizeof(resp)); + + req.vport_index = vport_index; + + err = mana_send_request(apc->ac, &req, sizeof(req), &resp, + sizeof(resp)); + if (err) + return err; + + err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_VPORT_CONFIG, + sizeof(resp)); + if (err) + return err; + + if (resp.hdr.status) + return -EPROTO; + + *max_sq = resp.max_num_sq; + *max_rq = resp.max_num_rq; + *num_indir_entry = resp.num_indirection_ent; + + apc->port_handle = resp.vport; + ether_addr_copy(apc->mac_addr, resp.mac_addr); + + return 0; +} + +static int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id, + u32 doorbell_pg_id) +{ + struct mana_config_vport_resp resp = {}; + struct mana_config_vport_req req = {}; + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_CONFIG_VPORT_TX, + sizeof(req), sizeof(resp)); + req.vport = apc->port_handle; + req.pdid = protection_dom_id; + req.doorbell_pageid = doorbell_pg_id; + + err = 
mana_send_request(apc->ac, &req, sizeof(req), &resp, + sizeof(resp)); + if (err) { + netdev_err(apc->ndev, "Failed to configure vPort: %d\n", err); + goto out; + } + + err = mana_verify_resp_hdr(&resp.hdr, MANA_CONFIG_VPORT_TX, + sizeof(resp)); + if (err || resp.hdr.status) { + netdev_err(apc->ndev, "Failed to configure vPort: %d, 0x%x\n", + err, resp.hdr.status); + if (!err) + err = -EPROTO; + + goto out; + } + + apc->tx_shortform_allowed = resp.short_form_allowed; + apc->tx_vp_offset = resp.tx_vport_offset; +out: + return err; +} + +static int mana_cfg_vport_steering(struct mana_port_context *apc, + enum TRI_STATE rx, + bool update_default_rxobj, bool update_key, + bool update_tab) +{ + u16 num_entries = MANA_INDIRECT_TABLE_SIZE; + struct mana_cfg_rx_steer_req *req = NULL; + struct mana_cfg_rx_steer_resp resp = {}; + struct net_device *ndev = apc->ndev; + mana_handle_t *req_indir_tab; + u32 req_buf_size; + int err; + + req_buf_size = sizeof(*req) + sizeof(mana_handle_t) * num_entries; + req = kzalloc(req_buf_size, GFP_KERNEL); + if (!req) + return -ENOMEM; + + mana_gd_init_req_hdr(&req->hdr, MANA_CONFIG_VPORT_RX, req_buf_size, + sizeof(resp)); + + req->vport = apc->port_handle; + req->num_indir_entries = num_entries; + req->indir_tab_offset = sizeof(*req); + req->rx_enable = rx; + req->rss_enable = apc->rss_state; + req->update_default_rxobj = update_default_rxobj; + req->update_hashkey = update_key; + req->update_indir_tab = update_tab; + req->default_rxobj = apc->default_rxobj; + + if (update_key) + memcpy(&req->hashkey, apc->hashkey, MANA_HASH_KEY_SIZE); + + if (update_tab) { + req_indir_tab = (mana_handle_t *)(req + 1); + memcpy(req_indir_tab, apc->rxobj_table, + req->num_indir_entries * sizeof(mana_handle_t)); + } + + err = mana_send_request(apc->ac, req, req_buf_size, &resp, + sizeof(resp)); + if (err) { + netdev_err(ndev, "Failed to configure vPort RX: %d\n", err); + goto out; + } + + err = mana_verify_resp_hdr(&resp.hdr, MANA_CONFIG_VPORT_RX, + sizeof(resp)); + if (err) { + netdev_err(ndev, "vPort RX configuration failed: %d\n", err); + goto out; + } + + if (resp.hdr.status) { + netdev_err(ndev, "vPort RX configuration failed: 0x%x\n", + resp.hdr.status); + err = -EPROTO; + } +out: + kfree(req); + return err; +} + +static int mana_create_wq_obj(struct mana_port_context *apc, + mana_handle_t vport, + u32 wq_type, struct mana_obj_spec *wq_spec, + struct mana_obj_spec *cq_spec, + mana_handle_t *wq_obj) +{ + struct mana_create_wqobj_resp resp = {}; + struct mana_create_wqobj_req req = {}; + struct net_device *ndev = apc->ndev; + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_CREATE_WQ_OBJ, + sizeof(req), sizeof(resp)); + req.vport = vport; + req.wq_type = wq_type; + req.wq_gdma_region = wq_spec->gdma_region; + req.cq_gdma_region = cq_spec->gdma_region; + req.wq_size = wq_spec->queue_size; + req.cq_size = cq_spec->queue_size; + req.cq_moderation_ctx_id = cq_spec->modr_ctx_id; + req.cq_parent_qid = cq_spec->attached_eq; + + err = mana_send_request(apc->ac, &req, sizeof(req), &resp, + sizeof(resp)); + if (err) { + netdev_err(ndev, "Failed to create WQ object: %d\n", err); + goto out; + } + + err = mana_verify_resp_hdr(&resp.hdr, MANA_CREATE_WQ_OBJ, + sizeof(resp)); + if (err || resp.hdr.status) { + netdev_err(ndev, "Failed to create WQ object: %d, 0x%x\n", err, + resp.hdr.status); + if (!err) + err = -EPROTO; + goto out; + } + + if (resp.wq_obj == INVALID_MANA_HANDLE) { + netdev_err(ndev, "Got an invalid WQ object handle\n"); + err = -EPROTO; + goto out; + } + + *wq_obj = resp.wq_obj; 
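+	/* Besides the object handle, the response carries the queue IDs
+	 * assigned by the PF; pass them back through the spec structs so
+	 * the caller can relabel its gdma queues to match the new object.
+	 */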
+ wq_spec->queue_index = resp.wq_id; + cq_spec->queue_index = resp.cq_id; + + return 0; +out: + return err; +} + +static void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type, + mana_handle_t wq_obj) +{ + struct mana_destroy_wqobj_resp resp = {}; + struct mana_destroy_wqobj_req req = {}; + struct net_device *ndev = apc->ndev; + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_DESTROY_WQ_OBJ, + sizeof(req), sizeof(resp)); + req.wq_type = wq_type; + req.wq_obj_handle = wq_obj; + + err = mana_send_request(apc->ac, &req, sizeof(req), &resp, + sizeof(resp)); + if (err) { + netdev_err(ndev, "Failed to destroy WQ object: %d\n", err); + return; + } + + err = mana_verify_resp_hdr(&resp.hdr, MANA_DESTROY_WQ_OBJ, + sizeof(resp)); + if (err || resp.hdr.status) + netdev_err(ndev, "Failed to destroy WQ object: %d, 0x%x\n", err, + resp.hdr.status); +} + +static void mana_init_cqe_poll_buf(struct gdma_comp *cqe_poll_buf) +{ + int i; + + for (i = 0; i < CQE_POLLING_BUFFER; i++) + memset(&cqe_poll_buf[i], 0, sizeof(struct gdma_comp)); +} + +static void mana_destroy_eq(struct gdma_context *gc, + struct mana_port_context *apc) +{ + struct gdma_queue *eq; + int i; + + if (!apc->eqs) + return; + + for (i = 0; i < apc->num_queues; i++) { + eq = apc->eqs[i].eq; + if (!eq) + continue; + + mana_gd_destroy_queue(gc, eq); + } + + kfree(apc->eqs); + apc->eqs = NULL; +} + +static int mana_create_eq(struct mana_port_context *apc) +{ + struct gdma_dev *gd = apc->ac->gdma_dev; + struct gdma_queue_spec spec = {}; + int err; + int i; + + apc->eqs = kcalloc(apc->num_queues, sizeof(struct mana_eq), + GFP_KERNEL); + if (!apc->eqs) + return -ENOMEM; + + spec.type = GDMA_EQ; + spec.monitor_avl_buf = false; + spec.queue_size = EQ_SIZE; + spec.eq.callback = NULL; + spec.eq.context = apc->eqs; + spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE; + spec.eq.ndev = apc->ndev; + + for (i = 0; i < apc->num_queues; i++) { + mana_init_cqe_poll_buf(apc->eqs[i].cqe_poll); + + err = mana_gd_create_mana_eq(gd, &spec, &apc->eqs[i].eq); + if (err) + goto out; + } + + return 0; +out: + mana_destroy_eq(gd->gdma_context, apc); + return err; +} + +static int mana_move_wq_tail(struct gdma_queue *wq, u32 num_units) +{ + u32 used_space_old; + u32 used_space_new; + + used_space_old = wq->head - wq->tail; + used_space_new = wq->head - (wq->tail + num_units); + + if (WARN_ON_ONCE(used_space_new > used_space_old)) + return -ERANGE; + + wq->tail += num_units; + return 0; +} + +static void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc) +{ + struct mana_skb_head *ash = (struct mana_skb_head *)skb->head; + struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; + struct device *dev = gc->dev; + int i; + + dma_unmap_single(dev, ash->dma_handle[0], ash->size[0], DMA_TO_DEVICE); + + for (i = 1; i < skb_shinfo(skb)->nr_frags + 1; i++) + dma_unmap_page(dev, ash->dma_handle[i], ash->size[i], + DMA_TO_DEVICE); +} + +static void mana_poll_tx_cq(struct mana_cq *cq) +{ + struct gdma_queue *gdma_eq = cq->gdma_cq->cq.parent; + struct gdma_comp *completions = cq->gdma_comp_buf; + struct gdma_posted_wqe_info *wqe_info; + unsigned int pkt_transmitted = 0; + unsigned int wqe_unit_cnt = 0; + struct mana_txq *txq = cq->txq; + struct mana_port_context *apc; + struct netdev_queue *net_txq; + struct gdma_queue *gdma_wq; + unsigned int avail_space; + struct net_device *ndev; + struct sk_buff *skb; + bool txq_stopped; + int comp_read; + int i; + + ndev = txq->ndev; + apc = netdev_priv(ndev); + + comp_read = mana_gd_poll_cq(cq->gdma_cq, completions, 
+ CQE_POLLING_BUFFER); + + for (i = 0; i < comp_read; i++) { + struct mana_tx_comp_oob *cqe_oob; + + if (WARN_ON_ONCE(!completions[i].is_sq)) + return; + + cqe_oob = (struct mana_tx_comp_oob *)completions[i].cqe_data; + if (WARN_ON_ONCE(cqe_oob->cqe_hdr.client_type != + MANA_CQE_COMPLETION)) + return; + + switch (cqe_oob->cqe_hdr.cqe_type) { + case CQE_TX_OKAY: + break; + + case CQE_TX_SA_DROP: + case CQE_TX_MTU_DROP: + case CQE_TX_INVALID_OOB: + case CQE_TX_INVALID_ETH_TYPE: + case CQE_TX_HDR_PROCESSING_ERROR: + case CQE_TX_VF_DISABLED: + case CQE_TX_VPORT_IDX_OUT_OF_RANGE: + case CQE_TX_VPORT_DISABLED: + case CQE_TX_VLAN_TAGGING_VIOLATION: + WARN_ONCE(1, "TX: CQE error %d: ignored.\n", + cqe_oob->cqe_hdr.cqe_type); + break; + + default: + /* If the CQE type is unexpected, log an error, assert, + * and go through the error path. + */ + WARN_ONCE(1, "TX: Unexpected CQE type %d: HW BUG?\n", + cqe_oob->cqe_hdr.cqe_type); + return; + } + + if (WARN_ON_ONCE(txq->gdma_txq_id != completions[i].wq_num)) + return; + + skb = skb_dequeue(&txq->pending_skbs); + if (WARN_ON_ONCE(!skb)) + return; + + wqe_info = (struct gdma_posted_wqe_info *)skb->cb; + wqe_unit_cnt += wqe_info->wqe_size_in_bu; + + mana_unmap_skb(skb, apc); + + napi_consume_skb(skb, gdma_eq->eq.budget); + + pkt_transmitted++; + } + + if (WARN_ON_ONCE(wqe_unit_cnt == 0)) + return; + + mana_move_wq_tail(txq->gdma_sq, wqe_unit_cnt); + + gdma_wq = txq->gdma_sq; + avail_space = mana_gd_wq_avail_space(gdma_wq); + + /* Ensure tail updated before checking q stop */ + smp_mb(); + + net_txq = txq->net_txq; + txq_stopped = netif_tx_queue_stopped(net_txq); + + /* Ensure checking txq_stopped before apc->port_is_up. */ + smp_rmb(); + + if (txq_stopped && apc->port_is_up && avail_space >= MAX_TX_WQE_SIZE) { + netif_tx_wake_queue(net_txq); + apc->eth_stats.wake_queue++; + } + + if (atomic_sub_return(pkt_transmitted, &txq->pending_sends) < 0) + WARN_ON_ONCE(1); +} + +static void mana_post_pkt_rxq(struct mana_rxq *rxq) +{ + struct mana_recv_buf_oob *recv_buf_oob; + u32 curr_index; + int err; + + curr_index = rxq->buf_index++; + if (rxq->buf_index == rxq->num_rx_buf) + rxq->buf_index = 0; + + recv_buf_oob = &rxq->rx_oobs[curr_index]; + + err = mana_gd_post_and_ring(rxq->gdma_rq, &recv_buf_oob->wqe_req, + &recv_buf_oob->wqe_inf); + if (WARN_ON_ONCE(err)) + return; + + WARN_ON_ONCE(recv_buf_oob->wqe_inf.wqe_size_in_bu != 1); +} + +static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe, + struct mana_rxq *rxq) +{ + struct mana_stats *rx_stats = &rxq->stats; + struct net_device *ndev = rxq->ndev; + uint pkt_len = cqe->ppi[0].pkt_len; + struct mana_port_context *apc; + u16 rxq_idx = rxq->rxq_idx; + struct napi_struct *napi; + struct gdma_queue *eq; + struct sk_buff *skb; + u32 hash_value; + + apc = netdev_priv(ndev); + eq = apc->eqs[rxq_idx].eq; + eq->eq.work_done++; + napi = &eq->eq.napi; + + if (!buf_va) { + ++ndev->stats.rx_dropped; + return; + } + + skb = build_skb(buf_va, PAGE_SIZE); + + if (!skb) { + free_page((unsigned long)buf_va); + ++ndev->stats.rx_dropped; + return; + } + + skb_put(skb, pkt_len); + skb->dev = napi->dev; + + skb->protocol = eth_type_trans(skb, ndev); + skb_checksum_none_assert(skb); + skb_record_rx_queue(skb, rxq_idx); + + if ((ndev->features & NETIF_F_RXCSUM) && cqe->rx_iphdr_csum_succeed) { + if (cqe->rx_tcp_csum_succeed || cqe->rx_udp_csum_succeed) + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + + if (cqe->rx_hashtype != 0 && (ndev->features & NETIF_F_RXHASH)) { + hash_value = cqe->ppi[0].pkt_hash; + + if 
(cqe->rx_hashtype & MANA_HASH_L4) + skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L4); + else + skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L3); + } + + napi_gro_receive(napi, skb); + + u64_stats_update_begin(&rx_stats->syncp); + rx_stats->packets++; + rx_stats->bytes += pkt_len; + u64_stats_update_end(&rx_stats->syncp); +} + +static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, + struct gdma_comp *cqe) +{ + struct mana_rxcomp_oob *oob = (struct mana_rxcomp_oob *)cqe->cqe_data; + struct gdma_context *gc = rxq->gdma_rq->gdma_dev->gdma_context; + struct net_device *ndev = rxq->ndev; + struct mana_recv_buf_oob *rxbuf_oob; + struct device *dev = gc->dev; + void *new_buf, *old_buf; + struct page *new_page; + u32 curr, pktlen; + dma_addr_t da; + + switch (oob->cqe_hdr.cqe_type) { + case CQE_RX_OKAY: + break; + + case CQE_RX_TRUNCATED: + netdev_err(ndev, "Dropped a truncated packet\n"); + return; + + case CQE_RX_COALESCED_4: + netdev_err(ndev, "RX coalescing is unsupported\n"); + return; + + case CQE_RX_OBJECT_FENCE: + netdev_err(ndev, "RX Fencing is unsupported\n"); + return; + + default: + netdev_err(ndev, "Unknown RX CQE type = %d\n", + oob->cqe_hdr.cqe_type); + return; + } + + if (oob->cqe_hdr.cqe_type != CQE_RX_OKAY) + return; + + pktlen = oob->ppi[0].pkt_len; + + if (pktlen == 0) { + /* data packets should never have a packet length of zero */ + netdev_err(ndev, "RX pkt len=0, rq=%u, cq=%u, rxobj=0x%llx\n", + rxq->gdma_id, cq->gdma_id, rxq->rxobj); + return; + } + + curr = rxq->buf_index; + rxbuf_oob = &rxq->rx_oobs[curr]; + WARN_ON_ONCE(rxbuf_oob->wqe_inf.wqe_size_in_bu != 1); + + new_page = alloc_page(GFP_ATOMIC); + + if (new_page) { + da = dma_map_page(dev, new_page, 0, rxq->datasize, + DMA_FROM_DEVICE); + + if (dma_mapping_error(dev, da)) { + __free_page(new_page); + new_page = NULL; + } + } + + new_buf = new_page ?
page_to_virt(new_page) : NULL; + + if (new_buf) { + dma_unmap_page(dev, rxbuf_oob->buf_dma_addr, rxq->datasize, + DMA_FROM_DEVICE); + + old_buf = rxbuf_oob->buf_va; + + /* refresh the rxbuf_oob with the new page */ + rxbuf_oob->buf_va = new_buf; + rxbuf_oob->buf_dma_addr = da; + rxbuf_oob->sgl[0].address = rxbuf_oob->buf_dma_addr; + } else { + old_buf = NULL; /* drop the packet if no memory */ + } + + mana_rx_skb(old_buf, oob, rxq); + + mana_move_wq_tail(rxq->gdma_rq, rxbuf_oob->wqe_inf.wqe_size_in_bu); + + mana_post_pkt_rxq(rxq); +} + +static void mana_poll_rx_cq(struct mana_cq *cq) +{ + struct gdma_comp *comp = cq->gdma_comp_buf; + int comp_read, i; + + comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER); + WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER); + + for (i = 0; i < comp_read; i++) { + if (WARN_ON_ONCE(comp[i].is_sq)) + return; + + /* verify recv cqe references the right rxq */ + if (WARN_ON_ONCE(comp[i].wq_num != cq->rxq->gdma_id)) + return; + + mana_process_rx_cqe(cq->rxq, cq, &comp[i]); + } +} + +static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue) +{ + struct mana_cq *cq = context; + + WARN_ON_ONCE(cq->gdma_cq != gdma_queue); + + if (cq->type == MANA_CQ_TYPE_RX) + mana_poll_rx_cq(cq); + else + mana_poll_tx_cq(cq); + + mana_gd_arm_cq(gdma_queue); +} + +static void mana_deinit_cq(struct mana_port_context *apc, struct mana_cq *cq) +{ + struct gdma_dev *gd = apc->ac->gdma_dev; + + if (!cq->gdma_cq) + return; + + mana_gd_destroy_queue(gd->gdma_context, cq->gdma_cq); +} + +static void mana_deinit_txq(struct mana_port_context *apc, struct mana_txq *txq) +{ + struct gdma_dev *gd = apc->ac->gdma_dev; + + if (!txq->gdma_sq) + return; + + mana_gd_destroy_queue(gd->gdma_context, txq->gdma_sq); +} + +static void mana_destroy_txq(struct mana_port_context *apc) +{ + int i; + + if (!apc->tx_qp) + return; + + for (i = 0; i < apc->num_queues; i++) { + mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object); + + mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq); + + mana_deinit_txq(apc, &apc->tx_qp[i].txq); + } + + kfree(apc->tx_qp); + apc->tx_qp = NULL; +} + +static int mana_create_txq(struct mana_port_context *apc, + struct net_device *net) +{ + struct gdma_dev *gd = apc->ac->gdma_dev; + struct mana_obj_spec wq_spec; + struct mana_obj_spec cq_spec; + struct gdma_queue_spec spec; + struct gdma_context *gc; + struct mana_txq *txq; + struct mana_cq *cq; + u32 txq_size; + u32 cq_size; + int err; + int i; + + apc->tx_qp = kcalloc(apc->num_queues, sizeof(struct mana_tx_qp), + GFP_KERNEL); + if (!apc->tx_qp) + return -ENOMEM; + + /* The minimum size of the WQE is 32 bytes, hence + * MAX_SEND_BUFFERS_PER_QUEUE represents the maximum number of WQEs + * the SQ can store. This value is then used to size other queues + * to prevent overflow. 
+ */ + txq_size = MAX_SEND_BUFFERS_PER_QUEUE * 32; + BUILD_BUG_ON(!PAGE_ALIGNED(txq_size)); + + cq_size = MAX_SEND_BUFFERS_PER_QUEUE * COMP_ENTRY_SIZE; + cq_size = PAGE_ALIGN(cq_size); + + gc = gd->gdma_context; + + for (i = 0; i < apc->num_queues; i++) { + apc->tx_qp[i].tx_object = INVALID_MANA_HANDLE; + + /* Create SQ */ + txq = &apc->tx_qp[i].txq; + + u64_stats_init(&txq->stats.syncp); + txq->ndev = net; + txq->net_txq = netdev_get_tx_queue(net, i); + txq->vp_offset = apc->tx_vp_offset; + skb_queue_head_init(&txq->pending_skbs); + + memset(&spec, 0, sizeof(spec)); + spec.type = GDMA_SQ; + spec.monitor_avl_buf = true; + spec.queue_size = txq_size; + err = mana_gd_create_mana_wq_cq(gd, &spec, &txq->gdma_sq); + if (err) + goto out; + + /* Create SQ's CQ */ + cq = &apc->tx_qp[i].tx_cq; + cq->gdma_comp_buf = apc->eqs[i].cqe_poll; + cq->type = MANA_CQ_TYPE_TX; + + cq->txq = txq; + + memset(&spec, 0, sizeof(spec)); + spec.type = GDMA_CQ; + spec.monitor_avl_buf = false; + spec.queue_size = cq_size; + spec.cq.callback = mana_cq_handler; + spec.cq.parent_eq = apc->eqs[i].eq; + spec.cq.context = cq; + err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq); + if (err) + goto out; + + memset(&wq_spec, 0, sizeof(wq_spec)); + memset(&cq_spec, 0, sizeof(cq_spec)); + + wq_spec.gdma_region = txq->gdma_sq->mem_info.gdma_region; + wq_spec.queue_size = txq->gdma_sq->queue_size; + + cq_spec.gdma_region = cq->gdma_cq->mem_info.gdma_region; + cq_spec.queue_size = cq->gdma_cq->queue_size; + cq_spec.modr_ctx_id = 0; + cq_spec.attached_eq = cq->gdma_cq->cq.parent->id; + + err = mana_create_wq_obj(apc, apc->port_handle, GDMA_SQ, + &wq_spec, &cq_spec, + &apc->tx_qp[i].tx_object); + + if (err) + goto out; + + txq->gdma_sq->id = wq_spec.queue_index; + cq->gdma_cq->id = cq_spec.queue_index; + + txq->gdma_sq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION; + cq->gdma_cq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION; + + txq->gdma_txq_id = txq->gdma_sq->id; + + cq->gdma_id = cq->gdma_cq->id; + + if (WARN_ON(cq->gdma_id >= gc->max_num_cqs)) + return -EINVAL; + + gc->cq_table[cq->gdma_id] = cq->gdma_cq; + + mana_gd_arm_cq(cq->gdma_cq); + } + + return 0; +out: + mana_destroy_txq(apc); + return err; +} + +static void mana_napi_sync_for_rx(struct mana_rxq *rxq) +{ + struct net_device *ndev = rxq->ndev; + struct mana_port_context *apc; + u16 rxq_idx = rxq->rxq_idx; + struct napi_struct *napi; + struct gdma_queue *eq; + + apc = netdev_priv(ndev); + eq = apc->eqs[rxq_idx].eq; + napi = &eq->eq.napi; + + napi_synchronize(napi); +} + +static void mana_destroy_rxq(struct mana_port_context *apc, + struct mana_rxq *rxq, bool validate_state) + +{ + struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; + struct mana_recv_buf_oob *rx_oob; + struct device *dev = gc->dev; + int i; + + if (!rxq) + return; + + if (validate_state) + mana_napi_sync_for_rx(rxq); + + mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj); + + mana_deinit_cq(apc, &rxq->rx_cq); + + for (i = 0; i < rxq->num_rx_buf; i++) { + rx_oob = &rxq->rx_oobs[i]; + + if (!rx_oob->buf_va) + continue; + + dma_unmap_page(dev, rx_oob->buf_dma_addr, rxq->datasize, + DMA_FROM_DEVICE); + + free_page((unsigned long)rx_oob->buf_va); + rx_oob->buf_va = NULL; + } + + if (rxq->gdma_rq) + mana_gd_destroy_queue(gc, rxq->gdma_rq); + + kfree(rxq); +} + +#define MANA_WQE_HEADER_SIZE 16 +#define MANA_WQE_SGE_SIZE 16 + +static int mana_alloc_rx_wqe(struct mana_port_context *apc, + struct mana_rxq *rxq, u32 *rxq_size, u32 *cq_size) +{ + struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; + 
struct mana_recv_buf_oob *rx_oob; + struct device *dev = gc->dev; + struct page *page; + dma_addr_t da; + u32 buf_idx; + + WARN_ON(rxq->datasize == 0 || rxq->datasize > PAGE_SIZE); + + *rxq_size = 0; + *cq_size = 0; + + for (buf_idx = 0; buf_idx < rxq->num_rx_buf; buf_idx++) { + rx_oob = &rxq->rx_oobs[buf_idx]; + memset(rx_oob, 0, sizeof(*rx_oob)); + + page = alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + da = dma_map_page(dev, page, 0, rxq->datasize, DMA_FROM_DEVICE); + + if (dma_mapping_error(dev, da)) { + __free_page(page); + return -ENOMEM; + } + + rx_oob->buf_va = page_to_virt(page); + rx_oob->buf_dma_addr = da; + + rx_oob->num_sge = 1; + rx_oob->sgl[0].address = rx_oob->buf_dma_addr; + rx_oob->sgl[0].size = rxq->datasize; + rx_oob->sgl[0].mem_key = apc->ac->gdma_dev->gpa_mkey; + + rx_oob->wqe_req.sgl = rx_oob->sgl; + rx_oob->wqe_req.num_sge = rx_oob->num_sge; + rx_oob->wqe_req.inline_oob_size = 0; + rx_oob->wqe_req.inline_oob_data = NULL; + rx_oob->wqe_req.flags = 0; + rx_oob->wqe_req.client_data_unit = 0; + + *rxq_size += ALIGN(MANA_WQE_HEADER_SIZE + + MANA_WQE_SGE_SIZE * rx_oob->num_sge, 32); + *cq_size += COMP_ENTRY_SIZE; + } + + return 0; +} + +static int mana_push_wqe(struct mana_rxq *rxq) +{ + struct mana_recv_buf_oob *rx_oob; + u32 buf_idx; + int err; + + for (buf_idx = 0; buf_idx < rxq->num_rx_buf; buf_idx++) { + rx_oob = &rxq->rx_oobs[buf_idx]; + + err = mana_gd_post_and_ring(rxq->gdma_rq, &rx_oob->wqe_req, + &rx_oob->wqe_inf); + if (err) + return -ENOSPC; + } + + return 0; +} + +static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, + u32 rxq_idx, struct mana_eq *eq, + struct net_device *ndev) +{ + struct gdma_dev *gd = apc->ac->gdma_dev; + struct mana_obj_spec wq_spec; + struct mana_obj_spec cq_spec; + struct gdma_queue_spec spec; + struct mana_cq *cq = NULL; + struct gdma_context *gc; + u32 cq_size, rq_size; + struct mana_rxq *rxq; + int err; + + gc = gd->gdma_context; + + rxq = kzalloc(sizeof(*rxq) + + RX_BUFFERS_PER_QUEUE * sizeof(struct mana_recv_buf_oob), + GFP_KERNEL); + if (!rxq) + return NULL; + + rxq->ndev = ndev; + rxq->num_rx_buf = RX_BUFFERS_PER_QUEUE; + rxq->rxq_idx = rxq_idx; + rxq->datasize = ALIGN(MAX_FRAME_SIZE, 64); + rxq->rxobj = INVALID_MANA_HANDLE; + + err = mana_alloc_rx_wqe(apc, rxq, &rq_size, &cq_size); + if (err) + goto out; + + rq_size = PAGE_ALIGN(rq_size); + cq_size = PAGE_ALIGN(cq_size); + + /* Create RQ */ + memset(&spec, 0, sizeof(spec)); + spec.type = GDMA_RQ; + spec.monitor_avl_buf = true; + spec.queue_size = rq_size; + err = mana_gd_create_mana_wq_cq(gd, &spec, &rxq->gdma_rq); + if (err) + goto out; + + /* Create RQ's CQ */ + cq = &rxq->rx_cq; + cq->gdma_comp_buf = eq->cqe_poll; + cq->type = MANA_CQ_TYPE_RX; + cq->rxq = rxq; + + memset(&spec, 0, sizeof(spec)); + spec.type = GDMA_CQ; + spec.monitor_avl_buf = false; + spec.queue_size = cq_size; + spec.cq.callback = mana_cq_handler; + spec.cq.parent_eq = eq->eq; + spec.cq.context = cq; + err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq); + if (err) + goto out; + + memset(&wq_spec, 0, sizeof(wq_spec)); + memset(&cq_spec, 0, sizeof(cq_spec)); + wq_spec.gdma_region = rxq->gdma_rq->mem_info.gdma_region; + wq_spec.queue_size = rxq->gdma_rq->queue_size; + + cq_spec.gdma_region = cq->gdma_cq->mem_info.gdma_region; + cq_spec.queue_size = cq->gdma_cq->queue_size; + cq_spec.modr_ctx_id = 0; + cq_spec.attached_eq = cq->gdma_cq->cq.parent->id; + + err = mana_create_wq_obj(apc, apc->port_handle, GDMA_RQ, + &wq_spec, &cq_spec, &rxq->rxobj); + if (err) + goto out; + + 
rxq->gdma_rq->id = wq_spec.queue_index; + cq->gdma_cq->id = cq_spec.queue_index; + + rxq->gdma_rq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION; + cq->gdma_cq->mem_info.gdma_region = GDMA_INVALID_DMA_REGION; + + rxq->gdma_id = rxq->gdma_rq->id; + cq->gdma_id = cq->gdma_cq->id; + + err = mana_push_wqe(rxq); + if (err) + goto out; + + if (cq->gdma_id >= gc->max_num_cqs) + goto out; + + gc->cq_table[cq->gdma_id] = cq->gdma_cq; + + mana_gd_arm_cq(cq->gdma_cq); +out: + if (!err) + return rxq; + + netdev_err(ndev, "Failed to create RXQ: err = %d\n", err); + + mana_destroy_rxq(apc, rxq, false); + + if (cq) + mana_deinit_cq(apc, cq); + + return NULL; +} + +static int mana_add_rx_queues(struct mana_port_context *apc, + struct net_device *ndev) +{ + struct mana_rxq *rxq; + int err = 0; + int i; + + for (i = 0; i < apc->num_queues; i++) { + rxq = mana_create_rxq(apc, i, &apc->eqs[i], ndev); + if (!rxq) { + err = -ENOMEM; + goto out; + } + + u64_stats_init(&rxq->stats.syncp); + + apc->rxqs[i] = rxq; + } + + apc->default_rxobj = apc->rxqs[0]->rxobj; +out: + return err; +} + +static void mana_destroy_vport(struct mana_port_context *apc) +{ + struct mana_rxq *rxq; + u32 rxq_idx; + + for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) { + rxq = apc->rxqs[rxq_idx]; + if (!rxq) + continue; + + mana_destroy_rxq(apc, rxq, true); + apc->rxqs[rxq_idx] = NULL; + } + + mana_destroy_txq(apc); +} + +static int mana_create_vport(struct mana_port_context *apc, + struct net_device *net) +{ + struct gdma_dev *gd = apc->ac->gdma_dev; + int err; + + apc->default_rxobj = INVALID_MANA_HANDLE; + + err = mana_cfg_vport(apc, gd->pdid, gd->doorbell); + if (err) + return err; + + return mana_create_txq(apc, net); +} + +static void mana_rss_table_init(struct mana_port_context *apc) +{ + int i; + + for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) + apc->indir_table[i] = + ethtool_rxfh_indir_default(i, apc->num_queues); +} + +int mana_config_rss(struct mana_port_context *apc, enum TRI_STATE rx, + bool update_hash, bool update_tab) +{ + u32 queue_idx; + int i; + + if (update_tab) { + for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) { + queue_idx = apc->indir_table[i]; + apc->rxobj_table[i] = apc->rxqs[queue_idx]->rxobj; + } + } + + return mana_cfg_vport_steering(apc, rx, true, update_hash, update_tab); +} + +static int mana_init_port(struct net_device *ndev) +{ + struct mana_port_context *apc = netdev_priv(ndev); + u32 max_txq, max_rxq, max_queues; + int port_idx = apc->port_idx; + u32 num_indirect_entries; + int err; + + err = mana_init_port_context(apc); + if (err) + return err; + + err = mana_query_vport_cfg(apc, port_idx, &max_txq, &max_rxq, + &num_indirect_entries); + if (err) { + netdev_err(ndev, "Failed to query info for vPort 0\n"); + goto reset_apc; + } + + max_queues = min_t(u32, max_txq, max_rxq); + if (apc->max_queues > max_queues) + apc->max_queues = max_queues; + + if (apc->num_queues > apc->max_queues) + apc->num_queues = apc->max_queues; + + ether_addr_copy(ndev->dev_addr, apc->mac_addr); + + return 0; + +reset_apc: + kfree(apc->rxqs); + apc->rxqs = NULL; + return err; +} + +int mana_alloc_queues(struct net_device *ndev) +{ + struct mana_port_context *apc = netdev_priv(ndev); + struct gdma_dev *gd = apc->ac->gdma_dev; + int err; + + err = mana_create_eq(apc); + if (err) + return err; + + err = mana_create_vport(apc, ndev); + if (err) + goto destroy_eq; + + err = netif_set_real_num_tx_queues(ndev, apc->num_queues); + if (err) + goto destroy_vport; + + err = mana_add_rx_queues(apc, ndev); + if (err) + goto 
destroy_vport; + + apc->rss_state = apc->num_queues > 1 ? TRI_STATE_TRUE : TRI_STATE_FALSE; + + err = netif_set_real_num_rx_queues(ndev, apc->num_queues); + if (err) + goto destroy_vport; + + mana_rss_table_init(apc); + + err = mana_config_rss(apc, TRI_STATE_TRUE, true, true); + if (err) + goto destroy_vport; + + return 0; + +destroy_vport: + mana_destroy_vport(apc); +destroy_eq: + mana_destroy_eq(gd->gdma_context, apc); + return err; +} + +int mana_attach(struct net_device *ndev) +{ + struct mana_port_context *apc = netdev_priv(ndev); + int err; + + ASSERT_RTNL(); + + err = mana_init_port(ndev); + if (err) + return err; + + err = mana_alloc_queues(ndev); + if (err) { + kfree(apc->rxqs); + apc->rxqs = NULL; + return err; + } + + netif_device_attach(ndev); + + apc->port_is_up = apc->port_st_save; + + /* Ensure port state updated before txq state */ + smp_wmb(); + + if (apc->port_is_up) { + netif_carrier_on(ndev); + netif_tx_wake_all_queues(ndev); + } + + return 0; +} + +static int mana_dealloc_queues(struct net_device *ndev) +{ + struct mana_port_context *apc = netdev_priv(ndev); + struct mana_txq *txq; + int i, err; + + if (apc->port_is_up) + return -EINVAL; + + /* No packet can be transmitted now since apc->port_is_up is false. + * There is still a tiny chance that mana_poll_tx_cq() can re-enable + * a txq because it may not timely see apc->port_is_up being cleared + * to false, but it doesn't matter since mana_start_xmit() drops any + * new packets due to apc->port_is_up being false. + * + * Drain all the in-flight TX packets + */ + for (i = 0; i < apc->num_queues; i++) { + txq = &apc->tx_qp[i].txq; + + while (atomic_read(&txq->pending_sends) > 0) + usleep_range(1000, 2000); + } + + /* We're 100% sure the queues can no longer be woken up, because + * we're sure now mana_poll_tx_cq() can't be running. 
+ */ + + apc->rss_state = TRI_STATE_FALSE; + err = mana_config_rss(apc, TRI_STATE_FALSE, false, false); + if (err) { + netdev_err(ndev, "Failed to disable vPort: %d\n", err); + return err; + } + + /* TODO: Implement RX fencing */ + ssleep(1); + + mana_destroy_vport(apc); + + mana_destroy_eq(apc->ac->gdma_dev->gdma_context, apc); + + return 0; +} + +int mana_detach(struct net_device *ndev, bool from_close) +{ + struct mana_port_context *apc = netdev_priv(ndev); + int err; + + ASSERT_RTNL(); + + apc->port_st_save = apc->port_is_up; + apc->port_is_up = false; + + /* Ensure port state updated before txq state */ + smp_wmb(); + + netif_tx_disable(ndev); + netif_carrier_off(ndev); + + if (apc->port_st_save) { + err = mana_dealloc_queues(ndev); + if (err) + return err; + } + + if (!from_close) { + netif_device_detach(ndev); + mana_cleanup_port_context(apc); + } + + return 0; +} + +static int mana_probe_port(struct mana_context *ac, int port_idx, + struct net_device **ndev_storage) +{ + struct gdma_context *gc = ac->gdma_dev->gdma_context; + struct mana_port_context *apc; + struct net_device *ndev; + int err; + + ndev = alloc_etherdev_mq(sizeof(struct mana_port_context), + gc->max_num_queues); + if (!ndev) + return -ENOMEM; + + *ndev_storage = ndev; + + apc = netdev_priv(ndev); + apc->ac = ac; + apc->ndev = ndev; + apc->max_queues = gc->max_num_queues; + apc->num_queues = min_t(uint, gc->max_num_queues, MANA_MAX_NUM_QUEUES); + apc->port_handle = INVALID_MANA_HANDLE; + apc->port_idx = port_idx; + + ndev->netdev_ops = &mana_devops; + ndev->ethtool_ops = &mana_ethtool_ops; + ndev->mtu = ETH_DATA_LEN; + ndev->max_mtu = ndev->mtu; + ndev->min_mtu = ndev->mtu; + ndev->needed_headroom = MANA_HEADROOM; + SET_NETDEV_DEV(ndev, gc->dev); + + netif_carrier_off(ndev); + + netdev_rss_key_fill(apc->hashkey, MANA_HASH_KEY_SIZE); + + err = mana_init_port(ndev); + if (err) + goto free_net; + + netdev_lockdep_set_classes(ndev); + + ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + ndev->hw_features |= NETIF_F_RXCSUM; + ndev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6; + ndev->hw_features |= NETIF_F_RXHASH; + ndev->features = ndev->hw_features; + ndev->vlan_features = 0; + + err = register_netdev(ndev); + if (err) { + netdev_err(ndev, "Unable to register netdev.\n"); + goto reset_apc; + } + + return 0; + +reset_apc: + kfree(apc->rxqs); + apc->rxqs = NULL; +free_net: + *ndev_storage = NULL; + netdev_err(ndev, "Failed to probe vPort %d: %d\n", port_idx, err); + free_netdev(ndev); + return err; +} + +int mana_probe(struct gdma_dev *gd) +{ + struct gdma_context *gc = gd->gdma_context; + struct device *dev = gc->dev; + struct mana_context *ac; + int err; + int i; + + dev_info(dev, + "Microsoft Azure Network Adapter protocol version: %d.%d.%d\n", + MANA_MAJOR_VERSION, MANA_MINOR_VERSION, MANA_MICRO_VERSION); + + err = mana_gd_register_device(gd); + if (err) + return err; + + ac = kzalloc(sizeof(*ac), GFP_KERNEL); + if (!ac) + return -ENOMEM; + + ac->gdma_dev = gd; + ac->num_ports = 1; + gd->driver_data = ac; + + err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION, MANA_MINOR_VERSION, + MANA_MICRO_VERSION, &ac->num_ports); + if (err) + goto out; + + if (ac->num_ports > MAX_PORTS_IN_MANA_DEV) + ac->num_ports = MAX_PORTS_IN_MANA_DEV; + + for (i = 0; i < ac->num_ports; i++) { + err = mana_probe_port(ac, i, &ac->ports[i]); + if (err) + break; + } +out: + if (err) + mana_remove(gd); + + return err; +} + +void mana_remove(struct gdma_dev *gd) +{ + struct gdma_context *gc = gd->gdma_context; + struct 
mana_context *ac = gd->driver_data; + struct device *dev = gc->dev; + struct net_device *ndev; + int i; + + for (i = 0; i < ac->num_ports; i++) { + ndev = ac->ports[i]; + if (!ndev) { + if (i == 0) + dev_err(dev, "No net device to remove\n"); + goto out; + } + + /* All cleanup actions should stay after rtnl_lock(), otherwise + * other functions may access partially cleaned up data. + */ + rtnl_lock(); + + mana_detach(ndev, false); + + unregister_netdevice(ndev); + + rtnl_unlock(); + + free_netdev(ndev); + } +out: + mana_gd_deregister_device(gd); + gd->driver_data = NULL; + gd->gdma_context = NULL; + kfree(ac); +} diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c new file mode 100644 index 000000000000..7e74339f39ae --- /dev/null +++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c @@ -0,0 +1,250 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* Copyright (c) 2021, Microsoft Corporation. */ + +#include <linux/inetdevice.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> + +#include "mana.h" + +static const struct { + char name[ETH_GSTRING_LEN]; + u16 offset; +} mana_eth_stats[] = { + {"stop_queue", offsetof(struct mana_ethtool_stats, stop_queue)}, + {"wake_queue", offsetof(struct mana_ethtool_stats, wake_queue)}, +}; + +static int mana_get_sset_count(struct net_device *ndev, int stringset) +{ + struct mana_port_context *apc = netdev_priv(ndev); + unsigned int num_queues = apc->num_queues; + + if (stringset != ETH_SS_STATS) + return -EINVAL; + + return ARRAY_SIZE(mana_eth_stats) + num_queues * 4; +} + +static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data) +{ + struct mana_port_context *apc = netdev_priv(ndev); + unsigned int num_queues = apc->num_queues; + u8 *p = data; + int i; + + if (stringset != ETH_SS_STATS) + return; + + for (i = 0; i < ARRAY_SIZE(mana_eth_stats); i++) { + memcpy(p, mana_eth_stats[i].name, ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + + for (i = 0; i < num_queues; i++) { + sprintf(p, "rx_%d_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_%d_bytes", i); + p += ETH_GSTRING_LEN; + } + + for (i = 0; i < num_queues; i++) { + sprintf(p, "tx_%d_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_%d_bytes", i); + p += ETH_GSTRING_LEN; + } +} + +static void mana_get_ethtool_stats(struct net_device *ndev, + struct ethtool_stats *e_stats, u64 *data) +{ + struct mana_port_context *apc = netdev_priv(ndev); + unsigned int num_queues = apc->num_queues; + void *eth_stats = &apc->eth_stats; + struct mana_stats *stats; + unsigned int start; + u64 packets, bytes; + int q, i = 0; + + if (!apc->port_is_up) + return; + + for (q = 0; q < ARRAY_SIZE(mana_eth_stats); q++) + data[i++] = *(u64 *)(eth_stats + mana_eth_stats[q].offset); + + for (q = 0; q < num_queues; q++) { + stats = &apc->rxqs[q]->stats; + + do { + start = u64_stats_fetch_begin_irq(&stats->syncp); + packets = stats->packets; + bytes = stats->bytes; + } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + + data[i++] = packets; + data[i++] = bytes; + } + + for (q = 0; q < num_queues; q++) { + stats = &apc->tx_qp[q].txq.stats; + + do { + start = u64_stats_fetch_begin_irq(&stats->syncp); + packets = stats->packets; + bytes = stats->bytes; + } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + + data[i++] = packets; + data[i++] = bytes; + } +} + +static int mana_get_rxnfc(struct net_device *ndev, struct ethtool_rxnfc *cmd, + u32 *rules) +{ + struct mana_port_context *apc = netdev_priv(ndev); 
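+	/* Only the RX ring count query is supported here; ethtool uses it,
+	 * for instance, to validate RSS indirection table entries. All
+	 * other commands fall through to -EOPNOTSUPP.
+	 */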
+ + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + cmd->data = apc->num_queues; + return 0; + } + + return -EOPNOTSUPP; +} + +static u32 mana_get_rxfh_key_size(struct net_device *ndev) +{ + return MANA_HASH_KEY_SIZE; +} + +static u32 mana_rss_indir_size(struct net_device *ndev) +{ + return MANA_INDIRECT_TABLE_SIZE; +} + +static int mana_get_rxfh(struct net_device *ndev, u32 *indir, u8 *key, + u8 *hfunc) +{ + struct mana_port_context *apc = netdev_priv(ndev); + int i; + + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; /* Toeplitz */ + + if (indir) { + for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) + indir[i] = apc->indir_table[i]; + } + + if (key) + memcpy(key, apc->hashkey, MANA_HASH_KEY_SIZE); + + return 0; +} + +static int mana_set_rxfh(struct net_device *ndev, const u32 *indir, + const u8 *key, const u8 hfunc) +{ + struct mana_port_context *apc = netdev_priv(ndev); + bool update_hash = false, update_table = false; + u32 save_table[MANA_INDIRECT_TABLE_SIZE]; + u8 save_key[MANA_HASH_KEY_SIZE]; + int i, err; + + if (!apc->port_is_up) + return -EOPNOTSUPP; + + if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) + return -EOPNOTSUPP; + + if (indir) { + for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) + if (indir[i] >= apc->num_queues) + return -EINVAL; + + update_table = true; + for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) { + save_table[i] = apc->indir_table[i]; + apc->indir_table[i] = indir[i]; + } + } + + if (key) { + update_hash = true; + memcpy(save_key, apc->hashkey, MANA_HASH_KEY_SIZE); + memcpy(apc->hashkey, key, MANA_HASH_KEY_SIZE); + } + + err = mana_config_rss(apc, TRI_STATE_TRUE, update_hash, update_table); + + if (err) { /* recover to original values */ + if (update_table) { + for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) + apc->indir_table[i] = save_table[i]; + } + + if (update_hash) + memcpy(apc->hashkey, save_key, MANA_HASH_KEY_SIZE); + + mana_config_rss(apc, TRI_STATE_TRUE, update_hash, update_table); + } + + return err; +} + +static void mana_get_channels(struct net_device *ndev, + struct ethtool_channels *channel) +{ + struct mana_port_context *apc = netdev_priv(ndev); + + channel->max_combined = apc->max_queues; + channel->combined_count = apc->num_queues; +} + +static int mana_set_channels(struct net_device *ndev, + struct ethtool_channels *channels) +{ + struct mana_port_context *apc = netdev_priv(ndev); + unsigned int new_count = channels->combined_count; + unsigned int old_count = apc->num_queues; + int err, err2; + + if (!apc->port_is_up) + return -EOPNOTSUPP; + + err = mana_detach(ndev, false); + if (err) { + netdev_err(ndev, "mana_detach failed: %d\n", err); + return err; + } + + apc->num_queues = new_count; + err = mana_attach(ndev); + if (!err) + return 0; + + netdev_err(ndev, "mana_attach failed: %d\n", err); + + /* Try to roll it back to the old configuration. 
*/ + apc->num_queues = old_count; + err2 = mana_attach(ndev); + if (err2) + netdev_err(ndev, "mana re-attach failed: %d\n", err2); + + return err; +} + +const struct ethtool_ops mana_ethtool_ops = { + .get_ethtool_stats = mana_get_ethtool_stats, + .get_sset_count = mana_get_sset_count, + .get_strings = mana_get_strings, + .get_rxnfc = mana_get_rxnfc, + .get_rxfh_key_size = mana_get_rxfh_key_size, + .get_rxfh_indir_size = mana_rss_indir_size, + .get_rxfh = mana_get_rxfh, + .set_rxfh = mana_set_rxfh, + .get_channels = mana_get_channels, + .set_channels = mana_set_channels, +}; diff --git a/drivers/net/ethernet/microsoft/mana/shm_channel.c b/drivers/net/ethernet/microsoft/mana/shm_channel.c new file mode 100644 index 000000000000..da255da62176 --- /dev/null +++ b/drivers/net/ethernet/microsoft/mana/shm_channel.c @@ -0,0 +1,291 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* Copyright (c) 2021, Microsoft Corporation. */ + +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/io.h> +#include <linux/mm.h> + +#include "shm_channel.h" + +#define PAGE_FRAME_L48_WIDTH_BYTES 6 +#define PAGE_FRAME_L48_WIDTH_BITS (PAGE_FRAME_L48_WIDTH_BYTES * 8) +#define PAGE_FRAME_L48_MASK 0x0000FFFFFFFFFFFF +#define PAGE_FRAME_H4_WIDTH_BITS 4 +#define VECTOR_MASK 0xFFFF +#define SHMEM_VF_RESET_STATE ((u32)-1) + +#define SMC_MSG_TYPE_ESTABLISH_HWC 1 +#define SMC_MSG_TYPE_ESTABLISH_HWC_VERSION 0 + +#define SMC_MSG_TYPE_DESTROY_HWC 2 +#define SMC_MSG_TYPE_DESTROY_HWC_VERSION 0 + +#define SMC_MSG_DIRECTION_REQUEST 0 +#define SMC_MSG_DIRECTION_RESPONSE 1 + +/* Structures labeled with "HW DATA" are exchanged with the hardware. All of + * them are naturally aligned and hence don't need __packed. + */ + +/* Shared memory channel protocol header + * + * msg_type: set on request and response; response matches request. + * msg_version: newer PF writes back older response (matching request) + * older PF acts on latest version known and sets that version in result + * (less than request). + * direction: 0 for request, VF->PF; 1 for response, PF->VF. + * status: 0 on request, + * operation result on response (success = 0, failure = 1 or greater). + * reset_vf: If set on either establish or destroy request, indicates perform + * FLR before/after the operation. + * owner_is_pf: 1 indicates PF owned, 0 indicates VF owned. + */ +union smc_proto_hdr { + u32 as_uint32; + + struct { + u8 msg_type : 3; + u8 msg_version : 3; + u8 reserved_1 : 1; + u8 direction : 1; + + u8 status; + + u8 reserved_2; + + u8 reset_vf : 1; + u8 reserved_3 : 6; + u8 owner_is_pf : 1; + }; +}; /* HW DATA */ + +#define SMC_APERTURE_BITS 256 +#define SMC_BASIC_UNIT (sizeof(u32)) +#define SMC_APERTURE_DWORDS (SMC_APERTURE_BITS / (SMC_BASIC_UNIT * 8)) +#define SMC_LAST_DWORD (SMC_APERTURE_DWORDS - 1) + +static int mana_smc_poll_register(void __iomem *base, bool reset) +{ + void __iomem *ptr = base + SMC_LAST_DWORD * SMC_BASIC_UNIT; + u32 last_dword; + int i; + + /* Poll the hardware for the ownership bit. This should be pretty fast, + * but let's do it in a loop just in case the hardware or the PF + * driver are temporarily busy. + */ + for (i = 0; i < 20 * 1000; i++) { + last_dword = readl(ptr); + + /* shmem reads as 0xFFFFFFFF in the reset case */ + if (reset && last_dword == SHMEM_VF_RESET_STATE) + return 0; + + /* If bit_31 is set, the PF currently owns the SMC. 
*/ + if (!(last_dword & BIT(31))) + return 0; + + usleep_range(1000, 2000); + } + + return -ETIMEDOUT; +} + +static int mana_smc_read_response(struct shm_channel *sc, u32 msg_type, + u32 msg_version, bool reset_vf) +{ + void __iomem *base = sc->base; + union smc_proto_hdr hdr; + int err; + + /* Wait for PF to respond. */ + err = mana_smc_poll_register(base, reset_vf); + if (err) + return err; + + hdr.as_uint32 = readl(base + SMC_LAST_DWORD * SMC_BASIC_UNIT); + + if (reset_vf && hdr.as_uint32 == SHMEM_VF_RESET_STATE) + return 0; + + /* Validate protocol fields from the PF driver */ + if (hdr.msg_type != msg_type || hdr.msg_version > msg_version || + hdr.direction != SMC_MSG_DIRECTION_RESPONSE) { + dev_err(sc->dev, "Wrong SMC response 0x%x, type=%d, ver=%d\n", + hdr.as_uint32, msg_type, msg_version); + return -EPROTO; + } + + /* Validate the operation result */ + if (hdr.status != 0) { + dev_err(sc->dev, "SMC operation failed: 0x%x\n", hdr.status); + return -EPROTO; + } + + return 0; +} + +void mana_smc_init(struct shm_channel *sc, struct device *dev, + void __iomem *base) +{ + sc->dev = dev; + sc->base = base; +} + +int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr, + u64 cq_addr, u64 rq_addr, u64 sq_addr, + u32 eq_msix_index) +{ + union smc_proto_hdr *hdr; + u16 all_addr_h4bits = 0; + u16 frame_addr_seq = 0; + u64 frame_addr = 0; + u8 shm_buf[32]; + u64 *shmem; + u32 *dword; + u8 *ptr; + int err; + int i; + + /* Ensure VF already has possession of shared memory */ + err = mana_smc_poll_register(sc->base, false); + if (err) { + dev_err(sc->dev, "Timeout when setting up HWC: %d\n", err); + return err; + } + + if (!PAGE_ALIGNED(eq_addr) || !PAGE_ALIGNED(cq_addr) || + !PAGE_ALIGNED(rq_addr) || !PAGE_ALIGNED(sq_addr)) + return -EINVAL; + + if ((eq_msix_index & VECTOR_MASK) != eq_msix_index) + return -EINVAL; + + /* Scheme for packing four addresses and extra info into 256 bits. + * + * Addresses must be page frame aligned, so only frame address bits + * are transferred. + * + * 52-bit frame addresses are split into the lower 48 bits and upper + * 4 bits. Lower 48 bits of 4 address are written sequentially from + * the start of the 256-bit shared memory region followed by 16 bits + * containing the upper 4 bits of the 4 addresses in sequence. + * + * A 16 bit EQ vector number fills out the next-to-last 32-bit dword. + * + * The final 32-bit dword is used for protocol control information as + * defined in smc_proto_hdr. 
+ */ + + memset(shm_buf, 0, sizeof(shm_buf)); + ptr = shm_buf; + + /* EQ addr: low 48 bits of frame address */ + shmem = (u64 *)ptr; + frame_addr = PHYS_PFN(eq_addr); + *shmem = frame_addr & PAGE_FRAME_L48_MASK; + all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << + (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); + ptr += PAGE_FRAME_L48_WIDTH_BYTES; + + /* CQ addr: low 48 bits of frame address */ + shmem = (u64 *)ptr; + frame_addr = PHYS_PFN(cq_addr); + *shmem = frame_addr & PAGE_FRAME_L48_MASK; + all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << + (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); + ptr += PAGE_FRAME_L48_WIDTH_BYTES; + + /* RQ addr: low 48 bits of frame address */ + shmem = (u64 *)ptr; + frame_addr = PHYS_PFN(rq_addr); + *shmem = frame_addr & PAGE_FRAME_L48_MASK; + all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << + (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); + ptr += PAGE_FRAME_L48_WIDTH_BYTES; + + /* SQ addr: low 48 bits of frame address */ + shmem = (u64 *)ptr; + frame_addr = PHYS_PFN(sq_addr); + *shmem = frame_addr & PAGE_FRAME_L48_MASK; + all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << + (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); + ptr += PAGE_FRAME_L48_WIDTH_BYTES; + + /* High 4 bits of the four frame addresses */ + *((u16 *)ptr) = all_addr_h4bits; + ptr += sizeof(u16); + + /* EQ MSIX vector number */ + *((u16 *)ptr) = (u16)eq_msix_index; + ptr += sizeof(u16); + + /* 32-bit protocol header in final dword */ + *((u32 *)ptr) = 0; + + hdr = (union smc_proto_hdr *)ptr; + hdr->msg_type = SMC_MSG_TYPE_ESTABLISH_HWC; + hdr->msg_version = SMC_MSG_TYPE_ESTABLISH_HWC_VERSION; + hdr->direction = SMC_MSG_DIRECTION_REQUEST; + hdr->reset_vf = reset_vf; + + /* Write the 256-bit message buffer to shared memory (the final 32-bit + * write triggers HW to set the possession bit to PF). + */ + dword = (u32 *)shm_buf; + for (i = 0; i < SMC_APERTURE_DWORDS; i++) + writel(*dword++, sc->base + i * SMC_BASIC_UNIT); + + /* Read shmem response (polling for VF possession) and validate. + * For setup, waiting for response on shared memory is not strictly + * necessary, since wait occurs later for results to appear in EQEs. + */ + err = mana_smc_read_response(sc, SMC_MSG_TYPE_ESTABLISH_HWC, + SMC_MSG_TYPE_ESTABLISH_HWC_VERSION, + reset_vf); + if (err) { + dev_err(sc->dev, "Error when setting up HWC: %d\n", err); + return err; + } + + return 0; +} + +int mana_smc_teardown_hwc(struct shm_channel *sc, bool reset_vf) +{ + union smc_proto_hdr hdr = {}; + int err; + + /* Ensure the VF already has possession of shared memory */ + err = mana_smc_poll_register(sc->base, false); + if (err) { + dev_err(sc->dev, "Timeout when tearing down HWC\n"); + return err; + } + + /* Set up protocol header for HWC destroy message */ + hdr.msg_type = SMC_MSG_TYPE_DESTROY_HWC; + hdr.msg_version = SMC_MSG_TYPE_DESTROY_HWC_VERSION; + hdr.direction = SMC_MSG_DIRECTION_REQUEST; + hdr.reset_vf = reset_vf; + + /* Write message in high 32 bits of 256-bit shared memory, causing HW + * to set possession bit to PF. + */ + writel(hdr.as_uint32, sc->base + SMC_LAST_DWORD * SMC_BASIC_UNIT); + + /* Read shmem response (polling for VF possession) and validate. + * For teardown, waiting for response is required to ensure hardware + * invalidates MST entries before software frees memory.
+ */ + err = mana_smc_read_response(sc, SMC_MSG_TYPE_DESTROY_HWC, + SMC_MSG_TYPE_DESTROY_HWC_VERSION, + reset_vf); + if (err) { + dev_err(sc->dev, "Error when tearing down HWC: %d\n", err); + return err; + } + + return 0; +} diff --git a/drivers/net/ethernet/microsoft/mana/shm_channel.h b/drivers/net/ethernet/microsoft/mana/shm_channel.h new file mode 100644 index 000000000000..5199b41497ff --- /dev/null +++ b/drivers/net/ethernet/microsoft/mana/shm_channel.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +/* Copyright (c) 2021, Microsoft Corporation. */ + +#ifndef _SHM_CHANNEL_H +#define _SHM_CHANNEL_H + +struct shm_channel { + struct device *dev; + void __iomem *base; +}; + +void mana_smc_init(struct shm_channel *sc, struct device *dev, + void __iomem *base); + +int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr, + u64 cq_addr, u64 rq_addr, u64 sq_addr, + u32 eq_msix_index); + +int mana_smc_teardown_hwc(struct shm_channel *sc, bool reset_vf); + +#endif /* _SHM_CHANNEL_H */ diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c index 49fd843c4c8a..b85733942053 100644 --- a/drivers/net/ethernet/moxa/moxart_ether.c +++ b/drivers/net/ethernet/moxa/moxart_ether.c @@ -485,7 +485,6 @@ static int moxart_mac_probe(struct platform_device *pdev) ndev->base_addr = res->start; priv->base = devm_ioremap_resource(p_dev, res); if (IS_ERR(priv->base)) { - dev_err(p_dev, "devm_ioremap_resource failed\n"); ret = PTR_ERR(priv->base); goto init_fail; } diff --git a/drivers/net/ethernet/mscc/Kconfig b/drivers/net/ethernet/mscc/Kconfig index 05cb040c2677..2d3157e4d081 100644 --- a/drivers/net/ethernet/mscc/Kconfig +++ b/drivers/net/ethernet/mscc/Kconfig @@ -11,7 +11,7 @@ config NET_VENDOR_MICROSEMI if NET_VENDOR_MICROSEMI -# Users should depend on NET_SWITCHDEV, HAS_IOMEM +# Users should depend on NET_SWITCHDEV, HAS_IOMEM, BRIDGE config MSCC_OCELOT_SWITCH_LIB select NET_DEVLINK select REGMAP_MMIO @@ -24,6 +24,7 @@ config MSCC_OCELOT_SWITCH_LIB config MSCC_OCELOT_SWITCH tristate "Ocelot switch driver" + depends on BRIDGE || BRIDGE=n depends on NET_SWITCHDEV depends on HAS_IOMEM depends on OF_NET diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 46e5c9136bac..0c4283319d7f 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -6,6 +6,7 @@ */ #include <linux/dsa/ocelot.h> #include <linux/if_bridge.h> +#include <linux/ptp_classify.h> #include <soc/mscc/ocelot_vcap.h> #include "ocelot.h" #include "ocelot_vcap.h" @@ -484,7 +485,8 @@ void ocelot_adjust_link(struct ocelot *ocelot, int port, DEV_MAC_ENA_CFG_TX_ENA, DEV_MAC_ENA_CFG); /* Take MAC, Port, Phy (intern) and PCS (SGMII/Serdes) clock out of - * reset */ + * reset + */ ocelot_port_writel(ocelot_port, DEV_CLOCK_CFG_LINK_SPEED(speed), DEV_CLOCK_CFG); @@ -529,22 +531,92 @@ void ocelot_port_disable(struct ocelot *ocelot, int port) } EXPORT_SYMBOL(ocelot_port_disable); -void ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port, - struct sk_buff *clone) +static void ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port, + struct sk_buff *clone) { struct ocelot_port *ocelot_port = ocelot->ports[port]; spin_lock(&ocelot_port->ts_id_lock); skb_shinfo(clone)->tx_flags |= SKBTX_IN_PROGRESS; - /* Store timestamp ID in cb[0] of sk_buff */ - clone->cb[0] = ocelot_port->ts_id; + /* Store timestamp ID in OCELOT_SKB_CB(clone)->ts_id */ + OCELOT_SKB_CB(clone)->ts_id = ocelot_port->ts_id; 
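OCELOT_SKB_CB() replaces the old raw indexing into skb->cb[0] with a typed view over the control block. The pattern costs nothing at runtime, a cast plus a compile-time bound check; the struct below is an illustrative stand-in, the real struct ocelot_skb_cb is defined in the driver headers.

#include <linux/build_bug.h>
#include <linux/skbuff.h>

struct example_skb_cb {
	struct sk_buff *clone;	/* matching two-step timestamp clone */
	u8 ptp_cmd;		/* IFH rewrite operation */
	u8 ts_id;		/* timestamp FIFO identifier */
};

#define EXAMPLE_SKB_CB(skb) \
	((struct example_skb_cb *)((skb)->cb))

static inline void example_skb_cb_check(void)
{
	/* skb->cb is 48 bytes; the private struct must never outgrow it */
	BUILD_BUG_ON(sizeof(struct example_skb_cb) >
		     sizeof_field(struct sk_buff, cb));
}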
ocelot_port->ts_id = (ocelot_port->ts_id + 1) % 4; skb_queue_tail(&ocelot_port->tx_skbs, clone); spin_unlock(&ocelot_port->ts_id_lock); } -EXPORT_SYMBOL(ocelot_port_add_txtstamp_skb); + +u32 ocelot_ptp_rew_op(struct sk_buff *skb) +{ + struct sk_buff *clone = OCELOT_SKB_CB(skb)->clone; + u8 ptp_cmd = OCELOT_SKB_CB(skb)->ptp_cmd; + u32 rew_op = 0; + + if (ptp_cmd == IFH_REW_OP_TWO_STEP_PTP && clone) { + rew_op = ptp_cmd; + rew_op |= OCELOT_SKB_CB(clone)->ts_id << 3; + } else if (ptp_cmd == IFH_REW_OP_ORIGIN_PTP) { + rew_op = ptp_cmd; + } + + return rew_op; +} +EXPORT_SYMBOL(ocelot_ptp_rew_op); + +static bool ocelot_ptp_is_onestep_sync(struct sk_buff *skb) +{ + struct ptp_header *hdr; + unsigned int ptp_class; + u8 msgtype, twostep; + + ptp_class = ptp_classify_raw(skb); + if (ptp_class == PTP_CLASS_NONE) + return false; + + hdr = ptp_parse_header(skb, ptp_class); + if (!hdr) + return false; + + msgtype = ptp_get_msgtype(hdr, ptp_class); + twostep = hdr->flag_field[0] & 0x2; + + if (msgtype == PTP_MSGTYPE_SYNC && twostep == 0) + return true; + + return false; +} + +int ocelot_port_txtstamp_request(struct ocelot *ocelot, int port, + struct sk_buff *skb, + struct sk_buff **clone) +{ + struct ocelot_port *ocelot_port = ocelot->ports[port]; + u8 ptp_cmd = ocelot_port->ptp_cmd; + + /* Store ptp_cmd in OCELOT_SKB_CB(skb)->ptp_cmd */ + if (ptp_cmd == IFH_REW_OP_ORIGIN_PTP) { + if (ocelot_ptp_is_onestep_sync(skb)) { + OCELOT_SKB_CB(skb)->ptp_cmd = ptp_cmd; + return 0; + } + + /* Fall back to two-step timestamping */ + ptp_cmd = IFH_REW_OP_TWO_STEP_PTP; + } + + if (ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) { + *clone = skb_clone_sk(skb); + if (!(*clone)) + return -ENOMEM; + + ocelot_port_add_txtstamp_skb(ocelot, port, *clone); + OCELOT_SKB_CB(skb)->ptp_cmd = ptp_cmd; + } + + return 0; +} +EXPORT_SYMBOL(ocelot_port_txtstamp_request); static void ocelot_get_hwtimestamp(struct ocelot *ocelot, struct timespec64 *ts) @@ -603,7 +675,7 @@ void ocelot_get_txtstamp(struct ocelot *ocelot) spin_lock_irqsave(&port->tx_skbs.lock, flags); skb_queue_walk_safe(&port->tx_skbs, skb, skb_tmp) { - if (skb->cb[0] != id) + if (OCELOT_SKB_CB(skb)->ts_id != id) continue; __skb_unlink(skb, &port->tx_skbs); skb_match = skb; @@ -687,7 +759,7 @@ static int ocelot_xtr_poll_xfh(struct ocelot *ocelot, int grp, u32 *xfh) int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **nskb) { struct skb_shared_hwtstamps *shhwtstamps; - u64 tod_in_ns, full_ts_in_ns, cpuq; + u64 tod_in_ns, full_ts_in_ns; u64 timestamp, src_port, len; u32 xfh[OCELOT_TAG_LEN / 4]; struct net_device *dev; @@ -704,7 +776,6 @@ int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **nskb) ocelot_xfh_get_src_port(xfh, &src_port); ocelot_xfh_get_len(xfh, &len); ocelot_xfh_get_rew_val(xfh, ×tamp); - ocelot_xfh_get_cpuq(xfh, &cpuq); if (WARN_ON(src_port >= ocelot->num_phys_ports)) return -EINVAL; @@ -767,17 +838,11 @@ int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **nskb) /* Everything we see on an interface that is in the HW bridge * has already been forwarded. 
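One consequence of ocelot_ptp_rew_op() above is worth spelling out: the rewrite opcode placed into the injection header carries the PTP command in its low bits and, for two-step timestamping, the timestamp ID from bit 3 upward, which is how a later TX timestamp FIFO entry is matched back to the right clone. A condensed model of the encoding (the IFH_REW_OP_* values are the driver's):

/* rew_op layout: | ts_id << 3 | ptp_cmd | */
static u32 model_ptp_rew_op(u8 ptp_cmd, bool have_clone, u8 ts_id)
{
	if (ptp_cmd == IFH_REW_OP_TWO_STEP_PTP && have_clone)
		return ptp_cmd | ((u32)ts_id << 3);

	if (ptp_cmd == IFH_REW_OP_ORIGIN_PTP)	/* one-step sync */
		return ptp_cmd;

	return 0;				/* no timestamping */
}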
*/ - if (ocelot->bridge_mask & BIT(src_port)) + if (ocelot->ports[src_port]->bridge) skb->offload_fwd_mark = 1; skb->protocol = eth_type_trans(skb, dev); -#if IS_ENABLED(CONFIG_BRIDGE_MRP) - if (skb->protocol == cpu_to_be16(ETH_P_MRP) && - cpuq & BIT(OCELOT_MRP_CPUQ)) - skb->offload_fwd_mark = 0; -#endif - *nskb = skb; return 0; @@ -1190,6 +1255,26 @@ static u32 ocelot_get_bond_mask(struct ocelot *ocelot, struct net_device *bond, return mask; } +static u32 ocelot_get_bridge_fwd_mask(struct ocelot *ocelot, + struct net_device *bridge) +{ + u32 mask = 0; + int port; + + for (port = 0; port < ocelot->num_phys_ports; port++) { + struct ocelot_port *ocelot_port = ocelot->ports[port]; + + if (!ocelot_port) + continue; + + if (ocelot_port->stp_state == BR_STATE_FORWARDING && + ocelot_port->bridge == bridge) + mask |= BIT(port); + } + + return mask; +} + static u32 ocelot_get_dsa_8021q_cpu_mask(struct ocelot *ocelot) { u32 mask = 0; @@ -1239,10 +1324,12 @@ void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot) */ mask = GENMASK(ocelot->num_phys_ports - 1, 0); mask &= ~cpu_fwd_mask; - } else if (ocelot->bridge_fwd_mask & BIT(port)) { + } else if (ocelot_port->bridge) { + struct net_device *bridge = ocelot_port->bridge; struct net_device *bond = ocelot_port->bond; - mask = ocelot->bridge_fwd_mask & ~BIT(port); + mask = ocelot_get_bridge_fwd_mask(ocelot, bridge); + mask &= ~BIT(port); if (bond) { mask &= ~ocelot_get_bond_mask(ocelot, bond, false); @@ -1263,29 +1350,16 @@ EXPORT_SYMBOL(ocelot_apply_bridge_fwd_mask); void ocelot_bridge_stp_state_set(struct ocelot *ocelot, int port, u8 state) { struct ocelot_port *ocelot_port = ocelot->ports[port]; - u32 port_cfg; + u32 learn_ena = 0; - if (!(BIT(port) & ocelot->bridge_mask)) - return; + ocelot_port->stp_state = state; - port_cfg = ocelot_read_gix(ocelot, ANA_PORT_PORT_CFG, port); + if ((state == BR_STATE_LEARNING || state == BR_STATE_FORWARDING) && + ocelot_port->learn_ena) + learn_ena = ANA_PORT_PORT_CFG_LEARN_ENA; - switch (state) { - case BR_STATE_FORWARDING: - ocelot->bridge_fwd_mask |= BIT(port); - fallthrough; - case BR_STATE_LEARNING: - if (ocelot_port->learn_ena) - port_cfg |= ANA_PORT_PORT_CFG_LEARN_ENA; - break; - - default: - port_cfg &= ~ANA_PORT_PORT_CFG_LEARN_ENA; - ocelot->bridge_fwd_mask &= ~BIT(port); - break; - } - - ocelot_write_gix(ocelot, port_cfg, ANA_PORT_PORT_CFG, port); + ocelot_rmw_gix(ocelot, learn_ena, ANA_PORT_PORT_CFG_LEARN_ENA, + ANA_PORT_PORT_CFG, port); ocelot_apply_bridge_fwd_mask(ocelot); } @@ -1512,43 +1586,28 @@ int ocelot_port_mdb_del(struct ocelot *ocelot, int port, } EXPORT_SYMBOL(ocelot_port_mdb_del); -int ocelot_port_bridge_join(struct ocelot *ocelot, int port, - struct net_device *bridge) +void ocelot_port_bridge_join(struct ocelot *ocelot, int port, + struct net_device *bridge) { - if (!ocelot->bridge_mask) { - ocelot->hw_bridge_dev = bridge; - } else { - if (ocelot->hw_bridge_dev != bridge) - /* This is adding the port to a second bridge, this is - * unsupported */ - return -ENODEV; - } + struct ocelot_port *ocelot_port = ocelot->ports[port]; - ocelot->bridge_mask |= BIT(port); + ocelot_port->bridge = bridge; - return 0; + ocelot_apply_bridge_fwd_mask(ocelot); } EXPORT_SYMBOL(ocelot_port_bridge_join); -int ocelot_port_bridge_leave(struct ocelot *ocelot, int port, - struct net_device *bridge) +void ocelot_port_bridge_leave(struct ocelot *ocelot, int port, + struct net_device *bridge) { + struct ocelot_port *ocelot_port = ocelot->ports[port]; struct ocelot_vlan pvid = {0}, native_vlan = {0}; - int ret; - 
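ocelot_get_bridge_fwd_mask() is the core of this rework: the forwarding domain is now derived from per-port bridge pointers and STP states instead of a single global bridge_mask, which is what makes more than one offloaded bridge possible. A self-contained model of the computation, folding in the caller's removal of the source port (BR_STATE_FORWARDING is represented here by an illustrative constant):

#include <stdint.h>

#define MODEL_BR_STATE_FORWARDING 3

struct model_port {
	const void *bridge;	/* upper bridge, NULL if standalone */
	int stp_state;
};

static uint32_t model_bridge_fwd_mask(const struct model_port *ports,
				      int nports, int self)
{
	uint32_t mask = 0;
	int p;

	if (!ports[self].bridge)
		return 0;	/* standalone ports forward nowhere */

	for (p = 0; p < nports; p++) {
		if (p == self)
			continue;
		if (ports[p].bridge == ports[self].bridge &&
		    ports[p].stp_state == MODEL_BR_STATE_FORWARDING)
			mask |= 1u << p;
	}

	return mask;
}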
ocelot->bridge_mask &= ~BIT(port); - - if (!ocelot->bridge_mask) - ocelot->hw_bridge_dev = NULL; - - ret = ocelot_port_vlan_filtering(ocelot, port, false); - if (ret) - return ret; + ocelot_port->bridge = NULL; ocelot_port_set_pvid(ocelot, port, pvid); ocelot_port_set_native_vlan(ocelot, port, native_vlan); - - return 0; + ocelot_apply_bridge_fwd_mask(ocelot); } EXPORT_SYMBOL(ocelot_port_bridge_leave); @@ -2051,6 +2110,9 @@ int ocelot_init(struct ocelot *ocelot) ocelot_write_rix(ocelot, val, ANA_PGID_PGID, i); } + + ocelot_write_rix(ocelot, 0, ANA_PGID_PGID, PGID_BLACKHOLE); + /* Allow broadcast and unknown L2 multicast to the CPU. */ ocelot_rmw_rix(ocelot, ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports)), ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports)), diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c index a41b458b1b3e..8b843d3c9189 100644 --- a/drivers/net/ethernet/mscc/ocelot_flower.c +++ b/drivers/net/ethernet/mscc/ocelot_flower.c @@ -220,6 +220,11 @@ static int ocelot_flower_parse_action(struct ocelot *ocelot, int port, "Last action must be GOTO"); return -EOPNOTSUPP; } + if (a->police.rate_pkt_ps) { + NL_SET_ERR_MSG_MOD(extack, + "QoS offload not support packets per second"); + return -EOPNOTSUPP; + } filter->action.police_ena = true; rate = a->police.rate_bytes_ps; filter->action.pol.rate = div_u64(rate, 1000) * 8; diff --git a/drivers/net/ethernet/mscc/ocelot_mrp.c b/drivers/net/ethernet/mscc/ocelot_mrp.c index 683da320bfd8..08b481a93460 100644 --- a/drivers/net/ethernet/mscc/ocelot_mrp.c +++ b/drivers/net/ethernet/mscc/ocelot_mrp.c @@ -1,9 +1,6 @@ // SPDX-License-Identifier: (GPL-2.0 OR MIT) /* Microsemi Ocelot Switch driver * - * This contains glue logic between the switchdev driver operations and the - * mscc_ocelot_switch_lib. 
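The PGID_BLACKHOLE initialization added to ocelot_init() above pairs with the MRP rework that follows: a destination port group with an empty member mask gives the MAC table somewhere to point when a frame must not be flooded, while VCAP IS2 rules can still trap or redirect it first. A sketch of the pairing using the driver calls as they appear in this patch (dmac and vid are placeholders):

/* one-time init: no ports are members of the blackhole group */
ocelot_write_rix(ocelot, 0, ANA_PGID_PGID, PGID_BLACKHOLE);

/* pin a destination MAC to the blackhole; matching frames are no
 * longer flooded, but IS2 rules still see them and may CPU-copy
 * or redirect before the drop takes effect
 */
ocelot_mact_learn(ocelot, PGID_BLACKHOLE, dmac, vid, ENTRYTYPE_LOCKED);

/* teardown: forget the entry again */
ocelot_mact_forget(ocelot, dmac, vid);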
- * * Copyright (c) 2017, 2019 Microsemi Corporation * Copyright 2020-2021 NXP Semiconductors */ @@ -15,13 +12,34 @@ #include "ocelot.h" #include "ocelot_vcap.h" -static int ocelot_mrp_del_vcap(struct ocelot *ocelot, int port) +static const u8 mrp_test_dmac[] = { 0x01, 0x15, 0x4e, 0x00, 0x00, 0x01 }; +static const u8 mrp_control_dmac[] = { 0x01, 0x15, 0x4e, 0x00, 0x00, 0x02 }; + +static int ocelot_mrp_find_partner_port(struct ocelot *ocelot, + struct ocelot_port *p) +{ + int i; + + for (i = 0; i < ocelot->num_phys_ports; ++i) { + struct ocelot_port *ocelot_port = ocelot->ports[i]; + + if (!ocelot_port || p == ocelot_port) + continue; + + if (ocelot_port->mrp_ring_id == p->mrp_ring_id) + return i; + } + + return -1; +} + +static int ocelot_mrp_del_vcap(struct ocelot *ocelot, int id) { struct ocelot_vcap_block *block_vcap_is2; struct ocelot_vcap_filter *filter; block_vcap_is2 = &ocelot->block[VCAP_IS2]; - filter = ocelot_vcap_block_find_filter_by_id(block_vcap_is2, port, + filter = ocelot_vcap_block_find_filter_by_id(block_vcap_is2, id, false); if (!filter) return 0; @@ -29,6 +47,87 @@ static int ocelot_mrp_del_vcap(struct ocelot *ocelot, int port) return ocelot_vcap_filter_del(ocelot, filter); } +static int ocelot_mrp_redirect_add_vcap(struct ocelot *ocelot, int src_port, + int dst_port) +{ + const u8 mrp_test_mask[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + struct ocelot_vcap_filter *filter; + int err; + + filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (!filter) + return -ENOMEM; + + filter->key_type = OCELOT_VCAP_KEY_ETYPE; + filter->prio = 1; + filter->id.cookie = src_port; + filter->id.tc_offload = false; + filter->block_id = VCAP_IS2; + filter->type = OCELOT_VCAP_FILTER_OFFLOAD; + filter->ingress_port_mask = BIT(src_port); + ether_addr_copy(filter->key.etype.dmac.value, mrp_test_dmac); + ether_addr_copy(filter->key.etype.dmac.mask, mrp_test_mask); + filter->action.mask_mode = OCELOT_MASK_MODE_REDIRECT; + filter->action.port_mask = BIT(dst_port); + + err = ocelot_vcap_filter_add(ocelot, filter, NULL); + if (err) + kfree(filter); + + return err; +} + +static int ocelot_mrp_copy_add_vcap(struct ocelot *ocelot, int port, + int prio, unsigned long cookie) +{ + const u8 mrp_mask[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 }; + struct ocelot_vcap_filter *filter; + int err; + + filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (!filter) + return -ENOMEM; + + filter->key_type = OCELOT_VCAP_KEY_ETYPE; + filter->prio = prio; + filter->id.cookie = cookie; + filter->id.tc_offload = false; + filter->block_id = VCAP_IS2; + filter->type = OCELOT_VCAP_FILTER_OFFLOAD; + filter->ingress_port_mask = BIT(port); + /* Here is possible to use control or test dmac because the mask + * doesn't cover the LSB + */ + ether_addr_copy(filter->key.etype.dmac.value, mrp_test_dmac); + ether_addr_copy(filter->key.etype.dmac.mask, mrp_mask); + filter->action.mask_mode = OCELOT_MASK_MODE_PERMIT_DENY; + filter->action.port_mask = 0x0; + filter->action.cpu_copy_ena = true; + filter->action.cpu_qu_num = OCELOT_MRP_CPUQ; + + err = ocelot_vcap_filter_add(ocelot, filter, NULL); + if (err) + kfree(filter); + + return err; +} + +static void ocelot_mrp_save_mac(struct ocelot *ocelot, + struct ocelot_port *port) +{ + ocelot_mact_learn(ocelot, PGID_BLACKHOLE, mrp_test_dmac, + port->pvid_vlan.vid, ENTRYTYPE_LOCKED); + ocelot_mact_learn(ocelot, PGID_BLACKHOLE, mrp_control_dmac, + port->pvid_vlan.vid, ENTRYTYPE_LOCKED); +} + +static void ocelot_mrp_del_mac(struct ocelot *ocelot, + struct ocelot_port *port) +{ + 
ocelot_mact_forget(ocelot, mrp_test_dmac, port->pvid_vlan.vid); + ocelot_mact_forget(ocelot, mrp_control_dmac, port->pvid_vlan.vid); +} + int ocelot_mrp_add(struct ocelot *ocelot, int port, const struct switchdev_obj_mrp *mrp) { @@ -45,18 +144,7 @@ int ocelot_mrp_add(struct ocelot *ocelot, int port, if (mrp->p_port != dev && mrp->s_port != dev) return 0; - if (ocelot->mrp_ring_id != 0 && - ocelot->mrp_s_port && - ocelot->mrp_p_port) - return -EINVAL; - - if (mrp->p_port == dev) - ocelot->mrp_p_port = dev; - - if (mrp->s_port == dev) - ocelot->mrp_s_port = dev; - - ocelot->mrp_ring_id = mrp->ring_id; + ocelot_port->mrp_ring_id = mrp->ring_id; return 0; } @@ -66,33 +154,14 @@ int ocelot_mrp_del(struct ocelot *ocelot, int port, const struct switchdev_obj_mrp *mrp) { struct ocelot_port *ocelot_port = ocelot->ports[port]; - struct ocelot_port_private *priv; - struct net_device *dev; if (!ocelot_port) return -EOPNOTSUPP; - priv = container_of(ocelot_port, struct ocelot_port_private, port); - dev = priv->dev; - - if (ocelot->mrp_p_port != dev && ocelot->mrp_s_port != dev) + if (ocelot_port->mrp_ring_id != mrp->ring_id) return 0; - if (ocelot->mrp_ring_id == 0 && - !ocelot->mrp_s_port && - !ocelot->mrp_p_port) - return -EINVAL; - - if (ocelot_mrp_del_vcap(ocelot, priv->chip_port)) - return -EINVAL; - - if (ocelot->mrp_p_port == dev) - ocelot->mrp_p_port = NULL; - - if (ocelot->mrp_s_port == dev) - ocelot->mrp_s_port = NULL; - - ocelot->mrp_ring_id = 0; + ocelot_port->mrp_ring_id = 0; return 0; } @@ -102,49 +171,39 @@ int ocelot_mrp_add_ring_role(struct ocelot *ocelot, int port, const struct switchdev_obj_ring_role_mrp *mrp) { struct ocelot_port *ocelot_port = ocelot->ports[port]; - struct ocelot_vcap_filter *filter; - struct ocelot_port_private *priv; - struct net_device *dev; + int dst_port; int err; if (!ocelot_port) return -EOPNOTSUPP; - priv = container_of(ocelot_port, struct ocelot_port_private, port); - dev = priv->dev; - - if (ocelot->mrp_ring_id != mrp->ring_id) - return -EINVAL; - - if (!mrp->sw_backup) + if (mrp->ring_role != BR_MRP_RING_ROLE_MRC && !mrp->sw_backup) return -EOPNOTSUPP; - if (ocelot->mrp_p_port != dev && ocelot->mrp_s_port != dev) + if (ocelot_port->mrp_ring_id != mrp->ring_id) return 0; - filter = kzalloc(sizeof(*filter), GFP_ATOMIC); - if (!filter) - return -ENOMEM; + ocelot_mrp_save_mac(ocelot, ocelot_port); - filter->key_type = OCELOT_VCAP_KEY_ETYPE; - filter->prio = 1; - filter->id.cookie = priv->chip_port; - filter->id.tc_offload = false; - filter->block_id = VCAP_IS2; - filter->type = OCELOT_VCAP_FILTER_OFFLOAD; - filter->ingress_port_mask = BIT(priv->chip_port); - *(__be16 *)filter->key.etype.etype.value = htons(ETH_P_MRP); - *(__be16 *)filter->key.etype.etype.mask = htons(0xffff); - filter->action.mask_mode = OCELOT_MASK_MODE_PERMIT_DENY; - filter->action.port_mask = 0x0; - filter->action.cpu_copy_ena = true; - filter->action.cpu_qu_num = OCELOT_MRP_CPUQ; + if (mrp->ring_role != BR_MRP_RING_ROLE_MRC) + return ocelot_mrp_copy_add_vcap(ocelot, port, 1, port); - err = ocelot_vcap_filter_add(ocelot, filter, NULL); + dst_port = ocelot_mrp_find_partner_port(ocelot, ocelot_port); + if (dst_port == -1) + return -EINVAL; + + err = ocelot_mrp_redirect_add_vcap(ocelot, port, dst_port); if (err) - kfree(filter); + return err; - return err; + err = ocelot_mrp_copy_add_vcap(ocelot, port, 2, + port + ocelot->num_phys_ports); + if (err) { + ocelot_mrp_del_vcap(ocelot, port); + return err; + } + + return 0; } EXPORT_SYMBOL(ocelot_mrp_add_ring_role); @@ -152,24 +211,32 @@ int 
ocelot_mrp_del_ring_role(struct ocelot *ocelot, int port, const struct switchdev_obj_ring_role_mrp *mrp) { struct ocelot_port *ocelot_port = ocelot->ports[port]; - struct ocelot_port_private *priv; - struct net_device *dev; + int i; if (!ocelot_port) return -EOPNOTSUPP; - priv = container_of(ocelot_port, struct ocelot_port_private, port); - dev = priv->dev; - - if (ocelot->mrp_ring_id != mrp->ring_id) - return -EINVAL; - - if (!mrp->sw_backup) + if (mrp->ring_role != BR_MRP_RING_ROLE_MRC && !mrp->sw_backup) return -EOPNOTSUPP; - if (ocelot->mrp_p_port != dev && ocelot->mrp_s_port != dev) + if (ocelot_port->mrp_ring_id != mrp->ring_id) return 0; - return ocelot_mrp_del_vcap(ocelot, priv->chip_port); + ocelot_mrp_del_vcap(ocelot, port); + ocelot_mrp_del_vcap(ocelot, port + ocelot->num_phys_ports); + + for (i = 0; i < ocelot->num_phys_ports; ++i) { + ocelot_port = ocelot->ports[i]; + + if (!ocelot_port) + continue; + + if (ocelot_port->mrp_ring_id != 0) + goto out; + } + + ocelot_mrp_del_mac(ocelot, ocelot->ports[port]); +out: + return 0; } EXPORT_SYMBOL(ocelot_mrp_del_ring_role); diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index 12cb6867a2d0..aad33d22c33f 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -251,6 +251,12 @@ static int ocelot_setup_tc_cls_matchall(struct ocelot_port_private *priv, return -EEXIST; } + if (action->police.rate_pkt_ps) { + NL_SET_ERR_MSG_MOD(extack, + "QoS offload not support packets per second"); + return -EOPNOTSUPP; + } + pol.rate = (u32)div_u64(action->police.rate_bytes_ps, 1000) * 8; pol.burst = action->police.burst; @@ -501,21 +507,17 @@ static netdev_tx_t ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev) /* Check if timestamping is needed */ if (ocelot->ptp && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { - rew_op = ocelot_port->ptp_cmd; + struct sk_buff *clone = NULL; - if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) { - struct sk_buff *clone; - - clone = skb_clone_sk(skb); - if (!clone) { - kfree_skb(skb); - return NETDEV_TX_OK; - } + if (ocelot_port_txtstamp_request(ocelot, port, skb, &clone)) { + kfree_skb(skb); + return NETDEV_TX_OK; + } - ocelot_port_add_txtstamp_skb(ocelot, port, clone); + if (clone) + OCELOT_SKB_CB(skb)->clone = clone; - rew_op |= clone->cb[0] << 3; - } + rew_op = ocelot_ptp_rew_op(skb); } ocelot_port_inject_frame(ocelot, port, 0, rew_op, skb); @@ -1111,77 +1113,213 @@ static int ocelot_port_obj_del(struct net_device *dev, return ret; } -static int ocelot_netdevice_bridge_join(struct ocelot *ocelot, int port, - struct net_device *bridge) +static void ocelot_inherit_brport_flags(struct ocelot *ocelot, int port, + struct net_device *brport_dev) +{ + struct switchdev_brport_flags flags = {0}; + int flag; + + flags.mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD; + + for_each_set_bit(flag, &flags.mask, 32) + if (br_port_flag_is_set(brport_dev, BIT(flag))) + flags.val |= BIT(flag); + + ocelot_port_bridge_flags(ocelot, port, flags); +} + +static void ocelot_clear_brport_flags(struct ocelot *ocelot, int port) { struct switchdev_brport_flags flags; - int err; flags.mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD; - flags.val = flags.mask; + flags.val = flags.mask & ~BR_LEARNING; + + ocelot_port_bridge_flags(ocelot, port, flags); +} - err = ocelot_port_bridge_join(ocelot, port, bridge); +static int ocelot_switchdev_sync(struct ocelot *ocelot, int port, + struct net_device *brport_dev, + struct 
net_device *bridge_dev, + struct netlink_ext_ack *extack) +{ + clock_t ageing_time; + u8 stp_state; + int err; + + ocelot_inherit_brport_flags(ocelot, port, brport_dev); + + stp_state = br_port_get_stp_state(brport_dev); + ocelot_bridge_stp_state_set(ocelot, port, stp_state); + + err = ocelot_port_vlan_filtering(ocelot, port, + br_vlan_enabled(bridge_dev)); if (err) return err; - ocelot_port_bridge_flags(ocelot, port, flags); + ageing_time = br_get_ageing_time(bridge_dev); + ocelot_port_attr_ageing_set(ocelot, port, ageing_time); + + err = br_mdb_replay(bridge_dev, brport_dev, + &ocelot_switchdev_blocking_nb, extack); + if (err && err != -EOPNOTSUPP) + return err; + + err = br_fdb_replay(bridge_dev, brport_dev, &ocelot_switchdev_nb); + if (err) + return err; + + err = br_vlan_replay(bridge_dev, brport_dev, + &ocelot_switchdev_blocking_nb, extack); + if (err && err != -EOPNOTSUPP) + return err; + + return 0; +} + +static int ocelot_switchdev_unsync(struct ocelot *ocelot, int port) +{ + int err; + + err = ocelot_port_vlan_filtering(ocelot, port, false); + if (err) + return err; + + ocelot_clear_brport_flags(ocelot, port); + + ocelot_bridge_stp_state_set(ocelot, port, BR_STATE_FORWARDING); + + return 0; +} + +static int ocelot_netdevice_bridge_join(struct net_device *dev, + struct net_device *brport_dev, + struct net_device *bridge, + struct netlink_ext_ack *extack) +{ + struct ocelot_port_private *priv = netdev_priv(dev); + struct ocelot_port *ocelot_port = &priv->port; + struct ocelot *ocelot = ocelot_port->ocelot; + int port = priv->chip_port; + int err; + + ocelot_port_bridge_join(ocelot, port, bridge); + + err = ocelot_switchdev_sync(ocelot, port, brport_dev, bridge, extack); + if (err) + goto err_switchdev_sync; return 0; + +err_switchdev_sync: + ocelot_port_bridge_leave(ocelot, port, bridge); + return err; } -static int ocelot_netdevice_bridge_leave(struct ocelot *ocelot, int port, +static int ocelot_netdevice_bridge_leave(struct net_device *dev, + struct net_device *brport_dev, struct net_device *bridge) { - struct switchdev_brport_flags flags; + struct ocelot_port_private *priv = netdev_priv(dev); + struct ocelot_port *ocelot_port = &priv->port; + struct ocelot *ocelot = ocelot_port->ocelot; + int port = priv->chip_port; int err; - flags.mask = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD; - flags.val = flags.mask & ~BR_LEARNING; + err = ocelot_switchdev_unsync(ocelot, port); + if (err) + return err; - err = ocelot_port_bridge_leave(ocelot, port, bridge); + ocelot_port_bridge_leave(ocelot, port, bridge); - ocelot_port_bridge_flags(ocelot, port, flags); + return 0; +} + +static int ocelot_netdevice_lag_join(struct net_device *dev, + struct net_device *bond, + struct netdev_lag_upper_info *info, + struct netlink_ext_ack *extack) +{ + struct ocelot_port_private *priv = netdev_priv(dev); + struct ocelot_port *ocelot_port = &priv->port; + struct ocelot *ocelot = ocelot_port->ocelot; + struct net_device *bridge_dev; + int port = priv->chip_port; + int err; + err = ocelot_port_lag_join(ocelot, port, bond, info); + if (err == -EOPNOTSUPP) { + NL_SET_ERR_MSG_MOD(extack, "Offloading not supported"); + return 0; + } + + bridge_dev = netdev_master_upper_dev_get(bond); + if (!bridge_dev || !netif_is_bridge_master(bridge_dev)) + return 0; + + err = ocelot_netdevice_bridge_join(dev, bond, bridge_dev, extack); + if (err) + goto err_bridge_join; + + return 0; + +err_bridge_join: + ocelot_port_lag_leave(ocelot, port, bond); return err; } -static int ocelot_netdevice_changeupper(struct 
net_device *dev, - struct netdev_notifier_changeupper_info *info) +static int ocelot_netdevice_lag_leave(struct net_device *dev, + struct net_device *bond) { struct ocelot_port_private *priv = netdev_priv(dev); struct ocelot_port *ocelot_port = &priv->port; struct ocelot *ocelot = ocelot_port->ocelot; + struct net_device *bridge_dev; int port = priv->chip_port; + + ocelot_port_lag_leave(ocelot, port, bond); + + bridge_dev = netdev_master_upper_dev_get(bond); + if (!bridge_dev || !netif_is_bridge_master(bridge_dev)) + return 0; + + return ocelot_netdevice_bridge_leave(dev, bond, bridge_dev); +} + +static int ocelot_netdevice_changeupper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct netlink_ext_ack *extack; int err = 0; + extack = netdev_notifier_info_to_extack(&info->info); + if (netif_is_bridge_master(info->upper_dev)) { - if (info->linking) { - err = ocelot_netdevice_bridge_join(ocelot, port, - info->upper_dev); - } else { - err = ocelot_netdevice_bridge_leave(ocelot, port, + if (info->linking) + err = ocelot_netdevice_bridge_join(dev, dev, + info->upper_dev, + extack); + else + err = ocelot_netdevice_bridge_leave(dev, dev, info->upper_dev); - } } if (netif_is_lag_master(info->upper_dev)) { - if (info->linking) { - err = ocelot_port_lag_join(ocelot, port, - info->upper_dev, - info->upper_info); - if (err == -EOPNOTSUPP) { - NL_SET_ERR_MSG_MOD(info->info.extack, - "Offloading not supported"); - err = 0; - } - } else { - ocelot_port_lag_leave(ocelot, port, - info->upper_dev); - } + if (info->linking) + err = ocelot_netdevice_lag_join(dev, info->upper_dev, + info->upper_info, extack); + else + ocelot_netdevice_lag_leave(dev, info->upper_dev); } return notifier_from_errno(err); } +/* Treat CHANGEUPPER events on an offloaded LAG as individual CHANGEUPPER + * events for the lower physical ports of the LAG. + * If the LAG upper isn't offloaded, ignore its CHANGEUPPER events. + * In case the LAG joined a bridge, notify that we are offloading it and can do + * forwarding in hardware towards it. 
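Both the LAG join and leave paths above need to locate the bridge, if any, stacked on top of the bond before syncing or unsyncing bridge state. Factored out, the lookup reduces to the pattern below (RTNL must be held for netdev_master_upper_dev_get()):

static struct net_device *bridge_above(struct net_device *bond)
{
	struct net_device *upper = netdev_master_upper_dev_get(bond);

	if (upper && netif_is_bridge_master(upper))
		return upper;

	return NULL;	/* the bond is not enslaved to a bridge */
}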
+ */ static int ocelot_netdevice_lag_changeupper(struct net_device *dev, struct netdev_notifier_changeupper_info *info) @@ -1191,6 +1329,12 @@ ocelot_netdevice_lag_changeupper(struct net_device *dev, int err = NOTIFY_DONE; netdev_for_each_lower_dev(dev, lower, iter) { + struct ocelot_port_private *priv = netdev_priv(lower); + struct ocelot_port *ocelot_port = &priv->port; + + if (ocelot_port->bond != dev) + return NOTIFY_OK; + err = ocelot_netdevice_changeupper(lower, info); if (err) return notifier_from_errno(err); diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.c b/drivers/net/ethernet/mscc/ocelot_ptp.c index a33ab315cc6b..87ad2137ba06 100644 --- a/drivers/net/ethernet/mscc/ocelot_ptp.c +++ b/drivers/net/ethernet/mscc/ocelot_ptp.c @@ -4,6 +4,8 @@ * Copyright (c) 2017 Microsemi Corporation * Copyright 2020 NXP */ +#include <linux/time64.h> + #include <soc/mscc/ocelot_ptp.h> #include <soc/mscc/ocelot_sys.h> #include <soc/mscc/ocelot.h> diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.c b/drivers/net/ethernet/mscc/ocelot_vcap.c index 37a232911395..7945393a0655 100644 --- a/drivers/net/ethernet/mscc/ocelot_vcap.c +++ b/drivers/net/ethernet/mscc/ocelot_vcap.c @@ -761,6 +761,7 @@ static void is1_entry_set(struct ocelot *ocelot, int ix, vcap_key_bytes_set(vcap, &data, VCAP_IS1_HK_ETYPE, etype.value, etype.mask); } + break; } default: break; diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 1634ca6d4a8f..c84c8bf2bc20 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -2897,7 +2897,7 @@ static netdev_tx_t myri10ge_sw_tso(struct sk_buff *skb, dev_kfree_skb_any(curr); if (segs != NULL) { curr = segs; - segs = segs->next; + segs = next; curr->next = NULL; dev_kfree_skb_any(segs); } diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index 8f2f091bce89..9cfcd5500462 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -6657,7 +6657,7 @@ static int s2io_change_mtu(struct net_device *dev, int new_mtu) /** * s2io_set_link - Set the LInk status - * @work: work struct containing a pointer to device private structue + * @work: work struct containing a pointer to device private structure * Description: Sets the link status for the adapter */ diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.h b/drivers/net/ethernet/neterion/vxge/vxge-main.h index 9c86f4f9cd42..63f65193dd49 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.h +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.h @@ -454,49 +454,49 @@ int vxge_fw_upgrade(struct vxgedev *vdev, char *fw_name, int override); #define vxge_debug_ll_config(level, fmt, ...) \ vxge_debug_ll(level, VXGE_DEBUG_LL_CONFIG, fmt, ##__VA_ARGS__) #else -#define vxge_debug_ll_config(level, fmt, ...) +#define vxge_debug_ll_config(level, fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif #if (VXGE_DEBUG_INIT & VXGE_DEBUG_MASK) #define vxge_debug_init(level, fmt, ...) \ vxge_debug_ll(level, VXGE_DEBUG_INIT, fmt, ##__VA_ARGS__) #else -#define vxge_debug_init(level, fmt, ...) +#define vxge_debug_init(level, fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif #if (VXGE_DEBUG_TX & VXGE_DEBUG_MASK) #define vxge_debug_tx(level, fmt, ...) \ vxge_debug_ll(level, VXGE_DEBUG_TX, fmt, ##__VA_ARGS__) #else -#define vxge_debug_tx(level, fmt, ...) +#define vxge_debug_tx(level, fmt, ...) 
no_printk(fmt, ##__VA_ARGS__) #endif #if (VXGE_DEBUG_RX & VXGE_DEBUG_MASK) #define vxge_debug_rx(level, fmt, ...) \ vxge_debug_ll(level, VXGE_DEBUG_RX, fmt, ##__VA_ARGS__) #else -#define vxge_debug_rx(level, fmt, ...) +#define vxge_debug_rx(level, fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif #if (VXGE_DEBUG_MEM & VXGE_DEBUG_MASK) #define vxge_debug_mem(level, fmt, ...) \ vxge_debug_ll(level, VXGE_DEBUG_MEM, fmt, ##__VA_ARGS__) #else -#define vxge_debug_mem(level, fmt, ...) +#define vxge_debug_mem(level, fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif #if (VXGE_DEBUG_ENTRYEXIT & VXGE_DEBUG_MASK) #define vxge_debug_entryexit(level, fmt, ...) \ vxge_debug_ll(level, VXGE_DEBUG_ENTRYEXIT, fmt, ##__VA_ARGS__) #else -#define vxge_debug_entryexit(level, fmt, ...) +#define vxge_debug_entryexit(level, fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif #if (VXGE_DEBUG_INTR & VXGE_DEBUG_MASK) #define vxge_debug_intr(level, fmt, ...) \ vxge_debug_ll(level, VXGE_DEBUG_INTR, fmt, ##__VA_ARGS__) #else -#define vxge_debug_intr(level, fmt, ...) +#define vxge_debug_intr(level, fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif #define VXGE_DEVICE_DEBUG_LEVEL_SET(level, mask, vdev) {\ diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.c b/drivers/net/ethernet/netronome/nfp/abm/main.c index bdbf0726145e..605a1617b195 100644 --- a/drivers/net/ethernet/netronome/nfp/abm/main.c +++ b/drivers/net/ethernet/netronome/nfp/abm/main.c @@ -419,8 +419,8 @@ nfp_abm_port_get_stats_strings(struct nfp_app *app, struct nfp_port *port, return data; alink = repr->app_priv; for (i = 0; i < alink->vnic->dp.num_r_vecs; i++) { - data = nfp_pr_et(data, "q%u_no_wait", i); - data = nfp_pr_et(data, "q%u_delayed", i); + ethtool_sprintf(&data, "q%u_no_wait", i); + ethtool_sprintf(&data, "q%u_delayed", i); } return data; } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c index 0e2db6ea79e9..2ec62c8d86e1 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c @@ -454,6 +454,7 @@ void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb) dev_consume_skb_any(skb); else dev_kfree_skb_any(skb); + return; } nfp_ccm_rx(&bpf->ccm, skb); diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index caf12eec9945..31377923ea3d 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -47,6 +47,7 @@ struct nfp_app; #define NFP_FL_FEATS_PRE_TUN_RULES BIT(6) #define NFP_FL_FEATS_IPV6_TUN BIT(7) #define NFP_FL_FEATS_VLAN_QINQ BIT(8) +#define NFP_FL_FEATS_QOS_PPS BIT(9) #define NFP_FL_FEATS_HOST_ACK BIT(31) #define NFP_FL_ENABLE_FLOW_MERGE BIT(0) @@ -61,7 +62,8 @@ struct nfp_app; NFP_FL_FEATS_FLOW_MOD | \ NFP_FL_FEATS_PRE_TUN_RULES | \ NFP_FL_FEATS_IPV6_TUN | \ - NFP_FL_FEATS_VLAN_QINQ) + NFP_FL_FEATS_VLAN_QINQ | \ + NFP_FL_FEATS_QOS_PPS) struct nfp_fl_mask_id { struct circ_buf mask_id_free_list; @@ -190,6 +192,7 @@ struct nfp_fl_internal_ports { * @qos_rate_limiters: Current active qos rate limiters * @qos_stats_lock: Lock on qos stats updates * @pre_tun_rule_cnt: Number of pre-tunnel rules offloaded + * @merge_table: Hash table to store merged flows */ struct nfp_flower_priv { struct nfp_app *app; @@ -223,6 +226,7 @@ struct nfp_flower_priv { unsigned int qos_rate_limiters; spinlock_t qos_stats_lock; /* Protect the qos stats */ int pre_tun_rule_cnt; + struct rhashtable merge_table; }; /** @@ -350,6 +354,12 @@ struct 
nfp_fl_payload_link { }; extern const struct rhashtable_params nfp_flower_table_params; +extern const struct rhashtable_params merge_table_params; + +struct nfp_merge_info { + u64 parent_ctx; + struct rhash_head ht_node; +}; struct nfp_fl_stats_frame { __be32 stats_con_id; diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c index 5defd31d481c..327bb56b3ef5 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c +++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c @@ -327,8 +327,14 @@ int nfp_compile_flow_metadata(struct nfp_app *app, goto err_free_ctx_entry; } + /* Do not allocate a mask-id for pre_tun_rules. These flows are used to + * configure the pre_tun table and are never actually sent to the + * firmware as an add-flow message. This causes the mask-id allocation + * on the firmware to get out of sync if allocated here. + */ new_mask_id = 0; - if (!nfp_check_mask_add(app, nfp_flow->mask_data, + if (!nfp_flow->pre_tun_rule.dev && + !nfp_check_mask_add(app, nfp_flow->mask_data, nfp_flow->meta.mask_len, &nfp_flow->meta.flags, &new_mask_id)) { NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot allocate a new mask id"); @@ -359,7 +365,8 @@ int nfp_compile_flow_metadata(struct nfp_app *app, goto err_remove_mask; } - if (!nfp_check_mask_remove(app, nfp_flow->mask_data, + if (!nfp_flow->pre_tun_rule.dev && + !nfp_check_mask_remove(app, nfp_flow->mask_data, nfp_flow->meta.mask_len, NULL, &new_mask_id)) { NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot release mask id"); @@ -374,8 +381,10 @@ int nfp_compile_flow_metadata(struct nfp_app *app, return 0; err_remove_mask: - nfp_check_mask_remove(app, nfp_flow->mask_data, nfp_flow->meta.mask_len, - NULL, &new_mask_id); + if (!nfp_flow->pre_tun_rule.dev) + nfp_check_mask_remove(app, nfp_flow->mask_data, + nfp_flow->meta.mask_len, + NULL, &new_mask_id); err_remove_rhash: WARN_ON_ONCE(rhashtable_remove_fast(&priv->stats_ctx_table, &ctx_entry->ht_node, @@ -406,9 +415,10 @@ int nfp_modify_flow_metadata(struct nfp_app *app, __nfp_modify_flow_metadata(priv, nfp_flow); - nfp_check_mask_remove(app, nfp_flow->mask_data, - nfp_flow->meta.mask_len, &nfp_flow->meta.flags, - &new_mask_id); + if (!nfp_flow->pre_tun_rule.dev) + nfp_check_mask_remove(app, nfp_flow->mask_data, + nfp_flow->meta.mask_len, &nfp_flow->meta.flags, + &new_mask_id); /* Update flow payload with mask ids. */ nfp_flow->unmasked_data[NFP_FL_MASK_ID_LOCATION] = new_mask_id; @@ -480,6 +490,12 @@ const struct rhashtable_params nfp_flower_table_params = { .automatic_shrinking = true, }; +const struct rhashtable_params merge_table_params = { + .key_offset = offsetof(struct nfp_merge_info, parent_ctx), + .head_offset = offsetof(struct nfp_merge_info, ht_node), + .key_len = sizeof(u64), +}; + int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count, unsigned int host_num_mems) { @@ -496,6 +512,10 @@ int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count, if (err) goto err_free_flow_table; + err = rhashtable_init(&priv->merge_table, &merge_table_params); + if (err) + goto err_free_stats_ctx_table; + get_random_bytes(&priv->mask_id_seed, sizeof(priv->mask_id_seed)); /* Init ring buffer and unallocated mask_ids.
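merge_table_params above is the minimal rhashtable configuration for a plain u64 key: with key_len set and no custom hashfn, the core hashes the raw key bytes. The typical insert-if-absent usage around such a table looks like this (a sketch reusing the patch's struct and params, with error handling trimmed to the essentials):

#include <linux/rhashtable.h>
#include <linux/slab.h>

static int merge_entry_demo(struct rhashtable *tbl, u64 parent_ctx)
{
	struct nfp_merge_info *info;
	int err;

	if (rhashtable_lookup_fast(tbl, &parent_ctx, merge_table_params))
		return 0;	/* the two flows are already merged */

	info = kmalloc(sizeof(*info), GFP_KERNEL);
	if (!info)
		return -ENOMEM;

	info->parent_ctx = parent_ctx;
	err = rhashtable_insert_fast(tbl, &info->ht_node,
				     merge_table_params);
	if (err)
		kfree(info);

	return err;
}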
*/ @@ -503,7 +523,7 @@ int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count, kmalloc_array(NFP_FLOWER_MASK_ENTRY_RS, NFP_FLOWER_MASK_ELEMENT_RS, GFP_KERNEL); if (!priv->mask_ids.mask_id_free_list.buf) - goto err_free_stats_ctx_table; + goto err_free_merge_table; priv->mask_ids.init_unallocated = NFP_FLOWER_MASK_ENTRY_RS - 1; @@ -540,6 +560,8 @@ err_free_last_used: kfree(priv->mask_ids.last_used); err_free_mask_id: kfree(priv->mask_ids.mask_id_free_list.buf); +err_free_merge_table: + rhashtable_destroy(&priv->merge_table); err_free_stats_ctx_table: rhashtable_destroy(&priv->stats_ctx_table); err_free_flow_table: @@ -558,6 +580,8 @@ void nfp_flower_metadata_cleanup(struct nfp_app *app) nfp_check_rhashtable_empty, NULL); rhashtable_free_and_destroy(&priv->stats_ctx_table, nfp_check_rhashtable_empty, NULL); + rhashtable_free_and_destroy(&priv->merge_table, + nfp_check_rhashtable_empty, NULL); kvfree(priv->stats); kfree(priv->mask_ids.mask_id_free_list.buf); kfree(priv->mask_ids.last_used); diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 1c59aff2163c..e95969c462e4 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -1009,6 +1009,8 @@ int nfp_flower_merge_offloaded_flows(struct nfp_app *app, struct netlink_ext_ack *extack = NULL; struct nfp_fl_payload *merge_flow; struct nfp_fl_key_ls merge_key_ls; + struct nfp_merge_info *merge_info; + u64 parent_ctx = 0; int err; ASSERT_RTNL(); @@ -1019,6 +1021,15 @@ int nfp_flower_merge_offloaded_flows(struct nfp_app *app, nfp_flower_is_merge_flow(sub_flow2)) return -EINVAL; + /* check if the two flows are already merged */ + parent_ctx = (u64)(be32_to_cpu(sub_flow1->meta.host_ctx_id)) << 32; + parent_ctx |= (u64)(be32_to_cpu(sub_flow2->meta.host_ctx_id)); + if (rhashtable_lookup_fast(&priv->merge_table, + &parent_ctx, merge_table_params)) { + nfp_flower_cmsg_warn(app, "The two flows are already merged.\n"); + return 0; + } + err = nfp_flower_can_merge(sub_flow1, sub_flow2); if (err) return err; @@ -1060,16 +1071,33 @@ int nfp_flower_merge_offloaded_flows(struct nfp_app *app, if (err) goto err_release_metadata; + merge_info = kmalloc(sizeof(*merge_info), GFP_KERNEL); + if (!merge_info) { + err = -ENOMEM; + goto err_remove_rhash; + } + merge_info->parent_ctx = parent_ctx; + err = rhashtable_insert_fast(&priv->merge_table, &merge_info->ht_node, + merge_table_params); + if (err) + goto err_destroy_merge_info; + err = nfp_flower_xmit_flow(app, merge_flow, NFP_FLOWER_CMSG_TYPE_FLOW_MOD); if (err) - goto err_remove_rhash; + goto err_remove_merge_info; merge_flow->in_hw = true; sub_flow1->in_hw = false; return 0; +err_remove_merge_info: + WARN_ON_ONCE(rhashtable_remove_fast(&priv->merge_table, + &merge_info->ht_node, + merge_table_params)); +err_destroy_merge_info: + kfree(merge_info); err_remove_rhash: WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table, &merge_flow->fl_node, @@ -1142,6 +1170,12 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app, return -EOPNOTSUPP; } + if (!(key_layer & NFP_FLOWER_LAYER_IPV4) && + !(key_layer & NFP_FLOWER_LAYER_IPV6)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: match on ipv4/ipv6 eth_type must be present"); + return -EOPNOTSUPP; + } + /* Skip fields known to exist. 
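The 64-bit key built at the top of nfp_flower_merge_offloaded_flows() is simply the two sub-flows' host context IDs packed with the first flow in the upper word; the delete path later in this patch reconstitutes the same value by shifting the IDs back in from the linked-flow list. Factored out (host_ctx_id is a __be32 in the flow metadata):

static u64 merge_parent_ctx(__be32 host_ctx_id1, __be32 host_ctx_id2)
{
	return ((u64)be32_to_cpu(host_ctx_id1) << 32) |
	       (u64)be32_to_cpu(host_ctx_id2);
}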
*/ mask += sizeof(struct nfp_flower_meta_tci); ext += sizeof(struct nfp_flower_meta_tci); @@ -1152,6 +1186,13 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app, mask += sizeof(struct nfp_flower_in_port); ext += sizeof(struct nfp_flower_in_port); + /* Ensure destination MAC address matches pre_tun_dev. */ + mac = (struct nfp_flower_mac_mpls *)ext; + if (memcmp(&mac->mac_dst[0], flow->pre_tun_rule.dev->dev_addr, 6)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: dest MAC must match output dev MAC"); + return -EOPNOTSUPP; + } + /* Ensure destination MAC address is fully matched. */ mac = (struct nfp_flower_mac_mpls *)mask; if (!is_broadcast_ether_addr(&mac->mac_dst[0])) { @@ -1159,6 +1200,11 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app, return -EOPNOTSUPP; } + if (mac->mpls_lse) { + NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: MPLS not supported"); + return -EOPNOTSUPP; + } + mask += sizeof(struct nfp_flower_mac_mpls); ext += sizeof(struct nfp_flower_mac_mpls); if (key_layer & NFP_FLOWER_LAYER_IPV4 || @@ -1341,7 +1387,9 @@ nfp_flower_remove_merge_flow(struct nfp_app *app, { struct nfp_flower_priv *priv = app->priv; struct nfp_fl_payload_link *link, *temp; + struct nfp_merge_info *merge_info; struct nfp_fl_payload *origin; + u64 parent_ctx = 0; bool mod = false; int err; @@ -1378,8 +1426,22 @@ nfp_flower_remove_merge_flow(struct nfp_app *app, err_free_links: /* Clean any links connected with the merged flow. */ list_for_each_entry_safe(link, temp, &merge_flow->linked_flows, - merge_flow.list) + merge_flow.list) { + u32 ctx_id = be32_to_cpu(link->sub_flow.flow->meta.host_ctx_id); + + parent_ctx = (parent_ctx << 32) | (u64)(ctx_id); nfp_flower_unlink_flow(link); + } + + merge_info = rhashtable_lookup_fast(&priv->merge_table, + &parent_ctx, + merge_table_params); + if (merge_info) { + WARN_ON_ONCE(rhashtable_remove_fast(&priv->merge_table, + &merge_info->ht_node, + merge_table_params)); + kfree(merge_info); + } kfree(merge_flow->action_data); kfree(merge_flow->mask_data); diff --git a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c index d4ce8f9ef3cc..784c6dbf8bc4 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c @@ -10,19 +10,26 @@ #include "../nfp_port.h" #define NFP_FL_QOS_UPDATE msecs_to_jiffies(1000) +#define NFP_FL_QOS_PPS BIT(15) struct nfp_police_cfg_head { __be32 flags_opts; __be32 port; }; +enum NFP_FL_QOS_TYPES { + NFP_FL_QOS_TYPE_BPS, + NFP_FL_QOS_TYPE_PPS, + NFP_FL_QOS_TYPE_MAX, +}; + /* Police cmsg for configuring a trTCM traffic conditioner (8W/32B) * See RFC 2698 for more details. 
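The message documented below describes a two-rate three-color marker, but for a simple rate limiter the driver programs it degenerately: the peak and committed halves receive identical values, so the second rate never bites. The fill pattern, as it appears later in this patch with fields from struct nfp_police_config:

config->bkt_tkn_p = cpu_to_be32(burst);	/* peak bucket starts full */
config->bkt_tkn_c = cpu_to_be32(burst);	/* committed bucket, too */
config->pbs = cpu_to_be32(burst);	/* peak burst size */
config->cbs = cpu_to_be32(burst);	/* committed burst size */
config->pir = cpu_to_be32(rate);	/* peak information rate */
config->cir = cpu_to_be32(rate);	/* committed information rate */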
* ---------------------------------------------------------------- * 3 2 1 * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | Flag options | + * | Reserved |p| Reserved | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Port Ingress | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ @@ -38,6 +45,9 @@ struct nfp_police_cfg_head { * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Committed Information Rate | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * Word[0](FLag options): + * [15] p(pps) 1 for pps ,0 for bps + * */ struct nfp_police_config { struct nfp_police_cfg_head head; @@ -62,13 +72,18 @@ nfp_flower_install_rate_limiter(struct nfp_app *app, struct net_device *netdev, struct tc_cls_matchall_offload *flow, struct netlink_ext_ack *extack) { - struct flow_action_entry *action = &flow->rule->action.entries[0]; + struct flow_action_entry *paction = &flow->rule->action.entries[0]; + u32 action_num = flow->rule->action.num_entries; struct nfp_flower_priv *fl_priv = app->priv; + struct flow_action_entry *action = NULL; struct nfp_flower_repr_priv *repr_priv; struct nfp_police_config *config; + u32 netdev_port_id, i; struct nfp_repr *repr; struct sk_buff *skb; - u32 netdev_port_id; + bool pps_support; + u32 bps_num = 0; + u32 pps_num = 0; u32 burst; u64 rate; @@ -78,6 +93,8 @@ nfp_flower_install_rate_limiter(struct nfp_app *app, struct net_device *netdev, } repr = netdev_priv(netdev); repr_priv = repr->app_priv; + netdev_port_id = nfp_repr_get_port_id(netdev); + pps_support = !!(fl_priv->flower_ext_feats & NFP_FL_FEATS_QOS_PPS); if (repr_priv->block_shared) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload not supported on shared blocks"); @@ -89,9 +106,18 @@ nfp_flower_install_rate_limiter(struct nfp_app *app, struct net_device *netdev, return -EOPNOTSUPP; } - if (!flow_offload_has_one_action(&flow->rule->action)) { - NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload requires a single action"); - return -EOPNOTSUPP; + if (pps_support) { + if (action_num > 2 || action_num == 0) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: qos rate limit offload only support action number 1 or 2"); + return -EOPNOTSUPP; + } + } else { + if (!flow_offload_has_one_action(&flow->rule->action)) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: qos rate limit offload requires a single action"); + return -EOPNOTSUPP; + } } if (flow->common.prio != 1) { @@ -99,31 +125,69 @@ nfp_flower_install_rate_limiter(struct nfp_app *app, struct net_device *netdev, return -EOPNOTSUPP; } - if (action->id != FLOW_ACTION_POLICE) { - NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload requires police action"); - return -EOPNOTSUPP; + for (i = 0 ; i < action_num; i++) { + action = paction + i; + if (action->id != FLOW_ACTION_POLICE) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: qos rate limit offload requires police action"); + return -EOPNOTSUPP; + } + if (action->police.rate_bytes_ps > 0) { + if (bps_num++) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: qos rate limit offload only support one BPS action"); + return -EOPNOTSUPP; + } + } + if (action->police.rate_pkt_ps > 0) { + if (!pps_support) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: FW does not support PPS action"); + return -EOPNOTSUPP; + } + if (pps_num++) { + NL_SET_ERR_MSG_MOD(extack, + 
"unsupported offload: qos rate limit offload only support one PPS action"); + return -EOPNOTSUPP; + } + } } - rate = action->police.rate_bytes_ps; - burst = action->police.burst; - netdev_port_id = nfp_repr_get_port_id(netdev); - - skb = nfp_flower_cmsg_alloc(repr->app, sizeof(struct nfp_police_config), - NFP_FLOWER_CMSG_TYPE_QOS_MOD, GFP_KERNEL); - if (!skb) - return -ENOMEM; - - config = nfp_flower_cmsg_get_data(skb); - memset(config, 0, sizeof(struct nfp_police_config)); - config->head.port = cpu_to_be32(netdev_port_id); - config->bkt_tkn_p = cpu_to_be32(burst); - config->bkt_tkn_c = cpu_to_be32(burst); - config->pbs = cpu_to_be32(burst); - config->cbs = cpu_to_be32(burst); - config->pir = cpu_to_be32(rate); - config->cir = cpu_to_be32(rate); - nfp_ctrl_tx(repr->app->ctrl, skb); + for (i = 0 ; i < action_num; i++) { + /* Set QoS data for this interface */ + action = paction + i; + if (action->police.rate_bytes_ps > 0) { + rate = action->police.rate_bytes_ps; + burst = action->police.burst; + } else if (action->police.rate_pkt_ps > 0) { + rate = action->police.rate_pkt_ps; + burst = action->police.burst_pkt; + } else { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: qos rate limit is not BPS or PPS"); + continue; + } + if (rate != 0) { + skb = nfp_flower_cmsg_alloc(repr->app, sizeof(struct nfp_police_config), + NFP_FLOWER_CMSG_TYPE_QOS_MOD, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + config = nfp_flower_cmsg_get_data(skb); + memset(config, 0, sizeof(struct nfp_police_config)); + if (action->police.rate_pkt_ps > 0) + config->head.flags_opts = cpu_to_be32(NFP_FL_QOS_PPS); + config->head.port = cpu_to_be32(netdev_port_id); + config->bkt_tkn_p = cpu_to_be32(burst); + config->bkt_tkn_c = cpu_to_be32(burst); + config->pbs = cpu_to_be32(burst); + config->cbs = cpu_to_be32(burst); + config->pir = cpu_to_be32(rate); + config->cir = cpu_to_be32(rate); + nfp_ctrl_tx(repr->app->ctrl, skb); + } + } repr_priv->qos_table.netdev_port_id = netdev_port_id; fl_priv->qos_rate_limiters++; if (fl_priv->qos_rate_limiters == 1) @@ -141,9 +205,10 @@ nfp_flower_remove_rate_limiter(struct nfp_app *app, struct net_device *netdev, struct nfp_flower_priv *fl_priv = app->priv; struct nfp_flower_repr_priv *repr_priv; struct nfp_police_config *config; + u32 netdev_port_id, i; struct nfp_repr *repr; struct sk_buff *skb; - u32 netdev_port_id; + bool pps_support; if (!nfp_netdev_is_nfp_repr(netdev)) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload not supported on higher level port"); @@ -153,27 +218,38 @@ nfp_flower_remove_rate_limiter(struct nfp_app *app, struct net_device *netdev, netdev_port_id = nfp_repr_get_port_id(netdev); repr_priv = repr->app_priv; + pps_support = !!(fl_priv->flower_ext_feats & NFP_FL_FEATS_QOS_PPS); if (!repr_priv->qos_table.netdev_port_id) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: cannot remove qos entry that does not exist"); return -EOPNOTSUPP; } - skb = nfp_flower_cmsg_alloc(repr->app, sizeof(struct nfp_police_config), - NFP_FLOWER_CMSG_TYPE_QOS_DEL, GFP_KERNEL); - if (!skb) - return -ENOMEM; - - /* Clear all qos associate data for this interface */ memset(&repr_priv->qos_table, 0, sizeof(struct nfp_fl_qos)); fl_priv->qos_rate_limiters--; if (!fl_priv->qos_rate_limiters) cancel_delayed_work_sync(&fl_priv->qos_stats_work); - - config = nfp_flower_cmsg_get_data(skb); - memset(config, 0, sizeof(struct nfp_police_config)); - config->head.port = cpu_to_be32(netdev_port_id); - nfp_ctrl_tx(repr->app->ctrl, skb); + for (i = 0 ; i < NFP_FL_QOS_TYPE_MAX; i++) { 
+ if (i == NFP_FL_QOS_TYPE_PPS && !pps_support) + break; + /* 0:bps 1:pps + * Clear QoS data for this interface. + * There is no need to check if a specific QOS_TYPE was + * configured as the firmware handles clearing a QoS entry + * safely, even if it wasn't explicitly added. + */ + skb = nfp_flower_cmsg_alloc(repr->app, sizeof(struct nfp_police_config), + NFP_FLOWER_CMSG_TYPE_QOS_DEL, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + config = nfp_flower_cmsg_get_data(skb); + memset(config, 0, sizeof(struct nfp_police_config)); + if (i == NFP_FL_QOS_TYPE_PPS) + config->head.flags_opts = cpu_to_be32(NFP_FL_QOS_PPS); + config->head.port = cpu_to_be32(netdev_port_id); + nfp_ctrl_tx(repr->app->ctrl, skb); + } return 0; } diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index 7248d248f604..d19c02e99114 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -16,8 +16,9 @@ #define NFP_FL_MAX_ROUTES 32 #define NFP_TUN_PRE_TUN_RULE_LIMIT 32 -#define NFP_TUN_PRE_TUN_RULE_DEL 0x1 -#define NFP_TUN_PRE_TUN_IDX_BIT 0x8 +#define NFP_TUN_PRE_TUN_RULE_DEL BIT(0) +#define NFP_TUN_PRE_TUN_IDX_BIT BIT(3) +#define NFP_TUN_PRE_TUN_IPV6_BIT BIT(7) /** * struct nfp_tun_pre_run_rule - rule matched before decap @@ -1268,6 +1269,7 @@ int nfp_flower_xmit_pre_tun_flow(struct nfp_app *app, { struct nfp_flower_priv *app_priv = app->priv; struct nfp_tun_offloaded_mac *mac_entry; + struct nfp_flower_meta_tci *key_meta; struct nfp_tun_pre_tun_rule payload; struct net_device *internal_dev; int err; @@ -1290,6 +1292,15 @@ int nfp_flower_xmit_pre_tun_flow(struct nfp_app *app, if (!mac_entry) return -ENOENT; + /* Set/clear IPV6 bit. cpu_to_be16() swap will lead to MSB being + * set/clear for port_idx. + */ + key_meta = (struct nfp_flower_meta_tci *)flow->unmasked_data; + if (key_meta->nfp_flow_key_layer & NFP_FLOWER_LAYER_IPV6) + mac_entry->index |= NFP_TUN_PRE_TUN_IPV6_BIT; + else + mac_entry->index &= ~NFP_TUN_PRE_TUN_IPV6_BIT; + payload.port_idx = cpu_to_be16(mac_entry->index); /* Copy mac id and vlan to flow - dev may not exist at delete time. 
*/ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h index 76d13af46a7a..3e9baff07100 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h @@ -18,7 +18,6 @@ struct netdev_bpf; struct netlink_ext_ack; struct pci_dev; struct sk_buff; -struct sk_buff; struct nfp_app; struct nfp_cpp; struct nfp_pf; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c index 713ee3041d49..bea978df7713 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c @@ -364,6 +364,7 @@ int nfp_devlink_port_register(struct nfp_app *app, struct nfp_port *port) attrs.split = eth_port.is_split; attrs.splittable = !attrs.split; + attrs.lanes = eth_port.port_lanes; attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; attrs.phys.port_number = eth_port.label_port; attrs.phys.split_subport_number = eth_port.label_subport; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 9c9ae33d84ce..1b482446536d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -429,17 +429,6 @@ static int nfp_net_set_ringparam(struct net_device *netdev, return nfp_net_set_ring_size(nn, rxd_cnt, txd_cnt); } -__printf(2, 3) u8 *nfp_pr_et(u8 *data, const char *fmt, ...) -{ - va_list args; - - va_start(args, fmt); - vsnprintf(data, ETH_GSTRING_LEN, fmt, args); - va_end(args); - - return data + ETH_GSTRING_LEN; -} - static unsigned int nfp_vnic_get_sw_stats_count(struct net_device *netdev) { struct nfp_net *nn = netdev_priv(netdev); @@ -454,29 +443,29 @@ static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data) int i; for (i = 0; i < nn->max_r_vecs; i++) { - data = nfp_pr_et(data, "rvec_%u_rx_pkts", i); - data = nfp_pr_et(data, "rvec_%u_tx_pkts", i); - data = nfp_pr_et(data, "rvec_%u_tx_busy", i); + ethtool_sprintf(&data, "rvec_%u_rx_pkts", i); + ethtool_sprintf(&data, "rvec_%u_tx_pkts", i); + ethtool_sprintf(&data, "rvec_%u_tx_busy", i); } - data = nfp_pr_et(data, "hw_rx_csum_ok"); - data = nfp_pr_et(data, "hw_rx_csum_inner_ok"); - data = nfp_pr_et(data, "hw_rx_csum_complete"); - data = nfp_pr_et(data, "hw_rx_csum_err"); - data = nfp_pr_et(data, "rx_replace_buf_alloc_fail"); - data = nfp_pr_et(data, "rx_tls_decrypted_packets"); - data = nfp_pr_et(data, "hw_tx_csum"); - data = nfp_pr_et(data, "hw_tx_inner_csum"); - data = nfp_pr_et(data, "tx_gather"); - data = nfp_pr_et(data, "tx_lso"); - data = nfp_pr_et(data, "tx_tls_encrypted_packets"); - data = nfp_pr_et(data, "tx_tls_ooo"); - data = nfp_pr_et(data, "tx_tls_drop_no_sync_data"); - - data = nfp_pr_et(data, "hw_tls_no_space"); - data = nfp_pr_et(data, "rx_tls_resync_req_ok"); - data = nfp_pr_et(data, "rx_tls_resync_req_ign"); - data = nfp_pr_et(data, "rx_tls_resync_sent"); + ethtool_sprintf(&data, "hw_rx_csum_ok"); + ethtool_sprintf(&data, "hw_rx_csum_inner_ok"); + ethtool_sprintf(&data, "hw_rx_csum_complete"); + ethtool_sprintf(&data, "hw_rx_csum_err"); + ethtool_sprintf(&data, "rx_replace_buf_alloc_fail"); + ethtool_sprintf(&data, "rx_tls_decrypted_packets"); + ethtool_sprintf(&data, "hw_tx_csum"); + ethtool_sprintf(&data, "hw_tx_inner_csum"); + ethtool_sprintf(&data, "tx_gather"); + ethtool_sprintf(&data, "tx_lso"); + ethtool_sprintf(&data, "tx_tls_encrypted_packets"); + ethtool_sprintf(&data, "tx_tls_ooo"); + 
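The conversions in this file replace the driver-local nfp_pr_et() with the core ethtool_sprintf() helper, which formats into the current ETH_GSTRING_LEN slot and advances the cursor by itself, so the caller no longer threads a return value through every line. A minimal sketch of a string-table builder in that style:

#include <linux/ethtool.h>

static void example_fill_strings(u8 *data, unsigned int nqueues)
{
	unsigned int i;

	for (i = 0; i < nqueues; i++) {
		ethtool_sprintf(&data, "rxq_%u_pkts", i);
		ethtool_sprintf(&data, "rxq_%u_bytes", i);
	}

	ethtool_sprintf(&data, "hw_rx_csum_ok");
}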
ethtool_sprintf(&data, "tx_tls_drop_no_sync_data"); + + ethtool_sprintf(&data, "hw_tls_no_space"); + ethtool_sprintf(&data, "rx_tls_resync_req_ok"); + ethtool_sprintf(&data, "rx_tls_resync_req_ign"); + ethtool_sprintf(&data, "rx_tls_resync_sent"); return data; } @@ -550,19 +539,19 @@ nfp_vnic_get_hw_stats_strings(u8 *data, unsigned int num_vecs, bool repr) swap_off = repr * NN_ET_SWITCH_STATS_LEN; for (i = 0; i < NN_ET_SWITCH_STATS_LEN; i++) - data = nfp_pr_et(data, nfp_net_et_stats[i + swap_off].name); + ethtool_sprintf(&data, nfp_net_et_stats[i + swap_off].name); for (i = NN_ET_SWITCH_STATS_LEN; i < NN_ET_SWITCH_STATS_LEN * 2; i++) - data = nfp_pr_et(data, nfp_net_et_stats[i - swap_off].name); + ethtool_sprintf(&data, nfp_net_et_stats[i - swap_off].name); for (i = NN_ET_SWITCH_STATS_LEN * 2; i < NN_ET_GLOBAL_STATS_LEN; i++) - data = nfp_pr_et(data, nfp_net_et_stats[i].name); + ethtool_sprintf(&data, nfp_net_et_stats[i].name); for (i = 0; i < num_vecs; i++) { - data = nfp_pr_et(data, "rxq_%u_pkts", i); - data = nfp_pr_et(data, "rxq_%u_bytes", i); - data = nfp_pr_et(data, "txq_%u_pkts", i); - data = nfp_pr_et(data, "txq_%u_bytes", i); + ethtool_sprintf(&data, "rxq_%u_pkts", i); + ethtool_sprintf(&data, "rxq_%u_bytes", i); + ethtool_sprintf(&data, "txq_%u_pkts", i); + ethtool_sprintf(&data, "txq_%u_bytes", i); } return data; @@ -610,15 +599,15 @@ static u8 *nfp_vnic_get_tlv_stats_strings(struct nfp_net *nn, u8 *data) memcpy(data, nfp_tlv_stat_names[id], ETH_GSTRING_LEN); data += ETH_GSTRING_LEN; } else { - data = nfp_pr_et(data, "dev_unknown_stat%u", id); + ethtool_sprintf(&data, "dev_unknown_stat%u", id); } } for (i = 0; i < nn->max_r_vecs; i++) { - data = nfp_pr_et(data, "rxq_%u_pkts", i); - data = nfp_pr_et(data, "rxq_%u_bytes", i); - data = nfp_pr_et(data, "txq_%u_pkts", i); - data = nfp_pr_et(data, "txq_%u_bytes", i); + ethtool_sprintf(&data, "rxq_%u_pkts", i); + ethtool_sprintf(&data, "rxq_%u_bytes", i); + ethtool_sprintf(&data, "txq_%u_pkts", i); + ethtool_sprintf(&data, "txq_%u_bytes", i); } return data; @@ -666,7 +655,7 @@ static u8 *nfp_mac_get_stats_strings(struct net_device *netdev, u8 *data) return data; for (i = 0; i < ARRAY_SIZE(nfp_mac_et_stats); i++) - data = nfp_pr_et(data, "mac.%s", nfp_mac_et_stats[i].name); + ethtool_sprintf(&data, "mac.%s", nfp_mac_et_stats[i].name); return data; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.h b/drivers/net/ethernet/netronome/nfp/nfp_port.h index d7fd203bb180..ae4da189d955 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_port.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_port.h @@ -92,8 +92,6 @@ struct nfp_port { extern const struct ethtool_ops nfp_port_ethtool_ops; -__printf(2, 3) u8 *nfp_pr_et(u8 *data, const char *fmt, ...); - int nfp_port_setup_tc(struct net_device *netdev, enum tc_setup_type type, void *type_data); diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index d3cbb4215f5c..64c6842bd452 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -1044,7 +1044,8 @@ static netdev_tx_t lpc_eth_hard_start_xmit(struct sk_buff *skb, if (pldat->num_used_tx_buffs >= (ENET_TX_DESC - 1)) { /* This function should never be called when there are no - buffers */ + * buffers + */ netif_stop_queue(ndev); spin_unlock_irq(&pldat->lock); WARN(1, "BUG! 
TX request when no free TX buffers!\n"); @@ -1318,7 +1319,8 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) pldat->dma_buff_size = PAGE_ALIGN(pldat->dma_buff_size); /* Allocate a chunk of memory for the DMA ethernet buffers - and descriptors */ + * and descriptors + */ pldat->dma_buff_base_v = dma_alloc_coherent(dev, pldat->dma_buff_size, &dma_handle, @@ -1348,9 +1350,7 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) __lpc_get_mac(pldat, ndev->dev_addr); if (!is_valid_ether_addr(ndev->dev_addr)) { - const char *macaddr = of_get_mac_address(np); - if (!IS_ERR(macaddr)) - ether_addr_copy(ndev->dev_addr, macaddr); + of_get_mac_address(np, ndev->dev_addr); } if (!is_valid_ether_addr(ndev->dev_addr)) eth_hw_addr_random(ndev); @@ -1365,7 +1365,8 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) __lpc_mii_mngt_reset(pldat); /* Force default PHY interface setup in chip, this will probably be - changed by the PHY driver */ + * changed by the PHY driver + */ pldat->link = 0; pldat->speed = 100; pldat->duplex = DUPLEX_FULL; diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h index 55cef5b16aa5..a6823c4d355d 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h @@ -612,15 +612,6 @@ void pch_gbe_free_tx_resources(struct pch_gbe_adapter *adapter, void pch_gbe_free_rx_resources(struct pch_gbe_adapter *adapter, struct pch_gbe_rx_ring *rx_ring); void pch_gbe_update_stats(struct pch_gbe_adapter *adapter); -u32 pch_ch_control_read(struct pci_dev *pdev); -void pch_ch_control_write(struct pci_dev *pdev, u32 val); -u32 pch_ch_event_read(struct pci_dev *pdev); -void pch_ch_event_write(struct pci_dev *pdev, u32 val); -u32 pch_src_uuid_lo_read(struct pci_dev *pdev); -u32 pch_src_uuid_hi_read(struct pci_dev *pdev); -u64 pch_rx_snap_read(struct pci_dev *pdev); -u64 pch_tx_snap_read(struct pci_dev *pdev); -int pch_set_station_address(u8 *addr, struct pci_dev *pdev); /* pch_gbe_param.c */ void pch_gbe_check_options(struct pch_gbe_adapter *adapter); diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index 140cee7c459d..334af49e5add 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/net_tstamp.h> #include <linux/ptp_classify.h> +#include <linux/ptp_pch.h> #include <linux/gpio.h> #define DRV_VERSION "1.01" diff --git a/drivers/net/ethernet/pensando/ionic/Makefile b/drivers/net/ethernet/pensando/ionic/Makefile index 8d3c2d3cb10d..4e7642a2d25f 100644 --- a/drivers/net/ethernet/pensando/ionic/Makefile +++ b/drivers/net/ethernet/pensando/ionic/Makefile @@ -6,3 +6,4 @@ obj-$(CONFIG_IONIC) := ionic.o ionic-y := ionic_main.o ionic_bus_pci.o ionic_devlink.o ionic_dev.o \ ionic_debugfs.o ionic_lif.o ionic_rx_filter.o ionic_ethtool.o \ ionic_txrx.o ionic_stats.o ionic_fw.o +ionic-$(CONFIG_PTP_1588_CLOCK) += ionic_phc.o diff --git a/drivers/net/ethernet/pensando/ionic/ionic.h b/drivers/net/ethernet/pensando/ionic/ionic.h index 084a924431d5..66204106f83e 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic.h +++ b/drivers/net/ethernet/pensando/ionic/ionic.h @@ -20,6 +20,10 @@ struct ionic_lif; #define DEVCMD_TIMEOUT 10 +#define IONIC_PHC_UPDATE_NS 10000000000 /* 10s in nanoseconds */ +#define NORMAL_PPB 1000000000 /* one billion parts per billion */ +#define SCALED_PPM 
(1000000ull << 16) /* 2^16 million parts per 2^16 million */ + struct ionic_vf { u16 index; u8 macaddr[6]; @@ -64,6 +68,8 @@ struct ionic_admin_ctx { union ionic_adminq_comp comp; }; +int ionic_adminq_post(struct ionic_lif *lif, struct ionic_admin_ctx *ctx); +int ionic_adminq_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx, int err); int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx); int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_wait); int ionic_set_dma_mask(struct ionic *ionic); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c index b0d8499d373b..e4a5416adc80 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c @@ -184,6 +184,10 @@ static int ionic_sriov_configure(struct pci_dev *pdev, int num_vfs) struct device *dev = ionic->dev; int ret = 0; + if (ionic->lif && + test_bit(IONIC_LIF_F_FW_RESET, ionic->lif->state)) + return -EBUSY; + if (num_vfs > 0) { ret = pci_enable_sriov(pdev, num_vfs); if (ret) { diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c index fb2b5bf179d7..1dfe962e22e0 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c @@ -14,18 +14,23 @@ static void ionic_watchdog_cb(struct timer_list *t) { struct ionic *ionic = from_timer(ionic, t, watchdog_timer); + struct ionic_lif *lif = ionic->lif; int hb; mod_timer(&ionic->watchdog_timer, round_jiffies(jiffies + ionic->watchdog_period)); - if (!ionic->lif) + if (!lif) return; hb = ionic_heartbeat_check(ionic); + dev_dbg(ionic->dev, "%s: hb %d running %d UP %d\n", + __func__, hb, netif_running(lif->netdev), + test_bit(IONIC_LIF_F_UP, lif->state)); - if (hb >= 0) - ionic_link_status_check_request(ionic->lif, CAN_NOT_SLEEP); + if (hb >= 0 && + !test_bit(IONIC_LIF_F_FW_RESET, lif->state)) + ionic_link_status_check_request(lif, CAN_NOT_SLEEP); } void ionic_init_devinfo(struct ionic *ionic) @@ -74,6 +79,8 @@ int ionic_dev_setup(struct ionic *ionic) idev->intr_status = bar->vaddr + IONIC_BAR0_INTR_STATUS_OFFSET; idev->intr_ctrl = bar->vaddr + IONIC_BAR0_INTR_CTRL_OFFSET; + idev->hwstamp_regs = &idev->dev_info_regs->hwstamp; + sig = ioread32(&idev->dev_info_regs->signature); if (sig != IONIC_DEV_INFO_SIGNATURE) { dev_err(dev, "Incompatible firmware signature %x", sig); @@ -89,9 +96,17 @@ int ionic_dev_setup(struct ionic *ionic) return -EFAULT; } - idev->last_fw_status = 0xff; timer_setup(&ionic->watchdog_timer, ionic_watchdog_cb, 0); ionic->watchdog_period = IONIC_WATCHDOG_SECS * HZ; + + /* set times to ensure the first check will proceed */ + atomic_long_set(&idev->last_check_time, jiffies - 2 * HZ); + idev->last_hb_time = jiffies - 2 * ionic->watchdog_period; + /* init as ready, so no transition if the first check succeeds */ + idev->last_fw_hb = 0; + idev->fw_hb_ready = true; + idev->fw_status_ready = true; + mod_timer(&ionic->watchdog_timer, round_jiffies(jiffies + ionic->watchdog_period)); @@ -105,29 +120,38 @@ int ionic_dev_setup(struct ionic *ionic) int ionic_heartbeat_check(struct ionic *ionic) { struct ionic_dev *idev = &ionic->idev; - unsigned long hb_time; + unsigned long check_time, last_check_time; + bool fw_status_ready, fw_hb_ready; u8 fw_status; - u32 hb; + u32 fw_hb; - /* wait a little more than one second before testing again */ - hb_time = jiffies; - if (time_before(hb_time, (idev->last_hb_time + 
ionic->watchdog_period))) + /* wait at least one second before testing again */ + check_time = jiffies; + last_check_time = atomic_long_read(&idev->last_check_time); +do_check_time: + if (time_before(check_time, last_check_time + HZ)) return 0; + if (!atomic_long_try_cmpxchg_relaxed(&idev->last_check_time, + &last_check_time, check_time)) { + /* if called concurrently, only the first should proceed. */ + dev_dbg(ionic->dev, "%s: do_check_time again\n", __func__); + goto do_check_time; + } /* firmware is useful only if the running bit is set and * fw_status != 0xff (bad PCI read) */ fw_status = ioread8(&idev->dev_info_regs->fw_status); - if (fw_status != 0xff) - fw_status &= IONIC_FW_STS_F_RUNNING; /* use only the run bit */ + fw_status_ready = (fw_status != 0xff) && (fw_status & IONIC_FW_STS_F_RUNNING); /* is this a transition? */ - if (fw_status != idev->last_fw_status && - idev->last_fw_status != 0xff) { + if (fw_status_ready != idev->fw_status_ready) { struct ionic_lif *lif = ionic->lif; bool trigger = false; - if (!fw_status || fw_status == 0xff) { + idev->fw_status_ready = fw_status_ready; + + if (!fw_status_ready) { dev_info(ionic->dev, "FW stopped %u\n", fw_status); if (lif && !test_bit(IONIC_LIF_F_FW_RESET, lif->state)) trigger = true; @@ -141,44 +165,47 @@ int ionic_heartbeat_check(struct ionic *ionic) struct ionic_deferred_work *work; work = kzalloc(sizeof(*work), GFP_ATOMIC); - if (!work) { - dev_err(ionic->dev, "LIF reset trigger dropped\n"); - } else { + if (work) { work->type = IONIC_DW_TYPE_LIF_RESET; - if (fw_status & IONIC_FW_STS_F_RUNNING && - fw_status != 0xff) - work->fw_status = 1; + work->fw_status = fw_status_ready; ionic_lif_deferred_enqueue(&lif->deferred, work); } } } - idev->last_fw_status = fw_status; - if (!fw_status || fw_status == 0xff) + if (!fw_status_ready) return -ENXIO; - /* early FW has no heartbeat, else FW will return non-zero */ - hb = ioread32(&idev->dev_info_regs->fw_heartbeat); - if (!hb) + /* wait at least one watchdog period since the last heartbeat */ + last_check_time = idev->last_hb_time; + if (time_before(check_time, last_check_time + ionic->watchdog_period)) return 0; - /* are we stalled?
*/ - if (hb == idev->last_hb) { - /* only complain once for each stall seen */ - if (idev->last_hb_time != 1) { - dev_info(ionic->dev, "FW heartbeat stalled at %d\n", - idev->last_hb); - idev->last_hb_time = 1; - } + fw_hb = ioread32(&idev->dev_info_regs->fw_heartbeat); + fw_hb_ready = fw_hb != idev->last_fw_hb; - return -ENXIO; + /* early FW version had no heartbeat, so fake it */ + if (!fw_hb_ready && !fw_hb) + fw_hb_ready = true; + + dev_dbg(ionic->dev, "%s: fw_hb %u last_fw_hb %u ready %u\n", + __func__, fw_hb, idev->last_fw_hb, fw_hb_ready); + + idev->last_fw_hb = fw_hb; + + /* log a transition */ + if (fw_hb_ready != idev->fw_hb_ready) { + idev->fw_hb_ready = fw_hb_ready; + if (!fw_hb_ready) + dev_info(ionic->dev, "FW heartbeat stalled at %d\n", fw_hb); + else + dev_info(ionic->dev, "FW heartbeat restored at %d\n", fw_hb); } - if (idev->last_hb_time == 1) - dev_info(ionic->dev, "FW heartbeat restored at %d\n", hb); + if (!fw_hb_ready) + return -ENXIO; - idev->last_hb = hb; - idev->last_hb_time = hb_time; + idev->last_hb_time = check_time; return 0; } @@ -585,9 +612,9 @@ void ionic_q_sg_map(struct ionic_queue *q, void *base, dma_addr_t base_pa) void ionic_q_post(struct ionic_queue *q, bool ring_doorbell, ionic_desc_cb cb, void *cb_arg) { - struct device *dev = q->lif->ionic->dev; struct ionic_desc_info *desc_info; struct ionic_lif *lif = q->lif; + struct device *dev = q->dev; desc_info = &q->info[q->head_idx]; desc_info->cb = cb; @@ -629,7 +656,7 @@ void ionic_q_service(struct ionic_queue *q, struct ionic_cq_info *cq_info, /* stop index must be for a descriptor that is not yet completed */ if (unlikely(!ionic_q_is_posted(q, stop_index))) - dev_err(q->lif->ionic->dev, + dev_err(q->dev, "ionic stop is not posted %s stop %u tail %u head %u\n", q->name, stop_index, q->tail_idx, q->head_idx); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h index 690768ff0143..c25cf9b744c5 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h @@ -4,6 +4,7 @@ #ifndef _IONIC_DEV_H_ #define _IONIC_DEV_H_ +#include <linux/atomic.h> #include <linux/mutex.h> #include <linux/workqueue.h> @@ -58,6 +59,7 @@ static_assert(sizeof(struct ionic_dev_getattr_cmd) == 64); static_assert(sizeof(struct ionic_dev_getattr_comp) == 16); static_assert(sizeof(struct ionic_dev_setattr_cmd) == 64); static_assert(sizeof(struct ionic_dev_setattr_comp) == 16); +static_assert(sizeof(struct ionic_lif_setphc_cmd) == 64); /* Port commands */ static_assert(sizeof(struct ionic_port_identify_cmd) == 64); @@ -134,10 +136,13 @@ struct ionic_devinfo { struct ionic_dev { union ionic_dev_info_regs __iomem *dev_info_regs; union ionic_dev_cmd_regs __iomem *dev_cmd_regs; + struct ionic_hwstamp_regs __iomem *hwstamp_regs; + atomic_long_t last_check_time; unsigned long last_hb_time; - u32 last_hb; - u8 last_fw_status; + u32 last_fw_hb; + bool fw_hb_ready; + bool fw_status_ready; u64 __iomem *db_pages; dma_addr_t phy_db_pages; @@ -170,11 +175,20 @@ typedef void (*ionic_desc_cb)(struct ionic_queue *q, struct ionic_desc_info *desc_info, struct ionic_cq_info *cq_info, void *cb_arg); -struct ionic_page_info { +#define IONIC_PAGE_SIZE PAGE_SIZE +#define IONIC_PAGE_SPLIT_SZ (PAGE_SIZE / 2) +#define IONIC_PAGE_GFP_MASK (GFP_ATOMIC | __GFP_NOWARN |\ + __GFP_COMP | __GFP_MEMALLOC) + +struct ionic_buf_info { struct page *page; dma_addr_t dma_addr; + u32 page_offset; + u32 len; }; +#define IONIC_MAX_FRAGS (1 + IONIC_TX_MAX_SG_ELEMS_V1) + 
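/* Editor's sketch (an assumption about usage, not code from this patch):
 * the new struct ionic_buf_info above carries a page_offset and length so
 * that, with IONIC_PAGE_SPLIT_SZ == PAGE_SIZE / 2, a single page can back
 * two RX buffers in turn. Roughly:
 */
static bool demo_buf_recycle(struct ionic_buf_info *buf)
{
	buf->page_offset += IONIC_PAGE_SPLIT_SZ;
	if (buf->page_offset >= IONIC_PAGE_SIZE) {
		buf->page_offset = 0;
		return false;	/* page exhausted: map a fresh one */
	}
	return true;		/* remaining half can be posted again */
}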
struct ionic_desc_info { union { void *desc; @@ -187,8 +201,9 @@ struct ionic_desc_info { struct ionic_txq_sg_desc *txq_sg_desc; struct ionic_rxq_sg_desc *rxq_sgl_desc; }; - unsigned int npages; - struct ionic_page_info pages[IONIC_RX_MAX_SG_ELEMS + 1]; + unsigned int bytes; + unsigned int nbufs; + struct ionic_buf_info bufs[IONIC_MAX_FRAGS]; ionic_desc_cb cb; void *cb_arg; }; @@ -199,10 +214,13 @@ struct ionic_queue { struct device *dev; struct ionic_lif *lif; struct ionic_desc_info *info; + u64 dbval; u16 head_idx; u16 tail_idx; unsigned int index; unsigned int num_descs; + unsigned int max_sg_elems; + u64 features; u64 dbell_count; u64 stop; u64 wake; @@ -211,7 +229,6 @@ struct ionic_queue { unsigned int type; unsigned int hw_index; unsigned int hw_type; - u64 dbval; union { void *base; struct ionic_txq_desc *txq; @@ -229,7 +246,7 @@ struct ionic_queue { unsigned int sg_desc_size; unsigned int pid; char name[IONIC_QUEUE_NAME_MAX_SZ]; -}; +} ____cacheline_aligned_in_smp; #define IONIC_INTR_INDEX_NOT_ASSIGNED -1 #define IONIC_INTR_NAME_MAX_SZ 32 @@ -256,7 +273,7 @@ struct ionic_cq { u64 compl_count; void *base; dma_addr_t base_pa; -}; +} ____cacheline_aligned_in_smp; struct ionic; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c index 0832bedcb3b4..6583be570e45 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c @@ -29,11 +29,9 @@ static void ionic_get_stats_strings(struct ionic_lif *lif, u8 *buf) static void ionic_get_stats(struct net_device *netdev, struct ethtool_stats *stats, u64 *buf) { - struct ionic_lif *lif; + struct ionic_lif *lif = netdev_priv(netdev); u32 i; - lif = netdev_priv(netdev); - memset(buf, 0, stats->n_stats * sizeof(*buf)); for (i = 0; i < ionic_num_stats_grps; i++) ionic_stats_groups[i].get_values(lif, &buf); @@ -209,6 +207,14 @@ static int ionic_get_link_ksettings(struct net_device *netdev, ethtool_link_ksettings_add_link_mode(ks, supported, 10000baseER_Full); break; + case IONIC_XCVR_PID_SFP_10GBASE_T: + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseT_Full); + break; + case IONIC_XCVR_PID_SFP_1000BASE_T: + ethtool_link_ksettings_add_link_mode(ks, supported, + 1000baseT_Full); + break; case IONIC_XCVR_PID_UNKNOWN: /* This means there's no module plugged in */ break; @@ -264,12 +270,10 @@ static int ionic_set_link_ksettings(struct net_device *netdev, const struct ethtool_link_ksettings *ks) { struct ionic_lif *lif = netdev_priv(netdev); + struct ionic_dev *idev = &lif->ionic->idev; struct ionic *ionic = lif->ionic; - struct ionic_dev *idev; int err = 0; - idev = &lif->ionic->idev; - /* set autoneg */ if (ks->base.autoneg != idev->port_info->config.an_enable) { mutex_lock(&ionic->dev_cmd_lock); @@ -845,6 +849,98 @@ static int ionic_get_module_eeprom(struct net_device *netdev, return 0; } +static int ionic_get_ts_info(struct net_device *netdev, + struct ethtool_ts_info *info) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic *ionic = lif->ionic; + __le64 mask; + + if (!lif->phc || !lif->phc->ptp) + return ethtool_op_get_ts_info(netdev, info); + + info->phc_index = ptp_clock_index(lif->phc->ptp); + + info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + /* tx modes */ + + info->tx_types = BIT(HWTSTAMP_TX_OFF) | + BIT(HWTSTAMP_TX_ON); + 
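/* Editor's note on the ionic_get_ts_info() checks that follow: each
 * ethtool RX filter is advertised only when the firmware's
 * hwstamp_rx_filters mask covers the whole ionic_pkt_class group, hence
 * the repeated "(hwstamp_rx_filters & mask) == mask" tests rather than a
 * simple nonzero test.
 */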
+ mask = cpu_to_le64(BIT_ULL(IONIC_TXSTAMP_ONESTEP_SYNC)); + if (ionic->ident.lif.eth.hwstamp_tx_modes & mask) + info->tx_types |= BIT(HWTSTAMP_TX_ONESTEP_SYNC); + + mask = cpu_to_le64(BIT_ULL(IONIC_TXSTAMP_ONESTEP_P2P)); + if (ionic->ident.lif.eth.hwstamp_tx_modes & mask) + info->tx_types |= BIT(HWTSTAMP_TX_ONESTEP_P2P); + + /* rx filters */ + + info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | + BIT(HWTSTAMP_FILTER_ALL); + + mask = cpu_to_le64(IONIC_PKT_CLS_NTP_ALL); + if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask) + info->rx_filters |= BIT(HWTSTAMP_FILTER_NTP_ALL); + + mask = cpu_to_le64(IONIC_PKT_CLS_PTP1_SYNC); + if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask) + info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V1_L4_SYNC); + + mask = cpu_to_le64(IONIC_PKT_CLS_PTP1_DREQ); + if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask) + info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ); + + mask = cpu_to_le64(IONIC_PKT_CLS_PTP1_ALL); + if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask) + info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V1_L4_EVENT); + + mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_L4_SYNC); + if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask) + info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V2_L4_SYNC); + + mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_L4_DREQ); + if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask) + info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ); + + mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_L4_ALL); + if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask) + info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V2_L4_EVENT); + + mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_L2_SYNC); + if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask) + info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V2_L2_SYNC); + + mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_L2_DREQ); + if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask) + info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ); + + mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_L2_ALL); + if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask) + info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V2_L2_EVENT); + + mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_SYNC); + if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask) + info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V2_SYNC); + + mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_DREQ); + if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask) + info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V2_DELAY_REQ); + + mask = cpu_to_le64(IONIC_PKT_CLS_PTP2_ALL); + if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) == mask) + info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V2_EVENT); + + return 0; +} + static int ionic_nway_reset(struct net_device *netdev) { struct ionic_lif *lif = netdev_priv(netdev); @@ -902,6 +998,7 @@ static const struct ethtool_ops ionic_ethtool_ops = { .set_pauseparam = ionic_set_pauseparam, .get_fecparam = ionic_get_fecparam, .set_fecparam = ionic_set_fecparam, + .get_ts_info = ionic_get_ts_info, .nway_reset = ionic_nway_reset, }; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h index 31ccfcdc2b0a..0478b48d9895 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_if.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h @@ -34,6 +34,7 @@ enum ionic_cmd_opcode { IONIC_CMD_LIF_RESET = 22, IONIC_CMD_LIF_GETATTR = 23, IONIC_CMD_LIF_SETATTR = 24, + IONIC_CMD_LIF_SETPHC = 25, IONIC_CMD_RX_MODE_SET = 30, IONIC_CMD_RX_FILTER_ADD = 31, @@ -269,6 +270,9 @@ union 
ionic_drv_identity { * value in usecs to device units using: * device units = usecs * mult / div * @eq_count: Number of shared event queues + * @hwstamp_mask: Bitmask for subtraction of hardware tick values. + * @hwstamp_mult: Hardware tick to nanosecond multiplier. + * @hwstamp_shift: Hardware tick to nanosecond divisor (power of two). */ union ionic_dev_identity { struct { @@ -283,6 +287,9 @@ union ionic_dev_identity { __le32 intr_coal_mult; __le32 intr_coal_div; __le32 eq_count; + __le64 hwstamp_mask; + __le32 hwstamp_mult; + __le32 hwstamp_shift; }; __le32 words[478]; }; @@ -320,7 +327,7 @@ struct ionic_lif_identify_comp { /** * enum ionic_lif_capability - LIF capabilities * @IONIC_LIF_CAP_ETH: LIF supports Ethernet - * @IONIC_LIF_CAP_RDMA: LIF support RDMA + * @IONIC_LIF_CAP_RDMA: LIF supports RDMA */ enum ionic_lif_capability { IONIC_LIF_CAP_ETH = BIT(0), @@ -346,6 +353,68 @@ enum ionic_logical_qtype { }; /** + * enum ionic_q_feature - Common Features for most queue types + * + * Common features use bits 0-15. Per-queue-type features use higher bits. + * + * @IONIC_QIDENT_F_CQ: Queue has completion ring + * @IONIC_QIDENT_F_SG: Queue has scatter/gather ring + * @IONIC_QIDENT_F_EQ: Queue can use event queue + * @IONIC_QIDENT_F_CMB: Queue is in cmb bar + * @IONIC_Q_F_2X_DESC: Double main descriptor size + * @IONIC_Q_F_2X_CQ_DESC: Double cq descriptor size + * @IONIC_Q_F_2X_SG_DESC: Double sg descriptor size + * @IONIC_Q_F_4X_DESC: Quadruple main descriptor size + * @IONIC_Q_F_4X_CQ_DESC: Quadruple cq descriptor size + * @IONIC_Q_F_4X_SG_DESC: Quadruple sg descriptor size + */ +enum ionic_q_feature { + IONIC_QIDENT_F_CQ = BIT_ULL(0), + IONIC_QIDENT_F_SG = BIT_ULL(1), + IONIC_QIDENT_F_EQ = BIT_ULL(2), + IONIC_QIDENT_F_CMB = BIT_ULL(3), + IONIC_Q_F_2X_DESC = BIT_ULL(4), + IONIC_Q_F_2X_CQ_DESC = BIT_ULL(5), + IONIC_Q_F_2X_SG_DESC = BIT_ULL(6), + IONIC_Q_F_4X_DESC = BIT_ULL(7), + IONIC_Q_F_4X_CQ_DESC = BIT_ULL(8), + IONIC_Q_F_4X_SG_DESC = BIT_ULL(9), +}; + +/** + * enum ionic_rxq_feature - RXQ-specific Features + * + * Per-queue-type features use bits 16 and higher. + * + * @IONIC_RXQ_F_HWSTAMP: Queue supports Hardware Timestamping + */ +enum ionic_rxq_feature { + IONIC_RXQ_F_HWSTAMP = BIT_ULL(16), +}; + +/** + * enum ionic_txq_feature - TXQ-specific Features + * + * Per-queue-type features use bits 16 and higher. + * + * @IONIC_TXQ_F_HWSTAMP: Queue supports Hardware Timestamping + */ +enum ionic_txq_feature { + IONIC_TXQ_F_HWSTAMP = BIT_ULL(16), +}; + +/** + * enum ionic_hwstamp_bits - Hardware timestamp decoding bits + * @IONIC_HWSTAMP_INVALID: Invalid hardware timestamp value + * @IONIC_HWSTAMP_CQ_NEGOFFSET: Timestamp field negative offset + * from the base cq descriptor.
+ */ +enum ionic_hwstamp_bits { + IONIC_HWSTAMP_INVALID = ~0ull, + IONIC_HWSTAMP_CQ_NEGOFFSET = 8, +}; + +/** * struct ionic_lif_logical_qtype - Descriptor of logical to HW queue type * @qtype: Hardware Queue Type * @qid_count: Number of Queue IDs of the logical type @@ -404,7 +473,9 @@ union ionic_lif_config { * @max_ucast_filters: Number of perfect unicast addresses supported * @max_mcast_filters: Number of perfect multicast addresses supported * @min_frame_size: Minimum size of frames to be sent - * @max_frame_size: Maximim size of frames to be sent + * @max_frame_size: Maximum size of frames to be sent + * @hwstamp_tx_modes: Bitmask of BIT_ULL(enum ionic_txstamp_mode) + * @hwstamp_rx_filters: Bitmask of enum ionic_pkt_class * @config: LIF config struct with features, mtu, mac, q counts * * @rdma: RDMA identify structure @@ -438,7 +509,10 @@ union ionic_lif_identity { __le16 rss_ind_tbl_sz; __le32 min_frame_size; __le32 max_frame_size; - u8 rsvd2[106]; + u8 rsvd2[2]; + __le64 hwstamp_tx_modes; + __le64 hwstamp_rx_filters; + u8 rsvd3[88]; union ionic_lif_config config; } __packed eth; @@ -529,7 +603,7 @@ struct ionic_q_identify_comp { * union ionic_q_identity - queue identity information * @version: Queue type version that can be used with FW * @supported: Bitfield of queue versions, first bit = ver 0 - * @features: Queue features + * @features: Queue features (enum ionic_q_feature, etc) * @desc_sz: Descriptor size * @comp_sz: Completion descriptor size * @sg_desc_sz: Scatter/Gather descriptor size @@ -541,10 +615,6 @@ union ionic_q_identity { u8 version; u8 supported; u8 rsvd[6]; -#define IONIC_QIDENT_F_CQ 0x01 /* queue has completion ring */ -#define IONIC_QIDENT_F_SG 0x02 /* queue has scatter/gather ring */ -#define IONIC_QIDENT_F_EQ 0x04 /* queue can use event queue */ -#define IONIC_QIDENT_F_CMB 0x08 /* queue is in cmb bar */ __le64 features; __le16 desc_sz; __le16 comp_sz; @@ -585,6 +655,7 @@ union ionic_q_identity { * @ring_base: Queue ring base address * @cq_ring_base: Completion queue ring base address * @sg_ring_base: Scatter/Gather ring base address + * @features: Mask of queue features to enable, if not in the flags above. */ struct ionic_q_init_cmd { u8 opcode; @@ -608,7 +679,8 @@ struct ionic_q_init_cmd { __le64 ring_base; __le64 cq_ring_base; __le64 sg_ring_base; - u8 rsvd2[20]; + u8 rsvd2[12]; + __le64 features; } __packed; /** @@ -692,7 +764,7 @@ enum ionic_txq_desc_opcode { * checksums are also updated. * * IONIC_TXQ_DESC_OPCODE_TSO: - * Device preforms TCP segmentation offload + * Device performs TCP segmentation offload * (TSO). @hdr_len is the number of bytes * to the end of TCP header (the offset to * the TCP payload). 
@mss is the desired @@ -982,13 +1054,13 @@ struct ionic_rxq_comp { }; enum ionic_pkt_type { - IONIC_PKT_TYPE_NON_IP = 0x000, - IONIC_PKT_TYPE_IPV4 = 0x001, - IONIC_PKT_TYPE_IPV4_TCP = 0x003, - IONIC_PKT_TYPE_IPV4_UDP = 0x005, - IONIC_PKT_TYPE_IPV6 = 0x008, - IONIC_PKT_TYPE_IPV6_TCP = 0x018, - IONIC_PKT_TYPE_IPV6_UDP = 0x028, + IONIC_PKT_TYPE_NON_IP = 0x00, + IONIC_PKT_TYPE_IPV4 = 0x01, + IONIC_PKT_TYPE_IPV4_TCP = 0x03, + IONIC_PKT_TYPE_IPV4_UDP = 0x05, + IONIC_PKT_TYPE_IPV6 = 0x08, + IONIC_PKT_TYPE_IPV6_TCP = 0x18, + IONIC_PKT_TYPE_IPV6_UDP = 0x28, /* below types are only used if encap offloads are enabled on lif */ IONIC_PKT_TYPE_ENCAP_NON_IP = 0x40, IONIC_PKT_TYPE_ENCAP_IPV4 = 0x41, @@ -1019,7 +1091,64 @@ enum ionic_eth_hw_features { IONIC_ETH_HW_TSO_UDP_CSUM = BIT(16), IONIC_ETH_HW_RX_CSUM_GENEVE = BIT(17), IONIC_ETH_HW_TX_CSUM_GENEVE = BIT(18), - IONIC_ETH_HW_TSO_GENEVE = BIT(19) + IONIC_ETH_HW_TSO_GENEVE = BIT(19), + IONIC_ETH_HW_TIMESTAMP = BIT(20), +}; + +/** + * enum ionic_pkt_class - Packet classification mask. + * + * Used with rx steering filter, packets indicated by the mask can be steered + * toward a specific receive queue. + * + * @IONIC_PKT_CLS_NTP_ALL: All NTP packets. + * @IONIC_PKT_CLS_PTP1_SYNC: PTPv1 sync + * @IONIC_PKT_CLS_PTP1_DREQ: PTPv1 delay-request + * @IONIC_PKT_CLS_PTP1_ALL: PTPv1 all packets + * @IONIC_PKT_CLS_PTP2_L4_SYNC: PTPv2-UDP sync + * @IONIC_PKT_CLS_PTP2_L4_DREQ: PTPv2-UDP delay-request + * @IONIC_PKT_CLS_PTP2_L4_ALL: PTPv2-UDP all packets + * @IONIC_PKT_CLS_PTP2_L2_SYNC: PTPv2-ETH sync + * @IONIC_PKT_CLS_PTP2_L2_DREQ: PTPv2-ETH delay-request + * @IONIC_PKT_CLS_PTP2_L2_ALL: PTPv2-ETH all packets + * @IONIC_PKT_CLS_PTP2_SYNC: PTPv2 sync + * @IONIC_PKT_CLS_PTP2_DREQ: PTPv2 delay-request + * @IONIC_PKT_CLS_PTP2_ALL: PTPv2 all packets + * @IONIC_PKT_CLS_PTP_SYNC: PTP sync + * @IONIC_PKT_CLS_PTP_DREQ: PTP delay-request + * @IONIC_PKT_CLS_PTP_ALL: PTP all packets + */ +enum ionic_pkt_class { + IONIC_PKT_CLS_NTP_ALL = BIT(0), + + IONIC_PKT_CLS_PTP1_SYNC = BIT(1), + IONIC_PKT_CLS_PTP1_DREQ = BIT(2), + IONIC_PKT_CLS_PTP1_ALL = BIT(3) | + IONIC_PKT_CLS_PTP1_SYNC | IONIC_PKT_CLS_PTP1_DREQ, + + IONIC_PKT_CLS_PTP2_L4_SYNC = BIT(4), + IONIC_PKT_CLS_PTP2_L4_DREQ = BIT(5), + IONIC_PKT_CLS_PTP2_L4_ALL = BIT(6) | + IONIC_PKT_CLS_PTP2_L4_SYNC | IONIC_PKT_CLS_PTP2_L4_DREQ, + + IONIC_PKT_CLS_PTP2_L2_SYNC = BIT(7), + IONIC_PKT_CLS_PTP2_L2_DREQ = BIT(8), + IONIC_PKT_CLS_PTP2_L2_ALL = BIT(9) | + IONIC_PKT_CLS_PTP2_L2_SYNC | IONIC_PKT_CLS_PTP2_L2_DREQ, + + IONIC_PKT_CLS_PTP2_SYNC = + IONIC_PKT_CLS_PTP2_L4_SYNC | IONIC_PKT_CLS_PTP2_L2_SYNC, + IONIC_PKT_CLS_PTP2_DREQ = + IONIC_PKT_CLS_PTP2_L4_DREQ | IONIC_PKT_CLS_PTP2_L2_DREQ, + IONIC_PKT_CLS_PTP2_ALL = + IONIC_PKT_CLS_PTP2_L4_ALL | IONIC_PKT_CLS_PTP2_L2_ALL, + + IONIC_PKT_CLS_PTP_SYNC = + IONIC_PKT_CLS_PTP1_SYNC | IONIC_PKT_CLS_PTP2_SYNC, + IONIC_PKT_CLS_PTP_DREQ = + IONIC_PKT_CLS_PTP1_DREQ | IONIC_PKT_CLS_PTP2_DREQ, + IONIC_PKT_CLS_PTP_ALL = + IONIC_PKT_CLS_PTP1_ALL | IONIC_PKT_CLS_PTP2_ALL, }; /** @@ -1111,6 +1240,8 @@ enum ionic_xcvr_pid { IONIC_XCVR_PID_QSFP_100G_CWDM4 = 69, IONIC_XCVR_PID_QSFP_100G_PSM4 = 70, IONIC_XCVR_PID_SFP_25GBASE_ACC = 71, + IONIC_XCVR_PID_SFP_10GBASE_T = 72, + IONIC_XCVR_PID_SFP_1000BASE_T = 73, }; /** @@ -1327,11 +1458,25 @@ enum ionic_stats_ctl_cmd { }; /** + * enum ionic_txstamp_mode - List of TX Timestamping Modes + * @IONIC_TXSTAMP_OFF: Disable TX hardware timestamping. + * @IONIC_TXSTAMP_ON: Enable local TX hardware timestamping. + * @IONIC_TXSTAMP_ONESTEP_SYNC: Modify TX PTP Sync packets.
+ * @IONIC_TXSTAMP_ONESTEP_P2P: Modify TX PTP Sync and PDelayResp. + */ +enum ionic_txstamp_mode { + IONIC_TXSTAMP_OFF = 0, + IONIC_TXSTAMP_ON = 1, + IONIC_TXSTAMP_ONESTEP_SYNC = 2, + IONIC_TXSTAMP_ONESTEP_P2P = 3, +}; + +/** * enum ionic_port_attr - List of device attributes * @IONIC_PORT_ATTR_STATE: Port state attribute * @IONIC_PORT_ATTR_SPEED: Port speed attribute * @IONIC_PORT_ATTR_MTU: Port MTU attribute - * @IONIC_PORT_ATTR_AUTONEG: Port autonegotation attribute + * @IONIC_PORT_ATTR_AUTONEG: Port autonegotiation attribute * @IONIC_PORT_ATTR_FEC: Port FEC attribute * @IONIC_PORT_ATTR_PAUSE: Port pause attribute * @IONIC_PORT_ATTR_LOOPBACK: Port loopback attribute @@ -1568,6 +1713,7 @@ enum ionic_rss_hash_types { * @IONIC_LIF_ATTR_FEATURES: LIF features attribute * @IONIC_LIF_ATTR_RSS: LIF RSS attribute * @IONIC_LIF_ATTR_STATS_CTRL: LIF statistics control attribute + * @IONIC_LIF_ATTR_TXSTAMP: LIF TX timestamping mode */ enum ionic_lif_attr { IONIC_LIF_ATTR_STATE = 0, @@ -1577,6 +1723,7 @@ enum ionic_lif_attr { IONIC_LIF_ATTR_FEATURES = 4, IONIC_LIF_ATTR_RSS = 5, IONIC_LIF_ATTR_STATS_CTRL = 6, + IONIC_LIF_ATTR_TXSTAMP = 7, }; /** @@ -1594,6 +1741,7 @@ enum ionic_lif_attr { * @key: The hash secret key * @addr: Address for the indirection table shared memory * @stats_ctl: stats control commands (enum ionic_stats_ctl_cmd) + * @txstamp_mode: TX Timestamping Mode (enum ionic_txstamp_mode) */ struct ionic_lif_setattr_cmd { u8 opcode; @@ -1612,6 +1760,7 @@ struct ionic_lif_setattr_cmd { __le64 addr; } rss; u8 stats_ctl; + __le16 txstamp_mode; u8 rsvd[60]; } __packed; }; @@ -1656,6 +1805,7 @@ struct ionic_lif_getattr_cmd { * @mtu: Mtu * @mac: Station mac * @features: Features (enum ionic_eth_hw_features) + * @txstamp_mode: TX Timestamping Mode (enum ionic_txstamp_mode) * @color: Color bit */ struct ionic_lif_getattr_comp { @@ -1667,11 +1817,35 @@ struct ionic_lif_getattr_comp { __le32 mtu; u8 mac[6]; __le64 features; + __le16 txstamp_mode; u8 rsvd2[11]; } __packed; u8 color; }; +/** + * struct ionic_lif_setphc_cmd - Set LIF PTP Hardware Clock + * @opcode: Opcode + * @lif_index: LIF index + * @tick: Hardware stamp tick of an instant in time. + * @nsec: Nanosecond stamp of the same instant. + * @frac: Fractional nanoseconds at the same instant. + * @mult: Cycle to nanosecond multiplier. + * @shift: Cycle to nanosecond divisor (power of two).
+ */ +struct ionic_lif_setphc_cmd { + u8 opcode; + u8 rsvd1; + __le16 lif_index; + u8 rsvd2[4]; + __le64 tick; + __le64 nsec; + __le64 frac; + __le32 mult; + __le32 shift; + u8 rsvd3[24]; +}; + enum ionic_rx_mode { IONIC_RX_MODE_F_UNICAST = BIT(0), IONIC_RX_MODE_F_MULTICAST = BIT(1), @@ -1704,9 +1878,10 @@ struct ionic_rx_mode_set_cmd { typedef struct ionic_admin_comp ionic_rx_mode_set_comp; enum ionic_rx_filter_match_type { - IONIC_RX_FILTER_MATCH_VLAN = 0, - IONIC_RX_FILTER_MATCH_MAC, - IONIC_RX_FILTER_MATCH_MAC_VLAN, + IONIC_RX_FILTER_MATCH_VLAN = 0x0, + IONIC_RX_FILTER_MATCH_MAC = 0x1, + IONIC_RX_FILTER_MATCH_MAC_VLAN = 0x2, + IONIC_RX_FILTER_STEER_PKTCLASS = 0x10, }; /** @@ -1723,6 +1898,7 @@ enum ionic_rx_filter_match_type { * @mac_vlan: MACVLAN filter * @vlan: VLAN ID * @addr: MAC address (network-byte order) + * @pkt_class: Packet classification filter */ struct ionic_rx_filter_add_cmd { u8 opcode; @@ -1741,8 +1917,9 @@ struct ionic_rx_filter_add_cmd { __le16 vlan; u8 addr[6]; } mac_vlan; + __le64 pkt_class; u8 rsvd[54]; - }; + } __packed; }; /** @@ -1951,8 +2128,8 @@ enum ionic_qos_sched_type { * @pfc_cos: Priority-Flow Control class of service * @dwrr_weight: QoS class scheduling weight * @strict_rlmt: Rate limit for strict priority scheduling - * @rw_dot1q_pcp: Rewrite dot1q pcp to this value (valid iff F_RW_DOT1Q_PCP) - * @rw_ip_dscp: Rewrite ip dscp to this value (valid iff F_RW_IP_DSCP) + * @rw_dot1q_pcp: Rewrite dot1q pcp to value (valid iff F_RW_DOT1Q_PCP) + * @rw_ip_dscp: Rewrite ip dscp to value (valid iff F_RW_IP_DSCP) * @dot1q_pcp: Dot1q pcp value * @ndscp: Number of valid dscp values in the ip_dscp field * @ip_dscp: IP dscp values @@ -2743,6 +2920,16 @@ union ionic_dev_cmd_comp { }; /** + * struct ionic_hwstamp_regs - Hardware current timestamp registers + * @tick_low: Low 32 bits of hardware timestamp + * @tick_high: High 32 bits of hardware timestamp + */ +struct ionic_hwstamp_regs { + u32 tick_low; + u32 tick_high; +}; + +/** * union ionic_dev_info_regs - Device info register format (read-only) * @signature: Signature value of 0x44455649 ('DEVI') * @version: Current version of info @@ -2752,6 +2939,7 @@ union ionic_dev_cmd_comp { * @fw_heartbeat: Firmware heartbeat counter * @serial_num: Serial number * @fw_version: Firmware version + * @hwstamp_regs: Hardware current timestamp registers */ union ionic_dev_info_regs { #define IONIC_DEVINFO_FWVERS_BUFLEN 32 @@ -2766,6 +2954,8 @@ union ionic_dev_info_regs { u32 fw_heartbeat; char fw_version[IONIC_DEVINFO_FWVERS_BUFLEN]; char serial_num[IONIC_DEVINFO_SERIAL_BUFLEN]; + u8 rsvd_pad1024[948]; + struct ionic_hwstamp_regs hwstamp; }; u32 words[512]; }; @@ -2813,6 +3003,7 @@ union ionic_adminq_cmd { struct ionic_q_control_cmd q_control; struct ionic_lif_setattr_cmd lif_setattr; struct ionic_lif_getattr_cmd lif_getattr; + struct ionic_lif_setphc_cmd lif_setphc; struct ionic_rx_mode_set_cmd rx_mode_set; struct ionic_rx_filter_add_cmd rx_filter_add; struct ionic_rx_filter_del_cmd rx_filter_del; @@ -2829,6 +3020,7 @@ union ionic_adminq_comp { struct ionic_q_init_comp q_init; struct ionic_lif_setattr_comp lif_setattr; struct ionic_lif_getattr_comp lif_getattr; + struct ionic_admin_comp lif_setphc; struct ionic_rx_filter_add_comp rx_filter_add; struct ionic_fw_control_comp fw_control; }; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 11140915c2da..af3a5368529c 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ 
b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -120,23 +120,34 @@ static void ionic_link_status_check(struct ionic_lif *lif) if (!test_bit(IONIC_LIF_F_LINK_CHECK_REQUESTED, lif->state)) return; + /* Don't put carrier back up if we're in a broken state */ + if (test_bit(IONIC_LIF_F_BROKEN, lif->state)) { + clear_bit(IONIC_LIF_F_LINK_CHECK_REQUESTED, lif->state); + return; + } + link_status = le16_to_cpu(lif->info->status.link_status); link_up = link_status == IONIC_PORT_OPER_STATUS_UP; if (link_up) { - if (lif->netdev->flags & IFF_UP && netif_running(lif->netdev)) { + int err = 0; + + if (netdev->flags & IFF_UP && netif_running(netdev)) { mutex_lock(&lif->queue_lock); - ionic_start_queues(lif); + err = ionic_start_queues(lif); + if (err && err != -EBUSY) { + netdev_err(lif->netdev, + "Failed to start queues: %d\n", err); + set_bit(IONIC_LIF_F_BROKEN, lif->state); + netif_carrier_off(lif->netdev); + } mutex_unlock(&lif->queue_lock); } - if (!netif_carrier_ok(netdev)) { - u32 link_speed; - + if (!err && !netif_carrier_ok(netdev)) { ionic_port_identify(lif->ionic); - link_speed = le32_to_cpu(lif->info->status.link_speed); netdev_info(netdev, "Link up - %d Gbps\n", - link_speed / 1000); + le32_to_cpu(lif->info->status.link_speed) / 1000); netif_carrier_on(netdev); } } else { @@ -145,7 +156,7 @@ static void ionic_link_status_check(struct ionic_lif *lif) netif_carrier_off(netdev); } - if (lif->netdev->flags & IFF_UP && netif_running(lif->netdev)) { + if (netdev->flags & IFF_UP && netif_running(netdev)) { mutex_lock(&lif->queue_lock); ionic_stop_queues(lif); mutex_unlock(&lif->queue_lock); @@ -382,6 +393,8 @@ static void ionic_qcq_free(struct ionic_lif *lif, struct ionic_qcq *qcq) static void ionic_qcqs_free(struct ionic_lif *lif) { struct device *dev = lif->ionic->dev; + struct ionic_qcq *adminqcq; + unsigned long irqflags; if (lif->notifyqcq) { ionic_qcq_free(lif, lif->notifyqcq); @@ -390,9 +403,14 @@ static void ionic_qcqs_free(struct ionic_lif *lif) } if (lif->adminqcq) { - ionic_qcq_free(lif, lif->adminqcq); - devm_kfree(dev, lif->adminqcq); + spin_lock_irqsave(&lif->adminq_lock, irqflags); + adminqcq = READ_ONCE(lif->adminqcq); lif->adminqcq = NULL; + spin_unlock_irqrestore(&lif->adminq_lock, irqflags); + if (adminqcq) { + ionic_qcq_free(lif, adminqcq); + devm_kfree(dev, adminqcq); + } } if (lif->rxqcqs) { @@ -495,6 +513,7 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, goto err_out; } + new->q.dev = dev; new->flags = flags; new->q.info = devm_kcalloc(dev, num_descs, sizeof(*new->q.info), @@ -506,6 +525,7 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, } new->q.type = type; + new->q.max_sg_elems = lif->qtype_info[type].max_sg_elems; err = ionic_q_init(lif, idev, &new->q, index, name, num_descs, desc_size, sg_desc_size, pid); @@ -656,20 +676,20 @@ static int ionic_qcqs_alloc(struct ionic_lif *lif) err = -ENOMEM; lif->txqcqs = devm_kcalloc(dev, lif->ionic->ntxqs_per_lif, - sizeof(struct ionic_qcq *), GFP_KERNEL); + sizeof(*lif->txqcqs), GFP_KERNEL); if (!lif->txqcqs) goto err_out; lif->rxqcqs = devm_kcalloc(dev, lif->ionic->nrxqs_per_lif, - sizeof(struct ionic_qcq *), GFP_KERNEL); + sizeof(*lif->rxqcqs), GFP_KERNEL); if (!lif->rxqcqs) goto err_out; - lif->txqstats = devm_kcalloc(dev, lif->ionic->ntxqs_per_lif, - sizeof(struct ionic_tx_stats), GFP_KERNEL); + lif->txqstats = devm_kcalloc(dev, lif->ionic->ntxqs_per_lif + 1, + sizeof(*lif->txqstats), GFP_KERNEL); if (!lif->txqstats) goto err_out; - lif->rxqstats = devm_kcalloc(dev, 
lif->ionic->nrxqs_per_lif, - sizeof(struct ionic_rx_stats), GFP_KERNEL); + lif->rxqstats = devm_kcalloc(dev, lif->ionic->nrxqs_per_lif + 1, + sizeof(*lif->rxqstats), GFP_KERNEL); if (!lif->rxqstats) goto err_out; @@ -711,15 +731,14 @@ static int ionic_lif_txq_init(struct ionic_lif *lif, struct ionic_qcq *qcq) .ring_base = cpu_to_le64(q->base_pa), .cq_ring_base = cpu_to_le64(cq->base_pa), .sg_ring_base = cpu_to_le64(q->sg_base_pa), + .features = cpu_to_le64(q->features), }, }; unsigned int intr_index; int err; - if (qcq->flags & IONIC_QCQ_F_INTR) - intr_index = qcq->intr.index; - else - intr_index = lif->rxqcqs[q->index]->intr.index; + intr_index = qcq->intr.index; + ctx.cmd.q_init.intr_index = cpu_to_le16(intr_index); dev_dbg(dev, "txq_init.pid %d\n", ctx.cmd.q_init.pid); @@ -773,6 +792,7 @@ static int ionic_lif_rxq_init(struct ionic_lif *lif, struct ionic_qcq *qcq) .ring_base = cpu_to_le64(q->base_pa), .cq_ring_base = cpu_to_le64(cq->base_pa), .sg_ring_base = cpu_to_le64(q->sg_base_pa), + .features = cpu_to_le64(q->features), }, }; int err; @@ -810,6 +830,254 @@ static int ionic_lif_rxq_init(struct ionic_lif *lif, struct ionic_qcq *qcq) return 0; } +int ionic_lif_create_hwstamp_txq(struct ionic_lif *lif) +{ + unsigned int num_desc, desc_sz, comp_sz, sg_desc_sz; + unsigned int txq_i, flags; + struct ionic_qcq *txq; + u64 features; + int err; + + mutex_lock(&lif->queue_lock); + + if (lif->hwstamp_txq) + goto out; + + features = IONIC_Q_F_2X_CQ_DESC | IONIC_TXQ_F_HWSTAMP; + + num_desc = IONIC_MIN_TXRX_DESC; + desc_sz = sizeof(struct ionic_txq_desc); + comp_sz = 2 * sizeof(struct ionic_txq_comp); + + if (lif->qtype_info[IONIC_QTYPE_TXQ].version >= 1 && + lif->qtype_info[IONIC_QTYPE_TXQ].sg_desc_sz == sizeof(struct ionic_txq_sg_desc_v1)) + sg_desc_sz = sizeof(struct ionic_txq_sg_desc_v1); + else + sg_desc_sz = sizeof(struct ionic_txq_sg_desc); + + txq_i = lif->ionic->ntxqs_per_lif; + flags = IONIC_QCQ_F_TX_STATS | IONIC_QCQ_F_SG; + + err = ionic_qcq_alloc(lif, IONIC_QTYPE_TXQ, txq_i, "hwstamp_tx", flags, + num_desc, desc_sz, comp_sz, sg_desc_sz, + lif->kern_pid, &txq); + if (err) + goto err_qcq_alloc; + + txq->q.features = features; + + ionic_link_qcq_interrupts(lif->adminqcq, txq); + ionic_debugfs_add_qcq(lif, txq); + + lif->hwstamp_txq = txq; + + if (netif_running(lif->netdev)) { + err = ionic_lif_txq_init(lif, txq); + if (err) + goto err_qcq_init; + + if (test_bit(IONIC_LIF_F_UP, lif->state)) { + err = ionic_qcq_enable(txq); + if (err) + goto err_qcq_enable; + } + } + +out: + mutex_unlock(&lif->queue_lock); + + return 0; + +err_qcq_enable: + ionic_lif_qcq_deinit(lif, txq); +err_qcq_init: + lif->hwstamp_txq = NULL; + ionic_debugfs_del_qcq(txq); + ionic_qcq_free(lif, txq); + devm_kfree(lif->ionic->dev, txq); +err_qcq_alloc: + mutex_unlock(&lif->queue_lock); + return err; +} + +int ionic_lif_create_hwstamp_rxq(struct ionic_lif *lif) +{ + unsigned int num_desc, desc_sz, comp_sz, sg_desc_sz; + unsigned int rxq_i, flags; + struct ionic_qcq *rxq; + u64 features; + int err; + + mutex_lock(&lif->queue_lock); + + if (lif->hwstamp_rxq) + goto out; + + features = IONIC_Q_F_2X_CQ_DESC | IONIC_RXQ_F_HWSTAMP; + + num_desc = IONIC_MIN_TXRX_DESC; + desc_sz = sizeof(struct ionic_rxq_desc); + comp_sz = 2 * sizeof(struct ionic_rxq_comp); + sg_desc_sz = sizeof(struct ionic_rxq_sg_desc); + + rxq_i = lif->ionic->nrxqs_per_lif; + flags = IONIC_QCQ_F_RX_STATS | IONIC_QCQ_F_SG; + + err = ionic_qcq_alloc(lif, IONIC_QTYPE_RXQ, rxq_i, "hwstamp_rx", flags, + num_desc, desc_sz, comp_sz, sg_desc_sz, + lif->kern_pid, 
&rxq); + if (err) + goto err_qcq_alloc; + + rxq->q.features = features; + + ionic_link_qcq_interrupts(lif->adminqcq, rxq); + ionic_debugfs_add_qcq(lif, rxq); + + lif->hwstamp_rxq = rxq; + + if (netif_running(lif->netdev)) { + err = ionic_lif_rxq_init(lif, rxq); + if (err) + goto err_qcq_init; + + if (test_bit(IONIC_LIF_F_UP, lif->state)) { + ionic_rx_fill(&rxq->q); + err = ionic_qcq_enable(rxq); + if (err) + goto err_qcq_enable; + } + } + +out: + mutex_unlock(&lif->queue_lock); + + return 0; + +err_qcq_enable: + ionic_lif_qcq_deinit(lif, rxq); +err_qcq_init: + lif->hwstamp_rxq = NULL; + ionic_debugfs_del_qcq(rxq); + ionic_qcq_free(lif, rxq); + devm_kfree(lif->ionic->dev, rxq); +err_qcq_alloc: + mutex_unlock(&lif->queue_lock); + return err; +} + +int ionic_lif_config_hwstamp_rxq_all(struct ionic_lif *lif, bool rx_all) +{ + struct ionic_queue_params qparam; + + ionic_init_queue_params(lif, &qparam); + + if (rx_all) + qparam.rxq_features = IONIC_Q_F_2X_CQ_DESC | IONIC_RXQ_F_HWSTAMP; + else + qparam.rxq_features = 0; + + /* if we're not running, just set the values and return */ + if (!netif_running(lif->netdev)) { + lif->rxq_features = qparam.rxq_features; + return 0; + } + + return ionic_reconfigure_queues(lif, &qparam); +} + +int ionic_lif_set_hwstamp_txmode(struct ionic_lif *lif, u16 txstamp_mode) +{ + struct ionic_admin_ctx ctx = { + .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), + .cmd.lif_setattr = { + .opcode = IONIC_CMD_LIF_SETATTR, + .index = cpu_to_le16(lif->index), + .attr = IONIC_LIF_ATTR_TXSTAMP, + .txstamp_mode = cpu_to_le16(txstamp_mode), + }, + }; + + return ionic_adminq_post_wait(lif, &ctx); +} + +static void ionic_lif_del_hwstamp_rxfilt(struct ionic_lif *lif) +{ + struct ionic_admin_ctx ctx = { + .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), + .cmd.rx_filter_del = { + .opcode = IONIC_CMD_RX_FILTER_DEL, + .lif_index = cpu_to_le16(lif->index), + }, + }; + struct ionic_rx_filter *f; + u32 filter_id; + int err; + + spin_lock_bh(&lif->rx_filters.lock); + + f = ionic_rx_filter_rxsteer(lif); + if (!f) { + spin_unlock_bh(&lif->rx_filters.lock); + return; + } + + filter_id = f->filter_id; + ionic_rx_filter_free(lif, f); + + spin_unlock_bh(&lif->rx_filters.lock); + + netdev_dbg(lif->netdev, "rx_filter del RXSTEER (id %d)\n", filter_id); + + ctx.cmd.rx_filter_del.filter_id = cpu_to_le32(filter_id); + + err = ionic_adminq_post_wait(lif, &ctx); + if (err && err != -EEXIST) + netdev_dbg(lif->netdev, "failed to delete rx_filter RXSTEER (id %d)\n", filter_id); +} + +static int ionic_lif_add_hwstamp_rxfilt(struct ionic_lif *lif, u64 pkt_class) +{ + struct ionic_admin_ctx ctx = { + .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), + .cmd.rx_filter_add = { + .opcode = IONIC_CMD_RX_FILTER_ADD, + .lif_index = cpu_to_le16(lif->index), + .match = cpu_to_le16(IONIC_RX_FILTER_STEER_PKTCLASS), + .pkt_class = cpu_to_le64(pkt_class), + }, + }; + u8 qtype; + u32 qid; + int err; + + if (!lif->hwstamp_rxq) + return -EINVAL; + + qtype = lif->hwstamp_rxq->q.type; + ctx.cmd.rx_filter_add.qtype = qtype; + + qid = lif->hwstamp_rxq->q.index; + ctx.cmd.rx_filter_add.qid = cpu_to_le32(qid); + + netdev_dbg(lif->netdev, "rx_filter add RXSTEER\n"); + err = ionic_adminq_post_wait(lif, &ctx); + if (err && err != -EEXIST) + return err; + + return ionic_rx_filter_save(lif, 0, qid, 0, &ctx); +} + +int ionic_lif_set_hwstamp_rxfilt(struct ionic_lif *lif, u64 pkt_class) +{ + ionic_lif_del_hwstamp_rxfilt(lif); + + if (!pkt_class) + return 0; + + return ionic_lif_add_hwstamp_rxfilt(lif, pkt_class); +} + static bool 
ionic_notifyq_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info) { @@ -837,7 +1105,7 @@ static bool ionic_notifyq_service(struct ionic_cq *cq, switch (le16_to_cpu(comp->event.ecode)) { case IONIC_EVENT_LINK_CHANGE: - ionic_link_status_check_request(lif, false); + ionic_link_status_check_request(lif, CAN_NOT_SLEEP); break; case IONIC_EVENT_RESET: work = kzalloc(sizeof(*work), GFP_ATOMIC); @@ -875,30 +1143,43 @@ static int ionic_adminq_napi(struct napi_struct *napi, int budget) struct ionic_intr_info *intr = napi_to_cq(napi)->bound_intr; struct ionic_lif *lif = napi_to_cq(napi)->lif; struct ionic_dev *idev = &lif->ionic->idev; + unsigned long irqflags; unsigned int flags = 0; + int rx_work = 0; + int tx_work = 0; int n_work = 0; int a_work = 0; int work_done; + int credits; if (lif->notifyqcq && lif->notifyqcq->flags & IONIC_QCQ_F_INITED) n_work = ionic_cq_service(&lif->notifyqcq->cq, budget, ionic_notifyq_service, NULL, NULL); + spin_lock_irqsave(&lif->adminq_lock, irqflags); if (lif->adminqcq && lif->adminqcq->flags & IONIC_QCQ_F_INITED) a_work = ionic_cq_service(&lif->adminqcq->cq, budget, ionic_adminq_service, NULL, NULL); + spin_unlock_irqrestore(&lif->adminq_lock, irqflags); + + if (lif->hwstamp_rxq) + rx_work = ionic_cq_service(&lif->hwstamp_rxq->cq, budget, + ionic_rx_service, NULL, NULL); + + if (lif->hwstamp_txq) + tx_work = ionic_cq_service(&lif->hwstamp_txq->cq, budget, + ionic_tx_service, NULL, NULL); - work_done = max(n_work, a_work); + work_done = max(max(n_work, a_work), max(rx_work, tx_work)); if (work_done < budget && napi_complete_done(napi, work_done)) { flags |= IONIC_INTR_CRED_UNMASK; - lif->adminqcq->cq.bound_intr->rearm_count++; + intr->rearm_count++; } if (work_done || flags) { flags |= IONIC_INTR_CRED_RESET_COALESCE; - ionic_intr_credits(idev->intr_ctrl, - intr->index, - n_work + a_work, flags); + credits = n_work + a_work + rx_work + tx_work; + ionic_intr_credits(idev->intr_ctrl, intr->index, credits, flags); } return work_done; @@ -1258,6 +1539,10 @@ static int ionic_set_nic_features(struct ionic_lif *lif, int err; ctx.cmd.lif_setattr.features = ionic_netdev_features_to_nic(features); + + if (lif->phc) + ctx.cmd.lif_setattr.features |= cpu_to_le64(IONIC_ETH_HW_TIMESTAMP); + err = ionic_adminq_post_wait(lif, &ctx); if (err) return err; @@ -1305,6 +1590,8 @@ static int ionic_set_nic_features(struct ionic_lif *lif, dev_dbg(dev, "feature ETH_HW_TSO_UDP\n"); if (lif->hw_features & IONIC_ETH_HW_TSO_UDP_CSUM) dev_dbg(dev, "feature ETH_HW_TSO_UDP_CSUM\n"); + if (lif->hw_features & IONIC_ETH_HW_TIMESTAMP) + dev_dbg(dev, "feature ETH_HW_TIMESTAMP\n"); return 0; } @@ -1441,7 +1728,7 @@ static int ionic_start_queues_reconfig(struct ionic_lif *lif) */ err = ionic_txrx_init(lif); mutex_unlock(&lif->queue_lock); - ionic_link_status_check_request(lif, true); + ionic_link_status_check_request(lif, CAN_SLEEP); netif_device_attach(lif->netdev); return err; @@ -1480,7 +1767,8 @@ static void ionic_tx_timeout_work(struct work_struct *ws) { struct ionic_lif *lif = container_of(ws, struct ionic_lif, tx_timeout_work); - netdev_info(lif->netdev, "Tx Timeout recovery\n"); + if (test_bit(IONIC_LIF_F_FW_RESET, lif->state)) + return; /* if we were stopped before this scheduled job was launched, * don't bother the queues as they are already stopped. 
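The reworked ionic_adminq_napi() above now services up to four rings (notifyq, adminq, and the optional hwstamp tx/rx queues) from one interrupt: NAPI completion is judged on the busiest ring, while the interrupt block is credited with every completion actually processed. A toy model of that bookkeeping (editor's sketch using the kernel's max() macro, not driver code):

static int demo_shared_ring_napi(int n_work, int a_work,
				 int rx_work, int tx_work, int budget)
{
	int work_done = max(max(n_work, a_work), max(rx_work, tx_work));
	int credits = n_work + a_work + rx_work + tx_work;

	if (work_done < budget)
		;	/* napi_complete_done() and IRQ unmask go here */

	return credits;	/* written back via ionic_intr_credits() */
}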
@@ -1496,6 +1784,7 @@ static void ionic_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ionic_lif *lif = netdev_priv(netdev); + netdev_info(lif->netdev, "Tx Timeout triggered - txq %d\n", txqueue); schedule_work(&lif->tx_timeout_work); } @@ -1645,11 +1934,17 @@ static void ionic_txrx_disable(struct ionic_lif *lif) err = ionic_qcq_disable(lif->txqcqs[i], (err != -ETIMEDOUT)); } + if (lif->hwstamp_txq) + err = ionic_qcq_disable(lif->hwstamp_txq, (err != -ETIMEDOUT)); + if (lif->rxqcqs) { for (i = 0; i < lif->nxqs; i++) err = ionic_qcq_disable(lif->rxqcqs[i], (err != -ETIMEDOUT)); } + if (lif->hwstamp_rxq) + err = ionic_qcq_disable(lif->hwstamp_rxq, (err != -ETIMEDOUT)); + ionic_lif_quiesce(lif); } @@ -1672,6 +1967,17 @@ static void ionic_txrx_deinit(struct ionic_lif *lif) } } lif->rx_mode = 0; + + if (lif->hwstamp_txq) { + ionic_lif_qcq_deinit(lif, lif->hwstamp_txq); + ionic_tx_flush(&lif->hwstamp_txq->cq); + ionic_tx_empty(&lif->hwstamp_txq->q); + } + + if (lif->hwstamp_rxq) { + ionic_lif_qcq_deinit(lif, lif->hwstamp_rxq); + ionic_rx_empty(&lif->hwstamp_rxq->q); + } } static void ionic_txrx_free(struct ionic_lif *lif) @@ -1693,15 +1999,30 @@ static void ionic_txrx_free(struct ionic_lif *lif) lif->rxqcqs[i] = NULL; } } + + if (lif->hwstamp_txq) { + ionic_qcq_free(lif, lif->hwstamp_txq); + devm_kfree(lif->ionic->dev, lif->hwstamp_txq); + lif->hwstamp_txq = NULL; + } + + if (lif->hwstamp_rxq) { + ionic_qcq_free(lif, lif->hwstamp_rxq); + devm_kfree(lif->ionic->dev, lif->hwstamp_rxq); + lif->hwstamp_rxq = NULL; + } } static int ionic_txrx_alloc(struct ionic_lif *lif) { - unsigned int sg_desc_sz; - unsigned int flags; - unsigned int i; + unsigned int comp_sz, desc_sz, num_desc, sg_desc_sz; + unsigned int flags, i; int err = 0; + num_desc = lif->ntxq_descs; + desc_sz = sizeof(struct ionic_txq_desc); + comp_sz = sizeof(struct ionic_txq_comp); + if (lif->qtype_info[IONIC_QTYPE_TXQ].version >= 1 && lif->qtype_info[IONIC_QTYPE_TXQ].sg_desc_sz == sizeof(struct ionic_txq_sg_desc_v1)) @@ -1714,10 +2035,7 @@ static int ionic_txrx_alloc(struct ionic_lif *lif) flags |= IONIC_QCQ_F_INTR; for (i = 0; i < lif->nxqs; i++) { err = ionic_qcq_alloc(lif, IONIC_QTYPE_TXQ, i, "tx", flags, - lif->ntxq_descs, - sizeof(struct ionic_txq_desc), - sizeof(struct ionic_txq_comp), - sg_desc_sz, + num_desc, desc_sz, comp_sz, sg_desc_sz, lif->kern_pid, &lif->txqcqs[i]); if (err) goto err_out; @@ -1734,16 +2052,24 @@ static int ionic_txrx_alloc(struct ionic_lif *lif) } flags = IONIC_QCQ_F_RX_STATS | IONIC_QCQ_F_SG | IONIC_QCQ_F_INTR; + + num_desc = lif->nrxq_descs; + desc_sz = sizeof(struct ionic_rxq_desc); + comp_sz = sizeof(struct ionic_rxq_comp); + sg_desc_sz = sizeof(struct ionic_rxq_sg_desc); + + if (lif->rxq_features & IONIC_Q_F_2X_CQ_DESC) + comp_sz *= 2; + for (i = 0; i < lif->nxqs; i++) { err = ionic_qcq_alloc(lif, IONIC_QTYPE_RXQ, i, "rx", flags, - lif->nrxq_descs, - sizeof(struct ionic_rxq_desc), - sizeof(struct ionic_rxq_comp), - sizeof(struct ionic_rxq_sg_desc), + num_desc, desc_sz, comp_sz, sg_desc_sz, lif->kern_pid, &lif->rxqcqs[i]); if (err) goto err_out; + lif->rxqcqs[i]->q.features = lif->rxq_features; + ionic_intr_coal_init(lif->ionic->idev.intr_ctrl, lif->rxqcqs[i]->intr.index, lif->rx_coalesce_hw); @@ -1822,8 +2148,26 @@ static int ionic_txrx_enable(struct ionic_lif *lif) } } + if (lif->hwstamp_rxq) { + ionic_rx_fill(&lif->hwstamp_rxq->q); + err = ionic_qcq_enable(lif->hwstamp_rxq); + if (err) + goto err_out_hwstamp_rx; + } + + if (lif->hwstamp_txq) { + err = 
ionic_qcq_enable(lif->hwstamp_txq); + if (err) + goto err_out_hwstamp_tx; + } + return 0; +err_out_hwstamp_tx: + if (lif->hwstamp_rxq) + derr = ionic_qcq_disable(lif->hwstamp_rxq, (derr != -ETIMEDOUT)); +err_out_hwstamp_rx: + i = lif->nxqs; err_out: while (i--) { derr = ionic_qcq_disable(lif->txqcqs[i], (derr != -ETIMEDOUT)); @@ -1837,6 +2181,12 @@ static int ionic_start_queues(struct ionic_lif *lif) { int err; + if (test_bit(IONIC_LIF_F_BROKEN, lif->state)) + return -EIO; + + if (test_bit(IONIC_LIF_F_FW_RESET, lif->state)) + return -EBUSY; + if (test_and_set_bit(IONIC_LIF_F_UP, lif->state)) return 0; @@ -1855,13 +2205,17 @@ static int ionic_open(struct net_device *netdev) struct ionic_lif *lif = netdev_priv(netdev); int err; + /* If recovering from a broken state, clear the bit and we'll try again */ + if (test_and_clear_bit(IONIC_LIF_F_BROKEN, lif->state)) + netdev_info(netdev, "clearing broken state\n"); + err = ionic_txrx_alloc(lif); if (err) return err; err = ionic_txrx_init(lif); if (err) - goto err_out; + goto err_txrx_free; err = netif_set_real_num_tx_queues(netdev, lif->nxqs); if (err) @@ -1882,7 +2236,7 @@ static int ionic_open(struct net_device *netdev) err_txrx_deinit: ionic_txrx_deinit(lif); -err_out: +err_txrx_free: ionic_txrx_free(lif); return err; } @@ -1910,6 +2264,20 @@ static int ionic_stop(struct net_device *netdev) return 0; } +static int ionic_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) +{ + struct ionic_lif *lif = netdev_priv(netdev); + + switch (cmd) { + case SIOCSHWTSTAMP: + return ionic_lif_hwstamp_set(lif, ifr); + case SIOCGHWTSTAMP: + return ionic_lif_hwstamp_get(lif, ifr); + default: + return -EOPNOTSUPP; + } +} + static int ionic_get_vf_config(struct net_device *netdev, int vf, struct ifla_vf_info *ivf) { @@ -2158,6 +2526,7 @@ static int ionic_set_vf_link_state(struct net_device *netdev, int vf, int set) static const struct net_device_ops ionic_netdev_ops = { .ndo_open = ionic_open, .ndo_stop = ionic_stop, + .ndo_do_ioctl = ionic_do_ioctl, .ndo_start_xmit = ionic_start_xmit, .ndo_get_stats64 = ionic_get_stats64, .ndo_set_rx_mode = ionic_ndo_set_rx_mode, @@ -2181,7 +2550,9 @@ static const struct net_device_ops ionic_netdev_ops = { static void ionic_swap_queues(struct ionic_qcq *a, struct ionic_qcq *b) { /* only swapping the queues, not the napi, flags, or other stuff */ + swap(a->q.features, b->q.features); swap(a->q.num_descs, b->q.num_descs); + swap(a->q.desc_size, b->q.desc_size); swap(a->q.base, b->q.base); swap(a->q.base_pa, b->q.base_pa); swap(a->q.info, b->q.info); @@ -2189,6 +2560,7 @@ static void ionic_swap_queues(struct ionic_qcq *a, struct ionic_qcq *b) swap(a->q_base_pa, b->q_base_pa); swap(a->q_size, b->q_size); + swap(a->q.sg_desc_size, b->q.sg_desc_size); swap(a->q.sg_base, b->q.sg_base); swap(a->q.sg_base_pa, b->q.sg_base_pa); swap(a->sg_base, b->sg_base); @@ -2196,23 +2568,26 @@ static void ionic_swap_queues(struct ionic_qcq *a, struct ionic_qcq *b) swap(a->sg_size, b->sg_size); swap(a->cq.num_descs, b->cq.num_descs); + swap(a->cq.desc_size, b->cq.desc_size); swap(a->cq.base, b->cq.base); swap(a->cq.base_pa, b->cq.base_pa); swap(a->cq.info, b->cq.info); swap(a->cq_base, b->cq_base); swap(a->cq_base_pa, b->cq_base_pa); swap(a->cq_size, b->cq_size); + + ionic_debugfs_del_qcq(a); + ionic_debugfs_add_qcq(a->q.lif, a); } int ionic_reconfigure_queues(struct ionic_lif *lif, struct ionic_queue_params *qparam) { + unsigned int comp_sz, desc_sz, num_desc, sg_desc_sz; struct ionic_qcq **tx_qcqs = NULL; struct ionic_qcq **rx_qcqs = 
NULL; - unsigned int sg_desc_sz; - unsigned int flags; + unsigned int flags, i; int err = -ENOMEM; - unsigned int i; /* allocate temporary qcq arrays to hold new queue structs */ if (qparam->nxqs != lif->nxqs || qparam->ntxq_descs != lif->ntxq_descs) { @@ -2221,7 +2596,9 @@ int ionic_reconfigure_queues(struct ionic_lif *lif, if (!tx_qcqs) goto err_out; } - if (qparam->nxqs != lif->nxqs || qparam->nrxq_descs != lif->nrxq_descs) { + if (qparam->nxqs != lif->nxqs || + qparam->nrxq_descs != lif->nrxq_descs || + qparam->rxq_features != lif->rxq_features) { rx_qcqs = devm_kcalloc(lif->ionic->dev, lif->ionic->nrxqs_per_lif, sizeof(struct ionic_qcq *), GFP_KERNEL); if (!rx_qcqs) @@ -2231,21 +2608,22 @@ int ionic_reconfigure_queues(struct ionic_lif *lif, /* allocate new desc_info and rings, but leave the interrupt setup * until later so as to not mess with the still-running queues */ - if (lif->qtype_info[IONIC_QTYPE_TXQ].version >= 1 && - lif->qtype_info[IONIC_QTYPE_TXQ].sg_desc_sz == - sizeof(struct ionic_txq_sg_desc_v1)) - sg_desc_sz = sizeof(struct ionic_txq_sg_desc_v1); - else - sg_desc_sz = sizeof(struct ionic_txq_sg_desc); - if (tx_qcqs) { + num_desc = qparam->ntxq_descs; + desc_sz = sizeof(struct ionic_txq_desc); + comp_sz = sizeof(struct ionic_txq_comp); + + if (lif->qtype_info[IONIC_QTYPE_TXQ].version >= 1 && + lif->qtype_info[IONIC_QTYPE_TXQ].sg_desc_sz == + sizeof(struct ionic_txq_sg_desc_v1)) + sg_desc_sz = sizeof(struct ionic_txq_sg_desc_v1); + else + sg_desc_sz = sizeof(struct ionic_txq_sg_desc); + for (i = 0; i < qparam->nxqs; i++) { flags = lif->txqcqs[i]->flags & ~IONIC_QCQ_F_INTR; err = ionic_qcq_alloc(lif, IONIC_QTYPE_TXQ, i, "tx", flags, - qparam->ntxq_descs, - sizeof(struct ionic_txq_desc), - sizeof(struct ionic_txq_comp), - sg_desc_sz, + num_desc, desc_sz, comp_sz, sg_desc_sz, lif->kern_pid, &tx_qcqs[i]); if (err) goto err_out; @@ -2253,16 +2631,23 @@ int ionic_reconfigure_queues(struct ionic_lif *lif, } if (rx_qcqs) { + num_desc = qparam->nrxq_descs; + desc_sz = sizeof(struct ionic_rxq_desc); + comp_sz = sizeof(struct ionic_rxq_comp); + sg_desc_sz = sizeof(struct ionic_rxq_sg_desc); + + if (qparam->rxq_features & IONIC_Q_F_2X_CQ_DESC) + comp_sz *= 2; + for (i = 0; i < qparam->nxqs; i++) { flags = lif->rxqcqs[i]->flags & ~IONIC_QCQ_F_INTR; err = ionic_qcq_alloc(lif, IONIC_QTYPE_RXQ, i, "rx", flags, - qparam->nrxq_descs, - sizeof(struct ionic_rxq_desc), - sizeof(struct ionic_rxq_comp), - sizeof(struct ionic_rxq_sg_desc), + num_desc, desc_sz, comp_sz, sg_desc_sz, lif->kern_pid, &rx_qcqs[i]); if (err) goto err_out; + + rx_qcqs[i]->q.features = qparam->rxq_features; } } @@ -2349,9 +2734,10 @@ int ionic_reconfigure_queues(struct ionic_lif *lif, } swap(lif->nxqs, qparam->nxqs); + swap(lif->rxq_features, qparam->rxq_features); err_out_reinit_unlock: - /* re-init the queues, but don't loose an error code */ + /* re-init the queues, but don't lose an error code */ if (err) ionic_start_queues_reconfig(lif); else @@ -2450,7 +2836,6 @@ int ionic_lif_alloc(struct ionic *ionic) lif->index = 0; lif->ntxq_descs = IONIC_DEF_TXRX_DESC; lif->nrxq_descs = IONIC_DEF_TXRX_DESC; - lif->tx_budget = IONIC_TX_BUDGET_DEFAULT; /* Convert the default coalesce value to actual hw resolution */ lif->rx_coalesce_usecs = IONIC_ITR_COAL_USEC_DEFAULT; @@ -2501,6 +2886,8 @@ int ionic_lif_alloc(struct ionic *ionic) } netdev_rss_key_fill(lif->rss_hash_key, IONIC_RSS_HASH_KEY_SIZE); + ionic_lif_alloc_phc(lif); + return 0; err_out_free_qcqs: @@ -2601,10 +2988,13 @@ static void ionic_lif_handle_fw_up(struct ionic_lif 
*lif) } clear_bit(IONIC_LIF_F_FW_RESET, lif->state); - ionic_link_status_check_request(lif, true); + ionic_link_status_check_request(lif, CAN_SLEEP); netif_device_attach(lif->netdev); dev_info(ionic->dev, "FW Up: LIFs restarted\n"); + /* restore the hardware timestamping queues */ + ionic_lif_hwstamp_replay(lif); + return; err_txrx_free: @@ -2621,6 +3011,8 @@ void ionic_lif_free(struct ionic_lif *lif) { struct device *dev = lif->ionic->dev; + ionic_lif_free_phc(lif); + /* free rss indirection table */ dma_free_coherent(dev, lif->rss_ind_tbl_sz, lif->rss_ind_tbl, lif->rss_ind_tbl_pa); @@ -2957,6 +3349,8 @@ int ionic_lif_register(struct ionic_lif *lif) { int err; + ionic_lif_register_phc(lif); + INIT_WORK(&lif->ionic->nb_work, ionic_lif_notify_work); lif->ionic->nb.notifier_call = ionic_lif_notify; @@ -2969,10 +3363,11 @@ int ionic_lif_register(struct ionic_lif *lif) err = register_netdev(lif->netdev); if (err) { dev_err(lif->ionic->dev, "Cannot register net device, aborting\n"); + ionic_lif_unregister_phc(lif); return err; } - ionic_link_status_check_request(lif, true); + ionic_link_status_check_request(lif, CAN_SLEEP); lif->registered = true; ionic_lif_set_netdev_info(lif); @@ -2989,6 +3384,9 @@ void ionic_lif_unregister(struct ionic_lif *lif) if (lif->netdev->reg_state == NETREG_REGISTERED) unregister_netdev(lif->netdev); + + ionic_lif_unregister_phc(lif); + lif->registered = false; } @@ -3128,6 +3526,16 @@ int ionic_lif_size(struct ionic *ionic) ntxqs_per_lif = le32_to_cpu(lc->queue_count[IONIC_QTYPE_TXQ]); nrxqs_per_lif = le32_to_cpu(lc->queue_count[IONIC_QTYPE_RXQ]); + /* reserve last queue id for hardware timestamping */ + if (lc->features & cpu_to_le64(IONIC_ETH_HW_TIMESTAMP)) { + if (ntxqs_per_lif <= 1 || nrxqs_per_lif <= 1) { + lc->features &= cpu_to_le64(~IONIC_ETH_HW_TIMESTAMP); + } else { + ntxqs_per_lif -= 1; + nrxqs_per_lif -= 1; + } + } + nxqs = min(ntxqs_per_lif, nrxqs_per_lif); nxqs = min(nxqs, num_online_cpus()); neqs = min(neqs_per_lif, num_online_cpus()); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h index 563dba384a53..346506f01715 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -4,6 +4,9 @@ #ifndef _IONIC_LIF_H_ #define _IONIC_LIF_H_ +#include <linux/ptp_clock_kernel.h> +#include <linux/timecounter.h> +#include <uapi/linux/net_tstamp.h> #include <linux/dim.h> #include <linux/pci.h> #include "ionic_rx_filter.h" @@ -36,6 +39,8 @@ struct ionic_tx_stats { u64 crc32_csum; u64 sg_cntr[IONIC_MAX_NUM_SG_CNTR]; u64 dma_map_err; + u64 hwstamp_valid; + u64 hwstamp_invalid; }; struct ionic_rx_stats { @@ -49,6 +54,8 @@ struct ionic_rx_stats { u64 csum_error; u64 dma_map_err; u64 alloc_err; + u64 hwstamp_valid; + u64 hwstamp_invalid; }; #define IONIC_QCQ_F_INITED BIT(0) @@ -125,6 +132,10 @@ struct ionic_lif_sw_stats { u64 rx_csum_none; u64 rx_csum_complete; u64 rx_csum_error; + u64 tx_hwstamp_valid; + u64 tx_hwstamp_invalid; + u64 rx_hwstamp_valid; + u64 rx_hwstamp_invalid; u64 hw_tx_dropped; u64 hw_rx_dropped; u64 hw_rx_over_errors; @@ -139,6 +150,7 @@ enum ionic_lif_state_flags { IONIC_LIF_F_LINK_CHECK_REQUESTED, IONIC_LIF_F_FW_RESET, IONIC_LIF_F_SPLIT_INTR, + IONIC_LIF_F_BROKEN, IONIC_LIF_F_TX_DIM_INTR, IONIC_LIF_F_RX_DIM_INTR, @@ -157,40 +169,45 @@ struct ionic_qtype_info { u16 sg_desc_stride; }; +struct ionic_phc; + #define IONIC_LIF_NAME_MAX_SZ 32 struct ionic_lif { - char name[IONIC_LIF_NAME_MAX_SZ]; - struct list_head list; struct net_device 
*netdev; DECLARE_BITMAP(state, IONIC_LIF_F_STATE_SIZE); struct ionic *ionic; - bool registered; unsigned int index; unsigned int hw_index; - unsigned int kern_pid; - u64 __iomem *kern_dbpage; struct mutex queue_lock; /* lock for queue structures */ spinlock_t adminq_lock; /* lock for AdminQ operations */ struct ionic_qcq *adminqcq; struct ionic_qcq *notifyqcq; struct ionic_qcq **txqcqs; + struct ionic_qcq *hwstamp_txq; struct ionic_tx_stats *txqstats; struct ionic_qcq **rxqcqs; + struct ionic_qcq *hwstamp_rxq; struct ionic_rx_stats *rxqstats; + struct ionic_deferred deferred; + struct work_struct tx_timeout_work; u64 last_eid; + unsigned int kern_pid; + u64 __iomem *kern_dbpage; unsigned int neqs; unsigned int nxqs; unsigned int ntxq_descs; unsigned int nrxq_descs; u32 rx_copybreak; - u32 tx_budget; + u64 rxq_features; unsigned int rx_mode; u64 hw_features; + bool registered; bool mc_overflow; - unsigned int nmcast; bool uc_overflow; u16 lif_type; + unsigned int nmcast; unsigned int nucast; + char name[IONIC_LIF_NAME_MAX_SZ]; union ionic_lif_identity *identity; struct ionic_lif_info *info; @@ -205,16 +222,34 @@ struct ionic_lif { u32 rss_ind_tbl_sz; struct ionic_rx_filters rx_filters; - struct ionic_deferred deferred; - unsigned long *dbid_inuse; - unsigned int dbid_count; - struct dentry *dentry; u32 rx_coalesce_usecs; /* what the user asked for */ u32 rx_coalesce_hw; /* what the hw is using */ u32 tx_coalesce_usecs; /* what the user asked for */ u32 tx_coalesce_hw; /* what the hw is using */ + unsigned long *dbid_inuse; + unsigned int dbid_count; - struct work_struct tx_timeout_work; + struct ionic_phc *phc; + + struct dentry *dentry; +}; + +struct ionic_phc { + spinlock_t lock; /* lock for cc and tc */ + struct cyclecounter cc; + struct timecounter tc; + + struct mutex config_lock; /* lock for ts_config */ + struct hwtstamp_config ts_config; + u64 ts_config_rx_filt; + u32 ts_config_tx_mode; + + u32 init_cc_mult; + long aux_work_delay; + + struct ptp_clock_info ptp_info; + struct ptp_clock *ptp; + struct ionic_lif *lif; }; struct ionic_queue_params { @@ -222,6 +257,7 @@ struct ionic_queue_params { unsigned int ntxq_descs; unsigned int nrxq_descs; unsigned int intr_split; + u64 rxq_features; }; static inline void ionic_init_queue_params(struct ionic_lif *lif, @@ -231,6 +267,7 @@ static inline void ionic_init_queue_params(struct ionic_lif *lif, qparam->ntxq_descs = lif->ntxq_descs; qparam->nrxq_descs = lif->nrxq_descs; qparam->intr_split = test_bit(IONIC_LIF_F_SPLIT_INTR, lif->state); + qparam->rxq_features = lif->rxq_features; } static inline u32 ionic_coal_usec_to_hw(struct ionic *ionic, u32 usecs) @@ -263,6 +300,49 @@ void ionic_lif_unregister(struct ionic_lif *lif); int ionic_lif_identify(struct ionic *ionic, u8 lif_type, union ionic_lif_identity *lif_ident); int ionic_lif_size(struct ionic *ionic); + +#if IS_ENABLED(CONFIG_PTP_1588_CLOCK) +int ionic_lif_hwstamp_replay(struct ionic_lif *lif); +int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr); +int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr); +ktime_t ionic_lif_phc_ktime(struct ionic_lif *lif, u64 counter); +void ionic_lif_register_phc(struct ionic_lif *lif); +void ionic_lif_unregister_phc(struct ionic_lif *lif); +void ionic_lif_alloc_phc(struct ionic_lif *lif); +void ionic_lif_free_phc(struct ionic_lif *lif); +#else +static inline int ionic_lif_hwstamp_replay(struct ionic_lif *lif) +{ + return -EOPNOTSUPP; +} + +static inline int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr) +{ + 
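+ /* PHC support compiled out: reject timestamp configuration */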
return -EOPNOTSUPP; +} + +static inline int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr) +{ + return -EOPNOTSUPP; +} + +static inline ktime_t ionic_lif_phc_ktime(struct ionic_lif *lif, u64 counter) +{ + return ns_to_ktime(0); +} + +static inline void ionic_lif_register_phc(struct ionic_lif *lif) {} +static inline void ionic_lif_unregister_phc(struct ionic_lif *lif) {} +static inline void ionic_lif_alloc_phc(struct ionic_lif *lif) {} +static inline void ionic_lif_free_phc(struct ionic_lif *lif) {} +#endif + +int ionic_lif_create_hwstamp_txq(struct ionic_lif *lif); +int ionic_lif_create_hwstamp_rxq(struct ionic_lif *lif); +int ionic_lif_config_hwstamp_rxq_all(struct ionic_lif *lif, bool rx_all); +int ionic_lif_set_hwstamp_txmode(struct ionic_lif *lif, u16 txstamp_mode); +int ionic_lif_set_hwstamp_rxfilt(struct ionic_lif *lif, u64 pkt_class); + int ionic_lif_rss_config(struct ionic_lif *lif, u16 types, const u8 *key, const u32 *indir); int ionic_reconfigure_queues(struct ionic_lif *lif, diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c index fbc57de6683e..61cfe2120817 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -148,6 +148,8 @@ static const char *ionic_opcode_to_str(enum ionic_cmd_opcode opcode) return "IONIC_CMD_LIF_SETATTR"; case IONIC_CMD_LIF_GETATTR: return "IONIC_CMD_LIF_GETATTR"; + case IONIC_CMD_LIF_SETPHC: + return "IONIC_CMD_LIF_SETPHC"; case IONIC_CMD_RX_MODE_SET: return "IONIC_CMD_RX_MODE_SET"; case IONIC_CMD_RX_FILTER_ADD: @@ -187,10 +189,17 @@ static const char *ionic_opcode_to_str(enum ionic_cmd_opcode opcode) static void ionic_adminq_flush(struct ionic_lif *lif) { - struct ionic_queue *q = &lif->adminqcq->q; struct ionic_desc_info *desc_info; + unsigned long irqflags; + struct ionic_queue *q; - spin_lock(&lif->adminq_lock); + spin_lock_irqsave(&lif->adminq_lock, irqflags); + if (!lif->adminqcq) { + spin_unlock_irqrestore(&lif->adminq_lock, irqflags); + return; + } + + q = &lif->adminqcq->q; while (q->tail_idx != q->head_idx) { desc_info = &q->info[q->tail_idx]; @@ -199,7 +208,7 @@ static void ionic_adminq_flush(struct ionic_lif *lif) desc_info->cb_arg = NULL; q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1); } - spin_unlock(&lif->adminq_lock); + spin_unlock_irqrestore(&lif->adminq_lock, irqflags); } static int ionic_adminq_check_err(struct ionic_lif *lif, @@ -234,35 +243,36 @@ static void ionic_adminq_cb(struct ionic_queue *q, { struct ionic_admin_ctx *ctx = cb_arg; struct ionic_admin_comp *comp; - struct device *dev; if (!ctx) return; comp = cq_info->cq_desc; - dev = &q->lif->netdev->dev; memcpy(&ctx->comp, comp, sizeof(*comp)); - dev_dbg(dev, "comp admin queue command:\n"); + dev_dbg(q->dev, "comp admin queue command:\n"); dynamic_hex_dump("comp ", DUMP_PREFIX_OFFSET, 16, 1, &ctx->comp, sizeof(ctx->comp), true); complete_all(&ctx->work); } -static int ionic_adminq_post(struct ionic_lif *lif, struct ionic_admin_ctx *ctx) +int ionic_adminq_post(struct ionic_lif *lif, struct ionic_admin_ctx *ctx) { struct ionic_desc_info *desc_info; + unsigned long irqflags; struct ionic_queue *q; int err = 0; - if (!lif->adminqcq) + spin_lock_irqsave(&lif->adminq_lock, irqflags); + if (!lif->adminqcq) { + spin_unlock_irqrestore(&lif->adminq_lock, irqflags); return -EIO; + } q = &lif->adminqcq->q; - spin_lock(&lif->adminq_lock); if (!ionic_q_has_space(q, 1)) { err = -ENOSPC; goto err_out; @@ -282,19 +292,17 @@ static int 
ionic_adminq_post(struct ionic_lif *lif, struct ionic_admin_ctx *ctx) ionic_q_post(q, true, ionic_adminq_cb, ctx); err_out: - spin_unlock(&lif->adminq_lock); + spin_unlock_irqrestore(&lif->adminq_lock, irqflags); return err; } -int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx) +int ionic_adminq_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx, int err) { struct net_device *netdev = lif->netdev; unsigned long remaining; const char *name; - int err; - err = ionic_adminq_post(lif, ctx); if (err) { if (!test_bit(IONIC_LIF_F_FW_RESET, lif->state)) { name = ionic_opcode_to_str(ctx->cmd.cmd.opcode); @@ -309,6 +317,15 @@ int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx) return ionic_adminq_check_err(lif, ctx, (remaining == 0)); } +int ionic_adminq_post_wait(struct ionic_lif *lif, struct ionic_admin_ctx *ctx) +{ + int err; + + err = ionic_adminq_post(lif, ctx); + + return ionic_adminq_wait(lif, ctx, err); +} + static void ionic_dev_cmd_clean(struct ionic *ionic) { union __iomem ionic_dev_cmd_regs *regs = ionic->idev.dev_cmd_regs; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_phc.c b/drivers/net/ethernet/pensando/ionic/ionic_phc.c new file mode 100644 index 000000000000..a87c87e86aef --- /dev/null +++ b/drivers/net/ethernet/pensando/ionic/ionic_phc.c @@ -0,0 +1,615 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2017 - 2021 Pensando Systems, Inc */ + +#include <linux/netdevice.h> +#include <linux/etherdevice.h> + +#include "ionic.h" +#include "ionic_bus.h" +#include "ionic_lif.h" +#include "ionic_ethtool.h" + +static int ionic_hwstamp_tx_mode(int config_tx_type) +{ + switch (config_tx_type) { + case HWTSTAMP_TX_OFF: + return IONIC_TXSTAMP_OFF; + case HWTSTAMP_TX_ON: + return IONIC_TXSTAMP_ON; + case HWTSTAMP_TX_ONESTEP_SYNC: + return IONIC_TXSTAMP_ONESTEP_SYNC; + case HWTSTAMP_TX_ONESTEP_P2P: + return IONIC_TXSTAMP_ONESTEP_P2P; + default: + return -ERANGE; + } +} + +static u64 ionic_hwstamp_rx_filt(int config_rx_filter) +{ + switch (config_rx_filter) { + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + return IONIC_PKT_CLS_PTP1_ALL; + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + return IONIC_PKT_CLS_PTP1_SYNC; + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + return IONIC_PKT_CLS_PTP1_SYNC | IONIC_PKT_CLS_PTP1_DREQ; + + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + return IONIC_PKT_CLS_PTP2_L4_ALL; + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + return IONIC_PKT_CLS_PTP2_L4_SYNC; + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + return IONIC_PKT_CLS_PTP2_L4_SYNC | IONIC_PKT_CLS_PTP2_L4_DREQ; + + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + return IONIC_PKT_CLS_PTP2_L2_ALL; + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + return IONIC_PKT_CLS_PTP2_L2_SYNC; + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + return IONIC_PKT_CLS_PTP2_L2_SYNC | IONIC_PKT_CLS_PTP2_L2_DREQ; + + case HWTSTAMP_FILTER_PTP_V2_EVENT: + return IONIC_PKT_CLS_PTP2_ALL; + case HWTSTAMP_FILTER_PTP_V2_SYNC: + return IONIC_PKT_CLS_PTP2_SYNC; + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + return IONIC_PKT_CLS_PTP2_SYNC | IONIC_PKT_CLS_PTP2_DREQ; + + case HWTSTAMP_FILTER_NTP_ALL: + return IONIC_PKT_CLS_NTP_ALL; + + default: + return 0; + } +} + +static int ionic_lif_hwstamp_set_ts_config(struct ionic_lif *lif, + struct hwtstamp_config *new_ts) +{ + struct ionic *ionic = lif->ionic; + struct hwtstamp_config *config; + struct hwtstamp_config ts; + int tx_mode = 0; + u64 rx_filt = 0; + int err, err2; + bool rx_all; + __le64 mask; + + if (!lif->phc || !lif->phc->ptp) + return -EOPNOTSUPP; + + 
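+ /* config_lock serializes ts_config changes against get and replay */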
mutex_lock(&lif->phc->config_lock); + + if (new_ts) { + config = new_ts; + } else { + /* If called with new_ts == NULL, replay the previous request + * primarily for recovery after a FW_RESET. + * We saved the previous configuration request info, so copy + * the previous request for reference, clear the current state + * to match the device's reset state, and run with it. + */ + config = &ts; + memcpy(config, &lif->phc->ts_config, sizeof(*config)); + memset(&lif->phc->ts_config, 0, sizeof(lif->phc->ts_config)); + lif->phc->ts_config_tx_mode = 0; + lif->phc->ts_config_rx_filt = 0; + } + + tx_mode = ionic_hwstamp_tx_mode(config->tx_type); + if (tx_mode < 0) { + err = tx_mode; + goto err_queues; + } + + mask = cpu_to_le64(BIT_ULL(tx_mode)); + if ((ionic->ident.lif.eth.hwstamp_tx_modes & mask) != mask) { + err = -ERANGE; + goto err_queues; + } + + rx_filt = ionic_hwstamp_rx_filt(config->rx_filter); + rx_all = config->rx_filter != HWTSTAMP_FILTER_NONE && !rx_filt; + + mask = cpu_to_le64(rx_filt); + if ((ionic->ident.lif.eth.hwstamp_rx_filters & mask) != mask) { + rx_filt = 0; + rx_all = true; + config->rx_filter = HWTSTAMP_FILTER_ALL; + } + + dev_dbg(ionic->dev, "config_rx_filter %d rx_filt %#llx rx_all %d\n", + config->rx_filter, rx_filt, rx_all); + + if (tx_mode) { + err = ionic_lif_create_hwstamp_txq(lif); + if (err) + goto err_queues; + } + + if (rx_filt) { + err = ionic_lif_create_hwstamp_rxq(lif); + if (err) + goto err_queues; + } + + if (tx_mode != lif->phc->ts_config_tx_mode) { + err = ionic_lif_set_hwstamp_txmode(lif, tx_mode); + if (err) + goto err_txmode; + } + + if (rx_filt != lif->phc->ts_config_rx_filt) { + err = ionic_lif_set_hwstamp_rxfilt(lif, rx_filt); + if (err) + goto err_rxfilt; + } + + if (rx_all != (lif->phc->ts_config.rx_filter == HWTSTAMP_FILTER_ALL)) { + err = ionic_lif_config_hwstamp_rxq_all(lif, rx_all); + if (err) + goto err_rxall; + } + + memcpy(&lif->phc->ts_config, config, sizeof(*config)); + lif->phc->ts_config_rx_filt = rx_filt; + lif->phc->ts_config_tx_mode = tx_mode; + + mutex_unlock(&lif->phc->config_lock); + + return 0; + +err_rxall: + if (rx_filt != lif->phc->ts_config_rx_filt) { + rx_filt = lif->phc->ts_config_rx_filt; + err2 = ionic_lif_set_hwstamp_rxfilt(lif, rx_filt); + if (err2) + dev_err(ionic->dev, + "Failed to revert rx timestamp filter: %d\n", err2); + } +err_rxfilt: + if (tx_mode != lif->phc->ts_config_tx_mode) { + tx_mode = lif->phc->ts_config_tx_mode; + err2 = ionic_lif_set_hwstamp_txmode(lif, tx_mode); + if (err2) + dev_err(ionic->dev, + "Failed to revert tx timestamp mode: %d\n", err2); + } +err_txmode: + /* special queues remain allocated, just unused */ +err_queues: + mutex_unlock(&lif->phc->config_lock); + return err; +} + +int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr) +{ + struct hwtstamp_config config; + int err; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + err = ionic_lif_hwstamp_set_ts_config(lif, &config); + if (err) { + netdev_info(lif->netdev, "hwstamp set failed: %d\n", err); + return err; + } + + if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) + return -EFAULT; + + return 0; +} + +int ionic_lif_hwstamp_replay(struct ionic_lif *lif) +{ + int err; + + err = ionic_lif_hwstamp_set_ts_config(lif, NULL); + if (err) + netdev_info(lif->netdev, "hwstamp replay failed: %d\n", err); + + return err; +} + +int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr) +{ + struct hwtstamp_config config; + + if (!lif->phc || !lif->phc->ptp) + return -EOPNOTSUPP; + + 
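+ /* snapshot the saved config under config_lock */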
mutex_lock(&lif->phc->config_lock); + memcpy(&config, &lif->phc->ts_config, sizeof(config)); + mutex_unlock(&lif->phc->config_lock); + + if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) + return -EFAULT; + return 0; +} + +static u64 ionic_hwstamp_read(struct ionic *ionic, + struct ptp_system_timestamp *sts) +{ + u32 tick_high_before, tick_high, tick_low; + + /* read and discard low part to defeat hw staging of high part */ + (void)ioread32(&ionic->idev.hwstamp_regs->tick_low); + + tick_high_before = ioread32(&ionic->idev.hwstamp_regs->tick_high); + + ptp_read_system_prets(sts); + tick_low = ioread32(&ionic->idev.hwstamp_regs->tick_low); + ptp_read_system_postts(sts); + + tick_high = ioread32(&ionic->idev.hwstamp_regs->tick_high); + + /* If tick_high changed, re-read tick_low once more. Assume tick_high + * cannot change again so soon as in the span of re-reading tick_low. + */ + if (tick_high != tick_high_before) { + ptp_read_system_prets(sts); + tick_low = ioread32(&ionic->idev.hwstamp_regs->tick_low); + ptp_read_system_postts(sts); + } + + return (u64)tick_low | ((u64)tick_high << 32); +} + +static u64 ionic_cc_read(const struct cyclecounter *cc) +{ + struct ionic_phc *phc = container_of(cc, struct ionic_phc, cc); + struct ionic *ionic = phc->lif->ionic; + + return ionic_hwstamp_read(ionic, NULL); +} + +static int ionic_setphc_cmd(struct ionic_phc *phc, struct ionic_admin_ctx *ctx) +{ + ctx->work = COMPLETION_INITIALIZER_ONSTACK(ctx->work); + + ctx->cmd.lif_setphc.opcode = IONIC_CMD_LIF_SETPHC; + ctx->cmd.lif_setphc.lif_index = cpu_to_le16(phc->lif->index); + + ctx->cmd.lif_setphc.tick = cpu_to_le64(phc->tc.cycle_last); + ctx->cmd.lif_setphc.nsec = cpu_to_le64(phc->tc.nsec); + ctx->cmd.lif_setphc.frac = cpu_to_le64(phc->tc.frac); + ctx->cmd.lif_setphc.mult = cpu_to_le32(phc->cc.mult); + ctx->cmd.lif_setphc.shift = cpu_to_le32(phc->cc.shift); + + return ionic_adminq_post(phc->lif, ctx); +} + +static int ionic_phc_adjfine(struct ptp_clock_info *info, long scaled_ppm) +{ + struct ionic_phc *phc = container_of(info, struct ionic_phc, ptp_info); + struct ionic_admin_ctx ctx = {}; + unsigned long irqflags; + s64 adj; + int err; + + /* Reject phc adjustments during device upgrade */ + if (test_bit(IONIC_LIF_F_FW_RESET, phc->lif->state)) + return -EBUSY; + + /* Adjustment value scaled by 2^16 million */ + adj = (s64)scaled_ppm * phc->init_cc_mult; + + /* Adjustment value to scale */ + adj /= (s64)SCALED_PPM; + + /* Final adjusted multiplier */ + adj += phc->init_cc_mult; + + spin_lock_irqsave(&phc->lock, irqflags); + + /* update the point-in-time basis to now, before adjusting the rate */ + timecounter_read(&phc->tc); + phc->cc.mult = adj; + + /* Setphc commands are posted in-order, sequenced by phc->lock. We + * need to drop the lock before waiting for the command to complete. + */ + err = ionic_setphc_cmd(phc, &ctx); + + spin_unlock_irqrestore(&phc->lock, irqflags); + + return ionic_adminq_wait(phc->lif, &ctx, err); +} + +static int ionic_phc_adjtime(struct ptp_clock_info *info, s64 delta) +{ + struct ionic_phc *phc = container_of(info, struct ionic_phc, ptp_info); + struct ionic_admin_ctx ctx = {}; + unsigned long irqflags; + int err; + + /* Reject phc adjustments during device upgrade */ + if (test_bit(IONIC_LIF_F_FW_RESET, phc->lif->state)) + return -EBUSY; + + spin_lock_irqsave(&phc->lock, irqflags); + + timecounter_adjtime(&phc->tc, delta); + + /* Setphc commands are posted in-order, sequenced by phc->lock. We + * need to drop the lock before waiting for the command to complete. 
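+ * The posted command is then awaited below via ionic_adminq_wait().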
+ */ + err = ionic_setphc_cmd(phc, &ctx); + + spin_unlock_irqrestore(&phc->lock, irqflags); + + return ionic_adminq_wait(phc->lif, &ctx, err); +} + +static int ionic_phc_settime64(struct ptp_clock_info *info, + const struct timespec64 *ts) +{ + struct ionic_phc *phc = container_of(info, struct ionic_phc, ptp_info); + struct ionic_admin_ctx ctx = {}; + unsigned long irqflags; + int err; + u64 ns; + + /* Reject phc adjustments during device upgrade */ + if (test_bit(IONIC_LIF_F_FW_RESET, phc->lif->state)) + return -EBUSY; + + ns = timespec64_to_ns(ts); + + spin_lock_irqsave(&phc->lock, irqflags); + + timecounter_init(&phc->tc, &phc->cc, ns); + + /* Setphc commands are posted in-order, sequenced by phc->lock. We + * need to drop the lock before waiting for the command to complete. + */ + err = ionic_setphc_cmd(phc, &ctx); + + spin_unlock_irqrestore(&phc->lock, irqflags); + + return ionic_adminq_wait(phc->lif, &ctx, err); +} + +static int ionic_phc_gettimex64(struct ptp_clock_info *info, + struct timespec64 *ts, + struct ptp_system_timestamp *sts) +{ + struct ionic_phc *phc = container_of(info, struct ionic_phc, ptp_info); + struct ionic *ionic = phc->lif->ionic; + unsigned long irqflags; + u64 tick, ns; + + /* Do not attempt to read device time during upgrade */ + if (test_bit(IONIC_LIF_F_FW_RESET, phc->lif->state)) + return -EBUSY; + + spin_lock_irqsave(&phc->lock, irqflags); + + tick = ionic_hwstamp_read(ionic, sts); + + ns = timecounter_cyc2time(&phc->tc, tick); + + spin_unlock_irqrestore(&phc->lock, irqflags); + + *ts = ns_to_timespec64(ns); + + return 0; +} + +static long ionic_phc_aux_work(struct ptp_clock_info *info) +{ + struct ionic_phc *phc = container_of(info, struct ionic_phc, ptp_info); + struct ionic_admin_ctx ctx = {}; + unsigned long irqflags; + int err; + + /* Do not update phc during device upgrade, but keep polling to resume + * after upgrade. Since we don't update the point in time basis, there + * is no expectation that we are maintaining the phc time during the + * upgrade. After upgrade, it will need to be readjusted back to the + * correct time by the ptp daemon. + */ + if (test_bit(IONIC_LIF_F_FW_RESET, phc->lif->state)) + return phc->aux_work_delay; + + spin_lock_irqsave(&phc->lock, irqflags); + + /* update point-in-time basis to now */ + timecounter_read(&phc->tc); + + /* Setphc commands are posted in-order, sequenced by phc->lock. We + * need to drop the lock before waiting for the command to complete. 
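+ * Any error from the posted command is ignored here; the work simply runs again after aux_work_delay.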
+ */ + err = ionic_setphc_cmd(phc, &ctx); + + spin_unlock_irqrestore(&phc->lock, irqflags); + + ionic_adminq_wait(phc->lif, &ctx, err); + + return phc->aux_work_delay; +} + +ktime_t ionic_lif_phc_ktime(struct ionic_lif *lif, u64 tick) +{ + unsigned long irqflags; + u64 ns; + + if (!lif->phc) + return 0; + + spin_lock_irqsave(&lif->phc->lock, irqflags); + ns = timecounter_cyc2time(&lif->phc->tc, tick); + spin_unlock_irqrestore(&lif->phc->lock, irqflags); + + return ns_to_ktime(ns); +} + +static const struct ptp_clock_info ionic_ptp_info = { + .owner = THIS_MODULE, + .name = "ionic_ptp", + .adjfine = ionic_phc_adjfine, + .adjtime = ionic_phc_adjtime, + .gettimex64 = ionic_phc_gettimex64, + .settime64 = ionic_phc_settime64, + .do_aux_work = ionic_phc_aux_work, +}; + +void ionic_lif_register_phc(struct ionic_lif *lif) +{ + if (!lif->phc || !(lif->hw_features & IONIC_ETH_HW_TIMESTAMP)) + return; + + lif->phc->ptp = ptp_clock_register(&lif->phc->ptp_info, lif->ionic->dev); + + if (IS_ERR(lif->phc->ptp)) { + dev_warn(lif->ionic->dev, "Cannot register phc device: %ld\n", + PTR_ERR(lif->phc->ptp)); + + lif->phc->ptp = NULL; + } + + if (lif->phc->ptp) + ptp_schedule_worker(lif->phc->ptp, lif->phc->aux_work_delay); +} + +void ionic_lif_unregister_phc(struct ionic_lif *lif) +{ + if (!lif->phc || !lif->phc->ptp) + return; + + ptp_clock_unregister(lif->phc->ptp); + + lif->phc->ptp = NULL; +} + +void ionic_lif_alloc_phc(struct ionic_lif *lif) +{ + struct ionic *ionic = lif->ionic; + struct ionic_phc *phc; + u64 delay, diff, mult; + u64 frac = 0; + u64 features; + u32 shift; + + if (!ionic->idev.hwstamp_regs) + return; + + features = le64_to_cpu(ionic->ident.lif.eth.config.features); + if (!(features & IONIC_ETH_HW_TIMESTAMP)) + return; + + phc = devm_kzalloc(ionic->dev, sizeof(*phc), GFP_KERNEL); + if (!phc) + return; + + phc->lif = lif; + + phc->cc.read = ionic_cc_read; + phc->cc.mask = le64_to_cpu(ionic->ident.dev.hwstamp_mask); + phc->cc.mult = le32_to_cpu(ionic->ident.dev.hwstamp_mult); + phc->cc.shift = le32_to_cpu(ionic->ident.dev.hwstamp_shift); + + if (!phc->cc.mult) { + dev_err(lif->ionic->dev, + "Invalid device PHC mask multiplier %u, disabling HW timestamp support\n", + phc->cc.mult); + devm_kfree(lif->ionic->dev, phc); + lif->phc = NULL; + return; + } + + dev_dbg(lif->ionic->dev, "Device PHC mask %#llx mult %u shift %u\n", + phc->cc.mask, phc->cc.mult, phc->cc.shift); + + spin_lock_init(&phc->lock); + mutex_init(&phc->config_lock); + + /* max ticks is limited by the multiplier, or by the update period. */ + if (phc->cc.shift + 2 + ilog2(IONIC_PHC_UPDATE_NS) >= 64) { + /* max ticks that do not overflow when multiplied by max + * adjusted multiplier (twice the initial multiplier) + */ + diff = U64_MAX / phc->cc.mult / 2; + } else { + /* approx ticks at four times the update period */ + diff = (u64)IONIC_PHC_UPDATE_NS << (phc->cc.shift + 2); + diff = DIV_ROUND_UP(diff, phc->cc.mult); + } + + /* transform to bitmask */ + diff |= diff >> 1; + diff |= diff >> 2; + diff |= diff >> 4; + diff |= diff >> 8; + diff |= diff >> 16; + diff |= diff >> 32; + + /* constrain to the hardware bitmask, and use this as the bitmask */ + diff &= phc->cc.mask; + phc->cc.mask = diff; + + /* the wrap period is now defined by diff (or phc->cc.mask) + * + * we will update the time basis at about 1/4 the wrap period, so + * should not see a difference of more than +/- diff/4. + * + * this is sufficient not see a difference of more than +/- diff/2, as + * required by timecounter_cyc2time, to detect an old time stamp. 
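+ * (cyclecounter conversion is ns = (ticks * cc.mult) >> cc.shift, so doubling cc.mult halves the tick span that fits without overflow.)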
+ * + * adjust the initial multiplier, being careful to avoid overflow: + * - do not overflow 63 bits: init_cc_mult * SCALED_PPM + * - do not overflow 64 bits: max_mult * (diff / 2) + * + * we want to increase the initial multiplier as much as possible, to + * allow for more precise adjustment in ionic_phc_adjfine. + * + * only adjust the multiplier if we can double it or more. + */ + mult = U64_MAX / 2 / max(diff / 2, SCALED_PPM); + shift = mult / phc->cc.mult; + if (shift >= 2) { + /* initial multiplier will be 2^n of hardware cc.mult */ + shift = fls(shift); + /* increase cc.mult and cc.shift by the same 2^n and n. */ + phc->cc.mult <<= shift; + phc->cc.shift += shift; + } + + dev_dbg(lif->ionic->dev, "Initial PHC mask %#llx mult %u shift %u\n", + phc->cc.mask, phc->cc.mult, phc->cc.shift); + + /* frequency adjustments are relative to the initial multiplier */ + phc->init_cc_mult = phc->cc.mult; + + timecounter_init(&phc->tc, &phc->cc, ktime_get_real_ns()); + + /* Update cycle_last at 1/4 the wrap period, or IONIC_PHC_UPDATE_NS */ + delay = min_t(u64, IONIC_PHC_UPDATE_NS, + cyclecounter_cyc2ns(&phc->cc, diff / 4, 0, &frac)); + dev_dbg(lif->ionic->dev, "Work delay %llu ms\n", delay / NSEC_PER_MSEC); + + phc->aux_work_delay = nsecs_to_jiffies(delay); + + phc->ptp_info = ionic_ptp_info; + + /* We have allowed to adjust the multiplier up to +/- 1 part per 1. + * Here expressed as NORMAL_PPB (1 billion parts per billion). + */ + phc->ptp_info.max_adj = NORMAL_PPB; + + lif->phc = phc; +} + +void ionic_lif_free_phc(struct ionic_lif *lif) +{ + if (!lif->phc) + return; + + mutex_destroy(&lif->phc->config_lock); + + devm_kfree(lif->ionic->dev, lif->phc); + lif->phc = NULL; +} diff --git a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c index cd0076fc3044..d71316d9ded2 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.c @@ -140,6 +140,9 @@ int ionic_rx_filter_save(struct ionic_lif *lif, u32 flow_id, u16 rxq_index, case IONIC_RX_FILTER_MATCH_MAC_VLAN: key = le16_to_cpu(ac->mac_vlan.vlan); break; + case IONIC_RX_FILTER_STEER_PKTCLASS: + key = 0; + break; default: return -EINVAL; } @@ -210,3 +213,21 @@ struct ionic_rx_filter *ionic_rx_filter_by_addr(struct ionic_lif *lif, return NULL; } + +struct ionic_rx_filter *ionic_rx_filter_rxsteer(struct ionic_lif *lif) +{ + struct ionic_rx_filter *f; + struct hlist_head *head; + unsigned int key; + + key = hash_32(0, IONIC_RX_FILTER_HASH_BITS); + head = &lif->rx_filters.by_hash[key]; + + hlist_for_each_entry(f, head, by_hash) { + if (le16_to_cpu(f->cmd.match) != IONIC_RX_FILTER_STEER_PKTCLASS) + continue; + return f; + } + + return NULL; +} diff --git a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.h b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.h index cf8f4c0a961c..1ead48be3c83 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_rx_filter.h @@ -31,5 +31,6 @@ int ionic_rx_filter_save(struct ionic_lif *lif, u32 flow_id, u16 rxq_index, u32 hash, struct ionic_admin_ctx *ctx); struct ionic_rx_filter *ionic_rx_filter_by_vlan(struct ionic_lif *lif, u16 vid); struct ionic_rx_filter *ionic_rx_filter_by_addr(struct ionic_lif *lif, const u8 *addr); +struct ionic_rx_filter *ionic_rx_filter_rxsteer(struct ionic_lif *lif); #endif /* _IONIC_RX_FILTER_H_ */ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_stats.c 
b/drivers/net/ethernet/pensando/ionic/ionic_stats.c index 6ae75b771a15..58a854666c62 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_stats.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_stats.c @@ -130,6 +130,8 @@ static const struct ionic_stat_desc ionic_tx_stats_desc[] = { IONIC_TX_STAT_DESC(frags), IONIC_TX_STAT_DESC(tso), IONIC_TX_STAT_DESC(tso_bytes), + IONIC_TX_STAT_DESC(hwstamp_valid), + IONIC_TX_STAT_DESC(hwstamp_invalid), IONIC_TX_STAT_DESC(csum_none), IONIC_TX_STAT_DESC(csum), IONIC_TX_STAT_DESC(vlan_inserted), @@ -143,6 +145,8 @@ static const struct ionic_stat_desc ionic_rx_stats_desc[] = { IONIC_RX_STAT_DESC(csum_none), IONIC_RX_STAT_DESC(csum_complete), IONIC_RX_STAT_DESC(csum_error), + IONIC_RX_STAT_DESC(hwstamp_valid), + IONIC_RX_STAT_DESC(hwstamp_invalid), IONIC_RX_STAT_DESC(dropped), IONIC_RX_STAT_DESC(vlan_stripped), }; @@ -177,33 +181,54 @@ static const struct ionic_stat_desc ionic_dbg_napi_stats_desc[] = { #define MAX_Q(lif) ((lif)->netdev->real_num_tx_queues) +static void ionic_add_lif_txq_stats(struct ionic_lif *lif, int q_num, + struct ionic_lif_sw_stats *stats) +{ + struct ionic_tx_stats *txstats = &lif->txqstats[q_num]; + + stats->tx_packets += txstats->pkts; + stats->tx_bytes += txstats->bytes; + stats->tx_tso += txstats->tso; + stats->tx_tso_bytes += txstats->tso_bytes; + stats->tx_csum_none += txstats->csum_none; + stats->tx_csum += txstats->csum; + stats->tx_hwstamp_valid += txstats->hwstamp_valid; + stats->tx_hwstamp_invalid += txstats->hwstamp_invalid; +} + +static void ionic_add_lif_rxq_stats(struct ionic_lif *lif, int q_num, + struct ionic_lif_sw_stats *stats) +{ + struct ionic_rx_stats *rxstats = &lif->rxqstats[q_num]; + + stats->rx_packets += rxstats->pkts; + stats->rx_bytes += rxstats->bytes; + stats->rx_csum_none += rxstats->csum_none; + stats->rx_csum_complete += rxstats->csum_complete; + stats->rx_csum_error += rxstats->csum_error; + stats->rx_hwstamp_valid += rxstats->hwstamp_valid; + stats->rx_hwstamp_invalid += rxstats->hwstamp_invalid; +} + static void ionic_get_lif_stats(struct ionic_lif *lif, struct ionic_lif_sw_stats *stats) { - struct ionic_tx_stats *txstats; - struct ionic_rx_stats *rxstats; struct rtnl_link_stats64 ns; int q_num; memset(stats, 0, sizeof(*stats)); for (q_num = 0; q_num < MAX_Q(lif); q_num++) { - txstats = &lif->txqstats[q_num]; - stats->tx_packets += txstats->pkts; - stats->tx_bytes += txstats->bytes; - stats->tx_tso += txstats->tso; - stats->tx_tso_bytes += txstats->tso_bytes; - stats->tx_csum_none += txstats->csum_none; - stats->tx_csum += txstats->csum; - - rxstats = &lif->rxqstats[q_num]; - stats->rx_packets += rxstats->pkts; - stats->rx_bytes += rxstats->bytes; - stats->rx_csum_none += rxstats->csum_none; - stats->rx_csum_complete += rxstats->csum_complete; - stats->rx_csum_error += rxstats->csum_error; + ionic_add_lif_txq_stats(lif, q_num, stats); + ionic_add_lif_rxq_stats(lif, q_num, stats); } + if (lif->hwstamp_txq) + ionic_add_lif_txq_stats(lif, lif->hwstamp_txq->q.index, stats); + + if (lif->hwstamp_rxq) + ionic_add_lif_rxq_stats(lif, lif->hwstamp_rxq->q.index, stats); + ionic_get_stats64(lif->netdev, &ns); stats->hw_tx_dropped = ns.tx_dropped; stats->hw_rx_dropped = ns.rx_dropped; @@ -214,30 +239,30 @@ static void ionic_get_lif_stats(struct ionic_lif *lif, static u64 ionic_sw_stats_get_count(struct ionic_lif *lif) { - u64 total = 0; - - /* lif stats */ - total += IONIC_NUM_LIF_STATS; + u64 total = 0, tx_queues = MAX_Q(lif), rx_queues = MAX_Q(lif); - /* tx stats */ - total += MAX_Q(lif) * 
IONIC_NUM_TX_STATS; + if (lif->hwstamp_txq) + tx_queues += 1; - /* rx stats */ - total += MAX_Q(lif) * IONIC_NUM_RX_STATS; + if (lif->hwstamp_rxq) + rx_queues += 1; - /* port stats */ + total += IONIC_NUM_LIF_STATS; total += IONIC_NUM_PORT_STATS; + total += tx_queues * IONIC_NUM_TX_STATS; + total += rx_queues * IONIC_NUM_RX_STATS; + if (test_bit(IONIC_LIF_F_UP, lif->state) && test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state)) { /* tx debug stats */ - total += MAX_Q(lif) * (IONIC_NUM_DBG_CQ_STATS + + total += tx_queues * (IONIC_NUM_DBG_CQ_STATS + IONIC_NUM_TX_Q_STATS + IONIC_NUM_DBG_INTR_STATS + IONIC_MAX_NUM_SG_CNTR); /* rx debug stats */ - total += MAX_Q(lif) * (IONIC_NUM_DBG_CQ_STATS + + total += rx_queues * (IONIC_NUM_DBG_CQ_STATS + IONIC_NUM_DBG_INTR_STATS + IONIC_NUM_DBG_NAPI_STATS + IONIC_MAX_NUM_NAPI_CNTR); @@ -246,97 +271,167 @@ static u64 ionic_sw_stats_get_count(struct ionic_lif *lif) return total; } +static void ionic_sw_stats_get_tx_strings(struct ionic_lif *lif, u8 **buf, + int q_num) +{ + int i; + + for (i = 0; i < IONIC_NUM_TX_STATS; i++) + ethtool_sprintf(buf, "tx_%d_%s", q_num, + ionic_tx_stats_desc[i].name); + + if (!test_bit(IONIC_LIF_F_UP, lif->state) || + !test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state)) + return; + + for (i = 0; i < IONIC_NUM_TX_Q_STATS; i++) + ethtool_sprintf(buf, "txq_%d_%s", q_num, + ionic_txq_stats_desc[i].name); + for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) + ethtool_sprintf(buf, "txq_%d_cq_%s", q_num, + ionic_dbg_cq_stats_desc[i].name); + for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++) + ethtool_sprintf(buf, "txq_%d_intr_%s", q_num, + ionic_dbg_intr_stats_desc[i].name); + for (i = 0; i < IONIC_MAX_NUM_SG_CNTR; i++) + ethtool_sprintf(buf, "txq_%d_sg_cntr_%d", q_num, i); +} + +static void ionic_sw_stats_get_rx_strings(struct ionic_lif *lif, u8 **buf, + int q_num) +{ + int i; + + for (i = 0; i < IONIC_NUM_RX_STATS; i++) + ethtool_sprintf(buf, "rx_%d_%s", q_num, + ionic_rx_stats_desc[i].name); + + if (!test_bit(IONIC_LIF_F_UP, lif->state) || + !test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state)) + return; + + for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) + ethtool_sprintf(buf, "rxq_%d_cq_%s", q_num, + ionic_dbg_cq_stats_desc[i].name); + for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++) + ethtool_sprintf(buf, "rxq_%d_intr_%s", q_num, + ionic_dbg_intr_stats_desc[i].name); + for (i = 0; i < IONIC_NUM_DBG_NAPI_STATS; i++) + ethtool_sprintf(buf, "rxq_%d_napi_%s", q_num, + ionic_dbg_napi_stats_desc[i].name); + for (i = 0; i < IONIC_MAX_NUM_NAPI_CNTR; i++) + ethtool_sprintf(buf, "rxq_%d_napi_work_done_%d", q_num, i); +} + static void ionic_sw_stats_get_strings(struct ionic_lif *lif, u8 **buf) { int i, q_num; - for (i = 0; i < IONIC_NUM_LIF_STATS; i++) { - snprintf(*buf, ETH_GSTRING_LEN, ionic_lif_stats_desc[i].name); - *buf += ETH_GSTRING_LEN; + for (i = 0; i < IONIC_NUM_LIF_STATS; i++) + ethtool_sprintf(buf, ionic_lif_stats_desc[i].name); + + for (i = 0; i < IONIC_NUM_PORT_STATS; i++) + ethtool_sprintf(buf, ionic_port_stats_desc[i].name); + + for (q_num = 0; q_num < MAX_Q(lif); q_num++) + ionic_sw_stats_get_tx_strings(lif, buf, q_num); + + if (lif->hwstamp_txq) + ionic_sw_stats_get_tx_strings(lif, buf, lif->hwstamp_txq->q.index); + + for (q_num = 0; q_num < MAX_Q(lif); q_num++) + ionic_sw_stats_get_rx_strings(lif, buf, q_num); + + if (lif->hwstamp_rxq) + ionic_sw_stats_get_rx_strings(lif, buf, lif->hwstamp_rxq->q.index); +} + +static void ionic_sw_stats_get_txq_values(struct ionic_lif *lif, u64 **buf, + int q_num) +{ + struct ionic_tx_stats *txstats; + struct 
ionic_qcq *txqcq; + int i; + + txstats = &lif->txqstats[q_num]; + + for (i = 0; i < IONIC_NUM_TX_STATS; i++) { + **buf = IONIC_READ_STAT64(txstats, &ionic_tx_stats_desc[i]); + (*buf)++; } - for (i = 0; i < IONIC_NUM_PORT_STATS; i++) { - snprintf(*buf, ETH_GSTRING_LEN, - ionic_port_stats_desc[i].name); - *buf += ETH_GSTRING_LEN; + if (!test_bit(IONIC_LIF_F_UP, lif->state) || + !test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state)) + return; + + txqcq = lif->txqcqs[q_num]; + for (i = 0; i < IONIC_NUM_TX_Q_STATS; i++) { + **buf = IONIC_READ_STAT64(&txqcq->q, + &ionic_txq_stats_desc[i]); + (*buf)++; } + for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) { + **buf = IONIC_READ_STAT64(&txqcq->cq, + &ionic_dbg_cq_stats_desc[i]); + (*buf)++; + } + for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++) { + **buf = IONIC_READ_STAT64(&txqcq->intr, + &ionic_dbg_intr_stats_desc[i]); + (*buf)++; + } + for (i = 0; i < IONIC_NUM_DBG_NAPI_STATS; i++) { + **buf = IONIC_READ_STAT64(&txqcq->napi_stats, + &ionic_dbg_napi_stats_desc[i]); + (*buf)++; + } + for (i = 0; i < IONIC_MAX_NUM_NAPI_CNTR; i++) { + **buf = txqcq->napi_stats.work_done_cntr[i]; + (*buf)++; + } + for (i = 0; i < IONIC_MAX_NUM_SG_CNTR; i++) { + **buf = txstats->sg_cntr[i]; + (*buf)++; + } +} - for (q_num = 0; q_num < MAX_Q(lif); q_num++) { - for (i = 0; i < IONIC_NUM_TX_STATS; i++) { - snprintf(*buf, ETH_GSTRING_LEN, "tx_%d_%s", - q_num, ionic_tx_stats_desc[i].name); - *buf += ETH_GSTRING_LEN; - } - - if (test_bit(IONIC_LIF_F_UP, lif->state) && - test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state)) { - for (i = 0; i < IONIC_NUM_TX_Q_STATS; i++) { - snprintf(*buf, ETH_GSTRING_LEN, - "txq_%d_%s", - q_num, - ionic_txq_stats_desc[i].name); - *buf += ETH_GSTRING_LEN; - } - for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) { - snprintf(*buf, ETH_GSTRING_LEN, - "txq_%d_cq_%s", - q_num, - ionic_dbg_cq_stats_desc[i].name); - *buf += ETH_GSTRING_LEN; - } - for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++) { - snprintf(*buf, ETH_GSTRING_LEN, - "txq_%d_intr_%s", - q_num, - ionic_dbg_intr_stats_desc[i].name); - *buf += ETH_GSTRING_LEN; - } - for (i = 0; i < IONIC_MAX_NUM_SG_CNTR; i++) { - snprintf(*buf, ETH_GSTRING_LEN, - "txq_%d_sg_cntr_%d", - q_num, i); - *buf += ETH_GSTRING_LEN; - } - } +static void ionic_sw_stats_get_rxq_values(struct ionic_lif *lif, u64 **buf, + int q_num) +{ + struct ionic_rx_stats *rxstats; + struct ionic_qcq *rxqcq; + int i; + + rxstats = &lif->rxqstats[q_num]; + + for (i = 0; i < IONIC_NUM_RX_STATS; i++) { + **buf = IONIC_READ_STAT64(rxstats, &ionic_rx_stats_desc[i]); + (*buf)++; } - for (q_num = 0; q_num < MAX_Q(lif); q_num++) { - for (i = 0; i < IONIC_NUM_RX_STATS; i++) { - snprintf(*buf, ETH_GSTRING_LEN, - "rx_%d_%s", - q_num, ionic_rx_stats_desc[i].name); - *buf += ETH_GSTRING_LEN; - } - - if (test_bit(IONIC_LIF_F_UP, lif->state) && - test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state)) { - for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) { - snprintf(*buf, ETH_GSTRING_LEN, - "rxq_%d_cq_%s", - q_num, - ionic_dbg_cq_stats_desc[i].name); - *buf += ETH_GSTRING_LEN; - } - for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++) { - snprintf(*buf, ETH_GSTRING_LEN, - "rxq_%d_intr_%s", - q_num, - ionic_dbg_intr_stats_desc[i].name); - *buf += ETH_GSTRING_LEN; - } - for (i = 0; i < IONIC_NUM_DBG_NAPI_STATS; i++) { - snprintf(*buf, ETH_GSTRING_LEN, - "rxq_%d_napi_%s", - q_num, - ionic_dbg_napi_stats_desc[i].name); - *buf += ETH_GSTRING_LEN; - } - for (i = 0; i < IONIC_MAX_NUM_NAPI_CNTR; i++) { - snprintf(*buf, ETH_GSTRING_LEN, - "rxq_%d_napi_work_done_%d", - q_num, i); - *buf += 
ETH_GSTRING_LEN; - } - } + + if (!test_bit(IONIC_LIF_F_UP, lif->state) || + !test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state)) + return; + + rxqcq = lif->rxqcqs[q_num]; + for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) { + **buf = IONIC_READ_STAT64(&rxqcq->cq, + &ionic_dbg_cq_stats_desc[i]); + (*buf)++; + } + for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++) { + **buf = IONIC_READ_STAT64(&rxqcq->intr, + &ionic_dbg_intr_stats_desc[i]); + (*buf)++; + } + for (i = 0; i < IONIC_NUM_DBG_NAPI_STATS; i++) { + **buf = IONIC_READ_STAT64(&rxqcq->napi_stats, + &ionic_dbg_napi_stats_desc[i]); + (*buf)++; + } + for (i = 0; i < IONIC_MAX_NUM_NAPI_CNTR; i++) { + **buf = rxqcq->napi_stats.work_done_cntr[i]; + (*buf)++; } } @@ -344,9 +439,6 @@ static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf) { struct ionic_port_stats *port_stats; struct ionic_lif_sw_stats lif_stats; - struct ionic_qcq *txqcq, *rxqcq; - struct ionic_tx_stats *txstats; - struct ionic_rx_stats *rxstats; int i, q_num; ionic_get_lif_stats(lif, &lif_stats); @@ -363,73 +455,17 @@ static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf) (*buf)++; } - for (q_num = 0; q_num < MAX_Q(lif); q_num++) { - txstats = &lif->txqstats[q_num]; - - for (i = 0; i < IONIC_NUM_TX_STATS; i++) { - **buf = IONIC_READ_STAT64(txstats, - &ionic_tx_stats_desc[i]); - (*buf)++; - } - - if (test_bit(IONIC_LIF_F_UP, lif->state) && - test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state)) { - txqcq = lif->txqcqs[q_num]; - for (i = 0; i < IONIC_NUM_TX_Q_STATS; i++) { - **buf = IONIC_READ_STAT64(&txqcq->q, - &ionic_txq_stats_desc[i]); - (*buf)++; - } - for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) { - **buf = IONIC_READ_STAT64(&txqcq->cq, - &ionic_dbg_cq_stats_desc[i]); - (*buf)++; - } - for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++) { - **buf = IONIC_READ_STAT64(&txqcq->intr, - &ionic_dbg_intr_stats_desc[i]); - (*buf)++; - } - for (i = 0; i < IONIC_MAX_NUM_SG_CNTR; i++) { - **buf = txstats->sg_cntr[i]; - (*buf)++; - } - } - } + for (q_num = 0; q_num < MAX_Q(lif); q_num++) + ionic_sw_stats_get_txq_values(lif, buf, q_num); - for (q_num = 0; q_num < MAX_Q(lif); q_num++) { - rxstats = &lif->rxqstats[q_num]; - - for (i = 0; i < IONIC_NUM_RX_STATS; i++) { - **buf = IONIC_READ_STAT64(rxstats, - &ionic_rx_stats_desc[i]); - (*buf)++; - } - - if (test_bit(IONIC_LIF_F_UP, lif->state) && - test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state)) { - rxqcq = lif->rxqcqs[q_num]; - for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) { - **buf = IONIC_READ_STAT64(&rxqcq->cq, - &ionic_dbg_cq_stats_desc[i]); - (*buf)++; - } - for (i = 0; i < IONIC_NUM_DBG_INTR_STATS; i++) { - **buf = IONIC_READ_STAT64(&rxqcq->intr, - &ionic_dbg_intr_stats_desc[i]); - (*buf)++; - } - for (i = 0; i < IONIC_NUM_DBG_NAPI_STATS; i++) { - **buf = IONIC_READ_STAT64(&rxqcq->napi_stats, - &ionic_dbg_napi_stats_desc[i]); - (*buf)++; - } - for (i = 0; i < IONIC_MAX_NUM_NAPI_CNTR; i++) { - **buf = rxqcq->napi_stats.work_done_cntr[i]; - (*buf)++; - } - } - } + if (lif->hwstamp_txq) + ionic_sw_stats_get_txq_values(lif, buf, lif->hwstamp_txq->q.index); + + for (q_num = 0; q_num < MAX_Q(lif); q_num++) + ionic_sw_stats_get_rxq_values(lif, buf, q_num); + + if (lif->hwstamp_rxq) + ionic_sw_stats_get_rxq_values(lif, buf, lif->hwstamp_rxq->q.index); } const struct ionic_stats_group_intf ionic_stats_groups[] = { diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 162a1ff1e9d2..08934888575c 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ 
b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -10,14 +10,6 @@ #include "ionic_lif.h" #include "ionic_txrx.h" -static void ionic_rx_clean(struct ionic_queue *q, - struct ionic_desc_info *desc_info, - struct ionic_cq_info *cq_info, - void *cb_arg); - -static bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info); - -static bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info); static inline void ionic_txq_post(struct ionic_queue *q, bool ring_dbell, ionic_desc_cb cb_func, void *cb_arg) @@ -40,72 +32,149 @@ static inline struct netdev_queue *q_to_ndq(struct ionic_queue *q) return netdev_get_tx_queue(q->lif->netdev, q->index); } -static struct sk_buff *ionic_rx_skb_alloc(struct ionic_queue *q, - unsigned int len, bool frags) +static void ionic_rx_buf_reset(struct ionic_buf_info *buf_info) +{ + buf_info->page = NULL; + buf_info->page_offset = 0; + buf_info->dma_addr = 0; +} + +static int ionic_rx_page_alloc(struct ionic_queue *q, + struct ionic_buf_info *buf_info) { - struct ionic_lif *lif = q->lif; + struct net_device *netdev = q->lif->netdev; struct ionic_rx_stats *stats; - struct net_device *netdev; - struct sk_buff *skb; + struct device *dev; - netdev = lif->netdev; - stats = &q->lif->rxqstats[q->index]; + dev = q->dev; + stats = q_to_rx_stats(q); - if (frags) - skb = napi_get_frags(&q_to_qcq(q)->napi); - else - skb = netdev_alloc_skb_ip_align(netdev, len); + if (unlikely(!buf_info)) { + net_err_ratelimited("%s: %s invalid buf_info in alloc\n", + netdev->name, q->name); + return -EINVAL; + } - if (unlikely(!skb)) { - net_warn_ratelimited("%s: SKB alloc failed on %s!\n", - netdev->name, q->name); + buf_info->page = alloc_pages(IONIC_PAGE_GFP_MASK, 0); + if (unlikely(!buf_info->page)) { + net_err_ratelimited("%s: %s page alloc failed\n", + netdev->name, q->name); stats->alloc_err++; - return NULL; + return -ENOMEM; } + buf_info->page_offset = 0; - return skb; + buf_info->dma_addr = dma_map_page(dev, buf_info->page, buf_info->page_offset, + IONIC_PAGE_SIZE, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(dev, buf_info->dma_addr))) { + __free_pages(buf_info->page, 0); + ionic_rx_buf_reset(buf_info); + net_err_ratelimited("%s: %s dma map failed\n", + netdev->name, q->name); + stats->dma_map_err++; + return -EIO; + } + + return 0; +} + +static void ionic_rx_page_free(struct ionic_queue *q, + struct ionic_buf_info *buf_info) +{ + struct net_device *netdev = q->lif->netdev; + struct device *dev = q->dev; + + if (unlikely(!buf_info)) { + net_err_ratelimited("%s: %s invalid buf_info in free\n", + netdev->name, q->name); + return; + } + + if (!buf_info->page) + return; + + dma_unmap_page(dev, buf_info->dma_addr, IONIC_PAGE_SIZE, DMA_FROM_DEVICE); + __free_pages(buf_info->page, 0); + ionic_rx_buf_reset(buf_info); +} + +static bool ionic_rx_buf_recycle(struct ionic_queue *q, + struct ionic_buf_info *buf_info, u32 used) +{ + u32 size; + + /* don't re-use pages allocated in low-mem condition */ + if (page_is_pfmemalloc(buf_info->page)) + return false; + + /* don't re-use buffers from non-local numa nodes */ + if (page_to_nid(buf_info->page) != numa_mem_id()) + return false; + + size = ALIGN(used, IONIC_PAGE_SPLIT_SZ); + buf_info->page_offset += size; + if (buf_info->page_offset >= IONIC_PAGE_SIZE) + return false; + + get_page(buf_info->page); + + return true; } static struct sk_buff *ionic_rx_frags(struct ionic_queue *q, struct ionic_desc_info *desc_info, - struct ionic_cq_info *cq_info) + struct ionic_rxq_comp *comp) { - struct ionic_rxq_comp *comp = 
cq_info->cq_desc; - struct device *dev = q->lif->ionic->dev; - struct ionic_page_info *page_info; + struct net_device *netdev = q->lif->netdev; + struct ionic_buf_info *buf_info; + struct ionic_rx_stats *stats; + struct device *dev = q->dev; struct sk_buff *skb; unsigned int i; u16 frag_len; u16 len; - page_info = &desc_info->pages[0]; + stats = q_to_rx_stats(q); + + buf_info = &desc_info->bufs[0]; len = le16_to_cpu(comp->len); - prefetch(page_address(page_info->page) + NET_IP_ALIGN); + prefetch(buf_info->page); - skb = ionic_rx_skb_alloc(q, len, true); - if (unlikely(!skb)) + skb = napi_get_frags(&q_to_qcq(q)->napi); + if (unlikely(!skb)) { + net_warn_ratelimited("%s: SKB alloc failed on %s!\n", + netdev->name, q->name); + stats->alloc_err++; return NULL; + } i = comp->num_sg_elems + 1; do { - if (unlikely(!page_info->page)) { - struct napi_struct *napi = &q_to_qcq(q)->napi; - - napi->skb = NULL; + if (unlikely(!buf_info->page)) { dev_kfree_skb(skb); return NULL; } - frag_len = min(len, (u16)PAGE_SIZE); + frag_len = min_t(u16, len, IONIC_PAGE_SIZE - buf_info->page_offset); len -= frag_len; - dma_unmap_page(dev, dma_unmap_addr(page_info, dma_addr), - PAGE_SIZE, DMA_FROM_DEVICE); + dma_sync_single_for_cpu(dev, + buf_info->dma_addr + buf_info->page_offset, + frag_len, DMA_FROM_DEVICE); + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, - page_info->page, 0, frag_len, PAGE_SIZE); - page_info->page = NULL; - page_info++; + buf_info->page, buf_info->page_offset, frag_len, + IONIC_PAGE_SIZE); + + if (!ionic_rx_buf_recycle(q, buf_info, frag_len)) { + dma_unmap_page(dev, buf_info->dma_addr, + IONIC_PAGE_SIZE, DMA_FROM_DEVICE); + ionic_rx_buf_reset(buf_info); + } + + buf_info++; + i--; } while (i > 0); @@ -114,30 +183,37 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q, static struct sk_buff *ionic_rx_copybreak(struct ionic_queue *q, struct ionic_desc_info *desc_info, - struct ionic_cq_info *cq_info) + struct ionic_rxq_comp *comp) { - struct ionic_rxq_comp *comp = cq_info->cq_desc; - struct device *dev = q->lif->ionic->dev; - struct ionic_page_info *page_info; + struct net_device *netdev = q->lif->netdev; + struct ionic_buf_info *buf_info; + struct ionic_rx_stats *stats; + struct device *dev = q->dev; struct sk_buff *skb; u16 len; - page_info = &desc_info->pages[0]; + stats = q_to_rx_stats(q); + + buf_info = &desc_info->bufs[0]; len = le16_to_cpu(comp->len); - skb = ionic_rx_skb_alloc(q, len, false); - if (unlikely(!skb)) + skb = napi_alloc_skb(&q_to_qcq(q)->napi, len); + if (unlikely(!skb)) { + net_warn_ratelimited("%s: SKB alloc failed on %s!\n", + netdev->name, q->name); + stats->alloc_err++; return NULL; + } - if (unlikely(!page_info->page)) { + if (unlikely(!buf_info->page)) { dev_kfree_skb(skb); return NULL; } - dma_sync_single_for_cpu(dev, dma_unmap_addr(page_info, dma_addr), + dma_sync_single_for_cpu(dev, buf_info->dma_addr + buf_info->page_offset, len, DMA_FROM_DEVICE); - skb_copy_to_linear_data(skb, page_address(page_info->page), len); - dma_sync_single_for_device(dev, dma_unmap_addr(page_info, dma_addr), + skb_copy_to_linear_data(skb, page_address(buf_info->page) + buf_info->page_offset, len); + dma_sync_single_for_device(dev, buf_info->dma_addr + buf_info->page_offset, len, DMA_FROM_DEVICE); skb_put(skb, len); @@ -151,14 +227,15 @@ static void ionic_rx_clean(struct ionic_queue *q, struct ionic_cq_info *cq_info, void *cb_arg) { - struct ionic_rxq_comp *comp = cq_info->cq_desc; + struct net_device *netdev = q->lif->netdev; struct ionic_qcq *qcq = q_to_qcq(q); struct 
ionic_rx_stats *stats; - struct net_device *netdev; + struct ionic_rxq_comp *comp; struct sk_buff *skb; + comp = cq_info->cq_desc + qcq->cq.desc_size - sizeof(*comp); + stats = q_to_rx_stats(q); - netdev = q->lif->netdev; if (comp->status) { stats->dropped++; @@ -169,9 +246,9 @@ static void ionic_rx_clean(struct ionic_queue *q, stats->bytes += le16_to_cpu(comp->len); if (le16_to_cpu(comp->len) <= q->lif->rx_copybreak) - skb = ionic_rx_copybreak(q, desc_info, cq_info); + skb = ionic_rx_copybreak(q, desc_info, comp); else - skb = ionic_rx_frags(q, desc_info, cq_info); + skb = ionic_rx_frags(q, desc_info, comp); if (unlikely(!skb)) { stats->dropped++; @@ -219,17 +296,39 @@ static void ionic_rx_clean(struct ionic_queue *q, stats->vlan_stripped++; } + if (unlikely(q->features & IONIC_RXQ_F_HWSTAMP)) { + __le64 *cq_desc_hwstamp; + u64 hwstamp; + + cq_desc_hwstamp = + cq_info->cq_desc + + qcq->cq.desc_size - + sizeof(struct ionic_rxq_comp) - + IONIC_HWSTAMP_CQ_NEGOFFSET; + + hwstamp = le64_to_cpu(*cq_desc_hwstamp); + + if (hwstamp != IONIC_HWSTAMP_INVALID) { + skb_hwtstamps(skb)->hwtstamp = ionic_lif_phc_ktime(q->lif, hwstamp); + stats->hwstamp_valid++; + } else { + stats->hwstamp_invalid++; + } + } + if (le16_to_cpu(comp->len) <= q->lif->rx_copybreak) napi_gro_receive(&qcq->napi, skb); else napi_gro_frags(&qcq->napi); } -static bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info) +bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info) { - struct ionic_rxq_comp *comp = cq_info->cq_desc; struct ionic_queue *q = cq->bound_q; struct ionic_desc_info *desc_info; + struct ionic_rxq_comp *comp; + + comp = cq_info->cq_desc + cq->desc_size - sizeof(*comp); if (!color_match(comp->pkt_type_color, cq->done_color)) return false; @@ -253,138 +352,75 @@ static bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info) return true; } -static int ionic_rx_page_alloc(struct ionic_queue *q, - struct ionic_page_info *page_info) -{ - struct ionic_lif *lif = q->lif; - struct ionic_rx_stats *stats; - struct net_device *netdev; - struct device *dev; - - netdev = lif->netdev; - dev = lif->ionic->dev; - stats = q_to_rx_stats(q); - - if (unlikely(!page_info)) { - net_err_ratelimited("%s: %s invalid page_info in alloc\n", - netdev->name, q->name); - return -EINVAL; - } - - page_info->page = dev_alloc_page(); - if (unlikely(!page_info->page)) { - net_err_ratelimited("%s: %s page alloc failed\n", - netdev->name, q->name); - stats->alloc_err++; - return -ENOMEM; - } - - page_info->dma_addr = dma_map_page(dev, page_info->page, 0, PAGE_SIZE, - DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(dev, page_info->dma_addr))) { - put_page(page_info->page); - page_info->dma_addr = 0; - page_info->page = NULL; - net_err_ratelimited("%s: %s dma map failed\n", - netdev->name, q->name); - stats->dma_map_err++; - return -EIO; - } - - return 0; -} - -static void ionic_rx_page_free(struct ionic_queue *q, - struct ionic_page_info *page_info) -{ - struct ionic_lif *lif = q->lif; - struct net_device *netdev; - struct device *dev; - - netdev = lif->netdev; - dev = lif->ionic->dev; - - if (unlikely(!page_info)) { - net_err_ratelimited("%s: %s invalid page_info in free\n", - netdev->name, q->name); - return; - } - - if (unlikely(!page_info->page)) { - net_err_ratelimited("%s: %s invalid page in free\n", - netdev->name, q->name); - return; - } - - dma_unmap_page(dev, page_info->dma_addr, PAGE_SIZE, DMA_FROM_DEVICE); - - put_page(page_info->page); - page_info->dma_addr = 0; - page_info->page = 
NULL; -} - void ionic_rx_fill(struct ionic_queue *q) { struct net_device *netdev = q->lif->netdev; struct ionic_desc_info *desc_info; - struct ionic_page_info *page_info; struct ionic_rxq_sg_desc *sg_desc; struct ionic_rxq_sg_elem *sg_elem; + struct ionic_buf_info *buf_info; struct ionic_rxq_desc *desc; unsigned int remain_len; - unsigned int seg_len; + unsigned int frag_len; unsigned int nfrags; unsigned int i, j; unsigned int len; len = netdev->mtu + ETH_HLEN + VLAN_HLEN; - nfrags = round_up(len, PAGE_SIZE) / PAGE_SIZE; for (i = ionic_q_space_avail(q); i; i--) { + nfrags = 0; remain_len = len; desc_info = &q->info[q->head_idx]; desc = desc_info->desc; - sg_desc = desc_info->sg_desc; - page_info = &desc_info->pages[0]; - - if (page_info->page) { /* recycle the buffer */ - ionic_rxq_post(q, false, ionic_rx_clean, NULL); - continue; - } + buf_info = &desc_info->bufs[0]; - /* fill main descriptor - pages[0] */ - desc->opcode = (nfrags > 1) ? IONIC_RXQ_DESC_OPCODE_SG : - IONIC_RXQ_DESC_OPCODE_SIMPLE; - desc_info->npages = nfrags; - if (unlikely(ionic_rx_page_alloc(q, page_info))) { - desc->addr = 0; - desc->len = 0; - return; + if (!buf_info->page) { /* alloc a new buffer? */ + if (unlikely(ionic_rx_page_alloc(q, buf_info))) { + desc->addr = 0; + desc->len = 0; + return; + } } - desc->addr = cpu_to_le64(page_info->dma_addr); - seg_len = min_t(unsigned int, PAGE_SIZE, len); - desc->len = cpu_to_le16(seg_len); - remain_len -= seg_len; - page_info++; - /* fill sg descriptors - pages[1..n] */ - for (j = 0; j < nfrags - 1; j++) { - if (page_info->page) /* recycle the sg buffer */ - continue; + /* fill main descriptor - buf[0] */ + desc->addr = cpu_to_le64(buf_info->dma_addr + buf_info->page_offset); + frag_len = min_t(u16, len, IONIC_PAGE_SIZE - buf_info->page_offset); + desc->len = cpu_to_le16(frag_len); + remain_len -= frag_len; + buf_info++; + nfrags++; + /* fill sg descriptors - buf[1..n] */ + sg_desc = desc_info->sg_desc; + for (j = 0; remain_len > 0 && j < q->max_sg_elems; j++) { sg_elem = &sg_desc->elems[j]; - if (unlikely(ionic_rx_page_alloc(q, page_info))) { - sg_elem->addr = 0; - sg_elem->len = 0; - return; + if (!buf_info->page) { /* alloc a new sg buffer? */ + if (unlikely(ionic_rx_page_alloc(q, buf_info))) { + sg_elem->addr = 0; + sg_elem->len = 0; + return; + } } - sg_elem->addr = cpu_to_le64(page_info->dma_addr); - seg_len = min_t(unsigned int, PAGE_SIZE, remain_len); - sg_elem->len = cpu_to_le16(seg_len); - remain_len -= seg_len; - page_info++; + + sg_elem->addr = cpu_to_le64(buf_info->dma_addr + buf_info->page_offset); + frag_len = min_t(u16, remain_len, IONIC_PAGE_SIZE - buf_info->page_offset); + sg_elem->len = cpu_to_le16(frag_len); + remain_len -= frag_len; + buf_info++; + nfrags++; } + /* clear end sg element as a sentinel */ + if (j < q->max_sg_elems) { + sg_elem = &sg_desc->elems[j]; + memset(sg_elem, 0, sizeof(*sg_elem)); + } + + desc->opcode = (nfrags > 1) ? 
IONIC_RXQ_DESC_OPCODE_SG : + IONIC_RXQ_DESC_OPCODE_SIMPLE; + desc_info->nbufs = nfrags; + ionic_rxq_post(q, false, ionic_rx_clean, NULL); } @@ -395,21 +431,24 @@ void ionic_rx_fill(struct ionic_queue *q) void ionic_rx_empty(struct ionic_queue *q) { struct ionic_desc_info *desc_info; - struct ionic_page_info *page_info; + struct ionic_buf_info *buf_info; unsigned int i, j; for (i = 0; i < q->num_descs; i++) { desc_info = &q->info[i]; for (j = 0; j < IONIC_RX_MAX_SG_ELEMS + 1; j++) { - page_info = &desc_info->pages[j]; - if (page_info->page) - ionic_rx_page_free(q, page_info); + buf_info = &desc_info->bufs[j]; + if (buf_info->page) + ionic_rx_page_free(q, buf_info); } - desc_info->npages = 0; + desc_info->nbufs = 0; desc_info->cb = NULL; desc_info->cb_arg = NULL; } + + q->head_idx = 0; + q->tail_idx = 0; } static void ionic_dim_update(struct ionic_qcq *qcq) @@ -525,7 +564,7 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget) idev = &lif->ionic->idev; txcq = &lif->txqcqs[qi]->cq; - tx_work_done = ionic_cq_service(txcq, lif->tx_budget, + tx_work_done = ionic_cq_service(txcq, IONIC_TX_BUDGET_DEFAULT, ionic_tx_service, NULL, NULL); rx_work_done = ionic_cq_service(rxcq, budget, @@ -558,7 +597,7 @@ static dma_addr_t ionic_tx_map_single(struct ionic_queue *q, void *data, size_t len) { struct ionic_tx_stats *stats = q_to_tx_stats(q); - struct device *dev = q->lif->ionic->dev; + struct device *dev = q->dev; dma_addr_t dma_addr; dma_addr = dma_map_single(dev, data, len, DMA_TO_DEVICE); @@ -576,7 +615,7 @@ static dma_addr_t ionic_tx_map_frag(struct ionic_queue *q, size_t offset, size_t len) { struct ionic_tx_stats *stats = q_to_tx_stats(q); - struct device *dev = q->lif->ionic->dev; + struct device *dev = q->dev; dma_addr_t dma_addr; dma_addr = skb_frag_dma_map(dev, frag, offset, len, DMA_TO_DEVICE); @@ -588,62 +627,130 @@ static dma_addr_t ionic_tx_map_frag(struct ionic_queue *q, return dma_addr; } +static int ionic_tx_map_skb(struct ionic_queue *q, struct sk_buff *skb, + struct ionic_desc_info *desc_info) +{ + struct ionic_buf_info *buf_info = desc_info->bufs; + struct ionic_tx_stats *stats = q_to_tx_stats(q); + struct device *dev = q->dev; + dma_addr_t dma_addr; + unsigned int nfrags; + skb_frag_t *frag; + int frag_idx; + + dma_addr = ionic_tx_map_single(q, skb->data, skb_headlen(skb)); + if (dma_mapping_error(dev, dma_addr)) { + stats->dma_map_err++; + return -EIO; + } + buf_info->dma_addr = dma_addr; + buf_info->len = skb_headlen(skb); + buf_info++; + + frag = skb_shinfo(skb)->frags; + nfrags = skb_shinfo(skb)->nr_frags; + for (frag_idx = 0; frag_idx < nfrags; frag_idx++, frag++) { + dma_addr = ionic_tx_map_frag(q, frag, 0, skb_frag_size(frag)); + if (dma_mapping_error(dev, dma_addr)) { + stats->dma_map_err++; + goto dma_fail; + } + buf_info->dma_addr = dma_addr; + buf_info->len = skb_frag_size(frag); + buf_info++; + } + + desc_info->nbufs = 1 + nfrags; + + return 0; + +dma_fail: + /* unwind the frag mappings and the head mapping */ + while (frag_idx > 0) { + frag_idx--; + buf_info--; + dma_unmap_page(dev, buf_info->dma_addr, + buf_info->len, DMA_TO_DEVICE); + } + dma_unmap_single(dev, buf_info->dma_addr, buf_info->len, DMA_TO_DEVICE); + return -EIO; +} + static void ionic_tx_clean(struct ionic_queue *q, struct ionic_desc_info *desc_info, struct ionic_cq_info *cq_info, void *cb_arg) { - struct ionic_txq_sg_desc *sg_desc = desc_info->sg_desc; - struct ionic_txq_sg_elem *elem = sg_desc->elems; + struct ionic_buf_info *buf_info = desc_info->bufs; struct ionic_tx_stats *stats = 
q_to_tx_stats(q); - struct ionic_txq_desc *desc = desc_info->desc; - struct device *dev = q->lif->ionic->dev; - u8 opcode, flags, nsge; - u16 queue_index; + struct ionic_qcq *qcq = q_to_qcq(q); + struct sk_buff *skb = cb_arg; + struct device *dev = q->dev; unsigned int i; - u64 addr; + u16 qi; + + if (desc_info->nbufs) { + dma_unmap_single(dev, (dma_addr_t)buf_info->dma_addr, + buf_info->len, DMA_TO_DEVICE); + buf_info++; + for (i = 1; i < desc_info->nbufs; i++, buf_info++) + dma_unmap_page(dev, (dma_addr_t)buf_info->dma_addr, + buf_info->len, DMA_TO_DEVICE); + } - decode_txq_desc_cmd(le64_to_cpu(desc->cmd), - &opcode, &flags, &nsge, &addr); + if (!skb) + return; - /* use unmap_single only if either this is not TSO, - * or this is first descriptor of a TSO - */ - if (opcode != IONIC_TXQ_DESC_OPCODE_TSO || - flags & IONIC_TXQ_DESC_FLAG_TSO_SOT) - dma_unmap_single(dev, (dma_addr_t)addr, - le16_to_cpu(desc->len), DMA_TO_DEVICE); - else - dma_unmap_page(dev, (dma_addr_t)addr, - le16_to_cpu(desc->len), DMA_TO_DEVICE); - - for (i = 0; i < nsge; i++, elem++) - dma_unmap_page(dev, (dma_addr_t)le64_to_cpu(elem->addr), - le16_to_cpu(elem->len), DMA_TO_DEVICE); - - if (cb_arg) { - struct sk_buff *skb = cb_arg; - u32 len = skb->len; - - queue_index = skb_get_queue_mapping(skb); - if (unlikely(__netif_subqueue_stopped(q->lif->netdev, - queue_index))) { - netif_wake_subqueue(q->lif->netdev, queue_index); - q->wake++; + qi = skb_get_queue_mapping(skb); + + if (unlikely(q->features & IONIC_TXQ_F_HWSTAMP)) { + if (cq_info) { + struct skb_shared_hwtstamps hwts = {}; + __le64 *cq_desc_hwstamp; + u64 hwstamp; + + cq_desc_hwstamp = + cq_info->cq_desc + + qcq->cq.desc_size - + sizeof(struct ionic_txq_comp) - + IONIC_HWSTAMP_CQ_NEGOFFSET; + + hwstamp = le64_to_cpu(*cq_desc_hwstamp); + + if (hwstamp != IONIC_HWSTAMP_INVALID) { + hwts.hwtstamp = ionic_lif_phc_ktime(q->lif, hwstamp); + + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + skb_tstamp_tx(skb, &hwts); + + stats->hwstamp_valid++; + } else { + stats->hwstamp_invalid++; + } } - dev_kfree_skb_any(skb); - stats->clean++; - netdev_tx_completed_queue(q_to_ndq(q), 1, len); + + } else if (unlikely(__netif_subqueue_stopped(q->lif->netdev, qi))) { + netif_wake_subqueue(q->lif->netdev, qi); + q->wake++; } + + desc_info->bytes = skb->len; + stats->clean++; + + dev_consume_skb_any(skb); } -static bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info) +bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info) { - struct ionic_txq_comp *comp = cq_info->cq_desc; struct ionic_queue *q = cq->bound_q; struct ionic_desc_info *desc_info; + struct ionic_txq_comp *comp; + int bytes = 0; + int pkts = 0; u16 index; + comp = cq_info->cq_desc + cq->desc_size - sizeof(*comp); + if (!color_match(comp->color, cq->done_color)) return false; @@ -652,13 +759,21 @@ static bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info) */ do { desc_info = &q->info[q->tail_idx]; + desc_info->bytes = 0; index = q->tail_idx; q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1); ionic_tx_clean(q, desc_info, cq_info, desc_info->cb_arg); + if (desc_info->cb_arg) { + pkts++; + bytes += desc_info->bytes; + } desc_info->cb = NULL; desc_info->cb_arg = NULL; } while (index != le16_to_cpu(comp->comp_index)); + if (pkts && bytes && !unlikely(q->features & IONIC_TXQ_F_HWSTAMP)) + netdev_tx_completed_queue(q_to_ndq(q), pkts, bytes); + return true; } @@ -677,15 +792,25 @@ void ionic_tx_flush(struct ionic_cq *cq) void ionic_tx_empty(struct ionic_queue *q) { 
struct ionic_desc_info *desc_info; + int bytes = 0; + int pkts = 0; /* walk the not completed tx entries, if any */ while (q->head_idx != q->tail_idx) { desc_info = &q->info[q->tail_idx]; + desc_info->bytes = 0; q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1); ionic_tx_clean(q, desc_info, NULL, desc_info->cb_arg); + if (desc_info->cb_arg) { + pkts++; + bytes += desc_info->bytes; + } desc_info->cb = NULL; desc_info->cb_arg = NULL; } + + if (pkts && bytes && !unlikely(q->features & IONIC_TXQ_F_HWSTAMP)) + netdev_tx_completed_queue(q_to_ndq(q), pkts, bytes); } static int ionic_tx_tcp_inner_pseudo_csum(struct sk_buff *skb) @@ -756,50 +881,34 @@ static void ionic_tx_tso_post(struct ionic_queue *q, struct ionic_txq_desc *desc desc->hdr_len = cpu_to_le16(hdrlen); desc->mss = cpu_to_le16(mss); - if (done) { + if (start) { skb_tx_timestamp(skb); - netdev_tx_sent_queue(q_to_ndq(q), skb->len); - ionic_txq_post(q, !netdev_xmit_more(), ionic_tx_clean, skb); + if (!unlikely(q->features & IONIC_TXQ_F_HWSTAMP)) + netdev_tx_sent_queue(q_to_ndq(q), skb->len); + ionic_txq_post(q, false, ionic_tx_clean, skb); } else { - ionic_txq_post(q, false, ionic_tx_clean, NULL); + ionic_txq_post(q, done, NULL, NULL); } } -static struct ionic_txq_desc *ionic_tx_tso_next(struct ionic_queue *q, - struct ionic_txq_sg_elem **elem) -{ - struct ionic_txq_sg_desc *sg_desc = q->info[q->head_idx].txq_sg_desc; - struct ionic_txq_desc *desc = q->info[q->head_idx].txq_desc; - - *elem = sg_desc->elems; - return desc; -} - static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb) { struct ionic_tx_stats *stats = q_to_tx_stats(q); - struct ionic_desc_info *rewind_desc_info; - struct device *dev = q->lif->ionic->dev; + struct ionic_desc_info *desc_info; + struct ionic_buf_info *buf_info; struct ionic_txq_sg_elem *elem; struct ionic_txq_desc *desc; - unsigned int frag_left = 0; - unsigned int offset = 0; - u16 abort = q->head_idx; - unsigned int len_left; + unsigned int chunk_len; + unsigned int frag_rem; + unsigned int tso_rem; + unsigned int seg_rem; dma_addr_t desc_addr; + dma_addr_t frag_addr; unsigned int hdrlen; - unsigned int nfrags; - unsigned int seglen; - u64 total_bytes = 0; - u64 total_pkts = 0; - u16 rewind = abort; - unsigned int left; unsigned int len; unsigned int mss; - skb_frag_t *frag; bool start, done; bool outer_csum; - dma_addr_t addr; bool has_vlan; u16 desc_len; u8 desc_nsge; @@ -807,9 +916,14 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb) bool encap; int err; + desc_info = &q->info[q->head_idx]; + buf_info = desc_info->bufs; + + if (unlikely(ionic_tx_map_skb(q, skb, desc_info))) + return -EIO; + + len = skb->len; mss = skb_shinfo(skb)->gso_size; - nfrags = skb_shinfo(skb)->nr_frags; - len_left = skb->len - skb_headlen(skb); outer_csum = (skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM) || (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM); has_vlan = !!skb_vlan_tag_present(skb); @@ -834,125 +948,75 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb) else hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb); - seglen = hdrlen + mss; - left = skb_headlen(skb); + tso_rem = len; + seg_rem = min(tso_rem, hdrlen + mss); - desc = ionic_tx_tso_next(q, &elem); - start = true; + frag_addr = 0; + frag_rem = 0; - /* Chop skb->data up into desc segments */ + start = true; - while (left > 0) { - len = min(seglen, left); - frag_left = seglen - len; - desc_addr = ionic_tx_map_single(q, skb->data + offset, len); - if (dma_mapping_error(dev, desc_addr)) - goto err_out_abort; 
- desc_len = len; + while (tso_rem > 0) { + desc = NULL; + elem = NULL; + desc_addr = 0; + desc_len = 0; desc_nsge = 0; - left -= len; - offset += len; - if (nfrags > 0 && frag_left > 0) - continue; - done = (nfrags == 0 && left == 0); - ionic_tx_tso_post(q, desc, skb, - desc_addr, desc_nsge, desc_len, - hdrlen, mss, - outer_csum, - vlan_tci, has_vlan, - start, done); - total_pkts++; - total_bytes += start ? len : len + hdrlen; - desc = ionic_tx_tso_next(q, &elem); - start = false; - seglen = mss; - } - - /* Chop skb frags into desc segments */ - - for (frag = skb_shinfo(skb)->frags; len_left; frag++) { - offset = 0; - left = skb_frag_size(frag); - len_left -= left; - nfrags--; - stats->frags++; - - while (left > 0) { - if (frag_left > 0) { - len = min(frag_left, left); - frag_left -= len; - addr = ionic_tx_map_frag(q, frag, offset, len); - if (dma_mapping_error(dev, addr)) - goto err_out_abort; - elem->addr = cpu_to_le64(addr); - elem->len = cpu_to_le16(len); + /* use fragments until we have enough to post a single descriptor */ + while (seg_rem > 0) { + /* if the fragment is exhausted then move to the next one */ + if (frag_rem == 0) { + /* grab the next fragment */ + frag_addr = buf_info->dma_addr; + frag_rem = buf_info->len; + buf_info++; + } + chunk_len = min(frag_rem, seg_rem); + if (!desc) { + /* fill main descriptor */ + desc = desc_info->txq_desc; + elem = desc_info->txq_sg_desc->elems; + desc_addr = frag_addr; + desc_len = chunk_len; + } else { + /* fill sg descriptor */ + elem->addr = cpu_to_le64(frag_addr); + elem->len = cpu_to_le16(chunk_len); elem++; desc_nsge++; - left -= len; - offset += len; - if (nfrags > 0 && frag_left > 0) - continue; - done = (nfrags == 0 && left == 0); - ionic_tx_tso_post(q, desc, skb, desc_addr, - desc_nsge, desc_len, - hdrlen, mss, outer_csum, - vlan_tci, has_vlan, - start, done); - total_pkts++; - total_bytes += start ? len : len + hdrlen; - desc = ionic_tx_tso_next(q, &elem); - start = false; - } else { - len = min(mss, left); - frag_left = mss - len; - desc_addr = ionic_tx_map_frag(q, frag, - offset, len); - if (dma_mapping_error(dev, desc_addr)) - goto err_out_abort; - desc_len = len; - desc_nsge = 0; - left -= len; - offset += len; - if (nfrags > 0 && frag_left > 0) - continue; - done = (nfrags == 0 && left == 0); - ionic_tx_tso_post(q, desc, skb, desc_addr, - desc_nsge, desc_len, - hdrlen, mss, outer_csum, - vlan_tci, has_vlan, - start, done); - total_pkts++; - total_bytes += start ? 
len : len + hdrlen; - desc = ionic_tx_tso_next(q, &elem); - start = false; } + frag_addr += chunk_len; + frag_rem -= chunk_len; + tso_rem -= chunk_len; + seg_rem -= chunk_len; } + seg_rem = min(tso_rem, mss); + done = (tso_rem == 0); + /* post descriptor */ + ionic_tx_tso_post(q, desc, skb, + desc_addr, desc_nsge, desc_len, + hdrlen, mss, outer_csum, vlan_tci, has_vlan, + start, done); + start = false; + /* Buffer information is stored with the first tso descriptor */ + desc_info = &q->info[q->head_idx]; + desc_info->nbufs = 0; } - stats->pkts += total_pkts; - stats->bytes += total_bytes; + stats->pkts += DIV_ROUND_UP(len - hdrlen, mss); + stats->bytes += len; stats->tso++; - stats->tso_bytes += total_bytes; + stats->tso_bytes = len; return 0; - -err_out_abort: - while (rewind != q->head_idx) { - rewind_desc_info = &q->info[rewind]; - ionic_tx_clean(q, rewind_desc_info, NULL, NULL); - rewind = (rewind + 1) & (q->num_descs - 1); - } - q->head_idx = abort; - - return -ENOMEM; } -static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb) +static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb, + struct ionic_desc_info *desc_info) { - struct ionic_txq_desc *desc = q->info[q->head_idx].txq_desc; + struct ionic_txq_desc *desc = desc_info->txq_desc; + struct ionic_buf_info *buf_info = desc_info->bufs; struct ionic_tx_stats *stats = q_to_tx_stats(q); - struct device *dev = q->lif->ionic->dev; - dma_addr_t dma_addr; bool has_vlan; u8 flags = 0; bool encap; @@ -961,23 +1025,22 @@ static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb) has_vlan = !!skb_vlan_tag_present(skb); encap = skb->encapsulation; - dma_addr = ionic_tx_map_single(q, skb->data, skb_headlen(skb)); - if (dma_mapping_error(dev, dma_addr)) - return -ENOMEM; - flags |= has_vlan ? IONIC_TXQ_DESC_FLAG_VLAN : 0; flags |= encap ? IONIC_TXQ_DESC_FLAG_ENCAP : 0; cmd = encode_txq_desc_cmd(IONIC_TXQ_DESC_OPCODE_CSUM_PARTIAL, - flags, skb_shinfo(skb)->nr_frags, dma_addr); + flags, skb_shinfo(skb)->nr_frags, + buf_info->dma_addr); desc->cmd = cpu_to_le64(cmd); - desc->len = cpu_to_le16(skb_headlen(skb)); - desc->csum_start = cpu_to_le16(skb_checksum_start_offset(skb)); - desc->csum_offset = cpu_to_le16(skb->csum_offset); + desc->len = cpu_to_le16(buf_info->len); if (has_vlan) { desc->vlan_tci = cpu_to_le16(skb_vlan_tag_get(skb)); stats->vlan_inserted++; + } else { + desc->vlan_tci = 0; } + desc->csum_start = cpu_to_le16(skb_checksum_start_offset(skb)); + desc->csum_offset = cpu_to_le16(skb->csum_offset); if (skb_csum_is_sctp(skb)) stats->crc32_csum++; @@ -987,12 +1050,12 @@ static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb) return 0; } -static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb) +static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb, + struct ionic_desc_info *desc_info) { - struct ionic_txq_desc *desc = q->info[q->head_idx].txq_desc; + struct ionic_txq_desc *desc = desc_info->txq_desc; + struct ionic_buf_info *buf_info = desc_info->bufs; struct ionic_tx_stats *stats = q_to_tx_stats(q); - struct device *dev = q->lif->ionic->dev; - dma_addr_t dma_addr; bool has_vlan; u8 flags = 0; bool encap; @@ -1001,67 +1064,66 @@ static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb) has_vlan = !!skb_vlan_tag_present(skb); encap = skb->encapsulation; - dma_addr = ionic_tx_map_single(q, skb->data, skb_headlen(skb)); - if (dma_mapping_error(dev, dma_addr)) - return -ENOMEM; - flags |= has_vlan ? 
IONIC_TXQ_DESC_FLAG_VLAN : 0; flags |= encap ? IONIC_TXQ_DESC_FLAG_ENCAP : 0; cmd = encode_txq_desc_cmd(IONIC_TXQ_DESC_OPCODE_CSUM_NONE, - flags, skb_shinfo(skb)->nr_frags, dma_addr); + flags, skb_shinfo(skb)->nr_frags, + buf_info->dma_addr); desc->cmd = cpu_to_le64(cmd); - desc->len = cpu_to_le16(skb_headlen(skb)); + desc->len = cpu_to_le16(buf_info->len); if (has_vlan) { desc->vlan_tci = cpu_to_le16(skb_vlan_tag_get(skb)); stats->vlan_inserted++; + } else { + desc->vlan_tci = 0; } + desc->csum_start = 0; + desc->csum_offset = 0; stats->csum_none++; return 0; } -static int ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb) +static int ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb, + struct ionic_desc_info *desc_info) { - struct ionic_txq_sg_desc *sg_desc = q->info[q->head_idx].txq_sg_desc; - unsigned int len_left = skb->len - skb_headlen(skb); + struct ionic_txq_sg_desc *sg_desc = desc_info->txq_sg_desc; + struct ionic_buf_info *buf_info = &desc_info->bufs[1]; struct ionic_txq_sg_elem *elem = sg_desc->elems; struct ionic_tx_stats *stats = q_to_tx_stats(q); - struct device *dev = q->lif->ionic->dev; - dma_addr_t dma_addr; - skb_frag_t *frag; - u16 len; + unsigned int i; - for (frag = skb_shinfo(skb)->frags; len_left; frag++, elem++) { - len = skb_frag_size(frag); - elem->len = cpu_to_le16(len); - dma_addr = ionic_tx_map_frag(q, frag, 0, len); - if (dma_mapping_error(dev, dma_addr)) - return -ENOMEM; - elem->addr = cpu_to_le64(dma_addr); - len_left -= len; - stats->frags++; + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++, buf_info++, elem++) { + elem->addr = cpu_to_le64(buf_info->dma_addr); + elem->len = cpu_to_le16(buf_info->len); } + stats->frags += skb_shinfo(skb)->nr_frags; + return 0; } static int ionic_tx(struct ionic_queue *q, struct sk_buff *skb) { + struct ionic_desc_info *desc_info = &q->info[q->head_idx]; struct ionic_tx_stats *stats = q_to_tx_stats(q); int err; + if (unlikely(ionic_tx_map_skb(q, skb, desc_info))) + return -EIO; + /* set up the initial descriptor */ if (skb->ip_summed == CHECKSUM_PARTIAL) - err = ionic_tx_calc_csum(q, skb); + err = ionic_tx_calc_csum(q, skb, desc_info); else - err = ionic_tx_calc_no_csum(q, skb); + err = ionic_tx_calc_no_csum(q, skb, desc_info); if (err) return err; /* add frags */ - err = ionic_tx_skb_frags(q, skb); + err = ionic_tx_skb_frags(q, skb, desc_info); if (err) return err; @@ -1069,7 +1131,8 @@ static int ionic_tx(struct ionic_queue *q, struct sk_buff *skb) stats->pkts++; stats->bytes += skb->len; - netdev_tx_sent_queue(q_to_ndq(q), skb->len); + if (!unlikely(q->features & IONIC_TXQ_F_HWSTAMP)) + netdev_tx_sent_queue(q_to_ndq(q), skb->len); ionic_txq_post(q, !netdev_xmit_more(), ionic_tx_clean, skb); return 0; @@ -1077,17 +1140,19 @@ static int ionic_tx(struct ionic_queue *q, struct sk_buff *skb) static int ionic_tx_descs_needed(struct ionic_queue *q, struct sk_buff *skb) { - int sg_elems = q->lif->qtype_info[IONIC_QTYPE_TXQ].max_sg_elems; struct ionic_tx_stats *stats = q_to_tx_stats(q); + int ndescs; int err; - /* If TSO, need roundup(skb->len/mss) descs */ + /* Each desc is mss long max, so a descriptor for each gso_seg */ if (skb_is_gso(skb)) - return (skb->len / skb_shinfo(skb)->gso_size) + 1; + ndescs = skb_shinfo(skb)->gso_segs; + else + ndescs = 1; /* If non-TSO, just need 1 desc and nr_frags sg elems */ - if (skb_shinfo(skb)->nr_frags <= sg_elems) - return 1; + if (skb_shinfo(skb)->nr_frags <= q->max_sg_elems) + return ndescs; /* Too many frags, so linearize */ err = skb_linearize(skb); @@ -1096,8 
+1161,7 @@ static int ionic_tx_descs_needed(struct ionic_queue *q, struct sk_buff *skb) stats->linearize++; - /* Need 1 desc and zero sg elems */ - return 1; + return ndescs; } static int ionic_maybe_stop_tx(struct ionic_queue *q, int ndescs) @@ -1120,6 +1184,42 @@ static int ionic_maybe_stop_tx(struct ionic_queue *q, int ndescs) return stopped; } +static netdev_tx_t ionic_start_hwstamp_xmit(struct sk_buff *skb, + struct net_device *netdev) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic_queue *q = &lif->hwstamp_txq->q; + int err, ndescs; + + /* Does not stop/start txq, because we post to a separate tx queue + * for timestamping, and if a packet can't be posted immediately to + * the timestamping queue, it is dropped. + */ + + ndescs = ionic_tx_descs_needed(q, skb); + if (unlikely(ndescs < 0)) + goto err_out_drop; + + if (unlikely(!ionic_q_has_space(q, ndescs))) + goto err_out_drop; + + skb_shinfo(skb)->tx_flags |= SKBTX_HW_TSTAMP; + if (skb_is_gso(skb)) + err = ionic_tx_tso(q, skb); + else + err = ionic_tx(q, skb); + + if (err) + goto err_out_drop; + + return NETDEV_TX_OK; + +err_out_drop: + q->drop++; + dev_kfree_skb(skb); + return NETDEV_TX_OK; +} + netdev_tx_t ionic_start_xmit(struct sk_buff *skb, struct net_device *netdev) { u16 queue_index = skb_get_queue_mapping(skb); @@ -1133,6 +1233,10 @@ netdev_tx_t ionic_start_xmit(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_OK; } + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) + if (lif->hwstamp_txq && lif->phc->ts_config_tx_mode) + return ionic_start_hwstamp_xmit(skb, netdev); + if (unlikely(queue_index >= lif->nxqs)) queue_index = 0; q = &lif->txqcqs[queue_index]->q; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.h b/drivers/net/ethernet/pensando/ionic/ionic_txrx.h index 7667b72232b8..d7cbaad8a6fb 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.h @@ -14,4 +14,7 @@ int ionic_tx_napi(struct napi_struct *napi, int budget); int ionic_txrx_napi(struct napi_struct *napi, int budget); netdev_tx_t ionic_start_xmit(struct sk_buff *skb, struct net_device *netdev); +bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info); +bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info); + #endif /* _IONIC_TXRX_H_ */ diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 07824bf9d68d..dfaf10edfabf 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -396,6 +396,7 @@ int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn, tpa_param->tpa_ipv6_en_flg = 1; tpa_param->tpa_pkt_split_flg = 1; tpa_param->tpa_gro_consistent_flg = 1; + break; default: break; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index b8dc5c4591ef..ed2b6fe5a78d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -4734,6 +4734,7 @@ void qed_inform_vf_link_state(struct qed_hwfn *hwfn) */ link.speed = (hwfn->cdev->num_hwfns > 1) ? 
100000 : 40000; + break; default: /* In auto mode pass PF link image to VF */ break; diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c index 8c47a9d2a965..8e150dd4f899 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_fp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c @@ -345,7 +345,7 @@ int qede_xdp_transmit(struct net_device *dev, int n_frames, struct qede_tx_queue *xdp_tx; struct xdp_frame *xdpf; dma_addr_t mapping; - int i, drops = 0; + int i, nxmit = 0; u16 xdp_prod; if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) @@ -364,18 +364,13 @@ int qede_xdp_transmit(struct net_device *dev, int n_frames, mapping = dma_map_single(dmadev, xdpf->data, xdpf->len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(dmadev, mapping))) { - xdp_return_frame_rx_napi(xdpf); - drops++; - - continue; - } + if (unlikely(dma_mapping_error(dmadev, mapping))) + break; if (unlikely(qede_xdp_xmit(xdp_tx, mapping, 0, xdpf->len, - NULL, xdpf))) { - xdp_return_frame_rx_napi(xdpf); - drops++; - } + NULL, xdpf))) + break; + nxmit++; } if (flags & XDP_XMIT_FLUSH) { @@ -387,7 +382,7 @@ int qede_xdp_transmit(struct net_device *dev, int n_frames, spin_unlock(&xdp_tx->xdp_tx_lock); - return n_frames - drops; + return nxmit; } int qede_txq_has_work(struct qede_tx_queue *txq) @@ -1214,12 +1209,9 @@ static int qede_rx_build_jumbo(struct qede_dev *edev, dma_unmap_page(rxq->dev, bd->mapping, PAGE_SIZE, DMA_FROM_DEVICE); - skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++, - bd->data, rxq->rx_headroom, cur_size); + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, bd->data, + rxq->rx_headroom, cur_size, PAGE_SIZE); - skb->truesize += PAGE_SIZE; - skb->data_len += cur_size; - skb->len += cur_size; pkt_len -= cur_size; } diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 4d952036ba82..01ac1e93d27a 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -7,7 +7,6 @@ #include <linux/crash_dump.h> #include <linux/module.h> #include <linux/pci.h> -#include <linux/version.h> #include <linux/device.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c index 7760a3394e93..7ecb3dfe30bd 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c @@ -1425,6 +1425,7 @@ void qlcnic_83xx_get_minidump_template(struct qlcnic_adapter *adapter) if (fw_dump->tmpl_hdr == NULL || current_version > prev_version) { vfree(fw_dump->tmpl_hdr); + fw_dump->tmpl_hdr = NULL; if (qlcnic_83xx_md_check_extended_dump_capability(adapter)) extended = !qlcnic_83xx_extend_md_capab(adapter); @@ -1443,6 +1444,8 @@ void qlcnic_83xx_get_minidump_template(struct qlcnic_adapter *adapter) struct qlcnic_83xx_dump_template_hdr *hdr; hdr = fw_dump->tmpl_hdr; + if (!hdr) + return; hdr->drv_cap_mask = 0x1f; fw_dump->cap_mask = 0x1f; dev_info(&pdev->dev, diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c index 117188e3c7de..87b8c032195d 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c @@ -1437,6 +1437,7 @@ netdev_tx_t emac_mac_tx_buf_send(struct emac_adapter *adpt, { struct emac_tpd tpd; u32 prod_idx; + int len; memset(&tpd, 0, sizeof(tpd)); @@ -1456,9 +1457,10 @@ netdev_tx_t emac_mac_tx_buf_send(struct 
emac_adapter *adpt, if (skb_network_offset(skb) != ETH_HLEN) TPD_TYP_SET(&tpd, 1); + len = skb->len; emac_tx_fill_tpd(adpt, tx_q, skb, &tpd); - netdev_sent_queue(adpt->netdev, skb->len); + netdev_sent_queue(adpt->netdev, len); /* Make sure the are enough free descriptors to hold one * maximum-sized SKB. We need one desc for each fragment, diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index 5a3b65a6eb4f..ab9b02574a15 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -885,7 +885,7 @@ qca_spi_probe(struct spi_device *spi) struct net_device *qcaspi_devs = NULL; u8 legacy_mode = 0; u16 signature; - const char *mac; + int ret; if (!spi->dev.of_node) { dev_err(&spi->dev, "Missing device tree\n"); @@ -962,12 +962,8 @@ qca_spi_probe(struct spi_device *spi) spi_set_drvdata(spi, qcaspi_devs); - mac = of_get_mac_address(spi->dev.of_node); - - if (!IS_ERR(mac)) - ether_addr_copy(qca->net_dev->dev_addr, mac); - - if (!is_valid_ether_addr(qca->net_dev->dev_addr)) { + ret = of_get_mac_address(spi->dev.of_node, qca->net_dev->dev_addr); + if (ret) { eth_hw_addr_random(qca->net_dev); dev_info(&spi->dev, "Using random MAC address: %pM\n", qca->net_dev->dev_addr); diff --git a/drivers/net/ethernet/qualcomm/qca_uart.c b/drivers/net/ethernet/qualcomm/qca_uart.c index 362b4f5c162c..bcdeca7b3366 100644 --- a/drivers/net/ethernet/qualcomm/qca_uart.c +++ b/drivers/net/ethernet/qualcomm/qca_uart.c @@ -323,7 +323,6 @@ static int qca_uart_probe(struct serdev_device *serdev) { struct net_device *qcauart_dev = alloc_etherdev(sizeof(struct qcauart)); struct qcauart *qca; - const char *mac; u32 speed = 115200; int ret; @@ -348,12 +347,8 @@ static int qca_uart_probe(struct serdev_device *serdev) of_property_read_u32(serdev->dev.of_node, "current-speed", &speed); - mac = of_get_mac_address(serdev->dev.of_node); - - if (!IS_ERR(mac)) - ether_addr_copy(qca->net_dev->dev_addr, mac); - - if (!is_valid_ether_addr(qca->net_dev->dev_addr)) { + ret = of_get_mac_address(serdev->dev.of_node, qca->net_dev->dev_addr); + if (ret) { eth_hw_addr_random(qca->net_dev); dev_info(&serdev->dev, "Using random MAC address: %pM\n", qca->net_dev->dev_addr); diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c index 3d00b3232308..0be5ac7ab261 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -56,20 +56,22 @@ static void __rmnet_map_ingress_handler(struct sk_buff *skb, struct rmnet_port *port) { + struct rmnet_map_header *map_header = (void *)skb->data; struct rmnet_endpoint *ep; u16 len, pad; u8 mux_id; - if (RMNET_MAP_GET_CD_BIT(skb)) { + if (map_header->flags & MAP_CMD_FLAG) { + /* Packet contains a MAP command (not data) */ if (port->data_format & RMNET_FLAGS_INGRESS_MAP_COMMANDS) return rmnet_map_command(skb, port); goto free_skb; } - mux_id = RMNET_MAP_GET_MUX_ID(skb); - pad = RMNET_MAP_GET_PAD(skb); - len = RMNET_MAP_GET_LENGTH(skb) - pad; + mux_id = map_header->mux_id; + pad = map_header->flags & MAP_PAD_LEN_MASK; + len = ntohs(map_header->pkt_len) - pad; if (mux_id >= RMNET_MAX_LOGICAL_EP) goto free_skb; diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h index 576501db2a0b..2aea153f4247 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h @@ -32,18 +32,6 @@ enum 
rmnet_map_commands { RMNET_MAP_COMMAND_ENUM_LENGTH }; -#define RMNET_MAP_GET_MUX_ID(Y) (((struct rmnet_map_header *) \ - (Y)->data)->mux_id) -#define RMNET_MAP_GET_CD_BIT(Y) (((struct rmnet_map_header *) \ - (Y)->data)->cd_bit) -#define RMNET_MAP_GET_PAD(Y) (((struct rmnet_map_header *) \ - (Y)->data)->pad_len) -#define RMNET_MAP_GET_CMD_START(Y) ((struct rmnet_map_control_command *) \ - ((Y)->data + \ - sizeof(struct rmnet_map_header))) -#define RMNET_MAP_GET_LENGTH(Y) (ntohs(((struct rmnet_map_header *) \ - (Y)->data)->pkt_len)) - #define RMNET_MAP_COMMAND_REQUEST 0 #define RMNET_MAP_COMMAND_ACK 1 #define RMNET_MAP_COMMAND_UNSUPPORTED 2 diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c index beaee4962128..add0f5ade2e6 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c @@ -12,12 +12,13 @@ static u8 rmnet_map_do_flow_control(struct sk_buff *skb, struct rmnet_port *port, int enable) { + struct rmnet_map_header *map_header = (void *)skb->data; struct rmnet_endpoint *ep; struct net_device *vnd; u8 mux_id; int r; - mux_id = RMNET_MAP_GET_MUX_ID(skb); + mux_id = map_header->mux_id; if (mux_id >= RMNET_MAX_LOGICAL_EP) { kfree_skb(skb); @@ -49,6 +50,7 @@ static void rmnet_map_send_ack(struct sk_buff *skb, unsigned char type, struct rmnet_port *port) { + struct rmnet_map_header *map_header = (void *)skb->data; struct rmnet_map_control_command *cmd; struct net_device *dev = skb->dev; @@ -58,7 +60,8 @@ static void rmnet_map_send_ack(struct sk_buff *skb, skb->protocol = htons(ETH_P_MAP); - cmd = RMNET_MAP_GET_CMD_START(skb); + /* Command data immediately follows the MAP header */ + cmd = (struct rmnet_map_control_command *)(map_header + 1); cmd->cmd_type = type & 0x03; netif_tx_lock(dev); @@ -71,11 +74,13 @@ static void rmnet_map_send_ack(struct sk_buff *skb, */ void rmnet_map_command(struct sk_buff *skb, struct rmnet_port *port) { + struct rmnet_map_header *map_header = (void *)skb->data; struct rmnet_map_control_command *cmd; unsigned char command_name; unsigned char rc = 0; - cmd = RMNET_MAP_GET_CMD_START(skb); + /* Command data immediately follows the MAP header */ + cmd = (struct rmnet_map_control_command *)(map_header + 1); command_name = cmd->command_name; switch (command_name) { diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c index 21d38167f961..0ac2ff828320 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c @@ -197,22 +197,16 @@ rmnet_map_ipv4_ul_csum_header(void *iphdr, struct rmnet_map_ul_csum_header *ul_header, struct sk_buff *skb) { - struct iphdr *ip4h = (struct iphdr *)iphdr; - __be16 *hdr = (__be16 *)ul_header, offset; + struct iphdr *ip4h = iphdr; + u16 val; - offset = htons((__force u16)(skb_transport_header(skb) - - (unsigned char *)iphdr)); - ul_header->csum_start_offset = offset; - ul_header->csum_insert_offset = skb->csum_offset; - ul_header->csum_enabled = 1; + val = MAP_CSUM_UL_ENABLED_FLAG; if (ip4h->protocol == IPPROTO_UDP) - ul_header->udp_ind = 1; - else - ul_header->udp_ind = 0; + val |= MAP_CSUM_UL_UDP_FLAG; + val |= skb->csum_offset & MAP_CSUM_UL_OFFSET_MASK; - /* Changing remaining fields to network order */ - hdr++; - *hdr = htons((__force u16)*hdr); + ul_header->csum_start_offset = htons(skb_network_header_len(skb)); + ul_header->csum_info = htons(val); skb->ip_summed 
= CHECKSUM_NONE; @@ -239,23 +233,16 @@ rmnet_map_ipv6_ul_csum_header(void *ip6hdr, struct rmnet_map_ul_csum_header *ul_header, struct sk_buff *skb) { - struct ipv6hdr *ip6h = (struct ipv6hdr *)ip6hdr; - __be16 *hdr = (__be16 *)ul_header, offset; - - offset = htons((__force u16)(skb_transport_header(skb) - - (unsigned char *)ip6hdr)); - ul_header->csum_start_offset = offset; - ul_header->csum_insert_offset = skb->csum_offset; - ul_header->csum_enabled = 1; + struct ipv6hdr *ip6h = ip6hdr; + u16 val; + val = MAP_CSUM_UL_ENABLED_FLAG; if (ip6h->nexthdr == IPPROTO_UDP) - ul_header->udp_ind = 1; - else - ul_header->udp_ind = 0; + val |= MAP_CSUM_UL_UDP_FLAG; + val |= skb->csum_offset & MAP_CSUM_UL_OFFSET_MASK; - /* Changing remaining fields to network order */ - hdr++; - *hdr = htons((__force u16)*hdr); + ul_header->csum_start_offset = htons(skb_network_header_len(skb)); + ul_header->csum_info = htons(val); skb->ip_summed = CHECKSUM_NONE; @@ -284,6 +271,7 @@ struct rmnet_map_header *rmnet_map_add_map_header(struct sk_buff *skb, return map_header; } + BUILD_BUG_ON(MAP_PAD_LEN_MASK < 3); padding = ALIGN(map_datalen, 4) - map_datalen; if (padding == 0) @@ -297,7 +285,8 @@ struct rmnet_map_header *rmnet_map_add_map_header(struct sk_buff *skb, done: map_header->pkt_len = htons(map_datalen + padding); - map_header->pad_len = padding & 0x3F; + /* This is a data packet, so the CMD bit is 0 */ + map_header->flags = padding & MAP_PAD_LEN_MASK; return map_header; } @@ -319,7 +308,7 @@ struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb, return NULL; maph = (struct rmnet_map_header *)skb->data; - packet_len = ntohs(maph->pkt_len) + sizeof(struct rmnet_map_header); + packet_len = ntohs(maph->pkt_len) + sizeof(*maph); if (port->data_format & RMNET_FLAGS_INGRESS_MAP_CKSUMV4) packet_len += sizeof(struct rmnet_map_dl_csum_trailer); @@ -328,7 +317,7 @@ struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb, return NULL; /* Some hardware can send us empty frames. 
Catch them */ - if (ntohs(maph->pkt_len) == 0) + if (!maph->pkt_len) return NULL; skbn = alloc_skb(packet_len + RMNET_MAP_DEAGGR_SPACING, GFP_ATOMIC); @@ -361,7 +350,7 @@ int rmnet_map_checksum_downlink_packet(struct sk_buff *skb, u16 len) csum_trailer = (struct rmnet_map_dl_csum_trailer *)(skb->data + len); - if (!csum_trailer->valid) { + if (!(csum_trailer->flags & MAP_CSUM_DL_VALID_FLAG)) { priv->stats.csum_valid_unset++; return -EINVAL; } @@ -421,10 +410,7 @@ void rmnet_map_checksum_uplink_packet(struct sk_buff *skb, } sw_csum: - ul_header->csum_start_offset = 0; - ul_header->csum_insert_offset = 0; - ul_header->csum_enabled = 0; - ul_header->udp_ind = 0; + memset(ul_header, 0, sizeof(*ul_header)); priv->stats.csum_sw++; } diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 7aad0ba53372..2c89cde7da1e 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -1586,12 +1586,10 @@ DECLARE_RTL_COND(rtl_counters_cond) static void rtl8169_do_counters(struct rtl8169_private *tp, u32 counter_cmd) { - dma_addr_t paddr = tp->counters_phys_addr; - u32 cmd; + u32 cmd = lower_32_bits(tp->counters_phys_addr); - RTL_W32(tp, CounterAddrHigh, (u64)paddr >> 32); + RTL_W32(tp, CounterAddrHigh, upper_32_bits(tp->counters_phys_addr)); rtl_pci_commit(tp); - cmd = (u64)paddr & DMA_BIT_MASK(32); RTL_W32(tp, CounterAddrLow, cmd); RTL_W32(tp, CounterAddrLow, cmd | counter_cmd); @@ -1903,6 +1901,41 @@ static int rtl8169_set_eee(struct net_device *dev, struct ethtool_eee *data) return ret; } +static void rtl8169_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *data) +{ + data->rx_max_pending = NUM_RX_DESC; + data->rx_pending = NUM_RX_DESC; + data->tx_max_pending = NUM_TX_DESC; + data->tx_pending = NUM_TX_DESC; +} + +static void rtl8169_get_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *data) +{ + struct rtl8169_private *tp = netdev_priv(dev); + bool tx_pause, rx_pause; + + phy_get_pause(tp->phydev, &tx_pause, &rx_pause); + + data->autoneg = tp->phydev->autoneg; + data->tx_pause = tx_pause ? 1 : 0; + data->rx_pause = rx_pause ? 
1 : 0; +} + +static int rtl8169_set_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *data) +{ + struct rtl8169_private *tp = netdev_priv(dev); + + if (dev->mtu > ETH_DATA_LEN) + return -EOPNOTSUPP; + + phy_set_asym_pause(tp->phydev, data->rx_pause, data->tx_pause); + + return 0; +} + static const struct ethtool_ops rtl8169_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES, @@ -1923,6 +1956,9 @@ static const struct ethtool_ops rtl8169_ethtool_ops = { .set_eee = rtl8169_set_eee, .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, + .get_ringparam = rtl8169_get_ringparam, + .get_pauseparam = rtl8169_get_pauseparam, + .set_pauseparam = rtl8169_set_pauseparam, }; static void rtl_enable_eee(struct rtl8169_private *tp) @@ -2350,6 +2386,15 @@ static void rtl_jumbo_config(struct rtl8169_private *tp) if (pci_is_pcie(tp->pci_dev) && tp->supports_gmii) pcie_set_readrq(tp->pci_dev, readrq); + + /* Chip doesn't support pause in jumbo mode */ + if (jumbo) { + linkmode_clear_bit(ETHTOOL_LINK_MODE_Pause_BIT, + tp->phydev->advertising); + linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, + tp->phydev->advertising); + phy_start_aneg(tp->phydev); + } } DECLARE_RTL_COND(rtl_chipcmd_cond) @@ -2728,11 +2773,6 @@ static void rtl_hw_start_8168c_2(struct rtl8169_private *tp) __rtl_hw_start_8168cp(tp); } -static void rtl_hw_start_8168c_3(struct rtl8169_private *tp) -{ - rtl_hw_start_8168c_2(tp); -} - static void rtl_hw_start_8168c_4(struct rtl8169_private *tp) { rtl_set_def_aspm_entry_latency(tp); @@ -3645,7 +3685,7 @@ static void rtl_hw_config(struct rtl8169_private *tp) [RTL_GIGA_MAC_VER_18] = rtl_hw_start_8168cp_1, [RTL_GIGA_MAC_VER_19] = rtl_hw_start_8168c_1, [RTL_GIGA_MAC_VER_20] = rtl_hw_start_8168c_2, - [RTL_GIGA_MAC_VER_21] = rtl_hw_start_8168c_3, + [RTL_GIGA_MAC_VER_21] = rtl_hw_start_8168c_2, [RTL_GIGA_MAC_VER_22] = rtl_hw_start_8168c_4, [RTL_GIGA_MAC_VER_23] = rtl_hw_start_8168cp_2, [RTL_GIGA_MAC_VER_24] = rtl_hw_start_8168cp_3, @@ -4358,20 +4398,6 @@ static void rtl8169_pcierr_interrupt(struct net_device *dev) if (net_ratelimit()) netdev_err(dev, "PCI error (cmd = 0x%04x, status_errs = 0x%04x)\n", pci_cmd, pci_status_errs); - /* - * The recovery sequence below admits a very elaborated explanation: - * - it seems to work; - * - I did not see what else could be done; - * - it makes iop3xx happy. - * - * Feel free to adjust to your needs. 
- */ - if (pdev->broken_parity_status) - pci_cmd &= ~PCI_COMMAND_PARITY; - else - pci_cmd |= PCI_COMMAND_SERR | PCI_COMMAND_PARITY; - - pci_write_config_word(pdev, PCI_COMMAND, pci_cmd); rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING); } @@ -4630,8 +4656,6 @@ static int r8169_phy_connect(struct rtl8169_private *tp) if (!tp->supports_gmii) phy_set_max_speed(phydev, SPEED_100); - phy_support_asym_pause(phydev); - phy_attached_info(phydev); return 0; @@ -4646,6 +4670,9 @@ static void rtl8169_down(struct rtl8169_private *tp) rtl8169_update_counters(tp); + pci_clear_master(tp->pci_dev); + rtl_pci_commit(tp); + rtl8169_cleanup(tp, true); rtl_prepare_power_down(tp); @@ -4653,6 +4680,8 @@ static void rtl8169_down(struct rtl8169_private *tp) static void rtl8169_up(struct rtl8169_private *tp) { + pci_set_master(tp->pci_dev); + phy_init_hw(tp->phydev); phy_resume(tp->phydev); rtl8169_init_phy(tp); napi_enable(&tp->napi); @@ -5078,6 +5107,10 @@ static int r8169_mdio_register(struct rtl8169_private *tp) return -EUNATCH; } + tp->phydev->mac_managed_pm = 1; + + phy_support_asym_pause(tp->phydev); + /* PHY will be woken up in rtl_open() */ phy_suspend(tp->phydev); @@ -5307,8 +5340,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) rtl_hw_reset(tp); - pci_set_master(pdev); - rc = rtl_alloc_irq(tp); if (rc < 0) { dev_err(&pdev->dev, "Can't allocate interrupt\n"); diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index cb47e68c1a3e..86a1eb0634e8 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -993,6 +993,7 @@ struct ravb_private { struct platform_device *pdev; void __iomem *addr; struct clk *clk; + struct clk *refclk; struct mdiobb_ctrl mdiobb; u32 num_rx_ring[NUM_RX_QUEUE]; u32 num_tx_ring[NUM_TX_QUEUE]; diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index eb0c03bdb12d..4afff320dfd0 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -109,11 +109,13 @@ static void ravb_set_buffer_align(struct sk_buff *skb) * Ethernet AVB device doesn't have ROM for MAC address. * This function gets the MAC address that was used by a bootloader. 
*/ -static void ravb_read_mac_address(struct net_device *ndev, const u8 *mac) +static void ravb_read_mac_address(struct device_node *np, + struct net_device *ndev) { - if (!IS_ERR(mac)) { - ether_addr_copy(ndev->dev_addr, mac); - } else { + int ret; + + ret = of_get_mac_address(np, ndev->dev_addr); + if (ret) { u32 mahr = ravb_read(ndev, MAHR); u32 malr = ravb_read(ndev, MALR); @@ -911,31 +913,20 @@ static int ravb_poll(struct napi_struct *napi, int budget) int q = napi - priv->napi; int mask = BIT(q); int quota = budget; - u32 ris0, tis; - for (;;) { - tis = ravb_read(ndev, TIS); - ris0 = ravb_read(ndev, RIS0); - if (!((ris0 & mask) || (tis & mask))) - break; + /* Processing RX Descriptor Ring */ + /* Clear RX interrupt */ + ravb_write(ndev, ~(mask | RIS0_RESERVED), RIS0); + if (ravb_rx(ndev, &quota, q)) + goto out; - /* Processing RX Descriptor Ring */ - if (ris0 & mask) { - /* Clear RX interrupt */ - ravb_write(ndev, ~(mask | RIS0_RESERVED), RIS0); - if (ravb_rx(ndev, &quota, q)) - goto out; - } - /* Processing TX Descriptor Ring */ - if (tis & mask) { - spin_lock_irqsave(&priv->lock, flags); - /* Clear TX interrupt */ - ravb_write(ndev, ~(mask | TIS_RESERVED), TIS); - ravb_tx_free(ndev, q, true); - netif_wake_subqueue(ndev, q); - spin_unlock_irqrestore(&priv->lock, flags); - } - } + /* Processing TX Descriptor Ring */ + spin_lock_irqsave(&priv->lock, flags); + /* Clear TX interrupt */ + ravb_write(ndev, ~(mask | TIS_RESERVED), TIS); + ravb_tx_free(ndev, q, true); + netif_wake_subqueue(ndev, q); + spin_unlock_irqrestore(&priv->lock, flags); napi_complete(napi); @@ -2148,6 +2139,13 @@ static int ravb_probe(struct platform_device *pdev) goto out_release; } + priv->refclk = devm_clk_get_optional(&pdev->dev, "refclk"); + if (IS_ERR(priv->refclk)) { + error = PTR_ERR(priv->refclk); + goto out_release; + } + clk_prepare_enable(priv->refclk); + ndev->max_mtu = 2048 - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN); ndev->min_mtu = ETH_MIN_MTU; @@ -2164,7 +2162,7 @@ static int ravb_probe(struct platform_device *pdev) /* Set GTI value */ error = ravb_set_gti(ndev); if (error) - goto out_release; + goto out_disable_refclk; /* Request GTI loading */ ravb_modify(ndev, GCCR, GCCR_LTI, GCCR_LTI); @@ -2183,7 +2181,7 @@ static int ravb_probe(struct platform_device *pdev) "Cannot allocate desc base address table (size %d bytes)\n", priv->desc_bat_size); error = -ENOMEM; - goto out_release; + goto out_disable_refclk; } for (q = RAVB_BE; q < DBAT_ENTRY_NUM; q++) priv->desc_bat[q].die_dt = DT_EOS; @@ -2200,7 +2198,7 @@ static int ravb_probe(struct platform_device *pdev) priv->msg_enable = RAVB_DEF_MSG_ENABLE; /* Read and set MAC address */ - ravb_read_mac_address(ndev, of_get_mac_address(np)); + ravb_read_mac_address(np, ndev); if (!is_valid_ether_addr(ndev->dev_addr)) { dev_warn(&pdev->dev, "no valid MAC address supplied, using a random one\n"); @@ -2243,6 +2241,8 @@ out_dma_free: /* Stop PTP Clock driver */ if (chip_id != RCAR_GEN2) ravb_ptp_stop(ndev); +out_disable_refclk: + clk_disable_unprepare(priv->refclk); out_release: free_netdev(ndev); @@ -2260,6 +2260,8 @@ static int ravb_remove(struct platform_device *pdev) if (priv->chip_id != RCAR_GEN2) ravb_ptp_stop(ndev); + clk_disable_unprepare(priv->refclk); + dma_free_coherent(ndev->dev.parent, priv->desc_bat_size, priv->desc_bat, priv->desc_bat_dma); /* Set reset mode */ diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index f029c7c03804..c5b154868c1f 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ 
b/drivers/net/ethernet/renesas/sh_eth.c @@ -560,7 +560,7 @@ static struct sh_eth_cpu_data r7s72100_data = { EESR_TDE, .fdr_value = 0x0000070f, - .trscer_err_mask = DESC_I_RINT8 | DESC_I_RINT5, + .trscer_err_mask = TRSCER_RMAFCE | TRSCER_RRFCE, .no_psr = 1, .apr = 1, @@ -701,7 +701,7 @@ static struct sh_eth_cpu_data rcar_gen2_data = { EESR_RDE | EESR_RFRMER | EESR_TFE | EESR_TDE, .fdr_value = 0x00000f0f, - .trscer_err_mask = DESC_I_RINT8, + .trscer_err_mask = TRSCER_RMAFCE, .apr = 1, .mpr = 1, @@ -782,7 +782,7 @@ static struct sh_eth_cpu_data r7s9210_data = { .fdr_value = 0x0000070f, - .trscer_err_mask = DESC_I_RINT8 | DESC_I_RINT5, + .trscer_err_mask = TRSCER_RMAFCE | TRSCER_RRFCE, .apr = 1, .mpr = 1, @@ -1094,7 +1094,7 @@ static struct sh_eth_cpu_data sh771x_data = { EESIPR_RRFIP | EESIPR_RTLFIP | EESIPR_RTSFIP | EESIPR_PREIP | EESIPR_CERFIP, - .trscer_err_mask = DESC_I_RINT8, + .trscer_err_mask = TRSCER_RMAFCE, .tsu = 1, .dual_port = 1, @@ -1749,7 +1749,7 @@ static void sh_eth_emac_interrupt(struct net_device *ndev) link_stat = sh_eth_read(ndev, PSR); if (mdp->ether_link_active_low) link_stat = ~link_stat; - if (!(link_stat & PHY_ST_LINK)) { + if (!(link_stat & PSR_LMON)) { sh_eth_rcv_snd_disable(ndev); } else { /* Link Up */ @@ -3170,7 +3170,6 @@ static struct sh_eth_plat_data *sh_eth_parse_dt(struct device *dev) struct device_node *np = dev->of_node; struct sh_eth_plat_data *pdata; phy_interface_t interface; - const char *mac_addr; int ret; pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL); @@ -3182,9 +3181,7 @@ static struct sh_eth_plat_data *sh_eth_parse_dt(struct device *dev) return NULL; pdata->phy_interface = interface; - mac_addr = of_get_mac_address(np); - if (!IS_ERR(mac_addr)) - ether_addr_copy(pdata->mac_addr, mac_addr); + of_get_mac_address(np, pdata->mac_addr); pdata->no_ether_link = of_property_read_bool(np, "renesas,no-ether-link"); diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h index c1b3751b12c4..a5c07c6ff44a 100644 --- a/drivers/net/ethernet/renesas/sh_eth.h +++ b/drivers/net/ethernet/renesas/sh_eth.h @@ -171,7 +171,7 @@ enum GECMR_BIT { }; /* EDMR */ -enum DMAC_M_BIT { +enum EDMR_BIT { EDMR_NBST = 0x80, EDMR_EL = 0x40, /* Litte endian */ EDMR_DL1 = 0x20, EDMR_DL0 = 0x10, @@ -180,13 +180,13 @@ enum DMAC_M_BIT { }; /* EDTRR */ -enum DMAC_T_BIT { +enum EDTRR_BIT { EDTRR_TRNS_GETHER = 0x03, EDTRR_TRNS_ETHER = 0x01, }; /* EDRRR */ -enum EDRRR_R_BIT { +enum EDRRR_BIT { EDRRR_R = 0x01, }; @@ -208,7 +208,7 @@ enum PIR_BIT { }; /* PSR */ -enum PHY_STATUS_BIT { PHY_ST_LINK = 0x01, }; +enum PSR_BIT { PSR_LMON = 0x01, }; /* EESR */ enum EESR_BIT { @@ -288,27 +288,6 @@ enum EESIPR_BIT { EESIPR_CERFIP = 0x00000001, }; -/* Receive descriptor 0 bits */ -enum RD_STS_BIT { - RD_RACT = 0x80000000, RD_RDLE = 0x40000000, - RD_RFP1 = 0x20000000, RD_RFP0 = 0x10000000, - RD_RFE = 0x08000000, RD_RFS10 = 0x00000200, - RD_RFS9 = 0x00000100, RD_RFS8 = 0x00000080, - RD_RFS7 = 0x00000040, RD_RFS6 = 0x00000020, - RD_RFS5 = 0x00000010, RD_RFS4 = 0x00000008, - RD_RFS3 = 0x00000004, RD_RFS2 = 0x00000002, - RD_RFS1 = 0x00000001, -}; -#define RDF1ST RD_RFP1 -#define RDFEND RD_RFP0 -#define RD_RFP (RD_RFP1|RD_RFP0) - -/* Receive descriptor 1 bits */ -enum RD_LEN_BIT { - RD_RFL = 0x0000ffff, /* receive frame length */ - RD_RBL = 0xffff0000, /* receive buffer length */ -}; - /* FCFTR */ enum FCFTR_BIT { FCFTR_RFF2 = 0x00040000, FCFTR_RFF1 = 0x00020000, @@ -318,28 +297,13 @@ enum FCFTR_BIT { #define DEFAULT_FIFO_F_D_RFF (FCFTR_RFF2 | FCFTR_RFF1 | FCFTR_RFF0) 
#define DEFAULT_FIFO_F_D_RFD (FCFTR_RFD2 | FCFTR_RFD1 | FCFTR_RFD0) -/* Transmit descriptor 0 bits */ -enum TD_STS_BIT { - TD_TACT = 0x80000000, TD_TDLE = 0x40000000, - TD_TFP1 = 0x20000000, TD_TFP0 = 0x10000000, - TD_TFE = 0x08000000, TD_TWBI = 0x04000000, -}; -#define TDF1ST TD_TFP1 -#define TDFEND TD_TFP0 -#define TD_TFP (TD_TFP1|TD_TFP0) - -/* Transmit descriptor 1 bits */ -enum TD_LEN_BIT { - TD_TBL = 0xffff0000, /* transmit buffer length */ -}; - /* RMCR */ enum RMCR_BIT { RMCR_RNC = 0x00000001, }; /* ECMR */ -enum FELIC_MODE_BIT { +enum ECMR_BIT { ECMR_TRCCM = 0x04000000, ECMR_RCSC = 0x00800000, ECMR_DPAD = 0x00200000, ECMR_RZPF = 0x00100000, ECMR_ZPF = 0x00080000, ECMR_PFR = 0x00040000, ECMR_RXF = 0x00020000, @@ -350,7 +314,7 @@ enum FELIC_MODE_BIT { }; /* ECSR */ -enum ECSR_STATUS_BIT { +enum ECSR_BIT { ECSR_BRCRX = 0x20, ECSR_PSRTO = 0x10, ECSR_LCHNG = 0x04, ECSR_MPD = 0x02, ECSR_ICD = 0x01, @@ -360,7 +324,7 @@ enum ECSR_STATUS_BIT { ECSR_ICD | ECSIPR_MPDIP) /* ECSIPR */ -enum ECSIPR_STATUS_MASK_BIT { +enum ECSIPR_BIT { ECSIPR_BRCRXIP = 0x20, ECSIPR_PSRTOIP = 0x10, ECSIPR_LCHNGIP = 0x04, ECSIPR_MPDIP = 0x02, ECSIPR_ICDIP = 0x01, @@ -380,14 +344,20 @@ enum MPR_BIT { }; /* TRSCER */ -enum DESC_I_BIT { - DESC_I_TINT4 = 0x0800, DESC_I_TINT3 = 0x0400, DESC_I_TINT2 = 0x0200, - DESC_I_TINT1 = 0x0100, DESC_I_RINT8 = 0x0080, DESC_I_RINT5 = 0x0010, - DESC_I_RINT4 = 0x0008, DESC_I_RINT3 = 0x0004, DESC_I_RINT2 = 0x0002, - DESC_I_RINT1 = 0x0001, +enum TRSCER_BIT { + TRSCER_CNDCE = 0x00000800, + TRSCER_DLCCE = 0x00000400, + TRSCER_CDCE = 0x00000200, + TRSCER_TROCE = 0x00000100, + TRSCER_RMAFCE = 0x00000080, + TRSCER_RRFCE = 0x00000010, + TRSCER_RTLFCE = 0x00000008, + TRSCER_RTSFCE = 0x00000004, + TRSCER_PRECE = 0x00000002, + TRSCER_CERFCE = 0x00000001, }; -#define DEFAULT_TRSCER_ERR_MASK (DESC_I_RINT8 | DESC_I_RINT5 | DESC_I_TINT2) +#define DEFAULT_TRSCER_ERR_MASK (TRSCER_RMAFCE | TRSCER_RRFCE | TRSCER_CDCE) /* RPADIR */ enum RPADIR_BIT { @@ -445,6 +415,24 @@ struct sh_eth_txdesc { u32 pad0; /* padding data */ } __aligned(2) __packed; +/* Transmit descriptor 0 bits */ +enum TD_STS_BIT { + TD_TACT = 0x80000000, + TD_TDLE = 0x40000000, + TD_TFP1 = 0x20000000, + TD_TFP0 = 0x10000000, + TD_TFE = 0x08000000, + TD_TWBI = 0x04000000, +}; +#define TDF1ST TD_TFP1 +#define TDFEND TD_TFP0 +#define TD_TFP (TD_TFP1 | TD_TFP0) + +/* Transmit descriptor 1 bits */ +enum TD_LEN_BIT { + TD_TBL = 0xffff0000, /* transmit buffer length */ +}; + /* The sh ether Rx buffer descriptors. * This structure should be 20 bytes. */ @@ -455,6 +443,34 @@ struct sh_eth_rxdesc { u32 pad0; /* padding data */ } __aligned(2) __packed; +/* Receive descriptor 0 bits */ +enum RD_STS_BIT { + RD_RACT = 0x80000000, + RD_RDLE = 0x40000000, + RD_RFP1 = 0x20000000, + RD_RFP0 = 0x10000000, + RD_RFE = 0x08000000, + RD_RFS10 = 0x00000200, + RD_RFS9 = 0x00000100, + RD_RFS8 = 0x00000080, + RD_RFS7 = 0x00000040, + RD_RFS6 = 0x00000020, + RD_RFS5 = 0x00000010, + RD_RFS4 = 0x00000008, + RD_RFS3 = 0x00000004, + RD_RFS2 = 0x00000002, + RD_RFS1 = 0x00000001, +}; +#define RDF1ST RD_RFP1 +#define RDFEND RD_RFP0 +#define RD_RFP (RD_RFP1 | RD_RFP0) + +/* Receive descriptor 1 bits */ +enum RD_LEN_BIT { + RD_RFL = 0x0000ffff, /* receive frame length */ + RD_RBL = 0xffff0000, /* receive buffer length */ +}; + /* This structure is used by each CPU dependency handling. 
*/ struct sh_eth_cpu_data { /* mandatory functions */ diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 3473d296b2e2..a46633606cae 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2736,7 +2736,7 @@ static void rocker_switchdev_event_work(struct work_struct *work) switch (switchdev_work->event) { case SWITCHDEV_FDB_ADD_TO_DEVICE: fdb_info = &switchdev_work->fdb_info; - if (!fdb_info->added_by_user) + if (!fdb_info->added_by_user || fdb_info->is_local) break; err = rocker_world_port_fdb_add(rocker_port, fdb_info); if (err) { @@ -2747,7 +2747,7 @@ static void rocker_switchdev_event_work(struct work_struct *work) break; case SWITCHDEV_FDB_DEL_TO_DEVICE: fdb_info = &switchdev_work->fdb_info; - if (!fdb_info->added_by_user) + if (!fdb_info->added_by_user || fdb_info->is_local) break; err = rocker_world_port_fdb_del(rocker_port, fdb_info); if (err) diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c index 33f79402850d..4639ed9438a3 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c @@ -25,8 +25,7 @@ #ifdef CONFIG_OF static int sxgbe_probe_config_dt(struct platform_device *pdev, - struct sxgbe_plat_data *plat, - const char **mac) + struct sxgbe_plat_data *plat) { struct device_node *np = pdev->dev.of_node; struct sxgbe_dma_cfg *dma_cfg; @@ -35,7 +34,6 @@ static int sxgbe_probe_config_dt(struct platform_device *pdev, if (!np) return -ENODEV; - *mac = of_get_mac_address(np); err = of_get_phy_mode(np, &plat->interface); if (err && err != -ENODEV) return err; @@ -63,8 +61,7 @@ static int sxgbe_probe_config_dt(struct platform_device *pdev, } #else static int sxgbe_probe_config_dt(struct platform_device *pdev, - struct sxgbe_plat_data *plat, - const char **mac) + struct sxgbe_plat_data *plat) { return -ENOSYS; } @@ -85,7 +82,6 @@ static int sxgbe_platform_probe(struct platform_device *pdev) void __iomem *addr; struct sxgbe_priv_data *priv = NULL; struct sxgbe_plat_data *plat_dat = NULL; - const char *mac = NULL; struct net_device *ndev = platform_get_drvdata(pdev); struct device_node *node = dev->of_node; @@ -101,7 +97,7 @@ static int sxgbe_platform_probe(struct platform_device *pdev) if (!plat_dat) return -ENOMEM; - ret = sxgbe_probe_config_dt(pdev, plat_dat, &mac); + ret = sxgbe_probe_config_dt(pdev, plat_dat); if (ret) { pr_err("%s: main dt probe failed\n", __func__); return ret; @@ -122,8 +118,7 @@ static int sxgbe_platform_probe(struct platform_device *pdev) } /* Get MAC address if available (DT) */ - if (!IS_ERR_OR_NULL(mac)) - ether_addr_copy(priv->dev->dev_addr, mac); + of_get_mac_address(node, priv->dev->dev_addr); /* Get the TX/RX IRQ numbers */ for (i = 0, chan = 1; i < SXGBE_TX_QUEUES; i++) { diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index da6886dcac37..c3f35da1b82a 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -1747,6 +1747,22 @@ static size_t efx_ef10_describe_stats(struct efx_nic *efx, u8 *names) mask, names); } +static void efx_ef10_get_fec_stats(struct efx_nic *efx, + struct ethtool_fec_stats *fec_stats) +{ + DECLARE_BITMAP(mask, EF10_STAT_COUNT); + struct efx_ef10_nic_data *nic_data = efx->nic_data; + u64 *stats = nic_data->stats; + + efx_ef10_get_stat_mask(efx, mask); + if (test_bit(EF10_STAT_fec_corrected_errors, mask)) + fec_stats->corrected_blocks.total = + 
stats[EF10_STAT_fec_corrected_errors]; + if (test_bit(EF10_STAT_fec_uncorrected_errors, mask)) + fec_stats->uncorrectable_blocks.total = + stats[EF10_STAT_fec_uncorrected_errors]; +} + static size_t efx_ef10_update_stats_common(struct efx_nic *efx, u64 *full_stats, struct rtnl_link_stats64 *core_stats) { @@ -2928,8 +2944,7 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) /* Get the transmit queue */ tx_ev_q_label = EFX_QWORD_FIELD(*event, ESF_DZ_TX_QLABEL); - tx_queue = efx_channel_get_tx_queue(channel, - tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL); + tx_queue = channel->tx_queue + (tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL); if (!tx_queue->timestamping) { /* Transmit completion */ @@ -4122,6 +4137,7 @@ const struct efx_nic_type efx_hunt_a0_nic_type = { .get_wol = efx_ef10_get_wol, .set_wol = efx_ef10_set_wol, .resume_wol = efx_port_dummy_op_void, + .get_fec_stats = efx_ef10_get_fec_stats, .test_chip = efx_ef10_test_chip, .test_nvram = efx_mcdi_nvram_test_all, .mcdi_request = efx_ef10_mcdi_request, diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 36c8625a6fd7..c746ca7235f1 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -920,7 +920,7 @@ static void efx_probe_vpd_strings(struct efx_nic *efx) } /* Get the Read only section */ - ro_start = pci_vpd_find_tag(vpd_data, 0, vpd_size, PCI_VPD_LRDT_RO_DATA); + ro_start = pci_vpd_find_tag(vpd_data, vpd_size, PCI_VPD_LRDT_RO_DATA); if (ro_start < 0) { netif_err(efx, drv, efx->net_dev, "VPD Read-only not found\n"); return; diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c index 1bfeee283ea9..a3ca406a3561 100644 --- a/drivers/net/ethernet/sfc/efx_channels.c +++ b/drivers/net/ethernet/sfc/efx_channels.c @@ -914,6 +914,8 @@ int efx_set_channels(struct efx_nic *efx) } } } + if (xdp_queue_number) + efx->xdp_tx_queue_count = xdp_queue_number; rc = netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels); if (rc) diff --git a/drivers/net/ethernet/sfc/enum.h b/drivers/net/ethernet/sfc/enum.h index 3332cdf2918a..cd590e0685e5 100644 --- a/drivers/net/ethernet/sfc/enum.h +++ b/drivers/net/ethernet/sfc/enum.h @@ -78,7 +78,6 @@ enum efx_loopback_mode { (1 << LOOPBACK_XAUI) | \ (1 << LOOPBACK_GMII) | \ (1 << LOOPBACK_SGMII) | \ - (1 << LOOPBACK_SGMII) | \ (1 << LOOPBACK_XGBR) | \ (1 << LOOPBACK_XFI) | \ (1 << LOOPBACK_XAUI_FAR) | \ diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 12a91c559aa2..058d9fe41d99 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -206,6 +206,15 @@ static int efx_ethtool_set_wol(struct net_device *net_dev, return efx->type->set_wol(efx, wol->wolopts); } +static void efx_ethtool_get_fec_stats(struct net_device *net_dev, + struct ethtool_fec_stats *fec_stats) +{ + struct efx_nic *efx = netdev_priv(net_dev); + + if (efx->type->get_fec_stats) + efx->type->get_fec_stats(efx, fec_stats); +} + static int efx_ethtool_get_ts_info(struct net_device *net_dev, struct ethtool_ts_info *ts_info) { @@ -257,6 +266,7 @@ const struct ethtool_ops efx_ethtool_ops = { .get_module_eeprom = efx_ethtool_get_module_eeprom, .get_link_ksettings = efx_ethtool_get_link_ksettings, .set_link_ksettings = efx_ethtool_set_link_ksettings, + .get_fec_stats = efx_ethtool_get_fec_stats, .get_fecparam = efx_ethtool_get_fecparam, .set_fecparam = efx_ethtool_set_fecparam, }; diff --git a/drivers/net/ethernet/sfc/falcon/efx.c 
b/drivers/net/ethernet/sfc/falcon/efx.c index f8979991970e..5e7a57b680ca 100644 --- a/drivers/net/ethernet/sfc/falcon/efx.c +++ b/drivers/net/ethernet/sfc/falcon/efx.c @@ -2800,7 +2800,7 @@ static void ef4_probe_vpd_strings(struct ef4_nic *efx) } /* Get the Read only section */ - ro_start = pci_vpd_find_tag(vpd_data, 0, vpd_size, PCI_VPD_LRDT_RO_DATA); + ro_start = pci_vpd_find_tag(vpd_data, vpd_size, PCI_VPD_LRDT_RO_DATA); if (ro_start < 0) { netif_err(efx, drv, efx->net_dev, "VPD Read-only not found\n"); return; diff --git a/drivers/net/ethernet/sfc/falcon/net_driver.h b/drivers/net/ethernet/sfc/falcon/net_driver.h index a529ff395ead..a381cf9ec4f3 100644 --- a/drivers/net/ethernet/sfc/falcon/net_driver.h +++ b/drivers/net/ethernet/sfc/falcon/net_driver.h @@ -637,7 +637,7 @@ union ef4_multicast_hash { * struct ef4_nic - an Efx NIC * @name: Device name (net device name or bus id before net device registered) * @pci_dev: The PCI device - * @node: List node for maintaning primary/secondary function lists + * @node: List node for maintaining primary/secondary function lists * @primary: &struct ef4_nic instance for the primary function of this * controller. May be the same structure, and may be %NULL if no * primary function is bound. Serialised by rtnl_lock. diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c index d75cf5ff5686..49df02ecee91 100644 --- a/drivers/net/ethernet/sfc/farch.c +++ b/drivers/net/ethernet/sfc/farch.c @@ -835,14 +835,14 @@ efx_farch_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) /* Transmit completion */ tx_ev_desc_ptr = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_DESC_PTR); tx_ev_q_label = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_Q_LABEL); - tx_queue = efx_channel_get_tx_queue( - channel, tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL); + tx_queue = channel->tx_queue + + (tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL); efx_xmit_done(tx_queue, tx_ev_desc_ptr); } else if (EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_WQ_FF_FULL)) { /* Rewrite the FIFO write pointer */ tx_ev_q_label = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_Q_LABEL); - tx_queue = efx_channel_get_tx_queue( - channel, tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL); + tx_queue = channel->tx_queue + + (tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL); netif_tx_lock(efx->net_dev); efx_farch_notify_tx_desc(tx_queue); @@ -1081,16 +1081,16 @@ static void efx_farch_handle_tx_flush_done(struct efx_nic *efx, efx_qword_t *event) { struct efx_tx_queue *tx_queue; + struct efx_channel *channel; int qid; qid = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBDATA); if (qid < EFX_MAX_TXQ_PER_CHANNEL * (efx->n_tx_channels + efx->n_extra_tx_channels)) { - tx_queue = efx_get_tx_queue(efx, qid / EFX_MAX_TXQ_PER_CHANNEL, - qid % EFX_MAX_TXQ_PER_CHANNEL); - if (atomic_cmpxchg(&tx_queue->flush_outstanding, 1, 0)) { + channel = efx_get_tx_channel(efx, qid / EFX_MAX_TXQ_PER_CHANNEL); + tx_queue = channel->tx_queue + (qid % EFX_MAX_TXQ_PER_CHANNEL); + if (atomic_cmpxchg(&tx_queue->flush_outstanding, 1, 0)) efx_farch_magic_event(tx_queue->channel, EFX_CHANNEL_MAGIC_TX_DRAIN(tx_queue)); - } } } diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 9f7dfdf708cf..9b4b25704271 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -1187,6 +1187,7 @@ struct efx_udp_tunnel { * @get_wol: Get WoL configuration from driver state * @set_wol: Push WoL configuration to the NIC * @resume_wol: Synchronise WoL state between driver and MC (e.g. 
after resume) + * @get_fec_stats: Get standard FEC statistics. * @test_chip: Test registers. May use efx_farch_test_registers(), and is * expected to reset the NIC. * @test_nvram: Test validity of NVRAM contents @@ -1332,6 +1333,8 @@ struct efx_nic_type { void (*get_wol)(struct efx_nic *efx, struct ethtool_wolinfo *wol); int (*set_wol)(struct efx_nic *efx, u32 type); void (*resume_wol)(struct efx_nic *efx); + void (*get_fec_stats)(struct efx_nic *efx, + struct ethtool_fec_stats *fec_stats); unsigned int (*check_caps)(const struct efx_nic *efx, u8 flag, u32 offset); diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index 89c5c75f479f..17b8119c48e5 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -94,12 +94,11 @@ static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel, rx_buf->len -= hdr_len; for (;;) { - skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, - rx_buf->page, rx_buf->page_offset, - rx_buf->len); + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + rx_buf->page, rx_buf->page_offset, + rx_buf->len, efx->rx_buffer_truesize); rx_buf->page = NULL; - skb->len += rx_buf->len; - skb->data_len += rx_buf->len; + if (skb_shinfo(skb)->nr_frags == n_frags) break; @@ -111,8 +110,6 @@ static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel, n_frags = 0; } - skb->truesize += n_frags * efx->rx_buffer_truesize; - /* Move past the ethernet header */ skb->protocol = eth_type_trans(skb, efx->net_dev); diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 1665529a7271..0c6650d2e239 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -412,14 +412,6 @@ err: return NETDEV_TX_OK; } -static void efx_xdp_return_frames(int n, struct xdp_frame **xdpfs) -{ - int i; - - for (i = 0; i < n; i++) - xdp_return_frame_rx_napi(xdpfs[i]); -} - /* Transmit a packet from an XDP buffer * * Returns number of packets sent on success, error code otherwise. @@ -492,12 +484,7 @@ int efx_xdp_tx_buffers(struct efx_nic *efx, int n, struct xdp_frame **xdpfs, if (flush && i > 0) efx_nic_push_buffers(tx_queue); - if (i == 0) - return -EIO; - - efx_xdp_return_frames(n - i, xdpfs + i); - - return i; + return i == 0 ? -EIO : i; } /* Initiate a packet transmission. We use one channel per CPU diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 891b49281bc6..cbde83f620a0 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -2204,7 +2204,7 @@ static int try_toggle_control_gpio(struct device *dev, const char *name, int index, int value, unsigned int nsdelay) { - struct gpio_desc *gpio = *desc; + struct gpio_desc *gpio; enum gpiod_flags flags = value ? 
GPIOD_OUT_LOW : GPIOD_OUT_HIGH; gpio = devm_gpiod_get_index_optional(dev, name, index, flags); diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index 606c79de93a6..556a9790cdcf 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -2649,11 +2649,13 @@ static const struct of_device_id smsc911x_dt_ids[] = { MODULE_DEVICE_TABLE(of, smsc911x_dt_ids); #endif +#ifdef CONFIG_ACPI static const struct acpi_device_id smsc911x_acpi_match[] = { { "ARMH9118", 0 }, { } }; MODULE_DEVICE_TABLE(acpi, smsc911x_acpi_match); +#endif static struct platform_driver smsc911x_driver = { .probe = smsc911x_drv_probe, diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index 3c53051bdacf..dfc85cc68173 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -1715,14 +1715,17 @@ static int netsec_netdev_init(struct net_device *ndev) goto err1; /* set phy power down */ - data = netsec_phy_read(priv->mii_bus, priv->phy_addr, MII_BMCR) | - BMCR_PDOWN; - netsec_phy_write(priv->mii_bus, priv->phy_addr, MII_BMCR, data); + data = netsec_phy_read(priv->mii_bus, priv->phy_addr, MII_BMCR); + netsec_phy_write(priv->mii_bus, priv->phy_addr, MII_BMCR, + data | BMCR_PDOWN); ret = netsec_reset_hardware(priv, true); if (ret) goto err2; + /* Restore phy power state */ + netsec_phy_write(priv->mii_bus, priv->phy_addr, MII_BMCR, data); + spin_lock_init(&priv->desc_ring[NETSEC_RING_TX].lock); spin_lock_init(&priv->desc_ring[NETSEC_RING_RX].lock); @@ -1757,8 +1760,7 @@ static int netsec_xdp_xmit(struct net_device *ndev, int n, { struct netsec_priv *priv = netdev_priv(ndev); struct netsec_desc_ring *tx_ring = &priv->desc_ring[NETSEC_RING_TX]; - int drops = 0; - int i; + int i, nxmit = 0; if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) return -EINVAL; @@ -1769,12 +1771,11 @@ static int netsec_xdp_xmit(struct net_device *ndev, int n, int err; err = netsec_xdp_queue_one(priv, xdpf, true); - if (err != NETSEC_XDP_TX) { - xdp_return_frame_rx_napi(xdpf); - drops++; - } else { - tx_ring->xdp_xmit++; - } + if (err != NETSEC_XDP_TX) + break; + + tx_ring->xdp_xmit++; + nxmit++; } spin_unlock(&tx_ring->lock); @@ -1783,7 +1784,7 @@ static int netsec_xdp_xmit(struct net_device *ndev, int n, tx_ring->xdp_xmit = 0; } - return n - drops; + return nxmit; } static int netsec_xdp_setup(struct netsec_priv *priv, struct bpf_prog *prog, diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c index 501b9c7aba56..fcbb4bb31408 100644 --- a/drivers/net/ethernet/socionext/sni_ave.c +++ b/drivers/net/ethernet/socionext/sni_ave.c @@ -1559,7 +1559,6 @@ static int ave_probe(struct platform_device *pdev) struct ave_private *priv; struct net_device *ndev; struct device_node *np; - const void *mac_addr; void __iomem *base; const char *name; int i, irq, ret; @@ -1600,12 +1599,9 @@ static int ave_probe(struct platform_device *pdev) ndev->max_mtu = AVE_MAX_ETHFRAME - (ETH_HLEN + ETH_FCS_LEN); - mac_addr = of_get_mac_address(np); - if (!IS_ERR(mac_addr)) - ether_addr_copy(ndev->dev_addr, mac_addr); - - /* if the mac address is invalid, use random mac address */ - if (!is_valid_ether_addr(ndev->dev_addr)) { + ret = of_get_mac_address(np, ndev->dev_addr); + if (ret) { + /* if the mac address is invalid, use random mac address */ eth_hw_addr_random(ndev); dev_warn(dev, "Using random MAC address: %pM\n", ndev->dev_addr); diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig 
b/drivers/net/ethernet/stmicro/stmmac/Kconfig index e675ba12fde2..7737e4d0bb9e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -140,8 +140,8 @@ config DWMAC_ROCKCHIP config DWMAC_SOCFPGA tristate "SOCFPGA dwmac support" - default (ARCH_SOCFPGA || ARCH_STRATIX10) - depends on OF && (ARCH_SOCFPGA || ARCH_STRATIX10 || COMPILE_TEST) + default ARCH_INTEL_SOCFPGA + depends on OF && (ARCH_INTEL_SOCFPGA || COMPILE_TEST) select MFD_SYSCON help Support for ethernet controller on Altera SOCFPGA diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index 366740ab9c5a..f2e478b884b0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -6,6 +6,7 @@ stmmac-objs:= stmmac_main.o stmmac_ethtool.o stmmac_mdio.o ring_mode.o \ mmc_core.o stmmac_hwtstamp.o stmmac_ptp.o dwmac4_descs.o \ dwmac4_dma.o dwmac4_lib.o dwmac4_core.o dwmac5.o hwif.o \ stmmac_tc.o dwxgmac2_core.o dwxgmac2_dma.o dwxgmac2_descs.o \ + stmmac_xdp.o \ $(stmmac-y) stmmac-$(CONFIG_STMMAC_SELFTESTS) += stmmac_selftests.o diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 6f271c46368d..619e3c0760d6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -33,6 +33,7 @@ #define DWMAC_CORE_4_10 0x41 #define DWMAC_CORE_5_00 0x50 #define DWMAC_CORE_5_10 0x51 +#define DWMAC_CORE_5_20 0x52 #define DWXGMAC_CORE_2_10 0x21 #define DWXLGMAC_CORE_2_00 0x20 @@ -182,6 +183,12 @@ struct stmmac_extra_stats { /* TSO */ unsigned long tx_tso_frames; unsigned long tx_tso_nfrags; + /* EST */ + unsigned long mtl_est_cgce; + unsigned long mtl_est_hlbs; + unsigned long mtl_est_hlbf; + unsigned long mtl_est_btre; + unsigned long mtl_est_btrlm; }; /* Safety Feature statistics exposed by ethtool */ @@ -253,6 +260,9 @@ struct stmmac_safety_stats { #define DMA_HW_FEAT_ACTPHYIF 0x70000000 /* Active/selected PHY iface */ #define DEFAULT_DMA_PBL 8 +/* MSI defines */ +#define STMMAC_MSI_VEC_MAX 32 + /* PCS status and mask defines */ #define PCS_ANE_IRQ BIT(2) /* PCS Auto-Negotiation */ #define PCS_LINK_IRQ BIT(1) /* PCS Link */ @@ -303,12 +313,37 @@ enum dma_irq_status { handle_tx = 0x8, }; +enum dma_irq_dir { + DMA_DIR_RX = 0x1, + DMA_DIR_TX = 0x2, + DMA_DIR_RXTX = 0x3, +}; + +enum request_irq_err { + REQ_IRQ_ERR_ALL, + REQ_IRQ_ERR_TX, + REQ_IRQ_ERR_RX, + REQ_IRQ_ERR_SFTY_UE, + REQ_IRQ_ERR_SFTY_CE, + REQ_IRQ_ERR_LPI, + REQ_IRQ_ERR_WOL, + REQ_IRQ_ERR_MAC, + REQ_IRQ_ERR_NO, +}; + /* EEE and LPI defines */ #define CORE_IRQ_TX_PATH_IN_LPI_MODE (1 << 0) #define CORE_IRQ_TX_PATH_EXIT_LPI_MODE (1 << 1) #define CORE_IRQ_RX_PATH_IN_LPI_MODE (1 << 2) #define CORE_IRQ_RX_PATH_EXIT_LPI_MODE (1 << 3) +/* FPE defines */ +#define FPE_EVENT_UNKNOWN 0 +#define FPE_EVENT_TRSP BIT(0) +#define FPE_EVENT_TVER BIT(1) +#define FPE_EVENT_RRSP BIT(2) +#define FPE_EVENT_RVER BIT(3) + #define CORE_IRQ_MTL_RX_OVERFLOW BIT(8) /* Physical Coding Sublayer */ @@ -382,6 +417,8 @@ struct dma_features { unsigned int estsel; unsigned int fpesel; unsigned int tbssel; + /* Numbers of Auxiliary Snapshot Inputs */ + unsigned int aux_snapshot_n; }; /* RX Buffer size must be multiple of 4/8/16 bytes */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c index 08c76636c164..dfbaea06d108 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c +++ 
b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c @@ -115,7 +115,7 @@ static int anarion_dwmac_probe(struct platform_device *pdev) if (IS_ERR(gmac)) return PTR_ERR(gmac); - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index 27254b27d7ed..bc91fd867dcd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -438,7 +438,7 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev) if (IS_ERR(stmmac_res.addr)) return PTR_ERR(stmmac_res.addr); - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c index fad503820e04..fbfda55b4c52 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c @@ -27,7 +27,7 @@ static int dwmac_generic_probe(struct platform_device *pdev) return ret; if (pdev->dev.of_node) { - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) { dev_err(&pdev->dev, "dt configuration failed\n"); return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index 223f69da7e95..84651207a1de 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -90,6 +90,32 @@ imx8dxl_set_intf_mode(struct plat_stmmacenet_data *plat_dat) return ret; } +static int imx_dwmac_clks_config(void *priv, bool enabled) +{ + struct imx_priv_data *dwmac = priv; + int ret = 0; + + if (enabled) { + ret = clk_prepare_enable(dwmac->clk_mem); + if (ret) { + dev_err(dwmac->dev, "mem clock enable failed\n"); + return ret; + } + + ret = clk_prepare_enable(dwmac->clk_tx); + if (ret) { + dev_err(dwmac->dev, "tx clock enable failed\n"); + clk_disable_unprepare(dwmac->clk_mem); + return ret; + } + } else { + clk_disable_unprepare(dwmac->clk_tx); + clk_disable_unprepare(dwmac->clk_mem); + } + + return ret; +} + static int imx_dwmac_init(struct platform_device *pdev, void *priv) { struct plat_stmmacenet_data *plat_dat; @@ -98,39 +124,18 @@ static int imx_dwmac_init(struct platform_device *pdev, void *priv) plat_dat = dwmac->plat_dat; - ret = clk_prepare_enable(dwmac->clk_mem); - if (ret) { - dev_err(&pdev->dev, "mem clock enable failed\n"); - return ret; - } - - ret = clk_prepare_enable(dwmac->clk_tx); - if (ret) { - dev_err(&pdev->dev, "tx clock enable failed\n"); - goto clk_tx_en_failed; - } - if (dwmac->ops->set_intf_mode) { ret = dwmac->ops->set_intf_mode(plat_dat); if (ret) - goto intf_mode_failed; + return ret; } return 0; - -intf_mode_failed: - clk_disable_unprepare(dwmac->clk_tx); -clk_tx_en_failed: - clk_disable_unprepare(dwmac->clk_mem); - return ret; } static void imx_dwmac_exit(struct platform_device *pdev, void *priv) { - struct imx_priv_data *dwmac = priv; - - clk_disable_unprepare(dwmac->clk_tx); - clk_disable_unprepare(dwmac->clk_mem); + /* nothing to do now */ } static void imx_dwmac_fix_speed(void *priv, unsigned int speed) @@ -226,7 +231,7 @@ static int 
imx_dwmac_probe(struct platform_device *pdev) if (!dwmac) return -ENOMEM; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); @@ -249,10 +254,15 @@ static int imx_dwmac_probe(struct platform_device *pdev) plat_dat->addr64 = dwmac->ops->addr_width; plat_dat->init = imx_dwmac_init; plat_dat->exit = imx_dwmac_exit; + plat_dat->clks_config = imx_dwmac_clks_config; plat_dat->fix_mac_speed = imx_dwmac_fix_speed; plat_dat->bsp_priv = dwmac; dwmac->plat_dat = plat_dat; + ret = imx_dwmac_clks_config(dwmac, true); + if (ret) + goto err_clks_config; + ret = imx_dwmac_init(pdev, dwmac); if (ret) goto err_dwmac_init; @@ -263,9 +273,11 @@ static int imx_dwmac_probe(struct platform_device *pdev) return 0; -err_dwmac_init: err_drv_probe: imx_dwmac_exit(pdev, plat_dat->bsp_priv); +err_dwmac_init: + imx_dwmac_clks_config(dwmac, false); +err_clks_config: err_parse_dt: err_match_data: stmmac_remove_config_dt(pdev, plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c index 6c19fcc76c6f..06d287f104be 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c @@ -85,7 +85,7 @@ static int intel_eth_plat_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) { dev_err(&pdev->dev, "dt configuration failed\n"); return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 0b64f7710d17..80728a4c0e3f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -8,9 +8,28 @@ #include "dwmac-intel.h" #include "dwmac4.h" #include "stmmac.h" +#include "stmmac_ptp.h" + +#define INTEL_MGBE_ADHOC_ADDR 0x15 +#define INTEL_MGBE_XPCS_ADDR 0x16 + +/* Selection for PTP Clock Freq belongs to PSE & PCH GbE */ +#define PSE_PTP_CLK_FREQ_MASK (GMAC_GPO0 | GMAC_GPO3) +#define PSE_PTP_CLK_FREQ_19_2MHZ (GMAC_GPO0) +#define PSE_PTP_CLK_FREQ_200MHZ (GMAC_GPO0 | GMAC_GPO3) +#define PSE_PTP_CLK_FREQ_256MHZ (0) +#define PCH_PTP_CLK_FREQ_MASK (GMAC_GPO0) +#define PCH_PTP_CLK_FREQ_19_2MHZ (GMAC_GPO0) +#define PCH_PTP_CLK_FREQ_200MHZ (0) + +/* Cross-timestamping defines */ +#define ART_CPUID_LEAF 0x15 +#define EHL_PSE_ART_MHZ 19200000 struct intel_priv_data { int mdio_adhoc_addr; /* mdio address for serdes & etc */ + unsigned long crossts_adj; + bool is_pse; }; /* This struct is used to associate PCI Function of MAC controller on a board, @@ -134,6 +153,11 @@ static int intel_serdes_powerup(struct net_device *ndev, void *priv_data) return data; } + /* PSE only - ungate SGMII PHY Rx Clock */ + if (intel_priv->is_pse) + mdiobus_modify(priv->mii, serdes_phy_addr, SERDES_GCR0, + 0, SERDES_PHY_RX_CLK); + return 0; } @@ -149,6 +173,11 @@ static void intel_serdes_powerdown(struct net_device *ndev, void *intel_data) serdes_phy_addr = intel_priv->mdio_adhoc_addr; + /* PSE only - gate SGMII PHY Rx Clock */ + if (intel_priv->is_pse) + mdiobus_modify(priv->mii, serdes_phy_addr, SERDES_GCR0, + SERDES_PHY_RX_CLK, 0); + /* move power state to P3 */ data = mdiobus_read(priv->mii, serdes_phy_addr, SERDES_GCR0); @@ -201,6 +230,161 @@ static void intel_serdes_powerdown(struct net_device *ndev, void *intel_data) } } 
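A hedged sketch of the pattern from the dwmac-imx hunk a little above: the open-coded clock handling in init/exit is replaced by a single `clks_config` callback, so probe keeps the enable/disable counts balanced on error paths and the stmmac core can gate the clocks itself (e.g. around suspend/resume). `struct imx_priv_data` is reduced here to the two clock fields the diff actually touches, and `example_clks_config` is a hypothetical name, not the driver's.

```c
#include <linux/clk.h>

/* Reduced stand-in for the driver's private data; the real struct
 * carries more fields (dev, plat_dat, ...) than shown here.
 */
struct imx_priv_data {
	struct clk *clk_mem;
	struct clk *clk_tx;
};

static int example_clks_config(void *priv, bool enabled)
{
	struct imx_priv_data *dwmac = priv;
	int ret = 0;

	if (enabled) {
		ret = clk_prepare_enable(dwmac->clk_mem);
		if (ret)
			return ret;

		ret = clk_prepare_enable(dwmac->clk_tx);
		if (ret)
			/* Keep enable counts balanced on failure */
			clk_disable_unprepare(dwmac->clk_mem);
	} else {
		/* Disable in the reverse order of enabling */
		clk_disable_unprepare(dwmac->clk_tx);
		clk_disable_unprepare(dwmac->clk_mem);
	}

	return ret;
}
```

Probe then enables the clocks once via this helper and wires it into `plat_dat->clks_config`, which is why the old `imx_dwmac_exit` body collapses to a comment in the hunk above.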
+/* Program PTP Clock Frequency for different variant of + * Intel mGBE that has slightly different GPO mapping + */ +static void intel_mgbe_ptp_clk_freq_config(void *npriv) +{ + struct stmmac_priv *priv = (struct stmmac_priv *)npriv; + struct intel_priv_data *intel_priv; + u32 gpio_value; + + intel_priv = (struct intel_priv_data *)priv->plat->bsp_priv; + + gpio_value = readl(priv->ioaddr + GMAC_GPIO_STATUS); + + if (intel_priv->is_pse) { + /* For PSE GbE, use 200MHz */ + gpio_value &= ~PSE_PTP_CLK_FREQ_MASK; + gpio_value |= PSE_PTP_CLK_FREQ_200MHZ; + } else { + /* For PCH GbE, use 200MHz */ + gpio_value &= ~PCH_PTP_CLK_FREQ_MASK; + gpio_value |= PCH_PTP_CLK_FREQ_200MHZ; + } + + writel(gpio_value, priv->ioaddr + GMAC_GPIO_STATUS); +} + +static void get_arttime(struct mii_bus *mii, int intel_adhoc_addr, + u64 *art_time) +{ + u64 ns; + + ns = mdiobus_read(mii, intel_adhoc_addr, PMC_ART_VALUE3); + ns <<= GMAC4_ART_TIME_SHIFT; + ns |= mdiobus_read(mii, intel_adhoc_addr, PMC_ART_VALUE2); + ns <<= GMAC4_ART_TIME_SHIFT; + ns |= mdiobus_read(mii, intel_adhoc_addr, PMC_ART_VALUE1); + ns <<= GMAC4_ART_TIME_SHIFT; + ns |= mdiobus_read(mii, intel_adhoc_addr, PMC_ART_VALUE0); + + *art_time = ns; +} + +static int intel_crosststamp(ktime_t *device, + struct system_counterval_t *system, + void *ctx) +{ + struct intel_priv_data *intel_priv; + + struct stmmac_priv *priv = (struct stmmac_priv *)ctx; + void __iomem *ptpaddr = priv->ptpaddr; + void __iomem *ioaddr = priv->hw->pcsr; + unsigned long flags; + u64 art_time = 0; + u64 ptp_time = 0; + u32 num_snapshot; + u32 gpio_value; + u32 acr_value; + int ret; + u32 v; + int i; + + if (!boot_cpu_has(X86_FEATURE_ART)) + return -EOPNOTSUPP; + + intel_priv = priv->plat->bsp_priv; + + /* Both internal crosstimestamping and external triggered event + * timestamping cannot be run concurrently. + */ + if (priv->plat->ext_snapshot_en) + return -EBUSY; + + mutex_lock(&priv->aux_ts_lock); + /* Enable Internal snapshot trigger */ + acr_value = readl(ptpaddr + PTP_ACR); + acr_value &= ~PTP_ACR_MASK; + switch (priv->plat->int_snapshot_num) { + case AUX_SNAPSHOT0: + acr_value |= PTP_ACR_ATSEN0; + break; + case AUX_SNAPSHOT1: + acr_value |= PTP_ACR_ATSEN1; + break; + case AUX_SNAPSHOT2: + acr_value |= PTP_ACR_ATSEN2; + break; + case AUX_SNAPSHOT3: + acr_value |= PTP_ACR_ATSEN3; + break; + default: + mutex_unlock(&priv->aux_ts_lock); + return -EINVAL; + } + writel(acr_value, ptpaddr + PTP_ACR); + + /* Clear FIFO */ + acr_value = readl(ptpaddr + PTP_ACR); + acr_value |= PTP_ACR_ATSFC; + writel(acr_value, ptpaddr + PTP_ACR); + /* Release the mutex */ + mutex_unlock(&priv->aux_ts_lock); + + /* Trigger Internal snapshot signal + * Create a rising edge by just toggle the GPO1 to low + * and back to high. 
+ */ + gpio_value = readl(ioaddr + GMAC_GPIO_STATUS); + gpio_value &= ~GMAC_GPO1; + writel(gpio_value, ioaddr + GMAC_GPIO_STATUS); + gpio_value |= GMAC_GPO1; + writel(gpio_value, ioaddr + GMAC_GPIO_STATUS); + + /* Poll for time sync operation done */ + ret = readl_poll_timeout(priv->ioaddr + GMAC_INT_STATUS, v, + (v & GMAC_INT_TSIE), 100, 10000); + + if (ret == -ETIMEDOUT) { + pr_err("%s: Wait for time sync operation timeout\n", __func__); + return ret; + } + + num_snapshot = (readl(ioaddr + GMAC_TIMESTAMP_STATUS) & + GMAC_TIMESTAMP_ATSNS_MASK) >> + GMAC_TIMESTAMP_ATSNS_SHIFT; + + /* Repeat until the timestamps are from the FIFO last segment */ + for (i = 0; i < num_snapshot; i++) { + spin_lock_irqsave(&priv->ptp_lock, flags); + stmmac_get_ptptime(priv, ptpaddr, &ptp_time); + *device = ns_to_ktime(ptp_time); + spin_unlock_irqrestore(&priv->ptp_lock, flags); + get_arttime(priv->mii, intel_priv->mdio_adhoc_addr, &art_time); + *system = convert_art_to_tsc(art_time); + } + + system->cycles *= intel_priv->crossts_adj; + + return 0; +} + +static void intel_mgbe_pse_crossts_adj(struct intel_priv_data *intel_priv, + int base) +{ + if (boot_cpu_has(X86_FEATURE_ART)) { + unsigned int art_freq; + + /* On systems that support ART, ART frequency can be obtained + * from ECX register of CPUID leaf (0x15). + */ + art_freq = cpuid_ecx(ART_CPUID_LEAF); + do_div(art_freq, base); + intel_priv->crossts_adj = art_freq; + } +} + static void common_default_data(struct plat_stmmacenet_data *plat) { plat->clk_csr = 2; /* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */ @@ -263,6 +447,9 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, /* Disable Priority config by default */ plat->tx_queues_cfg[i].use_prio = false; + /* Default TX Q0 to use TSO and rest TXQ for TBS */ + if (i > 0) + plat->tx_queues_cfg[i].tbs_en = 1; } /* FIFO size is 4096 bytes for 1 tx/rx queue */ @@ -284,6 +471,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, plat->dma_cfg->fixed_burst = 0; plat->dma_cfg->mixed_burst = 0; plat->dma_cfg->aal = 0; + plat->dma_cfg->dche = true; plat->axi = devm_kzalloc(&pdev->dev, sizeof(*plat->axi), GFP_KERNEL); @@ -319,6 +507,8 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, return ret; } + plat->ptp_clk_freq_config = intel_mgbe_ptp_clk_freq_config; + /* Set default value for multicast hash bins */ plat->multicast_filter_bins = HASH_TABLE_SIZE; @@ -333,6 +523,30 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, /* Use the last Rx queue */ plat->vlan_fail_q = plat->rx_queues_to_use - 1; + /* Intel mgbe SGMII interface uses pcs-xcps */ + if (plat->phy_interface == PHY_INTERFACE_MODE_SGMII) { + plat->mdio_bus_data->has_xpcs = true; + plat->mdio_bus_data->xpcs_an_inband = true; + } + + /* Ensure mdio bus scan skips intel serdes and pcs-xpcs */ + plat->mdio_bus_data->phy_mask = 1 << INTEL_MGBE_ADHOC_ADDR; + plat->mdio_bus_data->phy_mask |= 1 << INTEL_MGBE_XPCS_ADDR; + + plat->int_snapshot_num = AUX_SNAPSHOT1; + plat->ext_snapshot_num = AUX_SNAPSHOT0; + + plat->has_crossts = true; + plat->crosststamp = intel_crosststamp; + + /* Setup MSI vector offset specific to Intel mGbE controller */ + plat->msi_mac_vec = 29; + plat->msi_lpi_vec = 28; + plat->msi_sfty_ce_vec = 27; + plat->msi_sfty_ue_vec = 26; + plat->msi_rx_base_vec = 0; + plat->msi_tx_base_vec = 1; + return 0; } @@ -378,8 +592,14 @@ static struct stmmac_pci_info ehl_rgmii1g_info = { static int ehl_pse0_common_data(struct pci_dev *pdev, struct plat_stmmacenet_data *plat) { + struct intel_priv_data *intel_priv = plat->bsp_priv; 
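A worked example of the cross-timestamping scaling just introduced: `intel_mgbe_pse_crossts_adj()` divides the nominal ART frequency reported by CPUID leaf 0x15 (ECX, in Hz) by the PSE ART input rate, and `intel_crosststamp()` multiplies the captured cycle count by that ratio before handing it to the timekeeping core. Note that `EHL_PSE_ART_MHZ` holds 19200000, i.e. a value in Hz despite the name. The CPUID reading below is illustrative only, not guaranteed for any given part.

```c
#include <stdint.h>
#include <stdio.h>

#define EHL_PSE_ART_HZ 19200000ULL	/* PSE ART input rate (19.2 MHz) */

int main(void)
{
	/* Pretend CPUID.15H:ECX reported 38.4 MHz -- illustrative only */
	uint64_t art_freq = 38400000ULL;
	uint64_t crossts_adj = art_freq / EHL_PSE_ART_HZ;	/* = 2 */

	/* intel_crosststamp() then rescales the snapshot:
	 *	system->cycles *= crossts_adj;
	 * so each PSE-domain ART tick counts as two platform ART ticks
	 * before conversion to TSC time.
	 */
	printf("crossts_adj = %llu\n", (unsigned long long)crossts_adj);
	return 0;
}
```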
+ + intel_priv->is_pse = true; plat->bus_id = 2; plat->addr64 = 32; + + intel_mgbe_pse_crossts_adj(intel_priv, EHL_PSE_ART_MHZ); + return ehl_common_data(pdev, plat); } @@ -410,8 +630,14 @@ static struct stmmac_pci_info ehl_pse0_sgmii1g_info = { static int ehl_pse1_common_data(struct pci_dev *pdev, struct plat_stmmacenet_data *plat) { + struct intel_priv_data *intel_priv = plat->bsp_priv; + + intel_priv->is_pse = true; plat->bus_id = 3; plat->addr64 = 32; + + intel_mgbe_pse_crossts_adj(intel_priv, EHL_PSE_ART_MHZ); + return ehl_common_data(pdev, plat); } @@ -609,6 +835,79 @@ static const struct stmmac_pci_info quark_info = { .setup = quark_default_data, }; +static int stmmac_config_single_msi(struct pci_dev *pdev, + struct plat_stmmacenet_data *plat, + struct stmmac_resources *res) +{ + int ret; + + ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES); + if (ret < 0) { + dev_info(&pdev->dev, "%s: Single IRQ enablement failed\n", + __func__); + return ret; + } + + res->irq = pci_irq_vector(pdev, 0); + res->wol_irq = res->irq; + plat->multi_msi_en = 0; + dev_info(&pdev->dev, "%s: Single IRQ enablement successful\n", + __func__); + + return 0; +} + +static int stmmac_config_multi_msi(struct pci_dev *pdev, + struct plat_stmmacenet_data *plat, + struct stmmac_resources *res) +{ + int ret; + int i; + + if (plat->msi_rx_base_vec >= STMMAC_MSI_VEC_MAX || + plat->msi_tx_base_vec >= STMMAC_MSI_VEC_MAX) { + dev_info(&pdev->dev, "%s: Invalid RX & TX vector defined\n", + __func__); + return -1; + } + + ret = pci_alloc_irq_vectors(pdev, 2, STMMAC_MSI_VEC_MAX, + PCI_IRQ_MSI | PCI_IRQ_MSIX); + if (ret < 0) { + dev_info(&pdev->dev, "%s: multi MSI enablement failed\n", + __func__); + return ret; + } + + /* For RX MSI */ + for (i = 0; i < plat->rx_queues_to_use; i++) { + res->rx_irq[i] = pci_irq_vector(pdev, + plat->msi_rx_base_vec + i * 2); + } + + /* For TX MSI */ + for (i = 0; i < plat->tx_queues_to_use; i++) { + res->tx_irq[i] = pci_irq_vector(pdev, + plat->msi_tx_base_vec + i * 2); + } + + if (plat->msi_mac_vec < STMMAC_MSI_VEC_MAX) + res->irq = pci_irq_vector(pdev, plat->msi_mac_vec); + if (plat->msi_wol_vec < STMMAC_MSI_VEC_MAX) + res->wol_irq = pci_irq_vector(pdev, plat->msi_wol_vec); + if (plat->msi_lpi_vec < STMMAC_MSI_VEC_MAX) + res->lpi_irq = pci_irq_vector(pdev, plat->msi_lpi_vec); + if (plat->msi_sfty_ce_vec < STMMAC_MSI_VEC_MAX) + res->sfty_ce_irq = pci_irq_vector(pdev, plat->msi_sfty_ce_vec); + if (plat->msi_sfty_ue_vec < STMMAC_MSI_VEC_MAX) + res->sfty_ue_irq = pci_irq_vector(pdev, plat->msi_sfty_ue_vec); + + plat->multi_msi_en = 1; + dev_info(&pdev->dev, "%s: multi MSI enablement successful\n", __func__); + + return 0; +} + /** * intel_eth_pci_probe * @@ -650,7 +949,7 @@ static int intel_eth_pci_probe(struct pci_dev *pdev, return -ENOMEM; /* Enable pci device */ - ret = pci_enable_device(pdev); + ret = pcim_enable_device(pdev); if (ret) { dev_err(&pdev->dev, "%s: ERROR: failed to enable device\n", __func__); @@ -664,20 +963,27 @@ static int intel_eth_pci_probe(struct pci_dev *pdev, pci_set_master(pdev); plat->bsp_priv = intel_priv; - intel_priv->mdio_adhoc_addr = 0x15; + intel_priv->mdio_adhoc_addr = INTEL_MGBE_ADHOC_ADDR; + intel_priv->crossts_adj = 1; + + /* Initialize all MSI vectors to invalid so that it can be set + * according to platform data settings below. 
+ * Note: MSI vector takes value from 0 upto 31 (STMMAC_MSI_VEC_MAX) + */ + plat->msi_mac_vec = STMMAC_MSI_VEC_MAX; + plat->msi_wol_vec = STMMAC_MSI_VEC_MAX; + plat->msi_lpi_vec = STMMAC_MSI_VEC_MAX; + plat->msi_sfty_ce_vec = STMMAC_MSI_VEC_MAX; + plat->msi_sfty_ue_vec = STMMAC_MSI_VEC_MAX; + plat->msi_rx_base_vec = STMMAC_MSI_VEC_MAX; + plat->msi_tx_base_vec = STMMAC_MSI_VEC_MAX; ret = info->setup(pdev, plat); if (ret) return ret; - ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES); - if (ret < 0) - return ret; - memset(&res, 0, sizeof(res)); res.addr = pcim_iomap_table(pdev)[0]; - res.wol_irq = pci_irq_vector(pdev, 0); - res.irq = pci_irq_vector(pdev, 0); if (plat->eee_usecs_rate > 0) { u32 tx_lpi_usec; @@ -686,13 +992,28 @@ static int intel_eth_pci_probe(struct pci_dev *pdev, writel(tx_lpi_usec, res.addr + GMAC_1US_TIC_COUNTER); } + ret = stmmac_config_multi_msi(pdev, plat, &res); + if (ret) { + ret = stmmac_config_single_msi(pdev, plat, &res); + if (ret) { + dev_err(&pdev->dev, "%s: ERROR: failed to enable IRQ\n", + __func__); + goto err_alloc_irq; + } + } + ret = stmmac_dvr_probe(&pdev->dev, plat, &res); if (ret) { - pci_free_irq_vectors(pdev); - clk_disable_unprepare(plat->stmmac_clk); - clk_unregister_fixed_rate(plat->stmmac_clk); + goto err_dvr_probe; } + return 0; + +err_dvr_probe: + pci_free_irq_vectors(pdev); +err_alloc_irq: + clk_disable_unprepare(plat->stmmac_clk); + clk_unregister_fixed_rate(plat->stmmac_clk); return ret; } @@ -710,13 +1031,9 @@ static void intel_eth_pci_remove(struct pci_dev *pdev) stmmac_dvr_remove(&pdev->dev); - pci_free_irq_vectors(pdev); - clk_unregister_fixed_rate(priv->plat->stmmac_clk); pcim_iounmap_regions(pdev, BIT(0)); - - pci_disable_device(pdev); } static int __maybe_unused intel_eth_pci_suspend(struct device *dev) @@ -732,7 +1049,6 @@ static int __maybe_unused intel_eth_pci_suspend(struct device *dev) if (ret) return ret; - pci_disable_device(pdev); pci_wake_from_d3(pdev, true); return 0; } @@ -745,7 +1061,7 @@ static int __maybe_unused intel_eth_pci_resume(struct device *dev) pci_restore_state(pdev); pci_set_power_state(pdev, PCI_D0); - ret = pci_enable_device(pdev); + ret = pcim_enable_device(pdev); if (ret) return ret; @@ -757,41 +1073,41 @@ static int __maybe_unused intel_eth_pci_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(intel_eth_pm_ops, intel_eth_pci_suspend, intel_eth_pci_resume); -#define PCI_DEVICE_ID_INTEL_QUARK_ID 0x0937 -#define PCI_DEVICE_ID_INTEL_EHL_RGMII1G_ID 0x4b30 -#define PCI_DEVICE_ID_INTEL_EHL_SGMII1G_ID 0x4b31 -#define PCI_DEVICE_ID_INTEL_EHL_SGMII2G5_ID 0x4b32 +#define PCI_DEVICE_ID_INTEL_QUARK 0x0937 +#define PCI_DEVICE_ID_INTEL_EHL_RGMII1G 0x4b30 +#define PCI_DEVICE_ID_INTEL_EHL_SGMII1G 0x4b31 +#define PCI_DEVICE_ID_INTEL_EHL_SGMII2G5 0x4b32 /* Intel(R) Programmable Services Engine (Intel(R) PSE) consist of 2 MAC * which are named PSE0 and PSE1 */ -#define PCI_DEVICE_ID_INTEL_EHL_PSE0_RGMII1G_ID 0x4ba0 -#define PCI_DEVICE_ID_INTEL_EHL_PSE0_SGMII1G_ID 0x4ba1 -#define PCI_DEVICE_ID_INTEL_EHL_PSE0_SGMII2G5_ID 0x4ba2 -#define PCI_DEVICE_ID_INTEL_EHL_PSE1_RGMII1G_ID 0x4bb0 -#define PCI_DEVICE_ID_INTEL_EHL_PSE1_SGMII1G_ID 0x4bb1 -#define PCI_DEVICE_ID_INTEL_EHL_PSE1_SGMII2G5_ID 0x4bb2 -#define PCI_DEVICE_ID_INTEL_TGLH_SGMII1G_0_ID 0x43ac -#define PCI_DEVICE_ID_INTEL_TGLH_SGMII1G_1_ID 0x43a2 -#define PCI_DEVICE_ID_INTEL_TGL_SGMII1G_ID 0xa0ac -#define PCI_DEVICE_ID_INTEL_ADLS_SGMII1G_0_ID 0x7aac -#define PCI_DEVICE_ID_INTEL_ADLS_SGMII1G_1_ID 0x7aad +#define PCI_DEVICE_ID_INTEL_EHL_PSE0_RGMII1G 0x4ba0 +#define 
PCI_DEVICE_ID_INTEL_EHL_PSE0_SGMII1G 0x4ba1 +#define PCI_DEVICE_ID_INTEL_EHL_PSE0_SGMII2G5 0x4ba2 +#define PCI_DEVICE_ID_INTEL_EHL_PSE1_RGMII1G 0x4bb0 +#define PCI_DEVICE_ID_INTEL_EHL_PSE1_SGMII1G 0x4bb1 +#define PCI_DEVICE_ID_INTEL_EHL_PSE1_SGMII2G5 0x4bb2 +#define PCI_DEVICE_ID_INTEL_TGLH_SGMII1G_0 0x43ac +#define PCI_DEVICE_ID_INTEL_TGLH_SGMII1G_1 0x43a2 +#define PCI_DEVICE_ID_INTEL_TGL_SGMII1G 0xa0ac +#define PCI_DEVICE_ID_INTEL_ADLS_SGMII1G_0 0x7aac +#define PCI_DEVICE_ID_INTEL_ADLS_SGMII1G_1 0x7aad static const struct pci_device_id intel_eth_pci_id_table[] = { - { PCI_DEVICE_DATA(INTEL, QUARK_ID, &quark_info) }, - { PCI_DEVICE_DATA(INTEL, EHL_RGMII1G_ID, &ehl_rgmii1g_info) }, - { PCI_DEVICE_DATA(INTEL, EHL_SGMII1G_ID, &ehl_sgmii1g_info) }, - { PCI_DEVICE_DATA(INTEL, EHL_SGMII2G5_ID, &ehl_sgmii1g_info) }, - { PCI_DEVICE_DATA(INTEL, EHL_PSE0_RGMII1G_ID, &ehl_pse0_rgmii1g_info) }, - { PCI_DEVICE_DATA(INTEL, EHL_PSE0_SGMII1G_ID, &ehl_pse0_sgmii1g_info) }, - { PCI_DEVICE_DATA(INTEL, EHL_PSE0_SGMII2G5_ID, &ehl_pse0_sgmii1g_info) }, - { PCI_DEVICE_DATA(INTEL, EHL_PSE1_RGMII1G_ID, &ehl_pse1_rgmii1g_info) }, - { PCI_DEVICE_DATA(INTEL, EHL_PSE1_SGMII1G_ID, &ehl_pse1_sgmii1g_info) }, - { PCI_DEVICE_DATA(INTEL, EHL_PSE1_SGMII2G5_ID, &ehl_pse1_sgmii1g_info) }, - { PCI_DEVICE_DATA(INTEL, TGL_SGMII1G_ID, &tgl_sgmii1g_phy0_info) }, - { PCI_DEVICE_DATA(INTEL, TGLH_SGMII1G_0_ID, &tgl_sgmii1g_phy0_info) }, - { PCI_DEVICE_DATA(INTEL, TGLH_SGMII1G_1_ID, &tgl_sgmii1g_phy1_info) }, - { PCI_DEVICE_DATA(INTEL, ADLS_SGMII1G_0_ID, &adls_sgmii1g_phy0_info) }, - { PCI_DEVICE_DATA(INTEL, ADLS_SGMII1G_1_ID, &adls_sgmii1g_phy1_info) }, + { PCI_DEVICE_DATA(INTEL, QUARK, &quark_info) }, + { PCI_DEVICE_DATA(INTEL, EHL_RGMII1G, &ehl_rgmii1g_info) }, + { PCI_DEVICE_DATA(INTEL, EHL_SGMII1G, &ehl_sgmii1g_info) }, + { PCI_DEVICE_DATA(INTEL, EHL_SGMII2G5, &ehl_sgmii1g_info) }, + { PCI_DEVICE_DATA(INTEL, EHL_PSE0_RGMII1G, &ehl_pse0_rgmii1g_info) }, + { PCI_DEVICE_DATA(INTEL, EHL_PSE0_SGMII1G, &ehl_pse0_sgmii1g_info) }, + { PCI_DEVICE_DATA(INTEL, EHL_PSE0_SGMII2G5, &ehl_pse0_sgmii1g_info) }, + { PCI_DEVICE_DATA(INTEL, EHL_PSE1_RGMII1G, &ehl_pse1_rgmii1g_info) }, + { PCI_DEVICE_DATA(INTEL, EHL_PSE1_SGMII1G, &ehl_pse1_sgmii1g_info) }, + { PCI_DEVICE_DATA(INTEL, EHL_PSE1_SGMII2G5, &ehl_pse1_sgmii1g_info) }, + { PCI_DEVICE_DATA(INTEL, TGL_SGMII1G, &tgl_sgmii1g_phy0_info) }, + { PCI_DEVICE_DATA(INTEL, TGLH_SGMII1G_0, &tgl_sgmii1g_phy0_info) }, + { PCI_DEVICE_DATA(INTEL, TGLH_SGMII1G_1, &tgl_sgmii1g_phy1_info) }, + { PCI_DEVICE_DATA(INTEL, ADLS_SGMII1G_0, &adls_sgmii1g_phy0_info) }, + { PCI_DEVICE_DATA(INTEL, ADLS_SGMII1G_1, &adls_sgmii1g_phy1_info) }, {} }; MODULE_DEVICE_TABLE(pci, intel_eth_pci_id_table); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h index e723096c0b15..542acb8ce467 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h @@ -14,6 +14,7 @@ /* SERDES defines */ #define SERDES_PLL_CLK BIT(0) /* PLL clk valid signal */ +#define SERDES_PHY_RX_CLK BIT(1) /* PSE SGMII PHY rx clk */ #define SERDES_RST BIT(2) /* Serdes Reset */ #define SERDES_PWR_ST_MASK GENMASK(6, 4) /* Serdes Power state*/ #define SERDES_PWR_ST_SHIFT 4 diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c index bf3250e0e59c..28dd0ed85a82 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c 
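As an aside on the `_ID` suffix rename above: `PCI_DEVICE_DATA(vend, dev, data)` token-pastes `PCI_DEVICE_ID_##vend##_##dev`, so each table entry and its `#define` must share the exact device token, and dropping the redundant suffix shortens both in lockstep. The macro also stashes the per-board `stmmac_pci_info` pointer in `driver_data`, which probe reads back. A minimal sketch, with the struct reduced to the one callback the diff exercises and `example_probe` as a hypothetical name:

```c
#include <linux/errno.h>
#include <linux/pci.h>

struct plat_stmmacenet_data;

struct stmmac_pci_info {
	int (*setup)(struct pci_dev *pdev, struct plat_stmmacenet_data *plat);
};

/* { PCI_DEVICE_DATA(INTEL, QUARK, &quark_info) } expands to roughly:
 *	.vendor = PCI_VENDOR_ID_INTEL,
 *	.device = PCI_DEVICE_ID_INTEL_QUARK,
 *	.subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID,
 *	.driver_data = (kernel_ulong_t)&quark_info
 */
static int example_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	const struct stmmac_pci_info *info =
		(const struct stmmac_pci_info *)id->driver_data;

	/* intel_eth_pci_probe() performs the same fetch before calling
	 * info->setup(pdev, plat) to apply per-board configuration.
	 */
	return info ? 0 : -ENODEV;
}
```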
@@ -255,7 +255,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) if (val) return val; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); @@ -352,6 +352,8 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) plat_dat->bsp_priv = gmac; plat_dat->fix_mac_speed = ipq806x_gmac_fix_mac_speed; plat_dat->multicast_filter_bins = 0; + plat_dat->tx_fifo_size = 8192; + plat_dat->rx_fifo_size = 8192; err = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (err) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c index 3d3f43d91b98..9d77c647badd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c @@ -37,7 +37,7 @@ static int lpc18xx_dwmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index 9e4b83832938..58c0feaa8131 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -407,7 +407,7 @@ static int mediatek_dwmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c index bbc16b5a410a..16fb66a0ca72 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c @@ -52,7 +52,7 @@ static int meson6_dwmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c index 848e5c37746b..c7a6588d9398 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c @@ -398,7 +398,7 @@ static int meson8b_dwmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c index 8551ea878ba5..adfeb8d3293d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c @@ -118,7 +118,7 @@ static int oxnas_dwmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index bfc4a92f1d92..84382fc5cc4d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ 
b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -461,7 +461,7 @@ static int qcom_ethqos_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) { dev_err(&pdev->dev, "dt configuration failed\n"); return PTR_ERR(plat_dat); @@ -477,7 +477,6 @@ static int qcom_ethqos_probe(struct platform_device *pdev) res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "rgmii"); ethqos->rgmii_base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(ethqos->rgmii_base)) { - dev_err(&pdev->dev, "Can't get rgmii base\n"); ret = PTR_ERR(ethqos->rgmii_base); goto err_mem; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 6ef30252bfe0..8d28a536e1bb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -1396,7 +1396,7 @@ static int rk_gmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 70d41783329d..85208128f135 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -398,7 +398,7 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c index e1b63df6f96f..710d7435733e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c @@ -325,7 +325,7 @@ static int sti_dwmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c index 5d4df4c5254e..2b38a499a404 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c @@ -371,7 +371,7 @@ static int stm32_dwmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 6b75cf2603ff..4422baeed3d8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -239,6 +239,22 @@ static const struct emac_variant emac_variant_h6 = { #define EMAC_RX_EARLY_INT BIT(13) #define EMAC_RGMII_STA_INT BIT(16) +#define EMAC_INT_MSK_COMMON EMAC_RGMII_STA_INT +#define EMAC_INT_MSK_TX (EMAC_TX_INT | \ + EMAC_TX_DMA_STOP_INT | \ + EMAC_TX_BUF_UA_INT | \ + EMAC_TX_TIMEOUT_INT | \ + EMAC_TX_UNDERFLOW_INT | \ + EMAC_TX_EARLY_INT |\ + EMAC_INT_MSK_COMMON) +#define EMAC_INT_MSK_RX (EMAC_RX_INT | \ + EMAC_RX_BUF_UA_INT 
| \ + EMAC_RX_DMA_STOP_INT | \ + EMAC_RX_TIMEOUT_INT | \ + EMAC_RX_OVERFLOW_INT | \ + EMAC_RX_EARLY_INT | \ + EMAC_INT_MSK_COMMON) + #define MAC_ADDR_TYPE_DST BIT(31) /* H3 specific bits for EPHY */ @@ -412,13 +428,19 @@ static void sun8i_dwmac_dma_stop_rx(void __iomem *ioaddr, u32 chan) } static int sun8i_dwmac_dma_interrupt(void __iomem *ioaddr, - struct stmmac_extra_stats *x, u32 chan) + struct stmmac_extra_stats *x, u32 chan, + u32 dir) { u32 v; int ret = 0; v = readl(ioaddr + EMAC_INT_STA); + if (dir == DMA_DIR_RX) + v &= EMAC_INT_MSK_RX; + else if (dir == DMA_DIR_TX) + v &= EMAC_INT_MSK_TX; + if (v & EMAC_TX_INT) { ret |= handle_tx; x->tx_normal_irq_n++; @@ -1199,7 +1221,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) if (ret) return -EINVAL; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); @@ -1214,6 +1236,8 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) plat_dat->init = sun8i_dwmac_init; plat_dat->exit = sun8i_dwmac_exit; plat_dat->setup = sun8i_dwmac_setup; + plat_dat->tx_fifo_size = 4096; + plat_dat->rx_fifo_size = 16384; ret = sun8i_dwmac_set_syscon(&pdev->dev, plat_dat); if (ret) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c index 0e1ca2cba3c7..527077c98ebc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c @@ -108,7 +108,7 @@ static int sun7i_gmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c index d23be45a64e5..d046e33b8a29 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c @@ -208,7 +208,7 @@ static int visconti_eth_dwmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c index 2bac49b49f73..90383abafa66 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c @@ -255,7 +255,7 @@ static void dwmac1000_get_hw_feature(void __iomem *ioaddr, } static void dwmac1000_rx_watchdog(void __iomem *ioaddr, u32 riwt, - u32 number_chan) + u32 queue) { writel(riwt, ioaddr + DMA_RX_WATCHDOG); } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index 82df91c130f7..462ca7ed095a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -42,6 +42,7 @@ #define GMAC_HW_FEATURE3 0x00000128 #define GMAC_MDIO_ADDR 0x00000200 #define GMAC_MDIO_DATA 0x00000204 +#define GMAC_GPIO_STATUS 0x0000020C #define GMAC_ARP_ADDR 0x00000210 #define GMAC_ADDR_HIGH(reg) (0x300 + reg * 8) #define GMAC_ADDR_LOW(reg) (0x304 + reg * 8) @@ -49,6 +50,7 @@ #define GMAC_L4_ADDR(reg) (0x904 + (reg) * 0x30) #define GMAC_L3_ADDR0(reg) (0x910 + (reg) * 0x30) #define GMAC_L3_ADDR1(reg) (0x914 + (reg) * 0x30) +#define 
GMAC_TIMESTAMP_STATUS 0x00000b20 /* RX Queues Routing */ #define GMAC_RXQCTRL_AVCPQ_MASK GENMASK(2, 0) @@ -143,6 +145,7 @@ #define GMAC_INT_PCS_PHYIS BIT(3) #define GMAC_INT_PMT_EN BIT(4) #define GMAC_INT_LPI_EN BIT(5) +#define GMAC_INT_TSIE BIT(12) #define GMAC_PCS_IRQ_DEFAULT (GMAC_INT_RGSMIIS | GMAC_INT_PCS_LINK | \ GMAC_INT_PCS_ANE) @@ -259,6 +262,7 @@ enum power_event { #define GMAC_HW_RXFIFOSIZE GENMASK(4, 0) /* MAC HW features2 bitmap */ +#define GMAC_HW_FEAT_AUXSNAPNUM GENMASK(30, 28) #define GMAC_HW_FEAT_PPSOUTNUM GENMASK(26, 24) #define GMAC_HW_FEAT_TXCHCNT GENMASK(21, 18) #define GMAC_HW_FEAT_RXCHCNT GENMASK(15, 12) @@ -278,6 +282,12 @@ enum power_event { #define GMAC_HW_FEAT_DVLAN BIT(5) #define GMAC_HW_FEAT_NRVF GENMASK(2, 0) +/* GMAC GPIO Status reg */ +#define GMAC_GPO0 BIT(16) +#define GMAC_GPO1 BIT(17) +#define GMAC_GPO2 BIT(18) +#define GMAC_GPO3 BIT(19) + /* MAC HW ADDR regs */ #define GMAC_HI_DCS GENMASK(18, 16) #define GMAC_HI_DCS_SHIFT 16 @@ -298,6 +308,11 @@ enum power_event { #define GMAC_L4DP0_SHIFT 16 #define GMAC_L4SP0 GENMASK(15, 0) +/* MAC Timestamp Status */ +#define GMAC_TIMESTAMP_AUXTSTRIG BIT(2) +#define GMAC_TIMESTAMP_ATSNS_MASK GENMASK(29, 25) +#define GMAC_TIMESTAMP_ATSNS_SHIFT 25 + /* MTL registers */ #define MTL_OPERATION_MODE 0x00000c00 #define MTL_FRPE BIT(15) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 29f765a246a0..95864f014ffa 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -53,6 +53,10 @@ static void dwmac4_core_init(struct mac_device_info *hw, if (hw->pcs) value |= GMAC_PCS_IRQ_DEFAULT; + /* Enable FPE interrupt */ + if ((GMAC_HW_FEAT_FPESEL & readl(ioaddr + GMAC_HW_FEATURE3)) >> 26) + value |= GMAC_INT_FPE_EN; + writel(value, ioaddr + GMAC_INT_EN); } @@ -1245,6 +1249,8 @@ const struct stmmac_ops dwmac410_ops = { .config_l4_filter = dwmac4_config_l4_filter, .est_configure = dwmac5_est_configure, .fpe_configure = dwmac5_fpe_configure, + .fpe_send_mpacket = dwmac5_fpe_send_mpacket, + .fpe_irq_status = dwmac5_fpe_irq_status, .add_hw_vlan_rx_fltr = dwmac4_add_hw_vlan_rx_fltr, .del_hw_vlan_rx_fltr = dwmac4_del_hw_vlan_rx_fltr, .restore_hw_vlan_rx_fltr = dwmac4_restore_hw_vlan_rx_fltr, @@ -1294,6 +1300,8 @@ const struct stmmac_ops dwmac510_ops = { .config_l4_filter = dwmac4_config_l4_filter, .est_configure = dwmac5_est_configure, .fpe_configure = dwmac5_fpe_configure, + .fpe_send_mpacket = dwmac5_fpe_send_mpacket, + .fpe_irq_status = dwmac5_fpe_irq_status, .add_hw_vlan_rx_fltr = dwmac4_add_hw_vlan_rx_fltr, .del_hw_vlan_rx_fltr = dwmac4_del_hw_vlan_rx_fltr, .restore_hw_vlan_rx_fltr = dwmac4_restore_hw_vlan_rx_fltr, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c index 62aa0e95beb7..a602d16b9e53 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c @@ -161,6 +161,19 @@ static void dwmac4_dma_init(void __iomem *ioaddr, value |= DMA_SYS_BUS_EAME; writel(value, ioaddr + DMA_SYS_BUS_MODE); + + value = readl(ioaddr + DMA_BUS_MODE); + + if (dma_cfg->multi_msi_en) { + value &= ~DMA_BUS_MODE_INTM_MASK; + value |= (DMA_BUS_MODE_INTM_MODE1 << DMA_BUS_MODE_INTM_SHIFT); + } + + if (dma_cfg->dche) + value |= DMA_BUS_MODE_DCHE; + + writel(value, ioaddr + DMA_BUS_MODE); + } static void _dwmac4_dump_dma_regs(void __iomem *ioaddr, u32 channel, @@ -210,12 +223,9 @@ static void 
dwmac4_dump_dma_regs(void __iomem *ioaddr, u32 *reg_space) _dwmac4_dump_dma_regs(ioaddr, i, reg_space); } -static void dwmac4_rx_watchdog(void __iomem *ioaddr, u32 riwt, u32 number_chan) +static void dwmac4_rx_watchdog(void __iomem *ioaddr, u32 riwt, u32 queue) { - u32 chan; - - for (chan = 0; chan < number_chan; chan++) - writel(riwt, ioaddr + DMA_CHAN_RX_WATCHDOG(chan)); + writel(riwt, ioaddr + DMA_CHAN_RX_WATCHDOG(queue)); } static void dwmac4_dma_rx_chan_op_mode(void __iomem *ioaddr, int mode, @@ -415,6 +425,8 @@ static void dwmac4_get_hw_feature(void __iomem *ioaddr, /* IEEE 1588-2002 */ dma_cap->time_stamp = 0; + /* Number of Auxiliary Snapshot Inputs */ + dma_cap->aux_snapshot_n = (hw_cap & GMAC_HW_FEAT_AUXSNAPNUM) >> 28; /* MAC HW feature3 */ hw_cap = readl(ioaddr + GMAC_HW_FEATURE3); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h index 8391ca63d943..9321879b599c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h @@ -25,6 +25,10 @@ #define DMA_TBS_CTRL 0x00001050 /* DMA Bus Mode bitmap */ +#define DMA_BUS_MODE_DCHE BIT(19) +#define DMA_BUS_MODE_INTM_MASK GENMASK(17, 16) +#define DMA_BUS_MODE_INTM_SHIFT 16 +#define DMA_BUS_MODE_INTM_MODE1 0x1 #define DMA_BUS_MODE_SFT_RESET BIT(0) /* DMA SYS Bus Mode bitmap */ @@ -149,6 +153,25 @@ #define DMA_CHAN_STATUS_TPS BIT(1) #define DMA_CHAN_STATUS_TI BIT(0) +#define DMA_CHAN_STATUS_MSK_COMMON (DMA_CHAN_STATUS_NIS | \ + DMA_CHAN_STATUS_AIS | \ + DMA_CHAN_STATUS_CDE | \ + DMA_CHAN_STATUS_FBE) + +#define DMA_CHAN_STATUS_MSK_RX (DMA_CHAN_STATUS_REB | \ + DMA_CHAN_STATUS_ERI | \ + DMA_CHAN_STATUS_RWT | \ + DMA_CHAN_STATUS_RPS | \ + DMA_CHAN_STATUS_RBU | \ + DMA_CHAN_STATUS_RI | \ + DMA_CHAN_STATUS_MSK_COMMON) + +#define DMA_CHAN_STATUS_MSK_TX (DMA_CHAN_STATUS_ETI | \ + DMA_CHAN_STATUS_TBU | \ + DMA_CHAN_STATUS_TPS | \ + DMA_CHAN_STATUS_TI | \ + DMA_CHAN_STATUS_MSK_COMMON) + /* Interrupt enable bits per channel */ #define DMA_CHAN_INTR_ENA_NIE BIT(16) #define DMA_CHAN_INTR_ENA_AIE BIT(15) @@ -206,7 +229,7 @@ void dwmac4_dma_stop_tx(void __iomem *ioaddr, u32 chan); void dwmac4_dma_start_rx(void __iomem *ioaddr, u32 chan); void dwmac4_dma_stop_rx(void __iomem *ioaddr, u32 chan); int dwmac4_dma_interrupt(void __iomem *ioaddr, - struct stmmac_extra_stats *x, u32 chan); + struct stmmac_extra_stats *x, u32 chan, u32 dir); void dwmac4_set_rx_ring_len(void __iomem *ioaddr, u32 len, u32 chan); void dwmac4_set_tx_ring_len(void __iomem *ioaddr, u32 len, u32 chan); void dwmac4_set_rx_tail_ptr(void __iomem *ioaddr, u32 tail_ptr, u32 chan); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c index 71e50751ef2d..e63270267578 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c @@ -135,12 +135,17 @@ void dwmac410_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx) } int dwmac4_dma_interrupt(void __iomem *ioaddr, - struct stmmac_extra_stats *x, u32 chan) + struct stmmac_extra_stats *x, u32 chan, u32 dir) { u32 intr_status = readl(ioaddr + DMA_CHAN_STATUS(chan)); u32 intr_en = readl(ioaddr + DMA_CHAN_INTR_ENA(chan)); int ret = 0; + if (dir == DMA_DIR_RX) + intr_status &= DMA_CHAN_STATUS_MSK_RX; + else if (dir == DMA_DIR_TX) + intr_status &= DMA_CHAN_STATUS_MSK_TX; + /* ABNORMAL interrupts */ if (unlikely(intr_status & DMA_CHAN_STATUS_AIS)) { if (unlikely(intr_status & DMA_CHAN_STATUS_RBU)) @@ -161,20 
+166,19 @@ int dwmac4_dma_interrupt(void __iomem *ioaddr, } } /* TX/RX NORMAL interrupts */ - if (likely(intr_status & DMA_CHAN_STATUS_NIS)) { + if (likely(intr_status & DMA_CHAN_STATUS_NIS)) x->normal_irq_n++; - if (likely(intr_status & DMA_CHAN_STATUS_RI)) { - x->rx_normal_irq_n++; - ret |= handle_rx; - } - if (likely(intr_status & (DMA_CHAN_STATUS_TI | - DMA_CHAN_STATUS_TBU))) { - x->tx_normal_irq_n++; - ret |= handle_tx; - } - if (unlikely(intr_status & DMA_CHAN_STATUS_ERI)) - x->rx_early_irq++; + if (likely(intr_status & DMA_CHAN_STATUS_RI)) { + x->rx_normal_irq_n++; + ret |= handle_rx; + } + if (likely(intr_status & (DMA_CHAN_STATUS_TI | + DMA_CHAN_STATUS_TBU))) { + x->tx_normal_irq_n++; + ret |= handle_tx; } + if (unlikely(intr_status & DMA_CHAN_STATUS_ERI)) + x->rx_early_irq++; writel(intr_status & intr_en, ioaddr + DMA_CHAN_STATUS(chan)); return ret; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c index 8f7ac24545ef..d8c6ff725237 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c @@ -192,6 +192,7 @@ int dwmac5_safety_feat_config(void __iomem *ioaddr, unsigned int asp) /* 1. Enable Safety Features */ value = readl(ioaddr + MTL_ECC_CONTROL); + value |= MEEAO; /* MTL ECC Error Addr Status Override */ value |= TSOEE; /* TSO ECC */ value |= MRXPEE; /* MTL RX Parser ECC */ value |= MESTEE; /* MTL EST ECC */ @@ -595,9 +596,95 @@ int dwmac5_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg, ctrl &= ~EEST; writel(ctrl, ioaddr + MTL_EST_CONTROL); + + /* Configure EST interrupt */ + if (cfg->enable) + ctrl = (IECGCE | IEHS | IEHF | IEBE | IECC); + else + ctrl = 0; + + writel(ctrl, ioaddr + MTL_EST_INT_EN); + return 0; } +void dwmac5_est_irq_status(void __iomem *ioaddr, struct net_device *dev, + struct stmmac_extra_stats *x, u32 txqcnt) +{ + u32 status, value, feqn, hbfq, hbfs, btrl; + u32 txqcnt_mask = (1 << txqcnt) - 1; + + status = readl(ioaddr + MTL_EST_STATUS); + + value = (CGCE | HLBS | HLBF | BTRE | SWLC); + + /* Return if there is no error */ + if (!(status & value)) + return; + + if (status & CGCE) { + /* Clear Interrupt */ + writel(CGCE, ioaddr + MTL_EST_STATUS); + + x->mtl_est_cgce++; + } + + if (status & HLBS) { + value = readl(ioaddr + MTL_EST_SCH_ERR); + value &= txqcnt_mask; + + x->mtl_est_hlbs++; + + /* Clear Interrupt */ + writel(value, ioaddr + MTL_EST_SCH_ERR); + + /* Collecting info to show all the queues that have the HLBS + * issue.
The only way to clear this is to clear the + * statistic + */ + if (net_ratelimit()) + netdev_err(dev, "EST: HLB(sched) Queue 0x%x\n", value); + } + + if (status & HLBF) { + value = readl(ioaddr + MTL_EST_FRM_SZ_ERR); + feqn = value & txqcnt_mask; + + value = readl(ioaddr + MTL_EST_FRM_SZ_CAP); + hbfq = (value & SZ_CAP_HBFQ_MASK(txqcnt)) >> SZ_CAP_HBFQ_SHIFT; + hbfs = value & SZ_CAP_HBFS_MASK; + + x->mtl_est_hlbf++; + + /* Clear Interrupt */ + writel(feqn, ioaddr + MTL_EST_FRM_SZ_ERR); + + if (net_ratelimit()) + netdev_err(dev, "EST: HLB(size) Queue %u Size %u\n", + hbfq, hbfs); + } + + if (status & BTRE) { + if ((status & BTRL) == BTRL_MAX) + x->mtl_est_btrlm++; + else + x->mtl_est_btre++; + + btrl = (status & BTRL) >> BTRL_SHIFT; + + if (net_ratelimit()) + netdev_info(dev, "EST: BTR Error Loop Count %u\n", + btrl); + + writel(BTRE, ioaddr + MTL_EST_STATUS); + } + + if (status & SWLC) { + writel(SWLC, ioaddr + MTL_EST_STATUS); + netdev_info(dev, "EST: SWOL has been switched\n"); + } +} + void dwmac5_fpe_configure(void __iomem *ioaddr, u32 num_txq, u32 num_rxq, bool enable) { @@ -621,3 +708,52 @@ void dwmac5_fpe_configure(void __iomem *ioaddr, u32 num_txq, u32 num_rxq, value |= EFPE; writel(value, ioaddr + MAC_FPE_CTRL_STS); } + +int dwmac5_fpe_irq_status(void __iomem *ioaddr, struct net_device *dev) +{ + u32 value; + int status; + + status = FPE_EVENT_UNKNOWN; + + value = readl(ioaddr + MAC_FPE_CTRL_STS); + + if (value & TRSP) { + status |= FPE_EVENT_TRSP; + netdev_info(dev, "FPE: Respond mPacket is transmitted\n"); + } + + if (value & TVER) { + status |= FPE_EVENT_TVER; + netdev_info(dev, "FPE: Verify mPacket is transmitted\n"); + } + + if (value & RRSP) { + status |= FPE_EVENT_RRSP; + netdev_info(dev, "FPE: Respond mPacket is received\n"); + } + + if (value & RVER) { + status |= FPE_EVENT_RVER; + netdev_info(dev, "FPE: Verify mPacket is received\n"); + } + + return status; +} + +void dwmac5_fpe_send_mpacket(void __iomem *ioaddr, enum stmmac_mpacket_type type) +{ + u32 value; + + value = readl(ioaddr + MAC_FPE_CTRL_STS); + + if (type == MPACKET_VERIFY) { + value &= ~SRSP; + value |= SVER; + } else { + value &= ~SVER; + value |= SRSP; + } + + writel(value, ioaddr + MAC_FPE_CTRL_STS); +} diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h index 56b0762c1276..6b2fd37b29ad 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h @@ -12,6 +12,12 @@ #define TMOUTEN BIT(0) #define MAC_FPE_CTRL_STS 0x00000234 +#define TRSP BIT(19) +#define TVER BIT(18) +#define RRSP BIT(17) +#define RVER BIT(16) +#define SRSP BIT(2) +#define SVER BIT(1) #define EFPE BIT(0) #define MAC_PPS_CONTROL 0x00000b70 @@ -38,6 +44,36 @@ #define PTOV_SHIFT 24 #define SSWL BIT(1) #define EEST BIT(0) + +#define MTL_EST_STATUS 0x00000c58 +#define BTRL GENMASK(11, 8) +#define BTRL_SHIFT 8 +#define BTRL_MAX (0xF << BTRL_SHIFT) +#define SWOL BIT(7) +#define SWOL_SHIFT 7 +#define CGCE BIT(4) +#define HLBS BIT(3) +#define HLBF BIT(2) +#define BTRE BIT(1) +#define SWLC BIT(0) + +#define MTL_EST_SCH_ERR 0x00000c60 +#define MTL_EST_FRM_SZ_ERR 0x00000c64 +#define MTL_EST_FRM_SZ_CAP 0x00000c68 +#define SZ_CAP_HBFS_MASK GENMASK(14, 0) +#define SZ_CAP_HBFQ_SHIFT 16 +#define SZ_CAP_HBFQ_MASK(_val) ({ typeof(_val) (val) = (_val); \ + ((val) > 4 ? GENMASK(18, 16) : \ + (val) > 2 ? 
GENMASK(17, 16) : \ + BIT(16)); }) + +#define MTL_EST_INT_EN 0x00000c70 +#define IECGCE CGCE +#define IEHS HLBS +#define IEHF HLBF +#define IEBE BTRE +#define IECC SWLC + #define MTL_EST_GCL_CONTROL 0x00000c80 #define BTR_LOW 0x0 #define BTR_HIGH 0x1 @@ -62,6 +98,7 @@ #define ADDR GENMASK(15, 0) #define MTL_RXP_IACC_DATA 0x00000cb4 #define MTL_ECC_CONTROL 0x00000cc0 +#define MEEAO BIT(8) #define TSOEE BIT(4) #define MRXPEE BIT(3) #define MESTEE BIT(2) @@ -98,6 +135,8 @@ #define GMAC_RXQCTRL_VFFQ_SHIFT 17 #define GMAC_RXQCTRL_VFFQE BIT(16) +#define GMAC_INT_FPE_EN BIT(17) + int dwmac5_safety_feat_config(void __iomem *ioaddr, unsigned int asp); int dwmac5_safety_feat_irq_status(struct net_device *ndev, void __iomem *ioaddr, unsigned int asp, @@ -111,7 +150,12 @@ int dwmac5_flex_pps_config(void __iomem *ioaddr, int index, u32 sub_second_inc, u32 systime_flags); int dwmac5_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg, unsigned int ptp_rate); +void dwmac5_est_irq_status(void __iomem *ioaddr, struct net_device *dev, + struct stmmac_extra_stats *x, u32 txqcnt); void dwmac5_fpe_configure(void __iomem *ioaddr, u32 num_txq, u32 num_rxq, bool enable); +void dwmac5_fpe_send_mpacket(void __iomem *ioaddr, + enum stmmac_mpacket_type type); +int dwmac5_fpe_irq_status(void __iomem *ioaddr, struct net_device *dev); #endif /* __DWMAC5_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h index e5dbd0bc257e..1914ad698cab 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h @@ -128,6 +128,26 @@ #define DMA_STATUS_TI 0x00000001 /* Transmit Interrupt */ #define DMA_CONTROL_FTF 0x00100000 /* Flush transmit FIFO */ +#define DMA_STATUS_MSK_COMMON (DMA_STATUS_NIS | \ + DMA_STATUS_AIS | \ + DMA_STATUS_FBI) + +#define DMA_STATUS_MSK_RX (DMA_STATUS_ERI | \ + DMA_STATUS_RWT | \ + DMA_STATUS_RPS | \ + DMA_STATUS_RU | \ + DMA_STATUS_RI | \ + DMA_STATUS_OVF | \ + DMA_STATUS_MSK_COMMON) + +#define DMA_STATUS_MSK_TX (DMA_STATUS_ETI | \ + DMA_STATUS_UNF | \ + DMA_STATUS_TJT | \ + DMA_STATUS_TU | \ + DMA_STATUS_TPS | \ + DMA_STATUS_TI | \ + DMA_STATUS_MSK_COMMON) + #define NUM_DWMAC100_DMA_REGS 9 #define NUM_DWMAC1000_DMA_REGS 23 @@ -139,7 +159,7 @@ void dwmac_dma_stop_tx(void __iomem *ioaddr, u32 chan); void dwmac_dma_start_rx(void __iomem *ioaddr, u32 chan); void dwmac_dma_stop_rx(void __iomem *ioaddr, u32 chan); int dwmac_dma_interrupt(void __iomem *ioaddr, struct stmmac_extra_stats *x, - u32 chan); + u32 chan, u32 dir); int dwmac_dma_reset(void __iomem *ioaddr); #endif /* __DWMAC_DMA_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c index 57a53a600aa5..d1c31200bb91 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c @@ -155,7 +155,7 @@ static void show_rx_process_state(unsigned int status) #endif int dwmac_dma_interrupt(void __iomem *ioaddr, - struct stmmac_extra_stats *x, u32 chan) + struct stmmac_extra_stats *x, u32 chan, u32 dir) { int ret = 0; /* read the status register (CSR5) */ @@ -167,6 +167,12 @@ int dwmac_dma_interrupt(void __iomem *ioaddr, show_tx_process_state(intr_status); show_rx_process_state(intr_status); #endif + + if (dir == DMA_DIR_RX) + intr_status &= DMA_STATUS_MSK_RX; + else if (dir == DMA_DIR_TX) + intr_status &= DMA_STATUS_MSK_TX; + /* ABNORMAL interrupts */ if (unlikely(intr_status & DMA_STATUS_AIS)) { if (unlikely(intr_status & 
DMA_STATUS_UNF)) { diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h index 6c3b8a950f58..1913385df685 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h @@ -412,6 +412,12 @@ #define XGMAC_TI BIT(0) #define XGMAC_REGSIZE ((0x0000317c + (0x80 * 15)) / 4) +#define XGMAC_DMA_STATUS_MSK_COMMON (XGMAC_NIS | XGMAC_AIS | XGMAC_FBE) +#define XGMAC_DMA_STATUS_MSK_RX (XGMAC_RBU | XGMAC_RI | \ + XGMAC_DMA_STATUS_MSK_COMMON) +#define XGMAC_DMA_STATUS_MSK_TX (XGMAC_TBU | XGMAC_TPS | XGMAC_TI | \ + XGMAC_DMA_STATUS_MSK_COMMON) + /* Descriptors */ #define XGMAC_TDES0_LTV BIT(31) #define XGMAC_TDES0_LT GENMASK(7, 0) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index 77308c5c5d29..906e985441a9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -323,12 +323,18 @@ static void dwxgmac2_dma_stop_rx(void __iomem *ioaddr, u32 chan) } static int dwxgmac2_dma_interrupt(void __iomem *ioaddr, - struct stmmac_extra_stats *x, u32 chan) + struct stmmac_extra_stats *x, u32 chan, + u32 dir) { u32 intr_status = readl(ioaddr + XGMAC_DMA_CH_STATUS(chan)); u32 intr_en = readl(ioaddr + XGMAC_DMA_CH_INT_EN(chan)); int ret = 0; + if (dir == DMA_DIR_RX) + intr_status &= XGMAC_DMA_STATUS_MSK_RX; + else if (dir == DMA_DIR_TX) + intr_status &= XGMAC_DMA_STATUS_MSK_TX; + /* ABNORMAL interrupts */ if (unlikely(intr_status & XGMAC_AIS)) { if (unlikely(intr_status & XGMAC_RBU)) { @@ -441,12 +447,9 @@ static void dwxgmac2_get_hw_feature(void __iomem *ioaddr, dma_cap->frpsel = (hw_cap & XGMAC_HWFEAT_FRPSEL) >> 3; } -static void dwxgmac2_rx_watchdog(void __iomem *ioaddr, u32 riwt, u32 nchan) +static void dwxgmac2_rx_watchdog(void __iomem *ioaddr, u32 riwt, u32 queue) { - u32 i; - - for (i = 0; i < nchan; i++) - writel(riwt & XGMAC_RWT, ioaddr + XGMAC_DMA_CH_Rx_WATCHDOG(i)); + writel(riwt & XGMAC_RWT, ioaddr + XGMAC_DMA_CH_Rx_WATCHDOG(queue)); } static void dwxgmac2_set_rx_ring_len(void __iomem *ioaddr, u32 len, u32 chan) diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index 979ac9fca23c..2cc91759b91f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -201,12 +201,12 @@ struct stmmac_dma_ops { void (*start_rx)(void __iomem *ioaddr, u32 chan); void (*stop_rx)(void __iomem *ioaddr, u32 chan); int (*dma_interrupt) (void __iomem *ioaddr, - struct stmmac_extra_stats *x, u32 chan); + struct stmmac_extra_stats *x, u32 chan, u32 dir); /* If supported then get the optional core features */ void (*get_hw_feature)(void __iomem *ioaddr, struct dma_features *dma_cap); /* Program the HW RX Watchdog */ - void (*rx_watchdog)(void __iomem *ioaddr, u32 riwt, u32 number_chan); + void (*rx_watchdog)(void __iomem *ioaddr, u32 riwt, u32 queue); void (*set_tx_ring_len)(void __iomem *ioaddr, u32 len, u32 chan); void (*set_rx_ring_len)(void __iomem *ioaddr, u32 len, u32 chan); void (*set_rx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan); @@ -280,7 +280,6 @@ struct stmmac_dma_ops { struct mac_device_info; struct net_device; struct rgmii_adv; -struct stmmac_safety_stats; struct stmmac_tc_entry; struct stmmac_pps_cfg; struct stmmac_rss; @@ -393,8 +392,13 @@ struct stmmac_ops { void (*set_arp_offload)(struct mac_device_info *hw, bool en, u32 addr); int (*est_configure)(void __iomem *ioaddr, 
struct stmmac_est *cfg, unsigned int ptp_rate); + void (*est_irq_status)(void __iomem *ioaddr, struct net_device *dev, + struct stmmac_extra_stats *x, u32 txqcnt); void (*fpe_configure)(void __iomem *ioaddr, u32 num_txq, u32 num_rxq, bool enable); + void (*fpe_send_mpacket)(void __iomem *ioaddr, + enum stmmac_mpacket_type type); + int (*fpe_irq_status)(void __iomem *ioaddr, struct net_device *dev); }; #define stmmac_core_init(__priv, __args...) \ @@ -491,8 +495,16 @@ struct stmmac_ops { stmmac_do_void_callback(__priv, mac, set_arp_offload, __args) #define stmmac_est_configure(__priv, __args...) \ stmmac_do_callback(__priv, mac, est_configure, __args) +#define stmmac_est_irq_status(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, est_irq_status, __args) #define stmmac_fpe_configure(__priv, __args...) \ stmmac_do_void_callback(__priv, mac, fpe_configure, __args) +#define stmmac_fpe_send_mpacket(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, fpe_send_mpacket, __args) +#define stmmac_fpe_irq_status(__priv, __args...) \ + stmmac_do_callback(__priv, mac, fpe_irq_status, __args) + +struct stmmac_priv; /* PTP and HW Timer helpers */ struct stmmac_hwtimestamp { @@ -504,6 +516,8 @@ struct stmmac_hwtimestamp { int (*adjust_systime) (void __iomem *ioaddr, u32 sec, u32 nsec, int add_sub, int gmac4); void (*get_systime) (void __iomem *ioaddr, u64 *systime); + void (*get_ptptime)(void __iomem *ioaddr, u64 *ptp_time); + void (*timestamp_interrupt)(struct stmmac_priv *priv); }; #define stmmac_config_hw_tstamping(__priv, __args...) \ @@ -518,6 +532,10 @@ struct stmmac_hwtimestamp { stmmac_do_callback(__priv, ptp, adjust_systime, __args) #define stmmac_get_systime(__priv, __args...) \ stmmac_do_void_callback(__priv, ptp, get_systime, __args) +#define stmmac_get_ptptime(__priv, __args...) \ + stmmac_do_void_callback(__priv, ptp, get_ptptime, __args) +#define stmmac_timestamp_interrupt(__priv, __args...) 
\ + stmmac_do_void_callback(__priv, ptp, timestamp_interrupt, __args) /* Helpers to manage the descriptors for chain and ring modes */ struct stmmac_mode_ops { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index e553b9a1f785..b6cd43eda7ac 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -26,10 +26,21 @@ struct stmmac_resources { void __iomem *addr; - const char *mac; + u8 mac[ETH_ALEN]; int wol_irq; int lpi_irq; int irq; + int sfty_ce_irq; + int sfty_ue_irq; + int rx_irq[MTL_MAX_RX_QUEUES]; + int tx_irq[MTL_MAX_TX_QUEUES]; +}; + +enum stmmac_txbuf_type { + STMMAC_TXBUF_T_SKB, + STMMAC_TXBUF_T_XDP_TX, + STMMAC_TXBUF_T_XDP_NDO, + STMMAC_TXBUF_T_XSK_TX, }; struct stmmac_tx_info { @@ -38,6 +49,7 @@ struct stmmac_tx_info { unsigned len; bool last_segment; bool is_jumbo; + enum stmmac_txbuf_type buf_type; }; #define STMMAC_TBS_AVAIL BIT(0) @@ -53,8 +65,13 @@ struct stmmac_tx_queue { struct dma_extended_desc *dma_etx ____cacheline_aligned_in_smp; struct dma_edesc *dma_entx; struct dma_desc *dma_tx; - struct sk_buff **tx_skbuff; + union { + struct sk_buff **tx_skbuff; + struct xdp_frame **xdpf; + }; struct stmmac_tx_info *tx_skbuff_dma; + struct xsk_buff_pool *xsk_pool; + u32 xsk_frames_done; unsigned int cur_tx; unsigned int dirty_tx; dma_addr_t dma_tx_phy; @@ -63,15 +80,23 @@ struct stmmac_tx_queue { }; struct stmmac_rx_buffer { - struct page *page; + union { + struct { + struct page *page; + dma_addr_t addr; + __u32 page_offset; + }; + struct xdp_buff *xdp; + }; struct page *sec_page; - dma_addr_t addr; dma_addr_t sec_addr; }; struct stmmac_rx_queue { u32 rx_count_frames; u32 queue_index; + struct xdp_rxq_info xdp_rxq; + struct xsk_buff_pool *xsk_pool; struct page_pool *page_pool; struct stmmac_rx_buffer *buf_pool; struct stmmac_priv *priv_data; @@ -79,6 +104,7 @@ struct stmmac_rx_queue { struct dma_desc *dma_rx ____cacheline_aligned_in_smp; unsigned int cur_rx; unsigned int dirty_rx; + unsigned int buf_alloc_num; u32 rx_zeroc_thresh; dma_addr_t dma_rx_phy; u32 rx_tail_addr; @@ -93,6 +119,7 @@ struct stmmac_rx_queue { struct stmmac_channel { struct napi_struct rx_napi ____cacheline_aligned_in_smp; struct napi_struct tx_napi ____cacheline_aligned_in_smp; + struct napi_struct rxtx_napi ____cacheline_aligned_in_smp; struct stmmac_priv *priv_data; spinlock_t lock; u32 index; @@ -147,20 +174,21 @@ struct stmmac_flow_entry { struct stmmac_priv { /* Frequently used values are kept adjacent for cache effect */ - u32 tx_coal_frames; - u32 tx_coal_timer; - u32 rx_coal_frames; + u32 tx_coal_frames[MTL_MAX_TX_QUEUES]; + u32 tx_coal_timer[MTL_MAX_TX_QUEUES]; + u32 rx_coal_frames[MTL_MAX_TX_QUEUES]; int tx_coalesce; int hwts_tx_en; bool tx_path_in_lpi_mode; bool tso; int sph; + int sph_cap; u32 sarc_type; unsigned int dma_buf_sz; unsigned int rx_copybreak; - u32 rx_riwt; + u32 rx_riwt[MTL_MAX_TX_QUEUES]; int hwts_rx_en; void __iomem *ioaddr; @@ -222,9 +250,24 @@ struct stmmac_priv { int use_riwt; int irq_wake; spinlock_t ptp_lock; + /* Protects auxiliary snapshot registers from concurrent access. 
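An aside on the stmmac_txbuf_type tag introduced above: marking each TX slot with its owner is what lets a single completion routine release SKBs, XDP frames and XSK buffers from the same ring. Below is a minimal userspace sketch of that dispatch; the type and helper names are invented for illustration, and only the branching mirrors what stmmac_free_tx_buffer() does.

    #include <stdio.h>

    /* Mirrors the enum added above; values tag who owns each TX slot. */
    enum txbuf_type { TXBUF_SKB, TXBUF_XDP_TX, TXBUF_XDP_NDO, TXBUF_XSK_TX };

    /* Hypothetical completion handler: free each slot by its tag, the way
     * the driver's stmmac_free_tx_buffer() branches on buf_type. */
    static void complete_slot(enum txbuf_type t, unsigned *xsk_done)
    {
        switch (t) {
        case TXBUF_SKB:     /* dev_kfree_skb_any() in the driver */ break;
        case TXBUF_XDP_TX:
        case TXBUF_XDP_NDO: /* xdp_return_frame() in the driver */ break;
        case TXBUF_XSK_TX:  (*xsk_done)++; /* batched into xsk_tx_completed() */ break;
        }
    }

    int main(void)
    {
        enum txbuf_type ring[4] = { TXBUF_SKB, TXBUF_XSK_TX, TXBUF_XDP_TX, TXBUF_XSK_TX };
        unsigned xsk_done = 0;
        for (int i = 0; i < 4; i++)
            complete_slot(ring[i], &xsk_done);
        printf("xsk frames completed: %u\n", xsk_done); /* prints 2 */
        return 0;
    }

Keeping the tag in tx_skbuff_dma, next to the DMA mapping state, avoids having to guess ownership from the tx_skbuff/xdpf pointer union itself.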
*/ + struct mutex aux_ts_lock; + void __iomem *mmcaddr; void __iomem *ptpaddr; unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; + int sfty_ce_irq; + int sfty_ue_irq; + int rx_irq[MTL_MAX_RX_QUEUES]; + int tx_irq[MTL_MAX_TX_QUEUES]; + /*irq name */ + char int_name_mac[IFNAMSIZ + 9]; + char int_name_wol[IFNAMSIZ + 9]; + char int_name_lpi[IFNAMSIZ + 9]; + char int_name_sfty_ce[IFNAMSIZ + 10]; + char int_name_sfty_ue[IFNAMSIZ + 10]; + char int_name_rx_irq[MTL_MAX_TX_QUEUES][IFNAMSIZ + 14]; + char int_name_tx_irq[MTL_MAX_TX_QUEUES][IFNAMSIZ + 18]; #ifdef CONFIG_DEBUG_FS struct dentry *dbgfs_dir; @@ -234,6 +277,12 @@ struct stmmac_priv { struct workqueue_struct *wq; struct work_struct service_task; + /* Workqueue for handling FPE hand-shaking */ + unsigned long fpe_task_state; + struct workqueue_struct *fpe_wq; + struct work_struct fpe_task; + char wq_name[IFNAMSIZ + 4]; + /* TC Handling */ unsigned int tc_entries_max; unsigned int tc_off_max; @@ -246,6 +295,10 @@ struct stmmac_priv { /* Receive Side Scaling */ struct stmmac_rss rss; + + /* XDP BPF Program */ + unsigned long *af_xdp_zc_qps; + struct bpf_prog *xdp_prog; }; enum stmmac_state { @@ -262,6 +315,8 @@ void stmmac_set_ethtool_ops(struct net_device *netdev); void stmmac_ptp_register(struct stmmac_priv *priv); void stmmac_ptp_unregister(struct stmmac_priv *priv); +int stmmac_open(struct net_device *dev); +int stmmac_release(struct net_device *dev); int stmmac_resume(struct device *dev); int stmmac_suspend(struct device *dev); int stmmac_dvr_remove(struct device *dev); @@ -272,6 +327,27 @@ void stmmac_disable_eee_mode(struct stmmac_priv *priv); bool stmmac_eee_init(struct stmmac_priv *priv); int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt); int stmmac_reinit_ringparam(struct net_device *dev, u32 rx_size, u32 tx_size); +int stmmac_bus_clks_config(struct stmmac_priv *priv, bool enabled); +void stmmac_fpe_handshake(struct stmmac_priv *priv, bool enable); + +static inline bool stmmac_xdp_is_enabled(struct stmmac_priv *priv) +{ + return !!priv->xdp_prog; +} + +static inline unsigned int stmmac_rx_offset(struct stmmac_priv *priv) +{ + if (stmmac_xdp_is_enabled(priv)) + return XDP_PACKET_HEADROOM; + + return 0; +} + +void stmmac_disable_rx_queue(struct stmmac_priv *priv, u32 queue); +void stmmac_enable_rx_queue(struct stmmac_priv *priv, u32 queue); +void stmmac_disable_tx_queue(struct stmmac_priv *priv, u32 queue); +void stmmac_enable_tx_queue(struct stmmac_priv *priv, u32 queue); +int stmmac_xsk_wakeup(struct net_device *dev, u32 queue, u32 flags); #if IS_ENABLED(CONFIG_STMMAC_SELFTESTS) void stmmac_selftest_run(struct net_device *dev, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index c5642985ef95..61b11639ee0c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -158,6 +158,12 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = { /* TSO */ STMMAC_STAT(tx_tso_frames), STMMAC_STAT(tx_tso_nfrags), + /* EST */ + STMMAC_STAT(mtl_est_cgce), + STMMAC_STAT(mtl_est_hlbs), + STMMAC_STAT(mtl_est_hlbf), + STMMAC_STAT(mtl_est_btre), + STMMAC_STAT(mtl_est_btrlm), }; #define STMMAC_STATS_LEN ARRAY_SIZE(stmmac_gstrings_stats) @@ -756,28 +762,75 @@ static u32 stmmac_riwt2usec(u32 riwt, struct stmmac_priv *priv) return (riwt * 256) / (clk / 1000000); } -static int stmmac_get_coalesce(struct net_device *dev, - struct ethtool_coalesce *ec) +static int 
__stmmac_get_coalesce(struct net_device *dev, + struct ethtool_coalesce *ec, + int queue) { struct stmmac_priv *priv = netdev_priv(dev); + u32 max_cnt; + u32 rx_cnt; + u32 tx_cnt; - ec->tx_coalesce_usecs = priv->tx_coal_timer; - ec->tx_max_coalesced_frames = priv->tx_coal_frames; + rx_cnt = priv->plat->rx_queues_to_use; + tx_cnt = priv->plat->tx_queues_to_use; + max_cnt = max(rx_cnt, tx_cnt); - if (priv->use_riwt) { - ec->rx_max_coalesced_frames = priv->rx_coal_frames; - ec->rx_coalesce_usecs = stmmac_riwt2usec(priv->rx_riwt, priv); + if (queue < 0) + queue = 0; + else if (queue >= max_cnt) + return -EINVAL; + + if (queue < tx_cnt) { + ec->tx_coalesce_usecs = priv->tx_coal_timer[queue]; + ec->tx_max_coalesced_frames = priv->tx_coal_frames[queue]; + } else { + ec->tx_coalesce_usecs = 0; + ec->tx_max_coalesced_frames = 0; + } + + if (priv->use_riwt && queue < rx_cnt) { + ec->rx_max_coalesced_frames = priv->rx_coal_frames[queue]; + ec->rx_coalesce_usecs = stmmac_riwt2usec(priv->rx_riwt[queue], + priv); + } else { + ec->rx_max_coalesced_frames = 0; + ec->rx_coalesce_usecs = 0; } return 0; } -static int stmmac_set_coalesce(struct net_device *dev, +static int stmmac_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec) { + return __stmmac_get_coalesce(dev, ec, -1); +} + +static int stmmac_get_per_queue_coalesce(struct net_device *dev, u32 queue, + struct ethtool_coalesce *ec) +{ + return __stmmac_get_coalesce(dev, ec, queue); +} + +static int __stmmac_set_coalesce(struct net_device *dev, + struct ethtool_coalesce *ec, + int queue) +{ struct stmmac_priv *priv = netdev_priv(dev); - u32 rx_cnt = priv->plat->rx_queues_to_use; + bool all_queues = false; unsigned int rx_riwt; + u32 max_cnt; + u32 rx_cnt; + u32 tx_cnt; + + rx_cnt = priv->plat->rx_queues_to_use; + tx_cnt = priv->plat->tx_queues_to_use; + max_cnt = max(rx_cnt, tx_cnt); + + if (queue < 0) + all_queues = true; + else if (queue >= max_cnt) + return -EINVAL; if (priv->use_riwt && (ec->rx_coalesce_usecs > 0)) { rx_riwt = stmmac_usec2riwt(ec->rx_coalesce_usecs, priv); @@ -785,8 +838,23 @@ static int stmmac_set_coalesce(struct net_device *dev, if ((rx_riwt > MAX_DMA_RIWT) || (rx_riwt < MIN_DMA_RIWT)) return -EINVAL; - priv->rx_riwt = rx_riwt; - stmmac_rx_watchdog(priv, priv->ioaddr, priv->rx_riwt, rx_cnt); + if (all_queues) { + int i; + + for (i = 0; i < rx_cnt; i++) { + priv->rx_riwt[i] = rx_riwt; + stmmac_rx_watchdog(priv, priv->ioaddr, + rx_riwt, i); + priv->rx_coal_frames[i] = + ec->rx_max_coalesced_frames; + } + } else if (queue < rx_cnt) { + priv->rx_riwt[queue] = rx_riwt; + stmmac_rx_watchdog(priv, priv->ioaddr, + rx_riwt, queue); + priv->rx_coal_frames[queue] = + ec->rx_max_coalesced_frames; + } } if ((ec->tx_coalesce_usecs == 0) && @@ -797,13 +865,37 @@ static int stmmac_set_coalesce(struct net_device *dev, (ec->tx_max_coalesced_frames > STMMAC_TX_MAX_FRAMES)) return -EINVAL; - /* Only copy relevant parameters, ignore all others. 
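For the per-queue coalescing above, the stored rx_riwt value is in RIWT register units; stmmac_riwt2usec() converts with (riwt * 256) / (clk / 1000000), i.e. one RIWT unit is 256 CSR clock cycles. A self-contained sketch of both directions, assuming a hypothetical 250 MHz CSR clock (the inverse is what stmmac_usec2riwt() is presumed to compute):

    #include <stdio.h>

    /* Same arithmetic as stmmac_riwt2usec(): one RIWT unit is 256 CSR
     * clock cycles, so usec = riwt * 256 / (clk_hz / 1000000). */
    static unsigned riwt2usec(unsigned riwt, unsigned clk_hz)
    {
        return (riwt * 256) / (clk_hz / 1000000);
    }

    /* Inverse mapping, rounding down. */
    static unsigned usec2riwt(unsigned usec, unsigned clk_hz)
    {
        return (usec * (clk_hz / 1000000)) / 256;
    }

    int main(void)
    {
        unsigned clk = 250000000; /* hypothetical 250 MHz CSR clock */

        /* 100 us -> riwt 97; back again -> 99 us (integer truncation). */
        printf("riwt=%u\n", usec2riwt(100, clk));
        printf("usec=%u\n", riwt2usec(usec2riwt(100, clk), clk));
        return 0;
    }

The per-queue entry points registered further down are reachable from userspace through ethtool's sub-queue interface, e.g. ethtool --per-queue eth0 queue_mask 0x2 --coalesce rx-usecs 100, depending on the ethtool version.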
*/ - priv->tx_coal_frames = ec->tx_max_coalesced_frames; - priv->tx_coal_timer = ec->tx_coalesce_usecs; - priv->rx_coal_frames = ec->rx_max_coalesced_frames; + if (all_queues) { + int i; + + for (i = 0; i < tx_cnt; i++) { + priv->tx_coal_frames[i] = + ec->tx_max_coalesced_frames; + priv->tx_coal_timer[i] = + ec->tx_coalesce_usecs; + } + } else if (queue < tx_cnt) { + priv->tx_coal_frames[queue] = + ec->tx_max_coalesced_frames; + priv->tx_coal_timer[queue] = + ec->tx_coalesce_usecs; + } + return 0; } +static int stmmac_set_coalesce(struct net_device *dev, + struct ethtool_coalesce *ec) +{ + return __stmmac_set_coalesce(dev, ec, -1); +} + +static int stmmac_set_per_queue_coalesce(struct net_device *dev, u32 queue, + struct ethtool_coalesce *ec) +{ + return __stmmac_set_coalesce(dev, ec, queue); +} + static int stmmac_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *rxnfc, u32 *rule_locs) { @@ -1001,6 +1093,8 @@ static const struct ethtool_ops stmmac_ethtool_ops = { .get_ts_info = stmmac_get_ts_info, .get_coalesce = stmmac_get_coalesce, .set_coalesce = stmmac_set_coalesce, + .get_per_queue_coalesce = stmmac_get_per_queue_coalesce, + .set_per_queue_coalesce = stmmac_set_per_queue_coalesce, .get_channels = stmmac_get_channels, .set_channels = stmmac_set_channels, .get_tunable = stmmac_get_tunable, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index d291612eeafb..074e2cdfb0fa 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -12,8 +12,11 @@ #include <linux/io.h> #include <linux/iopoll.h> #include <linux/delay.h> +#include <linux/ptp_clock_kernel.h> #include "common.h" #include "stmmac_ptp.h" +#include "dwmac4.h" +#include "stmmac.h" static void config_hw_tstamping(void __iomem *ioaddr, u32 data) { @@ -153,6 +156,51 @@ static void get_systime(void __iomem *ioaddr, u64 *systime) *systime = ns; } +static void get_ptptime(void __iomem *ptpaddr, u64 *ptp_time) +{ + u64 ns; + + ns = readl(ptpaddr + PTP_ATNR); + ns += readl(ptpaddr + PTP_ATSR) * NSEC_PER_SEC; + + *ptp_time = ns; +} + +static void timestamp_interrupt(struct stmmac_priv *priv) +{ + u32 num_snapshot, ts_status, tsync_int; + struct ptp_clock_event event; + unsigned long flags; + u64 ptp_time; + int i; + + tsync_int = readl(priv->ioaddr + GMAC_INT_STATUS) & GMAC_INT_TSIE; + + if (!tsync_int) + return; + + /* Read timestamp status to clear interrupt from either external + * timestamp or start/end of PPS. 
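The get_ptptime() helper shown above folds the two 32-bit auxiliary snapshot registers (PTP_ATNR holding nanoseconds, PTP_ATSR holding seconds) into a single 64-bit nanosecond value. The same arithmetic in isolation, with made-up register contents:

    #include <stdint.h>
    #include <stdio.h>

    #define NSEC_PER_SEC 1000000000ULL

    /* Same composition as get_ptptime(): nanoseconds register plus the
     * seconds register scaled to nanoseconds, widened to 64 bits. */
    static uint64_t compose_ptp_time(uint32_t atnr, uint32_t atsr)
    {
        return (uint64_t)atnr + (uint64_t)atsr * NSEC_PER_SEC;
    }

    int main(void)
    {
        /* e.g. 5 s + 123456789 ns -> 5123456789 ns */
        printf("%llu\n", (unsigned long long)compose_ptp_time(123456789u, 5u));
        return 0;
    }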
+ */ + ts_status = readl(priv->ioaddr + GMAC_TIMESTAMP_STATUS); + + if (!priv->plat->ext_snapshot_en) + return; + + num_snapshot = (ts_status & GMAC_TIMESTAMP_ATSNS_MASK) >> + GMAC_TIMESTAMP_ATSNS_SHIFT; + + for (i = 0; i < num_snapshot; i++) { + spin_lock_irqsave(&priv->ptp_lock, flags); + get_ptptime(priv->ptpaddr, &ptp_time); + spin_unlock_irqrestore(&priv->ptp_lock, flags); + event.type = PTP_CLOCK_EXTTS; + event.index = 0; + event.timestamp = ptp_time; + ptp_clock_event(priv->ptp_clock, &event); + } +} + const struct stmmac_hwtimestamp stmmac_ptp = { .config_hw_tstamping = config_hw_tstamping, .init_systime = init_systime, @@ -160,4 +208,6 @@ const struct stmmac_hwtimestamp stmmac_ptp = { .config_addend = config_addend, .adjust_systime = adjust_systime, .get_systime = get_systime, + .get_ptptime = get_ptptime, + .timestamp_interrupt = timestamp_interrupt, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 208cae344ffa..a9a984c57d78 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -28,6 +28,7 @@ #include <linux/if_vlan.h> #include <linux/dma-mapping.h> #include <linux/slab.h> +#include <linux/pm_runtime.h> #include <linux/prefetch.h> #include <linux/pinctrl/consumer.h> #ifdef CONFIG_DEBUG_FS @@ -37,9 +38,12 @@ #include <linux/net_tstamp.h> #include <linux/phylink.h> #include <linux/udp.h> +#include <linux/bpf_trace.h> #include <net/pkt_cls.h> +#include <net/xdp_sock_drv.h> #include "stmmac_ptp.h" #include "stmmac.h" +#include "stmmac_xdp.h" #include <linux/reset.h> #include <linux/of_mdio.h> #include "dwmac1000.h" @@ -66,6 +70,16 @@ MODULE_PARM_DESC(phyaddr, "Physical device address"); #define STMMAC_TX_THRESH(x) ((x)->dma_tx_size / 4) #define STMMAC_RX_THRESH(x) ((x)->dma_rx_size / 4) +/* Limit to make sure XDP TX and slow path can coexist */ +#define STMMAC_XSK_TX_BUDGET_MAX 256 +#define STMMAC_TX_XSK_AVAIL 16 +#define STMMAC_RX_FILL_BATCH 16 + +#define STMMAC_XDP_PASS 0 +#define STMMAC_XDP_CONSUMED BIT(0) +#define STMMAC_XDP_TX BIT(1) +#define STMMAC_XDP_REDIRECT BIT(2) + static int flow_ctrl = FLOW_AUTO; module_param(flow_ctrl, int, 0644); MODULE_PARM_DESC(flow_ctrl, "Flow control ability [on/off]"); @@ -104,6 +118,13 @@ module_param(chain_mode, int, 0444); MODULE_PARM_DESC(chain_mode, "To use chain instead of ring mode"); static irqreturn_t stmmac_interrupt(int irq, void *dev_id); +/* For MSI interrupts handling */ +static irqreturn_t stmmac_mac_interrupt(int irq, void *dev_id); +static irqreturn_t stmmac_safety_interrupt(int irq, void *dev_id); +static irqreturn_t stmmac_msi_intr_tx(int irq, void *data); +static irqreturn_t stmmac_msi_intr_rx(int irq, void *data); +static void stmmac_tx_timer_arm(struct stmmac_priv *priv, u32 queue); +static void stmmac_flush_tx_descriptors(struct stmmac_priv *priv, int queue); #ifdef CONFIG_DEBUG_FS static const struct net_device_ops stmmac_netdev_ops; @@ -113,6 +134,38 @@ static void stmmac_exit_fs(struct net_device *dev); #define STMMAC_COAL_TIMER(x) (ns_to_ktime((x) * NSEC_PER_USEC)) +int stmmac_bus_clks_config(struct stmmac_priv *priv, bool enabled) +{ + int ret = 0; + + if (enabled) { + ret = clk_prepare_enable(priv->plat->stmmac_clk); + if (ret) + return ret; + ret = clk_prepare_enable(priv->plat->pclk); + if (ret) { + clk_disable_unprepare(priv->plat->stmmac_clk); + return ret; + } + if (priv->plat->clks_config) { + ret = priv->plat->clks_config(priv->plat->bsp_priv, enabled); + if (ret) { + 
clk_disable_unprepare(priv->plat->stmmac_clk); + clk_disable_unprepare(priv->plat->pclk); + return ret; + } + } + } else { + clk_disable_unprepare(priv->plat->stmmac_clk); + clk_disable_unprepare(priv->plat->pclk); + if (priv->plat->clks_config) + priv->plat->clks_config(priv->plat->bsp_priv, enabled); + } + + return ret; +} +EXPORT_SYMBOL_GPL(stmmac_bus_clks_config); + /** * stmmac_verify_args - verify the driver parameters. * Description: it checks the driver parameters and set a default in case of @@ -134,11 +187,7 @@ static void stmmac_verify_args(void) eee_timer = STMMAC_DEFAULT_LPI_TIMER; } -/** - * stmmac_disable_all_queues - Disable all queues - * @priv: driver private structure - */ -static void stmmac_disable_all_queues(struct stmmac_priv *priv) +static void __stmmac_disable_all_queues(struct stmmac_priv *priv) { u32 rx_queues_cnt = priv->plat->rx_queues_to_use; u32 tx_queues_cnt = priv->plat->tx_queues_to_use; @@ -148,6 +197,12 @@ static void stmmac_disable_all_queues(struct stmmac_priv *priv) for (queue = 0; queue < maxq; queue++) { struct stmmac_channel *ch = &priv->channel[queue]; + if (stmmac_xdp_is_enabled(priv) && + test_bit(queue, priv->af_xdp_zc_qps)) { + napi_disable(&ch->rxtx_napi); + continue; + } + if (queue < rx_queues_cnt) napi_disable(&ch->rx_napi); if (queue < tx_queues_cnt) @@ -156,6 +211,28 @@ static void stmmac_disable_all_queues(struct stmmac_priv *priv) } /** + * stmmac_disable_all_queues - Disable all queues + * @priv: driver private structure + */ +static void stmmac_disable_all_queues(struct stmmac_priv *priv) +{ + u32 rx_queues_cnt = priv->plat->rx_queues_to_use; + struct stmmac_rx_queue *rx_q; + u32 queue; + + /* synchronize_rcu() needed for pending XDP buffers to drain */ + for (queue = 0; queue < rx_queues_cnt; queue++) { + rx_q = &priv->rx_queue[queue]; + if (rx_q->xsk_pool) { + synchronize_rcu(); + break; + } + } + + __stmmac_disable_all_queues(priv); +} + +/** * stmmac_enable_all_queues - Enable all queues * @priv: driver private structure */ @@ -169,6 +246,12 @@ static void stmmac_enable_all_queues(struct stmmac_priv *priv) for (queue = 0; queue < maxq; queue++) { struct stmmac_channel *ch = &priv->channel[queue]; + if (stmmac_xdp_is_enabled(priv) && + test_bit(queue, priv->af_xdp_zc_qps)) { + napi_enable(&ch->rxtx_napi); + continue; + } + if (queue < rx_queues_cnt) napi_enable(&ch->rx_napi); if (queue < tx_queues_cnt) @@ -433,6 +516,7 @@ static void stmmac_get_tx_hwtstamp(struct stmmac_priv *priv, { struct skb_shared_hwtstamps shhwtstamp; bool found = false; + s64 adjust = 0; u64 ns = 0; if (!priv->hwts_tx_en) @@ -451,6 +535,13 @@ static void stmmac_get_tx_hwtstamp(struct stmmac_priv *priv, } if (found) { + /* Correct the clk domain crossing(CDC) error */ + if (priv->plat->has_gmac4 && priv->plat->clk_ptp_rate) { + adjust += -(2 * (NSEC_PER_SEC / + priv->plat->clk_ptp_rate)); + ns += adjust; + } + memset(&shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps)); shhwtstamp.hwtstamp = ns_to_ktime(ns); @@ -474,6 +565,7 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p, { struct skb_shared_hwtstamps *shhwtstamp = NULL; struct dma_desc *desc = p; + u64 adjust = 0; u64 ns = 0; if (!priv->hwts_rx_en) @@ -485,6 +577,13 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p, /* Check if timestamp is available */ if (stmmac_get_rx_timestamp_status(priv, p, np, priv->adv_ts)) { stmmac_get_timestamp(priv, desc, priv->adv_ts, &ns); + + /* Correct the clk domain crossing(CDC) error */ + if 
(priv->plat->has_gmac4 && priv->plat->clk_ptp_rate) { + adjust += 2 * (NSEC_PER_SEC / priv->plat->clk_ptp_rate); + ns -= adjust; + } + netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns); shhwtstamp = skb_hwtstamps(skb); memset(shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps)); @@ -922,6 +1021,21 @@ static void stmmac_mac_an_restart(struct phylink_config *config) /* Not Supported */ } +static void stmmac_fpe_link_state_handle(struct stmmac_priv *priv, bool is_up) +{ + struct stmmac_fpe_cfg *fpe_cfg = priv->plat->fpe_cfg; + enum stmmac_fpe_state *lo_state = &fpe_cfg->lo_fpe_state; + enum stmmac_fpe_state *lp_state = &fpe_cfg->lp_fpe_state; + bool *hs_enable = &fpe_cfg->hs_enable; + + if (is_up && *hs_enable) { + stmmac_fpe_send_mpacket(priv, priv->ioaddr, MPACKET_VERIFY); + } else { + *lo_state = FPE_EVENT_UNKNOWN; + *lp_state = FPE_EVENT_UNKNOWN; + } +} + static void stmmac_mac_link_down(struct phylink_config *config, unsigned int mode, phy_interface_t interface) { @@ -932,6 +1046,9 @@ static void stmmac_mac_link_down(struct phylink_config *config, priv->tx_lpi_enabled = false; stmmac_eee_init(priv); stmmac_set_eee_pls(priv, priv->hw, false); + + if (priv->dma_cap.fpesel) + stmmac_fpe_link_state_handle(priv, false); } static void stmmac_mac_link_up(struct phylink_config *config, @@ -1030,6 +1147,9 @@ static void stmmac_mac_link_up(struct phylink_config *config, priv->tx_lpi_enabled = priv->eee_enabled; stmmac_set_eee_pls(priv, priv->hw, true); } + + if (priv->dma_cap.fpesel) + stmmac_fpe_link_state_handle(priv, true); } static const struct phylink_mac_ops stmmac_phylink_mac_ops = { @@ -1117,6 +1237,8 @@ static int stmmac_phy_setup(struct stmmac_priv *priv) priv->phylink_config.dev = &priv->dev->dev; priv->phylink_config.type = PHYLINK_NETDEV; priv->phylink_config.pcs_poll = true; + priv->phylink_config.ovr_an_inband = + priv->plat->mdio_bus_data->xpcs_an_inband; if (!fwnode) fwnode = dev_fwnode(priv->device); @@ -1304,11 +1426,14 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p, struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i]; - buf->page = page_pool_dev_alloc_pages(rx_q->page_pool); - if (!buf->page) - return -ENOMEM; + if (!buf->page) { + buf->page = page_pool_dev_alloc_pages(rx_q->page_pool); + if (!buf->page) + return -ENOMEM; + buf->page_offset = stmmac_rx_offset(priv); + } - if (priv->sph) { + if (priv->sph && !buf->sec_page) { buf->sec_page = page_pool_dev_alloc_pages(rx_q->page_pool); if (!buf->sec_page) return -ENOMEM; @@ -1320,7 +1445,8 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p, stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, false); } - buf->addr = page_pool_get_dma_addr(buf->page); + buf->addr = page_pool_get_dma_addr(buf->page) + buf->page_offset; + stmmac_set_desc_addr(priv, p, buf->addr); if (priv->dma_buf_sz == BUF_SIZE_16KiB) stmmac_init_desc3(priv, p); @@ -1358,7 +1484,8 @@ static void stmmac_free_tx_buffer(struct stmmac_priv *priv, u32 queue, int i) { struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; - if (tx_q->tx_skbuff_dma[i].buf) { + if (tx_q->tx_skbuff_dma[i].buf && + tx_q->tx_skbuff_dma[i].buf_type != STMMAC_TXBUF_T_XDP_TX) { if (tx_q->tx_skbuff_dma[i].map_as_page) dma_unmap_page(priv->device, tx_q->tx_skbuff_dma[i].buf, @@ -1371,166 +1498,227 @@ static void stmmac_free_tx_buffer(struct stmmac_priv *priv, u32 queue, int i) DMA_TO_DEVICE); } - if (tx_q->tx_skbuff[i]) { + if (tx_q->xdpf[i] && + 
(tx_q->tx_skbuff_dma[i].buf_type == STMMAC_TXBUF_T_XDP_TX || + tx_q->tx_skbuff_dma[i].buf_type == STMMAC_TXBUF_T_XDP_NDO)) { + xdp_return_frame(tx_q->xdpf[i]); + tx_q->xdpf[i] = NULL; + } + + if (tx_q->tx_skbuff_dma[i].buf_type == STMMAC_TXBUF_T_XSK_TX) + tx_q->xsk_frames_done++; + + if (tx_q->tx_skbuff[i] && + tx_q->tx_skbuff_dma[i].buf_type == STMMAC_TXBUF_T_SKB) { dev_kfree_skb_any(tx_q->tx_skbuff[i]); tx_q->tx_skbuff[i] = NULL; - tx_q->tx_skbuff_dma[i].buf = 0; - tx_q->tx_skbuff_dma[i].map_as_page = false; } + + tx_q->tx_skbuff_dma[i].buf = 0; + tx_q->tx_skbuff_dma[i].map_as_page = false; } /** - * stmmac_reinit_rx_buffers - reinit the RX descriptor buffer. - * @priv: driver private structure - * Description: this function is called to re-allocate a receive buffer, perform - * the DMA mapping and init the descriptor. + * dma_free_rx_skbufs - free RX dma buffers + * @priv: private structure + * @queue: RX queue index */ -static void stmmac_reinit_rx_buffers(struct stmmac_priv *priv) +static void dma_free_rx_skbufs(struct stmmac_priv *priv, u32 queue) { - u32 rx_count = priv->plat->rx_queues_to_use; - u32 queue; int i; - for (queue = 0; queue < rx_count; queue++) { - struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + for (i = 0; i < priv->dma_rx_size; i++) + stmmac_free_rx_buffer(priv, queue, i); +} - for (i = 0; i < priv->dma_rx_size; i++) { - struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i]; +static int stmmac_alloc_rx_buffers(struct stmmac_priv *priv, u32 queue, + gfp_t flags) +{ + struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + int i; - if (buf->page) { - page_pool_recycle_direct(rx_q->page_pool, buf->page); - buf->page = NULL; - } + for (i = 0; i < priv->dma_rx_size; i++) { + struct dma_desc *p; + int ret; - if (priv->sph && buf->sec_page) { - page_pool_recycle_direct(rx_q->page_pool, buf->sec_page); - buf->sec_page = NULL; - } - } + if (priv->extend_desc) + p = &((rx_q->dma_erx + i)->basic); + else + p = rx_q->dma_rx + i; + + ret = stmmac_init_rx_buffers(priv, p, i, flags, + queue); + if (ret) + return ret; + + rx_q->buf_alloc_num++; } - for (queue = 0; queue < rx_count; queue++) { - struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + return 0; +} - for (i = 0; i < priv->dma_rx_size; i++) { - struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i]; - struct dma_desc *p; +/** + * dma_free_rx_xskbufs - free RX dma buffers from XSK pool + * @priv: private structure + * @queue: RX queue index + */ +static void dma_free_rx_xskbufs(struct stmmac_priv *priv, u32 queue) +{ + struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + int i; - if (priv->extend_desc) - p = &((rx_q->dma_erx + i)->basic); - else - p = rx_q->dma_rx + i; + for (i = 0; i < priv->dma_rx_size; i++) { + struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i]; - if (!buf->page) { - buf->page = page_pool_dev_alloc_pages(rx_q->page_pool); - if (!buf->page) - goto err_reinit_rx_buffers; + if (!buf->xdp) + continue; - buf->addr = page_pool_get_dma_addr(buf->page); - } + xsk_buff_free(buf->xdp); + buf->xdp = NULL; + } +} - if (priv->sph && !buf->sec_page) { - buf->sec_page = page_pool_dev_alloc_pages(rx_q->page_pool); - if (!buf->sec_page) - goto err_reinit_rx_buffers; +static int stmmac_alloc_rx_buffers_zc(struct stmmac_priv *priv, u32 queue) +{ + struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + int i; - buf->sec_addr = page_pool_get_dma_addr(buf->sec_page); - } + for (i = 0; i < priv->dma_rx_size; i++) { + struct stmmac_rx_buffer *buf; + dma_addr_t dma_addr; + struct dma_desc *p; - 
stmmac_set_desc_addr(priv, p, buf->addr); - if (priv->sph) - stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, true); - else - stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, false); - if (priv->dma_buf_sz == BUF_SIZE_16KiB) - stmmac_init_desc3(priv, p); - } - } + if (priv->extend_desc) + p = (struct dma_desc *)(rx_q->dma_erx + i); + else + p = rx_q->dma_rx + i; - return; + buf = &rx_q->buf_pool[i]; -err_reinit_rx_buffers: - do { - while (--i >= 0) - stmmac_free_rx_buffer(priv, queue, i); + buf->xdp = xsk_buff_alloc(rx_q->xsk_pool); + if (!buf->xdp) + return -ENOMEM; - if (queue == 0) - break; + dma_addr = xsk_buff_xdp_get_dma(buf->xdp); + stmmac_set_desc_addr(priv, p, dma_addr); + rx_q->buf_alloc_num++; + } - i = priv->dma_rx_size; - } while (queue-- > 0); + return 0; +} + +static struct xsk_buff_pool *stmmac_get_xsk_pool(struct stmmac_priv *priv, u32 queue) +{ + if (!stmmac_xdp_is_enabled(priv) || !test_bit(queue, priv->af_xdp_zc_qps)) + return NULL; + + return xsk_get_pool_from_qid(priv->dev, queue); } /** - * init_dma_rx_desc_rings - init the RX descriptor rings - * @dev: net device structure + * __init_dma_rx_desc_rings - init the RX descriptor ring (per queue) + * @priv: driver private structure + * @queue: RX queue index * @flags: gfp flag. * Description: this function initializes the DMA RX descriptors * and allocates the socket buffers. It supports the chained and ring * modes. */ -static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags) +static int __init_dma_rx_desc_rings(struct stmmac_priv *priv, u32 queue, gfp_t flags) { - struct stmmac_priv *priv = netdev_priv(dev); - u32 rx_count = priv->plat->rx_queues_to_use; - int ret = -ENOMEM; - int queue; - int i; + struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + int ret; - /* RX INITIALIZATION */ netif_dbg(priv, probe, priv->dev, - "SKB addresses:\nskb\t\tskb data\tdma data\n"); + "(%s) dma_rx_phy=0x%08x\n", __func__, + (u32)rx_q->dma_rx_phy); - for (queue = 0; queue < rx_count; queue++) { - struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + stmmac_clear_rx_descriptors(priv, queue); - netif_dbg(priv, probe, priv->dev, - "(%s) dma_rx_phy=0x%08x\n", __func__, - (u32)rx_q->dma_rx_phy); + xdp_rxq_info_unreg_mem_model(&rx_q->xdp_rxq); - stmmac_clear_rx_descriptors(priv, queue); + rx_q->xsk_pool = stmmac_get_xsk_pool(priv, queue); - for (i = 0; i < priv->dma_rx_size; i++) { - struct dma_desc *p; + if (rx_q->xsk_pool) { + WARN_ON(xdp_rxq_info_reg_mem_model(&rx_q->xdp_rxq, + MEM_TYPE_XSK_BUFF_POOL, + NULL)); + netdev_info(priv->dev, + "Register MEM_TYPE_XSK_BUFF_POOL RxQ-%d\n", + rx_q->queue_index); + xsk_pool_set_rxq_info(rx_q->xsk_pool, &rx_q->xdp_rxq); + } else { + WARN_ON(xdp_rxq_info_reg_mem_model(&rx_q->xdp_rxq, + MEM_TYPE_PAGE_POOL, + rx_q->page_pool)); + netdev_info(priv->dev, + "Register MEM_TYPE_PAGE_POOL RxQ-%d\n", + rx_q->queue_index); + } - if (priv->extend_desc) - p = &((rx_q->dma_erx + i)->basic); - else - p = rx_q->dma_rx + i; + if (rx_q->xsk_pool) { + /* RX XDP ZC buffer pool may not be populated, e.g. + * xdpsock TX-only. 
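stmmac_get_xsk_pool() above is the per-queue switch between zero-copy and page-pool fill: a pool is returned only when XDP is enabled and the queue's bit is set in af_xdp_zc_qps, otherwise the ring falls back to the page pool. A stubbed standalone sketch of that selection (the pool type and array are invented; only the two-condition gate mirrors the driver):

    #include <stdbool.h>
    #include <stdio.h>

    struct pool { int id; }; /* stand-in for struct xsk_buff_pool */

    static struct pool pools[4] = { {0}, {1}, {2}, {3} };

    /* Return a pool only when XDP is on AND the queue bit is set in the
     * zero-copy bitmap, as stmmac_get_xsk_pool() does via
     * xsk_get_pool_from_qid(). */
    static struct pool *get_xsk_pool(bool xdp_enabled, unsigned long zc_qps,
                                     unsigned queue)
    {
        if (!xdp_enabled || !(zc_qps & (1UL << queue)))
            return NULL;
        return &pools[queue];
    }

    int main(void)
    {
        /* XDP on, zero-copy enabled for queue 1 only. */
        for (unsigned q = 0; q < 4; q++) {
            struct pool *p = get_xsk_pool(true, 0x2UL, q);
            printf("queue %u: %s\n", q, p ? "XSK zero-copy fill" : "page-pool fill");
        }
        return 0;
    }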
+ */ + stmmac_alloc_rx_buffers_zc(priv, queue); + } else { + ret = stmmac_alloc_rx_buffers(priv, queue, flags); + if (ret < 0) + return -ENOMEM; + } - ret = stmmac_init_rx_buffers(priv, p, i, flags, - queue); - if (ret) - goto err_init_rx_buffers; - } + rx_q->cur_rx = 0; + rx_q->dirty_rx = 0; - rx_q->cur_rx = 0; - rx_q->dirty_rx = (unsigned int)(i - priv->dma_rx_size); - - /* Setup the chained descriptor addresses */ - if (priv->mode == STMMAC_CHAIN_MODE) { - if (priv->extend_desc) - stmmac_mode_init(priv, rx_q->dma_erx, - rx_q->dma_rx_phy, - priv->dma_rx_size, 1); - else - stmmac_mode_init(priv, rx_q->dma_rx, - rx_q->dma_rx_phy, - priv->dma_rx_size, 0); - } + /* Setup the chained descriptor addresses */ + if (priv->mode == STMMAC_CHAIN_MODE) { + if (priv->extend_desc) + stmmac_mode_init(priv, rx_q->dma_erx, + rx_q->dma_rx_phy, + priv->dma_rx_size, 1); + else + stmmac_mode_init(priv, rx_q->dma_rx, + rx_q->dma_rx_phy, + priv->dma_rx_size, 0); + } + + return 0; +} + +static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags) +{ + struct stmmac_priv *priv = netdev_priv(dev); + u32 rx_count = priv->plat->rx_queues_to_use; + u32 queue; + int ret; + + /* RX INITIALIZATION */ + netif_dbg(priv, probe, priv->dev, + "SKB addresses:\nskb\t\tskb data\tdma data\n"); + + for (queue = 0; queue < rx_count; queue++) { + ret = __init_dma_rx_desc_rings(priv, queue, flags); + if (ret) + goto err_init_rx_buffers; } return 0; err_init_rx_buffers: while (queue >= 0) { - while (--i >= 0) - stmmac_free_rx_buffer(priv, queue, i); + struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + + if (rx_q->xsk_pool) + dma_free_rx_xskbufs(priv, queue); + else + dma_free_rx_skbufs(priv, queue); + + rx_q->buf_alloc_num = 0; + rx_q->xsk_pool = NULL; if (queue == 0) break; - i = priv->dma_rx_size; queue--; } @@ -1538,63 +1726,75 @@ err_init_rx_buffers: } /** - * init_dma_tx_desc_rings - init the TX descriptor rings - * @dev: net device structure. + * __init_dma_tx_desc_rings - init the TX descriptor ring (per queue) + * @priv: driver private structure + * @queue : TX queue index * Description: this function initializes the DMA TX descriptors * and allocates the socket buffers. It supports the chained and ring * modes. 
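The err_init_rx_buffers path above walks back from the failing queue to queue 0, freeing whatever was set up so far, including the partially initialized queue. The same unwind pattern in miniature, with hypothetical helpers:

    #include <stdio.h>

    /* Hypothetical per-queue init that fails at queue 2, to show the
     * walk-back style used by init_dma_rx_desc_rings(). */
    static int init_queue(int q) { return q == 2 ? -1 : 0; }
    static void free_queue(int q) { printf("free queue %d\n", q); }

    static int init_all(int count)
    {
        int q;

        for (q = 0; q < count; q++) {
            if (init_queue(q))
                goto err;
        }
        return 0;
    err:
        /* Free everything set up so far, including the partial queue. */
        while (q >= 0)
            free_queue(q--);
        return -1;
    }

    int main(void) { return init_all(4) ? 1 : 0; }

Note the driver's variant uses an unsigned queue index, so it breaks out explicitly at queue 0 instead of relying on the counter going negative.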
*/ -static int init_dma_tx_desc_rings(struct net_device *dev) +static int __init_dma_tx_desc_rings(struct stmmac_priv *priv, u32 queue) { - struct stmmac_priv *priv = netdev_priv(dev); - u32 tx_queue_cnt = priv->plat->tx_queues_to_use; - u32 queue; + struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; int i; - for (queue = 0; queue < tx_queue_cnt; queue++) { - struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; + netif_dbg(priv, probe, priv->dev, + "(%s) dma_tx_phy=0x%08x\n", __func__, + (u32)tx_q->dma_tx_phy); - netif_dbg(priv, probe, priv->dev, - "(%s) dma_tx_phy=0x%08x\n", __func__, - (u32)tx_q->dma_tx_phy); - - /* Setup the chained descriptor addresses */ - if (priv->mode == STMMAC_CHAIN_MODE) { - if (priv->extend_desc) - stmmac_mode_init(priv, tx_q->dma_etx, - tx_q->dma_tx_phy, - priv->dma_tx_size, 1); - else if (!(tx_q->tbs & STMMAC_TBS_AVAIL)) - stmmac_mode_init(priv, tx_q->dma_tx, - tx_q->dma_tx_phy, - priv->dma_tx_size, 0); - } + /* Setup the chained descriptor addresses */ + if (priv->mode == STMMAC_CHAIN_MODE) { + if (priv->extend_desc) + stmmac_mode_init(priv, tx_q->dma_etx, + tx_q->dma_tx_phy, + priv->dma_tx_size, 1); + else if (!(tx_q->tbs & STMMAC_TBS_AVAIL)) + stmmac_mode_init(priv, tx_q->dma_tx, + tx_q->dma_tx_phy, + priv->dma_tx_size, 0); + } - for (i = 0; i < priv->dma_tx_size; i++) { - struct dma_desc *p; - if (priv->extend_desc) - p = &((tx_q->dma_etx + i)->basic); - else if (tx_q->tbs & STMMAC_TBS_AVAIL) - p = &((tx_q->dma_entx + i)->basic); - else - p = tx_q->dma_tx + i; + tx_q->xsk_pool = stmmac_get_xsk_pool(priv, queue); - stmmac_clear_desc(priv, p); + for (i = 0; i < priv->dma_tx_size; i++) { + struct dma_desc *p; - tx_q->tx_skbuff_dma[i].buf = 0; - tx_q->tx_skbuff_dma[i].map_as_page = false; - tx_q->tx_skbuff_dma[i].len = 0; - tx_q->tx_skbuff_dma[i].last_segment = false; - tx_q->tx_skbuff[i] = NULL; - } + if (priv->extend_desc) + p = &((tx_q->dma_etx + i)->basic); + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + p = &((tx_q->dma_entx + i)->basic); + else + p = tx_q->dma_tx + i; - tx_q->dirty_tx = 0; - tx_q->cur_tx = 0; - tx_q->mss = 0; + stmmac_clear_desc(priv, p); - netdev_tx_reset_queue(netdev_get_tx_queue(priv->dev, queue)); + tx_q->tx_skbuff_dma[i].buf = 0; + tx_q->tx_skbuff_dma[i].map_as_page = false; + tx_q->tx_skbuff_dma[i].len = 0; + tx_q->tx_skbuff_dma[i].last_segment = false; + tx_q->tx_skbuff[i] = NULL; } + tx_q->dirty_tx = 0; + tx_q->cur_tx = 0; + tx_q->mss = 0; + + netdev_tx_reset_queue(netdev_get_tx_queue(priv->dev, queue)); + + return 0; +} + +static int init_dma_tx_desc_rings(struct net_device *dev) +{ + struct stmmac_priv *priv = netdev_priv(dev); + u32 tx_queue_cnt; + u32 queue; + + tx_queue_cnt = priv->plat->tx_queues_to_use; + + for (queue = 0; queue < tx_queue_cnt; queue++) + __init_dma_tx_desc_rings(priv, queue); + return 0; } @@ -1626,29 +1826,25 @@ static int init_dma_desc_rings(struct net_device *dev, gfp_t flags) } /** - * dma_free_rx_skbufs - free RX dma buffers - * @priv: private structure - * @queue: RX queue index - */ -static void dma_free_rx_skbufs(struct stmmac_priv *priv, u32 queue) -{ - int i; - - for (i = 0; i < priv->dma_rx_size; i++) - stmmac_free_rx_buffer(priv, queue, i); -} - -/** * dma_free_tx_skbufs - free TX dma buffers * @priv: private structure * @queue: TX queue index */ static void dma_free_tx_skbufs(struct stmmac_priv *priv, u32 queue) { + struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; int i; + tx_q->xsk_frames_done = 0; + for (i = 0; i < priv->dma_tx_size; i++) stmmac_free_tx_buffer(priv, queue, i); + + if 
(tx_q->xsk_pool && tx_q->xsk_frames_done) { + xsk_tx_completed(tx_q->xsk_pool, tx_q->xsk_frames_done); + tx_q->xsk_frames_done = 0; + tx_q->xsk_pool = NULL; + } } /** @@ -1665,137 +1861,186 @@ static void stmmac_free_tx_skbufs(struct stmmac_priv *priv) } /** - * free_dma_rx_desc_resources - free RX dma desc resources + * __free_dma_rx_desc_resources - free RX dma desc resources (per queue) * @priv: private structure + * @queue: RX queue index */ +static void __free_dma_rx_desc_resources(struct stmmac_priv *priv, u32 queue) +{ + struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + + /* Release the DMA RX socket buffers */ + if (rx_q->xsk_pool) + dma_free_rx_xskbufs(priv, queue); + else + dma_free_rx_skbufs(priv, queue); + + rx_q->buf_alloc_num = 0; + rx_q->xsk_pool = NULL; + + /* Free DMA regions of consistent memory previously allocated */ + if (!priv->extend_desc) + dma_free_coherent(priv->device, priv->dma_rx_size * + sizeof(struct dma_desc), + rx_q->dma_rx, rx_q->dma_rx_phy); + else + dma_free_coherent(priv->device, priv->dma_rx_size * + sizeof(struct dma_extended_desc), + rx_q->dma_erx, rx_q->dma_rx_phy); + + if (xdp_rxq_info_is_reg(&rx_q->xdp_rxq)) + xdp_rxq_info_unreg(&rx_q->xdp_rxq); + + kfree(rx_q->buf_pool); + if (rx_q->page_pool) + page_pool_destroy(rx_q->page_pool); +} + static void free_dma_rx_desc_resources(struct stmmac_priv *priv) { u32 rx_count = priv->plat->rx_queues_to_use; u32 queue; /* Free RX queue resources */ - for (queue = 0; queue < rx_count; queue++) { - struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; - - /* Release the DMA RX socket buffers */ - dma_free_rx_skbufs(priv, queue); - - /* Free DMA regions of consistent memory previously allocated */ - if (!priv->extend_desc) - dma_free_coherent(priv->device, priv->dma_rx_size * - sizeof(struct dma_desc), - rx_q->dma_rx, rx_q->dma_rx_phy); - else - dma_free_coherent(priv->device, priv->dma_rx_size * - sizeof(struct dma_extended_desc), - rx_q->dma_erx, rx_q->dma_rx_phy); - - kfree(rx_q->buf_pool); - if (rx_q->page_pool) - page_pool_destroy(rx_q->page_pool); - } + for (queue = 0; queue < rx_count; queue++) + __free_dma_rx_desc_resources(priv, queue); } /** - * free_dma_tx_desc_resources - free TX dma desc resources + * __free_dma_tx_desc_resources - free TX dma desc resources (per queue) * @priv: private structure + * @queue: TX queue index */ -static void free_dma_tx_desc_resources(struct stmmac_priv *priv) +static void __free_dma_tx_desc_resources(struct stmmac_priv *priv, u32 queue) { - u32 tx_count = priv->plat->tx_queues_to_use; - u32 queue; + struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; + size_t size; + void *addr; - /* Free TX queue resources */ - for (queue = 0; queue < tx_count; queue++) { - struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; - size_t size; - void *addr; + /* Release the DMA TX socket buffers */ + dma_free_tx_skbufs(priv, queue); + + if (priv->extend_desc) { + size = sizeof(struct dma_extended_desc); + addr = tx_q->dma_etx; + } else if (tx_q->tbs & STMMAC_TBS_AVAIL) { + size = sizeof(struct dma_edesc); + addr = tx_q->dma_entx; + } else { + size = sizeof(struct dma_desc); + addr = tx_q->dma_tx; + } - /* Release the DMA TX socket buffers */ - dma_free_tx_skbufs(priv, queue); + size *= priv->dma_tx_size; - if (priv->extend_desc) { - size = sizeof(struct dma_extended_desc); - addr = tx_q->dma_etx; - } else if (tx_q->tbs & STMMAC_TBS_AVAIL) { - size = sizeof(struct dma_edesc); - addr = tx_q->dma_entx; - } else { - size = sizeof(struct dma_desc); - addr = tx_q->dma_tx; - } + 
dma_free_coherent(priv->device, size, addr, tx_q->dma_tx_phy); - size *= priv->dma_tx_size; + kfree(tx_q->tx_skbuff_dma); + kfree(tx_q->tx_skbuff); +} - dma_free_coherent(priv->device, size, addr, tx_q->dma_tx_phy); +static void free_dma_tx_desc_resources(struct stmmac_priv *priv) +{ + u32 tx_count = priv->plat->tx_queues_to_use; + u32 queue; - kfree(tx_q->tx_skbuff_dma); - kfree(tx_q->tx_skbuff); - } + /* Free TX queue resources */ + for (queue = 0; queue < tx_count; queue++) + __free_dma_tx_desc_resources(priv, queue); } /** - * alloc_dma_rx_desc_resources - alloc RX resources. + * __alloc_dma_rx_desc_resources - alloc RX resources (per queue). * @priv: private structure + * @queue: RX queue index * Description: according to which descriptor can be used (extend or basic) * this function allocates the resources for TX and RX paths. In case of * reception, for example, it pre-allocated the RX socket buffer in order to * allow zero-copy mechanism. */ +static int __alloc_dma_rx_desc_resources(struct stmmac_priv *priv, u32 queue) +{ + struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + struct stmmac_channel *ch = &priv->channel[queue]; + bool xdp_prog = stmmac_xdp_is_enabled(priv); + struct page_pool_params pp_params = { 0 }; + unsigned int num_pages; + unsigned int napi_id; + int ret; + + rx_q->queue_index = queue; + rx_q->priv_data = priv; + + pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; + pp_params.pool_size = priv->dma_rx_size; + num_pages = DIV_ROUND_UP(priv->dma_buf_sz, PAGE_SIZE); + pp_params.order = ilog2(num_pages); + pp_params.nid = dev_to_node(priv->device); + pp_params.dev = priv->device; + pp_params.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; + pp_params.offset = stmmac_rx_offset(priv); + pp_params.max_len = STMMAC_MAX_RX_BUF_SIZE(num_pages); + + rx_q->page_pool = page_pool_create(&pp_params); + if (IS_ERR(rx_q->page_pool)) { + ret = PTR_ERR(rx_q->page_pool); + rx_q->page_pool = NULL; + return ret; + } + + rx_q->buf_pool = kcalloc(priv->dma_rx_size, + sizeof(*rx_q->buf_pool), + GFP_KERNEL); + if (!rx_q->buf_pool) + return -ENOMEM; + + if (priv->extend_desc) { + rx_q->dma_erx = dma_alloc_coherent(priv->device, + priv->dma_rx_size * + sizeof(struct dma_extended_desc), + &rx_q->dma_rx_phy, + GFP_KERNEL); + if (!rx_q->dma_erx) + return -ENOMEM; + + } else { + rx_q->dma_rx = dma_alloc_coherent(priv->device, + priv->dma_rx_size * + sizeof(struct dma_desc), + &rx_q->dma_rx_phy, + GFP_KERNEL); + if (!rx_q->dma_rx) + return -ENOMEM; + } + + if (stmmac_xdp_is_enabled(priv) && + test_bit(queue, priv->af_xdp_zc_qps)) + napi_id = ch->rxtx_napi.napi_id; + else + napi_id = ch->rx_napi.napi_id; + + ret = xdp_rxq_info_reg(&rx_q->xdp_rxq, priv->dev, + rx_q->queue_index, + napi_id); + if (ret) { + netdev_err(priv->dev, "Failed to register xdp rxq info\n"); + return -EINVAL; + } + + return 0; +} + static int alloc_dma_rx_desc_resources(struct stmmac_priv *priv) { u32 rx_count = priv->plat->rx_queues_to_use; - int ret = -ENOMEM; u32 queue; + int ret; /* RX queues buffers and DMA */ for (queue = 0; queue < rx_count; queue++) { - struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; - struct page_pool_params pp_params = { 0 }; - unsigned int num_pages; - - rx_q->queue_index = queue; - rx_q->priv_data = priv; - - pp_params.flags = PP_FLAG_DMA_MAP; - pp_params.pool_size = priv->dma_rx_size; - num_pages = DIV_ROUND_UP(priv->dma_buf_sz, PAGE_SIZE); - pp_params.order = ilog2(num_pages); - pp_params.nid = dev_to_node(priv->device); - pp_params.dev = priv->device; - 
pp_params.dma_dir = DMA_FROM_DEVICE; - - rx_q->page_pool = page_pool_create(&pp_params); - if (IS_ERR(rx_q->page_pool)) { - ret = PTR_ERR(rx_q->page_pool); - rx_q->page_pool = NULL; - goto err_dma; - } - - rx_q->buf_pool = kcalloc(priv->dma_rx_size, - sizeof(*rx_q->buf_pool), - GFP_KERNEL); - if (!rx_q->buf_pool) + ret = __alloc_dma_rx_desc_resources(priv, queue); + if (ret) goto err_dma; - - if (priv->extend_desc) { - rx_q->dma_erx = dma_alloc_coherent(priv->device, - priv->dma_rx_size * - sizeof(struct dma_extended_desc), - &rx_q->dma_rx_phy, - GFP_KERNEL); - if (!rx_q->dma_erx) - goto err_dma; - - } else { - rx_q->dma_rx = dma_alloc_coherent(priv->device, - priv->dma_rx_size * - sizeof(struct dma_desc), - &rx_q->dma_rx_phy, - GFP_KERNEL); - if (!rx_q->dma_rx) - goto err_dma; - } } return 0; @@ -1807,60 +2052,70 @@ err_dma: } /** - * alloc_dma_tx_desc_resources - alloc TX resources. + * __alloc_dma_tx_desc_resources - alloc TX resources (per queue). * @priv: private structure + * @queue: TX queue index * Description: according to which descriptor can be used (extend or basic) * this function allocates the resources for TX and RX paths. In case of * reception, for example, it pre-allocated the RX socket buffer in order to * allow zero-copy mechanism. */ -static int alloc_dma_tx_desc_resources(struct stmmac_priv *priv) +static int __alloc_dma_tx_desc_resources(struct stmmac_priv *priv, u32 queue) { - u32 tx_count = priv->plat->tx_queues_to_use; - int ret = -ENOMEM; - u32 queue; + struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; + size_t size; + void *addr; - /* TX queues buffers and DMA */ - for (queue = 0; queue < tx_count; queue++) { - struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; - size_t size; - void *addr; + tx_q->queue_index = queue; + tx_q->priv_data = priv; - tx_q->queue_index = queue; - tx_q->priv_data = priv; + tx_q->tx_skbuff_dma = kcalloc(priv->dma_tx_size, + sizeof(*tx_q->tx_skbuff_dma), + GFP_KERNEL); + if (!tx_q->tx_skbuff_dma) + return -ENOMEM; - tx_q->tx_skbuff_dma = kcalloc(priv->dma_tx_size, - sizeof(*tx_q->tx_skbuff_dma), - GFP_KERNEL); - if (!tx_q->tx_skbuff_dma) - goto err_dma; + tx_q->tx_skbuff = kcalloc(priv->dma_tx_size, + sizeof(struct sk_buff *), + GFP_KERNEL); + if (!tx_q->tx_skbuff) + return -ENOMEM; - tx_q->tx_skbuff = kcalloc(priv->dma_tx_size, - sizeof(struct sk_buff *), - GFP_KERNEL); - if (!tx_q->tx_skbuff) - goto err_dma; + if (priv->extend_desc) + size = sizeof(struct dma_extended_desc); + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + size = sizeof(struct dma_edesc); + else + size = sizeof(struct dma_desc); - if (priv->extend_desc) - size = sizeof(struct dma_extended_desc); - else if (tx_q->tbs & STMMAC_TBS_AVAIL) - size = sizeof(struct dma_edesc); - else - size = sizeof(struct dma_desc); + size *= priv->dma_tx_size; + + addr = dma_alloc_coherent(priv->device, size, + &tx_q->dma_tx_phy, GFP_KERNEL); + if (!addr) + return -ENOMEM; - size *= priv->dma_tx_size; + if (priv->extend_desc) + tx_q->dma_etx = addr; + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + tx_q->dma_entx = addr; + else + tx_q->dma_tx = addr; - addr = dma_alloc_coherent(priv->device, size, - &tx_q->dma_tx_phy, GFP_KERNEL); - if (!addr) - goto err_dma; + return 0; +} - if (priv->extend_desc) - tx_q->dma_etx = addr; - else if (tx_q->tbs & STMMAC_TBS_AVAIL) - tx_q->dma_entx = addr; - else - tx_q->dma_tx = addr; +static int alloc_dma_tx_desc_resources(struct stmmac_priv *priv) +{ + u32 tx_count = priv->plat->tx_queues_to_use; + u32 queue; + int ret; + + /* TX queues buffers and DMA */ + for 
(queue = 0; queue < tx_count; queue++) { + ret = __alloc_dma_tx_desc_resources(priv, queue); + if (ret) + goto err_dma; } return 0; @@ -1897,11 +2152,13 @@ static int alloc_dma_desc_resources(struct stmmac_priv *priv) */ static void free_dma_desc_resources(struct stmmac_priv *priv) { - /* Release the DMA RX socket buffers */ - free_dma_rx_desc_resources(priv); - /* Release the DMA TX socket buffers */ free_dma_tx_desc_resources(priv); + + /* Release the DMA RX socket buffers later + * to ensure all pending XDP_TX buffers are returned. + */ + free_dma_rx_desc_resources(priv); } /** @@ -2058,12 +2315,24 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) /* configure all channels */ for (chan = 0; chan < rx_channels_count; chan++) { + struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan]; + u32 buf_size; + qmode = priv->plat->rx_queues_cfg[chan].mode_to_use; stmmac_dma_rx_mode(priv, priv->ioaddr, rxmode, chan, rxfifosz, qmode); - stmmac_set_dma_bfsize(priv, priv->ioaddr, priv->dma_buf_sz, - chan); + + if (rx_q->xsk_pool) { + buf_size = xsk_pool_get_rx_frame_size(rx_q->xsk_pool); + stmmac_set_dma_bfsize(priv, priv->ioaddr, + buf_size, + chan); + } else { + stmmac_set_dma_bfsize(priv, priv->ioaddr, + priv->dma_buf_sz, + chan); + } } for (chan = 0; chan < tx_channels_count; chan++) { @@ -2074,6 +2343,101 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) } } +static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget) +{ + struct netdev_queue *nq = netdev_get_tx_queue(priv->dev, queue); + struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; + struct xsk_buff_pool *pool = tx_q->xsk_pool; + unsigned int entry = tx_q->cur_tx; + struct dma_desc *tx_desc = NULL; + struct xdp_desc xdp_desc; + bool work_done = true; + + /* Avoids TX time-out as we are sharing with slow path */ + nq->trans_start = jiffies; + + budget = min(budget, stmmac_tx_avail(priv, queue)); + + while (budget-- > 0) { + dma_addr_t dma_addr; + bool set_ic; + + /* We are sharing with slow path and stop XSK TX desc submission when + * available TX ring is less than threshold. + */ + if (unlikely(stmmac_tx_avail(priv, queue) < STMMAC_TX_XSK_AVAIL) || + !netif_carrier_ok(priv->dev)) { + work_done = false; + break; + } + + if (!xsk_tx_peek_desc(pool, &xdp_desc)) + break; + + if (likely(priv->extend_desc)) + tx_desc = (struct dma_desc *)(tx_q->dma_etx + entry); + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + tx_desc = &tx_q->dma_entx[entry].basic; + else + tx_desc = tx_q->dma_tx + entry; + + dma_addr = xsk_buff_raw_get_dma(pool, xdp_desc.addr); + xsk_buff_raw_dma_sync_for_device(pool, dma_addr, xdp_desc.len); + + tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_XSK_TX; + + /* To return XDP buffer to XSK pool, we simple call + * xsk_tx_completed(), so we don't need to fill up + * 'buf' and 'xdpf'. 
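/*
 * Illustrative sketch, not part of the patch: the zero-copy TX path that
 * follows shares the descriptor ring with the normal stack, so it stops
 * submitting XSK frames once ring availability drops below a reserve
 * threshold. A standalone model of that budget/threshold bookkeeping;
 * names and numbers are stand-ins.
 */
#include <stdio.h>

#define RING_SIZE 512
#define XSK_RESERVE 64	/* stand-in for STMMAC_TX_XSK_AVAIL */

static unsigned int tx_avail(unsigned int dirty, unsigned int cur)
{
	/* free entries in a ring shared with the slow path */
	return dirty > cur ? dirty - cur - 1
			   : RING_SIZE - cur + dirty - 1;
}

int main(void)
{
	unsigned int cur = 100, dirty = 40, sent = 0, budget = 256;

	while (budget-- && tx_avail(dirty, cur) >= XSK_RESERVE) {
		cur = (cur + 1) % RING_SIZE;	/* consume one descriptor */
		sent++;
	}
	printf("queued %u frames, %u slots left\n", sent, tx_avail(dirty, cur));
	return 0;
}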
+ */ + tx_q->tx_skbuff_dma[entry].buf = 0; + tx_q->xdpf[entry] = NULL; + + tx_q->tx_skbuff_dma[entry].map_as_page = false; + tx_q->tx_skbuff_dma[entry].len = xdp_desc.len; + tx_q->tx_skbuff_dma[entry].last_segment = true; + tx_q->tx_skbuff_dma[entry].is_jumbo = false; + + stmmac_set_desc_addr(priv, tx_desc, dma_addr); + + tx_q->tx_count_frames++; + + if (!priv->tx_coal_frames[queue]) + set_ic = false; + else if (tx_q->tx_count_frames % priv->tx_coal_frames[queue] == 0) + set_ic = true; + else + set_ic = false; + + if (set_ic) { + tx_q->tx_count_frames = 0; + stmmac_set_tx_ic(priv, tx_desc); + priv->xstats.tx_set_ic_bit++; + } + + stmmac_prepare_tx_desc(priv, tx_desc, 1, xdp_desc.len, + true, priv->mode, true, true, + xdp_desc.len); + + stmmac_enable_dma_transmission(priv, priv->ioaddr); + + tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, priv->dma_tx_size); + entry = tx_q->cur_tx; + } + + if (tx_desc) { + stmmac_flush_tx_descriptors(priv, queue); + xsk_tx_release(pool); + } + + /* Return true if all of the 3 conditions are met + * a) TX Budget is still available + * b) work_done = true when XSK TX desc peek is empty (no more + * pending XSK TX for transmission) + */ + return !!budget && work_done; +} + /** * stmmac_tx_clean - to manage the transmission completion * @priv: driver private structure @@ -2085,18 +2449,35 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) { struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; unsigned int bytes_compl = 0, pkts_compl = 0; - unsigned int entry, count = 0; + unsigned int entry, xmits = 0, count = 0; __netif_tx_lock_bh(netdev_get_tx_queue(priv->dev, queue)); priv->xstats.tx_clean++; + tx_q->xsk_frames_done = 0; + entry = tx_q->dirty_tx; - while ((entry != tx_q->cur_tx) && (count < budget)) { - struct sk_buff *skb = tx_q->tx_skbuff[entry]; + + /* Try to clean all TX complete frame in 1 shot */ + while ((entry != tx_q->cur_tx) && count < priv->dma_tx_size) { + struct xdp_frame *xdpf; + struct sk_buff *skb; struct dma_desc *p; int status; + if (tx_q->tx_skbuff_dma[entry].buf_type == STMMAC_TXBUF_T_XDP_TX || + tx_q->tx_skbuff_dma[entry].buf_type == STMMAC_TXBUF_T_XDP_NDO) { + xdpf = tx_q->xdpf[entry]; + skb = NULL; + } else if (tx_q->tx_skbuff_dma[entry].buf_type == STMMAC_TXBUF_T_SKB) { + xdpf = NULL; + skb = tx_q->tx_skbuff[entry]; + } else { + xdpf = NULL; + skb = NULL; + } + if (priv->extend_desc) p = (struct dma_desc *)(tx_q->dma_etx + entry); else if (tx_q->tbs & STMMAC_TBS_AVAIL) @@ -2126,10 +2507,12 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) priv->dev->stats.tx_packets++; priv->xstats.tx_pkt_n++; } - stmmac_get_tx_hwtstamp(priv, p, skb); + if (skb) + stmmac_get_tx_hwtstamp(priv, p, skb); } - if (likely(tx_q->tx_skbuff_dma[entry].buf)) { + if (likely(tx_q->tx_skbuff_dma[entry].buf && + tx_q->tx_skbuff_dma[entry].buf_type != STMMAC_TXBUF_T_XDP_TX)) { if (tx_q->tx_skbuff_dma[entry].map_as_page) dma_unmap_page(priv->device, tx_q->tx_skbuff_dma[entry].buf, @@ -2150,11 +2533,28 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) tx_q->tx_skbuff_dma[entry].last_segment = false; tx_q->tx_skbuff_dma[entry].is_jumbo = false; - if (likely(skb != NULL)) { - pkts_compl++; - bytes_compl += skb->len; - dev_consume_skb_any(skb); - tx_q->tx_skbuff[entry] = NULL; + if (xdpf && + tx_q->tx_skbuff_dma[entry].buf_type == STMMAC_TXBUF_T_XDP_TX) { + xdp_return_frame_rx_napi(xdpf); + tx_q->xdpf[entry] = NULL; + } + + if (xdpf && + tx_q->tx_skbuff_dma[entry].buf_type == 
STMMAC_TXBUF_T_XDP_NDO) { + xdp_return_frame(xdpf); + tx_q->xdpf[entry] = NULL; + } + + if (tx_q->tx_skbuff_dma[entry].buf_type == STMMAC_TXBUF_T_XSK_TX) + tx_q->xsk_frames_done++; + + if (tx_q->tx_skbuff_dma[entry].buf_type == STMMAC_TXBUF_T_SKB) { + if (likely(skb)) { + pkts_compl++; + bytes_compl += skb->len; + dev_consume_skb_any(skb); + tx_q->tx_skbuff[entry] = NULL; + } } stmmac_release_tx_desc(priv, p, priv->mode); @@ -2175,6 +2575,28 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) netif_tx_wake_queue(netdev_get_tx_queue(priv->dev, queue)); } + if (tx_q->xsk_pool) { + bool work_done; + + if (tx_q->xsk_frames_done) + xsk_tx_completed(tx_q->xsk_pool, tx_q->xsk_frames_done); + + if (xsk_uses_need_wakeup(tx_q->xsk_pool)) + xsk_set_tx_need_wakeup(tx_q->xsk_pool); + + /* For XSK TX, we try to send as many as possible. + * If XSK work done (XSK TX desc empty and budget still + * available), return "budget - 1" to reenable TX IRQ. + * Else, return "budget" to make NAPI continue polling. + */ + work_done = stmmac_xdp_xmit_zc(priv, queue, + STMMAC_XSK_TX_BUDGET_MAX); + if (work_done) + xmits = budget - 1; + else + xmits = budget; + } + if (priv->eee_enabled && !priv->tx_path_in_lpi_mode && priv->eee_sw_timer_en) { stmmac_enable_eee_mode(priv); @@ -2183,12 +2605,14 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) /* We still have pending packets, let's call for a new scheduling */ if (tx_q->dirty_tx != tx_q->cur_tx) - hrtimer_start(&tx_q->txtimer, STMMAC_COAL_TIMER(priv->tx_coal_timer), + hrtimer_start(&tx_q->txtimer, + STMMAC_COAL_TIMER(priv->tx_coal_timer[queue]), HRTIMER_MODE_REL); __netif_tx_unlock_bh(netdev_get_tx_queue(priv->dev, queue)); - return count; + /* Combine decisions from TX clean and XSK TX */ + return max(count, xmits); } /** @@ -2266,28 +2690,35 @@ static bool stmmac_safety_feat_interrupt(struct stmmac_priv *priv) return false; } -static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan) +static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan, u32 dir) { int status = stmmac_dma_interrupt_status(priv, priv->ioaddr, - &priv->xstats, chan); + &priv->xstats, chan, dir); + struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan]; + struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan]; struct stmmac_channel *ch = &priv->channel[chan]; + struct napi_struct *rx_napi; + struct napi_struct *tx_napi; unsigned long flags; + rx_napi = rx_q->xsk_pool ? &ch->rxtx_napi : &ch->rx_napi; + tx_napi = tx_q->xsk_pool ? 
&ch->rxtx_napi : &ch->tx_napi; + if ((status & handle_rx) && (chan < priv->plat->rx_queues_to_use)) { - if (napi_schedule_prep(&ch->rx_napi)) { + if (napi_schedule_prep(rx_napi)) { spin_lock_irqsave(&ch->lock, flags); stmmac_disable_dma_irq(priv, priv->ioaddr, chan, 1, 0); spin_unlock_irqrestore(&ch->lock, flags); - __napi_schedule(&ch->rx_napi); + __napi_schedule(rx_napi); } } if ((status & handle_tx) && (chan < priv->plat->tx_queues_to_use)) { - if (napi_schedule_prep(&ch->tx_napi)) { + if (napi_schedule_prep(tx_napi)) { spin_lock_irqsave(&ch->lock, flags); stmmac_disable_dma_irq(priv, priv->ioaddr, chan, 0, 1); spin_unlock_irqrestore(&ch->lock, flags); - __napi_schedule(&ch->tx_napi); + __napi_schedule(tx_napi); } } @@ -2315,7 +2746,8 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv) channels_to_check = ARRAY_SIZE(status); for (chan = 0; chan < channels_to_check; chan++) - status[chan] = stmmac_napi_check(priv, chan); + status[chan] = stmmac_napi_check(priv, chan, + DMA_DIR_RXTX); for (chan = 0; chan < tx_channel_count; chan++) { if (unlikely(status[chan] & tx_hard_error_bump_tc)) { @@ -2443,7 +2875,7 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv) rx_q->dma_rx_phy, chan); rx_q->rx_tail_addr = rx_q->dma_rx_phy + - (priv->dma_rx_size * + (rx_q->buf_alloc_num * sizeof(struct dma_desc)); stmmac_set_rx_tail_ptr(priv, priv->ioaddr, rx_q->rx_tail_addr, chan); @@ -2468,7 +2900,8 @@ static void stmmac_tx_timer_arm(struct stmmac_priv *priv, u32 queue) { struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; - hrtimer_start(&tx_q->txtimer, STMMAC_COAL_TIMER(priv->tx_coal_timer), + hrtimer_start(&tx_q->txtimer, + STMMAC_COAL_TIMER(priv->tx_coal_timer[queue]), HRTIMER_MODE_REL); } @@ -2483,16 +2916,18 @@ static enum hrtimer_restart stmmac_tx_timer(struct hrtimer *t) struct stmmac_tx_queue *tx_q = container_of(t, struct stmmac_tx_queue, txtimer); struct stmmac_priv *priv = tx_q->priv_data; struct stmmac_channel *ch; + struct napi_struct *napi; ch = &priv->channel[tx_q->queue_index]; + napi = tx_q->xsk_pool ? 
&ch->rxtx_napi : &ch->tx_napi; - if (likely(napi_schedule_prep(&ch->tx_napi))) { + if (likely(napi_schedule_prep(napi))) { unsigned long flags; spin_lock_irqsave(&ch->lock, flags); stmmac_disable_dma_irq(priv, priv->ioaddr, ch->index, 0, 1); spin_unlock_irqrestore(&ch->lock, flags); - __napi_schedule(&ch->tx_napi); + __napi_schedule(napi); } return HRTIMER_NORESTART; @@ -2509,18 +2944,21 @@ static enum hrtimer_restart stmmac_tx_timer(struct hrtimer *t) static void stmmac_init_coalesce(struct stmmac_priv *priv) { u32 tx_channel_count = priv->plat->tx_queues_to_use; + u32 rx_channel_count = priv->plat->rx_queues_to_use; u32 chan; - priv->tx_coal_frames = STMMAC_TX_FRAMES; - priv->tx_coal_timer = STMMAC_COAL_TX_TIMER; - priv->rx_coal_frames = STMMAC_RX_FRAMES; - for (chan = 0; chan < tx_channel_count; chan++) { struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan]; + priv->tx_coal_frames[chan] = STMMAC_TX_FRAMES; + priv->tx_coal_timer[chan] = STMMAC_COAL_TX_TIMER; + hrtimer_init(&tx_q->txtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); tx_q->txtimer.function = stmmac_tx_timer; } + + for (chan = 0; chan < rx_channel_count; chan++) + priv->rx_coal_frames[chan] = STMMAC_RX_FRAMES; } static void stmmac_set_rings_length(struct stmmac_priv *priv) @@ -2737,6 +3175,26 @@ static void stmmac_safety_feat_configuration(struct stmmac_priv *priv) } } +static int stmmac_fpe_start_wq(struct stmmac_priv *priv) +{ + char *name; + + clear_bit(__FPE_TASK_SCHED, &priv->fpe_task_state); + + name = priv->wq_name; + sprintf(name, "%s-fpe", priv->dev->name); + + priv->fpe_wq = create_singlethread_workqueue(name); + if (!priv->fpe_wq) { + netdev_err(priv->dev, "%s: Failed to create workqueue\n", name); + + return -ENOMEM; + } + netdev_info(priv->dev, "FPE workqueue start"); + + return 0; +} + /** * stmmac_hw_setup - setup mac in a usable state. * @dev : pointer to the device structure. @@ -2755,6 +3213,7 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) struct stmmac_priv *priv = netdev_priv(dev); u32 rx_cnt = priv->plat->rx_queues_to_use; u32 tx_cnt = priv->plat->tx_queues_to_use; + bool sph_en; u32 chan; int ret; @@ -2825,10 +3284,15 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) priv->tx_lpi_timer = eee_timer * 1000; if (priv->use_riwt) { - if (!priv->rx_riwt) - priv->rx_riwt = DEF_DMA_RIWT; + u32 queue; + + for (queue = 0; queue < rx_cnt; queue++) { + if (!priv->rx_riwt[queue]) + priv->rx_riwt[queue] = DEF_DMA_RIWT; - ret = stmmac_rx_watchdog(priv, priv->ioaddr, priv->rx_riwt, rx_cnt); + stmmac_rx_watchdog(priv, priv->ioaddr, + priv->rx_riwt[queue], queue); + } } if (priv->hw->pcs) @@ -2839,15 +3303,22 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) /* Enable TSO */ if (priv->tso) { - for (chan = 0; chan < tx_cnt; chan++) + for (chan = 0; chan < tx_cnt; chan++) { + struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan]; + + /* TSO and TBS cannot co-exist */ + if (tx_q->tbs & STMMAC_TBS_AVAIL) + continue; + stmmac_enable_tso(priv, priv->ioaddr, 1, chan); + } } /* Enable Split Header */ - if (priv->sph && priv->hw->rx_csum) { - for (chan = 0; chan < rx_cnt; chan++) - stmmac_enable_sph(priv, priv->ioaddr, 1, chan); - } + sph_en = (priv->hw->rx_csum > 0) && priv->sph; + for (chan = 0; chan < rx_cnt; chan++) + stmmac_enable_sph(priv, priv->ioaddr, sph_en, chan); + /* VLAN Tag Insertion */ if (priv->dma_cap.vlins) @@ -2868,6 +3339,13 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) /* Start the ball rolling... 
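/*
 * Illustrative sketch, not part of the patch: in the code above, coalescing
 * state moves from single fields to per-queue arrays, so each TX channel
 * keeps its own frame counter and timer period while each RX channel keeps
 * its own frame threshold. A standalone model; sizes and defaults are
 * stand-ins for the STMMAC_* constants.
 */
#include <stdio.h>

#define MAX_QUEUES 8
#define TX_FRAMES_DEFAULT 25	/* stand-in for STMMAC_TX_FRAMES */
#define TX_TIMER_DEFAULT 1000	/* stand-in for STMMAC_COAL_TX_TIMER */
#define RX_FRAMES_DEFAULT 25	/* stand-in for STMMAC_RX_FRAMES */

struct coal_cfg {
	unsigned int tx_coal_frames[MAX_QUEUES];
	unsigned int tx_coal_timer[MAX_QUEUES];
	unsigned int rx_coal_frames[MAX_QUEUES];
};

int main(void)
{
	struct coal_cfg cfg;
	unsigned int q, tx_cnt = 4, rx_cnt = 4;

	for (q = 0; q < tx_cnt; q++) {
		cfg.tx_coal_frames[q] = TX_FRAMES_DEFAULT;
		cfg.tx_coal_timer[q] = TX_TIMER_DEFAULT;
	}
	for (q = 0; q < rx_cnt; q++)
		cfg.rx_coal_frames[q] = RX_FRAMES_DEFAULT;

	printf("queue 0: %u frames / %u us\n",
	       cfg.tx_coal_frames[0], cfg.tx_coal_timer[0]);
	return 0;
}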
*/ stmmac_start_all_dma(priv); + if (priv->dma_cap.fpesel) { + stmmac_fpe_start_wq(priv); + + if (priv->plat->fpe_cfg->enable) + stmmac_fpe_handshake(priv, true); + } + return 0; } @@ -2878,6 +3356,271 @@ static void stmmac_hw_teardown(struct net_device *dev) clk_disable_unprepare(priv->plat->clk_ptp_ref); } +static void stmmac_free_irq(struct net_device *dev, + enum request_irq_err irq_err, int irq_idx) +{ + struct stmmac_priv *priv = netdev_priv(dev); + int j; + + switch (irq_err) { + case REQ_IRQ_ERR_ALL: + irq_idx = priv->plat->tx_queues_to_use; + fallthrough; + case REQ_IRQ_ERR_TX: + for (j = irq_idx - 1; j >= 0; j--) { + if (priv->tx_irq[j] > 0) { + irq_set_affinity_hint(priv->tx_irq[j], NULL); + free_irq(priv->tx_irq[j], &priv->tx_queue[j]); + } + } + irq_idx = priv->plat->rx_queues_to_use; + fallthrough; + case REQ_IRQ_ERR_RX: + for (j = irq_idx - 1; j >= 0; j--) { + if (priv->rx_irq[j] > 0) { + irq_set_affinity_hint(priv->rx_irq[j], NULL); + free_irq(priv->rx_irq[j], &priv->rx_queue[j]); + } + } + + if (priv->sfty_ue_irq > 0 && priv->sfty_ue_irq != dev->irq) + free_irq(priv->sfty_ue_irq, dev); + fallthrough; + case REQ_IRQ_ERR_SFTY_UE: + if (priv->sfty_ce_irq > 0 && priv->sfty_ce_irq != dev->irq) + free_irq(priv->sfty_ce_irq, dev); + fallthrough; + case REQ_IRQ_ERR_SFTY_CE: + if (priv->lpi_irq > 0 && priv->lpi_irq != dev->irq) + free_irq(priv->lpi_irq, dev); + fallthrough; + case REQ_IRQ_ERR_LPI: + if (priv->wol_irq > 0 && priv->wol_irq != dev->irq) + free_irq(priv->wol_irq, dev); + fallthrough; + case REQ_IRQ_ERR_WOL: + free_irq(dev->irq, dev); + fallthrough; + case REQ_IRQ_ERR_MAC: + case REQ_IRQ_ERR_NO: + /* If MAC IRQ request error, no more IRQ to free */ + break; + } +} + +static int stmmac_request_irq_multi_msi(struct net_device *dev) +{ + enum request_irq_err irq_err = REQ_IRQ_ERR_NO; + struct stmmac_priv *priv = netdev_priv(dev); + cpumask_t cpu_mask; + int irq_idx = 0; + char *int_name; + int ret; + int i; + + /* For common interrupt */ + int_name = priv->int_name_mac; + sprintf(int_name, "%s:%s", dev->name, "mac"); + ret = request_irq(dev->irq, stmmac_mac_interrupt, + 0, int_name, dev); + if (unlikely(ret < 0)) { + netdev_err(priv->dev, + "%s: alloc mac MSI %d (error: %d)\n", + __func__, dev->irq, ret); + irq_err = REQ_IRQ_ERR_MAC; + goto irq_error; + } + + /* Request the Wake IRQ in case of another line + * is used for WoL + */ + if (priv->wol_irq > 0 && priv->wol_irq != dev->irq) { + int_name = priv->int_name_wol; + sprintf(int_name, "%s:%s", dev->name, "wol"); + ret = request_irq(priv->wol_irq, + stmmac_mac_interrupt, + 0, int_name, dev); + if (unlikely(ret < 0)) { + netdev_err(priv->dev, + "%s: alloc wol MSI %d (error: %d)\n", + __func__, priv->wol_irq, ret); + irq_err = REQ_IRQ_ERR_WOL; + goto irq_error; + } + } + + /* Request the LPI IRQ in case of another line + * is used for LPI + */ + if (priv->lpi_irq > 0 && priv->lpi_irq != dev->irq) { + int_name = priv->int_name_lpi; + sprintf(int_name, "%s:%s", dev->name, "lpi"); + ret = request_irq(priv->lpi_irq, + stmmac_mac_interrupt, + 0, int_name, dev); + if (unlikely(ret < 0)) { + netdev_err(priv->dev, + "%s: alloc lpi MSI %d (error: %d)\n", + __func__, priv->lpi_irq, ret); + irq_err = REQ_IRQ_ERR_LPI; + goto irq_error; + } + } + + /* Request the Safety Feature Correctible Error line in + * case of another line is used + */ + if (priv->sfty_ce_irq > 0 && priv->sfty_ce_irq != dev->irq) { + int_name = priv->int_name_sfty_ce; + sprintf(int_name, "%s:%s", dev->name, "safety-ce"); + ret = request_irq(priv->sfty_ce_irq, + 
stmmac_safety_interrupt, + 0, int_name, dev); + if (unlikely(ret < 0)) { + netdev_err(priv->dev, + "%s: alloc sfty ce MSI %d (error: %d)\n", + __func__, priv->sfty_ce_irq, ret); + irq_err = REQ_IRQ_ERR_SFTY_CE; + goto irq_error; + } + } + + /* Request the Safety Feature Uncorrectible Error line in + * case of another line is used + */ + if (priv->sfty_ue_irq > 0 && priv->sfty_ue_irq != dev->irq) { + int_name = priv->int_name_sfty_ue; + sprintf(int_name, "%s:%s", dev->name, "safety-ue"); + ret = request_irq(priv->sfty_ue_irq, + stmmac_safety_interrupt, + 0, int_name, dev); + if (unlikely(ret < 0)) { + netdev_err(priv->dev, + "%s: alloc sfty ue MSI %d (error: %d)\n", + __func__, priv->sfty_ue_irq, ret); + irq_err = REQ_IRQ_ERR_SFTY_UE; + goto irq_error; + } + } + + /* Request Rx MSI irq */ + for (i = 0; i < priv->plat->rx_queues_to_use; i++) { + if (priv->rx_irq[i] == 0) + continue; + + int_name = priv->int_name_rx_irq[i]; + sprintf(int_name, "%s:%s-%d", dev->name, "rx", i); + ret = request_irq(priv->rx_irq[i], + stmmac_msi_intr_rx, + 0, int_name, &priv->rx_queue[i]); + if (unlikely(ret < 0)) { + netdev_err(priv->dev, + "%s: alloc rx-%d MSI %d (error: %d)\n", + __func__, i, priv->rx_irq[i], ret); + irq_err = REQ_IRQ_ERR_RX; + irq_idx = i; + goto irq_error; + } + cpumask_clear(&cpu_mask); + cpumask_set_cpu(i % num_online_cpus(), &cpu_mask); + irq_set_affinity_hint(priv->rx_irq[i], &cpu_mask); + } + + /* Request Tx MSI irq */ + for (i = 0; i < priv->plat->tx_queues_to_use; i++) { + if (priv->tx_irq[i] == 0) + continue; + + int_name = priv->int_name_tx_irq[i]; + sprintf(int_name, "%s:%s-%d", dev->name, "tx", i); + ret = request_irq(priv->tx_irq[i], + stmmac_msi_intr_tx, + 0, int_name, &priv->tx_queue[i]); + if (unlikely(ret < 0)) { + netdev_err(priv->dev, + "%s: alloc tx-%d MSI %d (error: %d)\n", + __func__, i, priv->tx_irq[i], ret); + irq_err = REQ_IRQ_ERR_TX; + irq_idx = i; + goto irq_error; + } + cpumask_clear(&cpu_mask); + cpumask_set_cpu(i % num_online_cpus(), &cpu_mask); + irq_set_affinity_hint(priv->tx_irq[i], &cpu_mask); + } + + return 0; + +irq_error: + stmmac_free_irq(dev, irq_err, irq_idx); + return ret; +} + +static int stmmac_request_irq_single(struct net_device *dev) +{ + enum request_irq_err irq_err = REQ_IRQ_ERR_NO; + struct stmmac_priv *priv = netdev_priv(dev); + int ret; + + ret = request_irq(dev->irq, stmmac_interrupt, + IRQF_SHARED, dev->name, dev); + if (unlikely(ret < 0)) { + netdev_err(priv->dev, + "%s: ERROR: allocating the IRQ %d (error: %d)\n", + __func__, dev->irq, ret); + irq_err = REQ_IRQ_ERR_MAC; + return ret; + } + + /* Request the Wake IRQ in case of another line + * is used for WoL + */ + if (priv->wol_irq > 0 && priv->wol_irq != dev->irq) { + ret = request_irq(priv->wol_irq, stmmac_interrupt, + IRQF_SHARED, dev->name, dev); + if (unlikely(ret < 0)) { + netdev_err(priv->dev, + "%s: ERROR: allocating the WoL IRQ %d (%d)\n", + __func__, priv->wol_irq, ret); + irq_err = REQ_IRQ_ERR_WOL; + return ret; + } + } + + /* Request the IRQ lines */ + if (priv->lpi_irq > 0 && priv->lpi_irq != dev->irq) { + ret = request_irq(priv->lpi_irq, stmmac_interrupt, + IRQF_SHARED, dev->name, dev); + if (unlikely(ret < 0)) { + netdev_err(priv->dev, + "%s: ERROR: allocating the LPI IRQ %d (%d)\n", + __func__, priv->lpi_irq, ret); + irq_err = REQ_IRQ_ERR_LPI; + goto irq_error; + } + } + + return 0; + +irq_error: + stmmac_free_irq(dev, irq_err, 0); + return ret; +} + +static int stmmac_request_irq(struct net_device *dev) +{ + struct stmmac_priv *priv = netdev_priv(dev); + int ret; + + /* 
Request the IRQ lines */ + if (priv->plat->multi_msi_en) + ret = stmmac_request_irq_multi_msi(dev); + else + ret = stmmac_request_irq_single(dev); + + return ret; +} + /** * stmmac_open - open entry point of the driver * @dev : pointer to the device structure. @@ -2887,22 +3630,28 @@ static void stmmac_hw_teardown(struct net_device *dev) * 0 on success and an appropriate (-)ve integer as defined in errno.h * file on failure. */ -static int stmmac_open(struct net_device *dev) +int stmmac_open(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); int bfsize = 0; u32 chan; int ret; + ret = pm_runtime_get_sync(priv->device); + if (ret < 0) { + pm_runtime_put_noidle(priv->device); + return ret; + } + if (priv->hw->pcs != STMMAC_PCS_TBI && priv->hw->pcs != STMMAC_PCS_RTBI && - priv->hw->xpcs == NULL) { + priv->hw->xpcs_args.an_mode != DW_AN_C73) { ret = stmmac_init_phy(dev); if (ret) { netdev_err(priv->dev, "%s: Cannot attach to PHY (error: %d)\n", __func__, ret); - return ret; + goto init_phy_error; } } @@ -2932,9 +3681,8 @@ static int stmmac_open(struct net_device *dev) struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan]; int tbs_en = priv->plat->tx_queues_cfg[chan].tbs_en; + /* Setup per-TXQ tbs flag before TX descriptor alloc */ tx_q->tbs |= tbs_en ? STMMAC_TBS_AVAIL : 0; - if (stmmac_enable_tbs(priv, priv->ioaddr, tbs_en, chan)) - tx_q->tbs &= ~STMMAC_TBS_AVAIL; } ret = alloc_dma_desc_resources(priv); @@ -2963,50 +3711,15 @@ static int stmmac_open(struct net_device *dev) /* We may have called phylink_speed_down before */ phylink_speed_up(priv->phylink); - /* Request the IRQ lines */ - ret = request_irq(dev->irq, stmmac_interrupt, - IRQF_SHARED, dev->name, dev); - if (unlikely(ret < 0)) { - netdev_err(priv->dev, - "%s: ERROR: allocating the IRQ %d (error: %d)\n", - __func__, dev->irq, ret); + ret = stmmac_request_irq(dev); + if (ret) goto irq_error; - } - - /* Request the Wake IRQ in case of another line is used for WoL */ - if (priv->wol_irq != dev->irq) { - ret = request_irq(priv->wol_irq, stmmac_interrupt, - IRQF_SHARED, dev->name, dev); - if (unlikely(ret < 0)) { - netdev_err(priv->dev, - "%s: ERROR: allocating the WoL IRQ %d (%d)\n", - __func__, priv->wol_irq, ret); - goto wolirq_error; - } - } - - /* Request the IRQ lines */ - if (priv->lpi_irq > 0) { - ret = request_irq(priv->lpi_irq, stmmac_interrupt, IRQF_SHARED, - dev->name, dev); - if (unlikely(ret < 0)) { - netdev_err(priv->dev, - "%s: ERROR: allocating the LPI IRQ %d (%d)\n", - __func__, priv->lpi_irq, ret); - goto lpiirq_error; - } - } stmmac_enable_all_queues(priv); netif_tx_start_all_queues(priv->dev); return 0; -lpiirq_error: - if (priv->wol_irq != dev->irq) - free_irq(priv->wol_irq, dev); -wolirq_error: - free_irq(dev->irq, dev); irq_error: phylink_stop(priv->phylink); @@ -3018,16 +3731,28 @@ init_error: free_dma_desc_resources(priv); dma_desc_error: phylink_disconnect_phy(priv->phylink); +init_phy_error: + pm_runtime_put(priv->device); return ret; } +static void stmmac_fpe_stop_wq(struct stmmac_priv *priv) +{ + set_bit(__FPE_REMOVING, &priv->fpe_task_state); + + if (priv->fpe_wq) + destroy_workqueue(priv->fpe_wq); + + netdev_info(priv->dev, "FPE workqueue stop"); +} + /** * stmmac_release - close entry point of the driver * @dev : device pointer. * Description: * This is the stop entry point of the driver. 
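/*
 * Illustrative sketch, not part of the patch: stmmac_free_irq() above
 * unwinds in reverse acquisition order by switching on how far setup got
 * and falling through every earlier stage. A standalone model of the same
 * shape; the stage names are stand-ins for the REQ_IRQ_ERR_* values.
 */
#include <stdio.h>

enum stage { GOT_NOTHING, GOT_MAC, GOT_WOL, GOT_TX };

static void teardown(enum stage reached)
{
	switch (reached) {
	case GOT_TX:
		puts("free tx irqs");
		/* fall through */
	case GOT_WOL:
		puts("free wol irq");
		/* fall through */
	case GOT_MAC:
		puts("free mac irq");
		/* fall through */
	case GOT_NOTHING:
		break;	/* nothing was requested, nothing to free */
	}
}

int main(void)
{
	teardown(GOT_WOL);	/* e.g. setup failed while requesting TX irqs */
	return 0;
}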
*/ -static int stmmac_release(struct net_device *dev) +int stmmac_release(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); u32 chan; @@ -3044,11 +3769,7 @@ static int stmmac_release(struct net_device *dev) hrtimer_cancel(&priv->tx_queue[chan].txtimer); /* Free the IRQ lines */ - free_irq(dev->irq, dev); - if (priv->wol_irq != dev->irq) - free_irq(priv->wol_irq, dev); - if (priv->lpi_irq > 0) - free_irq(priv->lpi_irq, dev); + stmmac_free_irq(dev, REQ_IRQ_ERR_ALL, 0); if (priv->eee_enabled) { priv->tx_path_in_lpi_mode = false; @@ -3068,6 +3789,11 @@ static int stmmac_release(struct net_device *dev) stmmac_release_ptp(priv); + pm_runtime_put(priv->device); + + if (priv->dma_cap.fpesel) + stmmac_fpe_stop_wq(priv); + return 0; } @@ -3153,6 +3879,28 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, dma_addr_t des, } } +static void stmmac_flush_tx_descriptors(struct stmmac_priv *priv, int queue) +{ + struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; + int desc_size; + + if (likely(priv->extend_desc)) + desc_size = sizeof(struct dma_extended_desc); + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + desc_size = sizeof(struct dma_edesc); + else + desc_size = sizeof(struct dma_desc); + + /* The own bit must be the latest setting done when prepare the + * descriptor and then barrier is needed to make sure that + * all is coherent before granting the DMA engine. + */ + wmb(); + + tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * desc_size); + stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue); +} + /** * stmmac_tso_xmit - Tx entry point of the driver for oversized frames (TSO) * @skb : the socket buffer @@ -3184,10 +3932,10 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) { struct dma_desc *desc, *first, *mss_desc = NULL; struct stmmac_priv *priv = netdev_priv(dev); - int desc_size, tmp_pay_len = 0, first_tx; int nfrags = skb_shinfo(skb)->nr_frags; u32 queue = skb_get_queue_mapping(skb); unsigned int first_entry, tx_packets; + int tmp_pay_len = 0, first_tx; struct stmmac_tx_queue *tx_q; bool has_vlan, set_ic; u8 proto_hdr_len, hdr; @@ -3269,6 +4017,8 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) tx_q->tx_skbuff_dma[first_entry].buf = des; tx_q->tx_skbuff_dma[first_entry].len = skb_headlen(skb); + tx_q->tx_skbuff_dma[first_entry].map_as_page = false; + tx_q->tx_skbuff_dma[first_entry].buf_type = STMMAC_TXBUF_T_SKB; if (priv->dma_cap.addr64 <= 32) { first->des0 = cpu_to_le32(des); @@ -3304,12 +4054,14 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) tx_q->tx_skbuff_dma[tx_q->cur_tx].buf = des; tx_q->tx_skbuff_dma[tx_q->cur_tx].len = skb_frag_size(frag); tx_q->tx_skbuff_dma[tx_q->cur_tx].map_as_page = true; + tx_q->tx_skbuff_dma[tx_q->cur_tx].buf_type = STMMAC_TXBUF_T_SKB; } tx_q->tx_skbuff_dma[tx_q->cur_tx].last_segment = true; /* Only the last descriptor gets to point to the skb. 
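/*
 * Illustrative sketch, not part of the patch: stmmac_flush_tx_descriptors()
 * above turns the write barrier plus tail-pointer bump into one helper so
 * the TSO, normal and XDP TX paths cannot reorder descriptor writes past
 * the doorbell. A standalone model using C11 release semantics in place of
 * wmb(); names and sizes are stand-ins.
 */
#include <stdatomic.h>
#include <stdio.h>

struct ring {
	unsigned int desc[8];		/* descriptor words, OWN bit last */
	_Atomic unsigned int tail;	/* "doorbell" offset read by device */
};

static void flush_tx(struct ring *r, unsigned int cur, unsigned int dsize)
{
	/* release ordering: all earlier desc[] stores become visible
	 * before the tail store, mirroring wmb() + tail-pointer write */
	atomic_store_explicit(&r->tail, cur * dsize, memory_order_release);
}

int main(void)
{
	struct ring r = { .tail = 0 };

	r.desc[3] = 0x80000000u;	/* set OWN bit on last descriptor */
	flush_tx(&r, 4, 16);		/* publish entries 0..3, 16B descs */
	printf("tail offset now %u\n", atomic_load(&r.tail));
	return 0;
}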
*/ tx_q->tx_skbuff[tx_q->cur_tx] = skb; + tx_q->tx_skbuff_dma[tx_q->cur_tx].buf_type = STMMAC_TXBUF_T_SKB; /* Manage tx mitigation */ tx_packets = (tx_q->cur_tx + 1) - first_tx; @@ -3317,11 +4069,12 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) if ((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && priv->hwts_tx_en) set_ic = true; - else if (!priv->tx_coal_frames) + else if (!priv->tx_coal_frames[queue]) set_ic = false; - else if (tx_packets > priv->tx_coal_frames) + else if (tx_packets > priv->tx_coal_frames[queue]) set_ic = true; - else if ((tx_q->tx_count_frames % priv->tx_coal_frames) < tx_packets) + else if ((tx_q->tx_count_frames % + priv->tx_coal_frames[queue]) < tx_packets) set_ic = true; else set_ic = false; @@ -3384,12 +4137,6 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) stmmac_set_tx_owner(priv, mss_desc); } - /* The own bit must be the latest setting done when prepare the - * descriptor and then barrier is needed to make sure that - * all is coherent before granting the DMA engine. - */ - wmb(); - if (netif_msg_pktdata(priv)) { pr_info("%s: curr=%d dirty=%d f=%d, e=%d, f_p=%p, nfrags %d\n", __func__, tx_q->cur_tx, tx_q->dirty_tx, first_entry, @@ -3400,13 +4147,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len); - if (tx_q->tbs & STMMAC_TBS_AVAIL) - desc_size = sizeof(struct dma_edesc); - else - desc_size = sizeof(struct dma_desc); - - tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * desc_size); - stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue); + stmmac_flush_tx_descriptors(priv, queue); stmmac_tx_timer_arm(priv, queue); return NETDEV_TX_OK; @@ -3436,10 +4177,10 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) int nfrags = skb_shinfo(skb)->nr_frags; int gso = skb_shinfo(skb)->gso_type; struct dma_edesc *tbs_desc = NULL; - int entry, desc_size, first_tx; struct dma_desc *desc, *first; struct stmmac_tx_queue *tx_q; bool has_vlan, set_ic; + int entry, first_tx; dma_addr_t des; tx_q = &priv->tx_queue[queue]; @@ -3527,6 +4268,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) tx_q->tx_skbuff_dma[entry].map_as_page = true; tx_q->tx_skbuff_dma[entry].len = len; tx_q->tx_skbuff_dma[entry].last_segment = last_segment; + tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_SKB; /* Prepare the descriptor and set the own bit too */ stmmac_prepare_tx_desc(priv, desc, 0, len, csum_insertion, @@ -3535,6 +4277,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) /* Only the last descriptor gets to point to the skb. */ tx_q->tx_skbuff[entry] = skb; + tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_SKB; /* According to the coalesce parameter the IC bit for the latest * segment is reset and the timer re-started to clean the tx status. 
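/*
 * Illustrative sketch, not part of the patch: the ladder above decides
 * whether a descriptor should request a TX completion interrupt. Hardware
 * timestamping always does; otherwise the per-queue frame threshold is
 * consulted. A standalone model of the same decision:
 */
#include <stdbool.h>
#include <stdio.h>

static bool want_tx_irq(bool hw_tstamp, unsigned int coal_frames,
			unsigned int count_frames, unsigned int tx_packets)
{
	if (hw_tstamp)
		return true;	/* timestamp must be reaped promptly */
	if (!coal_frames)
		return false;	/* coalescing disabled for this queue */
	if (tx_packets > coal_frames)
		return true;	/* burst larger than the threshold */
	return (count_frames % coal_frames) < tx_packets; /* crossed it */
}

int main(void)
{
	printf("%d\n", want_tx_irq(false, 25, 26, 3));	/* 26 mod 25 = 1 -> 1 */
	printf("%d\n", want_tx_irq(false, 25, 24, 3));	/* 24 mod 25 = 24 -> 0 */
	return 0;
}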
@@ -3546,11 +4289,12 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) if ((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && priv->hwts_tx_en) set_ic = true; - else if (!priv->tx_coal_frames) + else if (!priv->tx_coal_frames[queue]) set_ic = false; - else if (tx_packets > priv->tx_coal_frames) + else if (tx_packets > priv->tx_coal_frames[queue]) set_ic = true; - else if ((tx_q->tx_count_frames % priv->tx_coal_frames) < tx_packets) + else if ((tx_q->tx_count_frames % + priv->tx_coal_frames[queue]) < tx_packets) set_ic = true; else set_ic = false; @@ -3612,6 +4356,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) goto dma_map_err; tx_q->tx_skbuff_dma[first_entry].buf = des; + tx_q->tx_skbuff_dma[first_entry].buf_type = STMMAC_TXBUF_T_SKB; + tx_q->tx_skbuff_dma[first_entry].map_as_page = false; stmmac_set_desc_addr(priv, first, des); @@ -3640,25 +4386,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) stmmac_set_tx_owner(priv, first); - /* The own bit must be the latest setting done when prepare the - * descriptor and then barrier is needed to make sure that - * all is coherent before granting the DMA engine. - */ - wmb(); - netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len); stmmac_enable_dma_transmission(priv, priv->ioaddr); - if (likely(priv->extend_desc)) - desc_size = sizeof(struct dma_extended_desc); - else if (tx_q->tbs & STMMAC_TBS_AVAIL) - desc_size = sizeof(struct dma_edesc); - else - desc_size = sizeof(struct dma_desc); - - tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * desc_size); - stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue); + stmmac_flush_tx_descriptors(priv, queue); stmmac_tx_timer_arm(priv, queue); return NETDEV_TX_OK; @@ -3701,11 +4433,9 @@ static void stmmac_rx_vlan(struct net_device *dev, struct sk_buff *skb) static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) { struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; - int len, dirty = stmmac_rx_dirty(priv, queue); + int dirty = stmmac_rx_dirty(priv, queue); unsigned int entry = rx_q->dirty_rx; - len = DIV_ROUND_UP(priv->dma_buf_sz, PAGE_SIZE) * PAGE_SIZE; - while (dirty-- > 0) { struct stmmac_rx_buffer *buf = &rx_q->buf_pool[entry]; struct dma_desc *p; @@ -3728,18 +4458,9 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) break; buf->sec_addr = page_pool_get_dma_addr(buf->sec_page); - - dma_sync_single_for_device(priv->device, buf->sec_addr, - len, DMA_FROM_DEVICE); } - buf->addr = page_pool_get_dma_addr(buf->page); - - /* Sync whole allocation to device. This will invalidate old - * data. 
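/*
 * Illustrative sketch, not part of the patch: because the RX page_pool is
 * now created with PP_FLAG_DMA_SYNC_DEV (see the per-queue alloc path
 * earlier), the pool itself syncs recycled pages for the device, which is
 * why the refill path below drops its manual dma_sync_single_for_device()
 * calls. A standalone model of who owns the sync; types are stand-ins.
 */
#include <stdbool.h>
#include <stdio.h>

struct pool { bool sync_dev; };	/* models the PP_FLAG_DMA_SYNC_DEV flag */

static void pool_give_page(const struct pool *p)
{
	if (p->sync_dev)
		puts("pool: dma sync for device (driver does nothing)");
	else
		puts("pool: caller must sync before device access");
}

int main(void)
{
	struct pool old_cfg = { .sync_dev = false };
	struct pool new_cfg = { .sync_dev = true };

	pool_give_page(&old_cfg);	/* pre-patch responsibility */
	pool_give_page(&new_cfg);	/* post-patch responsibility */
	return 0;
}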
- */ - dma_sync_single_for_device(priv->device, buf->addr, len, - DMA_FROM_DEVICE); + buf->addr = page_pool_get_dma_addr(buf->page) + buf->page_offset; stmmac_set_desc_addr(priv, p, buf->addr); if (priv->sph) @@ -3749,11 +4470,11 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) stmmac_refill_desc3(priv, rx_q, p); rx_q->rx_count_frames++; - rx_q->rx_count_frames += priv->rx_coal_frames; - if (rx_q->rx_count_frames > priv->rx_coal_frames) + rx_q->rx_count_frames += priv->rx_coal_frames[queue]; + if (rx_q->rx_count_frames > priv->rx_coal_frames[queue]) rx_q->rx_count_frames = 0; - use_rx_wd = !priv->rx_coal_frames; + use_rx_wd = !priv->rx_coal_frames[queue]; use_rx_wd |= rx_q->rx_count_frames > 0; if (!priv->use_riwt) use_rx_wd = false; @@ -3818,6 +4539,487 @@ static unsigned int stmmac_rx_buf2_len(struct stmmac_priv *priv, return plen - len; } +static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue, + struct xdp_frame *xdpf, bool dma_map) +{ + struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; + unsigned int entry = tx_q->cur_tx; + struct dma_desc *tx_desc; + dma_addr_t dma_addr; + bool set_ic; + + if (stmmac_tx_avail(priv, queue) < STMMAC_TX_THRESH(priv)) + return STMMAC_XDP_CONSUMED; + + if (likely(priv->extend_desc)) + tx_desc = (struct dma_desc *)(tx_q->dma_etx + entry); + else if (tx_q->tbs & STMMAC_TBS_AVAIL) + tx_desc = &tx_q->dma_entx[entry].basic; + else + tx_desc = tx_q->dma_tx + entry; + + if (dma_map) { + dma_addr = dma_map_single(priv->device, xdpf->data, + xdpf->len, DMA_TO_DEVICE); + if (dma_mapping_error(priv->device, dma_addr)) + return STMMAC_XDP_CONSUMED; + + tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_XDP_NDO; + } else { + struct page *page = virt_to_page(xdpf->data); + + dma_addr = page_pool_get_dma_addr(page) + sizeof(*xdpf) + + xdpf->headroom; + dma_sync_single_for_device(priv->device, dma_addr, + xdpf->len, DMA_BIDIRECTIONAL); + + tx_q->tx_skbuff_dma[entry].buf_type = STMMAC_TXBUF_T_XDP_TX; + } + + tx_q->tx_skbuff_dma[entry].buf = dma_addr; + tx_q->tx_skbuff_dma[entry].map_as_page = false; + tx_q->tx_skbuff_dma[entry].len = xdpf->len; + tx_q->tx_skbuff_dma[entry].last_segment = true; + tx_q->tx_skbuff_dma[entry].is_jumbo = false; + + tx_q->xdpf[entry] = xdpf; + + stmmac_set_desc_addr(priv, tx_desc, dma_addr); + + stmmac_prepare_tx_desc(priv, tx_desc, 1, xdpf->len, + true, priv->mode, true, true, + xdpf->len); + + tx_q->tx_count_frames++; + + if (tx_q->tx_count_frames % priv->tx_coal_frames[queue] == 0) + set_ic = true; + else + set_ic = false; + + if (set_ic) { + tx_q->tx_count_frames = 0; + stmmac_set_tx_ic(priv, tx_desc); + priv->xstats.tx_set_ic_bit++; + } + + stmmac_enable_dma_transmission(priv, priv->ioaddr); + + entry = STMMAC_GET_ENTRY(entry, priv->dma_tx_size); + tx_q->cur_tx = entry; + + return STMMAC_XDP_TX; +} + +static int stmmac_xdp_get_tx_queue(struct stmmac_priv *priv, + int cpu) +{ + int index = cpu; + + if (unlikely(index < 0)) + index = 0; + + while (index >= priv->plat->tx_queues_to_use) + index -= priv->plat->tx_queues_to_use; + + return index; +} + +static int stmmac_xdp_xmit_back(struct stmmac_priv *priv, + struct xdp_buff *xdp) +{ + struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp); + int cpu = smp_processor_id(); + struct netdev_queue *nq; + int queue; + int res; + + if (unlikely(!xdpf)) + return STMMAC_XDP_CONSUMED; + + queue = stmmac_xdp_get_tx_queue(priv, cpu); + nq = netdev_get_tx_queue(priv->dev, queue); + + __netif_tx_lock(nq, cpu); + /* Avoids TX time-out as we are sharing with 
slow path */ + nq->trans_start = jiffies; + + res = stmmac_xdp_xmit_xdpf(priv, queue, xdpf, false); + if (res == STMMAC_XDP_TX) + stmmac_flush_tx_descriptors(priv, queue); + + __netif_tx_unlock(nq); + + return res; +} + +/* This function assumes rcu_read_lock() is held by the caller. */ +static int __stmmac_xdp_run_prog(struct stmmac_priv *priv, + struct bpf_prog *prog, + struct xdp_buff *xdp) +{ + u32 act; + int res; + + act = bpf_prog_run_xdp(prog, xdp); + switch (act) { + case XDP_PASS: + res = STMMAC_XDP_PASS; + break; + case XDP_TX: + res = stmmac_xdp_xmit_back(priv, xdp); + break; + case XDP_REDIRECT: + if (xdp_do_redirect(priv->dev, xdp, prog) < 0) + res = STMMAC_XDP_CONSUMED; + else + res = STMMAC_XDP_REDIRECT; + break; + default: + bpf_warn_invalid_xdp_action(act); + fallthrough; + case XDP_ABORTED: + trace_xdp_exception(priv->dev, prog, act); + fallthrough; + case XDP_DROP: + res = STMMAC_XDP_CONSUMED; + break; + } + + return res; +} + +static struct sk_buff *stmmac_xdp_run_prog(struct stmmac_priv *priv, + struct xdp_buff *xdp) +{ + struct bpf_prog *prog; + int res; + + rcu_read_lock(); + + prog = READ_ONCE(priv->xdp_prog); + if (!prog) { + res = STMMAC_XDP_PASS; + goto unlock; + } + + res = __stmmac_xdp_run_prog(priv, prog, xdp); +unlock: + rcu_read_unlock(); + return ERR_PTR(-res); +} + +static void stmmac_finalize_xdp_rx(struct stmmac_priv *priv, + int xdp_status) +{ + int cpu = smp_processor_id(); + int queue; + + queue = stmmac_xdp_get_tx_queue(priv, cpu); + + if (xdp_status & STMMAC_XDP_TX) + stmmac_tx_timer_arm(priv, queue); + + if (xdp_status & STMMAC_XDP_REDIRECT) + xdp_do_flush(); +} + +static struct sk_buff *stmmac_construct_skb_zc(struct stmmac_channel *ch, + struct xdp_buff *xdp) +{ + unsigned int metasize = xdp->data - xdp->data_meta; + unsigned int datasize = xdp->data_end - xdp->data; + struct sk_buff *skb; + + skb = __napi_alloc_skb(&ch->rxtx_napi, + xdp->data_end - xdp->data_hard_start, + GFP_ATOMIC | __GFP_NOWARN); + if (unlikely(!skb)) + return NULL; + + skb_reserve(skb, xdp->data - xdp->data_hard_start); + memcpy(__skb_put(skb, datasize), xdp->data, datasize); + if (metasize) + skb_metadata_set(skb, metasize); + + return skb; +} + +static void stmmac_dispatch_skb_zc(struct stmmac_priv *priv, u32 queue, + struct dma_desc *p, struct dma_desc *np, + struct xdp_buff *xdp) +{ + struct stmmac_channel *ch = &priv->channel[queue]; + unsigned int len = xdp->data_end - xdp->data; + enum pkt_hash_types hash_type; + int coe = priv->hw->rx_csum; + struct sk_buff *skb; + u32 hash; + + skb = stmmac_construct_skb_zc(ch, xdp); + if (!skb) { + priv->dev->stats.rx_dropped++; + return; + } + + stmmac_get_rx_hwtstamp(priv, p, np, skb); + stmmac_rx_vlan(priv->dev, skb); + skb->protocol = eth_type_trans(skb, priv->dev); + + if (unlikely(!coe)) + skb_checksum_none_assert(skb); + else + skb->ip_summed = CHECKSUM_UNNECESSARY; + + if (!stmmac_get_rx_hash(priv, p, &hash, &hash_type)) + skb_set_hash(skb, hash, hash_type); + + skb_record_rx_queue(skb, queue); + napi_gro_receive(&ch->rxtx_napi, skb); + + priv->dev->stats.rx_packets++; + priv->dev->stats.rx_bytes += len; +} + +static bool stmmac_rx_refill_zc(struct stmmac_priv *priv, u32 queue, u32 budget) +{ + struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + unsigned int entry = rx_q->dirty_rx; + struct dma_desc *rx_desc = NULL; + bool ret = true; + + budget = min(budget, stmmac_rx_dirty(priv, queue)); + + while (budget-- > 0 && entry != rx_q->cur_rx) { + struct stmmac_rx_buffer *buf = &rx_q->buf_pool[entry]; + dma_addr_t dma_addr; 
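/*
 * Illustrative sketch, not part of the patch: __stmmac_xdp_run_prog() above
 * folds the XDP verdict into driver-local result bits so the RX loop can
 * batch its follow-up work (arm the TX timer once for XDP_TX, flush
 * redirects once per poll in stmmac_finalize_xdp_rx()). A standalone model
 * of that folding; the bit values are stand-ins.
 */
#include <stdio.h>

#define XDP_RES_PASS     0x1
#define XDP_RES_CONSUMED 0x2
#define XDP_RES_TX       0x4
#define XDP_RES_REDIRECT 0x8

enum verdict { V_PASS, V_TX, V_REDIRECT, V_DROP };

static int fold_verdict(enum verdict v, int redirect_ok)
{
	switch (v) {
	case V_PASS:
		return XDP_RES_PASS;
	case V_TX:
		return XDP_RES_TX;
	case V_REDIRECT:
		return redirect_ok ? XDP_RES_REDIRECT : XDP_RES_CONSUMED;
	case V_DROP:
	default:
		return XDP_RES_CONSUMED; /* aborted/unknown drop as well */
	}
}

int main(void)
{
	int status = 0;

	status |= fold_verdict(V_TX, 1);
	status |= fold_verdict(V_REDIRECT, 1);

	if (status & XDP_RES_TX)
		puts("arm TX timer once");
	if (status & XDP_RES_REDIRECT)
		puts("xdp_do_flush once");
	return 0;
}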
+ bool use_rx_wd; + + if (!buf->xdp) { + buf->xdp = xsk_buff_alloc(rx_q->xsk_pool); + if (!buf->xdp) { + ret = false; + break; + } + } + + if (priv->extend_desc) + rx_desc = (struct dma_desc *)(rx_q->dma_erx + entry); + else + rx_desc = rx_q->dma_rx + entry; + + dma_addr = xsk_buff_xdp_get_dma(buf->xdp); + stmmac_set_desc_addr(priv, rx_desc, dma_addr); + stmmac_set_desc_sec_addr(priv, rx_desc, 0, false); + stmmac_refill_desc3(priv, rx_q, rx_desc); + + rx_q->rx_count_frames++; + rx_q->rx_count_frames += priv->rx_coal_frames[queue]; + if (rx_q->rx_count_frames > priv->rx_coal_frames[queue]) + rx_q->rx_count_frames = 0; + + use_rx_wd = !priv->rx_coal_frames[queue]; + use_rx_wd |= rx_q->rx_count_frames > 0; + if (!priv->use_riwt) + use_rx_wd = false; + + dma_wmb(); + stmmac_set_rx_owner(priv, rx_desc, use_rx_wd); + + entry = STMMAC_GET_ENTRY(entry, priv->dma_rx_size); + } + + if (rx_desc) { + rx_q->dirty_rx = entry; + rx_q->rx_tail_addr = rx_q->dma_rx_phy + + (rx_q->dirty_rx * sizeof(struct dma_desc)); + stmmac_set_rx_tail_ptr(priv, priv->ioaddr, rx_q->rx_tail_addr, queue); + } + + return ret; +} + +static int stmmac_rx_zc(struct stmmac_priv *priv, int limit, u32 queue) +{ + struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + unsigned int count = 0, error = 0, len = 0; + int dirty = stmmac_rx_dirty(priv, queue); + unsigned int next_entry = rx_q->cur_rx; + unsigned int desc_size; + struct bpf_prog *prog; + bool failure = false; + int xdp_status = 0; + int status = 0; + + if (netif_msg_rx_status(priv)) { + void *rx_head; + + netdev_dbg(priv->dev, "%s: descriptor ring:\n", __func__); + if (priv->extend_desc) { + rx_head = (void *)rx_q->dma_erx; + desc_size = sizeof(struct dma_extended_desc); + } else { + rx_head = (void *)rx_q->dma_rx; + desc_size = sizeof(struct dma_desc); + } + + stmmac_display_ring(priv, rx_head, priv->dma_rx_size, true, + rx_q->dma_rx_phy, desc_size); + } + while (count < limit) { + struct stmmac_rx_buffer *buf; + unsigned int buf1_len = 0; + struct dma_desc *np, *p; + int entry; + int res; + + if (!count && rx_q->state_saved) { + error = rx_q->state.error; + len = rx_q->state.len; + } else { + rx_q->state_saved = false; + error = 0; + len = 0; + } + + if (count >= limit) + break; + +read_again: + buf1_len = 0; + entry = next_entry; + buf = &rx_q->buf_pool[entry]; + + if (dirty >= STMMAC_RX_FILL_BATCH) { + failure = failure || + !stmmac_rx_refill_zc(priv, queue, dirty); + dirty = 0; + } + + if (priv->extend_desc) + p = (struct dma_desc *)(rx_q->dma_erx + entry); + else + p = rx_q->dma_rx + entry; + + /* read the status of the incoming frame */ + status = stmmac_rx_status(priv, &priv->dev->stats, + &priv->xstats, p); + /* check if managed by the DMA otherwise go ahead */ + if (unlikely(status & dma_own)) + break; + + /* Prefetch the next RX descriptor */ + rx_q->cur_rx = STMMAC_GET_ENTRY(rx_q->cur_rx, + priv->dma_rx_size); + next_entry = rx_q->cur_rx; + + if (priv->extend_desc) + np = (struct dma_desc *)(rx_q->dma_erx + next_entry); + else + np = rx_q->dma_rx + next_entry; + + prefetch(np); + + if (priv->extend_desc) + stmmac_rx_extended_status(priv, &priv->dev->stats, + &priv->xstats, + rx_q->dma_erx + entry); + if (unlikely(status == discard_frame)) { + xsk_buff_free(buf->xdp); + buf->xdp = NULL; + dirty++; + error = 1; + if (!priv->hwts_rx_en) + priv->dev->stats.rx_errors++; + } + + if (unlikely(error && (status & rx_not_ls))) + goto read_again; + if (unlikely(error)) { + count++; + continue; + } + + /* Ensure a valid XSK buffer before proceed */ + if (!buf->xdp) + 
break; + + /* XSK pool expects RX frame 1:1 mapped to XSK buffer */ + if (likely(status & rx_not_ls)) { + xsk_buff_free(buf->xdp); + buf->xdp = NULL; + dirty++; + count++; + goto read_again; + } + + /* XDP ZC Frame only support primary buffers for now */ + buf1_len = stmmac_rx_buf1_len(priv, p, status, len); + len += buf1_len; + + /* ACS is set; GMAC core strips PAD/FCS for IEEE 802.3 + * Type frames (LLC/LLC-SNAP) + * + * llc_snap is never checked in GMAC >= 4, so this ACS + * feature is always disabled and packets need to be + * stripped manually. + */ + if (likely(!(status & rx_not_ls)) && + (likely(priv->synopsys_id >= DWMAC_CORE_4_00) || + unlikely(status != llc_snap))) { + buf1_len -= ETH_FCS_LEN; + len -= ETH_FCS_LEN; + } + + /* RX buffer is good and fit into a XSK pool buffer */ + buf->xdp->data_end = buf->xdp->data + buf1_len; + xsk_buff_dma_sync_for_cpu(buf->xdp, rx_q->xsk_pool); + + rcu_read_lock(); + prog = READ_ONCE(priv->xdp_prog); + res = __stmmac_xdp_run_prog(priv, prog, buf->xdp); + rcu_read_unlock(); + + switch (res) { + case STMMAC_XDP_PASS: + stmmac_dispatch_skb_zc(priv, queue, p, np, buf->xdp); + xsk_buff_free(buf->xdp); + break; + case STMMAC_XDP_CONSUMED: + xsk_buff_free(buf->xdp); + priv->dev->stats.rx_dropped++; + break; + case STMMAC_XDP_TX: + case STMMAC_XDP_REDIRECT: + xdp_status |= res; + break; + } + + buf->xdp = NULL; + dirty++; + count++; + } + + if (status & rx_not_ls) { + rx_q->state_saved = true; + rx_q->state.error = error; + rx_q->state.len = len; + } + + stmmac_finalize_xdp_rx(priv, xdp_status); + + if (xsk_uses_need_wakeup(rx_q->xsk_pool)) { + if (failure || stmmac_rx_dirty(priv, queue) > 0) + xsk_set_rx_need_wakeup(rx_q->xsk_pool); + else + xsk_clear_rx_need_wakeup(rx_q->xsk_pool); + + return (int)count; + } + + return failure ? limit : (int)count; +} + /** * stmmac_rx - manage the receive process * @priv: driver private structure @@ -3833,8 +5035,15 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) unsigned int count = 0, error = 0, len = 0; int status = 0, coe = priv->hw->rx_csum; unsigned int next_entry = rx_q->cur_rx; + enum dma_data_direction dma_dir; unsigned int desc_size; struct sk_buff *skb = NULL; + struct xdp_buff xdp; + int xdp_status = 0; + int buf_sz; + + dma_dir = page_pool_get_dma_dir(rx_q->page_pool); + buf_sz = DIV_ROUND_UP(priv->dma_buf_sz, PAGE_SIZE) * PAGE_SIZE; if (netif_msg_rx_status(priv)) { void *rx_head; @@ -3952,6 +5161,64 @@ read_again: } if (!skb) { + unsigned int pre_len, sync_len; + + dma_sync_single_for_cpu(priv->device, buf->addr, + buf1_len, dma_dir); + + xdp.data = page_address(buf->page) + buf->page_offset; + xdp.data_end = xdp.data + buf1_len; + xdp.data_hard_start = page_address(buf->page); + xdp_set_data_meta_invalid(&xdp); + xdp.frame_sz = buf_sz; + xdp.rxq = &rx_q->xdp_rxq; + + pre_len = xdp.data_end - xdp.data_hard_start - + buf->page_offset; + skb = stmmac_xdp_run_prog(priv, &xdp); + /* Due xdp_adjust_tail: DMA sync for_device + * cover max len CPU touch + */ + sync_len = xdp.data_end - xdp.data_hard_start - + buf->page_offset; + sync_len = max(sync_len, pre_len); + + /* For Not XDP_PASS verdict */ + if (IS_ERR(skb)) { + unsigned int xdp_res = -PTR_ERR(skb); + + if (xdp_res & STMMAC_XDP_CONSUMED) { + page_pool_put_page(rx_q->page_pool, + virt_to_head_page(xdp.data), + sync_len, true); + buf->page = NULL; + priv->dev->stats.rx_dropped++; + + /* Clear skb as it was set as + * status by XDP program. 
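/*
 * Illustrative sketch, not part of the patch: an XDP program may grow or
 * shrink the frame with bpf_xdp_adjust_tail(), which is why the recycle
 * path above computes pre_len before running the program and sync_len
 * after, then syncs back the larger of the two - everything the CPU may
 * have touched. A standalone model of that bookkeeping:
 */
#include <stdio.h>

static unsigned int sync_len_for_recycle(unsigned int pre_len,
					 unsigned int post_len)
{
	/* cover the maximum extent the CPU wrote or read */
	return post_len > pre_len ? post_len : pre_len;
}

int main(void)
{
	/* program shrank a 1500 byte frame to 64 bytes */
	printf("sync %u bytes\n", sync_len_for_recycle(1500, 64));
	/* program grew a 64 byte frame to 128 bytes */
	printf("sync %u bytes\n", sync_len_for_recycle(64, 128));
	return 0;
}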
+ */ + skb = NULL; + + if (unlikely((status & rx_not_ls))) + goto read_again; + + count++; + continue; + } else if (xdp_res & (STMMAC_XDP_TX | + STMMAC_XDP_REDIRECT)) { + xdp_status |= xdp_res; + buf->page = NULL; + skb = NULL; + count++; + continue; + } + } + } + + if (!skb) { + /* XDP program may expand or reduce tail */ + buf1_len = xdp.data_end - xdp.data; + skb = napi_alloc_skb(&ch->rx_napi, buf1_len); if (!skb) { priv->dev->stats.rx_dropped++; @@ -3959,10 +5226,8 @@ read_again: goto drain_data; } - dma_sync_single_for_cpu(priv->device, buf->addr, - buf1_len, DMA_FROM_DEVICE); - skb_copy_to_linear_data(skb, page_address(buf->page), - buf1_len); + /* XDP program may adjust header */ + skb_copy_to_linear_data(skb, xdp.data, buf1_len); skb_put(skb, buf1_len); /* Data payload copied into SKB, page ready for recycle */ @@ -3970,9 +5235,9 @@ read_again: buf->page = NULL; } else if (buf1_len) { dma_sync_single_for_cpu(priv->device, buf->addr, - buf1_len, DMA_FROM_DEVICE); + buf1_len, dma_dir); skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, - buf->page, 0, buf1_len, + buf->page, buf->page_offset, buf1_len, priv->dma_buf_sz); /* Data payload appended into SKB */ @@ -3982,7 +5247,7 @@ read_again: if (buf2_len) { dma_sync_single_for_cpu(priv->device, buf->sec_addr, - buf2_len, DMA_FROM_DEVICE); + buf2_len, dma_dir); skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, buf->sec_page, 0, buf2_len, priv->dma_buf_sz); @@ -4028,6 +5293,8 @@ drain_data: rx_q->state.len = len; } + stmmac_finalize_xdp_rx(priv, xdp_status); + stmmac_rx_refill(priv, queue); priv->xstats.rx_pkt_n += count; @@ -4067,7 +5334,7 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget) priv->xstats.napi_poll++; - work_done = stmmac_tx_clean(priv, priv->dma_tx_size, chan); + work_done = stmmac_tx_clean(priv, budget, chan); work_done = min(work_done, budget); if (work_done < budget && napi_complete_done(napi, work_done)) { @@ -4081,6 +5348,42 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget) return work_done; } +static int stmmac_napi_poll_rxtx(struct napi_struct *napi, int budget) +{ + struct stmmac_channel *ch = + container_of(napi, struct stmmac_channel, rxtx_napi); + struct stmmac_priv *priv = ch->priv_data; + int rx_done, tx_done; + u32 chan = ch->index; + + priv->xstats.napi_poll++; + + tx_done = stmmac_tx_clean(priv, budget, chan); + tx_done = min(tx_done, budget); + + rx_done = stmmac_rx_zc(priv, budget, chan); + + /* If either TX or RX work is not complete, return budget + * and keep polling + */ + if (tx_done >= budget || rx_done >= budget) + return budget; + + /* all work done, exit the polling mode */ + if (napi_complete_done(napi, rx_done)) { + unsigned long flags; + + spin_lock_irqsave(&ch->lock, flags); + /* Both RX and TX work are complete, + * so enable both RX & TX IRQs. 
+ */ + stmmac_enable_dma_irq(priv, priv->ioaddr, chan, 1, 1); + spin_unlock_irqrestore(&ch->lock, flags); + } + + return min(rx_done, budget - 1); +} + /** * stmmac_tx_timeout * @dev : Pointer to net device structure @@ -4140,6 +5443,11 @@ static int stmmac_change_mtu(struct net_device *dev, int new_mtu) return -EBUSY; } + if (stmmac_xdp_is_enabled(priv) && new_mtu > ETH_DATA_LEN) { + netdev_dbg(priv->dev, "Jumbo frames not supported for XDP\n"); + return -EINVAL; + } + new_mtu = STMMAC_ALIGN(new_mtu); /* If condition true, FIFO is too small or MTU too large */ @@ -4201,27 +5509,57 @@ static int stmmac_set_features(struct net_device *netdev, stmmac_rx_ipc(priv, priv->hw); sph_en = (priv->hw->rx_csum > 0) && priv->sph; + for (chan = 0; chan < priv->plat->rx_queues_to_use; chan++) stmmac_enable_sph(priv, priv->ioaddr, sph_en, chan); return 0; } -/** - * stmmac_interrupt - main ISR - * @irq: interrupt number. - * @dev_id: to pass the net device pointer (must be valid). - * Description: this is the main driver interrupt service routine. - * It can call: - * o DMA service routine (to manage incoming frame reception and transmission - * status) - * o Core interrupts to manage: remote wake-up, management counter, LPI - * interrupts. - */ -static irqreturn_t stmmac_interrupt(int irq, void *dev_id) +static void stmmac_fpe_event_status(struct stmmac_priv *priv, int status) +{ + struct stmmac_fpe_cfg *fpe_cfg = priv->plat->fpe_cfg; + enum stmmac_fpe_state *lo_state = &fpe_cfg->lo_fpe_state; + enum stmmac_fpe_state *lp_state = &fpe_cfg->lp_fpe_state; + bool *hs_enable = &fpe_cfg->hs_enable; + + if (status == FPE_EVENT_UNKNOWN || !*hs_enable) + return; + + /* If LP has sent verify mPacket, LP is FPE capable */ + if ((status & FPE_EVENT_RVER) == FPE_EVENT_RVER) { + if (*lp_state < FPE_STATE_CAPABLE) + *lp_state = FPE_STATE_CAPABLE; + + /* If user has requested FPE enable, quickly response */ + if (*hs_enable) + stmmac_fpe_send_mpacket(priv, priv->ioaddr, + MPACKET_RESPONSE); + } + + /* If Local has sent verify mPacket, Local is FPE capable */ + if ((status & FPE_EVENT_TVER) == FPE_EVENT_TVER) { + if (*lo_state < FPE_STATE_CAPABLE) + *lo_state = FPE_STATE_CAPABLE; + } + + /* If LP has sent response mPacket, LP is entering FPE ON */ + if ((status & FPE_EVENT_RRSP) == FPE_EVENT_RRSP) + *lp_state = FPE_STATE_ENTERING_ON; + + /* If Local has sent response mPacket, Local is entering FPE ON */ + if ((status & FPE_EVENT_TRSP) == FPE_EVENT_TRSP) + *lo_state = FPE_STATE_ENTERING_ON; + + if (!test_bit(__FPE_REMOVING, &priv->fpe_task_state) && + !test_and_set_bit(__FPE_TASK_SCHED, &priv->fpe_task_state) && + priv->fpe_wq) { + queue_work(priv->fpe_wq, &priv->fpe_task); + } +} + +static void stmmac_common_interrupt(struct stmmac_priv *priv) { - struct net_device *dev = (struct net_device *)dev_id; - struct stmmac_priv *priv = netdev_priv(dev); u32 rx_cnt = priv->plat->rx_queues_to_use; u32 tx_cnt = priv->plat->tx_queues_to_use; u32 queues_count; @@ -4234,12 +5572,16 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id) if (priv->irq_wake) pm_wakeup_event(priv->device, 0); - /* Check if adapter is up */ - if (test_bit(STMMAC_DOWN, &priv->state)) - return IRQ_HANDLED; - /* Check if a fatal error happened */ - if (stmmac_safety_feat_interrupt(priv)) - return IRQ_HANDLED; + if (priv->dma_cap.estsel) + stmmac_est_irq_status(priv, priv->ioaddr, priv->dev, + &priv->xstats, tx_cnt); + + if (priv->dma_cap.fpesel) { + int status = stmmac_fpe_irq_status(priv, priv->ioaddr, + priv->dev); + + stmmac_fpe_event_status(priv, 
status); + } /* To handle GMAC own interrupts */ if ((priv->plat->has_gmac) || xmac) { @@ -4271,11 +5613,41 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id) /* PCS link status */ if (priv->hw->pcs) { if (priv->xstats.pcs_link) - netif_carrier_on(dev); + netif_carrier_on(priv->dev); else - netif_carrier_off(dev); + netif_carrier_off(priv->dev); } + + stmmac_timestamp_interrupt(priv, priv); } +} + +/** + * stmmac_interrupt - main ISR + * @irq: interrupt number. + * @dev_id: to pass the net device pointer. + * Description: this is the main driver interrupt service routine. + * It can call: + * o DMA service routine (to manage incoming frame reception and transmission + * status) + * o Core interrupts to manage: remote wake-up, management counter, LPI + * interrupts. + */ +static irqreturn_t stmmac_interrupt(int irq, void *dev_id) +{ + struct net_device *dev = (struct net_device *)dev_id; + struct stmmac_priv *priv = netdev_priv(dev); + + /* Check if adapter is up */ + if (test_bit(STMMAC_DOWN, &priv->state)) + return IRQ_HANDLED; + + /* Check if a fatal error happened */ + if (stmmac_safety_feat_interrupt(priv)) + return IRQ_HANDLED; + + /* To handle Common interrupts */ + stmmac_common_interrupt(priv); /* To handle DMA interrupts */ stmmac_dma_interrupt(priv); @@ -4283,15 +5655,136 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } +static irqreturn_t stmmac_mac_interrupt(int irq, void *dev_id) +{ + struct net_device *dev = (struct net_device *)dev_id; + struct stmmac_priv *priv = netdev_priv(dev); + + if (unlikely(!dev)) { + netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); + return IRQ_NONE; + } + + /* Check if adapter is up */ + if (test_bit(STMMAC_DOWN, &priv->state)) + return IRQ_HANDLED; + + /* To handle Common interrupts */ + stmmac_common_interrupt(priv); + + return IRQ_HANDLED; +} + +static irqreturn_t stmmac_safety_interrupt(int irq, void *dev_id) +{ + struct net_device *dev = (struct net_device *)dev_id; + struct stmmac_priv *priv = netdev_priv(dev); + + if (unlikely(!dev)) { + netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); + return IRQ_NONE; + } + + /* Check if adapter is up */ + if (test_bit(STMMAC_DOWN, &priv->state)) + return IRQ_HANDLED; + + /* Check if a fatal error happened */ + stmmac_safety_feat_interrupt(priv); + + return IRQ_HANDLED; +} + +static irqreturn_t stmmac_msi_intr_tx(int irq, void *data) +{ + struct stmmac_tx_queue *tx_q = (struct stmmac_tx_queue *)data; + int chan = tx_q->queue_index; + struct stmmac_priv *priv; + int status; + + priv = container_of(tx_q, struct stmmac_priv, tx_queue[chan]); + + if (unlikely(!data)) { + netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); + return IRQ_NONE; + } + + /* Check if adapter is up */ + if (test_bit(STMMAC_DOWN, &priv->state)) + return IRQ_HANDLED; + + status = stmmac_napi_check(priv, chan, DMA_DIR_TX); + + if (unlikely(status & tx_hard_error_bump_tc)) { + /* Try to bump up the dma threshold on this failure */ + if (unlikely(priv->xstats.threshold != SF_DMA_MODE) && + tc <= 256) { + tc += 64; + if (priv->plat->force_thresh_dma_mode) + stmmac_set_dma_operation_mode(priv, + tc, + tc, + chan); + else + stmmac_set_dma_operation_mode(priv, + tc, + SF_DMA_MODE, + chan); + priv->xstats.threshold = tc; + } + } else if (unlikely(status == tx_hard_error)) { + stmmac_tx_err(priv, chan); + } + + return IRQ_HANDLED; +} + +static irqreturn_t stmmac_msi_intr_rx(int irq, void *data) +{ + struct stmmac_rx_queue *rx_q = (struct stmmac_rx_queue *)data; + 
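The per-channel MSI handlers above recover the driver context from the queue structure passed as dev_id via container_of(), rather than from the net_device. Note that they derive priv from data (and dereference it in the netdev_err() call) before the !data sanity check, so that check can never fire meaningfully. A minimal sketch of the safer ordering of the same pattern; the names my_priv and my_rx_queue are illustrative, not the driver's:

#include <linux/interrupt.h>
#include <linux/kernel.h>

struct my_rx_queue {
        u32 queue_index;
};

struct my_priv {
        struct my_rx_queue rx_queue[8];
};

static irqreturn_t my_msi_intr_rx(int irq, void *data)
{
        struct my_rx_queue *rx_q = data;
        struct my_priv *priv;

        if (unlikely(!data))            /* validate before deriving priv */
                return IRQ_NONE;

        priv = container_of(rx_q, struct my_priv,
                            rx_queue[rx_q->queue_index]);
        /* ... check adapter state, then schedule NAPI for this channel ... */
        (void)priv;
        return IRQ_HANDLED;
}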
int chan = rx_q->queue_index; + struct stmmac_priv *priv; + + priv = container_of(rx_q, struct stmmac_priv, rx_queue[chan]); + + if (unlikely(!data)) { + netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__); + return IRQ_NONE; + } + + /* Check if adapter is up */ + if (test_bit(STMMAC_DOWN, &priv->state)) + return IRQ_HANDLED; + + stmmac_napi_check(priv, chan, DMA_DIR_RX); + + return IRQ_HANDLED; +} + #ifdef CONFIG_NET_POLL_CONTROLLER /* Polling receive - used by NETCONSOLE and other diagnostic tools * to allow network I/O with interrupts disabled. */ static void stmmac_poll_controller(struct net_device *dev) { - disable_irq(dev->irq); - stmmac_interrupt(dev->irq, dev); - enable_irq(dev->irq); + struct stmmac_priv *priv = netdev_priv(dev); + int i; + + /* If adapter is down, do nothing */ + if (test_bit(STMMAC_DOWN, &priv->state)) + return; + + if (priv->plat->multi_msi_en) { + for (i = 0; i < priv->plat->rx_queues_to_use; i++) + stmmac_msi_intr_rx(0, &priv->rx_queue[i]); + + for (i = 0; i < priv->plat->tx_queues_to_use; i++) + stmmac_msi_intr_tx(0, &priv->tx_queue[i]); + } else { + disable_irq(dev->irq); + stmmac_interrupt(dev->irq, dev); + enable_irq(dev->irq); + } } #endif @@ -4340,7 +5833,7 @@ static int stmmac_setup_tc_block_cb(enum tc_setup_type type, void *type_data, if (!tc_cls_can_offload_and_chain0(priv->dev, type_data)) return ret; - stmmac_disable_all_queues(priv); + __stmmac_disable_all_queues(priv); switch (type) { case TC_SETUP_CLSU32: @@ -4704,6 +6197,12 @@ static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid bool is_double = false; int ret; + ret = pm_runtime_get_sync(priv->device); + if (ret < 0) { + pm_runtime_put_noidle(priv->device); + return ret; + } + if (be16_to_cpu(proto) == ETH_P_8021AD) is_double = true; @@ -4737,10 +6236,222 @@ static int stmmac_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vi if (priv->hw->num_vlan) { ret = stmmac_del_hw_vlan_rx_fltr(priv, ndev, priv->hw, proto, vid); if (ret) - return ret; + goto del_vlan_error; } - return stmmac_vlan_update(priv, is_double); + ret = stmmac_vlan_update(priv, is_double); + +del_vlan_error: + pm_runtime_put(priv->device); + + return ret; +} + +static int stmmac_bpf(struct net_device *dev, struct netdev_bpf *bpf) +{ + struct stmmac_priv *priv = netdev_priv(dev); + + switch (bpf->command) { + case XDP_SETUP_PROG: + return stmmac_xdp_set_prog(priv, bpf->prog, bpf->extack); + case XDP_SETUP_XSK_POOL: + return stmmac_xdp_setup_pool(priv, bpf->xsk.pool, + bpf->xsk.queue_id); + default: + return -EOPNOTSUPP; + } +} + +static int stmmac_xdp_xmit(struct net_device *dev, int num_frames, + struct xdp_frame **frames, u32 flags) +{ + struct stmmac_priv *priv = netdev_priv(dev); + int cpu = smp_processor_id(); + struct netdev_queue *nq; + int i, nxmit = 0; + int queue; + + if (unlikely(test_bit(STMMAC_DOWN, &priv->state))) + return -ENETDOWN; + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + return -EINVAL; + + queue = stmmac_xdp_get_tx_queue(priv, cpu); + nq = netdev_get_tx_queue(priv->dev, queue); + + __netif_tx_lock(nq, cpu); + /* Avoids TX time-out as we are sharing with slow path */ + nq->trans_start = jiffies; + + for (i = 0; i < num_frames; i++) { + int res; + + res = stmmac_xdp_xmit_xdpf(priv, queue, frames[i], true); + if (res == STMMAC_XDP_CONSUMED) + break; + + nxmit++; + } + + if (flags & XDP_XMIT_FLUSH) { + stmmac_flush_tx_descriptors(priv, queue); + stmmac_tx_timer_arm(priv, queue); + } + + __netif_tx_unlock(nq); + + return nxmit; +} + +void 
stmmac_disable_rx_queue(struct stmmac_priv *priv, u32 queue) +{ + struct stmmac_channel *ch = &priv->channel[queue]; + unsigned long flags; + + spin_lock_irqsave(&ch->lock, flags); + stmmac_disable_dma_irq(priv, priv->ioaddr, queue, 1, 0); + spin_unlock_irqrestore(&ch->lock, flags); + + stmmac_stop_rx_dma(priv, queue); + __free_dma_rx_desc_resources(priv, queue); +} + +void stmmac_enable_rx_queue(struct stmmac_priv *priv, u32 queue) +{ + struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + struct stmmac_channel *ch = &priv->channel[queue]; + unsigned long flags; + u32 buf_size; + int ret; + + ret = __alloc_dma_rx_desc_resources(priv, queue); + if (ret) { + netdev_err(priv->dev, "Failed to alloc RX desc.\n"); + return; + } + + ret = __init_dma_rx_desc_rings(priv, queue, GFP_KERNEL); + if (ret) { + __free_dma_rx_desc_resources(priv, queue); + netdev_err(priv->dev, "Failed to init RX desc.\n"); + return; + } + + stmmac_clear_rx_descriptors(priv, queue); + + stmmac_init_rx_chan(priv, priv->ioaddr, priv->plat->dma_cfg, + rx_q->dma_rx_phy, rx_q->queue_index); + + rx_q->rx_tail_addr = rx_q->dma_rx_phy + (rx_q->buf_alloc_num * + sizeof(struct dma_desc)); + stmmac_set_rx_tail_ptr(priv, priv->ioaddr, + rx_q->rx_tail_addr, rx_q->queue_index); + + if (rx_q->xsk_pool && rx_q->buf_alloc_num) { + buf_size = xsk_pool_get_rx_frame_size(rx_q->xsk_pool); + stmmac_set_dma_bfsize(priv, priv->ioaddr, + buf_size, + rx_q->queue_index); + } else { + stmmac_set_dma_bfsize(priv, priv->ioaddr, + priv->dma_buf_sz, + rx_q->queue_index); + } + + stmmac_start_rx_dma(priv, queue); + + spin_lock_irqsave(&ch->lock, flags); + stmmac_enable_dma_irq(priv, priv->ioaddr, queue, 1, 0); + spin_unlock_irqrestore(&ch->lock, flags); +} + +void stmmac_disable_tx_queue(struct stmmac_priv *priv, u32 queue) +{ + struct stmmac_channel *ch = &priv->channel[queue]; + unsigned long flags; + + spin_lock_irqsave(&ch->lock, flags); + stmmac_disable_dma_irq(priv, priv->ioaddr, queue, 0, 1); + spin_unlock_irqrestore(&ch->lock, flags); + + stmmac_stop_tx_dma(priv, queue); + __free_dma_tx_desc_resources(priv, queue); +} + +void stmmac_enable_tx_queue(struct stmmac_priv *priv, u32 queue) +{ + struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; + struct stmmac_channel *ch = &priv->channel[queue]; + unsigned long flags; + int ret; + + ret = __alloc_dma_tx_desc_resources(priv, queue); + if (ret) { + netdev_err(priv->dev, "Failed to alloc TX desc.\n"); + return; + } + + ret = __init_dma_tx_desc_rings(priv, queue); + if (ret) { + __free_dma_tx_desc_resources(priv, queue); + netdev_err(priv->dev, "Failed to init TX desc.\n"); + return; + } + + stmmac_clear_tx_descriptors(priv, queue); + + stmmac_init_tx_chan(priv, priv->ioaddr, priv->plat->dma_cfg, + tx_q->dma_tx_phy, tx_q->queue_index); + + if (tx_q->tbs & STMMAC_TBS_AVAIL) + stmmac_enable_tbs(priv, priv->ioaddr, 1, tx_q->queue_index); + + tx_q->tx_tail_addr = tx_q->dma_tx_phy; + stmmac_set_tx_tail_ptr(priv, priv->ioaddr, + tx_q->tx_tail_addr, tx_q->queue_index); + + stmmac_start_tx_dma(priv, queue); + + spin_lock_irqsave(&ch->lock, flags); + stmmac_enable_dma_irq(priv, priv->ioaddr, queue, 0, 1); + spin_unlock_irqrestore(&ch->lock, flags); +} + +int stmmac_xsk_wakeup(struct net_device *dev, u32 queue, u32 flags) +{ + struct stmmac_priv *priv = netdev_priv(dev); + struct stmmac_rx_queue *rx_q; + struct stmmac_tx_queue *tx_q; + struct stmmac_channel *ch; + + if (test_bit(STMMAC_DOWN, &priv->state) || + !netif_carrier_ok(priv->dev)) + return -ENETDOWN; + + if (!stmmac_xdp_is_enabled(priv)) + return 
-ENXIO; + + if (queue >= priv->plat->rx_queues_to_use || + queue >= priv->plat->tx_queues_to_use) + return -EINVAL; + + rx_q = &priv->rx_queue[queue]; + tx_q = &priv->tx_queue[queue]; + ch = &priv->channel[queue]; + + if (!rx_q->xsk_pool && !tx_q->xsk_pool) + return -ENXIO; + + if (!napi_if_scheduled_mark_missed(&ch->rxtx_napi)) { + /* EQoS does not have per-DMA channel SW interrupt, + * so we schedule RX Napi straight-away. + */ + if (likely(napi_schedule_prep(&ch->rxtx_napi))) + __napi_schedule(&ch->rxtx_napi); + } + + return 0; } static const struct net_device_ops stmmac_netdev_ops = { @@ -4761,6 +6472,9 @@ static const struct net_device_ops stmmac_netdev_ops = { .ndo_set_mac_address = stmmac_set_mac_address, .ndo_vlan_rx_add_vid = stmmac_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = stmmac_vlan_rx_kill_vid, + .ndo_bpf = stmmac_bpf, + .ndo_xdp_xmit = stmmac_xdp_xmit, + .ndo_xsk_wakeup = stmmac_xsk_wakeup, }; static void stmmac_reset_subtask(struct stmmac_priv *priv) @@ -4919,6 +6633,12 @@ static void stmmac_napi_add(struct net_device *dev) stmmac_napi_poll_tx, NAPI_POLL_WEIGHT); } + if (queue < priv->plat->rx_queues_to_use && + queue < priv->plat->tx_queues_to_use) { + netif_napi_add(dev, &ch->rxtx_napi, + stmmac_napi_poll_rxtx, + NAPI_POLL_WEIGHT); + } } } @@ -4936,6 +6656,10 @@ static void stmmac_napi_del(struct net_device *dev) netif_napi_del(&ch->rx_napi); if (queue < priv->plat->tx_queues_to_use) netif_napi_del(&ch->tx_napi); + if (queue < priv->plat->rx_queues_to_use && + queue < priv->plat->tx_queues_to_use) { + netif_napi_del(&ch->rxtx_napi); + } } } @@ -4977,6 +6701,68 @@ int stmmac_reinit_ringparam(struct net_device *dev, u32 rx_size, u32 tx_size) return ret; } +#define SEND_VERIFY_MPAKCET_FMT "Send Verify mPacket lo_state=%d lp_state=%d\n" +static void stmmac_fpe_lp_task(struct work_struct *work) +{ + struct stmmac_priv *priv = container_of(work, struct stmmac_priv, + fpe_task); + struct stmmac_fpe_cfg *fpe_cfg = priv->plat->fpe_cfg; + enum stmmac_fpe_state *lo_state = &fpe_cfg->lo_fpe_state; + enum stmmac_fpe_state *lp_state = &fpe_cfg->lp_fpe_state; + bool *hs_enable = &fpe_cfg->hs_enable; + bool *enable = &fpe_cfg->enable; + int retries = 20; + + while (retries-- > 0) { + /* Bail out immediately if FPE handshake is OFF */ + if (*lo_state == FPE_STATE_OFF || !*hs_enable) + break; + + if (*lo_state == FPE_STATE_ENTERING_ON && + *lp_state == FPE_STATE_ENTERING_ON) { + stmmac_fpe_configure(priv, priv->ioaddr, + priv->plat->tx_queues_to_use, + priv->plat->rx_queues_to_use, + *enable); + + netdev_info(priv->dev, "configured FPE\n"); + + *lo_state = FPE_STATE_ON; + *lp_state = FPE_STATE_ON; + netdev_info(priv->dev, "!!! 
BOTH FPE stations ON\n"); + break; + } + + if ((*lo_state == FPE_STATE_CAPABLE || + *lo_state == FPE_STATE_ENTERING_ON) && + *lp_state != FPE_STATE_ON) { + netdev_info(priv->dev, SEND_VERIFY_MPAKCET_FMT, + *lo_state, *lp_state); + stmmac_fpe_send_mpacket(priv, priv->ioaddr, + MPACKET_VERIFY); + } + /* Sleep then retry */ + msleep(500); + } + + clear_bit(__FPE_TASK_SCHED, &priv->fpe_task_state); +} + +void stmmac_fpe_handshake(struct stmmac_priv *priv, bool enable) +{ + if (priv->plat->fpe_cfg->hs_enable != enable) { + if (enable) { + stmmac_fpe_send_mpacket(priv, priv->ioaddr, + MPACKET_VERIFY); + } else { + priv->plat->fpe_cfg->lo_fpe_state = FPE_STATE_OFF; + priv->plat->fpe_cfg->lp_fpe_state = FPE_STATE_OFF; + } + + priv->plat->fpe_cfg->hs_enable = enable; + } +} + /** * stmmac_dvr_probe * @device: device pointer @@ -5012,12 +6798,19 @@ int stmmac_dvr_probe(struct device *device, priv->plat = plat_dat; priv->ioaddr = res->addr; priv->dev->base_addr = (unsigned long)res->addr; + priv->plat->dma_cfg->multi_msi_en = priv->plat->multi_msi_en; priv->dev->irq = res->irq; priv->wol_irq = res->wol_irq; priv->lpi_irq = res->lpi_irq; - - if (!IS_ERR_OR_NULL(res->mac)) + priv->sfty_ce_irq = res->sfty_ce_irq; + priv->sfty_ue_irq = res->sfty_ue_irq; + for (i = 0; i < MTL_MAX_RX_QUEUES; i++) + priv->rx_irq[i] = res->rx_irq[i]; + for (i = 0; i < MTL_MAX_TX_QUEUES; i++) + priv->tx_irq[i] = res->tx_irq[i]; + + if (!is_zero_ether_addr(res->mac)) memcpy(priv->dev->dev_addr, res->mac, ETH_ALEN); dev_set_drvdata(device, priv->dev); @@ -5025,6 +6818,10 @@ int stmmac_dvr_probe(struct device *device, /* Verify driver arguments */ stmmac_verify_args(); + priv->af_xdp_zc_qps = bitmap_zalloc(MTL_MAX_TX_QUEUES, GFP_KERNEL); + if (!priv->af_xdp_zc_qps) + return -ENOMEM; + /* Allocate workqueue */ priv->wq = create_singlethread_workqueue("stmmac_wq"); if (!priv->wq) { @@ -5034,6 +6831,9 @@ int stmmac_dvr_probe(struct device *device, INIT_WORK(&priv->service_task, stmmac_service_task); + /* Initialize Link Partner FPE workqueue */ + INIT_WORK(&priv->fpe_task, stmmac_fpe_lp_task); + /* Override with kernel parameters if supplied XXX CRS XXX * this needs to have multiple instances */ @@ -5055,6 +6855,11 @@ int stmmac_dvr_probe(struct device *device, if (ret) goto error_hw_init; + /* Only DWMAC core version 5.20 onwards supports HW descriptor prefetch. + */ + if (priv->synopsys_id < DWMAC_CORE_5_20) + priv->plat->dma_cfg->dche = false; + stmmac_check_ether_addr(priv); ndev->netdev_ops = &stmmac_netdev_ops; @@ -5077,7 +6882,8 @@ int stmmac_dvr_probe(struct device *device, if (priv->dma_cap.sphen) { ndev->hw_features |= NETIF_F_GRO; - priv->sph = true; + priv->sph_cap = true; + priv->sph = priv->sph_cap; dev_info(priv->device, "SPH feature enabled\n"); } @@ -5179,6 +6985,10 @@ int stmmac_dvr_probe(struct device *device, stmmac_check_pcs_mode(priv); + pm_runtime_get_noresume(device); + pm_runtime_set_active(device); + pm_runtime_enable(device); + if (priv->hw->pcs != STMMAC_PCS_TBI && priv->hw->pcs != STMMAC_PCS_RTBI) { /* MDIO bus Registration */ @@ -5216,6 +7026,11 @@ int stmmac_dvr_probe(struct device *device, stmmac_init_fs(ndev); #endif + /* Let pm_runtime_put() disable the clocks. + * If CONFIG_PM is not enabled, the clocks will stay powered. 
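The handshake worker stmmac_fpe_lp_task() above retries up to 20 times with 500 ms sleeps, so an unanswered verify exchange is abandoned after roughly 10 seconds. The MAC's preemption registers are only written once both the local side and the link partner have answered a verify mPacket, i.e. both are ENTERING_ON. A condensed model of that rendezvous, using illustrative names rather than the driver's:

enum fpe_state { FPE_OFF, FPE_CAPABLE, FPE_ENTERING_ON, FPE_ON };

/* Verify/response mPackets move each side OFF -> CAPABLE ->
 * ENTERING_ON; only when both sides reach ENTERING_ON may the
 * preemption registers be programmed and both states latched to ON.
 */
static bool fpe_may_configure(enum fpe_state lo, enum fpe_state lp)
{
        return lo == FPE_ENTERING_ON && lp == FPE_ENTERING_ON;
}

Deferring the register write to this worker, rather than doing it from the event path, also keeps the msleep()-based retry loop out of interrupt context.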
+ */ + pm_runtime_put(device); + return ret; error_serdes_powerup: @@ -5230,6 +7045,8 @@ error_mdio_register: stmmac_napi_del(ndev); error_hw_init: destroy_workqueue(priv->wq); + stmmac_bus_clks_config(priv, false); + bitmap_free(priv->af_xdp_zc_qps); return ret; } @@ -5265,13 +7082,14 @@ int stmmac_dvr_remove(struct device *dev) phylink_destroy(priv->phylink); if (priv->plat->stmmac_rst) reset_control_assert(priv->plat->stmmac_rst); - clk_disable_unprepare(priv->plat->pclk); - clk_disable_unprepare(priv->plat->stmmac_clk); + pm_runtime_put(dev); + pm_runtime_disable(dev); if (priv->hw->pcs != STMMAC_PCS_TBI && priv->hw->pcs != STMMAC_PCS_RTBI) stmmac_mdio_unregister(ndev); destroy_workqueue(priv->wq); mutex_destroy(&priv->lock); + bitmap_free(priv->af_xdp_zc_qps); return 0; } @@ -5289,6 +7107,7 @@ int stmmac_suspend(struct device *dev) struct net_device *ndev = dev_get_drvdata(dev); struct stmmac_priv *priv = netdev_priv(ndev); u32 chan; + int ret; if (!ndev || !netif_running(ndev)) return 0; @@ -5332,11 +7151,24 @@ int stmmac_suspend(struct device *dev) pinctrl_pm_select_sleep_state(priv->device); /* Disable clock in case of PWM is off */ clk_disable_unprepare(priv->plat->clk_ptp_ref); - clk_disable_unprepare(priv->plat->pclk); - clk_disable_unprepare(priv->plat->stmmac_clk); + ret = pm_runtime_force_suspend(dev); + if (ret) { + mutex_unlock(&priv->lock); + return ret; + } } + mutex_unlock(&priv->lock); + if (priv->dma_cap.fpesel) { + /* Disable FPE */ + stmmac_fpe_configure(priv, priv->ioaddr, + priv->plat->tx_queues_to_use, + priv->plat->rx_queues_to_use, false); + + stmmac_fpe_handshake(priv, false); + } + priv->speed = SPEED_UNKNOWN; return 0; } @@ -5399,8 +7231,9 @@ int stmmac_resume(struct device *dev) } else { pinctrl_pm_select_default_state(priv->device); /* enable the clk previously disabled */ - clk_prepare_enable(priv->plat->stmmac_clk); - clk_prepare_enable(priv->plat->pclk); + ret = pm_runtime_force_resume(dev); + if (ret) + return ret; if (priv->plat->clk_ptp_ref) clk_prepare_enable(priv->plat->clk_ptp_ref); /* reset the phy so that it's ready */ @@ -5428,7 +7261,7 @@ int stmmac_resume(struct device *dev) mutex_lock(&priv->lock); stmmac_reset_queues_param(priv); - stmmac_reinit_rx_buffers(priv); + stmmac_free_tx_skbufs(priv); stmmac_clear_descriptors(priv); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index d64116e0543e..b750074f8f9c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -15,6 +15,7 @@ #include <linux/iopoll.h> #include <linux/mii.h> #include <linux/of_mdio.h> +#include <linux/pm_runtime.h> #include <linux/phy.h> #include <linux/property.h> #include <linux/slab.h> @@ -87,21 +88,29 @@ static int stmmac_xgmac2_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg) u32 tmp, addr, value = MII_XGMAC_BUSY; int ret; + ret = pm_runtime_get_sync(priv->device); + if (ret < 0) { + pm_runtime_put_noidle(priv->device); + return ret; + } + /* Wait until any existing MII operation is complete */ if (readl_poll_timeout(priv->ioaddr + mii_data, tmp, - !(tmp & MII_XGMAC_BUSY), 100, 10000)) - return -EBUSY; + !(tmp & MII_XGMAC_BUSY), 100, 10000)) { + ret = -EBUSY; + goto err_disable_clks; + } if (phyreg & MII_ADDR_C45) { phyreg &= ~MII_ADDR_C45; ret = stmmac_xgmac2_c45_format(priv, phyaddr, phyreg, &addr); if (ret) - return ret; + goto err_disable_clks; } else { ret = stmmac_xgmac2_c22_format(priv, phyaddr, phyreg, &addr); if (ret) - return 
ret; + goto err_disable_clks; value |= MII_XGMAC_SADDR; } @@ -112,8 +121,10 @@ static int stmmac_xgmac2_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg) /* Wait until any existing MII operation is complete */ if (readl_poll_timeout(priv->ioaddr + mii_data, tmp, - !(tmp & MII_XGMAC_BUSY), 100, 10000)) - return -EBUSY; + !(tmp & MII_XGMAC_BUSY), 100, 10000)) { + ret = -EBUSY; + goto err_disable_clks; + } /* Set the MII address register to read */ writel(addr, priv->ioaddr + mii_address); @@ -121,11 +132,18 @@ static int stmmac_xgmac2_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg) /* Wait until any existing MII operation is complete */ if (readl_poll_timeout(priv->ioaddr + mii_data, tmp, - !(tmp & MII_XGMAC_BUSY), 100, 10000)) - return -EBUSY; + !(tmp & MII_XGMAC_BUSY), 100, 10000)) { + ret = -EBUSY; + goto err_disable_clks; + } /* Read the data from the MII data register */ - return readl(priv->ioaddr + mii_data) & GENMASK(15, 0); + ret = (int)readl(priv->ioaddr + mii_data) & GENMASK(15, 0); + +err_disable_clks: + pm_runtime_put(priv->device); + + return ret; } static int stmmac_xgmac2_mdio_write(struct mii_bus *bus, int phyaddr, @@ -138,21 +156,29 @@ static int stmmac_xgmac2_mdio_write(struct mii_bus *bus, int phyaddr, u32 addr, tmp, value = MII_XGMAC_BUSY; int ret; + ret = pm_runtime_get_sync(priv->device); + if (ret < 0) { + pm_runtime_put_noidle(priv->device); + return ret; + } + /* Wait until any existing MII operation is complete */ if (readl_poll_timeout(priv->ioaddr + mii_data, tmp, - !(tmp & MII_XGMAC_BUSY), 100, 10000)) - return -EBUSY; + !(tmp & MII_XGMAC_BUSY), 100, 10000)) { + ret = -EBUSY; + goto err_disable_clks; + } if (phyreg & MII_ADDR_C45) { phyreg &= ~MII_ADDR_C45; ret = stmmac_xgmac2_c45_format(priv, phyaddr, phyreg, &addr); if (ret) - return ret; + goto err_disable_clks; } else { ret = stmmac_xgmac2_c22_format(priv, phyaddr, phyreg, &addr); if (ret) - return ret; + goto err_disable_clks; value |= MII_XGMAC_SADDR; } @@ -164,16 +190,23 @@ static int stmmac_xgmac2_mdio_write(struct mii_bus *bus, int phyaddr, /* Wait until any existing MII operation is complete */ if (readl_poll_timeout(priv->ioaddr + mii_data, tmp, - !(tmp & MII_XGMAC_BUSY), 100, 10000)) - return -EBUSY; + !(tmp & MII_XGMAC_BUSY), 100, 10000)) { + ret = -EBUSY; + goto err_disable_clks; + } /* Set the MII address register to write */ writel(addr, priv->ioaddr + mii_address); writel(value, priv->ioaddr + mii_data); /* Wait until any existing MII operation is complete */ - return readl_poll_timeout(priv->ioaddr + mii_data, tmp, - !(tmp & MII_XGMAC_BUSY), 100, 10000); + ret = readl_poll_timeout(priv->ioaddr + mii_data, tmp, + !(tmp & MII_XGMAC_BUSY), 100, 10000); + +err_disable_clks: + pm_runtime_put(priv->device); + + return ret; } /** @@ -196,6 +229,12 @@ static int stmmac_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg) int data = 0; u32 v; + data = pm_runtime_get_sync(priv->device); + if (data < 0) { + pm_runtime_put_noidle(priv->device); + return data; + } + value |= (phyaddr << priv->hw->mii.addr_shift) & priv->hw->mii.addr_mask; value |= (phyreg << priv->hw->mii.reg_shift) & priv->hw->mii.reg_mask; @@ -216,19 +255,26 @@ static int stmmac_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg) } if (readl_poll_timeout(priv->ioaddr + mii_address, v, !(v & MII_BUSY), - 100, 10000)) - return -EBUSY; + 100, 10000)) { + data = -EBUSY; + goto err_disable_clks; + } writel(data, priv->ioaddr + mii_data); writel(value, priv->ioaddr + mii_address); if (readl_poll_timeout(priv->ioaddr + 
mii_address, v, !(v & MII_BUSY), - 100, 10000)) - return -EBUSY; + 100, 10000)) { + data = -EBUSY; + goto err_disable_clks; + } /* Read the data from the MII data register */ data = (int)readl(priv->ioaddr + mii_data) & MII_DATA_MASK; +err_disable_clks: + pm_runtime_put(priv->device); + return data; } @@ -247,10 +293,16 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg, struct stmmac_priv *priv = netdev_priv(ndev); unsigned int mii_address = priv->hw->mii.addr; unsigned int mii_data = priv->hw->mii.data; + int ret, data = phydata; u32 value = MII_BUSY; - int data = phydata; u32 v; + ret = pm_runtime_get_sync(priv->device); + if (ret < 0) { + pm_runtime_put_noidle(priv->device); + return ret; + } + value |= (phyaddr << priv->hw->mii.addr_shift) & priv->hw->mii.addr_mask; value |= (phyreg << priv->hw->mii.reg_shift) & priv->hw->mii.reg_mask; @@ -275,16 +327,23 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg, /* Wait until any existing MII operation is complete */ if (readl_poll_timeout(priv->ioaddr + mii_address, v, !(v & MII_BUSY), - 100, 10000)) - return -EBUSY; + 100, 10000)) { + ret = -EBUSY; + goto err_disable_clks; + } /* Set the MII address register to write */ writel(data, priv->ioaddr + mii_data); writel(value, priv->ioaddr + mii_address); /* Wait until any existing MII operation is complete */ - return readl_poll_timeout(priv->ioaddr + mii_address, v, !(v & MII_BUSY), - 100, 10000); + ret = readl_poll_timeout(priv->ioaddr + mii_address, v, !(v & MII_BUSY), + 100, 10000); + +err_disable_clks: + pm_runtime_put(priv->device); + + return ret; } /** diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index 272cb47af9f2..95e0e4d6f74d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -198,8 +198,6 @@ static int stmmac_pci_probe(struct pci_dev *pdev, if (ret) return ret; - pci_enable_msi(pdev); - memset(&res, 0, sizeof(res)); res.addr = pcim_iomap_table(pdev)[i]; res.wol_irq = pdev->irq; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 6dc9f10414e4..1e17a23d9118 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -394,7 +394,7 @@ static int stmmac_of_get_mac_mode(struct device_node *np) * set some private fields that will be used by the main at runtime. 
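All four MDIO accessors above are bracketed the same way: take a runtime-PM reference before touching the MII registers and drop it on every exit path. Because pm_runtime_get_sync() increments the usage count even when it fails, the failing branch must drop that count with pm_runtime_put_noidle(). A minimal sketch of the idiom, with mdio_op() standing in for any of the accessors:

#include <linux/pm_runtime.h>

static int mdio_op(struct device *dev)
{
        int ret;

        ret = pm_runtime_get_sync(dev);
        if (ret < 0) {
                pm_runtime_put_noidle(dev);     /* get_sync counted even on error */
                return ret;
        }

        ret = 0;        /* ... poll MII_BUSY, program address/data regs ... */

        pm_runtime_put(dev);
        return ret;
}

Later kernels added pm_runtime_resume_and_get(), which folds the put_noidle() error handling into the get.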
*/ struct plat_stmmacenet_data * -stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) +stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) { struct device_node *np = pdev->dev.of_node; struct plat_stmmacenet_data *plat; @@ -406,12 +406,12 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) if (!plat) return ERR_PTR(-ENOMEM); - *mac = of_get_mac_address(np); - if (IS_ERR(*mac)) { - if (PTR_ERR(*mac) == -EPROBE_DEFER) - return ERR_CAST(*mac); + rc = of_get_mac_address(np, mac); + if (rc) { + if (rc == -EPROBE_DEFER) + return ERR_PTR(rc); - *mac = NULL; + eth_zero_addr(mac); } plat->phy_interface = device_get_phy_mode(&pdev->dev); @@ -627,7 +627,7 @@ void stmmac_remove_config_dt(struct platform_device *pdev, } #else struct plat_stmmacenet_data * -stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) +stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) { return ERR_PTR(-EINVAL); } @@ -704,7 +704,6 @@ int stmmac_pltfr_remove(struct platform_device *pdev) } EXPORT_SYMBOL_GPL(stmmac_pltfr_remove); -#ifdef CONFIG_PM_SLEEP /** * stmmac_pltfr_suspend * @dev: device pointer @@ -712,7 +711,7 @@ EXPORT_SYMBOL_GPL(stmmac_pltfr_remove); * call the main suspend function and then, if required, on some platform, it * can call an exit helper. */ -static int stmmac_pltfr_suspend(struct device *dev) +static int __maybe_unused stmmac_pltfr_suspend(struct device *dev) { int ret; struct net_device *ndev = dev_get_drvdata(dev); @@ -733,7 +732,7 @@ static int stmmac_pltfr_suspend(struct device *dev) * the main resume function, on some platforms, it can call own init helper * if required. */ -static int stmmac_pltfr_resume(struct device *dev) +static int __maybe_unused stmmac_pltfr_resume(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct stmmac_priv *priv = netdev_priv(ndev); @@ -744,10 +743,29 @@ static int stmmac_pltfr_resume(struct device *dev) return stmmac_resume(dev); } -#endif /* CONFIG_PM_SLEEP */ -SIMPLE_DEV_PM_OPS(stmmac_pltfr_pm_ops, stmmac_pltfr_suspend, - stmmac_pltfr_resume); +static int __maybe_unused stmmac_runtime_suspend(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct stmmac_priv *priv = netdev_priv(ndev); + + stmmac_bus_clks_config(priv, false); + + return 0; +} + +static int __maybe_unused stmmac_runtime_resume(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct stmmac_priv *priv = netdev_priv(ndev); + + return stmmac_bus_clks_config(priv, true); +} + +const struct dev_pm_ops stmmac_pltfr_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(stmmac_pltfr_suspend, stmmac_pltfr_resume) + SET_RUNTIME_PM_OPS(stmmac_runtime_suspend, stmmac_runtime_resume, NULL) +}; EXPORT_SYMBOL_GPL(stmmac_pltfr_pm_ops); MODULE_DESCRIPTION("STMMAC 10/100/1000 Ethernet platform support"); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h index 3a4663b7b460..3fff3f59d73d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h @@ -12,7 +12,7 @@ #include "stmmac.h" struct plat_stmmacenet_data * -stmmac_probe_config_dt(struct platform_device *pdev, const char **mac); +stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac); void stmmac_remove_config_dt(struct platform_device *pdev, struct plat_stmmacenet_data *plat); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c 
b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 0989e2bb6ee3..4e86cdf2bc9f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -9,6 +9,7 @@ *******************************************************************************/ #include "stmmac.h" #include "stmmac_ptp.h" +#include "dwmac4.h" /** * stmmac_adjust_freq @@ -134,7 +135,10 @@ static int stmmac_enable(struct ptp_clock_info *ptp, { struct stmmac_priv *priv = container_of(ptp, struct stmmac_priv, ptp_clock_ops); + void __iomem *ptpaddr = priv->ptpaddr; + void __iomem *ioaddr = priv->hw->pcsr; struct stmmac_pps_cfg *cfg; + u32 intr_value, acr_value; int ret = -EOPNOTSUPP; unsigned long flags; @@ -158,6 +162,37 @@ static int stmmac_enable(struct ptp_clock_info *ptp, priv->systime_flags); spin_unlock_irqrestore(&priv->ptp_lock, flags); break; + case PTP_CLK_REQ_EXTTS: + priv->plat->ext_snapshot_en = on; + mutex_lock(&priv->aux_ts_lock); + acr_value = readl(ptpaddr + PTP_ACR); + acr_value &= ~PTP_ACR_MASK; + if (on) { + /* Enable External snapshot trigger */ + acr_value |= priv->plat->ext_snapshot_num; + acr_value |= PTP_ACR_ATSFC; + netdev_dbg(priv->dev, "Auxiliary Snapshot %d enabled.\n", + priv->plat->ext_snapshot_num >> + PTP_ACR_ATSEN_SHIFT); + /* Enable Timestamp Interrupt */ + intr_value = readl(ioaddr + GMAC_INT_EN); + intr_value |= GMAC_INT_TSIE; + writel(intr_value, ioaddr + GMAC_INT_EN); + + } else { + netdev_dbg(priv->dev, "Auxiliary Snapshot %d disabled.\n", + priv->plat->ext_snapshot_num >> + PTP_ACR_ATSEN_SHIFT); + /* Disable Timestamp Interrupt */ + intr_value = readl(ioaddr + GMAC_INT_EN); + intr_value &= ~GMAC_INT_TSIE; + writel(intr_value, ioaddr + GMAC_INT_EN); + } + writel(acr_value, ptpaddr + PTP_ACR); + mutex_unlock(&priv->aux_ts_lock); + ret = 0; + break; + default: break; } @@ -165,13 +200,43 @@ static int stmmac_enable(struct ptp_clock_info *ptp, return ret; } +/** + * stmmac_get_syncdevicetime + * @device: current device time + * @system: system counter value read synchronously with device time + * @ctx: context provided by timekeeping code + * Description: Read device and system clock simultaneously and return the + * corrected clock values in ns. 
+ **/ +static int stmmac_get_syncdevicetime(ktime_t *device, + struct system_counterval_t *system, + void *ctx) +{ + struct stmmac_priv *priv = (struct stmmac_priv *)ctx; + + if (priv->plat->crosststamp) + return priv->plat->crosststamp(device, system, ctx); + else + return -EOPNOTSUPP; +} + +static int stmmac_getcrosststamp(struct ptp_clock_info *ptp, + struct system_device_crosststamp *xtstamp) +{ + struct stmmac_priv *priv = + container_of(ptp, struct stmmac_priv, ptp_clock_ops); + + return get_device_system_crosststamp(stmmac_get_syncdevicetime, + priv, NULL, xtstamp); +} + /* structure describing a PTP hardware clock */ static struct ptp_clock_info stmmac_ptp_clock_ops = { .owner = THIS_MODULE, .name = "stmmac ptp", .max_adj = 62500000, .n_alarm = 0, - .n_ext_ts = 0, + .n_ext_ts = 0, /* will be overwritten in stmmac_ptp_register */ .n_per_out = 0, /* will be overwritten in stmmac_ptp_register */ .n_pins = 0, .pps = 0, @@ -180,6 +245,7 @@ static struct ptp_clock_info stmmac_ptp_clock_ops = { .gettime64 = stmmac_get_time, .settime64 = stmmac_set_time, .enable = stmmac_enable, + .getcrosststamp = stmmac_getcrosststamp, }; /** @@ -192,6 +258,9 @@ void stmmac_ptp_register(struct stmmac_priv *priv) { int i; + if (priv->plat->ptp_clk_freq_config) + priv->plat->ptp_clk_freq_config(priv); + for (i = 0; i < priv->dma_cap.pps_out_num; i++) { if (i >= STMMAC_PPS_MAX) break; @@ -202,8 +271,10 @@ void stmmac_ptp_register(struct stmmac_priv *priv) stmmac_ptp_clock_ops.max_adj = priv->plat->ptp_max_adj; stmmac_ptp_clock_ops.n_per_out = priv->dma_cap.pps_out_num; + stmmac_ptp_clock_ops.n_ext_ts = priv->dma_cap.aux_snapshot_n; spin_lock_init(&priv->ptp_lock); + mutex_init(&priv->aux_ts_lock); priv->ptp_clock_ops = stmmac_ptp_clock_ops; priv->ptp_clock = ptp_clock_register(&priv->ptp_clock_ops, @@ -229,4 +300,6 @@ void stmmac_ptp_unregister(struct stmmac_priv *priv) pr_debug("Removed PTP HW clock successfully on %s\n", priv->dev->name); } + + mutex_destroy(&priv->aux_ts_lock); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h index 7abb1d47e7da..53172a439810 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h @@ -23,6 +23,9 @@ #define PTP_STSUR 0x10 /* System Time – Seconds Update Reg */ #define PTP_STNSUR 0x14 /* System Time – Nanoseconds Update Reg */ #define PTP_TAR 0x18 /* Timestamp Addend Reg */ +#define PTP_ACR 0x40 /* Auxiliary Control Reg */ +#define PTP_ATNR 0x48 /* Auxiliary Timestamp - Nanoseconds Reg */ +#define PTP_ATSR 0x4c /* Auxiliary Timestamp - Seconds Reg */ #define PTP_STNSUR_ADDSUB_SHIFT 31 #define PTP_DIGITAL_ROLLOVER_MODE 0x3B9ACA00 /* 10e9-1 ns */ @@ -64,4 +67,25 @@ #define PTP_SSIR_SSINC_MASK 0xff #define GMAC4_PTP_SSIR_SSINC_SHIFT 16 +/* Auxiliary Control defines */ +#define PTP_ACR_ATSFC BIT(0) /* Auxiliary Snapshot FIFO Clear */ +#define PTP_ACR_ATSEN0 BIT(4) /* Auxiliary Snapshot 0 Enable */ +#define PTP_ACR_ATSEN1 BIT(5) /* Auxiliary Snapshot 1 Enable */ +#define PTP_ACR_ATSEN2 BIT(6) /* Auxiliary Snapshot 2 Enable */ +#define PTP_ACR_ATSEN3 BIT(7) /* Auxiliary Snapshot 3 Enable */ +#define PTP_ACR_ATSEN_SHIFT 5 /* Auxiliary Snapshot shift */ +#define PTP_ACR_MASK GENMASK(7, 4) /* Aux Snapshot Mask */ +#define PMC_ART_VALUE0 0x01 /* PMC_ART[15:0] timer value */ +#define PMC_ART_VALUE1 0x02 /* PMC_ART[31:16] timer value */ +#define PMC_ART_VALUE2 0x03 /* PMC_ART[47:32] timer value */ +#define PMC_ART_VALUE3 0x04 /* PMC_ART[63:48] timer value */ 
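With n_ext_ts now populated from dma_cap.aux_snapshot_n, the auxiliary snapshot inputs become ordinary PTP external-timestamp channels to userspace, driven through the PTP character device. A sketch of consuming them with the standard uapi; /dev/ptp0 is an assumed device node and error handling is minimal:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/ptp_clock.h>

int main(void)
{
        struct ptp_extts_request req;
        struct ptp_extts_event ev;
        int fd = open("/dev/ptp0", O_RDWR);

        if (fd < 0)
                return 1;

        memset(&req, 0, sizeof(req));
        req.index = 0;                          /* auxiliary snapshot 0 */
        req.flags = PTP_ENABLE_FEATURE | PTP_RISING_EDGE;
        if (ioctl(fd, PTP_EXTTS_REQUEST, &req))
                return 1;

        /* Each trigger queues one event, delivered via read() */
        if (read(fd, &ev, sizeof(ev)) == sizeof(ev))
                printf("snapshot %u at %lld.%09u\n", ev.index,
                       (long long)ev.t.sec, ev.t.nsec);

        close(fd);
        return 0;
}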
+#define GMAC4_ART_TIME_SHIFT 16 /* ART TIME 16-bits shift */ + +enum aux_snapshot { + AUX_SNAPSHOT0 = 0x10, + AUX_SNAPSHOT1 = 0x20, + AUX_SNAPSHOT2 = 0x40, + AUX_SNAPSHOT3 = 0x80, +}; + #endif /* __STMMAC_PTP_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 44bb133c3000..4e70efc45458 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -254,6 +254,16 @@ static int tc_init(struct stmmac_priv *priv) priv->flow_entries_max); } + if (!priv->plat->fpe_cfg) { + priv->plat->fpe_cfg = devm_kzalloc(priv->device, + sizeof(*priv->plat->fpe_cfg), + GFP_KERNEL); + if (!priv->plat->fpe_cfg) + return -ENOMEM; + } else { + memset(priv->plat->fpe_cfg, 0, sizeof(*priv->plat->fpe_cfg)); + } + /* Fail silently as we can still use remaining features, e.g. CBS */ if (!dma_cap->frpsel) return 0; @@ -297,6 +307,7 @@ static int tc_init(struct stmmac_priv *priv) dev_info(priv->device, "Enabling HW TC (entries=%d, max_off=%d)\n", priv->tc_entries_max, priv->tc_off_max); + return 0; } @@ -598,6 +609,87 @@ static int tc_del_flow(struct stmmac_priv *priv, return ret; } +#define VLAN_PRIO_FULL_MASK (0x07) + +static int tc_add_vlan_flow(struct stmmac_priv *priv, + struct flow_cls_offload *cls) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(cls); + struct flow_dissector *dissector = rule->match.dissector; + int tc = tc_classid_to_hwtc(priv->dev, cls->classid); + struct flow_match_vlan match; + + /* Nothing to do here */ + if (!dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_VLAN)) + return -EINVAL; + + if (tc < 0) { + netdev_err(priv->dev, "Invalid traffic class\n"); + return -EINVAL; + } + + flow_rule_match_vlan(rule, &match); + + if (match.mask->vlan_priority) { + u32 prio; + + if (match.mask->vlan_priority != VLAN_PRIO_FULL_MASK) { + netdev_err(priv->dev, "Only full mask is supported for VLAN priority"); + return -EINVAL; + } + + prio = BIT(match.key->vlan_priority); + stmmac_rx_queue_prio(priv, priv->hw, prio, tc); + } + + return 0; +} + +static int tc_del_vlan_flow(struct stmmac_priv *priv, + struct flow_cls_offload *cls) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(cls); + struct flow_dissector *dissector = rule->match.dissector; + int tc = tc_classid_to_hwtc(priv->dev, cls->classid); + + /* Nothing to do here */ + if (!dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_VLAN)) + return -EINVAL; + + if (tc < 0) { + netdev_err(priv->dev, "Invalid traffic class\n"); + return -EINVAL; + } + + stmmac_rx_queue_prio(priv, priv->hw, 0, tc); + + return 0; +} + +static int tc_add_flow_cls(struct stmmac_priv *priv, + struct flow_cls_offload *cls) +{ + int ret; + + ret = tc_add_flow(priv, cls); + if (!ret) + return ret; + + return tc_add_vlan_flow(priv, cls); +} + +static int tc_del_flow_cls(struct stmmac_priv *priv, + struct flow_cls_offload *cls) +{ + int ret; + + ret = tc_del_flow(priv, cls); + if (!ret) + return ret; + + return tc_del_vlan_flow(priv, cls); +} + static int tc_setup_cls(struct stmmac_priv *priv, struct flow_cls_offload *cls) { @@ -609,10 +701,10 @@ static int tc_setup_cls(struct stmmac_priv *priv, switch (cls->command) { case FLOW_CLS_REPLACE: - ret = tc_add_flow(priv, cls); + ret = tc_add_flow_cls(priv, cls); break; case FLOW_CLS_DESTROY: - ret = tc_del_flow(priv, cls); + ret = tc_del_flow_cls(priv, cls); break; default: return -EOPNOTSUPP; @@ -748,13 +840,10 @@ static int tc_setup_taprio(struct stmmac_priv *priv, if (fpe && !priv->dma_cap.fpesel) 
return -EOPNOTSUPP; - ret = stmmac_fpe_configure(priv, priv->ioaddr, - priv->plat->tx_queues_to_use, - priv->plat->rx_queues_to_use, fpe); - if (ret && fpe) { - netdev_err(priv->dev, "failed to enable Frame Preemption\n"); - return ret; - } + /* Actual FPE register configuration will be done after FPE handshake + * is success. + */ + priv->plat->fpe_cfg->enable = fpe; ret = stmmac_est_configure(priv, priv->ioaddr, priv->plat->est, priv->plat->clk_ptp_rate); @@ -764,12 +853,29 @@ static int tc_setup_taprio(struct stmmac_priv *priv, } netdev_info(priv->dev, "configured EST\n"); + + if (fpe) { + stmmac_fpe_handshake(priv, true); + netdev_info(priv->dev, "start FPE handshake\n"); + } + return 0; disable: priv->plat->est->enable = false; stmmac_est_configure(priv, priv->ioaddr, priv->plat->est, priv->plat->clk_ptp_rate); + + priv->plat->fpe_cfg->enable = false; + stmmac_fpe_configure(priv, priv->ioaddr, + priv->plat->tx_queues_to_use, + priv->plat->rx_queues_to_use, + false); + netdev_info(priv->dev, "disabled FPE\n"); + + stmmac_fpe_handshake(priv, false); + netdev_info(priv->dev, "stop FPE handshake\n"); + return ret; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c new file mode 100644 index 000000000000..105821b53020 --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021, Intel Corporation. */ + +#include <net/xdp_sock_drv.h> + +#include "stmmac.h" +#include "stmmac_xdp.h" + +static int stmmac_xdp_enable_pool(struct stmmac_priv *priv, + struct xsk_buff_pool *pool, u16 queue) +{ + struct stmmac_channel *ch = &priv->channel[queue]; + bool need_update; + u32 frame_size; + int err; + + if (queue >= priv->plat->rx_queues_to_use || + queue >= priv->plat->tx_queues_to_use) + return -EINVAL; + + frame_size = xsk_pool_get_rx_frame_size(pool); + /* XDP ZC does not span multiple frame, make sure XSK pool buffer + * size can at least store Q-in-Q frame. 
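tc_setup_taprio() above no longer writes the preemption registers directly; it only latches the requested state in fpe_cfg->enable and starts (or, on the error path, stops) the handshake, leaving the actual stmmac_fpe_configure() call to the handshake worker once the link partner has answered. A condensation of that control flow, using the driver's symbols purely for illustration (taprio_latch_fpe() is not a real function):

static int taprio_latch_fpe(struct stmmac_priv *priv, bool fpe)
{
        if (fpe && !priv->dma_cap.fpesel)
                return -EOPNOTSUPP;

        priv->plat->fpe_cfg->enable = fpe;      /* latched, not yet applied */
        if (fpe)
                stmmac_fpe_handshake(priv, true);       /* worker applies it */

        return 0;
}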
+ */ + if (frame_size < ETH_FRAME_LEN + VLAN_HLEN * 2) + return -EOPNOTSUPP; + + err = xsk_pool_dma_map(pool, priv->device, STMMAC_RX_DMA_ATTR); + if (err) { + netdev_err(priv->dev, "Failed to map xsk pool\n"); + return err; + } + + need_update = netif_running(priv->dev) && stmmac_xdp_is_enabled(priv); + + if (need_update) { + stmmac_disable_rx_queue(priv, queue); + stmmac_disable_tx_queue(priv, queue); + napi_disable(&ch->rx_napi); + napi_disable(&ch->tx_napi); + } + + set_bit(queue, priv->af_xdp_zc_qps); + + if (need_update) { + napi_enable(&ch->rxtx_napi); + stmmac_enable_rx_queue(priv, queue); + stmmac_enable_tx_queue(priv, queue); + + err = stmmac_xsk_wakeup(priv->dev, queue, XDP_WAKEUP_RX); + if (err) + return err; + } + + return 0; +} + +static int stmmac_xdp_disable_pool(struct stmmac_priv *priv, u16 queue) +{ + struct stmmac_channel *ch = &priv->channel[queue]; + struct xsk_buff_pool *pool; + bool need_update; + + if (queue >= priv->plat->rx_queues_to_use || + queue >= priv->plat->tx_queues_to_use) + return -EINVAL; + + pool = xsk_get_pool_from_qid(priv->dev, queue); + if (!pool) + return -EINVAL; + + need_update = netif_running(priv->dev) && stmmac_xdp_is_enabled(priv); + + if (need_update) { + stmmac_disable_rx_queue(priv, queue); + stmmac_disable_tx_queue(priv, queue); + synchronize_rcu(); + napi_disable(&ch->rxtx_napi); + } + + xsk_pool_dma_unmap(pool, STMMAC_RX_DMA_ATTR); + + clear_bit(queue, priv->af_xdp_zc_qps); + + if (need_update) { + napi_enable(&ch->rx_napi); + napi_enable(&ch->tx_napi); + stmmac_enable_rx_queue(priv, queue); + stmmac_enable_tx_queue(priv, queue); + } + + return 0; +} + +int stmmac_xdp_setup_pool(struct stmmac_priv *priv, struct xsk_buff_pool *pool, + u16 queue) +{ + return pool ? stmmac_xdp_enable_pool(priv, pool, queue) : + stmmac_xdp_disable_pool(priv, queue); +} + +int stmmac_xdp_set_prog(struct stmmac_priv *priv, struct bpf_prog *prog, + struct netlink_ext_ack *extack) +{ + struct net_device *dev = priv->dev; + struct bpf_prog *old_prog; + bool need_update; + bool if_running; + + if_running = netif_running(dev); + + if (prog && dev->mtu > ETH_DATA_LEN) { + /* For now, the driver doesn't support XDP functionality with + * jumbo frames so we return error. + */ + NL_SET_ERR_MSG_MOD(extack, "Jumbo frames not supported"); + return -EOPNOTSUPP; + } + + need_update = !!priv->xdp_prog != !!prog; + if (if_running && need_update) + stmmac_release(dev); + + old_prog = xchg(&priv->xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + + /* Disable RX SPH for XDP operation */ + priv->sph = priv->sph_cap && !stmmac_xdp_is_enabled(priv); + + if (if_running && need_update) + stmmac_open(dev); + + return 0; +} diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.h new file mode 100644 index 000000000000..896dc987d4ef --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2021, Intel Corporation. 
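stmmac_xdp_enable_pool() above rejects XSK pools whose frame size cannot hold a double-tagged frame, since an XDP zero-copy buffer cannot span frames: the floor works out to ETH_FRAME_LEN + 2 * VLAN_HLEN = 1514 + 8 = 1522 bytes. A self-contained restatement of the check:

#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/types.h>

static bool xsk_frame_size_ok(u32 frame_size)
{
        /* Must fit a full Q-in-Q frame: 1514 + 2 * 4 = 1522 bytes */
        return frame_size >= ETH_FRAME_LEN + VLAN_HLEN * 2;
}

Note also how the two paths swap NAPI instances: enabling a pool parks rx_napi/tx_napi and runs the combined rxtx_napi, while disabling does the reverse, so a channel never runs both polling flavours at once.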
*/ + +#ifndef _STMMAC_XDP_H_ +#define _STMMAC_XDP_H_ + +#define STMMAC_MAX_RX_BUF_SIZE(num) (((num) * PAGE_SIZE) - XDP_PACKET_HEADROOM) +#define STMMAC_RX_DMA_ATTR (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) + +int stmmac_xdp_setup_pool(struct stmmac_priv *priv, struct xsk_buff_pool *pool, + u16 queue); +int stmmac_xdp_set_prog(struct stmmac_priv *priv, struct bpf_prog *prog, + struct netlink_ext_ack *extack); + +#endif /* _STMMAC_XDP_H_ */ diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index 9ff894ba8d3e..54f45d8c79a7 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -1599,6 +1599,7 @@ static inline int cas_mdio_link_not_up(struct cas *cp) cas_phy_write(cp, MII_BMCR, val); break; } + break; default: break; } diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c index 58f142ee78a3..9790656cf970 100644 --- a/drivers/net/ethernet/sun/sungem.c +++ b/drivers/net/ethernet/sun/sungem.c @@ -1674,8 +1674,8 @@ static void gem_init_phy(struct gem *gp) if (gp->pdev->vendor == PCI_VENDOR_ID_APPLE) { int i; - /* Those delay sucks, the HW seem to love them though, I'll - * serisouly consider breaking some locks here to be able + /* Those delays sucks, the HW seems to love them though, I'll + * seriously consider breaking some locks here to be able * to schedule instead */ for (i = 0; i < 3; i++) { diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 638d7b03be4b..6a67b026df0b 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1824,7 +1824,6 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) for_each_child_of_node(node, port_np) { struct am65_cpsw_port *port; - const void *mac_addr; u32 port_id; /* it is not a slave port node, continue */ @@ -1903,15 +1902,15 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) return ret; } - mac_addr = of_get_mac_address(port_np); - if (!IS_ERR(mac_addr)) { - ether_addr_copy(port->slave.mac_addr, mac_addr); - } else if (am65_cpsw_am654_get_efuse_macid(port_np, - port->port_id, - port->slave.mac_addr) || - !is_valid_ether_addr(port->slave.mac_addr)) { - random_ether_addr(port->slave.mac_addr); - dev_err(dev, "Use random MAC address\n"); + ret = of_get_mac_address(port_np, port->slave.mac_addr); + if (ret) { + am65_cpsw_am654_get_efuse_macid(port_np, + port->port_id, + port->slave.mac_addr); + if (!is_valid_ether_addr(port->slave.mac_addr)) { + random_ether_addr(port->slave.mac_addr); + dev_err(dev, "Use random MAC address\n"); + } } } of_node_put(node); diff --git a/drivers/net/ethernet/ti/am65-cpsw-switchdev.c b/drivers/net/ethernet/ti/am65-cpsw-switchdev.c index d93ffd8a08b0..23cfb91e9c4d 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-switchdev.c +++ b/drivers/net/ethernet/ti/am65-cpsw-switchdev.c @@ -385,7 +385,7 @@ static void am65_cpsw_switchdev_event_work(struct work_struct *work) fdb->addr, fdb->vid, fdb->added_by_user, fdb->offloaded, port_id); - if (!fdb->added_by_user) + if (!fdb->added_by_user || fdb->is_local) break; if (memcmp(port->slave.mac_addr, (u8 *)fdb->addr, ETH_ALEN) == 0) port_id = HOST_PORT_NUM; @@ -401,7 +401,7 @@ static void am65_cpsw_switchdev_event_work(struct work_struct *work) fdb->addr, fdb->vid, fdb->added_by_user, fdb->offloaded, port_id); - if (!fdb->added_by_user) + if (!fdb->added_by_user || fdb->is_local) break; if (memcmp(port->slave.mac_addr, (u8 *)fdb->addr, 
ETH_ALEN) == 0) port_id = HOST_PORT_NUM; diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index fd966567464c..c0cd7de88316 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1123,25 +1123,23 @@ static int cpsw_ndo_xdp_xmit(struct net_device *ndev, int n, struct cpsw_priv *priv = netdev_priv(ndev); struct cpsw_common *cpsw = priv->cpsw; struct xdp_frame *xdpf; - int i, drops = 0, port; + int i, nxmit = 0, port; if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) return -EINVAL; for (i = 0; i < n; i++) { xdpf = frames[i]; - if (xdpf->len < CPSW_MIN_PACKET_SIZE) { - xdp_return_frame_rx_napi(xdpf); - drops++; - continue; - } + if (xdpf->len < CPSW_MIN_PACKET_SIZE) + break; port = priv->emac_port + cpsw->data.dual_emac; if (cpsw_xdp_tx_frame(priv, xdpf, NULL, port)) - drops++; + break; + nxmit++; } - return n - drops; + return nxmit; } #ifdef CONFIG_NET_POLL_CONTROLLER @@ -1298,7 +1296,6 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, for_each_available_child_of_node(node, slave_node) { struct cpsw_slave_data *slave_data = data->slave_data + i; - const void *mac_addr = NULL; int lenp; const __be32 *parp; @@ -1370,10 +1367,8 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, } no_phy_slave: - mac_addr = of_get_mac_address(slave_node); - if (!IS_ERR(mac_addr)) { - ether_addr_copy(slave_data->mac_addr, mac_addr); - } else { + ret = of_get_mac_address(slave_node, slave_data->mac_addr); + if (ret) { ret = ti_cm_get_macid(&pdev->dev, i, slave_data->mac_addr); if (ret) diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index 58a64313ac00..69b7a4e0220a 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -1093,24 +1093,22 @@ static int cpsw_ndo_xdp_xmit(struct net_device *ndev, int n, { struct cpsw_priv *priv = netdev_priv(ndev); struct xdp_frame *xdpf; - int i, drops = 0; + int i, nxmit = 0; if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) return -EINVAL; for (i = 0; i < n; i++) { xdpf = frames[i]; - if (xdpf->len < CPSW_MIN_PACKET_SIZE) { - xdp_return_frame_rx_napi(xdpf); - drops++; - continue; - } + if (xdpf->len < CPSW_MIN_PACKET_SIZE) + break; if (cpsw_xdp_tx_frame(priv, xdpf, NULL, priv->emac_port)) - drops++; + break; + nxmit++; } - return n - drops; + return nxmit; } static int cpsw_get_port_parent_id(struct net_device *ndev, @@ -1259,7 +1257,6 @@ static int cpsw_probe_dt(struct cpsw_common *cpsw) for_each_child_of_node(tmp_node, port_np) { struct cpsw_slave_data *slave_data; - const void *mac_addr; u32 port_id; ret = of_property_read_u32(port_np, "reg", &port_id); @@ -1318,10 +1315,8 @@ static int cpsw_probe_dt(struct cpsw_common *cpsw) goto err_node_put; } - mac_addr = of_get_mac_address(port_np); - if (!IS_ERR(mac_addr)) { - ether_addr_copy(slave_data->mac_addr, mac_addr); - } else { + ret = of_get_mac_address(port_np, slave_data->mac_addr); + if (ret) { ret = ti_cm_get_macid(dev, port_id - 1, slave_data->mac_addr); if (ret) diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c index bb59e768915e..5862f0a4a975 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.c +++ b/drivers/net/ethernet/ti/cpsw_priv.c @@ -1305,19 +1305,15 @@ int cpsw_xdp_tx_frame(struct cpsw_priv *priv, struct xdp_frame *xdpf, ret = cpdma_chan_submit_mapped(txch, cpsw_xdpf_to_handle(xdpf), dma, xdpf->len, port); } else { - if (sizeof(*xmeta) > xdpf->headroom) { - xdp_return_frame_rx_napi(xdpf); + if (sizeof(*xmeta) > xdpf->headroom) return -EINVAL; - } 
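The cpsw changes above migrate .ndo_xdp_xmit to the newer return convention: queue frames until the first failure, return how many were accepted, and leave the unsent tail to the caller, which owns and frees it — hence the removed per-frame xdp_return_frame_rx_napi() calls. A minimal sketch of the shape, where hw_queue_frame() is a hypothetical stand-in for the driver's enqueue primitive (e.g. cpsw_xdp_tx_frame):

struct xdp_frame;

/* Hypothetical enqueue; nonzero means the hardware ring is full */
static int hw_queue_frame(struct xdp_frame *frame)
{
        return 0;       /* sketch: pretend the enqueue always succeeds */
}

static int xdp_xmit_sketch(int n, struct xdp_frame **frames)
{
        int i, nxmit = 0;

        for (i = 0; i < n; i++) {
                if (hw_queue_frame(frames[i]))
                        break;  /* do not free; caller owns the rest */
                nxmit++;
        }

        return nxmit;   /* caller frees frames[nxmit..n-1] */
}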
ret = cpdma_chan_submit(txch, cpsw_xdpf_to_handle(xdpf), xdpf->data, xdpf->len, port); } - if (ret) { + if (ret) priv->ndev->stats.tx_dropped++; - xdp_return_frame_rx_napi(xdpf); - } return ret; } @@ -1353,7 +1349,8 @@ int cpsw_run_xdp(struct cpsw_priv *priv, int ch, struct xdp_buff *xdp, if (unlikely(!xdpf)) goto drop; - cpsw_xdp_tx_frame(priv, xdpf, page, port); + if (cpsw_xdp_tx_frame(priv, xdpf, page, port)) + xdp_return_frame_rx_napi(xdpf); break; case XDP_REDIRECT: if (xdp_do_redirect(ndev, xdp, prog)) diff --git a/drivers/net/ethernet/ti/cpsw_switchdev.c b/drivers/net/ethernet/ti/cpsw_switchdev.c index a72bb570756f..05a64fb7a04f 100644 --- a/drivers/net/ethernet/ti/cpsw_switchdev.c +++ b/drivers/net/ethernet/ti/cpsw_switchdev.c @@ -395,7 +395,7 @@ static void cpsw_switchdev_event_work(struct work_struct *work) fdb->addr, fdb->vid, fdb->added_by_user, fdb->offloaded, port); - if (!fdb->added_by_user) + if (!fdb->added_by_user || fdb->is_local) break; if (memcmp(priv->mac_addr, (u8 *)fdb->addr, ETH_ALEN) == 0) port = HOST_PORT_NUM; @@ -411,7 +411,7 @@ static void cpsw_switchdev_event_work(struct work_struct *work) fdb->addr, fdb->vid, fdb->added_by_user, fdb->offloaded, port); - if (!fdb->added_by_user) + if (!fdb->added_by_user || fdb->is_local) break; if (memcmp(priv->mac_addr, (u8 *)fdb->addr, ETH_ALEN) == 0) port = HOST_PORT_NUM; diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index c7031e1960d4..f9417b44cae8 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -169,11 +169,11 @@ static const char emac_version_string[] = "TI DaVinci EMAC Linux v6.1"; /* EMAC mac_status register */ #define EMAC_MACSTATUS_TXERRCODE_MASK (0xF00000) #define EMAC_MACSTATUS_TXERRCODE_SHIFT (20) -#define EMAC_MACSTATUS_TXERRCH_MASK (0x7) +#define EMAC_MACSTATUS_TXERRCH_MASK (0x70000) #define EMAC_MACSTATUS_TXERRCH_SHIFT (16) #define EMAC_MACSTATUS_RXERRCODE_MASK (0xF000) #define EMAC_MACSTATUS_RXERRCODE_SHIFT (12) -#define EMAC_MACSTATUS_RXERRCH_MASK (0x7) +#define EMAC_MACSTATUS_RXERRCH_MASK (0x700) #define EMAC_MACSTATUS_RXERRCH_SHIFT (8) /* EMAC RX register masks */ @@ -1687,7 +1687,6 @@ davinci_emac_of_get_pdata(struct platform_device *pdev, struct emac_priv *priv) const struct of_device_id *match; const struct emac_platform_data *auxdata; struct emac_platform_data *pdata = NULL; - const u8 *mac_addr; if (!IS_ENABLED(CONFIG_OF) || !pdev->dev.of_node) return dev_get_platdata(&pdev->dev); @@ -1699,11 +1698,8 @@ davinci_emac_of_get_pdata(struct platform_device *pdev, struct emac_priv *priv) np = pdev->dev.of_node; pdata->version = EMAC_VERSION_2; - if (!is_valid_ether_addr(pdata->mac_addr)) { - mac_addr = of_get_mac_address(np); - if (!IS_ERR(mac_addr)) - ether_addr_copy(pdata->mac_addr, mac_addr); - } + if (!is_valid_ether_addr(pdata->mac_addr)) + of_get_mac_address(np, pdata->mac_addr); of_property_read_u32(np, "ti,davinci-ctrl-reg-offset", &pdata->ctrl_reg_offset); diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index d7a144b4a09f..9030e619e543 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -1966,7 +1966,6 @@ static int netcp_create_interface(struct netcp_device *netcp_device, struct resource res; void __iomem *efuse = NULL; u32 efuse_mac = 0; - const void *mac_addr; u8 efuse_mac_addr[6]; u32 temp[2]; int ret = 0; @@ -2036,10 +2035,8 @@ static int netcp_create_interface(struct netcp_device *netcp_device, devm_iounmap(dev, 
efuse); devm_release_mem_region(dev, res.start, size); } else { - mac_addr = of_get_mac_address(node_interface); - if (!IS_ERR(mac_addr)) - ether_addr_copy(ndev->dev_addr, mac_addr); - else + ret = of_get_mac_address(node_interface, ndev->dev_addr); + if (ret) eth_random_addr(ndev->dev_addr); } diff --git a/drivers/net/ethernet/toshiba/spider_net.c b/drivers/net/ethernet/toshiba/spider_net.c index d5a75ef7e3ca..226a76633e65 100644 --- a/drivers/net/ethernet/toshiba/spider_net.c +++ b/drivers/net/ethernet/toshiba/spider_net.c @@ -146,7 +146,8 @@ spider_net_read_phy(struct net_device *netdev, int mii_id, int reg) /* we don't use semaphores to wait for an SPIDER_NET_GPROPCMPINT * interrupt, as we poll for the completion of the read operation - * in spider_net_read_phy. Should take about 50 us */ + * in spider_net_read_phy. Should take about 50 us + */ do { readvalue = spider_net_read_reg(card, SPIDER_NET_GPCROPCMD); } while (readvalue & SPIDER_NET_GPREXEC); @@ -387,7 +388,8 @@ spider_net_prepare_rx_descr(struct spider_net_card *card, (~(SPIDER_NET_RXBUF_ALIGN - 1)); /* and we need to have it 128 byte aligned, therefore we allocate a - * bit more */ + * bit more + */ /* allocate an skb */ descr->skb = netdev_alloc_skb(card->netdev, bufsize + SPIDER_NET_RXBUF_ALIGN - 1); @@ -488,7 +490,8 @@ spider_net_refill_rx_chain(struct spider_net_card *card) /* one context doing the refill (and a second context seeing that * and omitting it) is ok. If called by NAPI, we'll be called again * as spider_net_decode_one_descr is called several times. If some - * interrupt calls us, the NAPI is about to clean up anyway. */ + * interrupt calls us, the NAPI is about to clean up anyway. + */ if (!spin_trylock_irqsave(&chain->lock, flags)) return; @@ -523,14 +526,16 @@ spider_net_alloc_rx_skbs(struct spider_net_card *card) /* Put at least one buffer into the chain. if this fails, * we've got a problem. If not, spider_net_refill_rx_chain - * will do the rest at the end of this function. */ + * will do the rest at the end of this function. + */ if (spider_net_prepare_rx_descr(card, chain->head)) goto error; else chain->head = chain->head->next; /* This will allocate the rest of the rx buffers; - * if not, it's business as usual later on. */ + * if not, it's business as usual later on. + */ spider_net_refill_rx_chain(card); spider_net_enable_rxdmac(card); return 0; @@ -706,7 +711,8 @@ spider_net_set_low_watermark(struct spider_net_card *card) int i; /* Measure the length of the queue. Measurement does not - * need to be precise -- does not need a lock. */ + * need to be precise -- does not need a lock. 
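The conversions above (netcp, cpsw, am65-cpsw, davinci_emac) all target the reworked of_get_mac_address(), which now fills a caller-provided ETH_ALEN buffer and returns 0 or a negative errno instead of returning a pointer. The usual caller shape, as a sketch; get_mac_or_random() is an illustrative helper, not a kernel API:

#include <linux/etherdevice.h>
#include <linux/of.h>
#include <linux/of_net.h>

static int get_mac_or_random(struct device_node *np, u8 *addr)
{
        int ret = of_get_mac_address(np, addr);

        if (ret == -EPROBE_DEFER)
                return ret;             /* NVMEM provider not ready yet */
        if (ret)
                eth_random_addr(addr);  /* no usable MAC in DT: randomize */

        return 0;
}

The -EPROBE_DEFER case matters because the address may come from an NVMEM cell that has not probed yet, which is exactly why stmmac_probe_config_dt() propagates it instead of falling back.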
+ */ while (descr != card->tx_chain.head) { status = descr->hwdescr->dmac_cmd_status & SPIDER_NET_DESCR_NOT_IN_USE; if (status == SPIDER_NET_DESCR_NOT_IN_USE) @@ -786,7 +792,8 @@ spider_net_release_tx_chain(struct spider_net_card *card, int brutal) /* fallthrough, if we release the descriptors * brutally (then we don't care about - * SPIDER_NET_DESCR_CARDOWNED) */ + * SPIDER_NET_DESCR_CARDOWNED) + */ fallthrough; case SPIDER_NET_DESCR_RESPONSE_ERROR: @@ -948,7 +955,8 @@ spider_net_pass_skb_up(struct spider_net_descr *descr, skb_put(skb, hwdescr->valid_size); /* the card seems to add 2 bytes of junk in front - * of the ethernet frame */ + * of the ethernet frame + */ #define SPIDER_MISALIGN 2 skb_pull(skb, SPIDER_MISALIGN); skb->protocol = eth_type_trans(skb, netdev); @@ -1382,7 +1390,8 @@ spider_net_handle_error_irq(struct spider_net_card *card, u32 status_reg, /* PHY read operation completed */ /* we don't use semaphores, as we poll for the completion * of the read operation in spider_net_read_phy. Should take - * about 50 us */ + * about 50 us + */ show_error = 0; break; case SPIDER_NET_GPWFFINT: @@ -1450,7 +1459,8 @@ spider_net_handle_error_irq(struct spider_net_card *card, u32 status_reg, { case SPIDER_NET_GTMFLLINT: /* TX RAM full may happen on a usual case. - * Logging is not needed. */ + * Logging is not needed. + */ show_error = 0; break; case SPIDER_NET_GRFDFLLINT: @@ -1694,7 +1704,8 @@ spider_net_enable_card(struct spider_net_card *card) { int i; /* the following array consists of (register),(value) pairs - * that are set in this function. A register of 0 ends the list */ + * that are set in this function. A register of 0 ends the list + */ u32 regs[][2] = { { SPIDER_NET_GRESUMINTNUM, 0 }, { SPIDER_NET_GREINTNUM, 0 }, @@ -1757,7 +1768,8 @@ spider_net_enable_card(struct spider_net_card *card) spider_net_write_reg(card, SPIDER_NET_ECMODE, SPIDER_NET_ECMODE_VALUE); /* set chain tail address for RX chains and - * enable DMA */ + * enable DMA + */ spider_net_enable_rxchtails(card); spider_net_enable_rxdmac(card); @@ -1995,7 +2007,8 @@ static void spider_net_link_phy(struct timer_list *t) case BCM54XX_UNKNOWN: /* copper, fiber with and without failed, - * retry from beginning */ + * retry from beginning + */ spider_net_setup_aneg(card); card->medium = BCM54XX_COPPER; break; @@ -2263,7 +2276,8 @@ spider_net_setup_netdev(struct spider_net_card *card) netdev->features |= NETIF_F_RXCSUM; netdev->features |= NETIF_F_IP_CSUM | NETIF_F_LLTX; /* some time: NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | - * NETIF_F_HW_VLAN_CTAG_FILTER */ + * NETIF_F_HW_VLAN_CTAG_FILTER + */ /* MTU range: 64 - 2294 */ netdev->min_mtu = SPIDER_NET_MIN_MTU; diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c index 7a6e5ff8e5d4..fedb2bf69261 100644 --- a/drivers/net/ethernet/toshiba/tc35815.c +++ b/drivers/net/ethernet/toshiba/tc35815.c @@ -1914,7 +1914,8 @@ tc35815_set_multicast_list(struct net_device *dev) if (dev->flags & IFF_PROMISC) { /* With some (all?) 100MHalf HUB, controller will hang - * if we enabled promiscuous mode before linkup... */ + * if we enabled promiscuous mode before linkup... 
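
The spider_net changes are purely cosmetic: they move the trailing */ of multi-line comments onto its own line, the style preferred under drivers/net/. An illustrative before/after (not taken from the patch):

	/* old style: the closing marker shares the last text line */

	/* preferred netdev style: text starts on the opening line
	 * and the closing marker sits alone on its own line.
	 */
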
diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c
index 7a6e5ff8e5d4..fedb2bf69261 100644
--- a/drivers/net/ethernet/toshiba/tc35815.c
+++ b/drivers/net/ethernet/toshiba/tc35815.c
@@ -1914,7 +1914,8 @@ tc35815_set_multicast_list(struct net_device *dev)
 
 	if (dev->flags & IFF_PROMISC) {
 		/* With some (all?) 100MHalf HUB, controller will hang
-		 * if we enabled promiscuous mode before linkup... */
+		 * if we enabled promiscuous mode before linkup...
+		 */
 		struct tc35815_local *lp = netdev_priv(dev);
 
 		if (!lp->link)
diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c
index b65767f9e499..fecc4d7b00b0 100644
--- a/drivers/net/ethernet/via/via-velocity.c
+++ b/drivers/net/ethernet/via/via-velocity.c
@@ -2525,7 +2525,7 @@ static int velocity_close(struct net_device *dev)
  *	@skb: buffer to transmit
  *	@dev: network device
  *
- *	Called by the networ layer to request a packet is queued to
+ *	Called by the network layer to request a packet is queued to
  *	the velocity. Returns zero on success.
  */
 static netdev_tx_t velocity_xmit(struct sk_buff *skb,
diff --git a/drivers/net/ethernet/wiznet/w5100-spi.c b/drivers/net/ethernet/wiznet/w5100-spi.c
index 2b4126d2427d..2b84848dc26a 100644
--- a/drivers/net/ethernet/wiznet/w5100-spi.c
+++ b/drivers/net/ethernet/wiznet/w5100-spi.c
@@ -423,8 +423,14 @@ static int w5100_spi_probe(struct spi_device *spi)
 	const struct of_device_id *of_id;
 	const struct w5100_ops *ops;
 	kernel_ulong_t driver_data;
+	const void *mac = NULL;
+	u8 tmpmac[ETH_ALEN];
 	int priv_size;
-	const void *mac = of_get_mac_address(spi->dev.of_node);
+	int ret;
+
+	ret = of_get_mac_address(spi->dev.of_node, tmpmac);
+	if (!ret)
+		mac = tmpmac;
 
 	if (spi->dev.of_node) {
 		of_id = of_match_device(w5100_of_match, &spi->dev);
diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c
index c0d181a7f83a..ec5db481c9cd 100644
--- a/drivers/net/ethernet/wiznet/w5100.c
+++ b/drivers/net/ethernet/wiznet/w5100.c
@@ -1157,7 +1157,7 @@ int w5100_probe(struct device *dev, const struct w5100_ops *ops,
 	INIT_WORK(&priv->setrx_work, w5100_setrx_work);
 	INIT_WORK(&priv->restart_work, w5100_restart_work);
 
-	if (!IS_ERR_OR_NULL(mac_addr))
+	if (mac_addr)
 		memcpy(ndev->dev_addr, mac_addr, ETH_ALEN);
 	else
 		eth_hw_addr_random(ndev);
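
Because the new of_get_mac_address() needs a writable buffer, w5100-spi can no longer forward the returned pointer directly; it fills a temporary array and keeps the NULL sentinel that w5100_probe() tests. A condensed sketch of both sides of that contract (identifiers as in the hunks above; surrounding code elided):

	const void *mac = NULL;
	u8 tmpmac[ETH_ALEN];

	if (!of_get_mac_address(spi->dev.of_node, tmpmac))
		mac = tmpmac;	/* stays NULL when the DT has no address */

	/* ...and later, in w5100_probe(): */
	if (mac_addr)
		memcpy(ndev->dev_addr, mac_addr, ETH_ALEN);
	else
		eth_hw_addr_random(ndev);
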
diff --git a/drivers/net/ethernet/xilinx/Kconfig b/drivers/net/ethernet/xilinx/Kconfig
index c6eb7f2368aa..911b5ef9e680 100644
--- a/drivers/net/ethernet/xilinx/Kconfig
+++ b/drivers/net/ethernet/xilinx/Kconfig
@@ -18,12 +18,14 @@ if NET_VENDOR_XILINX
 
 config XILINX_EMACLITE
 	tristate "Xilinx 10/100 Ethernet Lite support"
+	depends on HAS_IOMEM
 	select PHYLIB
 	help
 	  This driver supports the 10/100 Ethernet Lite from Xilinx.
 
 config XILINX_AXI_EMAC
 	tristate "Xilinx 10/100/1000 AXI Ethernet support"
+	depends on HAS_IOMEM
 	select PHYLINK
 	help
 	  This driver supports the 10/100/1000 Ethernet from Xilinx for the
@@ -31,6 +33,7 @@ config XILINX_AXI_EMAC
 
 config XILINX_LL_TEMAC
 	tristate "Xilinx LL TEMAC (LocalLink Tri-mode Ethernet MAC) driver"
+	depends on HAS_IOMEM
 	select PHYLIB
 	help
 	  This driver supports the Xilinx 10/100/1000 LocalLink TEMAC
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index 030185301014..a1f5f07f4ca9 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -438,7 +438,7 @@ static void temac_do_set_mac_address(struct net_device *ndev)
 
 static int temac_init_mac_address(struct net_device *ndev, const void *address)
 {
-	ether_addr_copy(ndev->dev_addr, address);
+	memcpy(ndev->dev_addr, address, ETH_ALEN);
 	if (!is_valid_ether_addr(ndev->dev_addr))
 		eth_hw_addr_random(ndev);
 	temac_do_set_mac_address(ndev);
@@ -1351,7 +1351,7 @@ static int temac_probe(struct platform_device *pdev)
 	struct device_node *temac_np = dev_of_node(&pdev->dev), *dma_np;
 	struct temac_local *lp;
 	struct net_device *ndev;
-	const void *addr;
+	u8 addr[ETH_ALEN];
 	__be32 *p;
 	bool little_endian;
 	int rc = 0;
@@ -1542,8 +1542,8 @@ static int temac_probe(struct platform_device *pdev)
 
 	if (temac_np) {
 		/* Retrieve the MAC address */
-		addr = of_get_mac_address(temac_np);
-		if (IS_ERR(addr)) {
+		rc = of_get_mac_address(temac_np, addr);
+		if (rc) {
 			dev_err(&pdev->dev, "could not find MAC address\n");
 			return -ENODEV;
 		}
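
ll_temac now reads the address into a u8 array on the stack, so temac_init_mac_address() drops ether_addr_copy() in favour of memcpy(): ether_addr_copy() requires both operands to be 16-bit aligned, which a plain byte array does not guarantee. The function as patched, with explanatory comments added:

	static int temac_init_mac_address(struct net_device *ndev, const void *address)
	{
		/* address may point at an odd-aligned stack buffer, so use
		 * memcpy() rather than the u16-aligned ether_addr_copy().
		 */
		memcpy(ndev->dev_addr, address, ETH_ALEN);
		if (!is_valid_ether_addr(ndev->dev_addr))
			eth_hw_addr_random(ndev);	/* missing or invalid DT address */
		temac_do_set_mac_address(ndev);
		return 0;
	}
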
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h
index 1e966a39967e..5b4d153b1492 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet.h
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h
@@ -376,6 +376,8 @@ struct axidma_bd {
 	struct sk_buff *skb;
 } __aligned(XAXIDMA_BD_MINIMUM_ALIGNMENT);
 
+#define XAE_NUM_MISC_CLOCKS 3
+
 /**
  * struct axienet_local - axienet private per device data
  * @ndev:	Pointer for net_device to which it will be attached.
@@ -385,7 +387,8 @@ struct axidma_bd {
  * @phylink_config: phylink configuration settings
  * @pcs_phy:	Reference to PCS/PMA PHY if used
  * @switch_x_sgmii: Whether switchable 1000BaseX/SGMII mode is enabled in the core
- * @clk:	Clock for AXI bus
+ * @axi_clk:	AXI4-Lite bus clock
+ * @misc_clks:	Misc ethernet clocks (AXI4-Stream, Ref, MGT clocks)
 * @mii_bus:	Pointer to MII bus structure
 * @mii_clk_div: MII bus clock divider value
 * @regs_start: Resource start for axienet device addresses
@@ -434,7 +437,8 @@ struct axienet_local {
 
 	bool switch_x_sgmii;
 
-	struct clk *clk;
+	struct clk *axi_clk;
+	struct clk_bulk_data misc_clks[XAE_NUM_MISC_CLOCKS];
 
 	struct mii_bus *mii_bus;
 	u8 mii_clk_div;
@@ -504,6 +508,18 @@ static inline u32 axinet_ior_read_mcr(struct axienet_local *lp)
 	return axienet_ior(lp, XAE_MDIO_MCR_OFFSET);
 }
 
+static inline void axienet_lock_mii(struct axienet_local *lp)
+{
+	if (lp->mii_bus)
+		mutex_lock(&lp->mii_bus->mdio_lock);
+}
+
+static inline void axienet_unlock_mii(struct axienet_local *lp)
+{
+	if (lp->mii_bus)
+		mutex_unlock(&lp->mii_bus->mdio_lock);
+}
+
 /**
  * axienet_iow - Memory mapped Axi Ethernet register write
  * @lp:		Pointer to axienet local structure
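
The new axienet_lock_mii()/axienet_unlock_mii() helpers make the MDIO mutex safe to take when lp->mii_bus is NULL, which can now happen since the bus is optional. Typical use around a device reset, matching the main.c hunks that follow:

	/* Degrades to a no-op on a bus-less configuration instead of
	 * dereferencing a NULL lp->mii_bus.
	 */
	axienet_lock_mii(lp);
	__axienet_device_reset(lp);	/* MDIO must stay quiet during reset */
	axienet_unlock_mii(lp);
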
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 3a8775e0ca55..b508c9453f40 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -1053,9 +1053,9 @@ static int axienet_open(struct net_device *ndev)
 	 * including the MDIO. MDIO must be disabled before resetting.
 	 * Hold MDIO bus lock to avoid MDIO accesses during the reset.
 	 */
-	mutex_lock(&lp->mii_bus->mdio_lock);
+	axienet_lock_mii(lp);
 	ret = axienet_device_reset(ndev);
-	mutex_unlock(&lp->mii_bus->mdio_lock);
+	axienet_unlock_mii(lp);
 
 	ret = phylink_of_phy_connect(lp->phylink, lp->dev->of_node, 0);
 	if (ret) {
@@ -1148,9 +1148,9 @@ static int axienet_stop(struct net_device *ndev)
 	}
 
 	/* Do a reset to ensure DMA is really stopped */
-	mutex_lock(&lp->mii_bus->mdio_lock);
+	axienet_lock_mii(lp);
 	__axienet_device_reset(lp);
-	mutex_unlock(&lp->mii_bus->mdio_lock);
+	axienet_unlock_mii(lp);
 
 	cancel_work_sync(&lp->dma_err_task);
 
@@ -1709,9 +1709,9 @@ static void axienet_dma_err_handler(struct work_struct *work)
 	 * including the MDIO. MDIO must be disabled before resetting.
 	 * Hold MDIO bus lock to avoid MDIO accesses during the reset.
 	 */
-	mutex_lock(&lp->mii_bus->mdio_lock);
+	axienet_lock_mii(lp);
 	__axienet_device_reset(lp);
-	mutex_unlock(&lp->mii_bus->mdio_lock);
+	axienet_unlock_mii(lp);
 
 	for (i = 0; i < lp->tx_bd_num; i++) {
 		cur_p = &lp->tx_bd_v[i];
@@ -1835,8 +1835,8 @@ static int axienet_probe(struct platform_device *pdev)
 	struct device_node *np;
 	struct axienet_local *lp;
 	struct net_device *ndev;
-	const void *mac_addr;
 	struct resource *ethres;
+	u8 mac_addr[ETH_ALEN];
 	int addr_width = 32;
 	u32 value;
 
@@ -1863,24 +1863,41 @@ static int axienet_probe(struct platform_device *pdev)
 	lp->rx_bd_num = RX_BD_NUM_DEFAULT;
 	lp->tx_bd_num = TX_BD_NUM_DEFAULT;
 
-	lp->clk = devm_clk_get_optional(&pdev->dev, NULL);
-	if (IS_ERR(lp->clk)) {
-		ret = PTR_ERR(lp->clk);
+	lp->axi_clk = devm_clk_get_optional(&pdev->dev, "s_axi_lite_clk");
+	if (!lp->axi_clk) {
+		/* For backward compatibility, if named AXI clock is not present,
+		 * treat the first clock specified as the AXI clock.
+		 */
+		lp->axi_clk = devm_clk_get_optional(&pdev->dev, NULL);
+	}
+	if (IS_ERR(lp->axi_clk)) {
+		ret = PTR_ERR(lp->axi_clk);
 		goto free_netdev;
 	}
-	ret = clk_prepare_enable(lp->clk);
+	ret = clk_prepare_enable(lp->axi_clk);
 	if (ret) {
-		dev_err(&pdev->dev, "Unable to enable clock: %d\n", ret);
+		dev_err(&pdev->dev, "Unable to enable AXI clock: %d\n", ret);
 		goto free_netdev;
 	}
 
+	lp->misc_clks[0].id = "axis_clk";
+	lp->misc_clks[1].id = "ref_clk";
+	lp->misc_clks[2].id = "mgt_clk";
+
+	ret = devm_clk_bulk_get_optional(&pdev->dev, XAE_NUM_MISC_CLOCKS, lp->misc_clks);
+	if (ret)
+		goto cleanup_clk;
+
+	ret = clk_bulk_prepare_enable(XAE_NUM_MISC_CLOCKS, lp->misc_clks);
+	if (ret)
+		goto cleanup_clk;
+
 	/* Map device registers */
 	ethres = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	lp->regs = devm_ioremap_resource(&pdev->dev, ethres);
 	if (IS_ERR(lp->regs)) {
-		dev_err(&pdev->dev, "could not map Axi Ethernet regs.\n");
 		ret = PTR_ERR(lp->regs);
-		goto free_netdev;
+		goto cleanup_clk;
 	}
 	lp->regs_start = ethres->start;
 
@@ -1958,18 +1975,18 @@ static int axienet_probe(struct platform_device *pdev)
 			break;
 		default:
 			ret = -EINVAL;
-			goto free_netdev;
+			goto cleanup_clk;
 		}
 	} else {
 		ret = of_get_phy_mode(pdev->dev.of_node, &lp->phy_mode);
 		if (ret)
-			goto free_netdev;
+			goto cleanup_clk;
 	}
 	if (lp->switch_x_sgmii && lp->phy_mode != PHY_INTERFACE_MODE_SGMII &&
 	    lp->phy_mode != PHY_INTERFACE_MODE_1000BASEX) {
 		dev_err(&pdev->dev, "xlnx,switch-x-sgmii only supported with SGMII or 1000BaseX\n");
 		ret = -EINVAL;
-		goto free_netdev;
+		goto cleanup_clk;
 	}
 
 	/* Find the DMA node, map the DMA registers, and decode the DMA IRQs */
@@ -1982,7 +1999,7 @@ static int axienet_probe(struct platform_device *pdev)
 			dev_err(&pdev->dev,
 				"unable to get DMA resource\n");
 			of_node_put(np);
-			goto free_netdev;
+			goto cleanup_clk;
 		}
 		lp->dma_regs = devm_ioremap_resource(&pdev->dev,
 						     &dmares);
@@ -2002,12 +2019,12 @@ static int axienet_probe(struct platform_device *pdev)
 	if (IS_ERR(lp->dma_regs)) {
 		dev_err(&pdev->dev, "could not map DMA regs\n");
 		ret = PTR_ERR(lp->dma_regs);
-		goto free_netdev;
+		goto cleanup_clk;
 	}
 	if ((lp->rx_irq <= 0) || (lp->tx_irq <= 0)) {
 		dev_err(&pdev->dev, "could not determine irqs\n");
 		ret = -ENOMEM;
-		goto free_netdev;
+		goto cleanup_clk;
 	}
 
 	/* Autodetect the need for 64-bit DMA pointers.
@@ -2037,7 +2054,7 @@ static int axienet_probe(struct platform_device *pdev)
 	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(addr_width));
 	if (ret) {
 		dev_err(&pdev->dev, "No suitable DMA available\n");
-		goto free_netdev;
+		goto cleanup_clk;
 	}
 
 	/* Check for Ethernet core IRQ (optional) */
@@ -2045,13 +2062,14 @@ static int axienet_probe(struct platform_device *pdev)
 		dev_info(&pdev->dev, "Ethernet core IRQ not defined\n");
 
 	/* Retrieve the MAC address */
-	mac_addr = of_get_mac_address(pdev->dev.of_node);
-	if (IS_ERR(mac_addr)) {
-		dev_warn(&pdev->dev, "could not find MAC address property: %ld\n",
-			 PTR_ERR(mac_addr));
-		mac_addr = NULL;
+	ret = of_get_mac_address(pdev->dev.of_node, mac_addr);
+	if (!ret) {
+		axienet_set_mac_address(ndev, mac_addr);
+	} else {
+		dev_warn(&pdev->dev, "could not find MAC address property: %d\n",
+			 ret);
+		axienet_set_mac_address(ndev, NULL);
 	}
-	axienet_set_mac_address(ndev, mac_addr);
 
 	lp->coalesce_count_rx = XAXIDMA_DFT_RX_THRESHOLD;
 	lp->coalesce_count_tx = XAXIDMA_DFT_TX_THRESHOLD;
@@ -2068,12 +2086,12 @@ static int axienet_probe(struct platform_device *pdev)
 		if (!lp->phy_node) {
 			dev_err(&pdev->dev, "phy-handle required for 1000BaseX/SGMII\n");
 			ret = -EINVAL;
-			goto free_netdev;
+			goto cleanup_mdio;
 		}
 		lp->pcs_phy = of_mdio_find_device(lp->phy_node);
 		if (!lp->pcs_phy) {
 			ret = -EPROBE_DEFER;
-			goto free_netdev;
+			goto cleanup_mdio;
 		}
 		lp->phylink_config.pcs_poll = true;
 	}
@@ -2087,17 +2105,31 @@ static int axienet_probe(struct platform_device *pdev)
 	if (IS_ERR(lp->phylink)) {
 		ret = PTR_ERR(lp->phylink);
 		dev_err(&pdev->dev, "phylink_create error (%i)\n", ret);
-		goto free_netdev;
+		goto cleanup_mdio;
 	}
 
 	ret = register_netdev(lp->ndev);
 	if (ret) {
 		dev_err(lp->dev, "register_netdev() error (%i)\n", ret);
-		goto free_netdev;
+		goto cleanup_phylink;
 	}
 
 	return 0;
 
+cleanup_phylink:
+	phylink_destroy(lp->phylink);
+
+cleanup_mdio:
+	if (lp->pcs_phy)
+		put_device(&lp->pcs_phy->dev);
+	if (lp->mii_bus)
+		axienet_mdio_teardown(lp);
+	of_node_put(lp->phy_node);
+
+cleanup_clk:
+	clk_bulk_disable_unprepare(XAE_NUM_MISC_CLOCKS, lp->misc_clks);
+	clk_disable_unprepare(lp->axi_clk);
+
 free_netdev:
 	free_netdev(ndev);
 
@@ -2119,7 +2151,8 @@ static int axienet_remove(struct platform_device *pdev)
 		axienet_mdio_teardown(lp);
 
-	clk_disable_unprepare(lp->clk);
+	clk_bulk_disable_unprepare(XAE_NUM_MISC_CLOCKS, lp->misc_clks);
+	clk_disable_unprepare(lp->axi_clk);
 
 	of_node_put(lp->phy_node);
 	lp->phy_node = NULL;
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c
index 9c014cee34b2..48f544f6c999 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c
@@ -159,8 +159,8 @@ int axienet_mdio_enable(struct axienet_local *lp)
 
 	lp->mii_clk_div = 0;
 
-	if (lp->clk) {
-		host_clock = clk_get_rate(lp->clk);
+	if (lp->axi_clk) {
+		host_clock = clk_get_rate(lp->axi_clk);
 	} else {
 		struct device_node *np1;
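
axienet now treats the AXI4-Lite clock separately and manages the remaining, all-optional clocks through the clk_bulk API. A trimmed probe/teardown sketch of that pattern (fields as in the driver; error paths shortened):

	lp->misc_clks[0].id = "axis_clk";
	lp->misc_clks[1].id = "ref_clk";
	lp->misc_clks[2].id = "mgt_clk";

	/* The _optional variant returns 0 and leaves clk == NULL for
	 * clocks the device tree does not provide.
	 */
	ret = devm_clk_bulk_get_optional(&pdev->dev, XAE_NUM_MISC_CLOCKS,
					 lp->misc_clks);
	if (!ret)
		ret = clk_bulk_prepare_enable(XAE_NUM_MISC_CLOCKS, lp->misc_clks);

	/* mirrored on remove and on the probe error paths: */
	clk_bulk_disable_unprepare(XAE_NUM_MISC_CLOCKS, lp->misc_clks);
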
specified\n"); + return NULL; + } + /* NPE ID 0x00, 0x10, 0x20... */ + plat->npe = (npe_spec.args[0] << 4); + + /* Check if this device has an MDIO bus */ + mdio_np = of_get_child_by_name(np, "mdio"); + if (mdio_np) { + plat->has_mdio = true; + mdio_bus_np = mdio_np; + /* DO NOT put the mdio_np, it will be used */ + } + + /* Get the rx queue as a resource from queue manager */ + ret = of_parse_phandle_with_fixed_args(np, "queue-rx", 1, 0, + &queue_spec); + if (ret) { + dev_err(dev, "no rx queue phandle\n"); + return NULL; + } + plat->rxq = queue_spec.args[0]; + + /* Get the txready queue as resource from queue manager */ + ret = of_parse_phandle_with_fixed_args(np, "queue-txready", 1, 0, + &queue_spec); + if (ret) { + dev_err(dev, "no txready queue phandle\n"); + return NULL; + } + plat->txreadyq = queue_spec.args[0]; + + return plat; +} +#else +static struct eth_plat_info *ixp4xx_of_get_platdata(struct device *dev) +{ + return NULL; +} +#endif + static int ixp4xx_eth_probe(struct platform_device *pdev) { - char phy_id[MII_BUS_ID_SIZE + 3]; struct phy_device *phydev = NULL; struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; struct eth_plat_info *plat; - resource_size_t regs_phys; struct net_device *ndev; struct resource *res; struct port *port; int err; - plat = dev_get_platdata(dev); + if (np) { + plat = ixp4xx_of_get_platdata(dev); + if (!plat) + return -ENODEV; + } else { + plat = dev_get_platdata(dev); + if (!plat) + return -ENODEV; + plat->npe = pdev->id; + switch (plat->npe) { + case IXP4XX_ETH_NPEA: + /* If the MDIO bus is not up yet, defer probe */ + break; + case IXP4XX_ETH_NPEB: + /* On all except IXP43x, NPE-B is used for the MDIO bus. + * If there is no NPE-B in the feature set, bail out, + * else we have the MDIO bus here. + */ + if (!cpu_is_ixp43x()) { + if (!(ixp4xx_read_feature_bits() & + IXP4XX_FEATURE_NPEB_ETH0)) + return -ENODEV; + /* Else register the MDIO bus on NPE-B */ + plat->has_mdio = true; + } + break; + case IXP4XX_ETH_NPEC: + /* IXP43x lacks NPE-B and uses NPE-C for the MDIO bus + * access, if there is no NPE-C, no bus, nothing works, + * so bail out. + */ + if (cpu_is_ixp43x()) { + if (!(ixp4xx_read_feature_bits() & + IXP4XX_FEATURE_NPEC_ETH)) + return -ENODEV; + /* Else register the MDIO bus on NPE-B */ + plat->has_mdio = true; + } + break; + default: + return -ENODEV; + } + } if (!(ndev = devm_alloc_etherdev(dev, sizeof(struct port)))) return -ENOMEM; @@ -1378,75 +1479,42 @@ static int ixp4xx_eth_probe(struct platform_device *pdev) SET_NETDEV_DEV(ndev, dev); port = netdev_priv(ndev); port->netdev = ndev; - port->id = pdev->id; + port->id = plat->npe; /* Get the port resource and remap */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) return -ENODEV; - regs_phys = res->start; port->regs = devm_ioremap_resource(dev, res); if (IS_ERR(port->regs)) return PTR_ERR(port->regs); - switch (port->id) { - case IXP4XX_ETH_NPEA: - /* If the MDIO bus is not up yet, defer probe */ - if (!mdio_bus) - return -EPROBE_DEFER; - break; - case IXP4XX_ETH_NPEB: - /* - * On all except IXP43x, NPE-B is used for the MDIO bus. - * If there is no NPE-B in the feature set, bail out, else - * register the MDIO bus. 
- */ - if (!cpu_is_ixp43x()) { - if (!(ixp4xx_read_feature_bits() & - IXP4XX_FEATURE_NPEB_ETH0)) - return -ENODEV; - /* Else register the MDIO bus on NPE-B */ - if ((err = ixp4xx_mdio_register(port->regs))) - return err; - } - if (!mdio_bus) - return -EPROBE_DEFER; - break; - case IXP4XX_ETH_NPEC: - /* - * IXP43x lacks NPE-B and uses NPE-C for the MDIO bus access, - * of there is no NPE-C, no bus, nothing works, so bail out. - */ - if (cpu_is_ixp43x()) { - if (!(ixp4xx_read_feature_bits() & - IXP4XX_FEATURE_NPEC_ETH)) - return -ENODEV; - /* Else register the MDIO bus on NPE-C */ - if ((err = ixp4xx_mdio_register(port->regs))) - return err; + /* Register the MDIO bus if we have it */ + if (plat->has_mdio) { + err = ixp4xx_mdio_register(port->regs); + if (err) { + dev_err(dev, "failed to register MDIO bus\n"); + return err; } - if (!mdio_bus) - return -EPROBE_DEFER; - break; - default: - return -ENODEV; } + /* If the instance with the MDIO bus has not yet appeared, + * defer probing until it gets probed. + */ + if (!mdio_bus) + return -EPROBE_DEFER; ndev->netdev_ops = &ixp4xx_netdev_ops; ndev->ethtool_ops = &ixp4xx_ethtool_ops; ndev->tx_queue_len = 100; + /* Inherit the DMA masks from the platform device */ + ndev->dev.dma_mask = dev->dma_mask; + ndev->dev.coherent_dma_mask = dev->coherent_dma_mask; netif_napi_add(ndev, &port->napi, eth_poll, NAPI_WEIGHT); if (!(port->npe = npe_request(NPE_ID(port->id)))) return -EIO; - port->mem_res = request_mem_region(regs_phys, REGS_SIZE, ndev->name); - if (!port->mem_res) { - err = -EBUSY; - goto err_npe_rel; - } - port->plat = plat; npe_port_tab[NPE_ID(port->id)] = port; memcpy(ndev->dev_addr, plat->hwaddr, ETH_ALEN); @@ -1459,12 +1527,24 @@ static int ixp4xx_eth_probe(struct platform_device *pdev) __raw_writel(DEFAULT_CORE_CNTRL, &port->regs->core_control); udelay(50); - snprintf(phy_id, MII_BUS_ID_SIZE + 3, PHY_ID_FMT, - mdio_bus->id, plat->phy); - phydev = phy_connect(ndev, phy_id, &ixp4xx_adjust_link, - PHY_INTERFACE_MODE_MII); - if (IS_ERR(phydev)) { - err = PTR_ERR(phydev); + if (np) { + phydev = of_phy_get_and_connect(ndev, np, ixp4xx_adjust_link); + } else { + phydev = mdiobus_get_phy(mdio_bus, plat->phy); + if (IS_ERR(phydev)) { + err = PTR_ERR(phydev); + dev_err(dev, "could not connect phydev (%d)\n", err); + goto err_free_mem; + } + err = phy_connect_direct(ndev, phydev, ixp4xx_adjust_link, + PHY_INTERFACE_MODE_MII); + if (err) + goto err_free_mem; + + } + if (!phydev) { + err = -ENODEV; + dev_err(dev, "no phydev\n"); goto err_free_mem; } @@ -1482,8 +1562,6 @@ err_phy_dis: phy_disconnect(phydev); err_free_mem: npe_port_tab[NPE_ID(port->id)] = NULL; - release_resource(port->mem_res); -err_npe_rel: npe_release(port->npe); return err; } @@ -1499,12 +1577,21 @@ static int ixp4xx_eth_remove(struct platform_device *pdev) ixp4xx_mdio_remove(); npe_port_tab[NPE_ID(port->id)] = NULL; npe_release(port->npe); - release_resource(port->mem_res); return 0; } +static const struct of_device_id ixp4xx_eth_of_match[] = { + { + .compatible = "intel,ixp4xx-ethernet", + }, + { }, +}; + static struct platform_driver ixp4xx_eth_driver = { - .driver.name = DRV_NAME, + .driver = { + .name = DRV_NAME, + .of_match_table = of_match_ptr(ixp4xx_eth_of_match), + }, .probe = ixp4xx_eth_probe, .remove = ixp4xx_eth_remove, }; |
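
For device tree probing, ixp4xx_of_get_platdata() resolves the NPE and queue manager references through single-cell phandles. One lookup, reduced to its core (binding names from the hunk above; error handling trimmed):

	struct of_phandle_args queue_spec;
	int ret;

	/* "queue-rx" is a phandle plus exactly one argument cell,
	 * hence cell_count = 1 and index = 0.
	 */
	ret = of_parse_phandle_with_fixed_args(np, "queue-rx", 1, 0,
					       &queue_spec);
	if (ret)
		return NULL;		/* no RX queue described in the DT */
	plat->rxq = queue_spec.args[0];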
