diff options
100 files changed, 1965 insertions, 1265 deletions
diff --git a/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml b/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml index ee7a65b528cd..d1e2bca3c503 100644 --- a/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml +++ b/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml @@ -58,18 +58,18 @@ allOf: - const: timing-adjustment amlogic,tx-delay-ns: - $ref: /schemas/types.yaml#/definitions/uint32 + enum: [0, 2, 4, 6] + default: 2 description: - The internal RGMII TX clock delay (provided by this driver) in - nanoseconds. Allowed values are 0ns, 2ns, 4ns, 6ns. - When phy-mode is set to "rgmii" then the TX delay should be - explicitly configured. When not configured a fallback of 2ns is - used. When the phy-mode is set to either "rgmii-id" or "rgmii-txid" - the TX clock delay is already provided by the PHY. In that case - this property should be set to 0ns (which disables the TX clock - delay in the MAC to prevent the clock from going off because both - PHY and MAC are adding a delay). - Any configuration is ignored when the phy-mode is set to "rmii". + The internal RGMII TX clock delay (provided by this driver) + in nanoseconds. When phy-mode is set to "rgmii" then the TX + delay should be explicitly configured. When the phy-mode is + set to either "rgmii-id" or "rgmii-txid" the TX clock delay + is already provided by the PHY. In that case this property + should be set to 0ns (which disables the TX clock delay in + the MAC to prevent the clock from going off because both + PHY and MAC are adding a delay). Any configuration is + ignored when the phy-mode is set to "rmii". 
amlogic,rx-delay-ns: deprecated: true diff --git a/Documentation/networking/device_drivers/ethernet/index.rst b/Documentation/networking/device_drivers/ethernet/index.rst index 6932d8c043c2..6fc1961492b7 100644 --- a/Documentation/networking/device_drivers/ethernet/index.rst +++ b/Documentation/networking/device_drivers/ethernet/index.rst @@ -44,6 +44,7 @@ Contents: marvell/octeon_ep marvell/octeon_ep_vf mellanox/mlx5/index + meta/fbnic microsoft/netvsc neterion/s2io netronome/nfp diff --git a/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst b/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst new file mode 100644 index 000000000000..32ff114f5c26 --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst @@ -0,0 +1,29 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +===================================== +Meta Platforms Host Network Interface +===================================== + +Firmware Versions +----------------- + +fbnic has three components stored on the flash which are provided in one PLDM +image: + +1. fw - The control firmware used to view and modify firmware settings, request + firmware actions, and retrieve firmware counters outside of the data path. + This is the firmware which fbnic_fw.c interacts with. +2. bootloader - The firmware which validate firmware security and control basic + operations including loading and updating the firmware. This is also known + as the cmrt firmware. +3. undi - This is the UEFI driver which is based on the Linux driver. + +fbnic stores two copies of these three components on flash. This allows fbnic +to fall back to an older version of firmware automatically in case firmware +fails to boot. Version information for both is provided as running and stored. +The undi is only provided in stored as it is not actively running once the Linux +driver takes over. + +devlink dev info provides version information for all three components. 
In +addition to the version the hg commit hash of the build is included as a +separate entry. diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst index 5e93cd71f99f..8199e6917671 100644 --- a/Documentation/networking/timestamping.rst +++ b/Documentation/networking/timestamping.rst @@ -158,7 +158,8 @@ SOF_TIMESTAMPING_SYS_HARDWARE: SOF_TIMESTAMPING_RAW_HARDWARE: Report hardware timestamps as generated by - SOF_TIMESTAMPING_TX_HARDWARE when available. + SOF_TIMESTAMPING_TX_HARDWARE or SOF_TIMESTAMPING_RX_HARDWARE + when available. 1.3.3 Timestamp Options @@ -266,6 +267,23 @@ SOF_TIMESTAMPING_OPT_TX_SWHW: two separate messages will be looped to the socket's error queue, each containing just one timestamp. +SOF_TIMESTAMPING_OPT_RX_FILTER: + Filter out spurious receive timestamps: report a receive timestamp + only if the matching timestamp generation flag is enabled. + + Receive timestamps are generated early in the ingress path, before a + packet's destination socket is known. If any socket enables receive + timestamps, packets for all socket will receive timestamped packets. + Including those that request timestamp reporting with + SOF_TIMESTAMPING_SOFTWARE and/or SOF_TIMESTAMPING_RAW_HARDWARE, but + do not request receive timestamp generation. This can happen when + requesting transmit timestamps only. + + Receiving spurious timestamps is generally benign. A process can + ignore the unexpected non-zero value. But it makes behavior subtly + dependent on other sockets. This flag isolates the socket for more + deterministic behavior. + New applications are encouraged to pass SOF_TIMESTAMPING_OPT_ID to disambiguate timestamps and SOF_TIMESTAMPING_OPT_TSONLY to operate regardless of the setting of sysctl net.core.tstamp_allow_data. 
diff --git a/MAINTAINERS b/MAINTAINERS index ca1469d52076..4053168fdc12 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14833,6 +14833,7 @@ M: Alexander Duyck <alexanderduyck@fb.com> M: Jakub Kicinski <kuba@kernel.org> R: kernel-team@meta.com S: Supported +F: Documentation/networking/device_drivers/ethernet/meta/ F: drivers/net/ethernet/meta/ METHODE UDPU SUPPORT diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 47ab4ccd6fc1..b560644ee1b1 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -5886,9 +5886,6 @@ static int bond_ethtool_get_ts_info(struct net_device *bond_dev, if (real_dev) { ret = ethtool_get_ts_info_by_layer(real_dev, info); } else { - info->phc_index = -1; - info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; /* Check if all slaves support software tx timestamping */ rcu_read_lock(); bond_for_each_slave_rcu(bond, slave, iter) { diff --git a/drivers/net/can/cc770/cc770_platform.c b/drivers/net/can/cc770/cc770_platform.c index 13bcfba05f18..f2424fe58612 100644 --- a/drivers/net/can/cc770/cc770_platform.c +++ b/drivers/net/can/cc770/cc770_platform.c @@ -70,17 +70,10 @@ static void cc770_platform_write_reg(const struct cc770_priv *priv, int reg, static int cc770_get_of_node_data(struct platform_device *pdev, struct cc770_priv *priv) { + u32 clkext = CC770_PLATFORM_CAN_CLOCK, clkout = 0; struct device_node *np = pdev->dev.of_node; - const u32 *prop; - int prop_size; - u32 clkext; - - prop = of_get_property(np, "bosch,external-clock-frequency", - &prop_size); - if (prop && (prop_size == sizeof(u32))) - clkext = *prop; - else - clkext = CC770_PLATFORM_CAN_CLOCK; /* default */ + + of_property_read_u32(np, "bosch,external-clock-frequency", &clkext); priv->can.clock.freq = clkext; /* The system clock may not exceed 10 MHz */ @@ -98,7 +91,7 @@ static int cc770_get_of_node_data(struct platform_device *pdev, if (of_property_read_bool(np, "bosch,iso-low-speed-mux")) 
priv->cpu_interface |= CPUIF_MUX; - if (!of_get_property(np, "bosch,no-comperator-bypass", NULL)) + if (!of_property_read_bool(np, "bosch,no-comperator-bypass")) priv->bus_config |= BUSCFG_CBY; if (of_property_read_bool(np, "bosch,disconnect-rx0-input")) priv->bus_config |= BUSCFG_DR0; @@ -109,25 +102,22 @@ static int cc770_get_of_node_data(struct platform_device *pdev, if (of_property_read_bool(np, "bosch,polarity-dominant")) priv->bus_config |= BUSCFG_POL; - prop = of_get_property(np, "bosch,clock-out-frequency", &prop_size); - if (prop && (prop_size == sizeof(u32)) && *prop > 0) { - u32 cdv = clkext / *prop; - int slew; + of_property_read_u32(np, "bosch,clock-out-frequency", &clkout); + if (clkout > 0) { + u32 cdv = clkext / clkout; if (cdv > 0 && cdv < 16) { + u32 slew; + priv->cpu_interface |= CPUIF_CEN; priv->clkout |= (cdv - 1) & CLKOUT_CD_MASK; - prop = of_get_property(np, "bosch,slew-rate", - &prop_size); - if (prop && (prop_size == sizeof(u32))) { - slew = *prop; - } else { + if (of_property_read_u32(np, "bosch,slew-rate", &slew)) { /* Determine default slew rate */ slew = (CLKOUT_SL_MASK >> CLKOUT_SL_SHIFT) - ((cdv * clkext - 1) / 8000000); - if (slew < 0) + if (slew > (CLKOUT_SL_MASK >> CLKOUT_SL_SHIFT)) slew = 0; } priv->clkout |= (slew << CLKOUT_SL_SHIFT) & diff --git a/drivers/net/can/rockchip/rockchip_canfd-timestamp.c b/drivers/net/can/rockchip/rockchip_canfd-timestamp.c index 81cccc5fd838..fb1a8f4e6217 100644 --- a/drivers/net/can/rockchip/rockchip_canfd-timestamp.c +++ b/drivers/net/can/rockchip/rockchip_canfd-timestamp.c @@ -71,7 +71,7 @@ void rkcanfd_timestamp_init(struct rkcanfd_priv *priv) max_cycles = div_u64(ULLONG_MAX, cc->mult); max_cycles = min(max_cycles, cc->mask); - work_delay_ns = clocksource_cyc2ns(max_cycles, cc->mult, cc->shift) / 3; + work_delay_ns = div_u64(clocksource_cyc2ns(max_cycles, cc->mult, cc->shift), 3); priv->work_delay_jiffies = nsecs_to_jiffies(work_delay_ns); INIT_DELAYED_WORK(&priv->timestamp, 
rkcanfd_timestamp_work); diff --git a/drivers/net/can/rockchip/rockchip_canfd-tx.c b/drivers/net/can/rockchip/rockchip_canfd-tx.c index f954f38b955f..865a15e033a9 100644 --- a/drivers/net/can/rockchip/rockchip_canfd-tx.c +++ b/drivers/net/can/rockchip/rockchip_canfd-tx.c @@ -63,7 +63,7 @@ void rkcanfd_xmit_retry(struct rkcanfd_priv *priv) rkcanfd_start_xmit_write_cmd(priv, reg_cmd); } -int rkcanfd_start_xmit(struct sk_buff *skb, struct net_device *ndev) +netdev_tx_t rkcanfd_start_xmit(struct sk_buff *skb, struct net_device *ndev) { struct rkcanfd_priv *priv = netdev_priv(ndev); u32 reg_frameinfo, reg_id, reg_cmd; diff --git a/drivers/net/can/rockchip/rockchip_canfd.h b/drivers/net/can/rockchip/rockchip_canfd.h index 3efd7f174e14..93131c7d7f54 100644 --- a/drivers/net/can/rockchip/rockchip_canfd.h +++ b/drivers/net/can/rockchip/rockchip_canfd.h @@ -546,7 +546,7 @@ void rkcanfd_timestamp_stop_sync(struct rkcanfd_priv *priv); unsigned int rkcanfd_get_effective_tx_free(const struct rkcanfd_priv *priv); void rkcanfd_xmit_retry(struct rkcanfd_priv *priv); -int rkcanfd_start_xmit(struct sk_buff *skb, struct net_device *ndev); +netdev_tx_t rkcanfd_start_xmit(struct sk_buff *skb, struct net_device *ndev); void rkcanfd_handle_tx_done_one(struct rkcanfd_priv *priv, const u32 ts, unsigned int *frame_len_p); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c index 21407a26f806..5fc94c2f638e 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c @@ -582,16 +582,12 @@ static int xgbe_get_ts_info(struct net_device *netdev, struct xgbe_prv_data *pdata = netdev_priv(netdev); ts_info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; if (pdata->ptp_clock) ts_info->phc_index = ptp_clock_index(pdata->ptp_clock); - else - 
ts_info->phc_index = -1; ts_info->tx_types = (1 << HWTSTAMP_TX_OFF) | (1 << HWTSTAMP_TX_ON); ts_info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) | diff --git a/drivers/net/ethernet/atheros/Kconfig b/drivers/net/ethernet/atheros/Kconfig index 482c58c4c584..bec5cdf8d1da 100644 --- a/drivers/net/ethernet/atheros/Kconfig +++ b/drivers/net/ethernet/atheros/Kconfig @@ -6,7 +6,7 @@ config NET_VENDOR_ATHEROS bool "Atheros devices" default y - depends on (PCI || ATH79) + depends on PCI || ATH79 || COMPILE_TEST help If you have a network (Ethernet) card belonging to this class, say Y. @@ -19,7 +19,7 @@ if NET_VENDOR_ATHEROS config AG71XX tristate "Atheros AR7XXX/AR9XXX built-in ethernet mac support" - depends on ATH79 + depends on ATH79 || COMPILE_TEST select PHYLINK imply NET_SELFTESTS help diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index db2a8ade6205..96a6189cc31e 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -149,11 +149,11 @@ #define FIFO_CFG4_MC BIT(8) /* Multicast Packet */ #define FIFO_CFG4_BC BIT(9) /* Broadcast Packet */ #define FIFO_CFG4_DR BIT(10) /* Dribble */ -#define FIFO_CFG4_LE BIT(11) /* Long Event */ -#define FIFO_CFG4_CF BIT(12) /* Control Frame */ -#define FIFO_CFG4_PF BIT(13) /* Pause Frame */ -#define FIFO_CFG4_UO BIT(14) /* Unsupported Opcode */ -#define FIFO_CFG4_VT BIT(15) /* VLAN tag detected */ +#define FIFO_CFG4_CF BIT(11) /* Control Frame */ +#define FIFO_CFG4_PF BIT(12) /* Pause Frame */ +#define FIFO_CFG4_UO BIT(13) /* Unsupported Opcode */ +#define FIFO_CFG4_VT BIT(14) /* VLAN tag detected */ +#define FIFO_CFG4_LE BIT(15) /* Long Event */ #define FIFO_CFG4_FT BIT(16) /* Frame Truncated */ #define FIFO_CFG4_UC BIT(17) /* Unicast Packet */ #define FIFO_CFG4_INIT (FIFO_CFG4_DE | FIFO_CFG4_DV | FIFO_CFG4_FC | \ @@ -168,28 +168,28 @@ #define FIFO_CFG5_DV BIT(1) /* RX_DV Event */ #define FIFO_CFG5_FC BIT(2) /* False Carrier */ #define FIFO_CFG5_CE 
BIT(3) /* Code Error */ -#define FIFO_CFG5_LM BIT(4) /* Length Mismatch */ -#define FIFO_CFG5_LO BIT(5) /* Length Out of Range */ -#define FIFO_CFG5_OK BIT(6) /* Packet is OK */ -#define FIFO_CFG5_MC BIT(7) /* Multicast Packet */ -#define FIFO_CFG5_BC BIT(8) /* Broadcast Packet */ -#define FIFO_CFG5_DR BIT(9) /* Dribble */ -#define FIFO_CFG5_CF BIT(10) /* Control Frame */ -#define FIFO_CFG5_PF BIT(11) /* Pause Frame */ -#define FIFO_CFG5_UO BIT(12) /* Unsupported Opcode */ -#define FIFO_CFG5_VT BIT(13) /* VLAN tag detected */ -#define FIFO_CFG5_LE BIT(14) /* Long Event */ -#define FIFO_CFG5_FT BIT(15) /* Frame Truncated */ -#define FIFO_CFG5_16 BIT(16) /* unknown */ -#define FIFO_CFG5_17 BIT(17) /* unknown */ +#define FIFO_CFG5_CR BIT(4) /* CRC error */ +#define FIFO_CFG5_LM BIT(5) /* Length Mismatch */ +#define FIFO_CFG5_LO BIT(6) /* Length Out of Range */ +#define FIFO_CFG5_OK BIT(7) /* Packet is OK */ +#define FIFO_CFG5_MC BIT(8) /* Multicast Packet */ +#define FIFO_CFG5_BC BIT(9) /* Broadcast Packet */ +#define FIFO_CFG5_DR BIT(10) /* Dribble */ +#define FIFO_CFG5_CF BIT(11) /* Control Frame */ +#define FIFO_CFG5_PF BIT(12) /* Pause Frame */ +#define FIFO_CFG5_UO BIT(13) /* Unsupported Opcode */ +#define FIFO_CFG5_VT BIT(14) /* VLAN tag detected */ +#define FIFO_CFG5_LE BIT(15) /* Long Event */ +#define FIFO_CFG5_FT BIT(16) /* Frame Truncated */ +#define FIFO_CFG5_UC BIT(17) /* Unicast Packet */ #define FIFO_CFG5_SF BIT(18) /* Short Frame */ #define FIFO_CFG5_BM BIT(19) /* Byte Mode */ #define FIFO_CFG5_INIT (FIFO_CFG5_DE | FIFO_CFG5_DV | FIFO_CFG5_FC | \ - FIFO_CFG5_CE | FIFO_CFG5_LO | FIFO_CFG5_OK | \ - FIFO_CFG5_MC | FIFO_CFG5_BC | FIFO_CFG5_DR | \ - FIFO_CFG5_CF | FIFO_CFG5_PF | FIFO_CFG5_VT | \ - FIFO_CFG5_LE | FIFO_CFG5_FT | FIFO_CFG5_16 | \ - FIFO_CFG5_17 | FIFO_CFG5_SF) + FIFO_CFG5_CE | FIFO_CFG5_LM | FIFO_CFG5_LO | \ + FIFO_CFG5_OK | FIFO_CFG5_MC | FIFO_CFG5_BC | \ + FIFO_CFG5_DR | FIFO_CFG5_CF | FIFO_CFG5_UO | \ + FIFO_CFG5_VT | FIFO_CFG5_LE | 
FIFO_CFG5_FT | \ + FIFO_CFG5_UC | FIFO_CFG5_SF) #define AG71XX_REG_TX_CTRL 0x0180 #define TX_CTRL_TXE BIT(0) /* Tx Enable */ @@ -379,7 +379,6 @@ struct ag71xx { u32 fifodata[3]; int mac_idx; - struct reset_control *mdio_reset; struct clk *clk_mdio; }; @@ -509,8 +508,7 @@ static void ag71xx_ethtool_get_strings(struct net_device *netdev, u32 sset, switch (sset) { case ETH_SS_STATS: for (i = 0; i < ARRAY_SIZE(ag71xx_statistics); i++) - memcpy(data + i * ETH_GSTRING_LEN, - ag71xx_statistics[i].name, ETH_GSTRING_LEN); + ethtool_puts(&data, ag71xx_statistics[i].name); break; case ETH_SS_TEST: net_selftest_get_strings(data); @@ -690,6 +688,7 @@ static int ag71xx_mdio_probe(struct ag71xx *ag) { struct device *dev = &ag->pdev->dev; struct net_device *ndev = ag->ndev; + struct reset_control *mdio_reset; static struct mii_bus *mii_bus; struct device_node *np, *mnp; int err; @@ -706,10 +705,10 @@ static int ag71xx_mdio_probe(struct ag71xx *ag) if (!mii_bus) return -ENOMEM; - ag->mdio_reset = of_reset_control_get_exclusive(np, "mdio"); - if (IS_ERR(ag->mdio_reset)) { + mdio_reset = devm_reset_control_get_exclusive(dev, "mdio"); + if (IS_ERR(mdio_reset)) { netif_err(ag, probe, ndev, "Failed to get reset mdio.\n"); - return PTR_ERR(ag->mdio_reset); + return PTR_ERR(mdio_reset); } mii_bus->name = "ag71xx_mdio"; @@ -720,12 +719,10 @@ static int ag71xx_mdio_probe(struct ag71xx *ag) mii_bus->parent = dev; snprintf(mii_bus->id, MII_BUS_ID_SIZE, "%s.%d", np->name, ag->mac_idx); - if (!IS_ERR(ag->mdio_reset)) { - reset_control_assert(ag->mdio_reset); - msleep(100); - reset_control_deassert(ag->mdio_reset); - msleep(200); - } + reset_control_assert(mdio_reset); + msleep(100); + reset_control_deassert(mdio_reset); + msleep(200); mnp = of_get_child_by_name(np, "mdio"); err = devm_of_mdiobus_register(dev, mii_bus, mnp); @@ -1853,6 +1850,12 @@ static int ag71xx_probe(struct platform_device *pdev) if (!ag->mac_base) return -ENOMEM; + /* ensure that HW is in manual polling mode before 
interrupts are + * activated. Otherwise ag71xx_interrupt might call napi_schedule + * before it is initialized by netif_napi_add. + */ + ag71xx_int_disable(ag, AG71XX_INT_POLL); + ndev->irq = platform_get_irq(pdev, 0); err = devm_request_irq(&pdev->dev, ndev->irq, ag71xx_interrupt, 0x0, dev_name(&pdev->dev), ndev); @@ -2033,4 +2036,5 @@ static struct platform_driver ag71xx_driver = { }; module_platform_driver(ag71xx_driver); +MODULE_DESCRIPTION("Atheros AR71xx built-in ethernet mac driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c9248ed9330c..6e422e24750a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -13803,6 +13803,7 @@ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs, int max_rx, max_tx, max_cp, tx_sets = 1, tx_cp; struct bnxt_hw_rings hwr = {0}; int rx_rings = rx; + int rc; if (tcs) tx_sets = tcs; @@ -13835,7 +13836,23 @@ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs, } if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) hwr.cp_p5 = hwr.tx + rx; - return bnxt_hwrm_check_rings(bp, &hwr); + rc = bnxt_hwrm_check_rings(bp, &hwr); + if (!rc && pci_msix_can_alloc_dyn(bp->pdev)) { + if (!bnxt_ulp_registered(bp->edev)) { + hwr.cp += bnxt_get_ulp_msix_num(bp); + hwr.cp = min_t(int, hwr.cp, bnxt_get_max_func_irqs(bp)); + } + if (hwr.cp > bp->total_irqs) { + int total_msix = bnxt_change_msix(bp, hwr.cp); + + if (total_msix < hwr.cp) { + netdev_warn(bp->dev, "Unable to allocate %d MSIX vectors, maximum available %d\n", + hwr.cp, total_msix); + rc = -ENOSPC; + } + } + } + return rc; } static void bnxt_unmap_bars(struct bnxt *bp, struct pci_dev *pdev) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 3b805ed433ed..69231e85140b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ 
-1217,12 +1217,15 @@ struct bnxt_napi { bool in_reset; }; +/* "TxRx", 2 hypens, plus maximum integer */ +#define BNXT_IRQ_NAME_EXTRA 17 + struct bnxt_irq { irq_handler_t handler; unsigned int vector; u8 requested:1; u8 have_cpumask:1; - char name[IFNAMSIZ + 2]; + char name[IFNAMSIZ + BNXT_IRQ_NAME_EXTRA]; cpumask_var_t cpu_mask; }; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 7392a716f28d..f71cc8188b4e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -955,11 +955,6 @@ static int bnxt_set_channels(struct net_device *dev, } tx_xdp = req_rx_rings; } - rc = bnxt_check_rings(bp, req_tx_rings, req_rx_rings, sh, tcs, tx_xdp); - if (rc) { - netdev_warn(dev, "Unable to allocate the requested rings\n"); - return rc; - } if (bnxt_get_nr_rss_ctxs(bp, req_rx_rings) != bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings) && @@ -968,6 +963,12 @@ static int bnxt_set_channels(struct net_device *dev, return -EINVAL; } + rc = bnxt_check_rings(bp, req_tx_rings, req_rx_rings, sh, tcs, tx_xdp); + if (rc) { + netdev_warn(dev, "Unable to allocate the requested rings\n"); + return rc; + } + if (netif_running(dev)) { if (BNXT_PF(bp)) { /* TODO CHIMP_FW: Send message to all VF's @@ -5043,11 +5044,8 @@ static int bnxt_get_ts_info(struct net_device *dev, struct bnxt_ptp_cfg *ptp; ptp = bp->ptp_cfg; - info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; + info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE; - info->phc_index = -1; if (!ptp) return 0; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index b9e7d3e7b15d..fdd6356f21ef 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -176,11 +176,17 @@ EXPORT_SYMBOL(bnxt_unregister_dev); static int bnxt_set_dflt_ulp_msix(struct 
bnxt *bp) { - u32 roce_msix = BNXT_VF(bp) ? - BNXT_MAX_VF_ROCE_MSIX : BNXT_MAX_ROCE_MSIX; + int roce_msix = BNXT_MAX_ROCE_MSIX; - return ((bp->flags & BNXT_FLAG_ROCE_CAP) ? - min_t(u32, roce_msix, num_online_cpus()) : 0); + if (BNXT_VF(bp)) + roce_msix = BNXT_MAX_ROCE_MSIX_VF; + else if (bp->port_partition_type) + roce_msix = BNXT_MAX_ROCE_MSIX_NPAR_PF; + + /* NQ MSIX vectors should match the number of CPUs plus 1 more for + * the CREQ MSIX, up to the default. + */ + return min_t(int, roce_msix, num_online_cpus() + 1); } int bnxt_send_msg(struct bnxt_en_dev *edev, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h index 4eafe6ec0abf..4f4914f5c84c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h @@ -15,8 +15,10 @@ #define BNXT_MIN_ROCE_CP_RINGS 2 #define BNXT_MIN_ROCE_STAT_CTXS 1 -#define BNXT_MAX_ROCE_MSIX 9 -#define BNXT_MAX_VF_ROCE_MSIX 2 + +#define BNXT_MAX_ROCE_MSIX_VF 2 +#define BNXT_MAX_ROCE_MSIX_NPAR_PF 5 +#define BNXT_MAX_ROCE_MSIX 64 struct hwrm_async_event_cmpl; struct bnxt; diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 0ec5f01551f9..378815917741 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -6145,9 +6145,7 @@ static int tg3_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info { struct tg3 *tp = netdev_priv(dev); - info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; + info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE; if (tg3_flag(tp, PTP_CAPABLE)) { info->so_timestamping |= SOF_TIMESTAMPING_TX_HARDWARE | @@ -6157,8 +6155,6 @@ static int tg3_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info if (tp->ptp_clock) info->phc_index = ptp_clock_index(tp->ptp_clock); - else - info->phc_index = -1; info->tx_types = (1 << HWTSTAMP_TX_OFF) | (1 << 
HWTSTAMP_TX_ON); diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 8e1e4b2b2386..f06babec04a0 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3410,8 +3410,6 @@ static int gem_get_ts_info(struct net_device *dev, info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; @@ -3423,7 +3421,8 @@ static int gem_get_ts_info(struct net_device *dev, (1 << HWTSTAMP_FILTER_NONE) | (1 << HWTSTAMP_FILTER_ALL); - info->phc_index = bp->ptp_clock ? ptp_clock_index(bp->ptp_clock) : -1; + if (bp->ptp_clock) + info->phc_index = ptp_clock_index(bp->ptp_clock); return 0; } diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c index 5835965dbc32..c849e2c871a9 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c @@ -2496,37 +2496,31 @@ ret_intrmod: return ret; } +#ifdef PTP_HARDWARE_TIMESTAMPING static int lio_get_ts_info(struct net_device *netdev, struct kernel_ethtool_ts_info *info) { struct lio *lio = GET_LIO(netdev); info->so_timestamping = -#ifdef PTP_HARDWARE_TIMESTAMPING SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE | - SOF_TIMESTAMPING_TX_SOFTWARE | -#endif - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; + SOF_TIMESTAMPING_TX_SOFTWARE; if (lio->ptp_clock) info->phc_index = ptp_clock_index(lio->ptp_clock); - else - info->phc_index = -1; -#ifdef PTP_HARDWARE_TIMESTAMPING info->tx_types = (1 << HWTSTAMP_TX_OFF) | (1 << HWTSTAMP_TX_ON); info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) | (1 << HWTSTAMP_FILTER_PTP_V1_L4_EVENT) | (1 << HWTSTAMP_FILTER_PTP_V2_L2_EVENT) | (1 << HWTSTAMP_FILTER_PTP_V2_L4_EVENT); -#endif return 0; } +#endif /* 
Return register dump len. */ static int lio_get_regs_len(struct net_device *dev) @@ -3146,7 +3140,9 @@ static const struct ethtool_ops lio_ethtool_ops = { .set_coalesce = lio_set_intr_coalesce, .get_priv_flags = lio_get_priv_flags, .set_priv_flags = lio_set_priv_flags, +#ifdef PTP_HARDWARE_TIMESTAMPING .get_ts_info = lio_get_ts_info, +#endif }; static const struct ethtool_ops lio_vf_ethtool_ops = { @@ -3169,7 +3165,9 @@ static const struct ethtool_ops lio_vf_ethtool_ops = { .set_coalesce = lio_set_intr_coalesce, .get_priv_flags = lio_get_priv_flags, .set_priv_flags = lio_set_priv_flags, +#ifdef PTP_HARDWARE_TIMESTAMPING .get_ts_info = lio_get_ts_info, +#endif }; void liquidio_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c index 6a04d2530176..d0ff0c170b1a 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c @@ -844,8 +844,6 @@ static int nicvf_get_ts_info(struct net_device *netdev, return ethtool_op_get_ts_info(netdev, info); info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c index f2f1055880b2..31685ee304c6 100644 --- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c +++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c @@ -601,9 +601,7 @@ static int enic_set_rxfh(struct net_device *netdev, static int enic_get_ts_info(struct net_device *netdev, struct kernel_ethtool_ts_info *info) { - info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; + info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE; return 0; } diff --git 
a/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c b/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c index 7f081e6e8c87..ba83dbf4ed22 100644 --- a/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c +++ b/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c @@ -1042,12 +1042,9 @@ static int fun_set_rxfh(struct net_device *netdev, static int fun_get_ts_info(struct net_device *netdev, struct kernel_ethtool_ts_info *info) { - info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_RX_HARDWARE | + info->so_timestamping = SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_RAW_HARDWARE; - info->phc_index = -1; info->tx_types = BIT(HWTSTAMP_TX_OFF); info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | BIT(HWTSTAMP_FILTER_ALL); return 0; diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index a19d098f2e2b..6ace55837172 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -418,8 +418,8 @@ do_retry: static void emac_hash_mc(struct emac_instance *dev) { + u32 __iomem *gaht_base = emac_gaht_base(dev); const int regs = EMAC_XAHT_REGS(dev); - u32 *gaht_base = emac_gaht_base(dev); u32 gaht_temp[EMAC_XAHT_MAX_REGS]; struct netdev_hw_addr *ha; int i; diff --git a/drivers/net/ethernet/ibm/emac/core.h b/drivers/net/ethernet/ibm/emac/core.h index 295516b07662..d8664bd65e1f 100644 --- a/drivers/net/ethernet/ibm/emac/core.h +++ b/drivers/net/ethernet/ibm/emac/core.h @@ -400,7 +400,7 @@ static inline int emac_has_feature(struct emac_instance *dev, ((u32)(1 << (EMAC_XAHT_WIDTH(dev) - 1)) >> \ ((slot) & (u32)(EMAC_XAHT_WIDTH(dev) - 1))) -static inline u32 *emac_xaht_base(struct emac_instance *dev) +static inline u32 __iomem *emac_xaht_base(struct emac_instance *dev) { struct emac_regs __iomem *p = dev->emacp; int offset; @@ -413,10 +413,10 @@ static inline u32 *emac_xaht_base(struct emac_instance *dev) else offset = 
offsetof(struct emac_regs, u0.emac4.iaht1); - return (u32 *)((ptrdiff_t)p + offset); + return (u32 __iomem *)((__force ptrdiff_t)p + offset); } -static inline u32 *emac_gaht_base(struct emac_instance *dev) +static inline u32 __iomem *emac_gaht_base(struct emac_instance *dev) { /* GAHT registers always come after an identical number of * IAHT registers. diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c b/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c index e87049dfd223..ef05ae8f5039 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c @@ -10,6 +10,56 @@ #define FBNIC_SN_STR_LEN 24 +static int fbnic_version_running_put(struct devlink_info_req *req, + struct fbnic_fw_ver *fw_ver, + char *ver_name) +{ + char running_ver[FBNIC_FW_VER_MAX_SIZE]; + int err; + + fbnic_mk_fw_ver_str(fw_ver->version, running_ver); + err = devlink_info_version_running_put(req, ver_name, running_ver); + if (err) + return err; + + if (strlen(fw_ver->commit) > 0) { + char commit_name[FBNIC_SN_STR_LEN]; + + snprintf(commit_name, FBNIC_SN_STR_LEN, "%s.commit", ver_name); + err = devlink_info_version_running_put(req, commit_name, + fw_ver->commit); + if (err) + return err; + } + + return 0; +} + +static int fbnic_version_stored_put(struct devlink_info_req *req, + struct fbnic_fw_ver *fw_ver, + char *ver_name) +{ + char stored_ver[FBNIC_FW_VER_MAX_SIZE]; + int err; + + fbnic_mk_fw_ver_str(fw_ver->version, stored_ver); + err = devlink_info_version_stored_put(req, ver_name, stored_ver); + if (err) + return err; + + if (strlen(fw_ver->commit) > 0) { + char commit_name[FBNIC_SN_STR_LEN]; + + snprintf(commit_name, FBNIC_SN_STR_LEN, "%s.commit", ver_name); + err = devlink_info_version_stored_put(req, commit_name, + fw_ver->commit); + if (err) + return err; + } + + return 0; +} + static int fbnic_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, struct netlink_ext_ack *extack) @@ -17,6 +67,31 @@ static int 
fbnic_devlink_info_get(struct devlink *devlink, struct fbnic_dev *fbd = devlink_priv(devlink); int err; + err = fbnic_version_running_put(req, &fbd->fw_cap.running.mgmt, + DEVLINK_INFO_VERSION_GENERIC_FW); + if (err) + return err; + + err = fbnic_version_running_put(req, &fbd->fw_cap.running.bootloader, + DEVLINK_INFO_VERSION_GENERIC_FW_BOOTLOADER); + if (err) + return err; + + err = fbnic_version_stored_put(req, &fbd->fw_cap.stored.mgmt, + DEVLINK_INFO_VERSION_GENERIC_FW); + if (err) + return err; + + err = fbnic_version_stored_put(req, &fbd->fw_cap.stored.bootloader, + DEVLINK_INFO_VERSION_GENERIC_FW_BOOTLOADER); + if (err) + return err; + + err = fbnic_version_stored_put(req, &fbd->fw_cap.stored.undi, + DEVLINK_INFO_VERSION_GENERIC_FW_UNDI); + if (err) + return err; + if (fbd->dsn) { unsigned char serial[FBNIC_SN_STR_LEN]; u8 dsn[8]; diff --git a/drivers/net/ethernet/microchip/lan966x/Kconfig b/drivers/net/ethernet/microchip/lan966x/Kconfig index f9ebffc04eb8..f663b6e12466 100644 --- a/drivers/net/ethernet/microchip/lan966x/Kconfig +++ b/drivers/net/ethernet/microchip/lan966x/Kconfig @@ -8,6 +8,7 @@ config LAN966X_SWITCH select PHYLINK select PAGE_POOL select VCAP + select FDMA help This driver supports the Lan966x network switch device. 
diff --git a/drivers/net/ethernet/microchip/lan966x/Makefile b/drivers/net/ethernet/microchip/lan966x/Makefile index 3b6ac331691d..4cdbe263502c 100644 --- a/drivers/net/ethernet/microchip/lan966x/Makefile +++ b/drivers/net/ethernet/microchip/lan966x/Makefile @@ -20,3 +20,4 @@ lan966x-switch-$(CONFIG_DEBUG_FS) += lan966x_vcap_debugfs.o # Provide include files ccflags-y += -I$(srctree)/drivers/net/ethernet/microchip/vcap +ccflags-y += -I$(srctree)/drivers/net/ethernet/microchip/fdma diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c index 3960534ac2ad..502670718104 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c @@ -6,31 +6,55 @@ #include "lan966x_main.h" -static int lan966x_fdma_channel_active(struct lan966x *lan966x) -{ - return lan_rd(lan966x, FDMA_CH_ACTIVE); -} - -static struct page *lan966x_fdma_rx_alloc_page(struct lan966x_rx *rx, - struct lan966x_db *db) +static int lan966x_fdma_rx_dataptr_cb(struct fdma *fdma, int dcb, int db, + u64 *dataptr) { + struct lan966x *lan966x = (struct lan966x *)fdma->priv; + struct lan966x_rx *rx = &lan966x->rx; struct page *page; page = page_pool_dev_alloc_pages(rx->page_pool); if (unlikely(!page)) - return NULL; + return -ENOMEM; + + rx->page[dcb][db] = page; + *dataptr = page_pool_get_dma_addr(page) + XDP_PACKET_HEADROOM; + + return 0; +} - db->dataptr = page_pool_get_dma_addr(page) + XDP_PACKET_HEADROOM; +static int lan966x_fdma_tx_dataptr_cb(struct fdma *fdma, int dcb, int db, + u64 *dataptr) +{ + struct lan966x *lan966x = (struct lan966x *)fdma->priv; + + *dataptr = lan966x->tx.dcbs_buf[dcb].dma_addr; - return page; + return 0; +} + +static int lan966x_fdma_xdp_tx_dataptr_cb(struct fdma *fdma, int dcb, int db, + u64 *dataptr) +{ + struct lan966x *lan966x = (struct lan966x *)fdma->priv; + + *dataptr = lan966x->tx.dcbs_buf[dcb].dma_addr + XDP_PACKET_HEADROOM; + + return 0; +} + 
+static int lan966x_fdma_channel_active(struct lan966x *lan966x) +{ + return lan_rd(lan966x, FDMA_CH_ACTIVE); } static void lan966x_fdma_rx_free_pages(struct lan966x_rx *rx) { + struct fdma *fdma = &rx->fdma; int i, j; - for (i = 0; i < FDMA_DCB_MAX; ++i) { - for (j = 0; j < FDMA_RX_DCB_MAX_DBS; ++j) + for (i = 0; i < fdma->n_dcbs; ++i) { + for (j = 0; j < fdma->n_dbs; ++j) page_pool_put_full_page(rx->page_pool, rx->page[i][j], false); } @@ -38,41 +62,23 @@ static void lan966x_fdma_rx_free_pages(struct lan966x_rx *rx) static void lan966x_fdma_rx_free_page(struct lan966x_rx *rx) { + struct fdma *fdma = &rx->fdma; struct page *page; - page = rx->page[rx->dcb_index][rx->db_index]; + page = rx->page[fdma->dcb_index][fdma->db_index]; if (unlikely(!page)) return; page_pool_recycle_direct(rx->page_pool, page); } -static void lan966x_fdma_rx_add_dcb(struct lan966x_rx *rx, - struct lan966x_rx_dcb *dcb, - u64 nextptr) -{ - struct lan966x_db *db; - int i; - - for (i = 0; i < FDMA_RX_DCB_MAX_DBS; ++i) { - db = &dcb->db[i]; - db->status = FDMA_DCB_STATUS_INTR; - } - - dcb->nextptr = FDMA_DCB_INVALID_DATA; - dcb->info = FDMA_DCB_INFO_DATAL(PAGE_SIZE << rx->page_order); - - rx->last_entry->nextptr = nextptr; - rx->last_entry = dcb; -} - static int lan966x_fdma_rx_alloc_page_pool(struct lan966x_rx *rx) { struct lan966x *lan966x = rx->lan966x; struct page_pool_params pp_params = { .order = rx->page_order, .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, - .pool_size = FDMA_DCB_MAX, + .pool_size = rx->fdma.n_dcbs, .nid = NUMA_NO_NODE, .dev = lan966x->dev, .dma_dir = DMA_FROM_DEVICE, @@ -104,84 +110,41 @@ static int lan966x_fdma_rx_alloc_page_pool(struct lan966x_rx *rx) static int lan966x_fdma_rx_alloc(struct lan966x_rx *rx) { struct lan966x *lan966x = rx->lan966x; - struct lan966x_rx_dcb *dcb; - struct lan966x_db *db; - struct page *page; - int i, j; - int size; + struct fdma *fdma = &rx->fdma; + int err; if (lan966x_fdma_rx_alloc_page_pool(rx)) return PTR_ERR(rx->page_pool); - /* 
calculate how many pages are needed to allocate the dcbs */ - size = sizeof(struct lan966x_rx_dcb) * FDMA_DCB_MAX; - size = ALIGN(size, PAGE_SIZE); - - rx->dcbs = dma_alloc_coherent(lan966x->dev, size, &rx->dma, GFP_KERNEL); - if (!rx->dcbs) - return -ENOMEM; - - rx->last_entry = rx->dcbs; - rx->db_index = 0; - rx->dcb_index = 0; - - /* Now for each dcb allocate the dbs */ - for (i = 0; i < FDMA_DCB_MAX; ++i) { - dcb = &rx->dcbs[i]; - dcb->info = 0; - - /* For each db allocate a page and map it to the DB dataptr. */ - for (j = 0; j < FDMA_RX_DCB_MAX_DBS; ++j) { - db = &dcb->db[j]; - page = lan966x_fdma_rx_alloc_page(rx, db); - if (!page) - return -ENOMEM; - - db->status = 0; - rx->page[i][j] = page; - } + err = fdma_alloc_coherent(lan966x->dev, fdma); + if (err) + return err; - lan966x_fdma_rx_add_dcb(rx, dcb, rx->dma + sizeof(*dcb) * i); - } + fdma_dcbs_init(fdma, FDMA_DCB_INFO_DATAL(fdma->db_size), + FDMA_DCB_STATUS_INTR); return 0; } -static void lan966x_fdma_rx_advance_dcb(struct lan966x_rx *rx) -{ - rx->dcb_index++; - rx->dcb_index &= FDMA_DCB_MAX - 1; -} - -static void lan966x_fdma_rx_free(struct lan966x_rx *rx) -{ - struct lan966x *lan966x = rx->lan966x; - u32 size; - - /* Now it is possible to do the cleanup of dcb */ - size = sizeof(struct lan966x_tx_dcb) * FDMA_DCB_MAX; - size = ALIGN(size, PAGE_SIZE); - dma_free_coherent(lan966x->dev, size, rx->dcbs, rx->dma); -} - static void lan966x_fdma_rx_start(struct lan966x_rx *rx) { struct lan966x *lan966x = rx->lan966x; + struct fdma *fdma = &rx->fdma; u32 mask; /* When activating a channel, first is required to write the first DCB * address and then to activate it */ - lan_wr(lower_32_bits((u64)rx->dma), lan966x, - FDMA_DCB_LLP(rx->channel_id)); - lan_wr(upper_32_bits((u64)rx->dma), lan966x, - FDMA_DCB_LLP1(rx->channel_id)); + lan_wr(lower_32_bits((u64)fdma->dma), lan966x, + FDMA_DCB_LLP(fdma->channel_id)); + lan_wr(upper_32_bits((u64)fdma->dma), lan966x, + FDMA_DCB_LLP1(fdma->channel_id)); - 
lan_wr(FDMA_CH_CFG_CH_DCB_DB_CNT_SET(FDMA_RX_DCB_MAX_DBS) | + lan_wr(FDMA_CH_CFG_CH_DCB_DB_CNT_SET(fdma->n_dbs) | FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY_SET(1) | FDMA_CH_CFG_CH_INJ_PORT_SET(0) | FDMA_CH_CFG_CH_MEM_SET(1), - lan966x, FDMA_CH_CFG(rx->channel_id)); + lan966x, FDMA_CH_CFG(fdma->channel_id)); /* Start fdma */ lan_rmw(FDMA_PORT_CTRL_XTR_STOP_SET(0), @@ -191,13 +154,13 @@ static void lan966x_fdma_rx_start(struct lan966x_rx *rx) /* Enable interrupts */ mask = lan_rd(lan966x, FDMA_INTR_DB_ENA); mask = FDMA_INTR_DB_ENA_INTR_DB_ENA_GET(mask); - mask |= BIT(rx->channel_id); + mask |= BIT(fdma->channel_id); lan_rmw(FDMA_INTR_DB_ENA_INTR_DB_ENA_SET(mask), FDMA_INTR_DB_ENA_INTR_DB_ENA, lan966x, FDMA_INTR_DB_ENA); /* Activate the channel */ - lan_rmw(FDMA_CH_ACTIVATE_CH_ACTIVATE_SET(BIT(rx->channel_id)), + lan_rmw(FDMA_CH_ACTIVATE_CH_ACTIVATE_SET(BIT(fdma->channel_id)), FDMA_CH_ACTIVATE_CH_ACTIVATE, lan966x, FDMA_CH_ACTIVATE); } @@ -205,18 +168,19 @@ static void lan966x_fdma_rx_start(struct lan966x_rx *rx) static void lan966x_fdma_rx_disable(struct lan966x_rx *rx) { struct lan966x *lan966x = rx->lan966x; + struct fdma *fdma = &rx->fdma; u32 val; /* Disable the channel */ - lan_rmw(FDMA_CH_DISABLE_CH_DISABLE_SET(BIT(rx->channel_id)), + lan_rmw(FDMA_CH_DISABLE_CH_DISABLE_SET(BIT(fdma->channel_id)), FDMA_CH_DISABLE_CH_DISABLE, lan966x, FDMA_CH_DISABLE); readx_poll_timeout_atomic(lan966x_fdma_channel_active, lan966x, - val, !(val & BIT(rx->channel_id)), + val, !(val & BIT(fdma->channel_id)), READL_SLEEP_US, READL_TIMEOUT_US); - lan_rmw(FDMA_CH_DB_DISCARD_DB_DISCARD_SET(BIT(rx->channel_id)), + lan_rmw(FDMA_CH_DB_DISCARD_DB_DISCARD_SET(BIT(fdma->channel_id)), FDMA_CH_DB_DISCARD_DB_DISCARD, lan966x, FDMA_CH_DB_DISCARD); } @@ -225,50 +189,27 @@ static void lan966x_fdma_rx_reload(struct lan966x_rx *rx) { struct lan966x *lan966x = rx->lan966x; - lan_rmw(FDMA_CH_RELOAD_CH_RELOAD_SET(BIT(rx->channel_id)), + lan_rmw(FDMA_CH_RELOAD_CH_RELOAD_SET(BIT(rx->fdma.channel_id)), 
FDMA_CH_RELOAD_CH_RELOAD, lan966x, FDMA_CH_RELOAD); } -static void lan966x_fdma_tx_add_dcb(struct lan966x_tx *tx, - struct lan966x_tx_dcb *dcb) -{ - dcb->nextptr = FDMA_DCB_INVALID_DATA; - dcb->info = 0; -} - static int lan966x_fdma_tx_alloc(struct lan966x_tx *tx) { struct lan966x *lan966x = tx->lan966x; - struct lan966x_tx_dcb *dcb; - struct lan966x_db *db; - int size; - int i, j; + struct fdma *fdma = &tx->fdma; + int err; - tx->dcbs_buf = kcalloc(FDMA_DCB_MAX, sizeof(struct lan966x_tx_dcb_buf), + tx->dcbs_buf = kcalloc(fdma->n_dcbs, sizeof(struct lan966x_tx_dcb_buf), GFP_KERNEL); if (!tx->dcbs_buf) return -ENOMEM; - /* calculate how many pages are needed to allocate the dcbs */ - size = sizeof(struct lan966x_tx_dcb) * FDMA_DCB_MAX; - size = ALIGN(size, PAGE_SIZE); - tx->dcbs = dma_alloc_coherent(lan966x->dev, size, &tx->dma, GFP_KERNEL); - if (!tx->dcbs) + err = fdma_alloc_coherent(lan966x->dev, fdma); + if (err) goto out; - /* Now for each dcb allocate the db */ - for (i = 0; i < FDMA_DCB_MAX; ++i) { - dcb = &tx->dcbs[i]; - - for (j = 0; j < FDMA_TX_DCB_MAX_DBS; ++j) { - db = &dcb->db[j]; - db->dataptr = 0; - db->status = 0; - } - - lan966x_fdma_tx_add_dcb(tx, dcb); - } + fdma_dcbs_init(fdma, 0, 0); return 0; @@ -280,33 +221,30 @@ out: static void lan966x_fdma_tx_free(struct lan966x_tx *tx) { struct lan966x *lan966x = tx->lan966x; - int size; kfree(tx->dcbs_buf); - - size = sizeof(struct lan966x_tx_dcb) * FDMA_DCB_MAX; - size = ALIGN(size, PAGE_SIZE); - dma_free_coherent(lan966x->dev, size, tx->dcbs, tx->dma); + fdma_free_coherent(lan966x->dev, &tx->fdma); } static void lan966x_fdma_tx_activate(struct lan966x_tx *tx) { struct lan966x *lan966x = tx->lan966x; + struct fdma *fdma = &tx->fdma; u32 mask; /* When activating a channel, first is required to write the first DCB * address and then to activate it */ - lan_wr(lower_32_bits((u64)tx->dma), lan966x, - FDMA_DCB_LLP(tx->channel_id)); - lan_wr(upper_32_bits((u64)tx->dma), lan966x, - 
FDMA_DCB_LLP1(tx->channel_id)); + lan_wr(lower_32_bits((u64)fdma->dma), lan966x, + FDMA_DCB_LLP(fdma->channel_id)); + lan_wr(upper_32_bits((u64)fdma->dma), lan966x, + FDMA_DCB_LLP1(fdma->channel_id)); - lan_wr(FDMA_CH_CFG_CH_DCB_DB_CNT_SET(FDMA_TX_DCB_MAX_DBS) | + lan_wr(FDMA_CH_CFG_CH_DCB_DB_CNT_SET(fdma->n_dbs) | FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY_SET(1) | FDMA_CH_CFG_CH_INJ_PORT_SET(0) | FDMA_CH_CFG_CH_MEM_SET(1), - lan966x, FDMA_CH_CFG(tx->channel_id)); + lan966x, FDMA_CH_CFG(fdma->channel_id)); /* Start fdma */ lan_rmw(FDMA_PORT_CTRL_INJ_STOP_SET(0), @@ -316,13 +254,13 @@ static void lan966x_fdma_tx_activate(struct lan966x_tx *tx) /* Enable interrupts */ mask = lan_rd(lan966x, FDMA_INTR_DB_ENA); mask = FDMA_INTR_DB_ENA_INTR_DB_ENA_GET(mask); - mask |= BIT(tx->channel_id); + mask |= BIT(fdma->channel_id); lan_rmw(FDMA_INTR_DB_ENA_INTR_DB_ENA_SET(mask), FDMA_INTR_DB_ENA_INTR_DB_ENA, lan966x, FDMA_INTR_DB_ENA); /* Activate the channel */ - lan_rmw(FDMA_CH_ACTIVATE_CH_ACTIVATE_SET(BIT(tx->channel_id)), + lan_rmw(FDMA_CH_ACTIVATE_CH_ACTIVATE_SET(BIT(fdma->channel_id)), FDMA_CH_ACTIVATE_CH_ACTIVATE, lan966x, FDMA_CH_ACTIVATE); } @@ -330,23 +268,23 @@ static void lan966x_fdma_tx_activate(struct lan966x_tx *tx) static void lan966x_fdma_tx_disable(struct lan966x_tx *tx) { struct lan966x *lan966x = tx->lan966x; + struct fdma *fdma = &tx->fdma; u32 val; /* Disable the channel */ - lan_rmw(FDMA_CH_DISABLE_CH_DISABLE_SET(BIT(tx->channel_id)), + lan_rmw(FDMA_CH_DISABLE_CH_DISABLE_SET(BIT(fdma->channel_id)), FDMA_CH_DISABLE_CH_DISABLE, lan966x, FDMA_CH_DISABLE); readx_poll_timeout_atomic(lan966x_fdma_channel_active, lan966x, - val, !(val & BIT(tx->channel_id)), + val, !(val & BIT(fdma->channel_id)), READL_SLEEP_US, READL_TIMEOUT_US); - lan_rmw(FDMA_CH_DB_DISCARD_DB_DISCARD_SET(BIT(tx->channel_id)), + lan_rmw(FDMA_CH_DB_DISCARD_DB_DISCARD_SET(BIT(fdma->channel_id)), FDMA_CH_DB_DISCARD_DB_DISCARD, lan966x, FDMA_CH_DB_DISCARD); tx->activated = false; - tx->last_in_use = -1; } 
static void lan966x_fdma_tx_reload(struct lan966x_tx *tx) @@ -354,7 +292,7 @@ static void lan966x_fdma_tx_reload(struct lan966x_tx *tx) struct lan966x *lan966x = tx->lan966x; /* Write the registers to reload the channel */ - lan_rmw(FDMA_CH_RELOAD_CH_RELOAD_SET(BIT(tx->channel_id)), + lan_rmw(FDMA_CH_RELOAD_CH_RELOAD_SET(BIT(tx->fdma.channel_id)), FDMA_CH_RELOAD_CH_RELOAD, lan966x, FDMA_CH_RELOAD); } @@ -393,23 +331,24 @@ static void lan966x_fdma_tx_clear_buf(struct lan966x *lan966x, int weight) struct lan966x_tx *tx = &lan966x->tx; struct lan966x_rx *rx = &lan966x->rx; struct lan966x_tx_dcb_buf *dcb_buf; + struct fdma *fdma = &tx->fdma; struct xdp_frame_bulk bq; - struct lan966x_db *db; unsigned long flags; bool clear = false; + struct fdma_db *db; int i; xdp_frame_bulk_init(&bq); spin_lock_irqsave(&lan966x->tx_lock, flags); - for (i = 0; i < FDMA_DCB_MAX; ++i) { + for (i = 0; i < fdma->n_dcbs; ++i) { dcb_buf = &tx->dcbs_buf[i]; if (!dcb_buf->used) continue; - db = &tx->dcbs[i].db[0]; - if (!(db->status & FDMA_DCB_STATUS_DONE)) + db = fdma_db_get(fdma, i, 0); + if (!fdma_db_is_done(db)) continue; dcb_buf->dev->stats.tx_packets++; @@ -449,27 +388,16 @@ static void lan966x_fdma_tx_clear_buf(struct lan966x *lan966x, int weight) spin_unlock_irqrestore(&lan966x->tx_lock, flags); } -static bool lan966x_fdma_rx_more_frames(struct lan966x_rx *rx) -{ - struct lan966x_db *db; - - /* Check if there is any data */ - db = &rx->dcbs[rx->dcb_index].db[rx->db_index]; - if (unlikely(!(db->status & FDMA_DCB_STATUS_DONE))) - return false; - - return true; -} - static int lan966x_fdma_rx_check_frame(struct lan966x_rx *rx, u64 *src_port) { struct lan966x *lan966x = rx->lan966x; + struct fdma *fdma = &rx->fdma; struct lan966x_port *port; - struct lan966x_db *db; + struct fdma_db *db; struct page *page; - db = &rx->dcbs[rx->dcb_index].db[rx->db_index]; - page = rx->page[rx->dcb_index][rx->db_index]; + db = fdma_db_next_get(fdma); + page = rx->page[fdma->dcb_index][fdma->db_index]; if 
(unlikely(!page)) return FDMA_ERROR; @@ -494,16 +422,17 @@ static struct sk_buff *lan966x_fdma_rx_get_frame(struct lan966x_rx *rx, u64 src_port) { struct lan966x *lan966x = rx->lan966x; - struct lan966x_db *db; + struct fdma *fdma = &rx->fdma; struct sk_buff *skb; + struct fdma_db *db; struct page *page; u64 timestamp; /* Get the received frame and unmap it */ - db = &rx->dcbs[rx->dcb_index].db[rx->db_index]; - page = rx->page[rx->dcb_index][rx->db_index]; + db = fdma_db_next_get(fdma); + page = rx->page[fdma->dcb_index][fdma->db_index]; - skb = build_skb(page_address(page), PAGE_SIZE << rx->page_order); + skb = build_skb(page_address(page), fdma->db_size); if (unlikely(!skb)) goto free_page; @@ -546,21 +475,19 @@ static int lan966x_fdma_napi_poll(struct napi_struct *napi, int weight) { struct lan966x *lan966x = container_of(napi, struct lan966x, napi); struct lan966x_rx *rx = &lan966x->rx; - int dcb_reload = rx->dcb_index; - struct lan966x_rx_dcb *old_dcb; - struct lan966x_db *db; + int old_dcb, dcb_reload, counter = 0; + struct fdma *fdma = &rx->fdma; bool redirect = false; struct sk_buff *skb; - struct page *page; - int counter = 0; u64 src_port; - u64 nextptr; + + dcb_reload = fdma->dcb_index; lan966x_fdma_tx_clear_buf(lan966x, weight); /* Get all received skb */ while (counter < weight) { - if (!lan966x_fdma_rx_more_frames(rx)) + if (!fdma_has_frames(fdma)) break; counter++; @@ -570,22 +497,22 @@ static int lan966x_fdma_napi_poll(struct napi_struct *napi, int weight) break; case FDMA_ERROR: lan966x_fdma_rx_free_page(rx); - lan966x_fdma_rx_advance_dcb(rx); + fdma_dcb_advance(fdma); goto allocate_new; case FDMA_REDIRECT: redirect = true; fallthrough; case FDMA_TX: - lan966x_fdma_rx_advance_dcb(rx); + fdma_dcb_advance(fdma); continue; case FDMA_DROP: lan966x_fdma_rx_free_page(rx); - lan966x_fdma_rx_advance_dcb(rx); + fdma_dcb_advance(fdma); continue; } skb = lan966x_fdma_rx_get_frame(rx, src_port); - lan966x_fdma_rx_advance_dcb(rx); + fdma_dcb_advance(fdma); if 
(!skb) goto allocate_new; @@ -594,20 +521,14 @@ static int lan966x_fdma_napi_poll(struct napi_struct *napi, int weight) allocate_new: /* Allocate new pages and map them */ - while (dcb_reload != rx->dcb_index) { - db = &rx->dcbs[dcb_reload].db[rx->db_index]; - page = lan966x_fdma_rx_alloc_page(rx, db); - if (unlikely(!page)) - break; - rx->page[dcb_reload][rx->db_index] = page; - - old_dcb = &rx->dcbs[dcb_reload]; + while (dcb_reload != fdma->dcb_index) { + old_dcb = dcb_reload; dcb_reload++; - dcb_reload &= FDMA_DCB_MAX - 1; + dcb_reload &= fdma->n_dcbs - 1; + + fdma_dcb_add(fdma, old_dcb, FDMA_DCB_INFO_DATAL(fdma->db_size), + FDMA_DCB_STATUS_INTR); - nextptr = rx->dma + ((unsigned long)old_dcb - - (unsigned long)rx->dcbs); - lan966x_fdma_rx_add_dcb(rx, old_dcb, nextptr); lan966x_fdma_rx_reload(rx); } @@ -650,56 +571,30 @@ irqreturn_t lan966x_fdma_irq_handler(int irq, void *args) static int lan966x_fdma_get_next_dcb(struct lan966x_tx *tx) { struct lan966x_tx_dcb_buf *dcb_buf; + struct fdma *fdma = &tx->fdma; int i; - for (i = 0; i < FDMA_DCB_MAX; ++i) { + for (i = 0; i < fdma->n_dcbs; ++i) { dcb_buf = &tx->dcbs_buf[i]; - if (!dcb_buf->used && i != tx->last_in_use) + if (!dcb_buf->used && + !fdma_is_last(&tx->fdma, &tx->fdma.dcbs[i])) return i; } return -1; } -static void lan966x_fdma_tx_setup_dcb(struct lan966x_tx *tx, - int next_to_use, int len, - dma_addr_t dma_addr) -{ - struct lan966x_tx_dcb *next_dcb; - struct lan966x_db *next_db; - - next_dcb = &tx->dcbs[next_to_use]; - next_dcb->nextptr = FDMA_DCB_INVALID_DATA; - - next_db = &next_dcb->db[0]; - next_db->dataptr = dma_addr; - next_db->status = FDMA_DCB_STATUS_SOF | - FDMA_DCB_STATUS_EOF | - FDMA_DCB_STATUS_INTR | - FDMA_DCB_STATUS_BLOCKO(0) | - FDMA_DCB_STATUS_BLOCKL(len); -} - -static void lan966x_fdma_tx_start(struct lan966x_tx *tx, int next_to_use) +static void lan966x_fdma_tx_start(struct lan966x_tx *tx) { struct lan966x *lan966x = tx->lan966x; - struct lan966x_tx_dcb *dcb; if 
(likely(lan966x->tx.activated)) { - /* Connect current dcb to the next db */ - dcb = &tx->dcbs[tx->last_in_use]; - dcb->nextptr = tx->dma + (next_to_use * - sizeof(struct lan966x_tx_dcb)); - lan966x_fdma_tx_reload(tx); } else { /* Because it is first time, then just activate */ lan966x->tx.activated = true; lan966x_fdma_tx_activate(tx); } - - /* Move to next dcb because this last in use */ - tx->last_in_use = next_to_use; } int lan966x_fdma_xmit_xdpf(struct lan966x_port *port, void *ptr, u32 len) @@ -752,11 +647,6 @@ int lan966x_fdma_xmit_xdpf(struct lan966x_port *port, void *ptr, u32 len) next_dcb_buf->data.xdpf = xdpf; next_dcb_buf->len = xdpf->len + IFH_LEN_BYTES; - - /* Setup next dcb */ - lan966x_fdma_tx_setup_dcb(tx, next_to_use, - xdpf->len + IFH_LEN_BYTES, - dma_addr); } else { page = ptr; @@ -773,11 +663,6 @@ int lan966x_fdma_xmit_xdpf(struct lan966x_port *port, void *ptr, u32 len) next_dcb_buf->data.page = page; next_dcb_buf->len = len + IFH_LEN_BYTES; - - /* Setup next dcb */ - lan966x_fdma_tx_setup_dcb(tx, next_to_use, - len + IFH_LEN_BYTES, - dma_addr + XDP_PACKET_HEADROOM); } /* Fill up the buffer */ @@ -788,8 +673,19 @@ int lan966x_fdma_xmit_xdpf(struct lan966x_port *port, void *ptr, u32 len) next_dcb_buf->ptp = false; next_dcb_buf->dev = port->dev; + __fdma_dcb_add(&tx->fdma, + next_to_use, + 0, + FDMA_DCB_STATUS_INTR | + FDMA_DCB_STATUS_SOF | + FDMA_DCB_STATUS_EOF | + FDMA_DCB_STATUS_BLOCKO(0) | + FDMA_DCB_STATUS_BLOCKL(next_dcb_buf->len), + &fdma_nextptr_cb, + &lan966x_fdma_xdp_tx_dataptr_cb); + /* Start the transmission */ - lan966x_fdma_tx_start(tx, next_to_use); + lan966x_fdma_tx_start(tx); out: spin_unlock(&lan966x->tx_lock); @@ -847,9 +743,6 @@ int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev) goto release; } - /* Setup next dcb */ - lan966x_fdma_tx_setup_dcb(tx, next_to_use, skb->len, dma_addr); - /* Fill up the buffer */ next_dcb_buf = &tx->dcbs_buf[next_to_use]; next_dcb_buf->use_skb = true; @@ -861,12 +754,21 
@@ int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev) next_dcb_buf->ptp = false; next_dcb_buf->dev = dev; + fdma_dcb_add(&tx->fdma, + next_to_use, + 0, + FDMA_DCB_STATUS_INTR | + FDMA_DCB_STATUS_SOF | + FDMA_DCB_STATUS_EOF | + FDMA_DCB_STATUS_BLOCKO(0) | + FDMA_DCB_STATUS_BLOCKL(skb->len)); + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && LAN966X_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP) next_dcb_buf->ptp = true; /* Start the transmission */ - lan966x_fdma_tx_start(tx, next_to_use); + lan966x_fdma_tx_start(tx); return NETDEV_TX_OK; @@ -908,14 +810,11 @@ static int lan966x_qsys_sw_status(struct lan966x *lan966x) static int lan966x_fdma_reload(struct lan966x *lan966x, int new_mtu) { struct page_pool *page_pool; - dma_addr_t rx_dma; - void *rx_dcbs; - u32 size; + struct fdma fdma_rx_old; int err; /* Store these for later to free them */ - rx_dma = lan966x->rx.dma; - rx_dcbs = lan966x->rx.dcbs; + memcpy(&fdma_rx_old, &lan966x->rx.fdma, sizeof(struct fdma)); page_pool = lan966x->rx.page_pool; napi_synchronize(&lan966x->napi); @@ -931,9 +830,7 @@ static int lan966x_fdma_reload(struct lan966x *lan966x, int new_mtu) goto restore; lan966x_fdma_rx_start(&lan966x->rx); - size = sizeof(struct lan966x_rx_dcb) * FDMA_DCB_MAX; - size = ALIGN(size, PAGE_SIZE); - dma_free_coherent(lan966x->dev, size, rx_dcbs, rx_dma); + fdma_free_coherent(lan966x->dev, &fdma_rx_old); page_pool_destroy(page_pool); @@ -943,8 +840,7 @@ static int lan966x_fdma_reload(struct lan966x *lan966x, int new_mtu) return err; restore: lan966x->rx.page_pool = page_pool; - lan966x->rx.dma = rx_dma; - lan966x->rx.dcbs = rx_dcbs; + memcpy(&lan966x->rx.fdma, &fdma_rx_old, sizeof(struct fdma)); lan966x_fdma_rx_start(&lan966x->rx); return err; @@ -1034,11 +930,24 @@ int lan966x_fdma_init(struct lan966x *lan966x) return 0; lan966x->rx.lan966x = lan966x; - lan966x->rx.channel_id = FDMA_XTR_CHANNEL; + lan966x->rx.fdma.channel_id = FDMA_XTR_CHANNEL; + lan966x->rx.fdma.n_dcbs = 
FDMA_DCB_MAX; + lan966x->rx.fdma.n_dbs = FDMA_RX_DCB_MAX_DBS; + lan966x->rx.fdma.priv = lan966x; + lan966x->rx.fdma.size = fdma_get_size(&lan966x->rx.fdma); + lan966x->rx.fdma.db_size = PAGE_SIZE << lan966x->rx.page_order; + lan966x->rx.fdma.ops.nextptr_cb = &fdma_nextptr_cb; + lan966x->rx.fdma.ops.dataptr_cb = &lan966x_fdma_rx_dataptr_cb; lan966x->rx.max_mtu = lan966x_fdma_get_max_frame(lan966x); lan966x->tx.lan966x = lan966x; - lan966x->tx.channel_id = FDMA_INJ_CHANNEL; - lan966x->tx.last_in_use = -1; + lan966x->tx.fdma.channel_id = FDMA_INJ_CHANNEL; + lan966x->tx.fdma.n_dcbs = FDMA_DCB_MAX; + lan966x->tx.fdma.n_dbs = FDMA_TX_DCB_MAX_DBS; + lan966x->tx.fdma.priv = lan966x; + lan966x->tx.fdma.size = fdma_get_size(&lan966x->tx.fdma); + lan966x->tx.fdma.db_size = PAGE_SIZE << lan966x->rx.page_order; + lan966x->tx.fdma.ops.nextptr_cb = &fdma_nextptr_cb; + lan966x->tx.fdma.ops.dataptr_cb = &lan966x_fdma_tx_dataptr_cb; err = lan966x_fdma_rx_alloc(&lan966x->rx); if (err) @@ -1046,7 +955,7 @@ int lan966x_fdma_init(struct lan966x *lan966x) err = lan966x_fdma_tx_alloc(&lan966x->tx); if (err) { - lan966x_fdma_rx_free(&lan966x->rx); + fdma_free_coherent(lan966x->dev, &lan966x->rx.fdma); return err; } @@ -1067,7 +976,7 @@ void lan966x_fdma_deinit(struct lan966x *lan966x) napi_disable(&lan966x->napi); lan966x_fdma_rx_free_pages(&lan966x->rx); - lan966x_fdma_rx_free(&lan966x->rx); + fdma_free_coherent(lan966x->dev, &lan966x->rx.fdma); page_pool_destroy(lan966x->rx.page_pool); lan966x_fdma_tx_free(&lan966x->tx); } diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h index f8bebbcf77b2..25cb2f61986f 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h @@ -16,6 +16,7 @@ #include <net/switchdev.h> #include <net/xdp.h> +#include <fdma_api.h> #include <vcap_api.h> #include <vcap_api_client.h> @@ -76,15 +77,6 @@ #define FDMA_RX_DCB_MAX_DBS 1 
#define FDMA_TX_DCB_MAX_DBS 1 -#define FDMA_DCB_INFO_DATAL(x) ((x) & GENMASK(15, 0)) - -#define FDMA_DCB_STATUS_BLOCKL(x) ((x) & GENMASK(15, 0)) -#define FDMA_DCB_STATUS_SOF BIT(16) -#define FDMA_DCB_STATUS_EOF BIT(17) -#define FDMA_DCB_STATUS_INTR BIT(18) -#define FDMA_DCB_STATUS_DONE BIT(19) -#define FDMA_DCB_STATUS_BLOCKO(x) (((x) << 20) & GENMASK(31, 20)) -#define FDMA_DCB_INVALID_DATA 0x1 #define FDMA_XTR_CHANNEL 6 #define FDMA_INJ_CHANNEL 0 @@ -199,49 +191,14 @@ enum vcap_is1_port_sel_rt { struct lan966x_port; -struct lan966x_db { - u64 dataptr; - u64 status; -}; - -struct lan966x_rx_dcb { - u64 nextptr; - u64 info; - struct lan966x_db db[FDMA_RX_DCB_MAX_DBS]; -}; - -struct lan966x_tx_dcb { - u64 nextptr; - u64 info; - struct lan966x_db db[FDMA_TX_DCB_MAX_DBS]; -}; - struct lan966x_rx { struct lan966x *lan966x; - /* Pointer to the array of hardware dcbs. */ - struct lan966x_rx_dcb *dcbs; - - /* Pointer to the last address in the dcbs. */ - struct lan966x_rx_dcb *last_entry; + struct fdma fdma; /* For each DB, there is a page */ struct page *page[FDMA_DCB_MAX][FDMA_RX_DCB_MAX_DBS]; - /* Represents the db_index, it can have a value between 0 and - * FDMA_RX_DCB_MAX_DBS, once it reaches the value of FDMA_RX_DCB_MAX_DBS - * it means that the DCB can be reused. - */ - int db_index; - - /* Represents the index in the dcbs. It has a value between 0 and - * FDMA_DCB_MAX - */ - int dcb_index; - - /* Represents the dma address to the dcbs array */ - dma_addr_t dma; - /* Represents the page order that is used to allocate the pages for the * RX buffers. This value is calculated based on max MTU of the devices. */ @@ -252,8 +209,6 @@ struct lan966x_rx { */ u32 max_mtu; - u8 channel_id; - struct page_pool *page_pool; }; @@ -275,18 +230,11 @@ struct lan966x_tx_dcb_buf { struct lan966x_tx { struct lan966x *lan966x; - /* Pointer to the dcb list */ - struct lan966x_tx_dcb *dcbs; - u16 last_in_use; - - /* Represents the DMA address to the first entry of the dcb entries. 
*/ - dma_addr_t dma; + struct fdma fdma; /* Array of dcbs that are given to the HW */ struct lan966x_tx_dcb_buf *dcbs_buf; - u8 channel_id; - bool activated; }; diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.c b/drivers/net/ethernet/mscc/ocelot_ptp.c index b3c28260adf8..e172638b0601 100644 --- a/drivers/net/ethernet/mscc/ocelot_ptp.c +++ b/drivers/net/ethernet/mscc/ocelot_ptp.c @@ -582,17 +582,13 @@ EXPORT_SYMBOL(ocelot_hwstamp_set); int ocelot_get_ts_info(struct ocelot *ocelot, int port, struct kernel_ethtool_ts_info *info) { - info->phc_index = ocelot->ptp_clock ? - ptp_clock_index(ocelot->ptp_clock) : -1; - if (info->phc_index == -1) { - info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; + if (ocelot->ptp_clock) { + info->phc_index = ptp_clock_index(ocelot->ptp_clock); + } else { + info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE; return 0; } info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; diff --git a/drivers/net/ethernet/pensando/Kconfig b/drivers/net/ethernet/pensando/Kconfig index 3f7519e435b8..01fe76786f77 100644 --- a/drivers/net/ethernet/pensando/Kconfig +++ b/drivers/net/ethernet/pensando/Kconfig @@ -23,6 +23,7 @@ config IONIC depends on PTP_1588_CLOCK_OPTIONAL select NET_DEVLINK select DIMLIB + select PAGE_POOL help This enables the support for the Pensando family of Ethernet adapters. 
More specific information on this driver can be diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h index f2f07bf88545..c8c710cfe70c 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h @@ -181,10 +181,7 @@ struct ionic_queue; struct ionic_qcq; #define IONIC_MAX_BUF_LEN ((u16)-1) -#define IONIC_PAGE_SIZE PAGE_SIZE -#define IONIC_PAGE_SPLIT_SZ (PAGE_SIZE / 2) -#define IONIC_PAGE_GFP_MASK (GFP_ATOMIC | __GFP_NOWARN |\ - __GFP_COMP | __GFP_MEMALLOC) +#define IONIC_PAGE_SIZE MIN(PAGE_SIZE, IONIC_MAX_BUF_LEN) #define IONIC_XDP_MAX_LINEAR_MTU (IONIC_PAGE_SIZE - \ (VLAN_ETH_HLEN + \ @@ -238,9 +235,8 @@ struct ionic_queue { unsigned int index; unsigned int num_descs; unsigned int max_sg_elems; + u64 features; - unsigned int type; - unsigned int hw_index; unsigned int hw_type; bool xdp_flush; union { @@ -250,18 +246,23 @@ struct ionic_queue { struct ionic_admin_cmd *adminq; }; union { - void __iomem *cmb_base; - struct ionic_txq_desc __iomem *cmb_txq; - struct ionic_rxq_desc __iomem *cmb_rxq; - }; - union { void *sg_base; struct ionic_txq_sg_desc *txq_sgl; struct ionic_txq_sg_desc_v1 *txq_sgl_v1; struct ionic_rxq_sg_desc *rxq_sgl; }; struct xdp_rxq_info *xdp_rxq_info; + struct bpf_prog *xdp_prog; + struct page_pool *page_pool; struct ionic_queue *partner; + + union { + void __iomem *cmb_base; + struct ionic_txq_desc __iomem *cmb_txq; + struct ionic_rxq_desc __iomem *cmb_rxq; + }; + unsigned int type; + unsigned int hw_index; dma_addr_t base_pa; dma_addr_t cmb_base_pa; dma_addr_t sg_base_pa; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 86774d9922d8..40496587b2b3 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -13,6 +13,7 @@ #include <linux/cpumask.h> #include <linux/crash_dump.h> #include <linux/vmalloc.h> +#include 
<net/page_pool/helpers.h> #include "ionic.h" #include "ionic_bus.h" @@ -46,8 +47,9 @@ static int ionic_start_queues(struct ionic_lif *lif); static void ionic_stop_queues(struct ionic_lif *lif); static void ionic_lif_queue_identify(struct ionic_lif *lif); -static int ionic_xdp_queues_config(struct ionic_lif *lif); -static void ionic_xdp_unregister_rxq_info(struct ionic_queue *q); +static void ionic_xdp_rxqs_prog_update(struct ionic_lif *lif); +static void ionic_unregister_rxq_info(struct ionic_queue *q); +static int ionic_register_rxq_info(struct ionic_queue *q, unsigned int napi_id); static void ionic_dim_work(struct work_struct *work) { @@ -380,6 +382,7 @@ static void ionic_lif_qcq_deinit(struct ionic_lif *lif, struct ionic_qcq *qcq) if (!(qcq->flags & IONIC_QCQ_F_INITED)) return; + ionic_unregister_rxq_info(&qcq->q); if (qcq->flags & IONIC_QCQ_F_INTR) { ionic_intr_mask(idev->intr_ctrl, qcq->intr.index, IONIC_INTR_MASK_SET); @@ -437,9 +440,10 @@ static void ionic_qcq_free(struct ionic_lif *lif, struct ionic_qcq *qcq) qcq->sg_base_pa = 0; } - ionic_xdp_unregister_rxq_info(&qcq->q); - ionic_qcq_intr_free(lif, qcq); + page_pool_destroy(qcq->q.page_pool); + qcq->q.page_pool = NULL; + ionic_qcq_intr_free(lif, qcq); vfree(qcq->q.info); qcq->q.info = NULL; } @@ -553,7 +557,8 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, unsigned int cq_desc_size, unsigned int sg_desc_size, unsigned int desc_info_size, - unsigned int pid, struct ionic_qcq **qcq) + unsigned int pid, struct bpf_prog *xdp_prog, + struct ionic_qcq **qcq) { struct ionic_dev *idev = &lif->ionic->idev; struct device *dev = lif->ionic->dev; @@ -579,6 +584,31 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, goto err_out_free_qcq; } + if (type == IONIC_QTYPE_RXQ) { + struct page_pool_params pp_params = { + .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, + .order = 0, + .pool_size = num_descs, + .nid = NUMA_NO_NODE, + .dev = lif->ionic->dev, + .napi = &new->napi, + 
.dma_dir = DMA_FROM_DEVICE, + .max_len = PAGE_SIZE, + .netdev = lif->netdev, + }; + + if (xdp_prog) + pp_params.dma_dir = DMA_BIDIRECTIONAL; + + new->q.page_pool = page_pool_create(&pp_params); + if (IS_ERR(new->q.page_pool)) { + netdev_err(lif->netdev, "Cannot create page_pool\n"); + err = PTR_ERR(new->q.page_pool); + new->q.page_pool = NULL; + goto err_out_free_q_info; + } + } + new->q.type = type; new->q.max_sg_elems = lif->qtype_info[type].max_sg_elems; @@ -586,12 +616,12 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, desc_size, sg_desc_size, pid); if (err) { netdev_err(lif->netdev, "Cannot initialize queue\n"); - goto err_out_free_q_info; + goto err_out_free_page_pool; } err = ionic_alloc_qcq_interrupt(lif, new); if (err) - goto err_out_free_q_info; + goto err_out_free_page_pool; err = ionic_cq_init(lif, &new->cq, &new->intr, num_descs, cq_desc_size); if (err) { @@ -712,6 +742,8 @@ err_out_free_irq: devm_free_irq(dev, new->intr.vector, &new->napi); ionic_intr_free(lif->ionic, new->intr.index); } +err_out_free_page_pool: + page_pool_destroy(new->q.page_pool); err_out_free_q_info: vfree(new->q.info); err_out_free_qcq: @@ -734,7 +766,7 @@ static int ionic_qcqs_alloc(struct ionic_lif *lif) sizeof(struct ionic_admin_comp), 0, sizeof(struct ionic_admin_desc_info), - lif->kern_pid, &lif->adminqcq); + lif->kern_pid, NULL, &lif->adminqcq); if (err) return err; ionic_debugfs_add_qcq(lif, lif->adminqcq); @@ -747,7 +779,7 @@ static int ionic_qcqs_alloc(struct ionic_lif *lif) sizeof(union ionic_notifyq_comp), 0, sizeof(struct ionic_admin_desc_info), - lif->kern_pid, &lif->notifyqcq); + lif->kern_pid, NULL, &lif->notifyqcq); if (err) goto err_out; ionic_debugfs_add_qcq(lif, lif->notifyqcq); @@ -925,6 +957,11 @@ static int ionic_lif_rxq_init(struct ionic_lif *lif, struct ionic_qcq *qcq) netif_napi_add(lif->netdev, &qcq->napi, ionic_rx_napi); else netif_napi_add(lif->netdev, &qcq->napi, ionic_txrx_napi); + err = ionic_register_rxq_info(q, 
qcq->napi.napi_id); + if (err) { + netif_napi_del(&qcq->napi); + return err; + } qcq->flags |= IONIC_QCQ_F_INITED; @@ -960,7 +997,7 @@ int ionic_lif_create_hwstamp_txq(struct ionic_lif *lif) err = ionic_qcq_alloc(lif, IONIC_QTYPE_TXQ, txq_i, "hwstamp_tx", flags, num_desc, desc_sz, comp_sz, sg_desc_sz, sizeof(struct ionic_tx_desc_info), - lif->kern_pid, &txq); + lif->kern_pid, NULL, &txq); if (err) goto err_qcq_alloc; @@ -1020,7 +1057,7 @@ int ionic_lif_create_hwstamp_rxq(struct ionic_lif *lif) err = ionic_qcq_alloc(lif, IONIC_QTYPE_RXQ, rxq_i, "hwstamp_rx", flags, num_desc, desc_sz, comp_sz, sg_desc_sz, sizeof(struct ionic_rx_desc_info), - lif->kern_pid, &rxq); + lif->kern_pid, NULL, &rxq); if (err) goto err_qcq_alloc; @@ -1037,7 +1074,7 @@ int ionic_lif_create_hwstamp_rxq(struct ionic_lif *lif) goto err_qcq_init; if (test_bit(IONIC_LIF_F_UP, lif->state)) { - ionic_rx_fill(&rxq->q); + ionic_rx_fill(&rxq->q, NULL); err = ionic_qcq_enable(rxq); if (err) goto err_qcq_enable; @@ -2046,7 +2083,7 @@ static int ionic_txrx_alloc(struct ionic_lif *lif) err = ionic_qcq_alloc(lif, IONIC_QTYPE_TXQ, i, "tx", flags, num_desc, desc_sz, comp_sz, sg_desc_sz, sizeof(struct ionic_tx_desc_info), - lif->kern_pid, &lif->txqcqs[i]); + lif->kern_pid, NULL, &lif->txqcqs[i]); if (err) goto err_out; @@ -2078,7 +2115,8 @@ static int ionic_txrx_alloc(struct ionic_lif *lif) err = ionic_qcq_alloc(lif, IONIC_QTYPE_RXQ, i, "rx", flags, num_desc, desc_sz, comp_sz, sg_desc_sz, sizeof(struct ionic_rx_desc_info), - lif->kern_pid, &lif->rxqcqs[i]); + lif->kern_pid, lif->xdp_prog, + &lif->rxqcqs[i]); if (err) goto err_out; @@ -2143,9 +2181,7 @@ static int ionic_txrx_enable(struct ionic_lif *lif) int derr = 0; int i, err; - err = ionic_xdp_queues_config(lif); - if (err) - return err; + ionic_xdp_rxqs_prog_update(lif); for (i = 0; i < lif->nxqs; i++) { if (!(lif->rxqcqs[i] && lif->txqcqs[i])) { @@ -2154,7 +2190,8 @@ static int ionic_txrx_enable(struct ionic_lif *lif) goto err_out; } - 
ionic_rx_fill(&lif->rxqcqs[i]->q); + ionic_rx_fill(&lif->rxqcqs[i]->q, + READ_ONCE(lif->rxqcqs[i]->q.xdp_prog)); err = ionic_qcq_enable(lif->rxqcqs[i]); if (err) goto err_out; @@ -2167,7 +2204,7 @@ static int ionic_txrx_enable(struct ionic_lif *lif) } if (lif->hwstamp_rxq) { - ionic_rx_fill(&lif->hwstamp_rxq->q); + ionic_rx_fill(&lif->hwstamp_rxq->q, NULL); err = ionic_qcq_enable(lif->hwstamp_rxq); if (err) goto err_out_hwstamp_rx; @@ -2192,7 +2229,7 @@ err_out: derr = ionic_qcq_disable(lif, lif->rxqcqs[i], derr); } - ionic_xdp_queues_config(lif); + ionic_xdp_rxqs_prog_update(lif); return err; } @@ -2651,7 +2688,7 @@ static void ionic_vf_attr_replay(struct ionic_lif *lif) ionic_vf_start(ionic); } -static void ionic_xdp_unregister_rxq_info(struct ionic_queue *q) +static void ionic_unregister_rxq_info(struct ionic_queue *q) { struct xdp_rxq_info *xi; @@ -2665,7 +2702,7 @@ static void ionic_xdp_unregister_rxq_info(struct ionic_queue *q) kfree(xi); } -static int ionic_xdp_register_rxq_info(struct ionic_queue *q, unsigned int napi_id) +static int ionic_register_rxq_info(struct ionic_queue *q, unsigned int napi_id) { struct xdp_rxq_info *rxq_info; int err; @@ -2676,15 +2713,15 @@ static int ionic_xdp_register_rxq_info(struct ionic_queue *q, unsigned int napi_ err = xdp_rxq_info_reg(rxq_info, q->lif->netdev, q->index, napi_id); if (err) { - dev_err(q->dev, "Queue %d xdp_rxq_info_reg failed, err %d\n", - q->index, err); + netdev_err(q->lif->netdev, "q%d xdp_rxq_info_reg failed, err %d\n", + q->index, err); goto err_out; } - err = xdp_rxq_info_reg_mem_model(rxq_info, MEM_TYPE_PAGE_ORDER0, NULL); + err = xdp_rxq_info_reg_mem_model(rxq_info, MEM_TYPE_PAGE_POOL, q->page_pool); if (err) { - dev_err(q->dev, "Queue %d xdp_rxq_info_reg_mem_model failed, err %d\n", - q->index, err); + netdev_err(q->lif->netdev, "q%d xdp_rxq_info_reg_mem_model failed, err %d\n", + q->index, err); xdp_rxq_info_unreg(rxq_info); goto err_out; } @@ -2698,44 +2735,20 @@ err_out: return err; } -static int 
ionic_xdp_queues_config(struct ionic_lif *lif) +static void ionic_xdp_rxqs_prog_update(struct ionic_lif *lif) { + struct bpf_prog *xdp_prog; unsigned int i; - int err; if (!lif->rxqcqs) - return 0; - - /* There's no need to rework memory if not going to/from NULL program. - * If there is no lif->xdp_prog, there should also be no q.xdp_rxq_info - * This way we don't need to keep an *xdp_prog in every queue struct. - */ - if (!lif->xdp_prog == !lif->rxqcqs[0]->q.xdp_rxq_info) - return 0; + return; + xdp_prog = READ_ONCE(lif->xdp_prog); for (i = 0; i < lif->ionic->nrxqs_per_lif && lif->rxqcqs[i]; i++) { struct ionic_queue *q = &lif->rxqcqs[i]->q; - if (q->xdp_rxq_info) { - ionic_xdp_unregister_rxq_info(q); - continue; - } - - err = ionic_xdp_register_rxq_info(q, lif->rxqcqs[i]->napi.napi_id); - if (err) { - dev_err(lif->ionic->dev, "failed to register RX queue %d info for XDP, err %d\n", - i, err); - goto err_out; - } + WRITE_ONCE(q->xdp_prog, xdp_prog); } - - return 0; - -err_out: - for (i = 0; i < lif->ionic->nrxqs_per_lif && lif->rxqcqs[i]; i++) - ionic_xdp_unregister_rxq_info(&lif->rxqcqs[i]->q); - - return err; } static int ionic_xdp_config(struct net_device *netdev, struct netdev_bpf *bpf) @@ -2765,11 +2778,17 @@ static int ionic_xdp_config(struct net_device *netdev, struct netdev_bpf *bpf) if (!netif_running(netdev)) { old_prog = xchg(&lif->xdp_prog, bpf->prog); + } else if (lif->xdp_prog && bpf->prog) { + old_prog = xchg(&lif->xdp_prog, bpf->prog); + ionic_xdp_rxqs_prog_update(lif); } else { + struct ionic_queue_params qparams; + + ionic_init_queue_params(lif, &qparams); + qparams.xdp_prog = bpf->prog; mutex_lock(&lif->queue_lock); - ionic_stop_queues_reconfig(lif); + ionic_reconfigure_queues(lif, &qparams); old_prog = xchg(&lif->xdp_prog, bpf->prog); - ionic_start_queues_reconfig(lif); mutex_unlock(&lif->queue_lock); } @@ -2871,13 +2890,23 @@ err_out: static void ionic_swap_queues(struct ionic_qcq *a, struct ionic_qcq *b) { - /* only swapping the queues, not 
the napi, flags, or other stuff */ + /* only swapping the queues and napi, not flags or other stuff */ + swap(a->napi, b->napi); + + if (a->q.type == IONIC_QTYPE_RXQ) { + swap(a->q.page_pool, b->q.page_pool); + a->q.page_pool->p.napi = &a->napi; + if (b->q.page_pool) /* is NULL when increasing queue count */ + b->q.page_pool->p.napi = &b->napi; + } + swap(a->q.features, b->q.features); swap(a->q.num_descs, b->q.num_descs); swap(a->q.desc_size, b->q.desc_size); swap(a->q.base, b->q.base); swap(a->q.base_pa, b->q.base_pa); swap(a->q.info, b->q.info); + swap(a->q.xdp_prog, b->q.xdp_prog); swap(a->q.xdp_rxq_info, b->q.xdp_rxq_info); swap(a->q.partner, b->q.partner); swap(a->q_base, b->q_base); @@ -2928,7 +2957,8 @@ int ionic_reconfigure_queues(struct ionic_lif *lif, } if (qparam->nxqs != lif->nxqs || qparam->nrxq_descs != lif->nrxq_descs || - qparam->rxq_features != lif->rxq_features) { + qparam->rxq_features != lif->rxq_features || + qparam->xdp_prog != lif->xdp_prog) { rx_qcqs = devm_kcalloc(lif->ionic->dev, lif->ionic->nrxqs_per_lif, sizeof(struct ionic_qcq *), GFP_KERNEL); if (!rx_qcqs) { @@ -2959,7 +2989,7 @@ int ionic_reconfigure_queues(struct ionic_lif *lif, err = ionic_qcq_alloc(lif, IONIC_QTYPE_TXQ, i, "tx", flags, 4, desc_sz, comp_sz, sg_desc_sz, sizeof(struct ionic_tx_desc_info), - lif->kern_pid, &lif->txqcqs[i]); + lif->kern_pid, NULL, &lif->txqcqs[i]); if (err) goto err_out; } @@ -2968,7 +2998,7 @@ int ionic_reconfigure_queues(struct ionic_lif *lif, err = ionic_qcq_alloc(lif, IONIC_QTYPE_TXQ, i, "tx", flags, num_desc, desc_sz, comp_sz, sg_desc_sz, sizeof(struct ionic_tx_desc_info), - lif->kern_pid, &tx_qcqs[i]); + lif->kern_pid, NULL, &tx_qcqs[i]); if (err) goto err_out; } @@ -2990,7 +3020,7 @@ int ionic_reconfigure_queues(struct ionic_lif *lif, err = ionic_qcq_alloc(lif, IONIC_QTYPE_RXQ, i, "rx", flags, 4, desc_sz, comp_sz, sg_desc_sz, sizeof(struct ionic_rx_desc_info), - lif->kern_pid, &lif->rxqcqs[i]); + lif->kern_pid, NULL, &lif->rxqcqs[i]); if (err) 
goto err_out; } @@ -2999,11 +3029,12 @@ int ionic_reconfigure_queues(struct ionic_lif *lif, err = ionic_qcq_alloc(lif, IONIC_QTYPE_RXQ, i, "rx", flags, num_desc, desc_sz, comp_sz, sg_desc_sz, sizeof(struct ionic_rx_desc_info), - lif->kern_pid, &rx_qcqs[i]); + lif->kern_pid, qparam->xdp_prog, &rx_qcqs[i]); if (err) goto err_out; rx_qcqs[i]->q.features = qparam->rxq_features; + rx_qcqs[i]->q.xdp_prog = qparam->xdp_prog; } } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h index 3e1005293c4a..e01756fb7fdd 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -268,6 +268,7 @@ struct ionic_queue_params { unsigned int ntxq_descs; unsigned int nrxq_descs; u64 rxq_features; + struct bpf_prog *xdp_prog; bool intr_split; bool cmb_tx; bool cmb_rx; @@ -280,6 +281,7 @@ static inline void ionic_init_queue_params(struct ionic_lif *lif, qparam->ntxq_descs = lif->ntxq_descs; qparam->nrxq_descs = lif->nrxq_descs; qparam->rxq_features = lif->rxq_features; + qparam->xdp_prog = lif->xdp_prog; qparam->intr_split = test_bit(IONIC_LIF_F_SPLIT_INTR, lif->state); qparam->cmb_tx = test_bit(IONIC_LIF_F_CMB_TX_RINGS, lif->state); qparam->cmb_rx = test_bit(IONIC_LIF_F_CMB_RX_RINGS, lif->state); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index fc79baad4561..0eeda7e502db 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -6,6 +6,7 @@ #include <linux/if_vlan.h> #include <net/ip6_checksum.h> #include <net/netdev_queues.h> +#include <net/page_pool/helpers.h> #include "ionic.h" #include "ionic_lif.h" @@ -118,108 +119,57 @@ static void *ionic_rx_buf_va(struct ionic_buf_info *buf_info) static dma_addr_t ionic_rx_buf_pa(struct ionic_buf_info *buf_info) { - return buf_info->dma_addr + buf_info->page_offset; + return 
page_pool_get_dma_addr(buf_info->page) + buf_info->page_offset; } -static unsigned int ionic_rx_buf_size(struct ionic_buf_info *buf_info) +static void __ionic_rx_put_buf(struct ionic_queue *q, + struct ionic_buf_info *buf_info, + bool recycle_direct) { - return min_t(u32, IONIC_MAX_BUF_LEN, IONIC_PAGE_SIZE - buf_info->page_offset); -} - -static int ionic_rx_page_alloc(struct ionic_queue *q, - struct ionic_buf_info *buf_info) -{ - struct device *dev = q->dev; - dma_addr_t dma_addr; - struct page *page; - - page = alloc_pages(IONIC_PAGE_GFP_MASK, 0); - if (unlikely(!page)) { - net_err_ratelimited("%s: %s page alloc failed\n", - dev_name(dev), q->name); - q_to_rx_stats(q)->alloc_err++; - return -ENOMEM; - } - - dma_addr = dma_map_page(dev, page, 0, - IONIC_PAGE_SIZE, DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(dev, dma_addr))) { - __free_pages(page, 0); - net_err_ratelimited("%s: %s dma map failed\n", - dev_name(dev), q->name); - q_to_rx_stats(q)->dma_map_err++; - return -EIO; - } - - buf_info->dma_addr = dma_addr; - buf_info->page = page; - buf_info->page_offset = 0; - - return 0; -} - -static void ionic_rx_page_free(struct ionic_queue *q, - struct ionic_buf_info *buf_info) -{ - struct device *dev = q->dev; - - if (unlikely(!buf_info)) { - net_err_ratelimited("%s: %s invalid buf_info in free\n", - dev_name(dev), q->name); - return; - } - if (!buf_info->page) return; - dma_unmap_page(dev, buf_info->dma_addr, IONIC_PAGE_SIZE, DMA_FROM_DEVICE); - __free_pages(buf_info->page, 0); + page_pool_put_full_page(q->page_pool, buf_info->page, recycle_direct); buf_info->page = NULL; + buf_info->len = 0; + buf_info->page_offset = 0; } -static bool ionic_rx_buf_recycle(struct ionic_queue *q, - struct ionic_buf_info *buf_info, u32 len) -{ - u32 size; - - /* don't re-use pages allocated in low-mem condition */ - if (page_is_pfmemalloc(buf_info->page)) - return false; - - /* don't re-use buffers from non-local numa nodes */ - if (page_to_nid(buf_info->page) != numa_mem_id()) - 
return false; - - size = ALIGN(len, q->xdp_rxq_info ? IONIC_PAGE_SIZE : IONIC_PAGE_SPLIT_SZ); - buf_info->page_offset += size; - if (buf_info->page_offset >= IONIC_PAGE_SIZE) - return false; - get_page(buf_info->page); +static void ionic_rx_put_buf(struct ionic_queue *q, + struct ionic_buf_info *buf_info) +{ + __ionic_rx_put_buf(q, buf_info, false); +} - return true; +static void ionic_rx_put_buf_direct(struct ionic_queue *q, + struct ionic_buf_info *buf_info) +{ + __ionic_rx_put_buf(q, buf_info, true); } static void ionic_rx_add_skb_frag(struct ionic_queue *q, struct sk_buff *skb, struct ionic_buf_info *buf_info, - u32 off, u32 len, + u32 headroom, u32 len, bool synced) { if (!synced) - dma_sync_single_range_for_cpu(q->dev, ionic_rx_buf_pa(buf_info), - off, len, DMA_FROM_DEVICE); + page_pool_dma_sync_for_cpu(q->page_pool, + buf_info->page, + buf_info->page_offset + headroom, + len); skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, - buf_info->page, buf_info->page_offset + off, - len, - IONIC_PAGE_SIZE); + buf_info->page, buf_info->page_offset + headroom, + len, buf_info->len); - if (!ionic_rx_buf_recycle(q, buf_info, len)) { - dma_unmap_page(q->dev, buf_info->dma_addr, - IONIC_PAGE_SIZE, DMA_FROM_DEVICE); - buf_info->page = NULL; - } + /* napi_gro_frags() will release/recycle the + * page_pool buffers from the frags list + */ + buf_info->page = NULL; + buf_info->len = 0; + buf_info->page_offset = 0; } static struct sk_buff *ionic_rx_build_skb(struct ionic_queue *q, @@ -244,12 +194,13 @@ static struct sk_buff *ionic_rx_build_skb(struct ionic_queue *q, q_to_rx_stats(q)->alloc_err++; return NULL; } + skb_mark_for_recycle(skb); if (headroom) frag_len = min_t(u16, len, IONIC_XDP_MAX_LINEAR_MTU + VLAN_ETH_HLEN); else - frag_len = min_t(u16, len, ionic_rx_buf_size(buf_info)); + frag_len = min_t(u16, len, IONIC_PAGE_SIZE); if (unlikely(!buf_info->page)) goto err_bad_buf_page; @@ -260,7 +211,7 @@ static struct sk_buff *ionic_rx_build_skb(struct ionic_queue *q, for (i = 0; i 
< num_sg_elems; i++, buf_info++) { if (unlikely(!buf_info->page)) goto err_bad_buf_page; - frag_len = min_t(u16, len, ionic_rx_buf_size(buf_info)); + frag_len = min_t(u16, len, buf_info->len); ionic_rx_add_skb_frag(q, skb, buf_info, 0, frag_len, synced); len -= frag_len; } @@ -277,11 +228,13 @@ static struct sk_buff *ionic_rx_copybreak(struct net_device *netdev, struct ionic_rx_desc_info *desc_info, unsigned int headroom, unsigned int len, + unsigned int num_sg_elems, bool synced) { struct ionic_buf_info *buf_info; struct device *dev = q->dev; struct sk_buff *skb; + int i; buf_info = &desc_info->bufs[0]; @@ -292,54 +245,52 @@ static struct sk_buff *ionic_rx_copybreak(struct net_device *netdev, q_to_rx_stats(q)->alloc_err++; return NULL; } - - if (unlikely(!buf_info->page)) { - dev_kfree_skb(skb); - return NULL; - } + skb_mark_for_recycle(skb); if (!synced) - dma_sync_single_range_for_cpu(dev, ionic_rx_buf_pa(buf_info), - headroom, len, DMA_FROM_DEVICE); + page_pool_dma_sync_for_cpu(q->page_pool, + buf_info->page, + buf_info->page_offset + headroom, + len); + skb_copy_to_linear_data(skb, ionic_rx_buf_va(buf_info) + headroom, len); - dma_sync_single_range_for_device(dev, ionic_rx_buf_pa(buf_info), - headroom, len, DMA_FROM_DEVICE); skb_put(skb, len); skb->protocol = eth_type_trans(skb, netdev); + /* recycle the Rx buffer now that we're done with it */ + ionic_rx_put_buf_direct(q, buf_info); + buf_info++; + for (i = 0; i < num_sg_elems; i++, buf_info++) + ionic_rx_put_buf_direct(q, buf_info); + return skb; } static void ionic_xdp_tx_desc_clean(struct ionic_queue *q, - struct ionic_tx_desc_info *desc_info) + struct ionic_tx_desc_info *desc_info, + bool in_napi) { - unsigned int nbufs = desc_info->nbufs; - struct ionic_buf_info *buf_info; - struct device *dev = q->dev; - int i; + struct xdp_frame_bulk bq; - if (!nbufs) + if (!desc_info->nbufs) return; - buf_info = desc_info->bufs; - dma_unmap_single(dev, buf_info->dma_addr, - buf_info->len, DMA_TO_DEVICE); - if 
(desc_info->act == XDP_TX) - __free_pages(buf_info->page, 0); - buf_info->page = NULL; + xdp_frame_bulk_init(&bq); + rcu_read_lock(); /* need for xdp_return_frame_bulk */ - buf_info++; - for (i = 1; i < nbufs + 1 && buf_info->page; i++, buf_info++) { - dma_unmap_page(dev, buf_info->dma_addr, - buf_info->len, DMA_TO_DEVICE); - if (desc_info->act == XDP_TX) - __free_pages(buf_info->page, 0); - buf_info->page = NULL; + if (desc_info->act == XDP_TX) { + if (likely(in_napi)) + xdp_return_frame_rx_napi(desc_info->xdpf); + else + xdp_return_frame(desc_info->xdpf); + } else if (desc_info->act == XDP_REDIRECT) { + ionic_tx_desc_unmap_bufs(q, desc_info); + xdp_return_frame_bulk(desc_info->xdpf, &bq); } - if (desc_info->act == XDP_REDIRECT) - xdp_return_frame(desc_info->xdpf); + xdp_flush_frame_bulk(&bq); + rcu_read_unlock(); desc_info->nbufs = 0; desc_info->xdpf = NULL; @@ -363,9 +314,17 @@ static int ionic_xdp_post_frame(struct ionic_queue *q, struct xdp_frame *frame, buf_info = desc_info->bufs; stats = q_to_tx_stats(q); - dma_addr = ionic_tx_map_single(q, frame->data, len); - if (!dma_addr) - return -EIO; + if (act == XDP_TX) { + dma_addr = page_pool_get_dma_addr(page) + + off + XDP_PACKET_HEADROOM; + dma_sync_single_for_device(q->dev, dma_addr, + len, DMA_TO_DEVICE); + } else /* XDP_REDIRECT */ { + dma_addr = ionic_tx_map_single(q, frame->data, len); + if (!dma_addr) + return -EIO; + } + buf_info->dma_addr = dma_addr; buf_info->len = len; buf_info->page = page; @@ -387,10 +346,21 @@ static int ionic_xdp_post_frame(struct ionic_queue *q, struct xdp_frame *frame, frag = sinfo->frags; elem = ionic_tx_sg_elems(q); for (i = 0; i < sinfo->nr_frags; i++, frag++, bi++) { - dma_addr = ionic_tx_map_frag(q, frag, 0, skb_frag_size(frag)); - if (!dma_addr) { - ionic_tx_desc_unmap_bufs(q, desc_info); - return -EIO; + if (act == XDP_TX) { + struct page *pg = skb_frag_page(frag); + + dma_addr = page_pool_get_dma_addr(pg) + + skb_frag_off(frag); + dma_sync_single_for_device(q->dev, 
dma_addr, + skb_frag_size(frag), + DMA_TO_DEVICE); + } else { + dma_addr = ionic_tx_map_frag(q, frag, 0, + skb_frag_size(frag)); + if (dma_mapping_error(q->dev, dma_addr)) { + ionic_tx_desc_unmap_bufs(q, desc_info); + return -EIO; + } } bi->dma_addr = dma_addr; bi->len = skb_frag_size(frag); @@ -481,15 +451,13 @@ int ionic_xdp_xmit(struct net_device *netdev, int n, return nxmit; } -static void ionic_xdp_rx_put_bufs(struct ionic_queue *q, - struct ionic_buf_info *buf_info, - int nbufs) +static void ionic_xdp_rx_unlink_bufs(struct ionic_queue *q, + struct ionic_buf_info *buf_info, + int nbufs) { int i; for (i = 0; i < nbufs; i++) { - dma_unmap_page(q->dev, buf_info->dma_addr, - IONIC_PAGE_SIZE, DMA_FROM_DEVICE); buf_info->page = NULL; buf_info++; } @@ -516,11 +484,9 @@ static bool ionic_run_xdp(struct ionic_rx_stats *stats, frag_len = min_t(u16, len, IONIC_XDP_MAX_LINEAR_MTU + VLAN_ETH_HLEN); xdp_prepare_buff(&xdp_buf, ionic_rx_buf_va(buf_info), XDP_PACKET_HEADROOM, frag_len, false); - - dma_sync_single_range_for_cpu(rxq->dev, ionic_rx_buf_pa(buf_info), - XDP_PACKET_HEADROOM, frag_len, - DMA_FROM_DEVICE); - + page_pool_dma_sync_for_cpu(rxq->page_pool, buf_info->page, + buf_info->page_offset + XDP_PACKET_HEADROOM, + frag_len); prefetchw(&xdp_buf.data_hard_start); /* We limit MTU size to one buffer if !xdp_has_frags, so @@ -542,15 +508,16 @@ static bool ionic_run_xdp(struct ionic_rx_stats *stats, do { if (unlikely(sinfo->nr_frags >= MAX_SKB_FRAGS)) { err = -ENOSPC; - goto out_xdp_abort; + break; } frag = &sinfo->frags[sinfo->nr_frags]; sinfo->nr_frags++; bi++; - frag_len = min_t(u16, remain_len, ionic_rx_buf_size(bi)); - dma_sync_single_range_for_cpu(rxq->dev, ionic_rx_buf_pa(bi), - 0, frag_len, DMA_FROM_DEVICE); + frag_len = min_t(u16, remain_len, bi->len); + page_pool_dma_sync_for_cpu(rxq->page_pool, bi->page, + buf_info->page_offset, + frag_len); skb_frag_fill_page_desc(frag, bi->page, 0, frag_len); sinfo->xdp_frags_size += frag_len; remain_len -= frag_len; @@ 
-569,14 +536,16 @@ static bool ionic_run_xdp(struct ionic_rx_stats *stats, return false; /* false = we didn't consume the packet */ case XDP_DROP: - ionic_rx_page_free(rxq, buf_info); + ionic_rx_put_buf_direct(rxq, buf_info); stats->xdp_drop++; break; case XDP_TX: xdpf = xdp_convert_buff_to_frame(&xdp_buf); - if (!xdpf) - goto out_xdp_abort; + if (!xdpf) { + err = -ENOSPC; + break; + } txq = rxq->partner; nq = netdev_get_tx_queue(netdev, txq->index); @@ -588,7 +557,8 @@ static bool ionic_run_xdp(struct ionic_rx_stats *stats, ionic_q_space_avail(txq), 1, 1)) { __netif_tx_unlock(nq); - goto out_xdp_abort; + err = -EIO; + break; } err = ionic_xdp_post_frame(txq, xdpf, XDP_TX, @@ -598,49 +568,47 @@ static bool ionic_run_xdp(struct ionic_rx_stats *stats, __netif_tx_unlock(nq); if (unlikely(err)) { netdev_dbg(netdev, "tx ionic_xdp_post_frame err %d\n", err); - goto out_xdp_abort; + break; } - ionic_xdp_rx_put_bufs(rxq, buf_info, nbufs); + ionic_xdp_rx_unlink_bufs(rxq, buf_info, nbufs); stats->xdp_tx++; - - /* the Tx completion will free the buffers */ break; case XDP_REDIRECT: err = xdp_do_redirect(netdev, &xdp_buf, xdp_prog); if (unlikely(err)) { netdev_dbg(netdev, "xdp_do_redirect err %d\n", err); - goto out_xdp_abort; + break; } - ionic_xdp_rx_put_bufs(rxq, buf_info, nbufs); + ionic_xdp_rx_unlink_bufs(rxq, buf_info, nbufs); rxq->xdp_flush = true; stats->xdp_redirect++; break; case XDP_ABORTED: default: - goto out_xdp_abort; + err = -EIO; + break; } - return true; - -out_xdp_abort: - trace_xdp_exception(netdev, xdp_prog, xdp_action); - ionic_rx_page_free(rxq, buf_info); - stats->xdp_aborted++; + if (err) { + ionic_rx_put_buf_direct(rxq, buf_info); + trace_xdp_exception(netdev, xdp_prog, xdp_action); + stats->xdp_aborted++; + } return true; } static void ionic_rx_clean(struct ionic_queue *q, struct ionic_rx_desc_info *desc_info, - struct ionic_rxq_comp *comp) + struct ionic_rxq_comp *comp, + struct bpf_prog *xdp_prog) { struct net_device *netdev = q->lif->netdev; struct 
ionic_qcq *qcq = q_to_qcq(q); struct ionic_rx_stats *stats; - struct bpf_prog *xdp_prog; - unsigned int headroom; + unsigned int headroom = 0; struct sk_buff *skb; bool synced = false; bool use_copybreak; @@ -648,7 +616,14 @@ static void ionic_rx_clean(struct ionic_queue *q, stats = q_to_rx_stats(q); - if (comp->status) { + if (unlikely(comp->status)) { + /* Most likely status==2 and the pkt received was bigger + * than the buffer available: comp->len will show the + * pkt size received that didn't fit the advertised desc.len + */ + dev_dbg(q->dev, "q%d drop comp->status %d comp->len %d desc->len %d\n", + q->index, comp->status, comp->len, q->rxq[q->head_idx].len); + stats->dropped++; return; } @@ -657,18 +632,18 @@ static void ionic_rx_clean(struct ionic_queue *q, stats->pkts++; stats->bytes += len; - xdp_prog = READ_ONCE(q->lif->xdp_prog); if (xdp_prog) { if (ionic_run_xdp(stats, netdev, xdp_prog, q, desc_info->bufs, len)) return; synced = true; + headroom = XDP_PACKET_HEADROOM; } - headroom = q->xdp_rxq_info ? 
XDP_PACKET_HEADROOM : 0; use_copybreak = len <= q->lif->rx_copybreak; if (use_copybreak) skb = ionic_rx_copybreak(netdev, q, desc_info, - headroom, len, synced); + headroom, len, + comp->num_sg_elems, synced); else skb = ionic_rx_build_skb(q, desc_info, headroom, len, comp->num_sg_elems, synced); @@ -744,7 +719,7 @@ static void ionic_rx_clean(struct ionic_queue *q, napi_gro_frags(&qcq->napi); } -bool ionic_rx_service(struct ionic_cq *cq) +static bool __ionic_rx_service(struct ionic_cq *cq, struct bpf_prog *xdp_prog) { struct ionic_rx_desc_info *desc_info; struct ionic_queue *q = cq->bound_q; @@ -766,11 +741,16 @@ bool ionic_rx_service(struct ionic_cq *cq) q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1); /* clean the related q entry, only one per qc completion */ - ionic_rx_clean(q, desc_info, comp); + ionic_rx_clean(q, desc_info, comp, xdp_prog); return true; } +bool ionic_rx_service(struct ionic_cq *cq) +{ + return __ionic_rx_service(cq, NULL); +} + static inline void ionic_write_cmb_desc(struct ionic_queue *q, void *desc) { @@ -781,7 +761,7 @@ static inline void ionic_write_cmb_desc(struct ionic_queue *q, memcpy_toio(&q->cmb_txq[q->head_idx], desc, sizeof(q->cmb_txq[0])); } -void ionic_rx_fill(struct ionic_queue *q) +void ionic_rx_fill(struct ionic_queue *q, struct bpf_prog *xdp_prog) { struct net_device *netdev = q->lif->netdev; struct ionic_rx_desc_info *desc_info; @@ -789,6 +769,9 @@ void ionic_rx_fill(struct ionic_queue *q) struct ionic_buf_info *buf_info; unsigned int fill_threshold; struct ionic_rxq_desc *desc; + unsigned int first_frag_len; + unsigned int first_buf_len; + unsigned int headroom = 0; unsigned int remain_len; unsigned int frag_len; unsigned int nfrags; @@ -806,35 +789,43 @@ void ionic_rx_fill(struct ionic_queue *q) len = netdev->mtu + VLAN_ETH_HLEN; - for (i = n_fill; i; i--) { - unsigned int headroom; - unsigned int buf_len; + if (xdp_prog) { + /* Always alloc the full size buffer, but only need + * the actual frag_len in the descriptor 
+ * XDP uses space in the first buffer, so account for + * head room, tail room, and ip header in the first frag size. + */ + headroom = XDP_PACKET_HEADROOM; + first_buf_len = IONIC_XDP_MAX_LINEAR_MTU + VLAN_ETH_HLEN + headroom; + first_frag_len = min_t(u16, len + headroom, first_buf_len); + } else { + /* Use MTU size if smaller than max buffer size */ + first_frag_len = min_t(u16, len, IONIC_PAGE_SIZE); + first_buf_len = first_frag_len; + } + for (i = n_fill; i; i--) { + /* fill main descriptor - buf[0] */ nfrags = 0; remain_len = len; desc = &q->rxq[q->head_idx]; desc_info = &q->rx_info[q->head_idx]; buf_info = &desc_info->bufs[0]; - if (!buf_info->page) { /* alloc a new buffer? */ - if (unlikely(ionic_rx_page_alloc(q, buf_info))) { - desc->addr = 0; - desc->len = 0; - return; - } + buf_info->len = first_buf_len; + frag_len = first_frag_len - headroom; + + /* get a new buffer if we can't reuse one */ + if (!buf_info->page) + buf_info->page = page_pool_alloc(q->page_pool, + &buf_info->page_offset, + &buf_info->len, + GFP_ATOMIC); + if (unlikely(!buf_info->page)) { + buf_info->len = 0; + return; } - /* fill main descriptor - buf[0] - * XDP uses space in the first buffer, so account for - * head room, tail room, and ip header in the first frag size. - */ - headroom = q->xdp_rxq_info ? XDP_PACKET_HEADROOM : 0; - if (q->xdp_rxq_info) - buf_len = IONIC_XDP_MAX_LINEAR_MTU + VLAN_ETH_HLEN; - else - buf_len = ionic_rx_buf_size(buf_info); - frag_len = min_t(u16, len, buf_len); - desc->addr = cpu_to_le64(ionic_rx_buf_pa(buf_info) + headroom); desc->len = cpu_to_le16(frag_len); remain_len -= frag_len; @@ -844,16 +835,26 @@ void ionic_rx_fill(struct ionic_queue *q) /* fill sg descriptors - buf[1..n] */ sg_elem = q->rxq_sgl[q->head_idx].elems; for (j = 0; remain_len > 0 && j < q->max_sg_elems; j++, sg_elem++) { - if (!buf_info->page) { /* alloc a new sg buffer? 
*/ - if (unlikely(ionic_rx_page_alloc(q, buf_info))) { - sg_elem->addr = 0; - sg_elem->len = 0; + frag_len = min_t(u16, remain_len, IONIC_PAGE_SIZE); + + /* Recycle any leftover buffers that are too small to reuse */ + if (unlikely(buf_info->page && buf_info->len < frag_len)) + ionic_rx_put_buf_direct(q, buf_info); + + /* Get new buffer if needed */ + if (!buf_info->page) { + buf_info->len = frag_len; + buf_info->page = page_pool_alloc(q->page_pool, + &buf_info->page_offset, + &buf_info->len, + GFP_ATOMIC); + if (unlikely(!buf_info->page)) { + buf_info->len = 0; return; } } sg_elem->addr = cpu_to_le64(ionic_rx_buf_pa(buf_info)); - frag_len = min_t(u16, remain_len, ionic_rx_buf_size(buf_info)); sg_elem->len = cpu_to_le16(frag_len); remain_len -= frag_len; buf_info++; @@ -883,17 +884,12 @@ void ionic_rx_fill(struct ionic_queue *q) void ionic_rx_empty(struct ionic_queue *q) { struct ionic_rx_desc_info *desc_info; - struct ionic_buf_info *buf_info; unsigned int i, j; for (i = 0; i < q->num_descs; i++) { desc_info = &q->rx_info[i]; - for (j = 0; j < ARRAY_SIZE(desc_info->bufs); j++) { - buf_info = &desc_info->bufs[j]; - if (buf_info->page) - ionic_rx_page_free(q, buf_info); - } - + for (j = 0; j < ARRAY_SIZE(desc_info->bufs); j++) + ionic_rx_put_buf(q, &desc_info->bufs[j]); desc_info->nbufs = 0; } @@ -974,6 +970,32 @@ static void ionic_xdp_do_flush(struct ionic_cq *cq) } } +static unsigned int ionic_rx_cq_service(struct ionic_cq *cq, + unsigned int work_to_do) +{ + struct ionic_queue *q = cq->bound_q; + unsigned int work_done = 0; + struct bpf_prog *xdp_prog; + + if (work_to_do == 0) + return 0; + + xdp_prog = READ_ONCE(q->xdp_prog); + while (__ionic_rx_service(cq, xdp_prog)) { + if (cq->tail_idx == cq->num_descs - 1) + cq->done_color = !cq->done_color; + + cq->tail_idx = (cq->tail_idx + 1) & (cq->num_descs - 1); + + if (++work_done >= work_to_do) + break; + } + ionic_rx_fill(q, xdp_prog); + ionic_xdp_do_flush(cq); + + return work_done; +} + int ionic_rx_napi(struct 
napi_struct *napi, int budget) { struct ionic_qcq *qcq = napi_to_qcq(napi); @@ -984,12 +1006,8 @@ int ionic_rx_napi(struct napi_struct *napi, int budget) if (unlikely(!budget)) return budget; - work_done = ionic_cq_service(cq, budget, - ionic_rx_service, NULL, NULL); - - ionic_rx_fill(cq->bound_q); + work_done = ionic_rx_cq_service(cq, budget); - ionic_xdp_do_flush(cq); if (work_done < budget && napi_complete_done(napi, work_done)) { ionic_dim_update(qcq, IONIC_LIF_F_RX_DIM_INTR); flags |= IONIC_INTR_CRED_UNMASK; @@ -1030,12 +1048,8 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget) if (unlikely(!budget)) return budget; - rx_work_done = ionic_cq_service(rxcq, budget, - ionic_rx_service, NULL, NULL); - - ionic_rx_fill(rxcq->bound_q); + rx_work_done = ionic_rx_cq_service(rxcq, budget); - ionic_xdp_do_flush(rxcq); if (rx_work_done < budget && napi_complete_done(napi, rx_work_done)) { ionic_dim_update(rxqcq, 0); flags |= IONIC_INTR_CRED_UNMASK; @@ -1166,7 +1180,7 @@ static void ionic_tx_clean(struct ionic_queue *q, struct sk_buff *skb; if (desc_info->xdpf) { - ionic_xdp_tx_desc_clean(q->partner, desc_info); + ionic_xdp_tx_desc_clean(q->partner, desc_info, in_napi); stats->clean++; if (unlikely(__netif_subqueue_stopped(q->lif->netdev, q->index))) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.h b/drivers/net/ethernet/pensando/ionic/ionic_txrx.h index 9e73e324e7a1..b2b9a2dc9eb8 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.h @@ -4,9 +4,11 @@ #ifndef _IONIC_TXRX_H_ #define _IONIC_TXRX_H_ +struct bpf_prog; + void ionic_tx_flush(struct ionic_cq *cq); -void ionic_rx_fill(struct ionic_queue *q); +void ionic_rx_fill(struct ionic_queue *q, struct bpf_prog *xdp_prog); void ionic_rx_empty(struct ionic_queue *q); void ionic_tx_empty(struct ionic_queue *q); int ionic_rx_napi(struct napi_struct *napi, int budget); diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.c 
b/drivers/net/ethernet/qlogic/qede/qede_ptp.c index 63e3dac4d5f7..9d6399a5c780 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ptp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.c @@ -326,25 +326,18 @@ int qede_ptp_get_ts_info(struct qede_dev *edev, struct kernel_ethtool_ts_info *i struct qede_ptp *ptp = edev->ptp; if (!ptp) { - info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; - info->phc_index = -1; + info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE; return 0; } info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; if (ptp->clock) info->phc_index = ptp_clock_index(ptp->clock); - else - info->phc_index = -1; info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | BIT(HWTSTAMP_FILTER_PTP_V1_L4_EVENT) | diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 3cb1c4f5c91a..45ac8befba29 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -578,7 +578,7 @@ struct rtl8169_counters { __le64 rx_broadcast; __le32 rx_multicast; __le16 tx_aborted; - __le16 tx_underun; + __le16 tx_underrun; }; struct rtl8169_tc_offsets { @@ -1843,7 +1843,7 @@ static void rtl8169_get_ethtool_stats(struct net_device *dev, data[9] = le64_to_cpu(counters->rx_broadcast); data[10] = le32_to_cpu(counters->rx_multicast); data[11] = le16_to_cpu(counters->tx_aborted); - data[12] = le16_to_cpu(counters->tx_underun); + data[12] = le16_to_cpu(counters->tx_underrun); } static void rtl8169_get_strings(struct net_device *dev, u32 stringset, u8 *data) diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c b/drivers/net/ethernet/realtek/rtase/rtase_main.c index 7882f2c0e1a4..869183e1565e 100644 --- a/drivers/net/ethernet/realtek/rtase/rtase_main.c +++ 
b/drivers/net/ethernet/realtek/rtase/rtase_main.c @@ -98,7 +98,7 @@ struct rtase_counters { __le64 rx_broadcast; __le32 rx_multicast; __le16 tx_aborted; - __le16 tx_underun; + __le16 tx_underrun; } __packed; static void rtase_w8(const struct rtase_private *tp, u16 reg, u8 val8) @@ -1619,8 +1619,8 @@ static void rtase_dump_state(const struct net_device *dev) le32_to_cpu(counters->rx_multicast)); netdev_err(dev, "tx_aborted %d\n", le16_to_cpu(counters->tx_aborted)); - netdev_err(dev, "tx_underun %d\n", - le16_to_cpu(counters->tx_underun)); + netdev_err(dev, "tx_underrun %d\n", + le16_to_cpu(counters->tx_underrun)); } static void rtase_tx_timeout(struct net_device *dev, unsigned int txqueue) diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 848b1923133a..bb1930818beb 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -230,11 +230,6 @@ static int efx_ethtool_get_ts_info(struct net_device *net_dev, { struct efx_nic *efx = efx_netdev_priv(net_dev); - /* Software capabilities */ - ts_info->so_timestamping = (SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE); - ts_info->phc_index = -1; - efx_ptp_get_ts_info(efx, ts_info); return 0; } diff --git a/drivers/net/ethernet/sfc/siena/ethtool.c b/drivers/net/ethernet/sfc/siena/ethtool.c index 88ddc226b012..c5ad84db9613 100644 --- a/drivers/net/ethernet/sfc/siena/ethtool.c +++ b/drivers/net/ethernet/sfc/siena/ethtool.c @@ -230,11 +230,6 @@ static int efx_ethtool_get_ts_info(struct net_device *net_dev, { struct efx_nic *efx = netdev_priv(net_dev); - /* Software capabilities */ - ts_info->so_timestamping = (SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE); - ts_info->phc_index = -1; - efx_siena_ptp_get_ts_info(efx, ts_info); return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 31c387cc5f26..a1858f083eef 100644 --- 
a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -58,10 +58,6 @@ static void dwmac4_core_init(struct mac_device_info *hw, if (hw->pcs) value |= GMAC_PCS_IRQ_DEFAULT; - /* Enable FPE interrupt */ - if ((GMAC_HW_FEAT_FPESEL & readl(ioaddr + GMAC_HW_FEATURE3)) >> 26) - value |= GMAC_INT_FPE_EN; - writel(value, ioaddr + GMAC_INT_EN); if (GMAC_INT_DEFAULT_ENABLE & GMAC_INT_TSIE) @@ -1268,6 +1264,9 @@ const struct stmmac_ops dwmac410_ops = { .fpe_configure = dwmac5_fpe_configure, .fpe_send_mpacket = dwmac5_fpe_send_mpacket, .fpe_irq_status = dwmac5_fpe_irq_status, + .fpe_get_add_frag_size = dwmac5_fpe_get_add_frag_size, + .fpe_set_add_frag_size = dwmac5_fpe_set_add_frag_size, + .fpe_map_preemption_class = dwmac5_fpe_map_preemption_class, .add_hw_vlan_rx_fltr = dwmac4_add_hw_vlan_rx_fltr, .del_hw_vlan_rx_fltr = dwmac4_del_hw_vlan_rx_fltr, .restore_hw_vlan_rx_fltr = dwmac4_restore_hw_vlan_rx_fltr, @@ -1320,6 +1319,9 @@ const struct stmmac_ops dwmac510_ops = { .fpe_configure = dwmac5_fpe_configure, .fpe_send_mpacket = dwmac5_fpe_send_mpacket, .fpe_irq_status = dwmac5_fpe_irq_status, + .fpe_get_add_frag_size = dwmac5_fpe_get_add_frag_size, + .fpe_set_add_frag_size = dwmac5_fpe_set_add_frag_size, + .fpe_map_preemption_class = dwmac5_fpe_map_preemption_class, .add_hw_vlan_rx_fltr = dwmac4_add_hw_vlan_rx_fltr, .del_hw_vlan_rx_fltr = dwmac4_del_hw_vlan_rx_fltr, .restore_hw_vlan_rx_fltr = dwmac4_restore_hw_vlan_rx_fltr, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c index e02cebc3f1b7..08add508db84 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c @@ -575,11 +575,11 @@ int dwmac5_flex_pps_config(void __iomem *ioaddr, int index, void dwmac5_fpe_configure(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg, u32 num_txq, u32 num_rxq, - bool enable) + bool tx_enable, bool pmac_enable) { u32 value; - if (enable) 
{ + if (tx_enable) { cfg->fpe_csr = EFPE; value = readl(ioaddr + GMAC_RXQ_CTRL1); value &= ~GMAC_RXQCTRL_FPRQ; @@ -589,6 +589,21 @@ void dwmac5_fpe_configure(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg, cfg->fpe_csr = 0; } writel(cfg->fpe_csr, ioaddr + MAC_FPE_CTRL_STS); + + value = readl(ioaddr + GMAC_INT_EN); + + if (pmac_enable) { + if (!(value & GMAC_INT_FPE_EN)) { + /* Dummy read to clear any pending masked interrupts */ + readl(ioaddr + MAC_FPE_CTRL_STS); + + value |= GMAC_INT_FPE_EN; + } + } else { + value &= ~GMAC_INT_FPE_EN; + } + + writel(value, ioaddr + GMAC_INT_EN); } int dwmac5_fpe_irq_status(void __iomem *ioaddr, struct net_device *dev) @@ -605,22 +620,22 @@ int dwmac5_fpe_irq_status(void __iomem *ioaddr, struct net_device *dev) if (value & TRSP) { status |= FPE_EVENT_TRSP; - netdev_info(dev, "FPE: Respond mPacket is transmitted\n"); + netdev_dbg(dev, "FPE: Respond mPacket is transmitted\n"); } if (value & TVER) { status |= FPE_EVENT_TVER; - netdev_info(dev, "FPE: Verify mPacket is transmitted\n"); + netdev_dbg(dev, "FPE: Verify mPacket is transmitted\n"); } if (value & RRSP) { status |= FPE_EVENT_RRSP; - netdev_info(dev, "FPE: Respond mPacket is received\n"); + netdev_dbg(dev, "FPE: Respond mPacket is received\n"); } if (value & RVER) { status |= FPE_EVENT_RVER; - netdev_info(dev, "FPE: Verify mPacket is received\n"); + netdev_dbg(dev, "FPE: Verify mPacket is received\n"); } return status; @@ -638,3 +653,72 @@ void dwmac5_fpe_send_mpacket(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg, writel(value, ioaddr + MAC_FPE_CTRL_STS); } + +int dwmac5_fpe_get_add_frag_size(const void __iomem *ioaddr) +{ + return FIELD_GET(DWMAC5_ADD_FRAG_SZ, readl(ioaddr + MTL_FPE_CTRL_STS)); +} + +void dwmac5_fpe_set_add_frag_size(void __iomem *ioaddr, u32 add_frag_size) +{ + u32 value; + + value = readl(ioaddr + MTL_FPE_CTRL_STS); + writel(u32_replace_bits(value, add_frag_size, DWMAC5_ADD_FRAG_SZ), + ioaddr + MTL_FPE_CTRL_STS); +} + +#define ALG_ERR_MSG "TX algorithm 
SP is not suitable for one-to-many mapping" +#define WEIGHT_ERR_MSG "TXQ weight %u differs across other TXQs in TC: [%u]" + +int dwmac5_fpe_map_preemption_class(struct net_device *ndev, + struct netlink_ext_ack *extack, u32 pclass) +{ + u32 val, offset, count, queue_weight, preemptible_txqs = 0; + struct stmmac_priv *priv = netdev_priv(ndev); + u32 num_tc = ndev->num_tc; + + if (!pclass) + goto update_mapping; + + /* DWMAC CORE4+ can not program TC:TXQ mapping to hardware. + * + * Synopsys Databook: + * "The number of Tx DMA channels is equal to the number of Tx queues, + * and is direct one-to-one mapping." + */ + for (u32 tc = 0; tc < num_tc; tc++) { + count = ndev->tc_to_txq[tc].count; + offset = ndev->tc_to_txq[tc].offset; + + if (pclass & BIT(tc)) + preemptible_txqs |= GENMASK(offset + count - 1, offset); + + /* This is 1:1 mapping, go to next TC */ + if (count == 1) + continue; + + if (priv->plat->tx_sched_algorithm == MTL_TX_ALGORITHM_SP) { + NL_SET_ERR_MSG_MOD(extack, ALG_ERR_MSG); + return -EINVAL; + } + + queue_weight = priv->plat->tx_queues_cfg[offset].weight; + + for (u32 i = 1; i < count; i++) { + if (priv->plat->tx_queues_cfg[offset + i].weight != + queue_weight) { + NL_SET_ERR_MSG_FMT_MOD(extack, WEIGHT_ERR_MSG, + queue_weight, tc); + return -EINVAL; + } + } + } + +update_mapping: + val = readl(priv->ioaddr + MTL_FPE_CTRL_STS); + writel(u32_replace_bits(val, preemptible_txqs, DWMAC5_PREEMPTION_CLASS), + priv->ioaddr + MTL_FPE_CTRL_STS); + + return 0; +} diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h index bf33a51d229e..6c6eb6790e83 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h @@ -39,6 +39,12 @@ #define MAC_PPSx_INTERVAL(x) (0x00000b88 + ((x) * 0x10)) #define MAC_PPSx_WIDTH(x) (0x00000b8c + ((x) * 0x10)) +#define MTL_FPE_CTRL_STS 0x00000c90 +/* Preemption Classification */ +#define DWMAC5_PREEMPTION_CLASS GENMASK(15, 8) +/* 
Additional Fragment Size of preempted frames */ +#define DWMAC5_ADD_FRAG_SZ GENMASK(1, 0) + #define MTL_RXP_CONTROL_STATUS 0x00000ca0 #define RXPI BIT(31) #define NPE GENMASK(23, 16) @@ -104,10 +110,14 @@ int dwmac5_flex_pps_config(void __iomem *ioaddr, int index, u32 sub_second_inc, u32 systime_flags); void dwmac5_fpe_configure(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg, u32 num_txq, u32 num_rxq, - bool enable); + bool tx_enable, bool pmac_enable); void dwmac5_fpe_send_mpacket(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg, enum stmmac_mpacket_type type); int dwmac5_fpe_irq_status(void __iomem *ioaddr, struct net_device *dev); +int dwmac5_fpe_get_add_frag_size(const void __iomem *ioaddr); +void dwmac5_fpe_set_add_frag_size(void __iomem *ioaddr, u32 add_frag_size); +int dwmac5_fpe_map_preemption_class(struct net_device *ndev, + struct netlink_ext_ack *extack, u32 pclass); #endif /* __DWMAC5_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index cbf2dd976ab1..f519d43738b0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -1504,13 +1504,14 @@ static void dwxgmac2_set_arp_offload(struct mac_device_info *hw, bool en, writel(value, ioaddr + XGMAC_RX_CONFIG); } -static void dwxgmac3_fpe_configure(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg, - u32 num_txq, - u32 num_rxq, bool enable) +static void dwxgmac3_fpe_configure(void __iomem *ioaddr, + struct stmmac_fpe_cfg *cfg, + u32 num_txq, u32 num_rxq, + bool tx_enable, bool pmac_enable) { u32 value; - if (!enable) { + if (!tx_enable) { value = readl(ioaddr + XGMAC_FPE_CTRL_STS); value &= ~XGMAC_EFPE; diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c index 29367105df54..88cce28b2f98 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.c +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -171,7 +171,7 @@ static 
const struct stmmac_hwif_entry { .mac = &dwmac4_ops, .hwtimestamp = &stmmac_ptp, .mode = NULL, - .tc = &dwmac510_tc_ops, + .tc = &dwmac4_tc_ops, .mmc = &dwmac_mmc_ops, .est = &dwmac510_est_ops, .setup = dwmac4_setup, @@ -252,7 +252,7 @@ static const struct stmmac_hwif_entry { .mac = &dwxgmac210_ops, .hwtimestamp = &stmmac_ptp, .mode = NULL, - .tc = &dwmac510_tc_ops, + .tc = &dwxgmac_tc_ops, .mmc = &dwxgmac_mmc_ops, .est = &dwmac510_est_ops, .setup = dwxgmac2_setup, @@ -273,7 +273,7 @@ static const struct stmmac_hwif_entry { .mac = &dwxlgmac2_ops, .hwtimestamp = &stmmac_ptp, .mode = NULL, - .tc = &dwmac510_tc_ops, + .tc = &dwxgmac_tc_ops, .mmc = &dwxgmac_mmc_ops, .est = &dwmac510_est_ops, .setup = dwxlgmac2_setup, diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index 7e90f34b8c88..d5a9f01ecac5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -7,6 +7,7 @@ #include <linux/netdevice.h> #include <linux/stmmac.h> +#include <net/pkt_cls.h> #define stmmac_do_void_callback(__priv, __module, __cname, __arg0, __args...) 
\ ({ \ @@ -28,6 +29,8 @@ struct stmmac_extra_stats; struct stmmac_priv; struct stmmac_safety_stats; +struct stmmac_fpe_cfg; +enum stmmac_mpacket_type; struct dma_desc; struct dma_extended_desc; struct dma_edesc; @@ -419,11 +422,16 @@ struct stmmac_ops { void (*set_arp_offload)(struct mac_device_info *hw, bool en, u32 addr); void (*fpe_configure)(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg, u32 num_txq, u32 num_rxq, - bool enable); + bool tx_enable, bool pmac_enable); void (*fpe_send_mpacket)(void __iomem *ioaddr, struct stmmac_fpe_cfg *cfg, enum stmmac_mpacket_type type); int (*fpe_irq_status)(void __iomem *ioaddr, struct net_device *dev); + int (*fpe_get_add_frag_size)(const void __iomem *ioaddr); + void (*fpe_set_add_frag_size)(void __iomem *ioaddr, u32 add_frag_size); + int (*fpe_map_preemption_class)(struct net_device *ndev, + struct netlink_ext_ack *extack, + u32 pclass); }; #define stmmac_core_init(__priv, __args...) \ @@ -528,6 +536,12 @@ struct stmmac_ops { stmmac_do_void_callback(__priv, mac, fpe_send_mpacket, __args) #define stmmac_fpe_irq_status(__priv, __args...) \ stmmac_do_callback(__priv, mac, fpe_irq_status, __args) +#define stmmac_fpe_get_add_frag_size(__priv, __args...) \ + stmmac_do_callback(__priv, mac, fpe_get_add_frag_size, __args) +#define stmmac_fpe_set_add_frag_size(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, fpe_set_add_frag_size, __args) +#define stmmac_fpe_map_preemption_class(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, fpe_map_preemption_class, __args) /* PTP and HW Timer helpers */ struct stmmac_hwtimestamp { @@ -615,6 +629,8 @@ struct stmmac_tc_ops { struct tc_etf_qopt_offload *qopt); int (*query_caps)(struct stmmac_priv *priv, struct tc_query_caps_base *base); + int (*setup_mqprio)(struct stmmac_priv *priv, + struct tc_mqprio_qopt_offload *qopt); }; #define stmmac_tc_init(__priv, __args...) 
\ @@ -631,6 +647,8 @@ struct stmmac_tc_ops { stmmac_do_callback(__priv, tc, setup_etf, __args) #define stmmac_tc_query_caps(__priv, __args...) \ stmmac_do_callback(__priv, tc, query_caps, __args) +#define stmmac_tc_setup_mqprio(__priv, __args...) \ + stmmac_do_callback(__priv, tc, setup_mqprio, __args) struct stmmac_counters; @@ -674,7 +692,9 @@ extern const struct stmmac_dma_ops dwmac4_dma_ops; extern const struct stmmac_ops dwmac410_ops; extern const struct stmmac_dma_ops dwmac410_dma_ops; extern const struct stmmac_ops dwmac510_ops; +extern const struct stmmac_tc_ops dwmac4_tc_ops; extern const struct stmmac_tc_ops dwmac510_tc_ops; +extern const struct stmmac_tc_ops dwxgmac_tc_ops; extern const struct stmmac_ops dwxgmac210_ops; extern const struct stmmac_ops dwxlgmac2_ops; extern const struct stmmac_dma_ops dwxgmac210_dma_ops; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index b23b920eedb1..ea135203ff2e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -146,6 +146,32 @@ struct stmmac_channel { u32 index; }; +/* FPE link-partner hand-shaking mPacket type */ +enum stmmac_mpacket_type { + MPACKET_VERIFY = 0, + MPACKET_RESPONSE = 1, +}; + +#define STMMAC_FPE_MM_MAX_VERIFY_RETRIES 3 +#define STMMAC_FPE_MM_MAX_VERIFY_TIME_MS 128 + +struct stmmac_fpe_cfg { + /* Serialize access to MAC Merge state between ethtool requests + * and link state updates. 
+ */ + spinlock_t lock; + + u32 fpe_csr; /* MAC_FPE_CTRL_STS reg cache */ + + enum ethtool_mm_verify_status status; + struct timer_list verify_timer; + bool verify_enabled; + int verify_retries; + bool pmac_enabled; + u32 verify_time; + bool tx_enabled; +}; + struct stmmac_tc_entry { bool in_use; bool in_hw; @@ -339,11 +365,8 @@ struct stmmac_priv { struct workqueue_struct *wq; struct work_struct service_task; - /* Workqueue for handling FPE hand-shaking */ - unsigned long fpe_task_state; - struct workqueue_struct *fpe_wq; - struct work_struct fpe_task; - char wq_name[IFNAMSIZ + 4]; + /* Frame Preemption feature (FPE) */ + struct stmmac_fpe_cfg fpe_cfg; /* TC Handling */ unsigned int tc_entries_max; @@ -397,7 +420,7 @@ bool stmmac_eee_init(struct stmmac_priv *priv); int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt); int stmmac_reinit_ringparam(struct net_device *dev, u32 rx_size, u32 tx_size); int stmmac_bus_clks_config(struct stmmac_priv *priv, bool enabled); -void stmmac_fpe_handshake(struct stmmac_priv *priv, bool enable); +void stmmac_fpe_apply(struct stmmac_priv *priv); static inline bool stmmac_xdp_is_enabled(struct stmmac_priv *priv) { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 220c582904f4..2a37592a6281 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -19,6 +19,7 @@ #include "stmmac.h" #include "dwmac_dma.h" #include "dwxgmac2.h" +#include "dwmac5.h" #define REG_SPACE_SIZE 0x1060 #define GMAC4_REG_SPACE_SIZE 0x116C @@ -1200,13 +1201,13 @@ static int stmmac_get_ts_info(struct net_device *dev, info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_TX_HARDWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_RX_HARDWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_RAW_HARDWARE; if (priv->ptp_clock) info->phc_index = ptp_clock_index(priv->ptp_clock); + 
else + info->phc_index = 0; info->tx_types = (1 << HWTSTAMP_TX_OFF) | (1 << HWTSTAMP_TX_ON); @@ -1263,6 +1264,98 @@ static int stmmac_set_tunable(struct net_device *dev, return ret; } +static int stmmac_get_mm(struct net_device *ndev, + struct ethtool_mm_state *state) +{ + struct stmmac_priv *priv = netdev_priv(ndev); + unsigned long flags; + u32 frag_size; + + if (!priv->dma_cap.fpesel) + return -EOPNOTSUPP; + + spin_lock_irqsave(&priv->fpe_cfg.lock, flags); + + state->max_verify_time = STMMAC_FPE_MM_MAX_VERIFY_TIME_MS; + state->verify_enabled = priv->fpe_cfg.verify_enabled; + state->pmac_enabled = priv->fpe_cfg.pmac_enabled; + state->verify_time = priv->fpe_cfg.verify_time; + state->tx_enabled = priv->fpe_cfg.tx_enabled; + state->verify_status = priv->fpe_cfg.status; + state->rx_min_frag_size = ETH_ZLEN; + + /* FPE active if common tx_enabled and + * (verification success or disabled(forced)) + */ + if (state->tx_enabled && + (state->verify_status == ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED || + state->verify_status == ETHTOOL_MM_VERIFY_STATUS_DISABLED)) + state->tx_active = true; + else + state->tx_active = false; + + frag_size = stmmac_fpe_get_add_frag_size(priv, priv->ioaddr); + state->tx_min_frag_size = ethtool_mm_frag_size_add_to_min(frag_size); + + spin_unlock_irqrestore(&priv->fpe_cfg.lock, flags); + + return 0; +} + +static int stmmac_set_mm(struct net_device *ndev, struct ethtool_mm_cfg *cfg, + struct netlink_ext_ack *extack) +{ + struct stmmac_priv *priv = netdev_priv(ndev); + struct stmmac_fpe_cfg *fpe_cfg = &priv->fpe_cfg; + unsigned long flags; + u32 frag_size; + int err; + + err = ethtool_mm_frag_size_min_to_add(cfg->tx_min_frag_size, + &frag_size, extack); + if (err) + return err; + + /* Wait for the verification that's currently in progress to finish */ + timer_shutdown_sync(&fpe_cfg->verify_timer); + + spin_lock_irqsave(&fpe_cfg->lock, flags); + + fpe_cfg->verify_enabled = cfg->verify_enabled; + fpe_cfg->pmac_enabled = cfg->pmac_enabled; + 
fpe_cfg->verify_time = cfg->verify_time; + fpe_cfg->tx_enabled = cfg->tx_enabled; + + if (!cfg->verify_enabled) + fpe_cfg->status = ETHTOOL_MM_VERIFY_STATUS_DISABLED; + + stmmac_fpe_set_add_frag_size(priv, priv->ioaddr, frag_size); + stmmac_fpe_apply(priv); + + spin_unlock_irqrestore(&fpe_cfg->lock, flags); + + return 0; +} + +static void stmmac_get_mm_stats(struct net_device *ndev, + struct ethtool_mm_stats *s) +{ + struct stmmac_priv *priv = netdev_priv(ndev); + struct stmmac_counters *mmc = &priv->mmc; + + if (!priv->dma_cap.rmon) + return; + + stmmac_mmc_read(priv, priv->mmcaddr, mmc); + + s->MACMergeFrameAssErrorCount = mmc->mmc_rx_packet_assembly_err_cntr; + s->MACMergeFrameAssOkCount = mmc->mmc_rx_packet_assembly_ok_cntr; + s->MACMergeFrameSmdErrorCount = mmc->mmc_rx_packet_smd_err_cntr; + s->MACMergeFragCountRx = mmc->mmc_rx_fpe_fragment_cntr; + s->MACMergeFragCountTx = mmc->mmc_tx_fpe_fragment_cntr; + s->MACMergeHoldCount = mmc->mmc_tx_hold_req_cntr; +} + static const struct ethtool_ops stmmac_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES, @@ -1301,6 +1394,9 @@ static const struct ethtool_ops stmmac_ethtool_ops = { .set_tunable = stmmac_set_tunable, .get_link_ksettings = stmmac_ethtool_get_link_ksettings, .set_link_ksettings = stmmac_ethtool_set_link_ksettings, + .get_mm = stmmac_get_mm, + .set_mm = stmmac_set_mm, + .get_mm_stats = stmmac_get_mm_stats, }; void stmmac_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index d9fca8d1227c..d3895d7eecfc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -968,18 +968,31 @@ static void stmmac_mac_config(struct phylink_config *config, unsigned int mode, static void stmmac_fpe_link_state_handle(struct stmmac_priv *priv, bool is_up) { - struct stmmac_fpe_cfg *fpe_cfg = priv->plat->fpe_cfg; 
- enum stmmac_fpe_state *lo_state = &fpe_cfg->lo_fpe_state; - enum stmmac_fpe_state *lp_state = &fpe_cfg->lp_fpe_state; - bool *hs_enable = &fpe_cfg->hs_enable; + struct stmmac_fpe_cfg *fpe_cfg = &priv->fpe_cfg; + unsigned long flags; - if (is_up && *hs_enable) { - stmmac_fpe_send_mpacket(priv, priv->ioaddr, fpe_cfg, - MPACKET_VERIFY); + timer_shutdown_sync(&fpe_cfg->verify_timer); + + spin_lock_irqsave(&fpe_cfg->lock, flags); + + if (is_up && fpe_cfg->pmac_enabled) { + /* VERIFY process requires pmac enabled when NIC comes up */ + stmmac_fpe_configure(priv, priv->ioaddr, fpe_cfg, + priv->plat->tx_queues_to_use, + priv->plat->rx_queues_to_use, + false, true); + + /* New link => maybe new partner => new verification process */ + stmmac_fpe_apply(priv); } else { - *lo_state = FPE_STATE_OFF; - *lp_state = FPE_STATE_OFF; + /* No link => turn off EFPE */ + stmmac_fpe_configure(priv, priv->ioaddr, fpe_cfg, + priv->plat->tx_queues_to_use, + priv->plat->rx_queues_to_use, + false, false); } + + spin_unlock_irqrestore(&fpe_cfg->lock, flags); } static void stmmac_mac_link_down(struct phylink_config *config, @@ -3358,27 +3371,6 @@ static void stmmac_safety_feat_configuration(struct stmmac_priv *priv) } } -static int stmmac_fpe_start_wq(struct stmmac_priv *priv) -{ - char *name; - - clear_bit(__FPE_TASK_SCHED, &priv->fpe_task_state); - clear_bit(__FPE_REMOVING, &priv->fpe_task_state); - - name = priv->wq_name; - sprintf(name, "%s-fpe", priv->dev->name); - - priv->fpe_wq = create_singlethread_workqueue(name); - if (!priv->fpe_wq) { - netdev_err(priv->dev, "%s: Failed to create workqueue\n", name); - - return -ENOMEM; - } - netdev_info(priv->dev, "FPE workqueue start"); - - return 0; -} - /** * stmmac_hw_setup - setup mac in a usable state. * @dev : pointer to the device structure. 
@@ -3533,13 +3525,6 @@ static int stmmac_hw_setup(struct net_device *dev, bool ptp_register) stmmac_set_hw_vlan_mode(priv, priv->hw); - if (priv->dma_cap.fpesel) { - stmmac_fpe_start_wq(priv); - - if (priv->plat->fpe_cfg->enable) - stmmac_fpe_handshake(priv, true); - } - return 0; } @@ -4036,18 +4021,6 @@ static int stmmac_open(struct net_device *dev) return ret; } -static void stmmac_fpe_stop_wq(struct stmmac_priv *priv) -{ - set_bit(__FPE_REMOVING, &priv->fpe_task_state); - - if (priv->fpe_wq) { - destroy_workqueue(priv->fpe_wq); - priv->fpe_wq = NULL; - } - - netdev_info(priv->dev, "FPE workqueue stop"); -} - /** * stmmac_release - close entry point of the driver * @dev : device pointer. @@ -4095,10 +4068,10 @@ static int stmmac_release(struct net_device *dev) stmmac_release_ptp(priv); - pm_runtime_put(priv->device); - if (priv->dma_cap.fpesel) - stmmac_fpe_stop_wq(priv); + timer_shutdown_sync(&priv->fpe_cfg.verify_timer); + + pm_runtime_put(priv->device); return 0; } @@ -5982,45 +5955,31 @@ static int stmmac_set_features(struct net_device *netdev, static void stmmac_fpe_event_status(struct stmmac_priv *priv, int status) { - struct stmmac_fpe_cfg *fpe_cfg = priv->plat->fpe_cfg; - enum stmmac_fpe_state *lo_state = &fpe_cfg->lo_fpe_state; - enum stmmac_fpe_state *lp_state = &fpe_cfg->lp_fpe_state; - bool *hs_enable = &fpe_cfg->hs_enable; - - if (status == FPE_EVENT_UNKNOWN || !*hs_enable) - return; + struct stmmac_fpe_cfg *fpe_cfg = &priv->fpe_cfg; - /* If LP has sent verify mPacket, LP is FPE capable */ - if ((status & FPE_EVENT_RVER) == FPE_EVENT_RVER) { - if (*lp_state < FPE_STATE_CAPABLE) - *lp_state = FPE_STATE_CAPABLE; + /* This is interrupt context, just spin_lock() */ + spin_lock(&fpe_cfg->lock); - /* If user has requested FPE enable, quickly response */ - if (*hs_enable) - stmmac_fpe_send_mpacket(priv, priv->ioaddr, - fpe_cfg, - MPACKET_RESPONSE); - } + if (!fpe_cfg->pmac_enabled || status == FPE_EVENT_UNKNOWN) + goto unlock_out; - /* If Local has sent 
verify mPacket, Local is FPE capable */ - if ((status & FPE_EVENT_TVER) == FPE_EVENT_TVER) { - if (*lo_state < FPE_STATE_CAPABLE) - *lo_state = FPE_STATE_CAPABLE; - } + /* LP has sent verify mPacket */ + if ((status & FPE_EVENT_RVER) == FPE_EVENT_RVER) + stmmac_fpe_send_mpacket(priv, priv->ioaddr, fpe_cfg, + MPACKET_RESPONSE); - /* If LP has sent response mPacket, LP is entering FPE ON */ - if ((status & FPE_EVENT_RRSP) == FPE_EVENT_RRSP) - *lp_state = FPE_STATE_ENTERING_ON; + /* Local has sent verify mPacket */ + if ((status & FPE_EVENT_TVER) == FPE_EVENT_TVER && + fpe_cfg->status != ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED) + fpe_cfg->status = ETHTOOL_MM_VERIFY_STATUS_VERIFYING; - /* If Local has sent response mPacket, Local is entering FPE ON */ - if ((status & FPE_EVENT_TRSP) == FPE_EVENT_TRSP) - *lo_state = FPE_STATE_ENTERING_ON; + /* LP has sent response mPacket */ + if ((status & FPE_EVENT_RRSP) == FPE_EVENT_RRSP && + fpe_cfg->status == ETHTOOL_MM_VERIFY_STATUS_VERIFYING) + fpe_cfg->status = ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED; - if (!test_bit(__FPE_REMOVING, &priv->fpe_task_state) && - !test_and_set_bit(__FPE_TASK_SCHED, &priv->fpe_task_state) && - priv->fpe_wq) { - queue_work(priv->fpe_wq, &priv->fpe_task); - } +unlock_out: + spin_unlock(&fpe_cfg->lock); } static void stmmac_common_interrupt(struct stmmac_priv *priv) @@ -6257,6 +6216,8 @@ static int stmmac_setup_tc(struct net_device *ndev, enum tc_setup_type type, switch (type) { case TC_QUERY_CAPS: return stmmac_tc_query_caps(priv, priv, type_data); + case TC_SETUP_QDISC_MQPRIO: + return stmmac_tc_setup_mqprio(priv, priv, type_data); case TC_SETUP_BLOCK: return flow_block_cb_setup_simple(type_data, &stmmac_block_cb_list, @@ -7376,68 +7337,87 @@ int stmmac_reinit_ringparam(struct net_device *dev, u32 rx_size, u32 tx_size) return ret; } -#define SEND_VERIFY_MPAKCET_FMT "Send Verify mPacket lo_state=%d lp_state=%d\n" -static void stmmac_fpe_lp_task(struct work_struct *work) +/** + * stmmac_fpe_verify_timer - Timer 
for MAC Merge verification + * @t: timer_list struct containing private info + * + * Verify the MAC Merge capability in the local TX direction, by + * transmitting Verify mPackets up to 3 times. Wait until link + * partner responds with a Response mPacket, otherwise fail. + */ +static void stmmac_fpe_verify_timer(struct timer_list *t) { - struct stmmac_priv *priv = container_of(work, struct stmmac_priv, - fpe_task); - struct stmmac_fpe_cfg *fpe_cfg = priv->plat->fpe_cfg; - enum stmmac_fpe_state *lo_state = &fpe_cfg->lo_fpe_state; - enum stmmac_fpe_state *lp_state = &fpe_cfg->lp_fpe_state; - bool *hs_enable = &fpe_cfg->hs_enable; - bool *enable = &fpe_cfg->enable; - int retries = 20; - - while (retries-- > 0) { - /* Bail out immediately if FPE handshake is OFF */ - if (*lo_state == FPE_STATE_OFF || !*hs_enable) - break; - - if (*lo_state == FPE_STATE_ENTERING_ON && - *lp_state == FPE_STATE_ENTERING_ON) { - stmmac_fpe_configure(priv, priv->ioaddr, - fpe_cfg, - priv->plat->tx_queues_to_use, - priv->plat->rx_queues_to_use, - *enable); - - netdev_info(priv->dev, "configured FPE\n"); + struct stmmac_fpe_cfg *fpe_cfg = from_timer(fpe_cfg, t, verify_timer); + struct stmmac_priv *priv = container_of(fpe_cfg, struct stmmac_priv, + fpe_cfg); + unsigned long flags; + bool rearm = false; - *lo_state = FPE_STATE_ON; - *lp_state = FPE_STATE_ON; - netdev_info(priv->dev, "!!! 
BOTH FPE stations ON\n"); - break; - } + spin_lock_irqsave(&fpe_cfg->lock, flags); - if ((*lo_state == FPE_STATE_CAPABLE || - *lo_state == FPE_STATE_ENTERING_ON) && - *lp_state != FPE_STATE_ON) { - netdev_info(priv->dev, SEND_VERIFY_MPAKCET_FMT, - *lo_state, *lp_state); + switch (fpe_cfg->status) { + case ETHTOOL_MM_VERIFY_STATUS_INITIAL: + case ETHTOOL_MM_VERIFY_STATUS_VERIFYING: + if (fpe_cfg->verify_retries != 0) { stmmac_fpe_send_mpacket(priv, priv->ioaddr, - fpe_cfg, - MPACKET_VERIFY); + fpe_cfg, MPACKET_VERIFY); + rearm = true; + } else { + fpe_cfg->status = ETHTOOL_MM_VERIFY_STATUS_FAILED; } - /* Sleep then retry */ - msleep(500); + + fpe_cfg->verify_retries--; + break; + + case ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED: + stmmac_fpe_configure(priv, priv->ioaddr, fpe_cfg, + priv->plat->tx_queues_to_use, + priv->plat->rx_queues_to_use, + true, true); + break; + + default: + break; } - clear_bit(__FPE_TASK_SCHED, &priv->fpe_task_state); + if (rearm) { + mod_timer(&fpe_cfg->verify_timer, + jiffies + msecs_to_jiffies(fpe_cfg->verify_time)); + } + + spin_unlock_irqrestore(&fpe_cfg->lock, flags); } -void stmmac_fpe_handshake(struct stmmac_priv *priv, bool enable) +static void stmmac_fpe_verify_timer_arm(struct stmmac_fpe_cfg *fpe_cfg) { - if (priv->plat->fpe_cfg->hs_enable != enable) { - if (enable) { - stmmac_fpe_send_mpacket(priv, priv->ioaddr, - priv->plat->fpe_cfg, - MPACKET_VERIFY); - } else { - priv->plat->fpe_cfg->lo_fpe_state = FPE_STATE_OFF; - priv->plat->fpe_cfg->lp_fpe_state = FPE_STATE_OFF; - } + if (fpe_cfg->pmac_enabled && fpe_cfg->tx_enabled && + fpe_cfg->verify_enabled && + fpe_cfg->status != ETHTOOL_MM_VERIFY_STATUS_FAILED && + fpe_cfg->status != ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED) { + timer_setup(&fpe_cfg->verify_timer, stmmac_fpe_verify_timer, 0); + mod_timer(&fpe_cfg->verify_timer, jiffies); + } +} + +void stmmac_fpe_apply(struct stmmac_priv *priv) +{ + struct stmmac_fpe_cfg *fpe_cfg = &priv->fpe_cfg; + + /* If verification is disabled, configure FPE 
right away. + * Otherwise let the timer code do it. + */ + if (!fpe_cfg->verify_enabled) { + stmmac_fpe_configure(priv, priv->ioaddr, fpe_cfg, + priv->plat->tx_queues_to_use, + priv->plat->rx_queues_to_use, + fpe_cfg->tx_enabled, + fpe_cfg->pmac_enabled); + } else { + fpe_cfg->status = ETHTOOL_MM_VERIFY_STATUS_INITIAL; + fpe_cfg->verify_retries = STMMAC_FPE_MM_MAX_VERIFY_RETRIES; - priv->plat->fpe_cfg->hs_enable = enable; + if (netif_running(priv->dev)) + stmmac_fpe_verify_timer_arm(fpe_cfg); } } @@ -7555,9 +7535,6 @@ int stmmac_dvr_probe(struct device *device, INIT_WORK(&priv->service_task, stmmac_service_task); - /* Initialize Link Partner FPE workqueue */ - INIT_WORK(&priv->fpe_task, stmmac_fpe_lp_task); - /* Override with kernel parameters if supplied XXX CRS XXX * this needs to have multiple instances */ @@ -7722,6 +7699,12 @@ int stmmac_dvr_probe(struct device *device, mutex_init(&priv->lock); + priv->fpe_cfg.verify_retries = STMMAC_FPE_MM_MAX_VERIFY_RETRIES; + priv->fpe_cfg.verify_time = STMMAC_FPE_MM_MAX_VERIFY_TIME_MS; + priv->fpe_cfg.status = ETHTOOL_MM_VERIFY_STATUS_DISABLED; + timer_setup(&priv->fpe_cfg.verify_timer, stmmac_fpe_verify_timer, 0); + spin_lock_init(&priv->fpe_cfg.lock); + /* If a specific clk_csr value is passed from the platform * this means that the CSR Clock Range selection cannot be * changed at run-time and it is fixed. 
Viceversa the driver'll try to @@ -7895,16 +7878,8 @@ int stmmac_suspend(struct device *dev) } rtnl_unlock(); - if (priv->dma_cap.fpesel) { - /* Disable FPE */ - stmmac_fpe_configure(priv, priv->ioaddr, - priv->plat->fpe_cfg, - priv->plat->tx_queues_to_use, - priv->plat->rx_queues_to_use, false); - - stmmac_fpe_handshake(priv, false); - stmmac_fpe_stop_wq(priv); - } + if (priv->dma_cap.fpesel) + timer_shutdown_sync(&priv->fpe_cfg.verify_timer); priv->speed = SPEED_UNKNOWN; return 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 996f2bcd07a2..832998bc020b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -282,16 +282,6 @@ static int tc_init(struct stmmac_priv *priv) if (ret) return -ENOMEM; - if (!priv->plat->fpe_cfg) { - priv->plat->fpe_cfg = devm_kzalloc(priv->device, - sizeof(*priv->plat->fpe_cfg), - GFP_KERNEL); - if (!priv->plat->fpe_cfg) - return -ENOMEM; - } else { - memset(priv->plat->fpe_cfg, 0, sizeof(*priv->plat->fpe_cfg)); - } - /* Fail silently as we can still use remaining features, e.g. 
CBS */ if (!dma_cap->frpsel) return 0; @@ -941,9 +931,9 @@ static int tc_taprio_configure(struct stmmac_priv *priv, struct tc_taprio_qopt_offload *qopt) { u32 size, wid = priv->dma_cap.estwid, dep = priv->dma_cap.estdep; + struct netlink_ext_ack *extack = qopt->mqprio.extack; struct timespec64 time, current_time, qopt_time; ktime_t current_time_ns; - bool fpe = false; int i, ret = 0; u64 ctr; @@ -1028,16 +1018,12 @@ static int tc_taprio_configure(struct stmmac_priv *priv, switch (qopt->entries[i].command) { case TC_TAPRIO_CMD_SET_GATES: - if (fpe) - return -EINVAL; break; case TC_TAPRIO_CMD_SET_AND_HOLD: gates |= BIT(0); - fpe = true; break; case TC_TAPRIO_CMD_SET_AND_RELEASE: gates &= ~BIT(0); - fpe = true; break; default: return -EOPNOTSUPP; @@ -1068,16 +1054,6 @@ static int tc_taprio_configure(struct stmmac_priv *priv, tc_taprio_map_maxsdu_txq(priv, qopt); - if (fpe && !priv->dma_cap.fpesel) { - mutex_unlock(&priv->est_lock); - return -EOPNOTSUPP; - } - - /* Actual FPE register configuration will be done after FPE handshake - * is success. 
- */ - priv->plat->fpe_cfg->enable = fpe; - ret = stmmac_est_configure(priv, priv, priv->est, priv->plat->clk_ptp_rate); mutex_unlock(&priv->est_lock); @@ -1086,12 +1062,10 @@ static int tc_taprio_configure(struct stmmac_priv *priv, goto disable; } - netdev_info(priv->dev, "configured EST\n"); - - if (fpe) { - stmmac_fpe_handshake(priv, true); - netdev_info(priv->dev, "start FPE handshake\n"); - } + ret = stmmac_fpe_map_preemption_class(priv, priv->dev, extack, + qopt->mqprio.preemptible_tcs); + if (ret) + goto disable; return 0; @@ -1109,16 +1083,7 @@ disable: mutex_unlock(&priv->est_lock); } - priv->plat->fpe_cfg->enable = false; - stmmac_fpe_configure(priv, priv->ioaddr, - priv->plat->fpe_cfg, - priv->plat->tx_queues_to_use, - priv->plat->rx_queues_to_use, - false); - netdev_info(priv->dev, "disabled FPE\n"); - - stmmac_fpe_handshake(priv, false); - netdev_info(priv->dev, "stop FPE handshake\n"); + stmmac_fpe_map_preemption_class(priv, priv->dev, extack, 0); return ret; } @@ -1174,6 +1139,18 @@ static int tc_setup_taprio(struct stmmac_priv *priv, return err; } +static int tc_setup_taprio_without_fpe(struct stmmac_priv *priv, + struct tc_taprio_qopt_offload *qopt) +{ + if (!qopt->mqprio.preemptible_tcs) + return tc_setup_taprio(priv, qopt); + + NL_SET_ERR_MSG_MOD(qopt->mqprio.extack, + "taprio with FPE is not implemented for this MAC"); + + return -EOPNOTSUPP; +} + static int tc_setup_etf(struct stmmac_priv *priv, struct tc_etf_qopt_offload *qopt) { @@ -1198,6 +1175,13 @@ static int tc_query_caps(struct stmmac_priv *priv, struct tc_query_caps_base *base) { switch (base->type) { + case TC_SETUP_QDISC_MQPRIO: { + struct tc_mqprio_caps *caps = base->caps; + + caps->validate_queue_counts = true; + + return 0; + } case TC_SETUP_QDISC_TAPRIO: { struct tc_taprio_caps *caps = base->caps; @@ -1214,6 +1198,81 @@ static int tc_query_caps(struct stmmac_priv *priv, } } +static void stmmac_reset_tc_mqprio(struct net_device *ndev, + struct netlink_ext_ack *extack) +{ + struct 
stmmac_priv *priv = netdev_priv(ndev); + + netdev_reset_tc(ndev); + netif_set_real_num_tx_queues(ndev, priv->plat->tx_queues_to_use); + stmmac_fpe_map_preemption_class(priv, ndev, extack, 0); +} + +static int tc_setup_dwmac510_mqprio(struct stmmac_priv *priv, + struct tc_mqprio_qopt_offload *mqprio) +{ + struct netlink_ext_ack *extack = mqprio->extack; + struct tc_mqprio_qopt *qopt = &mqprio->qopt; + u32 offset, count, num_stack_tx_queues = 0; + struct net_device *ndev = priv->dev; + u32 num_tc = qopt->num_tc; + int err; + + if (!num_tc) { + stmmac_reset_tc_mqprio(ndev, extack); + return 0; + } + + err = netdev_set_num_tc(ndev, num_tc); + if (err) + return err; + + for (u32 tc = 0; tc < num_tc; tc++) { + offset = qopt->offset[tc]; + count = qopt->count[tc]; + num_stack_tx_queues += count; + + err = netdev_set_tc_queue(ndev, tc, count, offset); + if (err) + goto err_reset_tc; + } + + err = netif_set_real_num_tx_queues(ndev, num_stack_tx_queues); + if (err) + goto err_reset_tc; + + err = stmmac_fpe_map_preemption_class(priv, ndev, extack, + mqprio->preemptible_tcs); + if (err) + goto err_reset_tc; + + return 0; + +err_reset_tc: + stmmac_reset_tc_mqprio(ndev, extack); + + return err; +} + +static int tc_setup_mqprio_unimplemented(struct stmmac_priv *priv, + struct tc_mqprio_qopt_offload *mqprio) +{ + NL_SET_ERR_MSG_MOD(mqprio->extack, + "mqprio HW offload is not implemented for this MAC"); + return -EOPNOTSUPP; +} + +const struct stmmac_tc_ops dwmac4_tc_ops = { + .init = tc_init, + .setup_cls_u32 = tc_setup_cls_u32, + .setup_cbs = tc_setup_cbs, + .setup_cls = tc_setup_cls, + .setup_taprio = tc_setup_taprio_without_fpe, + .setup_etf = tc_setup_etf, + .query_caps = tc_query_caps, + .setup_mqprio = tc_setup_mqprio_unimplemented, +}; + const struct stmmac_tc_ops dwmac510_tc_ops = { .init = tc_init, .setup_cls_u32 = tc_setup_cls_u32, @@ -1222,4 +1281,16 @@ const struct stmmac_tc_ops dwmac510_tc_ops = { .setup_taprio = tc_setup_taprio, .setup_etf = tc_setup_etf, .query_caps 
= tc_query_caps, + .setup_mqprio = tc_setup_dwmac510_mqprio, +}; + +const struct stmmac_tc_ops dwxgmac_tc_ops = { + .init = tc_init, + .setup_cls_u32 = tc_setup_cls_u32, + .setup_cbs = tc_setup_cbs, + .setup_cls = tc_setup_cls, + .setup_taprio = tc_setup_taprio_without_fpe, + .setup_etf = tc_setup_etf, + .query_caps = tc_query_caps, + .setup_mqprio = tc_setup_mqprio_unimplemented, }; diff --git a/drivers/net/ethernet/ti/icssg/icssg_ethtool.c b/drivers/net/ethernet/ti/icssg/icssg_ethtool.c index 73b6cef10401..b715af21d23a 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_ethtool.c +++ b/drivers/net/ethernet/ti/icssg/icssg_ethtool.c @@ -68,9 +68,13 @@ static int emac_nway_reset(struct net_device *ndev) static int emac_get_sset_count(struct net_device *ndev, int stringset) { + struct prueth_emac *emac = netdev_priv(ndev); switch (stringset) { case ETH_SS_STATS: - return ICSSG_NUM_ETHTOOL_STATS; + if (emac->prueth->pa_stats) + return ICSSG_NUM_ETHTOOL_STATS; + else + return ICSSG_NUM_ETHTOOL_STATS - ICSSG_NUM_PA_STATS; default: return -EOPNOTSUPP; } @@ -78,6 +82,7 @@ static int emac_get_sset_count(struct net_device *ndev, int stringset) static void emac_get_strings(struct net_device *ndev, u32 stringset, u8 *data) { + struct prueth_emac *emac = netdev_priv(ndev); u8 *p = data; int i; @@ -86,8 +91,9 @@ static void emac_get_strings(struct net_device *ndev, u32 stringset, u8 *data) for (i = 0; i < ARRAY_SIZE(icssg_all_miig_stats); i++) if (!icssg_all_miig_stats[i].standard_stats) ethtool_puts(&p, icssg_all_miig_stats[i].name); - for (i = 0; i < ARRAY_SIZE(icssg_all_pa_stats); i++) - ethtool_puts(&p, icssg_all_pa_stats[i].name); + if (emac->prueth->pa_stats) + for (i = 0; i < ARRAY_SIZE(icssg_all_pa_stats); i++) + ethtool_puts(&p, icssg_all_pa_stats[i].name); break; default: break; @@ -106,8 +112,9 @@ static void emac_get_ethtool_stats(struct net_device *ndev, if (!icssg_all_miig_stats[i].standard_stats) *(data++) = emac->stats[i]; - for (i = 0; i < 
ARRAY_SIZE(icssg_all_pa_stats); i++) - *(data++) = emac->pa_stats[i]; + if (emac->prueth->pa_stats) + for (i = 0; i < ARRAY_SIZE(icssg_all_pa_stats); i++) + *(data++) = emac->pa_stats[i]; } static int emac_get_ts_info(struct net_device *ndev, diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index becdda143c19..6644203d6bb7 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -1185,7 +1185,7 @@ static int prueth_probe(struct platform_device *pdev) prueth->pa_stats = syscon_regmap_lookup_by_phandle(np, "ti,pa-stats"); if (IS_ERR(prueth->pa_stats)) { dev_err(dev, "couldn't get ti,pa-stats syscon regmap\n"); - return -ENODEV; + prueth->pa_stats = NULL; } if (eth0_node) { diff --git a/drivers/net/ethernet/ti/icssg/icssg_stats.c b/drivers/net/ethernet/ti/icssg/icssg_stats.c index 06a15c0b2acc..8800bd3a8d07 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_stats.c +++ b/drivers/net/ethernet/ti/icssg/icssg_stats.c @@ -42,11 +42,14 @@ void emac_update_hardware_stats(struct prueth_emac *emac) emac->stats[i] -= tx_pkt_cnt * 8; } - for (i = 0; i < ARRAY_SIZE(icssg_all_pa_stats); i++) { - reg = ICSSG_FW_STATS_BASE + icssg_all_pa_stats[i].offset * - PRUETH_NUM_MACS + slice * sizeof(u32); - regmap_read(prueth->pa_stats, reg, &val); - emac->pa_stats[i] += val; + if (prueth->pa_stats) { + for (i = 0; i < ARRAY_SIZE(icssg_all_pa_stats); i++) { + reg = ICSSG_FW_STATS_BASE + + icssg_all_pa_stats[i].offset * + PRUETH_NUM_MACS + slice * sizeof(u32); + regmap_read(prueth->pa_stats, reg, &val); + emac->pa_stats[i] += val; + } } } @@ -70,9 +73,11 @@ int emac_get_stat_by_name(struct prueth_emac *emac, char *stat_name) return emac->stats[icssg_all_miig_stats[i].offset / sizeof(u32)]; } - for (i = 0; i < ARRAY_SIZE(icssg_all_pa_stats); i++) { - if (!strcmp(icssg_all_pa_stats[i].name, stat_name)) - return emac->pa_stats[icssg_all_pa_stats[i].offset / sizeof(u32)]; + if 
(emac->prueth->pa_stats) { + for (i = 0; i < ARRAY_SIZE(icssg_all_pa_stats); i++) { + if (!strcmp(icssg_all_pa_stats[i].name, stat_name)) + return emac->pa_stats[icssg_all_pa_stats[i].offset / sizeof(u32)]; + } } netdev_err(emac->ndev, "Invalid stats %s\n", stat_name); diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index 43d81b40f761..d64b8abcf018 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -529,8 +529,6 @@ struct skbuf_dma_descriptor { * supported, the maximum frame size would be 9k. Else it is * 1522 bytes (assuming support for basic VLAN) * @rxmem: Stores rx memory size for jumbo frame handling. - * @csum_offload_on_tx_path: Stores the checksum selection on TX side. - * @csum_offload_on_rx_path: Stores the checksum selection on RX side. * @coalesce_count_rx: Store the irq coalesce on RX side. * @coalesce_usec_rx: IRQ coalesce delay for RX * @coalesce_count_tx: Store the irq coalesce on TX side. 
@@ -609,9 +607,6 @@ struct axienet_local { u32 max_frm_size; u32 rxmem; - int csum_offload_on_tx_path; - int csum_offload_on_rx_path; - u32 coalesce_count_rx; u32 coalesce_usec_rx; u32 coalesce_count_tx; diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 374dff70ef0d..ea7d7c03f48e 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1188,9 +1188,7 @@ static int axienet_rx_poll(struct napi_struct *napi, int budget) csumstatus == XAE_IP_UDP_CSUM_VALIDATED) { skb->ip_summed = CHECKSUM_UNNECESSARY; } - } else if ((lp->features & XAE_FEATURE_PARTIAL_RX_CSUM) != 0 && - skb->protocol == htons(ETH_P_IP) && - skb->len > 64) { + } else if (lp->features & XAE_FEATURE_PARTIAL_RX_CSUM) { skb->csum = be32_to_cpu(cur_p->app3 & 0xFFFF); skb->ip_summed = CHECKSUM_COMPLETE; } @@ -2639,38 +2637,28 @@ static int axienet_probe(struct platform_device *pdev) if (!ret) { switch (value) { case 1: - lp->csum_offload_on_tx_path = - XAE_FEATURE_PARTIAL_TX_CSUM; lp->features |= XAE_FEATURE_PARTIAL_TX_CSUM; - /* Can checksum TCP/UDP over IPv4. */ - ndev->features |= NETIF_F_IP_CSUM; + /* Can checksum any contiguous range */ + ndev->features |= NETIF_F_HW_CSUM; break; case 2: - lp->csum_offload_on_tx_path = - XAE_FEATURE_FULL_TX_CSUM; lp->features |= XAE_FEATURE_FULL_TX_CSUM; /* Can checksum TCP/UDP over IPv4. 
*/ ndev->features |= NETIF_F_IP_CSUM; break; - default: - lp->csum_offload_on_tx_path = XAE_NO_CSUM_OFFLOAD; } } ret = of_property_read_u32(pdev->dev.of_node, "xlnx,rxcsum", &value); if (!ret) { switch (value) { case 1: - lp->csum_offload_on_rx_path = - XAE_FEATURE_PARTIAL_RX_CSUM; lp->features |= XAE_FEATURE_PARTIAL_RX_CSUM; + ndev->features |= NETIF_F_RXCSUM; break; case 2: - lp->csum_offload_on_rx_path = - XAE_FEATURE_FULL_RX_CSUM; lp->features |= XAE_FEATURE_FULL_RX_CSUM; + ndev->features |= NETIF_F_RXCSUM; break; - default: - lp->csum_offload_on_rx_path = XAE_NO_CSUM_OFFLOAD; } } /* For supporting jumbo frames, the Axi Ethernet hardware must have diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index 56df37f8d50a..aef316278eb4 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -1026,9 +1026,7 @@ static int ixp4xx_get_ts_info(struct net_device *dev, if (info->phc_index < 0) { info->so_timestamping = - SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; + SOF_TIMESTAMPING_TX_SOFTWARE; return 0; } info->so_timestamping = diff --git a/drivers/net/mdio/fwnode_mdio.c b/drivers/net/mdio/fwnode_mdio.c index fd02f5cbc853..b156493d7084 100644 --- a/drivers/net/mdio/fwnode_mdio.c +++ b/drivers/net/mdio/fwnode_mdio.c @@ -7,6 +7,7 @@ */ #include <linux/acpi.h> +#include <linux/dev_printk.h> #include <linux/fwnode_mdio.h> #include <linux/of.h> #include <linux/phy.h> @@ -104,7 +105,7 @@ int fwnode_mdiobus_phy_device_register(struct mii_bus *mdio, return rc; } - dev_dbg(&mdio->dev, "registered phy %p fwnode at address %i\n", + dev_dbg(&mdio->dev, "registered phy fwnode %pfw at address %i\n", child, addr); return 0; } diff --git a/drivers/ptp/ptp_ines.c b/drivers/ptp/ptp_ines.c index e6f7d2bf8dde..14a23d3a27f2 100644 --- a/drivers/ptp/ptp_ines.c +++ b/drivers/ptp/ptp_ines.c @@ -562,12 +562,8 @@ static int ines_ts_info(struct 
mii_timestamper *mii_ts, SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_RX_HARDWARE | - SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_RAW_HARDWARE; - info->phc_index = -1; - info->tx_types = (1 << HWTSTAMP_TX_OFF) | (1 << HWTSTAMP_TX_ON) | diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b47c00657bd0..2e40a137dc12 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3085,8 +3085,6 @@ void dev_disable_lro(struct net_device *dev); int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); -u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev); int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev); int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index cf8f6ce06742..5803eb8a157d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1433,6 +1433,7 @@ void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, unsigned int skb_seq_read(unsigned int consumed, const u8 **data, struct skb_seq_state *st); void skb_abort_seq_read(struct skb_seq_state *st); +int skb_copy_seq_read(struct skb_seq_state *st, int offset, void *to, int len); unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, unsigned int to, struct ts_config *config); diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 338991c08f00..d79ff252cfdc 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -138,33 +138,6 @@ struct stmmac_txq_cfg { int tbs_en; }; -/* FPE link state */ -enum stmmac_fpe_state { - FPE_STATE_OFF = 0, - FPE_STATE_CAPABLE = 1, - FPE_STATE_ENTERING_ON = 2, - FPE_STATE_ON = 3, -}; - -/* FPE link-partner hand-shaking mPacket type */ -enum stmmac_mpacket_type { - MPACKET_VERIFY = 0, - 
MPACKET_RESPONSE = 1, -}; - -enum stmmac_fpe_task_state_t { - __FPE_REMOVING, - __FPE_TASK_SCHED, -}; - -struct stmmac_fpe_cfg { - bool enable; /* FPE enable */ - bool hs_enable; /* FPE handshake enable */ - enum stmmac_fpe_state lp_fpe_state; /* Link Partner FPE state */ - enum stmmac_fpe_state lo_fpe_state; /* Local station FPE state */ - u32 fpe_csr; /* MAC_FPE_CTRL_STS reg cache */ -}; - struct stmmac_safety_feature_cfg { u32 tsoee; u32 mrxpee; @@ -232,7 +205,6 @@ struct plat_stmmacenet_data { struct fwnode_handle *port_node; struct device_node *mdio_node; struct stmmac_dma_cfg *dma_cfg; - struct stmmac_fpe_cfg *fpe_cfg; struct stmmac_safety_feature_cfg *safety_feat_cfg; int clk_csr; int has_gmac; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 54cef89f6c1e..b6bfdc6416c7 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -67,27 +67,27 @@ - instance of a transformer, struct xfrm_state (=SA) - template to clone xfrm_state, struct xfrm_tmpl - SPD is plain linear list of xfrm_policy rules, ordered by priority. + SPD is organized as hash table (for policies that meet minimum address prefix + length setting, net->xfrm.policy_hthresh). Other policies are stored in + lists, sorted into rbtree ordered by destination and source address networks. + See net/xfrm/xfrm_policy.c for details. + (To be compatible with existing pfkeyv2 implementations, many rules with priority of 0x7fffffff are allowed to exist and such rules are ordered in an unpredictable way, thanks to bsd folks.) - Lookup is plain linear search until the first match with selector. - If "action" is "block", then we prohibit the flow, otherwise: if "xfrms_nr" is zero, the flow passes untransformed. Otherwise, policy entry has list of up to XFRM_MAX_DEPTH transformations, described by templates xfrm_tmpl. Each template is resolved to a complete xfrm_state (see below) and we pack bundle of transformations - to a dst_entry returned to requestor. + to a dst_entry returned to requester. dst -. 
xfrm .-> xfrm_state #1 |---. child .-> dst -. xfrm .-> xfrm_state #2 |---. child .-> dst -. xfrm .-> xfrm_state #3 |---. child .-> NULL - Bundles are cached at xrfm_policy struct (field ->bundles). - Resolution of xrfm_tmpl ----------------------- @@ -526,6 +526,36 @@ struct xfrm_policy_queue { unsigned long timeout; }; +/** + * struct xfrm_policy - xfrm policy + * @xp_net: network namespace the policy lives in + * @bydst: hlist node for SPD hash table or rbtree list + * @byidx: hlist node for index hash table + * @lock: serialize changes to policy structure members + * @refcnt: reference count, freed once it reaches 0 + * @pos: kernel internal tie-breaker to determine age of policy + * @timer: timer + * @genid: generation, used to invalidate old policies + * @priority: priority, set by userspace + * @index: policy index (autogenerated) + * @if_id: virtual xfrm interface id + * @mark: packet mark + * @selector: selector + * @lft: lifetime configuration data + * @curlft: lifetime state + * @walk: list head on pernet policy list + * @polq: queue to hold packets while acquire operation in progress + * @bydst_reinsert: policy tree node needs to be merged + * @type: XFRM_POLICY_TYPE_MAIN or _SUB + * @action: XFRM_POLICY_ALLOW or _BLOCK + * @flags: XFRM_POLICY_LOCALOK, XFRM_POLICY_ICMP + * @xfrm_nr: number of used templates in @xfrm_vec + * @family: protocol family + * @security: SELinux security label + * @xfrm_vec: array of templates to resolve state + * @rcu: rcu head, used to defer memory release + * @xdo: hardware offload state + */ struct xfrm_policy { possible_net_t xp_net; struct hlist_node bydst; @@ -555,7 +585,6 @@ struct xfrm_policy { u16 family; struct xfrm_sec_ctx *security; struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH]; - struct hlist_node bydst_inexact_list; struct rcu_head rcu; struct xfrm_dev_offload xdo; @@ -1016,7 +1045,7 @@ void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev); struct xfrm_if_parms { int link; /* ifindex of underlying L2
interface */ - u32 if_id; /* interface identifyer */ + u32 if_id; /* interface identifier */ bool collect_md; }; diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index a2c66b3d7f0f..858339d1c1c4 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -32,8 +32,9 @@ enum { SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14), SOF_TIMESTAMPING_BIND_PHC = (1 << 15), SOF_TIMESTAMPING_OPT_ID_TCP = (1 << 16), + SOF_TIMESTAMPING_OPT_RX_FILTER = (1 << 17), - SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_ID_TCP, + SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_RX_FILTER, SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | SOF_TIMESTAMPING_LAST }; diff --git a/net/core/dev.c b/net/core/dev.c index 22c3f14d9287..8f4dead64284 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4247,13 +4247,6 @@ u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, } EXPORT_SYMBOL(dev_pick_tx_zero); -u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev) -{ - return (u16)raw_smp_processor_id() % dev->real_num_tx_queues; -} -EXPORT_SYMBOL(dev_pick_tx_cpu_id); - u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index a52638363ea5..038a059b5924 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4411,6 +4411,41 @@ void skb_abort_seq_read(struct skb_seq_state *st) } EXPORT_SYMBOL(skb_abort_seq_read); +/** + * skb_copy_seq_read() - copy from a skb_seq_state to a buffer + * @st: source skb_seq_state + * @offset: offset in source + * @to: destination buffer + * @len: number of bytes to copy + * + * Copy @len bytes from @offset bytes into the source @st to the destination + * buffer @to. `offset` should increase (or be unchanged) with each subsequent + * call to this function. If offset needs to decrease from the previous use `st` + * should be reset first. 
+ * + * Return: 0 on success or -EINVAL if the copy ended early + */ +int skb_copy_seq_read(struct skb_seq_state *st, int offset, void *to, int len) +{ + const u8 *data; + u32 sqlen; + + for (;;) { + sqlen = skb_seq_read(offset, &data, st); + if (sqlen == 0) + return -EINVAL; + if (sqlen >= len) { + memcpy(to, data, len); + return 0; + } + memcpy(to, data, sqlen); + to += sqlen; + offset += sqlen; + len -= sqlen; + } +} +EXPORT_SYMBOL(skb_copy_seq_read); + #define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb)) static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text, diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index 1f46de394f2e..281bbac5539d 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -178,8 +178,9 @@ MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ8795, KSZ8795_NAME); #define KSZ9477_INGRESS_TAG_LEN 2 #define KSZ9477_PTP_TAG_LEN 4 -#define KSZ9477_PTP_TAG_INDICATION 0x80 +#define KSZ9477_PTP_TAG_INDICATION BIT(7) +#define KSZ9477_TAIL_TAG_EG_PORT_M GENMASK(2, 0) #define KSZ9477_TAIL_TAG_PRIO GENMASK(8, 7) #define KSZ9477_TAIL_TAG_OVERRIDE BIT(9) #define KSZ9477_TAIL_TAG_LOOKUP BIT(10) @@ -312,7 +313,7 @@ static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev) { /* Tag decoding */ u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; - unsigned int port = tag[0] & 7; + unsigned int port = tag[0] & KSZ9477_TAIL_TAG_EG_PORT_M; unsigned int len = KSZ_EGRESS_TAG_LEN; /* Extra 4-bytes PTP timestamp */ diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 781834ef57c3..6c245e59bbc1 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -427,6 +427,7 @@ const char sof_timestamping_names[][ETH_GSTRING_LEN] = { [const_ilog2(SOF_TIMESTAMPING_OPT_TX_SWHW)] = "option-tx-swhw", [const_ilog2(SOF_TIMESTAMPING_BIND_PHC)] = "bind-phc", [const_ilog2(SOF_TIMESTAMPING_OPT_ID_TCP)] = "option-id-tcp", + [const_ilog2(SOF_TIMESTAMPING_OPT_RX_FILTER)] = "option-rx-filter", }; 
static_assert(ARRAY_SIZE(sof_timestamping_names) == __SOF_TIMESTAMPING_CNT); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8a5680b4e786..e359a9161445 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2235,6 +2235,7 @@ void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, struct scm_timestamping_internal *tss) { int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW); + u32 tsflags = READ_ONCE(sk->sk_tsflags); bool has_timestamping = false; if (tss->ts[0].tv_sec || tss->ts[0].tv_nsec) { @@ -2274,14 +2275,18 @@ void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, } } - if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_SOFTWARE) + if (tsflags & SOF_TIMESTAMPING_SOFTWARE && + (tsflags & SOF_TIMESTAMPING_RX_SOFTWARE || + !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER))) has_timestamping = true; else tss->ts[0] = (struct timespec64) {0}; } if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) { - if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_RAW_HARDWARE) + if (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE && + (tsflags & SOF_TIMESTAMPING_RX_HARDWARE || + !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER))) has_timestamping = true; else tss->ts[2] = (struct timespec64) {0}; diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index d2f49db70523..f2f9b75008bb 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -361,8 +361,24 @@ static const u8 besteffort[] = { static const u8 normal_order[] = {0, 1, 2, 3, 4, 5, 6, 7}; static const u8 bulk_order[] = {1, 0, 2, 3}; +/* There is a big difference in timing between the accurate values placed in the + * cache and the approximations given by a single Newton step for small count + * values, particularly when stepping from count 1 to 2 or vice versa. Hence, + * these values are calculated using eight Newton steps, using the + * implementation below. Above 16, a single Newton step gives sufficient + * accuracy in either direction, given the precision stored. 
+ * + * The magnitude of the error when stepping up to count 2 is such as to give the + * value that *should* have been produced at count 4. + */ + #define REC_INV_SQRT_CACHE (16) -static u32 cobalt_rec_inv_sqrt_cache[REC_INV_SQRT_CACHE] = {0}; +static const u32 inv_sqrt_cache[REC_INV_SQRT_CACHE] = { + ~0, ~0, 3037000500, 2479700525, + 2147483647, 1920767767, 1753413056, 1623345051, + 1518500250, 1431655765, 1358187914, 1294981364, + 1239850263, 1191209601, 1147878294, 1108955788 +}; /* http://en.wikipedia.org/wiki/Methods_of_computing_square_roots * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2) @@ -388,47 +404,14 @@ static void cobalt_newton_step(struct cobalt_vars *vars) static void cobalt_invsqrt(struct cobalt_vars *vars) { if (vars->count < REC_INV_SQRT_CACHE) - vars->rec_inv_sqrt = cobalt_rec_inv_sqrt_cache[vars->count]; + vars->rec_inv_sqrt = inv_sqrt_cache[vars->count]; else cobalt_newton_step(vars); } -/* There is a big difference in timing between the accurate values placed in - * the cache and the approximations given by a single Newton step for small - * count values, particularly when stepping from count 1 to 2 or vice versa. - * Above 16, a single Newton step gives sufficient accuracy in either - * direction, given the precision stored. - * - * The magnitude of the error when stepping up to count 2 is such as to give - * the value that *should* have been produced at count 4. 
- */ - -static void cobalt_cache_init(void) -{ - struct cobalt_vars v; - - memset(&v, 0, sizeof(v)); - v.rec_inv_sqrt = ~0U; - cobalt_rec_inv_sqrt_cache[0] = v.rec_inv_sqrt; - - for (v.count = 1; v.count < REC_INV_SQRT_CACHE; v.count++) { - cobalt_newton_step(&v); - cobalt_newton_step(&v); - cobalt_newton_step(&v); - cobalt_newton_step(&v); - - cobalt_rec_inv_sqrt_cache[v.count] = v.rec_inv_sqrt; - } -} - static void cobalt_vars_init(struct cobalt_vars *vars) { memset(vars, 0, sizeof(*vars)); - - if (!cobalt_rec_inv_sqrt_cache[0]) { - cobalt_cache_init(); - cobalt_rec_inv_sqrt_cache[0] = ~0; - } } /* CoDel control_law is t + interval/sqrt(count) diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 2adb92b8c469..1dd362326c0a 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -887,9 +887,6 @@ int smc_pnet_net_init(struct net *net) smc_pnet_create_pnetids_list(net); - /* disable handshake limitation by default */ - net->smc.limit_smc_hs = 0; - return 0; } diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c index 13f2bc092db1..2fab6456f765 100644 --- a/net/smc/smc_sysctl.c +++ b/net/smc/smc_sysctl.c @@ -90,6 +90,15 @@ static struct ctl_table smc_table[] = { .extra1 = &conns_per_lgr_min, .extra2 = &conns_per_lgr_max, }, + { + .procname = "limit_smc_hs", + .data = &init_net.smc.limit_smc_hs, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, }; int __net_init smc_sysctl_net_init(struct net *net) @@ -121,6 +130,8 @@ int __net_init smc_sysctl_net_init(struct net *net) WRITE_ONCE(net->smc.sysctl_rmem, net_smc_rmem_init); net->smc.sysctl_max_links_per_lgr = SMC_LINKS_PER_LGR_MAX_PREFER; net->smc.sysctl_max_conns_per_lgr = SMC_CONN_PER_LGR_PREFER; + /* disable handshake limitation by default */ + net->smc.limit_smc_hs = 0; return 0; diff --git a/net/socket.c b/net/socket.c index 0a2bd22ec105..8d8b84fa404a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -946,11 +946,17 @@ 
void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, memset(&tss, 0, sizeof(tss)); tsflags = READ_ONCE(sk->sk_tsflags); - if ((tsflags & SOF_TIMESTAMPING_SOFTWARE) && + if ((tsflags & SOF_TIMESTAMPING_SOFTWARE && + (tsflags & SOF_TIMESTAMPING_RX_SOFTWARE || + skb_is_err_queue(skb) || + !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER))) && ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0)) empty = 0; if (shhwtstamps && - (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) && + (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE && + (tsflags & SOF_TIMESTAMPING_RX_HARDWARE || + skb_is_err_queue(skb) || + !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER))) && !skb_is_swtx_tstamp(skb, false_tstamp)) { if_index = 0; if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NETDEV) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index a1894019ebd5..001ccc55ef0f 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2654,51 +2654,52 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state) static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, int flags, int copied) { + struct sk_buff *read_skb = NULL, *unread_skb = NULL; struct unix_sock *u = unix_sk(sk); - if (!unix_skb_len(skb)) { - struct sk_buff *unlinked_skb = NULL; + if (likely(unix_skb_len(skb) && skb != READ_ONCE(u->oob_skb))) + return skb; - spin_lock(&sk->sk_receive_queue.lock); + spin_lock(&sk->sk_receive_queue.lock); + if (!unix_skb_len(skb)) { if (copied && (!u->oob_skb || skb == u->oob_skb)) { skb = NULL; } else if (flags & MSG_PEEK) { skb = skb_peek_next(skb, &sk->sk_receive_queue); } else { - unlinked_skb = skb; + read_skb = skb; skb = skb_peek_next(skb, &sk->sk_receive_queue); - __skb_unlink(unlinked_skb, &sk->sk_receive_queue); + __skb_unlink(read_skb, &sk->sk_receive_queue); } - spin_unlock(&sk->sk_receive_queue.lock); + if (!skb) + goto unlock; + } - consume_skb(unlinked_skb); - } else { - struct sk_buff *unlinked_skb = NULL; + if (skb != u->oob_skb) + goto unlock; - 
spin_lock(&sk->sk_receive_queue.lock); + if (copied) { + skb = NULL; + } else if (!(flags & MSG_PEEK)) { + WRITE_ONCE(u->oob_skb, NULL); - if (skb == u->oob_skb) { - if (copied) { - skb = NULL; - } else if (!(flags & MSG_PEEK)) { - WRITE_ONCE(u->oob_skb, NULL); - - if (!sock_flag(sk, SOCK_URGINLINE)) { - __skb_unlink(skb, &sk->sk_receive_queue); - unlinked_skb = skb; - skb = skb_peek(&sk->sk_receive_queue); - } - } else if (!sock_flag(sk, SOCK_URGINLINE)) { - skb = skb_peek_next(skb, &sk->sk_receive_queue); - } + if (!sock_flag(sk, SOCK_URGINLINE)) { + __skb_unlink(skb, &sk->sk_receive_queue); + unread_skb = skb; + skb = skb_peek(&sk->sk_receive_queue); } + } else if (!sock_flag(sk, SOCK_URGINLINE)) { + skb = skb_peek_next(skb, &sk->sk_receive_queue); + } - spin_unlock(&sk->sk_receive_queue.lock); +unlock: + spin_unlock(&sk->sk_receive_queue.lock); + + consume_skb(read_skb); + kfree_skb(unread_skb); - kfree_skb(unlinked_skb); - } return skb; } #endif @@ -3175,9 +3176,13 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) skb = skb_peek(&sk->sk_receive_queue); if (skb) { struct sk_buff *oob_skb = READ_ONCE(u->oob_skb); + struct sk_buff *next_skb; + + next_skb = skb_peek_next(skb, &sk->sk_receive_queue); if (skb == oob_skb || - (!oob_skb && !unix_skb_len(skb))) + (!unix_skb_len(skb) && + (!oob_skb || next_skb == oob_skb))) answ = 1; } diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 9a44d363ba62..f123b7c9ec82 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -328,12 +328,8 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, /* User explicitly requested packet offload mode and configured * policy in addition to the XFRM state. So be civil to users, * and return an error instead of taking fallback path. - * - * This WARN_ON() can be seen as a documentation for driver - * authors to do not return -EOPNOTSUPP in packet offload mode. 
*/ - WARN_ON(err == -EOPNOTSUPP && is_packet_offload); - if (err != -EOPNOTSUPP || is_packet_offload) { + if ((err != -EOPNOTSUPP && !is_packet_offload) || is_packet_offload) { NL_SET_ERR_MSG_WEAK(extack, "Device failed to offload this state"); return err; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index b22767c0c078..914bac03b52a 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -110,7 +110,11 @@ struct xfrm_pol_inexact_node { * 4. saddr:any list from saddr tree * * This result set then needs to be searched for the policy with - * the lowest priority. If two results have same prio, youngest one wins. + * the lowest priority. If two candidates have the same priority, the + * struct xfrm_policy pos member with the lower number is used. + * + * This replicates previous single-list-search algorithm which would + * return first matching policy in the (ordered-by-priority) list. */ struct xfrm_pol_inexact_key { @@ -197,8 +201,6 @@ xfrm_policy_inexact_lookup_rcu(struct net *net, static struct xfrm_policy * xfrm_policy_insert_list(struct hlist_head *chain, struct xfrm_policy *policy, bool excl); -static void xfrm_policy_insert_inexact_list(struct hlist_head *chain, - struct xfrm_policy *policy); static bool xfrm_policy_find_inexact_candidates(struct xfrm_pol_inexact_candidates *cand, @@ -411,7 +413,6 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp) if (policy) { write_pnet(&policy->xp_net, net); INIT_LIST_HEAD(&policy->walk.all); - INIT_HLIST_NODE(&policy->bydst_inexact_list); INIT_HLIST_NODE(&policy->bydst); INIT_HLIST_NODE(&policy->byidx); rwlock_init(&policy->lock); @@ -1229,26 +1230,31 @@ xfrm_policy_inexact_insert(struct xfrm_policy *policy, u8 dir, int excl) return ERR_PTR(-EEXIST); } - chain = &net->xfrm.policy_inexact[dir]; - xfrm_policy_insert_inexact_list(chain, policy); - if (delpol) __xfrm_policy_inexact_prune_bin(bin, false); return delpol; } +static bool xfrm_policy_is_dead_or_sk(const struct 
xfrm_policy *policy) +{ + int dir; + + if (policy->walk.dead) + return true; + + dir = xfrm_policy_id2dir(policy->index); + return dir >= XFRM_POLICY_MAX; +} + static void xfrm_hash_rebuild(struct work_struct *work) { struct net *net = container_of(work, struct net, xfrm.policy_hthresh.work); - unsigned int hmask; struct xfrm_policy *pol; struct xfrm_policy *policy; struct hlist_head *chain; - struct hlist_head *odst; struct hlist_node *newpos; - int i; int dir; unsigned seq; u8 lbits4, rbits4, lbits6, rbits6; @@ -1275,13 +1281,10 @@ static void xfrm_hash_rebuild(struct work_struct *work) struct xfrm_pol_inexact_bin *bin; u8 dbits, sbits; - if (policy->walk.dead) + if (xfrm_policy_is_dead_or_sk(policy)) continue; dir = xfrm_policy_id2dir(policy->index); - if (dir >= XFRM_POLICY_MAX) - continue; - if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) { if (policy->family == AF_INET) { dbits = rbits4; @@ -1312,23 +1315,7 @@ static void xfrm_hash_rebuild(struct work_struct *work) goto out_unlock; } - /* reset the bydst and inexact table in all directions */ for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { - struct hlist_node *n; - - hlist_for_each_entry_safe(policy, n, - &net->xfrm.policy_inexact[dir], - bydst_inexact_list) { - hlist_del_rcu(&policy->bydst); - hlist_del_init(&policy->bydst_inexact_list); - } - - hmask = net->xfrm.policy_bydst[dir].hmask; - odst = net->xfrm.policy_bydst[dir].table; - for (i = hmask; i >= 0; i--) { - hlist_for_each_entry_safe(policy, n, odst + i, bydst) - hlist_del_rcu(&policy->bydst); - } if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) { /* dir out => dst = remote, src = local */ net->xfrm.policy_bydst[dir].dbits4 = rbits4; @@ -1346,14 +1333,13 @@ static void xfrm_hash_rebuild(struct work_struct *work) /* re-insert all policies by order of creation */ list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) { - if (policy->walk.dead) + if (xfrm_policy_is_dead_or_sk(policy)) continue; - dir = xfrm_policy_id2dir(policy->index); - if 
(dir >= XFRM_POLICY_MAX) { - /* skip socket policies */ - continue; - } + + hlist_del_rcu(&policy->bydst); + newpos = NULL; + dir = xfrm_policy_id2dir(policy->index); chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); @@ -1520,42 +1506,6 @@ static const struct rhashtable_params xfrm_pol_inexact_params = { .automatic_shrinking = true, }; -static void xfrm_policy_insert_inexact_list(struct hlist_head *chain, - struct xfrm_policy *policy) -{ - struct xfrm_policy *pol, *delpol = NULL; - struct hlist_node *newpos = NULL; - int i = 0; - - hlist_for_each_entry(pol, chain, bydst_inexact_list) { - if (pol->type == policy->type && - pol->if_id == policy->if_id && - !selector_cmp(&pol->selector, &policy->selector) && - xfrm_policy_mark_match(&policy->mark, pol) && - xfrm_sec_ctx_match(pol->security, policy->security) && - !WARN_ON(delpol)) { - delpol = pol; - if (policy->priority > pol->priority) - continue; - } else if (policy->priority >= pol->priority) { - newpos = &pol->bydst_inexact_list; - continue; - } - if (delpol) - break; - } - - if (newpos && policy->xdo.type != XFRM_DEV_OFFLOAD_PACKET) - hlist_add_behind_rcu(&policy->bydst_inexact_list, newpos); - else - hlist_add_head_rcu(&policy->bydst_inexact_list, chain); - - hlist_for_each_entry(pol, chain, bydst_inexact_list) { - pol->pos = i; - i++; - } -} - static struct xfrm_policy *xfrm_policy_insert_list(struct hlist_head *chain, struct xfrm_policy *policy, bool excl) @@ -2295,10 +2245,52 @@ out: return pol; } +static u32 xfrm_gen_pos_slow(struct net *net) +{ + struct xfrm_policy *policy; + u32 i = 0; + + /* oldest entry is last in list */ + list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) { + if (!xfrm_policy_is_dead_or_sk(policy)) + policy->pos = ++i; + } + + return i; +} + +static u32 xfrm_gen_pos(struct net *net) +{ + const struct xfrm_policy *policy; + u32 i = 0; + + /* most recently added policy is at the head of the list */ + list_for_each_entry(policy, 
&net->xfrm.policy_all, walk.all) { + if (xfrm_policy_is_dead_or_sk(policy)) + continue; + + if (policy->pos == UINT_MAX) + return xfrm_gen_pos_slow(net); + + i = policy->pos + 1; + break; + } + + return i; +} + static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) { struct net *net = xp_net(pol); + switch (dir) { + case XFRM_POLICY_IN: + case XFRM_POLICY_FWD: + case XFRM_POLICY_OUT: + pol->pos = xfrm_gen_pos(net); + break; + } + list_add(&pol->walk.all, &net->xfrm.policy_all); net->xfrm.policy_count[dir]++; xfrm_pol_hold(pol); @@ -2315,7 +2307,6 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, /* Socket policies are not hashed. */ if (!hlist_unhashed(&pol->bydst)) { hlist_del_rcu(&pol->bydst); - hlist_del_init(&pol->bydst_inexact_list); hlist_del(&pol->byidx); } @@ -4438,63 +4429,50 @@ EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete); #endif #ifdef CONFIG_XFRM_MIGRATE -static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp, - const struct xfrm_selector *sel_tgt) -{ - if (sel_cmp->proto == IPSEC_ULPROTO_ANY) { - if (sel_tgt->family == sel_cmp->family && - xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr, - sel_cmp->family) && - xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr, - sel_cmp->family) && - sel_tgt->prefixlen_d == sel_cmp->prefixlen_d && - sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) { - return true; - } - } else { - if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) { - return true; - } - } - return false; -} - static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel, u8 dir, u8 type, struct net *net, u32 if_id) { - struct xfrm_policy *pol, *ret = NULL; - struct hlist_head *chain; - u32 priority = ~0U; + struct xfrm_policy *pol; + struct flowi fl; - spin_lock_bh(&net->xfrm.xfrm_policy_lock); - chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir); - hlist_for_each_entry(pol, chain, bydst) { - if ((if_id == 0 || pol->if_id == if_id) && - 
xfrm_migrate_selector_match(sel, &pol->selector) && - pol->type == type) { - ret = pol; - priority = ret->priority; - break; - } - } - chain = &net->xfrm.policy_inexact[dir]; - hlist_for_each_entry(pol, chain, bydst_inexact_list) { - if ((pol->priority >= priority) && ret) - break; + memset(&fl, 0, sizeof(fl)); - if ((if_id == 0 || pol->if_id == if_id) && - xfrm_migrate_selector_match(sel, &pol->selector) && - pol->type == type) { - ret = pol; + fl.flowi_proto = sel->proto; + + switch (sel->family) { + case AF_INET: + fl.u.ip4.saddr = sel->saddr.a4; + fl.u.ip4.daddr = sel->daddr.a4; + if (sel->proto == IPSEC_ULPROTO_ANY) break; - } + fl.u.flowi4_oif = sel->ifindex; + fl.u.ip4.fl4_sport = sel->sport; + fl.u.ip4.fl4_dport = sel->dport; + break; + case AF_INET6: + fl.u.ip6.saddr = sel->saddr.in6; + fl.u.ip6.daddr = sel->daddr.in6; + if (sel->proto == IPSEC_ULPROTO_ANY) + break; + fl.u.flowi6_oif = sel->ifindex; + fl.u.ip6.fl4_sport = sel->sport; + fl.u.ip6.fl4_dport = sel->dport; + break; + default: + return ERR_PTR(-EAFNOSUPPORT); } - xfrm_pol_hold(ret); + rcu_read_lock(); - spin_unlock_bh(&net->xfrm.xfrm_policy_lock); + pol = xfrm_policy_lookup_bytype(net, type, &fl, sel->family, dir, if_id); + if (IS_ERR_OR_NULL(pol)) + goto out_unlock; - return ret; + if (!xfrm_pol_hold_rcu(pol)) + pol = NULL; +out_unlock: + rcu_read_unlock(); + return pol; } static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t) @@ -4631,9 +4609,9 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, /* Stage 1 - find policy */ pol = xfrm_migrate_policy_find(sel, dir, type, net, if_id); - if (!pol) { + if (IS_ERR_OR_NULL(pol)) { NL_SET_ERR_MSG(extack, "Target policy not found"); - err = -ENOENT; + err = IS_ERR(pol) ? 
PTR_ERR(pol) : -ENOENT; goto out; } diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index a5f1c0c27dff..3b7df5477317 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -65,10 +65,11 @@ TARGETS += net/af_unix TARGETS += net/forwarding TARGETS += net/hsr TARGETS += net/mptcp -TARGETS += net/openvswitch -TARGETS += net/tcp_ao TARGETS += net/netfilter +TARGETS += net/openvswitch +TARGETS += net/packetdrill TARGETS += net/rds +TARGETS += net/tcp_ao TARGETS += nsfs TARGETS += perf_events TARGETS += pidfd diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh index 74954f6a8f94..2c3c58e65a41 100644 --- a/tools/testing/selftests/kselftest/runner.sh +++ b/tools/testing/selftests/kselftest/runner.sh @@ -111,8 +111,11 @@ run_one() stdbuf="/usr/bin/stdbuf --output=L " fi eval kselftest_cmd_args="\$${kselftest_cmd_args_ref:-}" - cmd="$stdbuf ./$BASENAME_TEST $kselftest_cmd_args" - if [ ! -x "$TEST" ]; then + if [ -x "$TEST" ]; then + cmd="$stdbuf ./$BASENAME_TEST $kselftest_cmd_args" + elif [ -x "./ksft_runner.sh" ]; then + cmd="$stdbuf ./ksft_runner.sh ./$BASENAME_TEST" + else echo "# Warning: file $TEST is not executable" if [ $(head -n 1 "$TEST" | cut -c -2) = "#!" 
] diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 27362e40eb37..22a5d6a7c3f3 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -56,7 +56,7 @@ TEST_PROGS += ip_local_port_range.sh TEST_PROGS += rps_default_mask.sh TEST_PROGS += big_tcp.sh TEST_PROGS += netns-sysctl.sh -TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh +TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh xfrm_policy_add_speed.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c index 535eb2c3d7d1..3ed3882a93b8 100644 --- a/tools/testing/selftests/net/af_unix/msg_oob.c +++ b/tools/testing/selftests/net/af_unix/msg_oob.c @@ -525,6 +525,29 @@ TEST_F(msg_oob, ex_oob_drop_2) } } +TEST_F(msg_oob, ex_oob_oob) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("x", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("", -EAGAIN, 1, 0); + epollpair(false); + siocatmarkpair(false); + + recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); +} + TEST_F(msg_oob, ex_oob_ahead_break) { sendpair("hello", 5, MSG_OOB); diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 776d43a6922d..2bd0c1eb70c5 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -284,7 +284,7 @@ echo "b" | \ ./mptcp_connect -p 10000 -r 0 -t ${timeout_poll} -w 20 \ 127.0.0.1 >/dev/null & wait_connected $ns 10000 -chk_msk_nr 2 "after MPC handshake " +chk_msk_nr 2 "after MPC handshake" chk_last_time_info 10000 chk_msk_remote_key_nr 2 "....chk remote_key" 
chk_msk_fallback_nr 0 "....chk no fallback" diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index b77fb7065bfb..57325d57e4c6 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -345,9 +345,11 @@ do_transfer() local addr_port addr_port=$(printf "%s:%d" ${connect_addr} ${port}) - local result_msg - result_msg="$(printf "%.3s %-5s -> %.3s (%-20s) %-5s" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto})" - mptcp_lib_print_title "${result_msg}" + local pretty_title + pretty_title="$(printf "%.3s %-5s -> %.3s (%-20s) %-5s" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto})" + mptcp_lib_print_title "${pretty_title}" + + local tap_title="${connector_ns:0:3} ${cl_proto} -> ${listener_ns:0:3} (${addr_port}) ${srv_proto}" if $capture; then local capuser @@ -431,7 +433,6 @@ do_transfer() local duration duration=$((stop-start)) - result_msg+=" # time=${duration}ms" printf "(duration %05sms) " "${duration}" if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then mptcp_lib_pr_fail "client exit code $retc, server $rets" @@ -444,7 +445,7 @@ do_transfer() echo cat "$capout" - mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}" + mptcp_lib_result_fail "${TEST_GROUP}: ${tap_title}" return 1 fi @@ -544,12 +545,12 @@ do_transfer() if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then mptcp_lib_pr_ok "${extra:1}" - mptcp_lib_result_pass "${TEST_GROUP}: ${result_msg}" + mptcp_lib_result_pass "${TEST_GROUP}: ${tap_title}" else if [ -n "${extra}" ]; then mptcp_lib_print_warn "${extra:1}" fi - mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}" + mptcp_lib_result_fail "${TEST_GROUP}: ${tap_title}" fi cat "$capout" @@ -848,6 +849,8 @@ stop_if_error() make_file "$cin" "client" make_file "$sin" "server" +mptcp_lib_subtests_last_ts_reset + check_mptcp_disabled stop_if_error "The kernel configuration is not valid for MPTCP" diff --git 
a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 43f8a9bd84c4..3564cd06643c 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3959,9 +3959,11 @@ if [ ${#tests[@]} -eq 0 ]; then tests=("${all_tests_names[@]}") fi +mptcp_lib_subtests_last_ts_reset for subtests in "${tests[@]}"; do "${subtests}" done +append_prev_results if [ ${ret} -ne 0 ]; then echo @@ -3972,7 +3974,6 @@ if [ ${ret} -ne 0 ]; then echo fi -append_prev_results mptcp_lib_result_print_all_tap exit $ret diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 4578a331041e..975d4d4c862a 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -29,6 +29,7 @@ declare -rx MPTCP_LIB_AF_INET6=10 MPTCP_LIB_SUBTESTS=() MPTCP_LIB_SUBTESTS_DUPLICATED=0 MPTCP_LIB_SUBTEST_FLAKY=0 +MPTCP_LIB_SUBTESTS_LAST_TS_MS= MPTCP_LIB_TEST_COUNTER=0 MPTCP_LIB_TEST_FORMAT="%02u %-50s" MPTCP_LIB_IP_MPTCP=0 @@ -205,6 +206,11 @@ mptcp_lib_kversion_ge() { mptcp_lib_fail_if_expected_feature "kernel version ${1} lower than ${v}" } +mptcp_lib_subtests_last_ts_reset() { + MPTCP_LIB_SUBTESTS_LAST_TS_MS="$(date +%s%3N)" +} +mptcp_lib_subtests_last_ts_reset + __mptcp_lib_result_check_duplicated() { local subtest @@ -219,13 +225,22 @@ __mptcp_lib_result_check_duplicated() { __mptcp_lib_result_add() { local result="${1}" + local time="time=" + local ts_prev_ms shift local id=$((${#MPTCP_LIB_SUBTESTS[@]} + 1)) __mptcp_lib_result_check_duplicated "${*}" - MPTCP_LIB_SUBTESTS+=("${result} ${id} - ${KSFT_TEST}: ${*}") + # not to add two '#' + [[ "${*}" != *"#"* ]] && time="# ${time}" + + ts_prev_ms="${MPTCP_LIB_SUBTESTS_LAST_TS_MS}" + mptcp_lib_subtests_last_ts_reset + time+="$((MPTCP_LIB_SUBTESTS_LAST_TS_MS - ts_prev_ms))ms" + + MPTCP_LIB_SUBTESTS+=("${result} ${id} - ${KSFT_TEST}: ${*} ${time}") } # $1: test 
name diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 68899a303a1a..5e8d5b83e2d0 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -349,6 +349,7 @@ init make_file "$cin" "client" 1 make_file "$sin" "server" 1 trap cleanup EXIT +mptcp_lib_subtests_last_ts_reset run_tests $ns1 $ns2 10.0.1.1 run_tests $ns1 $ns2 dead:beef:1::1 diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 2757378b1b13..2e6648a2b2c0 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -137,6 +137,8 @@ check() fi } +mptcp_lib_subtests_last_ts_reset + check "show_endpoints" "" "defaults addr list" default_limits="$(get_limits)" diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index f74e1c3c126d..8fa77c8e9b65 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -286,6 +286,7 @@ while getopts "bcdhi" option;do done setup +mptcp_lib_subtests_last_ts_reset run_test 10 10 0 0 "balanced bwidth" run_test 10 10 1 25 "balanced bwidth with unbalanced delay" diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 9cb05978269d..3651f73451cf 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -150,6 +150,7 @@ mptcp_lib_events "${ns2}" "${client_evts}" client_evts_pid server_evts=$(mktemp) mptcp_lib_events "${ns1}" "${server_evts}" server_evts_pid sleep 0.5 +mptcp_lib_subtests_last_ts_reset print_title "Init" print_test "Created network namespaces ns1, ns2" diff --git a/tools/testing/selftests/net/packetdrill/Makefile b/tools/testing/selftests/net/packetdrill/Makefile new 
file mode 100644 index 000000000000..870f7258dc8d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 + +TEST_INCLUDES := ksft_runner.sh \ + defaults.sh \ + ../../kselftest/ktap_helpers.sh + +TEST_PROGS := $(wildcard *.pkt) + +include ../../lib.mk diff --git a/tools/testing/selftests/net/packetdrill/config b/tools/testing/selftests/net/packetdrill/config new file mode 100644 index 000000000000..0d402830f18d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/config @@ -0,0 +1,5 @@ +CONFIG_IPV6=y +CONFIG_NET_SCH_FIFO=y +CONFIG_PROC_SYSCTL=y +CONFIG_TCP_MD5SIG=y +CONFIG_TUN=y diff --git a/tools/testing/selftests/net/packetdrill/defaults.sh b/tools/testing/selftests/net/packetdrill/defaults.sh new file mode 100755 index 000000000000..1095a7b22f44 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/defaults.sh @@ -0,0 +1,63 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Set standard production config values that relate to TCP behavior. + +# Flush old cached data (fastopen cookies). +ip tcp_metrics flush all > /dev/null 2>&1 + +# TCP min, default, and max receive and send buffer sizes. +sysctl -q net.ipv4.tcp_rmem="4096 540000 $((15*1024*1024))" +sysctl -q net.ipv4.tcp_wmem="4096 $((256*1024)) 4194304" + +# TCP timestamps. +sysctl -q net.ipv4.tcp_timestamps=1 + +# TCP SYN(ACK) retry thresholds +sysctl -q net.ipv4.tcp_syn_retries=5 +sysctl -q net.ipv4.tcp_synack_retries=5 + +# TCP Forward RTO-Recovery, RFC 5682. +sysctl -q net.ipv4.tcp_frto=2 + +# TCP Selective Acknowledgements (SACK) +sysctl -q net.ipv4.tcp_sack=1 + +# TCP Duplicate Selective Acknowledgements (DSACK) +sysctl -q net.ipv4.tcp_dsack=1 + +# TCP FACK (Forward Acknowldgement) +sysctl -q net.ipv4.tcp_fack=0 + +# TCP reordering degree ("dupthresh" threshold for entering Fast Recovery). +sysctl -q net.ipv4.tcp_reordering=3 + +# TCP congestion control. 
+sysctl -q net.ipv4.tcp_congestion_control=cubic + +# TCP slow start after idle. +sysctl -q net.ipv4.tcp_slow_start_after_idle=0 + +# TCP RACK and TLP. +sysctl -q net.ipv4.tcp_early_retrans=4 net.ipv4.tcp_recovery=1 + +# TCP method for deciding when to defer sending to accumulate big TSO packets. +sysctl -q net.ipv4.tcp_tso_win_divisor=3 + +# TCP Explicit Congestion Notification (ECN) +sysctl -q net.ipv4.tcp_ecn=0 + +sysctl -q net.ipv4.tcp_pacing_ss_ratio=200 +sysctl -q net.ipv4.tcp_pacing_ca_ratio=120 +sysctl -q net.ipv4.tcp_notsent_lowat=4294967295 > /dev/null 2>&1 + +sysctl -q net.ipv4.tcp_fastopen=0x70403 +sysctl -q net.ipv4.tcp_fastopen_key=a1a1a1a1-b2b2b2b2-c3c3c3c3-d4d4d4d4 + +sysctl -q net.ipv4.tcp_syncookies=1 + +# Override the default qdisc on the tun device. +# Many tests fail with timing errors if the default +# is FQ and that paces their flows. +tc qdisc add dev tun0 root pfifo + diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh new file mode 100755 index 000000000000..2f62caccbbbc --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source "$(dirname $(realpath $0))/../../kselftest/ktap_helpers.sh" + +readonly ipv4_args=('--ip_version=ipv4 ' + '--local_ip=192.168.0.1 ' + '--gateway_ip=192.168.0.1 ' + '--netmask_ip=255.255.0.0 ' + '--remote_ip=192.0.2.1 ' + '-D CMSG_LEVEL_IP=SOL_IP ' + '-D CMSG_TYPE_RECVERR=IP_RECVERR ') + +readonly ipv6_args=('--ip_version=ipv6 ' + '--mtu=1520 ' + '--local_ip=fd3d:0a0b:17d6::1 ' + '--gateway_ip=fd3d:0a0b:17d6:8888::1 ' + '--remote_ip=fd3d:fa7b:d17d::1 ' + '-D CMSG_LEVEL_IP=SOL_IPV6 ' + '-D CMSG_TYPE_RECVERR=IPV6_RECVERR ') + +if [ $# -ne 1 ]; then + ktap_exit_fail_msg "usage: $0 <script>" + exit "$KSFT_FAIL" +fi +script="$1" + +if [ -z "$(which packetdrill)" ]; then + ktap_skip_all "packetdrill not found in PATH" + exit "$KSFT_SKIP" +fi + 
+ktap_print_header +ktap_set_plan 2 + +packetdrill ${ipv4_args[@]} $(basename $script) > /dev/null \ + && ktap_test_pass "ipv4" || ktap_test_fail "ipv4" +packetdrill ${ipv6_args[@]} $(basename $script) > /dev/null \ + && ktap_test_pass "ipv6" || ktap_test_fail "ipv6" + +ktap_finished diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt new file mode 100644 index 000000000000..df49c67645ac --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP_INQ and TCP_CM_INQ on the client side. +`./defaults.sh +` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +// Connect to the server and enable TCP_INQ. + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 setsockopt(3, SOL_TCP, TCP_INQ, [1], 4) = 0 + + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +.01 < S. 0:0(0) ack 1 win 5792 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 7> + +0 > . 1:1(0) ack 1 <nop,nop,TS val 200 ecr 700> + +// Now we have 10K of data ready on the socket. + +0 < . 1:10001(10000) ack 1 win 514 + +0 > . 1:1(0) ack 10001 <nop,nop,TS val 200 ecr 700> + +// We read 1K and we should have 9K ready to read. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{..., 1000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=9000}]}, 0) = 1000 +// We read 9K and we should have no further data ready to read. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{..., 9000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=0}]}, 0) = 9000 + +// Server sends more data and closes the connections. + +0 < F. 10001:20001(10000) ack 1 win 514 + +0 > . 
1:1(0) ack 20002 <nop,nop,TS val 200 ecr 700> + +// We read 10K and we should have one "fake" byte because the connection is +// closed. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{..., 10000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=1}]}, 0) = 10000 +// Now, receive EOF. + +0 read(3, ..., 2000) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt new file mode 100644 index 000000000000..04a5e2590c62 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP_INQ and TCP_CM_INQ on the server side. +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 514 + +// Accept the connection and enable TCP_INQ. + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_TCP, TCP_INQ, [1], 4) = 0 + +// Now we have 10K of data ready on the socket. + +0 < . 1:10001(10000) ack 1 win 514 + +0 > . 1:1(0) ack 10001 + +// We read 2K and we should have 8K ready to read. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 2000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=8000}]}, 0) = 2000 +// We read 8K and we should have no further data ready to read. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 8000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=0}]}, 0) = 8000 +// Client sends more data and closes the connections. + +0 < F. 10001:20001(10000) ack 1 win 514 + +0 > . 
1:1(0) ack 20002 + +// We read 10K and we should have one "fake" byte because the connection is +// closed. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 10000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=1}]}, 0) = 10000 +// Now, receive error. + +0 read(3, ..., 2000) = -1 ENOTCONN (Transport endpoint is not connected) diff --git a/tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt b/tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt new file mode 100644 index 000000000000..25dfef95d3f8 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test what happens when client does not provide MD5 on SYN, +// but then does on the ACK that completes the three-way handshake. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> +// Ooh, weird: client provides MD5 option on the ACK: + +.01 < . 1:1(0) ack 1 win 514 <md5 000102030405060708090a0b0c0d0e0f,nop,nop> + +.01 < . 1:1(0) ack 1 win 514 <md5 000102030405060708090a0b0c0d0e0f,nop,nop> + +// The TCP listener refcount should be 2, but on buggy kernels it can be 0: + +0 `grep " 0A " /proc/net/tcp /proc/net/tcp6 | grep ":1F90"` + +// Now here comes the legit ACK: + +.01 < . 1:1(0) ack 1 win 514 + +// Make sure the connection is OK: + +0 accept(3, ..., ...) 
= 4 + + +.01 write(4, ..., 1000) = 1000 diff --git a/tools/testing/selftests/net/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c index 9eb42570294d..16ac4df55fdb 100644 --- a/tools/testing/selftests/net/rxtimestamp.c +++ b/tools/testing/selftests/net/rxtimestamp.c @@ -57,6 +57,8 @@ static struct sof_flag sof_flags[] = { SOF_FLAG(SOF_TIMESTAMPING_SOFTWARE), SOF_FLAG(SOF_TIMESTAMPING_RX_SOFTWARE), SOF_FLAG(SOF_TIMESTAMPING_RX_HARDWARE), + SOF_FLAG(SOF_TIMESTAMPING_OPT_RX_FILTER), + SOF_FLAG(SOF_TIMESTAMPING_RAW_HARDWARE), }; static struct socket_type socket_types[] = { @@ -98,6 +100,22 @@ static struct test_case test_cases[] = { {} }, { + { .so_timestamping = SOF_TIMESTAMPING_RAW_HARDWARE + | SOF_TIMESTAMPING_OPT_RX_FILTER }, + {} + }, + { + { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE + | SOF_TIMESTAMPING_OPT_RX_FILTER }, + {} + }, + { + { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE + | SOF_TIMESTAMPING_RX_SOFTWARE + | SOF_TIMESTAMPING_OPT_RX_FILTER }, + { .swtstamp = true } + }, + { { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_RX_SOFTWARE }, { .swtstamp = true } diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c index ec60a16c9307..d626f22f9550 100644 --- a/tools/testing/selftests/net/txtimestamp.c +++ b/tools/testing/selftests/net/txtimestamp.c @@ -356,8 +356,12 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len) } } - if (batch > 1) + if (batch > 1) { fprintf(stderr, "batched %d timestamps\n", batch); + } else if (!batch) { + fprintf(stderr, "Failed to report timestamps\n"); + test_failed = true; + } } static int recv_errmsg(int fd) diff --git a/tools/testing/selftests/net/xfrm_policy_add_speed.sh b/tools/testing/selftests/net/xfrm_policy_add_speed.sh new file mode 100755 index 000000000000..2fab29d3cb91 --- /dev/null +++ b/tools/testing/selftests/net/xfrm_policy_add_speed.sh @@ -0,0 +1,83 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +source lib.sh + 
+timeout=4m +ret=0 +tmp=$(mktemp) +cleanup() { + cleanup_all_ns + rm -f "$tmp" +} + +trap cleanup EXIT + +maxpolicies=100000 +[ "$KSFT_MACHINE_SLOW" = "yes" ] && maxpolicies=10000 + +do_dummies4() { + local dir="$1" + local max="$2" + + local policies + local pfx + pfx=30 + policies=0 + + ip netns exec "$ns" ip xfrm policy flush + + for i in $(seq 1 100);do + local s + local d + for j in $(seq 1 255);do + s=$((i+0)) + d=$((i+100)) + + for a in $(seq 1 8 255); do + policies=$((policies+1)) + [ "$policies" -gt "$max" ] && return + echo xfrm policy add src 10.$s.$j.0/30 dst 10.$d.$j.$a/$pfx dir $dir action block + done + for a in $(seq 1 8 255); do + policies=$((policies+1)) + [ "$policies" -gt "$max" ] && return + echo xfrm policy add src 10.$s.$j.$a/30 dst 10.$d.$j.0/$pfx dir $dir action block + done + done + done +} + +setup_ns ns + +do_bench() +{ + local max="$1" + + start=$(date +%s%3N) + do_dummies4 "out" "$max" > "$tmp" + if ! timeout "$timeout" ip netns exec "$ns" ip -batch "$tmp";then + echo "WARNING: policy insertion cancelled after $timeout" + ret=1 + fi + stop=$(date +%s%3N) + + result=$((stop-start)) + + policies=$(wc -l < "$tmp") + printf "Inserted %-06s policies in $result ms\n" $policies + + have=$(ip netns exec "$ns" ip xfrm policy show | grep "action block" | wc -l) + if [ "$have" -ne "$policies" ]; then + echo "WARNING: mismatch, have $have policies, expected $policies" + ret=1 + fi +} + +p=100 +while [ $p -le "$maxpolicies" ]; do + do_bench "$p" + p="${p}0" +done + +exit $ret |