Diffstat (limited to 'drivers/net/ethernet/intel')
72 files changed, 2861 insertions, 1091 deletions
diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 61e60e4de600..da6e303ad99b 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -4229,8 +4229,6 @@ process_skb: */ p = buffer_info->rxbuf.page; if (length <= copybreak) { - u8 *vaddr; - if (likely(!(netdev->features & NETIF_F_RXFCS))) length -= 4; skb = e1000_alloc_rx_skb(adapter, @@ -4238,10 +4236,9 @@ process_skb: if (!skb) break; - vaddr = kmap_atomic(p); - memcpy(skb_tail_pointer(skb), vaddr, - length); - kunmap_atomic(vaddr); + memcpy(skb_tail_pointer(skb), + page_address(p), length); + /* re-use the page, so don't erase * buffer_info->rxbuf.page */ diff --git a/drivers/net/ethernet/intel/e1000e/Makefile b/drivers/net/ethernet/intel/e1000e/Makefile index 44e58b6e7660..0baa15503c38 100644 --- a/drivers/net/ethernet/intel/e1000e/Makefile +++ b/drivers/net/ethernet/intel/e1000e/Makefile @@ -5,6 +5,9 @@ # Makefile for the Intel(R) PRO/1000 ethernet driver # +ccflags-y += -I$(src) +subdir-ccflags-y += -I$(src) + obj-$(CONFIG_E1000E) += e1000e.o e1000e-objs := 82571.o ich8lan.o 80003es2lan.o \ diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index e8a9a9610ac6..a187582d2299 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -116,7 +116,8 @@ enum e1000_boards { board_pch_spt, board_pch_cnp, board_pch_tgp, - board_pch_adp + board_pch_adp, + board_pch_mtp }; struct e1000_ps_page { @@ -504,6 +505,7 @@ extern const struct e1000_info e1000_pch_spt_info; extern const struct e1000_info e1000_pch_cnp_info; extern const struct e1000_info e1000_pch_tgp_info; extern const struct e1000_info e1000_pch_adp_info; +extern const struct e1000_info e1000_pch_mtp_info; extern const struct e1000_info e1000_es2_info; void e1000e_ptp_init(struct e1000_adapter *adapter); diff --git a/drivers/net/ethernet/intel/e1000e/e1000e_trace.h b/drivers/net/ethernet/intel/e1000e/e1000e_trace.h new file mode 100644 index 000000000000..19d3cf4d924e --- /dev/null +++ b/drivers/net/ethernet/intel/e1000e/e1000e_trace.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2022, Intel Corporation. */ +/* Modeled on trace-events-sample.h */ +/* The trace subsystem name for e1000e will be "e1000e_trace". + * + * This file is named e1000e_trace.h. + * + * Since this include file's name is different from the trace + * subsystem name, we'll have to define TRACE_INCLUDE_FILE at the end + * of this file. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM e1000e_trace + +#if !defined(_TRACE_E1000E_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_E1000E_TRACE_H + +#include <linux/tracepoint.h> + +TRACE_EVENT(e1000e_trace_mac_register, + TP_PROTO(uint32_t reg), + TP_ARGS(reg), + TP_STRUCT__entry(__field(uint32_t, reg)), + TP_fast_assign(__entry->reg = reg;), + TP_printk("event: TraceHub e1000e mac register: 0x%08x", + __entry->reg) +); + +#endif +/* This must be outside ifdef _E1000E_TRACE_H */ +/* This trace include file is not located in the .../include/trace + * with the kernel tracepoint definitions, because we're a loadable + * module. + */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . 
+#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE e1000e_trace + +#include <trace/define_trace.h> diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index 51a5afe9df2f..59e82d131d88 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -908,6 +908,7 @@ static int e1000_reg_test(struct e1000_adapter *adapter, u64 *data) case e1000_pch_adp: case e1000_pch_mtp: case e1000_pch_lnp: + case e1000_pch_ptp: mask |= BIT(18); break; default: @@ -1575,6 +1576,7 @@ static void e1000_loopback_cleanup(struct e1000_adapter *adapter) case e1000_pch_adp: case e1000_pch_mtp: case e1000_pch_lnp: + case e1000_pch_ptp: fext_nvm11 = er32(FEXTNVM11); fext_nvm11 &= ~E1000_FEXTNVM11_DISABLE_MULR_FIX; ew32(FEXTNVM11, fext_nvm11); diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h index bcf680e83811..29f9fae35f42 100644 --- a/drivers/net/ethernet/intel/e1000e/hw.h +++ b/drivers/net/ethernet/intel/e1000e/hw.h @@ -114,6 +114,14 @@ struct e1000_hw; #define E1000_DEV_ID_PCH_LNP_I219_V20 0x550F #define E1000_DEV_ID_PCH_LNP_I219_LM21 0x5510 #define E1000_DEV_ID_PCH_LNP_I219_V21 0x5511 +#define E1000_DEV_ID_PCH_ARL_I219_LM24 0x57A0 +#define E1000_DEV_ID_PCH_ARL_I219_V24 0x57A1 +#define E1000_DEV_ID_PCH_PTP_I219_LM25 0x57B3 +#define E1000_DEV_ID_PCH_PTP_I219_V25 0x57B4 +#define E1000_DEV_ID_PCH_PTP_I219_LM26 0x57B5 +#define E1000_DEV_ID_PCH_PTP_I219_V26 0x57B6 +#define E1000_DEV_ID_PCH_PTP_I219_LM27 0x57B7 +#define E1000_DEV_ID_PCH_PTP_I219_V27 0x57B8 #define E1000_REVISION_4 4 @@ -141,6 +149,7 @@ enum e1000_mac_type { e1000_pch_adp, e1000_pch_mtp, e1000_pch_lnp, + e1000_pch_ptp, }; enum e1000_media_type { diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 9466f65a6da7..0c7fd10312c8 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -322,6 +322,7 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw) case e1000_pch_adp: case e1000_pch_mtp: case e1000_pch_lnp: + case e1000_pch_ptp: if (e1000_phy_is_accessible_pchlan(hw)) break; @@ -468,6 +469,7 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw) case e1000_pch_adp: case e1000_pch_mtp: case e1000_pch_lnp: + case e1000_pch_ptp: /* In case the PHY needs to be in mdio slow mode, * set slow mode and try to get the PHY id again. 
*/ @@ -714,6 +716,7 @@ static s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw) case e1000_pch_adp: case e1000_pch_mtp: case e1000_pch_lnp: + case e1000_pch_ptp: case e1000_pchlan: /* check management mode */ mac->ops.check_mng_mode = e1000_check_mng_mode_pchlan; @@ -1681,6 +1684,7 @@ static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter) case e1000_pch_adp: case e1000_pch_mtp: case e1000_pch_lnp: + case e1000_pch_ptp: rc = e1000_init_phy_params_pchlan(hw); break; default: @@ -2137,6 +2141,7 @@ static s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw) case e1000_pch_adp: case e1000_pch_mtp: case e1000_pch_lnp: + case e1000_pch_ptp: sw_cfg_mask = E1000_FEXTNVM_SW_CONFIG_ICH8M; break; default: @@ -3182,6 +3187,7 @@ static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank) case e1000_pch_adp: case e1000_pch_mtp: case e1000_pch_lnp: + case e1000_pch_ptp: bank1_offset = nvm->flash_bank_size; act_offset = E1000_ICH_NVM_SIG_WORD; @@ -4122,6 +4128,7 @@ static s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw) case e1000_pch_adp: case e1000_pch_mtp: case e1000_pch_lnp: + case e1000_pch_ptp: word = NVM_COMPAT; valid_csum_mask = NVM_COMPAT_VALID_CSUM; break; @@ -6041,3 +6048,23 @@ const struct e1000_info e1000_pch_adp_info = { .phy_ops = &ich8_phy_ops, .nvm_ops = &spt_nvm_ops, }; + +const struct e1000_info e1000_pch_mtp_info = { + .mac = e1000_pch_mtp, + .flags = FLAG_IS_ICH + | FLAG_HAS_WOL + | FLAG_HAS_HW_TIMESTAMP + | FLAG_HAS_CTRLEXT_ON_LOAD + | FLAG_HAS_AMT + | FLAG_HAS_FLASH + | FLAG_HAS_JUMBO_FRAMES + | FLAG_APME_IN_WUC, + .flags2 = FLAG2_HAS_PHY_STATS + | FLAG2_HAS_EEE, + .pba = 26, + .max_hw_frame_size = 9022, + .get_variants = e1000_get_variants_ich8lan, + .mac_ops = &ich8_mac_ops, + .phy_ops = &ich8_phy_ops, + .nvm_ops = &spt_nvm_ops, +}; diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 55cf2f62bb30..04acd1a992fa 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -28,6 +28,8 @@ #include <linux/suspend.h> #include "e1000.h" +#define CREATE_TRACE_POINTS +#include "e1000e_trace.h" char e1000e_driver_name[] = "e1000e"; @@ -53,6 +55,7 @@ static const struct e1000_info *e1000_info_tbl[] = { [board_pch_cnp] = &e1000_pch_cnp_info, [board_pch_tgp] = &e1000_pch_tgp_info, [board_pch_adp] = &e1000_pch_adp_info, + [board_pch_mtp] = &e1000_pch_mtp_info, }; struct e1000_reg_info { @@ -1388,26 +1391,18 @@ static bool e1000_clean_rx_irq_ps(struct e1000_ring *rx_ring, int *work_done, /* page alloc/put takes too long and effects small * packet throughput, so unsplit small packets and - * save the alloc/put only valid in softirq (napi) - * context to call kmap_* + * save the alloc/put */ if (l1 && (l1 <= copybreak) && ((length + l1) <= adapter->rx_ps_bsize0)) { - u8 *vaddr; - ps_page = &buffer_info->ps_pages[0]; - /* there is no documentation about how to call - * kmap_atomic, so we can't hold the mapping - * very long - */ dma_sync_single_for_cpu(&pdev->dev, ps_page->dma, PAGE_SIZE, DMA_FROM_DEVICE); - vaddr = kmap_atomic(ps_page->page); - memcpy(skb_tail_pointer(skb), vaddr, l1); - kunmap_atomic(vaddr); + memcpy(skb_tail_pointer(skb), + page_address(ps_page->page), l1); dma_sync_single_for_device(&pdev->dev, ps_page->dma, PAGE_SIZE, @@ -1607,11 +1602,9 @@ static bool e1000_clean_jumbo_rx_irq(struct e1000_ring *rx_ring, int *work_done, */ if (length <= copybreak && skb_tailroom(skb) >= length) { - u8 *vaddr; - vaddr = 
kmap_atomic(buffer_info->page); - memcpy(skb_tail_pointer(skb), vaddr, + memcpy(skb_tail_pointer(skb), + page_address(buffer_info->page), length); - kunmap_atomic(vaddr); /* re-use the page, so don't erase * buffer_info->page */ @@ -3552,6 +3545,7 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca) case e1000_pch_adp: case e1000_pch_mtp: case e1000_pch_lnp: + case e1000_pch_ptp: if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) { /* Stable 24MHz frequency */ incperiod = INCPERIOD_24MHZ; @@ -4067,6 +4061,7 @@ void e1000e_reset(struct e1000_adapter *adapter) case e1000_pch_adp: case e1000_pch_mtp: case e1000_pch_lnp: + case e1000_pch_ptp: fc->refresh_time = 0xFFFF; fc->pause_time = 0xFFFF; @@ -6348,6 +6343,7 @@ static void e1000e_s0ix_entry_flow(struct e1000_adapter *adapter) mac_data = er32(H2ME); mac_data |= E1000_H2ME_START_DPG; mac_data &= ~E1000_H2ME_EXIT_DPG; + trace_e1000e_trace_mac_register(mac_data); ew32(H2ME, mac_data); } else { /* Request driver configure the device to S0ix */ @@ -6502,6 +6498,7 @@ static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter) mac_data = er32(H2ME); mac_data &= ~E1000_H2ME_START_DPG; mac_data |= E1000_H2ME_EXIT_DPG; + trace_e1000e_trace_mac_register(mac_data); ew32(H2ME, mac_data); /* Poll up to 2.5 seconds for ME to unconfigure DPG. @@ -7905,14 +7902,22 @@ static const struct pci_device_id e1000_pci_tbl[] = { { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V17), board_pch_adp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_LM22), board_pch_adp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_V22), board_pch_adp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM18), board_pch_adp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V18), board_pch_adp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM19), board_pch_adp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V19), board_pch_adp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM20), board_pch_adp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V20), board_pch_adp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM21), board_pch_adp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V21), board_pch_adp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM18), board_pch_mtp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V18), board_pch_mtp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM19), board_pch_mtp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V19), board_pch_mtp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM20), board_pch_mtp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V20), board_pch_mtp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM21), board_pch_mtp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V21), board_pch_mtp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ARL_I219_LM24), board_pch_mtp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ARL_I219_V24), board_pch_mtp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_LM25), board_pch_mtp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_V25), board_pch_mtp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_LM26), board_pch_mtp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_V26), board_pch_mtp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_LM27), board_pch_mtp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_V27), board_pch_mtp }, { 0, 0, 0, 0, 0, 0, 0 } /* terminate list */ }; diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c index 0e488e4fa5c1..def4566a916f 100644 --- 
a/drivers/net/ethernet/intel/e1000e/ptp.c +++ b/drivers/net/ethernet/intel/e1000e/ptp.c @@ -29,17 +29,11 @@ static int e1000e_phc_adjfine(struct ptp_clock_info *ptp, long delta) struct e1000_adapter *adapter = container_of(ptp, struct e1000_adapter, ptp_clock_info); struct e1000_hw *hw = &adapter->hw; - bool neg_adj = false; unsigned long flags; - u64 adjustment; - u32 timinca, incvalue; + u64 incvalue; + u32 timinca; s32 ret_val; - if (delta < 0) { - neg_adj = true; - delta = -delta; - } - /* Get the System Time Register SYSTIM base frequency */ ret_val = e1000e_get_base_timinca(adapter, &timinca); if (ret_val) @@ -48,11 +42,7 @@ static int e1000e_phc_adjfine(struct ptp_clock_info *ptp, long delta) spin_lock_irqsave(&adapter->systim_lock, flags); incvalue = timinca & E1000_TIMINCA_INCVALUE_MASK; - - adjustment = mul_u64_u64_div_u64(incvalue, (u64)delta, - 1000000ULL << 16); - - incvalue = neg_adj ? (incvalue - adjustment) : (incvalue + adjustment); + incvalue = adjust_by_scaled_ppm(incvalue, delta); timinca &= ~E1000_TIMINCA_INCVALUE_MASK; timinca |= incvalue; @@ -297,6 +287,7 @@ void e1000e_ptp_init(struct e1000_adapter *adapter) case e1000_pch_adp: case e1000_pch_mtp: case e1000_pch_lnp: + case e1000_pch_ptp: if ((hw->mac.type < e1000_pch_lpt) || (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI)) { adapter->ptp_clock_info.max_adj = 24000000 - 1; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 2cca9e84e31e..34ab5ff9823b 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -1229,10 +1229,10 @@ static void fm10k_get_stats64(struct net_device *netdev, continue; do { - start = u64_stats_fetch_begin_irq(&ring->syncp); + start = u64_stats_fetch_begin(&ring->syncp); packets = ring->stats.packets; bytes = ring->stats.bytes; - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while (u64_stats_fetch_retry(&ring->syncp, start)); stats->rx_packets += packets; stats->rx_bytes += bytes; @@ -1245,10 +1245,10 @@ static void fm10k_get_stats64(struct net_device *netdev, continue; do { - start = u64_stats_fetch_begin_irq(&ring->syncp); + start = u64_stats_fetch_begin(&ring->syncp); packets = ring->stats.packets; bytes = ring->stats.bytes; - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while (u64_stats_fetch_retry(&ring->syncp, start)); stats->tx_packets += packets; stats->tx_bytes += bytes; diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 9a60d6b207f7..60e351665c70 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -992,6 +992,7 @@ struct i40e_q_vector { struct rcu_head rcu; /* to avoid race with update stats on free */ char name[I40E_INT_NAME_STR_LEN]; bool arm_wb_state; + int irq_num; /* IRQ assigned to this q_vector */ } ____cacheline_internodealigned_in_smp; /* lan device */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index 60f9e0a6aaca..3357d65a906b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -1795,9 +1795,11 @@ I40E_CHECK_CMD_LENGTH(i40e_aqc_an_advt_reg); /* Set Loopback mode (0x0618) */ struct i40e_aqc_set_lb_mode { __le16 lb_mode; -#define I40E_AQ_LB_PHY_LOCAL 0x01 -#define I40E_AQ_LB_PHY_REMOTE 0x02 -#define I40E_AQ_LB_MAC_LOCAL 0x04 +#define I40E_LEGACY_LOOPBACK_NVM_VER 0x6000 +#define 
I40E_AQ_LB_MAC_LOCAL 0x01 +#define I40E_AQ_LB_PHY_LOCAL 0x05 +#define I40E_AQ_LB_PHY_REMOTE 0x06 +#define I40E_AQ_LB_MAC_LOCAL_LEGACY 0x04 u8 reserved[14]; }; diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 4f01e2a6b6bb..8f764ff5c990 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1831,6 +1831,32 @@ i40e_status i40e_aq_set_phy_int_mask(struct i40e_hw *hw, } /** + * i40e_aq_set_mac_loopback + * @hw: pointer to the HW struct + * @ena_lpbk: Enable or Disable loopback + * @cmd_details: pointer to command details structure or NULL + * + * Enable/disable loopback on a given port + */ +i40e_status i40e_aq_set_mac_loopback(struct i40e_hw *hw, bool ena_lpbk, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_set_lb_mode *cmd = + (struct i40e_aqc_set_lb_mode *)&desc.params.raw; + + i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_set_lb_modes); + if (ena_lpbk) { + if (hw->nvm.version <= I40E_LEGACY_LOOPBACK_NVM_VER) + cmd->lb_mode = cpu_to_le16(I40E_AQ_LB_MAC_LOCAL_LEGACY); + else + cmd->lb_mode = cpu_to_le16(I40E_AQ_LB_MAC_LOCAL); + } + + return i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); +} + +/** * i40e_aq_set_phy_debug * @hw: pointer to the hw struct * @cmd_flags: debug command flags diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index f6fa63e4253c..887a735fe2a7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -154,7 +154,7 @@ __i40e_add_ethtool_stats(u64 **data, void *pointer, * @ring: the ring to copy * * Queue statistics must be copied while protected by - * u64_stats_fetch_begin_irq, so we can't directly use i40e_add_ethtool_stats. + * u64_stats_fetch_begin, so we can't directly use i40e_add_ethtool_stats. * Assumes that queue stats are defined in i40e_gstrings_queue_stats. If the * ring pointer is null, zero out the queue stat values and update the data * pointer. Otherwise safely copy the stats from the ring into the supplied @@ -172,16 +172,16 @@ i40e_add_queue_stats(u64 **data, struct i40e_ring *ring) /* To avoid invalid statistics values, ensure that we keep retrying * the copy until we get a consistent value according to - * u64_stats_fetch_retry_irq. But first, make sure our ring is + * u64_stats_fetch_retry. But first, make sure our ring is * non-null before attempting to access its syncp. */ do { - start = !ring ? 0 : u64_stats_fetch_begin_irq(&ring->syncp); + start = !ring ? 0 : u64_stats_fetch_begin(&ring->syncp); for (i = 0; i < size; i++) { i40e_add_one_ethtool_stat(&(*data)[i], ring, &stats[i]); } - } while (ring && u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while (ring && u64_stats_fetch_retry(&ring->syncp, start)); /* Once we successfully copy the stats in, update the data pointer */ *data += size; @@ -1287,8 +1287,10 @@ static int i40e_set_link_ksettings(struct net_device *netdev, * trying to set something that we do not support. 
*/ if (memcmp(&copy_ks.base, &safe_ks.base, - sizeof(struct ethtool_link_settings))) + sizeof(struct ethtool_link_settings))) { + netdev_err(netdev, "Only speed and autoneg are supported.\n"); return -EOPNOTSUPP; + } while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) { timeout--; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 6416322d7c18..94feea3b2599 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -419,10 +419,10 @@ static void i40e_get_netdev_stats_struct_tx(struct i40e_ring *ring, unsigned int start; do { - start = u64_stats_fetch_begin_irq(&ring->syncp); + start = u64_stats_fetch_begin(&ring->syncp); packets = ring->stats.packets; bytes = ring->stats.bytes; - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while (u64_stats_fetch_retry(&ring->syncp, start)); stats->tx_packets += packets; stats->tx_bytes += bytes; @@ -472,10 +472,10 @@ static void i40e_get_netdev_stats_struct(struct net_device *netdev, if (!ring) continue; do { - start = u64_stats_fetch_begin_irq(&ring->syncp); + start = u64_stats_fetch_begin(&ring->syncp); packets = ring->stats.packets; bytes = ring->stats.bytes; - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while (u64_stats_fetch_retry(&ring->syncp, start)); stats->rx_packets += packets; stats->rx_bytes += bytes; @@ -897,10 +897,10 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) continue; do { - start = u64_stats_fetch_begin_irq(&p->syncp); + start = u64_stats_fetch_begin(&p->syncp); packets = p->stats.packets; bytes = p->stats.bytes; - } while (u64_stats_fetch_retry_irq(&p->syncp, start)); + } while (u64_stats_fetch_retry(&p->syncp, start)); tx_b += bytes; tx_p += packets; tx_restart += p->tx_stats.restart_queue; @@ -915,10 +915,10 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) continue; do { - start = u64_stats_fetch_begin_irq(&p->syncp); + start = u64_stats_fetch_begin(&p->syncp); packets = p->stats.packets; bytes = p->stats.bytes; - } while (u64_stats_fetch_retry_irq(&p->syncp, start)); + } while (u64_stats_fetch_retry(&p->syncp, start)); rx_b += bytes; rx_p += packets; rx_buf += p->rx_stats.alloc_buff_failed; @@ -935,10 +935,10 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) continue; do { - start = u64_stats_fetch_begin_irq(&p->syncp); + start = u64_stats_fetch_begin(&p->syncp); packets = p->stats.packets; bytes = p->stats.bytes; - } while (u64_stats_fetch_retry_irq(&p->syncp, start)); + } while (u64_stats_fetch_retry(&p->syncp, start)); tx_b += bytes; tx_p += packets; tx_restart += p->tx_stats.restart_queue; @@ -3694,6 +3694,24 @@ static int i40e_vsi_configure_tx(struct i40e_vsi *vsi) } /** + * i40e_calculate_vsi_rx_buf_len - Calculates buffer length + * + * @vsi: VSI to calculate rx_buf_len from + */ +static u16 i40e_calculate_vsi_rx_buf_len(struct i40e_vsi *vsi) +{ + if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX)) + return I40E_RXBUFFER_2048; + +#if (PAGE_SIZE < 8192) + if (!I40E_2K_TOO_SMALL_WITH_PADDING && vsi->netdev->mtu <= ETH_DATA_LEN) + return I40E_RXBUFFER_1536 - NET_IP_ALIGN; +#endif + + return PAGE_SIZE < 8192 ? 
I40E_RXBUFFER_3072 : I40E_RXBUFFER_2048; +} + +/** * i40e_vsi_configure_rx - Configure the VSI for Rx * @vsi: the VSI being configured * @@ -3704,20 +3722,14 @@ static int i40e_vsi_configure_rx(struct i40e_vsi *vsi) int err = 0; u16 i; - if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX)) { - vsi->max_frame = I40E_MAX_RXBUFFER; - vsi->rx_buf_len = I40E_RXBUFFER_2048; + vsi->max_frame = I40E_MAX_RXBUFFER; + vsi->rx_buf_len = i40e_calculate_vsi_rx_buf_len(vsi); + #if (PAGE_SIZE < 8192) - } else if (!I40E_2K_TOO_SMALL_WITH_PADDING && - (vsi->netdev->mtu <= ETH_DATA_LEN)) { + if (vsi->netdev && !I40E_2K_TOO_SMALL_WITH_PADDING && + vsi->netdev->mtu <= ETH_DATA_LEN) vsi->max_frame = I40E_RXBUFFER_1536 - NET_IP_ALIGN; - vsi->rx_buf_len = I40E_RXBUFFER_1536 - NET_IP_ALIGN; #endif - } else { - vsi->max_frame = I40E_MAX_RXBUFFER; - vsi->rx_buf_len = (PAGE_SIZE < 8192) ? I40E_RXBUFFER_3072 : - I40E_RXBUFFER_2048; - } /* set up individual rings */ for (i = 0; i < vsi->num_queue_pairs && !err; i++) @@ -4123,6 +4135,7 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename) } /* register for affinity change notifications */ + q_vector->irq_num = irq_num; q_vector->affinity_notify.notify = i40e_irq_affinity_notify; q_vector->affinity_notify.release = i40e_irq_affinity_release; irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify); @@ -12937,6 +12950,29 @@ static void i40e_clear_rss_lut(struct i40e_vsi *vsi) } /** + * i40e_set_loopback - turn on/off loopback mode on underlying PF + * @vsi: ptr to VSI + * @ena: flag to indicate the on/off setting + */ +static int i40e_set_loopback(struct i40e_vsi *vsi, bool ena) +{ + bool if_running = netif_running(vsi->netdev) && + !test_and_set_bit(__I40E_VSI_DOWN, vsi->state); + int ret; + + if (if_running) + i40e_down(vsi); + + ret = i40e_aq_set_mac_loopback(&vsi->back->hw, ena, NULL); + if (ret) + netdev_err(vsi->netdev, "Failed to toggle loopback state\n"); + if (if_running) + i40e_up(vsi); + + return ret; +} + +/** * i40e_set_features - set the netdev feature flags * @netdev: ptr to the netdev being adjusted * @features: the feature set that the stack is suggesting @@ -12976,6 +13012,9 @@ static int i40e_set_features(struct net_device *netdev, if (need_reset) i40e_do_reset(pf, I40E_PF_RESET_FLAG, true); + if ((features ^ netdev->features) & NETIF_F_LOOPBACK) + return i40e_set_loopback(vsi, !!(features & NETIF_F_LOOPBACK)); + return 0; } @@ -13282,7 +13321,7 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog, int i; /* Don't allow frames that span over multiple buffers */ - if (frame_size > vsi->rx_buf_len) { + if (frame_size > i40e_calculate_vsi_rx_buf_len(vsi)) { NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP"); return -EINVAL; } @@ -13738,7 +13777,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) if (!(pf->flags & I40E_FLAG_MFP_ENABLED)) hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC; - netdev->hw_features |= hw_features; + netdev->hw_features |= hw_features | NETIF_F_LOOPBACK; netdev->features |= hw_features | NETIF_F_HW_VLAN_CTAG_FILTER; netdev->hw_enc_features |= NETIF_F_TSO_MANGLEID; diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index ebdcde6f1aeb..9a71121420c3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -105,6 +105,9 @@ enum i40e_status_code i40e_aq_set_phy_config(struct i40e_hw *hw, struct i40e_asq_cmd_details *cmd_details); enum 
i40e_status_code i40e_set_fc(struct i40e_hw *hw, u8 *aq_failures, bool atomic_reset); +i40e_status i40e_aq_set_mac_loopback(struct i40e_hw *hw, + bool ena_lpbk, + struct i40e_asq_cmd_details *cmd_details); i40e_status i40e_aq_set_phy_int_mask(struct i40e_hw *hw, u16 mask, struct i40e_asq_cmd_details *cmd_details); i40e_status i40e_aq_clear_pxe_mode(struct i40e_hw *hw, diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index ffea0c9c82f1..c37abbb3cd06 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -347,23 +347,12 @@ static int i40e_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) { struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps); struct i40e_hw *hw = &pf->hw; - u64 adj, freq, diff; - int neg_adj = 0; - - if (scaled_ppm < 0) { - neg_adj = 1; - scaled_ppm = -scaled_ppm; - } + u64 adj, base_adj; smp_mb(); /* Force any pending update before accessing. */ - freq = I40E_PTP_40GB_INCVAL * READ_ONCE(pf->ptp_adj_mult); - diff = mul_u64_u64_div_u64(freq, (u64)scaled_ppm, - 1000000ULL << 16); + base_adj = I40E_PTP_40GB_INCVAL * READ_ONCE(pf->ptp_adj_mult); - if (neg_adj) - adj = I40E_PTP_40GB_INCVAL - diff; - else - adj = I40E_PTP_40GB_INCVAL + diff; + adj = adjust_by_scaled_ppm(base_adj, scaled_ppm); wr32(hw, I40E_PRTTSYN_INC_L, adj & 0xFFFFFFFF); wr32(hw, I40E_PRTTSYN_INC_H, adj >> 32); diff --git a/drivers/net/ethernet/intel/i40e/i40e_trace.h b/drivers/net/ethernet/intel/i40e/i40e_trace.h index b5b12299931f..79d587ad5409 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_trace.h +++ b/drivers/net/ethernet/intel/i40e/i40e_trace.h @@ -55,6 +55,55 @@ * being built from shared code. */ +#define NO_DEV "(i40e no_device)" + +TRACE_EVENT(i40e_napi_poll, + + TP_PROTO(struct napi_struct *napi, struct i40e_q_vector *q, int budget, + int budget_per_ring, unsigned int rx_cleaned, unsigned int tx_cleaned, + bool rx_clean_complete, bool tx_clean_complete), + + TP_ARGS(napi, q, budget, budget_per_ring, rx_cleaned, tx_cleaned, + rx_clean_complete, tx_clean_complete), + + TP_STRUCT__entry( + __field(int, budget) + __field(int, budget_per_ring) + __field(unsigned int, rx_cleaned) + __field(unsigned int, tx_cleaned) + __field(int, rx_clean_complete) + __field(int, tx_clean_complete) + __field(int, irq_num) + __field(int, curr_cpu) + __string(qname, q->name) + __string(dev_name, napi->dev ? napi->dev->name : NO_DEV) + __bitmask(irq_affinity, nr_cpumask_bits) + ), + + TP_fast_assign( + __entry->budget = budget; + __entry->budget_per_ring = budget_per_ring; + __entry->rx_cleaned = rx_cleaned; + __entry->tx_cleaned = tx_cleaned; + __entry->rx_clean_complete = rx_clean_complete; + __entry->tx_clean_complete = tx_clean_complete; + __entry->irq_num = q->irq_num; + __entry->curr_cpu = get_cpu(); + __assign_str(qname, q->name); + __assign_str(dev_name, napi->dev ? 
napi->dev->name : NO_DEV); + __assign_bitmask(irq_affinity, cpumask_bits(&q->affinity_mask), + nr_cpumask_bits); + ), + + TP_printk("i40e_napi_poll on dev %s q %s irq %d irq_mask %s curr_cpu %d " + "budget %d bpr %d rx_cleaned %u tx_cleaned %u " + "rx_clean_complete %d tx_clean_complete %d", + __get_str(dev_name), __get_str(qname), __entry->irq_num, + __get_bitmask(irq_affinity), __entry->curr_cpu, __entry->budget, + __entry->budget_per_ring, __entry->rx_cleaned, __entry->tx_cleaned, + __entry->rx_clean_complete, __entry->tx_clean_complete) +); + /* Events related to a vsi & ring */ DECLARE_EVENT_CLASS( i40e_tx_template, diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index b97c95f89fa0..924f972b91fa 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -923,11 +923,13 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi) * @vsi: the VSI we care about * @tx_ring: Tx ring to clean * @napi_budget: Used to determine if we are in netpoll + * @tx_cleaned: Out parameter set to the number of TXes cleaned * * Returns true if there's any budget left (e.g. the clean is finished) **/ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, - struct i40e_ring *tx_ring, int napi_budget) + struct i40e_ring *tx_ring, int napi_budget, + unsigned int *tx_cleaned) { int i = tx_ring->next_to_clean; struct i40e_tx_buffer *tx_buf; @@ -1048,6 +1050,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, } } + *tx_cleaned = total_packets; return !!budget; } @@ -2422,6 +2425,7 @@ static void i40e_inc_ntc(struct i40e_ring *rx_ring) * i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf * @rx_ring: rx descriptor ring to transact packets on * @budget: Total limit on number of packets to process + * @rx_cleaned: Out parameter of the number of packets processed * * This function provides a "bounce buffer" approach to Rx interrupt * processing. The advantage to this is that on systems that have @@ -2430,7 +2434,8 @@ static void i40e_inc_ntc(struct i40e_ring *rx_ring) * * Returns amount of work completed **/ -static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) +static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget, + unsigned int *rx_cleaned) { unsigned int total_rx_bytes = 0, total_rx_packets = 0, frame_sz = 0; u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); @@ -2567,6 +2572,8 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets); + *rx_cleaned = total_rx_packets; + /* guarantee a trip back through this routine if there was a failure */ return failure ? budget : (int)total_rx_packets; } @@ -2689,6 +2696,10 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) container_of(napi, struct i40e_q_vector, napi); struct i40e_vsi *vsi = q_vector->vsi; struct i40e_ring *ring; + bool tx_clean_complete = true; + bool rx_clean_complete = true; + unsigned int tx_cleaned = 0; + unsigned int rx_cleaned = 0; bool clean_complete = true; bool arm_wb = false; int budget_per_ring; @@ -2705,10 +2716,10 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) i40e_for_each_ring(ring, q_vector->tx) { bool wd = ring->xsk_pool ? 
i40e_clean_xdp_tx_irq(vsi, ring) : - i40e_clean_tx_irq(vsi, ring, budget); + i40e_clean_tx_irq(vsi, ring, budget, &tx_cleaned); if (!wd) { - clean_complete = false; + clean_complete = tx_clean_complete = false; continue; } arm_wb |= ring->arm_wb; @@ -2733,14 +2744,18 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) i40e_for_each_ring(ring, q_vector->rx) { int cleaned = ring->xsk_pool ? i40e_clean_rx_irq_zc(ring, budget_per_ring) : - i40e_clean_rx_irq(ring, budget_per_ring); + i40e_clean_rx_irq(ring, budget_per_ring, &rx_cleaned); work_done += cleaned; /* if we clean as many as budgeted, we must not be done */ if (cleaned >= budget_per_ring) - clean_complete = false; + clean_complete = rx_clean_complete = false; } + if (!i40e_enabled_xdp_vsi(vsi)) + trace_i40e_napi_poll(napi, q_vector, budget, budget_per_ring, rx_cleaned, + tx_cleaned, rx_clean_complete, tx_clean_complete); + /* If work not completed, return budget and polling will return */ if (!clean_complete) { int cpu_id = smp_processor_id(); diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index a056e1545615..d79ead5e8d0c 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -147,7 +147,7 @@ __iavf_add_ethtool_stats(u64 **data, void *pointer, * @ring: the ring to copy * * Queue statistics must be copied while protected by - * u64_stats_fetch_begin_irq, so we can't directly use iavf_add_ethtool_stats. + * u64_stats_fetch_begin, so we can't directly use iavf_add_ethtool_stats. * Assumes that queue stats are defined in iavf_gstrings_queue_stats. If the * ring pointer is null, zero out the queue stat values and update the data * pointer. Otherwise safely copy the stats from the ring into the supplied @@ -165,14 +165,14 @@ iavf_add_queue_stats(u64 **data, struct iavf_ring *ring) /* To avoid invalid statistics values, ensure that we keep retrying * the copy until we get a consistent value according to - * u64_stats_fetch_retry_irq. But first, make sure our ring is + * u64_stats_fetch_retry. But first, make sure our ring is * non-null before attempting to access its syncp. */ do { - start = !ring ? 0 : u64_stats_fetch_begin_irq(&ring->syncp); + start = !ring ? 0 : u64_stats_fetch_begin(&ring->syncp); for (i = 0; i < size; i++) iavf_add_one_ethtool_stat(&(*data)[i], ring, &stats[i]); - } while (ring && u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while (ring && u64_stats_fetch_retry(&ring->syncp, start)); /* Once we successfully copy the stats in, update the data pointer */ *data += size; diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index f71e132ede09..c4e451ef7942 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -4827,7 +4827,7 @@ static void iavf_shutdown(struct pci_dev *pdev) iavf_close(netdev); if (iavf_lock_timeout(&adapter->crit_lock, 5000)) - dev_warn(&adapter->pdev->dev, "failed to acquire crit_lock in %s\n", __FUNCTION__); + dev_warn(&adapter->pdev->dev, "%s: failed to acquire crit_lock\n", __func__); /* Prevent the watchdog from running. 
*/ iavf_change_state(adapter, __IAVF_REMOVE); adapter->aq_required = 0; @@ -5088,7 +5088,7 @@ static void iavf_remove(struct pci_dev *pdev) } mutex_lock(&adapter->crit_lock); - dev_info(&adapter->pdev->dev, "Remove device\n"); + dev_info(&adapter->pdev->dev, "Removing device\n"); iavf_change_state(adapter, __IAVF_REMOVE); iavf_request_reset(adapter); diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 001500afc4a6..2f0b604abc5e 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -137,6 +137,21 @@ */ #define ICE_BW_KBPS_DIVISOR 125 +/* Default recipes have priority 4 and below, hence priority values between 5..7 + * can be used as filter priority for advanced switch filter (advanced switch + * filters need new recipe to be created for specified extraction sequence + * because default recipe extraction sequence does not represent custom + * extraction) + */ +#define ICE_SWITCH_FLTR_PRIO_QUEUE 7 +/* prio 6 is reserved for future use (e.g. switch filter with L3 fields + + * (Optional: IP TOS/TTL) + L4 fields + (optionally: TCP fields such as + * SYN/FIN/RST)) + */ +#define ICE_SWITCH_FLTR_PRIO_RSVD 6 +#define ICE_SWITCH_FLTR_PRIO_VSI 5 +#define ICE_SWITCH_FLTR_PRIO_QGRP ICE_SWITCH_FLTR_PRIO_VSI + /* Macro for each VSI in a PF */ #define ice_for_each_vsi(pf, i) \ for ((i) = 0; (i) < (pf)->num_alloc_vsi; (i)++) @@ -305,6 +320,11 @@ enum ice_vsi_state { ICE_VSI_STATE_NBITS /* must be last */ }; +struct ice_vsi_stats { + struct ice_ring_stats **tx_ring_stats; /* Tx ring stats array */ + struct ice_ring_stats **rx_ring_stats; /* Rx ring stats array */ +}; + /* struct that defines a VSI, associated with a dev */ struct ice_vsi { struct net_device *netdev; @@ -358,6 +378,7 @@ struct ice_vsi { /* VSI stats */ struct rtnl_link_stats64 net_stats; + struct rtnl_link_stats64 net_stats_prev; struct ice_eth_stats eth_stats; struct ice_eth_stats eth_stats_prev; @@ -525,6 +546,7 @@ struct ice_pf { u16 ctrl_vsi_idx; /* control VSI index in pf->vsi array */ struct ice_vsi **vsi; /* VSIs created by the driver */ + struct ice_vsi_stats **vsi_stats; struct ice_sw *first_sw; /* first switch created by firmware */ u16 eswitch_mode; /* current mode of eswitch */ struct ice_vfs vfs; @@ -594,6 +616,8 @@ struct ice_pf { u16 num_dmac_chnl_fltrs; struct hlist_head tc_flower_fltr_list; + u64 supported_rxdids; + __le64 nvm_phy_type_lo; /* NVM PHY type low */ __le64 nvm_phy_type_hi; /* NVM PHY type high */ struct ice_link_default_override_tlv link_dflt_override; diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 1bdc70aa979d..958c1e435232 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -848,9 +848,9 @@ struct ice_aqc_txsched_elem { u8 generic; #define ICE_AQC_ELEM_GENERIC_MODE_M 0x1 #define ICE_AQC_ELEM_GENERIC_PRIO_S 0x1 -#define ICE_AQC_ELEM_GENERIC_PRIO_M (0x7 << ICE_AQC_ELEM_GENERIC_PRIO_S) +#define ICE_AQC_ELEM_GENERIC_PRIO_M GENMASK(3, 1) #define ICE_AQC_ELEM_GENERIC_SP_S 0x4 -#define ICE_AQC_ELEM_GENERIC_SP_M (0x1 << ICE_AQC_ELEM_GENERIC_SP_S) +#define ICE_AQC_ELEM_GENERIC_SP_M GENMASK(4, 4) #define ICE_AQC_ELEM_GENERIC_ADJUST_VAL_S 0x5 #define ICE_AQC_ELEM_GENERIC_ADJUST_VAL_M \ (0x3 << ICE_AQC_ELEM_GENERIC_ADJUST_VAL_S) diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index e864634d66bc..554095b25f44 100644 --- 
a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -389,7 +389,7 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring) * Indicates the starting address of the descriptor queue defined in * 128 Byte units. */ - rlan_ctx.base = ring->dma >> 7; + rlan_ctx.base = ring->dma >> ICE_RLAN_BASE_S; rlan_ctx.qlen = ring->count; diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 039342a0ed15..d02b55b6aa9c 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -1105,6 +1105,9 @@ int ice_init_hw(struct ice_hw *hw) hw->evb_veb = true; + /* init xarray for identifying scheduling nodes uniquely */ + xa_init_flags(&hw->port_info->sched_node_ids, XA_FLAGS_ALLOC); + /* Query the allocated resources for Tx scheduler */ status = ice_sched_query_res_alloc(hw); if (status) { @@ -2945,8 +2948,8 @@ bool ice_is_100m_speed_supported(struct ice_hw *hw) * Note: In the structure of [phy_type_low, phy_type_high], there should * be one bit set, as this function will convert one PHY type to its * speed. - * If no bit gets set, ICE_LINK_SPEED_UNKNOWN will be returned - * If more than one bit gets set, ICE_LINK_SPEED_UNKNOWN will be returned + * If no bit gets set, ICE_AQ_LINK_SPEED_UNKNOWN will be returned + * If more than one bit gets set, ICE_AQ_LINK_SPEED_UNKNOWN will be returned */ static u16 ice_get_link_speed_based_on_phy_type(u64 phy_type_low, u64 phy_type_high) @@ -4600,7 +4603,7 @@ ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 q_handle, q_ctx->q_teid = le32_to_cpu(node.node_teid); /* add a leaf node into scheduler tree queue layer */ - status = ice_sched_add_node(pi, hw->num_tx_sched_layers - 1, &node); + status = ice_sched_add_node(pi, hw->num_tx_sched_layers - 1, &node, NULL); if (!status) status = ice_sched_replay_q_bw(pi, q_ctx); @@ -4835,7 +4838,7 @@ ice_ena_vsi_rdma_qset(struct ice_port_info *pi, u16 vsi_handle, u8 tc, for (i = 0; i < num_qsets; i++) { node.node_teid = buf->rdma_qsets[i].qset_teid; ret = ice_sched_add_node(pi, hw->num_tx_sched_layers - 1, - &node); + &node, NULL); if (ret) break; qset_teid[i] = le32_to_cpu(node.node_teid); @@ -5512,3 +5515,40 @@ bool ice_fw_supports_report_dflt_cfg(struct ice_hw *hw) ICE_FW_API_REPORT_DFLT_CFG_MIN, ICE_FW_API_REPORT_DFLT_CFG_PATCH); } + +/* each of the indexes into the following array match the speed of a return + * value from the list of AQ returned speeds like the range: + * ICE_AQ_LINK_SPEED_10MB .. ICE_AQ_LINK_SPEED_100GB excluding + * ICE_AQ_LINK_SPEED_UNKNOWN which is BIT(15) and maps to BIT(14) in this + * array. 
The array is defined as 15 elements long because the link_speed + * returned by the firmware is a 16 bit * value, but is indexed + * by [fls(speed) - 1] + */ +static const u32 ice_aq_to_link_speed[15] = { + SPEED_10, /* BIT(0) */ + SPEED_100, + SPEED_1000, + SPEED_2500, + SPEED_5000, + SPEED_10000, + SPEED_20000, + SPEED_25000, + SPEED_40000, + SPEED_50000, + SPEED_100000, /* BIT(10) */ + 0, + 0, + 0, + 0 /* BIT(14) */ +}; + +/** + * ice_get_link_speed - get integer speed from table + * @index: array index from fls(aq speed) - 1 + * + * Returns: u32 value containing integer speed + */ +u32 ice_get_link_speed(u16 index) +{ + return ice_aq_to_link_speed[index]; +} diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index 8b6712b92e84..4c6a0b5c9304 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -163,6 +163,7 @@ int ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr, u16 mem_addr, u8 page, u8 set_page, u8 *data, u8 length, bool write, struct ice_sq_cd *cd); +u32 ice_get_link_speed(u16 index); int ice_cfg_vsi_rdma(struct ice_port_info *pi, u16 vsi_handle, u16 tc_bitmap, diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c index 0b146a0d4205..6be02f9b0b8c 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb.c @@ -1580,7 +1580,7 @@ ice_update_port_tc_tree_cfg(struct ice_port_info *pi, /* new TC */ status = ice_sched_query_elem(pi->hw, teid2, &elem); if (!status) - status = ice_sched_add_node(pi, 1, &elem); + status = ice_sched_add_node(pi, 1, &elem, NULL); if (status) break; /* update the TC number */ diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c index add90e75f05c..4f24d441c35e 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c @@ -3,6 +3,7 @@ #include "ice_dcb_lib.h" #include "ice_dcb_nl.h" +#include "ice_devlink.h" /** * ice_dcb_get_ena_tc - return bitmap of enabled TCs @@ -364,6 +365,12 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked) /* Enable DCB tagging only when more than one TC */ if (ice_dcb_get_num_tc(new_cfg) > 1) { dev_dbg(dev, "DCB tagging enabled (num TC > 1)\n"); + if (pf->hw.port_info->is_custom_tx_enabled) { + dev_err(dev, "Custom Tx scheduler feature enabled, can't configure DCB\n"); + return -EBUSY; + } + ice_tear_down_devlink_rate_tree(pf); + set_bit(ICE_FLAG_DCB_ENA, pf->flags); } else { dev_dbg(dev, "DCB tagging disabled (num TC = 1)\n"); @@ -874,6 +881,9 @@ void ice_update_dcb_stats(struct ice_pf *pf) prev_ps = &pf->stats_prev; cur_ps = &pf->stats; + if (ice_is_reset_in_progress(pf->state)) + pf->stat_prev_loaded = false; + for (i = 0; i < 8; i++) { ice_stat_update32(hw, GLPRT_PXOFFRXC(port, i), pf->stat_prev_loaded, diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index e6ec20079ced..8286e47b4bae 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -8,6 +8,7 @@ #include "ice_devlink.h" #include "ice_eswitch.h" #include "ice_fw_update.h" +#include "ice_dcb_lib.h" static int ice_active_port_option = -1; @@ -310,12 +311,6 @@ static int ice_devlink_info_get(struct devlink *devlink, } } - err = devlink_info_driver_name_put(req, KBUILD_MODNAME); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Unable to set 
driver name"); - goto out_free_ctx; - } - ice_info_get_dsn(pf, ctx); err = devlink_info_serial_number_put(req, ctx->buf); @@ -713,6 +708,490 @@ ice_devlink_port_unsplit(struct devlink *devlink, struct devlink_port *port, return ice_devlink_port_split(devlink, port, 1, extack); } +/** + * ice_tear_down_devlink_rate_tree - removes devlink-rate exported tree + * @pf: pf struct + * + * This function tears down tree exported during VF's creation. + */ +void ice_tear_down_devlink_rate_tree(struct ice_pf *pf) +{ + struct devlink *devlink; + struct ice_vf *vf; + unsigned int bkt; + + devlink = priv_to_devlink(pf); + + devl_lock(devlink); + mutex_lock(&pf->vfs.table_lock); + ice_for_each_vf(pf, bkt, vf) { + if (vf->devlink_port.devlink_rate) + devl_rate_leaf_destroy(&vf->devlink_port); + } + mutex_unlock(&pf->vfs.table_lock); + + devl_rate_nodes_destroy(devlink); + devl_unlock(devlink); +} + +/** + * ice_enable_custom_tx - try to enable custom Tx feature + * @pf: pf struct + * + * This function tries to enable custom Tx feature, + * it's not possible to enable it, if DCB or ADQ is active. + */ +static bool ice_enable_custom_tx(struct ice_pf *pf) +{ + struct ice_port_info *pi = ice_get_main_vsi(pf)->port_info; + struct device *dev = ice_pf_to_dev(pf); + + if (pi->is_custom_tx_enabled) + /* already enabled, return true */ + return true; + + if (ice_is_adq_active(pf)) { + dev_err(dev, "ADQ active, can't modify Tx scheduler tree\n"); + return false; + } + + if (ice_is_dcb_active(pf)) { + dev_err(dev, "DCB active, can't modify Tx scheduler tree\n"); + return false; + } + + pi->is_custom_tx_enabled = true; + + return true; +} + +/** + * ice_traverse_tx_tree - traverse Tx scheduler tree + * @devlink: devlink struct + * @node: current node, used for recursion + * @tc_node: tc_node struct, that is treated as a root + * @pf: pf struct + * + * This function traverses Tx scheduler tree and exports + * entire structure to the devlink-rate. + */ +static void ice_traverse_tx_tree(struct devlink *devlink, struct ice_sched_node *node, + struct ice_sched_node *tc_node, struct ice_pf *pf) +{ + struct devlink_rate *rate_node = NULL; + struct ice_vf *vf; + int i; + + if (node->parent == tc_node) { + /* create root node */ + rate_node = devl_rate_node_create(devlink, node, node->name, NULL); + } else if (node->vsi_handle && + pf->vsi[node->vsi_handle]->vf) { + vf = pf->vsi[node->vsi_handle]->vf; + if (!vf->devlink_port.devlink_rate) + /* leaf nodes doesn't have children + * so we don't set rate_node + */ + devl_rate_leaf_create(&vf->devlink_port, node, + node->parent->rate_node); + } else if (node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF && + node->parent->rate_node) { + rate_node = devl_rate_node_create(devlink, node, node->name, + node->parent->rate_node); + } + + if (rate_node && !IS_ERR(rate_node)) + node->rate_node = rate_node; + + for (i = 0; i < node->num_children; i++) + ice_traverse_tx_tree(devlink, node->children[i], tc_node, pf); +} + +/** + * ice_devlink_rate_init_tx_topology - export Tx scheduler tree to devlink rate + * @devlink: devlink struct + * @vsi: main vsi struct + * + * This function finds a root node, then calls ice_traverse_tx tree, which + * traverses the tree and exports it's contents to devlink rate. 
+ */ +int ice_devlink_rate_init_tx_topology(struct devlink *devlink, struct ice_vsi *vsi) +{ + struct ice_port_info *pi = vsi->port_info; + struct ice_sched_node *tc_node; + struct ice_pf *pf = vsi->back; + int i; + + tc_node = pi->root->children[0]; + mutex_lock(&pi->sched_lock); + devl_lock(devlink); + for (i = 0; i < tc_node->num_children; i++) + ice_traverse_tx_tree(devlink, tc_node->children[i], tc_node, pf); + devl_unlock(devlink); + mutex_unlock(&pi->sched_lock); + + return 0; +} + +/** + * ice_set_object_tx_share - sets node scheduling parameter + * @pi: devlink struct instance + * @node: node struct instance + * @bw: bandwidth in bytes per second + * @extack: extended netdev ack structure + * + * This function sets ICE_MIN_BW scheduling BW limit. + */ +static int ice_set_object_tx_share(struct ice_port_info *pi, struct ice_sched_node *node, + u64 bw, struct netlink_ext_ack *extack) +{ + int status; + + mutex_lock(&pi->sched_lock); + /* converts bytes per second to kilo bits per second */ + node->tx_share = div_u64(bw, 125); + status = ice_sched_set_node_bw_lmt(pi, node, ICE_MIN_BW, node->tx_share); + mutex_unlock(&pi->sched_lock); + + if (status) + NL_SET_ERR_MSG_MOD(extack, "Can't set scheduling node tx_share"); + + return status; +} + +/** + * ice_set_object_tx_max - sets node scheduling parameter + * @pi: devlink struct instance + * @node: node struct instance + * @bw: bandwidth in bytes per second + * @extack: extended netdev ack structure + * + * This function sets ICE_MAX_BW scheduling BW limit. + */ +static int ice_set_object_tx_max(struct ice_port_info *pi, struct ice_sched_node *node, + u64 bw, struct netlink_ext_ack *extack) +{ + int status; + + mutex_lock(&pi->sched_lock); + /* converts bytes per second value to kilo bits per second */ + node->tx_max = div_u64(bw, 125); + status = ice_sched_set_node_bw_lmt(pi, node, ICE_MAX_BW, node->tx_max); + mutex_unlock(&pi->sched_lock); + + if (status) + NL_SET_ERR_MSG_MOD(extack, "Can't set scheduling node tx_max"); + + return status; +} + +/** + * ice_set_object_tx_priority - sets node scheduling parameter + * @pi: devlink struct instance + * @node: node struct instance + * @priority: value representing priority for strict priority arbitration + * @extack: extended netdev ack structure + * + * This function sets priority of node among siblings. + */ +static int ice_set_object_tx_priority(struct ice_port_info *pi, struct ice_sched_node *node, + u32 priority, struct netlink_ext_ack *extack) +{ + int status; + + if (node->tx_priority >= 8) { + NL_SET_ERR_MSG_MOD(extack, "Priority should be less than 8"); + return -EINVAL; + } + + mutex_lock(&pi->sched_lock); + node->tx_priority = priority; + status = ice_sched_set_node_priority(pi, node, node->tx_priority); + mutex_unlock(&pi->sched_lock); + + if (status) + NL_SET_ERR_MSG_MOD(extack, "Can't set scheduling node tx_priority"); + + return status; +} + +/** + * ice_set_object_tx_weight - sets node scheduling parameter + * @pi: devlink struct instance + * @node: node struct instance + * @weight: value represeting relative weight for WFQ arbitration + * @extack: extended netdev ack structure + * + * This function sets node weight for WFQ algorithm. 
+ */ +static int ice_set_object_tx_weight(struct ice_port_info *pi, struct ice_sched_node *node, + u32 weight, struct netlink_ext_ack *extack) +{ + int status; + + if (node->tx_weight > 200 || node->tx_weight < 1) { + NL_SET_ERR_MSG_MOD(extack, "Weight must be between 1 and 200"); + return -EINVAL; + } + + mutex_lock(&pi->sched_lock); + node->tx_weight = weight; + status = ice_sched_set_node_weight(pi, node, node->tx_weight); + mutex_unlock(&pi->sched_lock); + + if (status) + NL_SET_ERR_MSG_MOD(extack, "Can't set scheduling node tx_weight"); + + return status; +} + +/** + * ice_get_pi_from_dev_rate - get port info from devlink_rate + * @rate_node: devlink struct instance + * + * This function returns corresponding port_info struct of devlink_rate + */ +static struct ice_port_info *ice_get_pi_from_dev_rate(struct devlink_rate *rate_node) +{ + struct ice_pf *pf = devlink_priv(rate_node->devlink); + + return ice_get_main_vsi(pf)->port_info; +} + +static int ice_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv, + struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node; + struct ice_port_info *pi; + + pi = ice_get_pi_from_dev_rate(rate_node); + + if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink))) + return -EBUSY; + + /* preallocate memory for ice_sched_node */ + node = devm_kzalloc(ice_hw_to_dev(pi->hw), sizeof(*node), GFP_KERNEL); + *priv = node; + + return 0; +} + +static int ice_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv, + struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node, *tc_node; + struct ice_port_info *pi; + + pi = ice_get_pi_from_dev_rate(rate_node); + tc_node = pi->root->children[0]; + node = priv; + + if (!rate_node->parent || !node || tc_node == node || !extack) + return 0; + + if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink))) + return -EBUSY; + + /* can't allow to delete a node with children */ + if (node->num_children) + return -EINVAL; + + mutex_lock(&pi->sched_lock); + ice_free_sched_node(pi, node); + mutex_unlock(&pi->sched_lock); + + return 0; +} + +static int ice_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv, + u64 tx_max, struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node = priv; + + if (!ice_enable_custom_tx(devlink_priv(rate_leaf->devlink))) + return -EBUSY; + + if (!node) + return 0; + + return ice_set_object_tx_max(ice_get_pi_from_dev_rate(rate_leaf), + node, tx_max, extack); +} + +static int ice_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv, + u64 tx_share, struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node = priv; + + if (!ice_enable_custom_tx(devlink_priv(rate_leaf->devlink))) + return -EBUSY; + + if (!node) + return 0; + + return ice_set_object_tx_share(ice_get_pi_from_dev_rate(rate_leaf), node, + tx_share, extack); +} + +static int ice_devlink_rate_leaf_tx_priority_set(struct devlink_rate *rate_leaf, void *priv, + u32 tx_priority, struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node = priv; + + if (!ice_enable_custom_tx(devlink_priv(rate_leaf->devlink))) + return -EBUSY; + + if (!node) + return 0; + + return ice_set_object_tx_priority(ice_get_pi_from_dev_rate(rate_leaf), node, + tx_priority, extack); +} + +static int ice_devlink_rate_leaf_tx_weight_set(struct devlink_rate *rate_leaf, void *priv, + u32 tx_weight, struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node = priv; + + if (!ice_enable_custom_tx(devlink_priv(rate_leaf->devlink))) + return -EBUSY; + + if (!node) + return 0; 
+ + return ice_set_object_tx_weight(ice_get_pi_from_dev_rate(rate_leaf), node, + tx_weight, extack); +} + +static int ice_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv, + u64 tx_max, struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node = priv; + + if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink))) + return -EBUSY; + + if (!node) + return 0; + + return ice_set_object_tx_max(ice_get_pi_from_dev_rate(rate_node), + node, tx_max, extack); +} + +static int ice_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv, + u64 tx_share, struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node = priv; + + if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink))) + return -EBUSY; + + if (!node) + return 0; + + return ice_set_object_tx_share(ice_get_pi_from_dev_rate(rate_node), + node, tx_share, extack); +} + +static int ice_devlink_rate_node_tx_priority_set(struct devlink_rate *rate_node, void *priv, + u32 tx_priority, struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node = priv; + + if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink))) + return -EBUSY; + + if (!node) + return 0; + + return ice_set_object_tx_priority(ice_get_pi_from_dev_rate(rate_node), + node, tx_priority, extack); +} + +static int ice_devlink_rate_node_tx_weight_set(struct devlink_rate *rate_node, void *priv, + u32 tx_weight, struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node = priv; + + if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink))) + return -EBUSY; + + if (!node) + return 0; + + return ice_set_object_tx_weight(ice_get_pi_from_dev_rate(rate_node), + node, tx_weight, extack); +} + +static int ice_devlink_set_parent(struct devlink_rate *devlink_rate, + struct devlink_rate *parent, + void *priv, void *parent_priv, + struct netlink_ext_ack *extack) +{ + struct ice_port_info *pi = ice_get_pi_from_dev_rate(devlink_rate); + struct ice_sched_node *tc_node, *node, *parent_node; + u16 num_nodes_added; + u32 first_node_teid; + u32 node_teid; + int status; + + tc_node = pi->root->children[0]; + node = priv; + + if (!extack) + return 0; + + if (!ice_enable_custom_tx(devlink_priv(devlink_rate->devlink))) + return -EBUSY; + + if (!parent) { + if (!node || tc_node == node || node->num_children) + return -EINVAL; + + mutex_lock(&pi->sched_lock); + ice_free_sched_node(pi, node); + mutex_unlock(&pi->sched_lock); + + return 0; + } + + parent_node = parent_priv; + + /* if the node doesn't exist, create it */ + if (!node->parent) { + mutex_lock(&pi->sched_lock); + status = ice_sched_add_elems(pi, tc_node, parent_node, + parent_node->tx_sched_layer + 1, + 1, &num_nodes_added, &first_node_teid, + &node); + mutex_unlock(&pi->sched_lock); + + if (status) { + NL_SET_ERR_MSG_MOD(extack, "Can't add a new node"); + return status; + } + + if (devlink_rate->tx_share) + ice_set_object_tx_share(pi, node, devlink_rate->tx_share, extack); + if (devlink_rate->tx_max) + ice_set_object_tx_max(pi, node, devlink_rate->tx_max, extack); + if (devlink_rate->tx_priority) + ice_set_object_tx_priority(pi, node, devlink_rate->tx_priority, extack); + if (devlink_rate->tx_weight) + ice_set_object_tx_weight(pi, node, devlink_rate->tx_weight, extack); + } else { + node_teid = le32_to_cpu(node->info.node_teid); + mutex_lock(&pi->sched_lock); + status = ice_sched_move_nodes(pi, parent_node, 1, &node_teid); + mutex_unlock(&pi->sched_lock); + + if (status) + NL_SET_ERR_MSG_MOD(extack, "Can't move existing node to a new parent"); + } + + return status; +} + static 
const struct devlink_ops ice_devlink_ops = { .supported_flash_update_params = DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK, .reload_actions = BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE), @@ -725,6 +1204,22 @@ static const struct devlink_ops ice_devlink_ops = { .eswitch_mode_set = ice_eswitch_mode_set, .info_get = ice_devlink_info_get, .flash_update = ice_devlink_flash_update, + + .rate_node_new = ice_devlink_rate_node_new, + .rate_node_del = ice_devlink_rate_node_del, + + .rate_leaf_tx_max_set = ice_devlink_rate_leaf_tx_max_set, + .rate_leaf_tx_share_set = ice_devlink_rate_leaf_tx_share_set, + .rate_leaf_tx_priority_set = ice_devlink_rate_leaf_tx_priority_set, + .rate_leaf_tx_weight_set = ice_devlink_rate_leaf_tx_weight_set, + + .rate_node_tx_max_set = ice_devlink_rate_node_tx_max_set, + .rate_node_tx_share_set = ice_devlink_rate_node_tx_share_set, + .rate_node_tx_priority_set = ice_devlink_rate_node_tx_priority_set, + .rate_node_tx_weight_set = ice_devlink_rate_node_tx_weight_set, + + .rate_leaf_parent_set = ice_devlink_set_parent, + .rate_node_parent_set = ice_devlink_set_parent, }; static int @@ -1033,12 +1528,7 @@ int ice_devlink_create_pf_port(struct ice_pf *pf) */ void ice_devlink_destroy_pf_port(struct ice_pf *pf) { - struct devlink_port *devlink_port; - - devlink_port = &pf->devlink_port; - - devlink_port_type_clear(devlink_port); - devlink_port_unregister(devlink_port); + devlink_port_unregister(&pf->devlink_port); } /** @@ -1094,31 +1584,28 @@ int ice_devlink_create_vf_port(struct ice_vf *vf) */ void ice_devlink_destroy_vf_port(struct ice_vf *vf) { - struct devlink_port *devlink_port; - - devlink_port = &vf->devlink_port; - - devlink_port_type_clear(devlink_port); - devlink_port_unregister(devlink_port); + devl_rate_leaf_destroy(&vf->devlink_port); + devlink_port_unregister(&vf->devlink_port); } #define ICE_DEVLINK_READ_BLK_SIZE (1024 * 1024) +static const struct devlink_region_ops ice_nvm_region_ops; +static const struct devlink_region_ops ice_sram_region_ops; + /** * ice_devlink_nvm_snapshot - Capture a snapshot of the NVM flash contents * @devlink: the devlink instance - * @ops: the devlink region being snapshotted + * @ops: the devlink region to snapshot * @extack: extended ACK response structure * @data: on exit points to snapshot data buffer * - * This function is called in response to the DEVLINK_CMD_REGION_TRIGGER for - * the nvm-flash devlink region. It captures a snapshot of the full NVM flash - * contents, including both banks of flash. This snapshot can later be viewed - * via the devlink-region interface. + * This function is called in response to a DEVLINK_CMD_REGION_NEW for either + * the nvm-flash or shadow-ram region. * - * It captures the flash using the FLASH_ONLY bit set when reading via - * firmware, so it does not read the current Shadow RAM contents. For that, - * use the shadow-ram region. + * It captures a snapshot of the NVM or Shadow RAM flash contents. This + * snapshot can then later be viewed via the DEVLINK_CMD_REGION_READ netlink + * interface. * * @returns zero on success, and updates the data pointer. Returns a non-zero * error code on failure. 
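The hunks that follow fold the old shadow-ram snapshot into the nvm-flash handler and add a direct .read callback; both callbacks decide which flash area to access by checking which region_ops structure invoked them. As a rough illustration of that pattern, here is a minimal standalone userspace sketch — not code from the patch, and all names in it are invented:

/* One shared handler serves two region descriptors and tells them apart
 * by comparing the ops pointer it receives against the known static
 * instances, mirroring the ops-pointer dispatch used in the patch.
 */
#include <stdbool.h>
#include <stdio.h>

struct region_ops {
	const char *name;
	int (*snapshot)(const struct region_ops *ops);
};

/* forward declarations so the shared handler can compare addresses */
static const struct region_ops nvm_ops;
static const struct region_ops sram_ops;

static int shared_snapshot(const struct region_ops *ops)
{
	bool read_shadow_ram;

	if (ops == &nvm_ops)
		read_shadow_ram = false;	/* whole flash */
	else if (ops == &sram_ops)
		read_shadow_ram = true;		/* Shadow RAM words only */
	else
		return -1;			/* unexpected region */

	printf("snapshot of %s, shadow ram: %d\n", ops->name, read_shadow_ram);
	return 0;
}

static const struct region_ops nvm_ops = {
	.name = "nvm-flash",
	.snapshot = shared_snapshot,
};

static const struct region_ops sram_ops = {
	.name = "shadow-ram",
	.snapshot = shared_snapshot,
};

int main(void)
{
	nvm_ops.snapshot(&nvm_ops);
	sram_ops.snapshot(&sram_ops);
	return 0;
}

Keying the behavior off the ops pointer (rather than a string compare on the region name) lets one implementation back any number of regions with no extra state, which is why the driver below only needs forward declarations of the two region_ops tables.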
@@ -1130,17 +1617,27 @@ static int ice_devlink_nvm_snapshot(struct devlink *devlink, struct ice_pf *pf = devlink_priv(devlink); struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; + bool read_shadow_ram; u8 *nvm_data, *tmp, i; u32 nvm_size, left; s8 num_blks; int status; - nvm_size = hw->flash.flash_size; + if (ops == &ice_nvm_region_ops) { + read_shadow_ram = false; + nvm_size = hw->flash.flash_size; + } else if (ops == &ice_sram_region_ops) { + read_shadow_ram = true; + nvm_size = hw->flash.sr_words * 2u; + } else { + NL_SET_ERR_MSG_MOD(extack, "Unexpected region in snapshot function"); + return -EOPNOTSUPP; + } + nvm_data = vzalloc(nvm_size); if (!nvm_data) return -ENOMEM; - num_blks = DIV_ROUND_UP(nvm_size, ICE_DEVLINK_READ_BLK_SIZE); tmp = nvm_data; left = nvm_size; @@ -1164,7 +1661,7 @@ static int ice_devlink_nvm_snapshot(struct devlink *devlink, } status = ice_read_flat_nvm(hw, i * ICE_DEVLINK_READ_BLK_SIZE, - &read_sz, tmp, false); + &read_sz, tmp, read_shadow_ram); if (status) { dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n", read_sz, status, hw->adminq.sq_last_status); @@ -1185,62 +1682,69 @@ static int ice_devlink_nvm_snapshot(struct devlink *devlink, } /** - * ice_devlink_sram_snapshot - Capture a snapshot of the Shadow RAM contents + * ice_devlink_nvm_read - Read a portion of NVM flash contents * @devlink: the devlink instance - * @ops: the devlink region being snapshotted + * @ops: the devlink region to snapshot * @extack: extended ACK response structure - * @data: on exit points to snapshot data buffer + * @offset: the offset to start at + * @size: the amount to read + * @data: the data buffer to read into * - * This function is called in response to the DEVLINK_CMD_REGION_TRIGGER for - * the shadow-ram devlink region. It captures a snapshot of the shadow ram - * contents. This snapshot can later be viewed via the devlink-region - * interface. + * This function is called in response to DEVLINK_CMD_REGION_READ to directly + * read a section of the NVM contents. + * + * It reads from either the nvm-flash or shadow-ram region contents. * * @returns zero on success, and updates the data pointer. Returns a non-zero * error code on failure. 
*/ -static int -ice_devlink_sram_snapshot(struct devlink *devlink, - const struct devlink_region_ops __always_unused *ops, - struct netlink_ext_ack *extack, u8 **data) +static int ice_devlink_nvm_read(struct devlink *devlink, + const struct devlink_region_ops *ops, + struct netlink_ext_ack *extack, + u64 offset, u32 size, u8 *data) { struct ice_pf *pf = devlink_priv(devlink); struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; - u8 *sram_data; - u32 sram_size; - int err; + bool read_shadow_ram; + u64 nvm_size; + int status; - sram_size = hw->flash.sr_words * 2u; - sram_data = vzalloc(sram_size); - if (!sram_data) - return -ENOMEM; + if (ops == &ice_nvm_region_ops) { + read_shadow_ram = false; + nvm_size = hw->flash.flash_size; + } else if (ops == &ice_sram_region_ops) { + read_shadow_ram = true; + nvm_size = hw->flash.sr_words * 2u; + } else { + NL_SET_ERR_MSG_MOD(extack, "Unexpected region in snapshot function"); + return -EOPNOTSUPP; + } - err = ice_acquire_nvm(hw, ICE_RES_READ); - if (err) { + if (offset + size >= nvm_size) { + NL_SET_ERR_MSG_MOD(extack, "Cannot read beyond the region size"); + return -ERANGE; + } + + status = ice_acquire_nvm(hw, ICE_RES_READ); + if (status) { dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n", - err, hw->adminq.sq_last_status); + status, hw->adminq.sq_last_status); NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore"); - vfree(sram_data); - return err; + return -EIO; } - /* Read from the Shadow RAM, rather than directly from NVM */ - err = ice_read_flat_nvm(hw, 0, &sram_size, sram_data, true); - if (err) { + status = ice_read_flat_nvm(hw, (u32)offset, &size, data, + read_shadow_ram); + if (status) { dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n", - sram_size, err, hw->adminq.sq_last_status); - NL_SET_ERR_MSG_MOD(extack, - "Failed to read Shadow RAM contents"); + size, status, hw->adminq.sq_last_status); + NL_SET_ERR_MSG_MOD(extack, "Failed to read NVM contents"); ice_release_nvm(hw); - vfree(sram_data); - return err; + return -EIO; } - ice_release_nvm(hw); - *data = sram_data; - return 0; } @@ -1292,12 +1796,14 @@ static const struct devlink_region_ops ice_nvm_region_ops = { .name = "nvm-flash", .destructor = vfree, .snapshot = ice_devlink_nvm_snapshot, + .read = ice_devlink_nvm_read, }; static const struct devlink_region_ops ice_sram_region_ops = { .name = "shadow-ram", .destructor = vfree, - .snapshot = ice_devlink_sram_snapshot, + .snapshot = ice_devlink_nvm_snapshot, + .read = ice_devlink_nvm_read, }; static const struct devlink_region_ops ice_devcaps_region_ops = { diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.h b/drivers/net/ethernet/intel/ice/ice_devlink.h index fe006d9946f8..6ec96779f52e 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.h +++ b/drivers/net/ethernet/intel/ice/ice_devlink.h @@ -18,4 +18,7 @@ void ice_devlink_destroy_vf_port(struct ice_vf *vf); void ice_devlink_init_regions(struct ice_pf *pf); void ice_devlink_destroy_regions(struct ice_pf *pf); +int ice_devlink_rate_init_tx_topology(struct devlink *devlink, struct ice_vsi *vsi); +void ice_tear_down_devlink_rate_tree(struct ice_pf *pf); + #endif /* _ICE_DEVLINK_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index b7be84bbe72d..4191994d8f3a 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -151,6 +151,175 @@ static const u32 ice_regs_dump_list[] = { QINT_RQCTL(0), 
PFINT_OICR_ENA, QRX_ITR(0), +#define GLDCB_TLPM_PCI_DM 0x000A0180 + GLDCB_TLPM_PCI_DM, +#define GLDCB_TLPM_TC2PFC 0x000A0194 + GLDCB_TLPM_TC2PFC, +#define TCDCB_TLPM_WAIT_DM(_i) (0x000A0080 + ((_i) * 4)) + TCDCB_TLPM_WAIT_DM(0), + TCDCB_TLPM_WAIT_DM(1), + TCDCB_TLPM_WAIT_DM(2), + TCDCB_TLPM_WAIT_DM(3), + TCDCB_TLPM_WAIT_DM(4), + TCDCB_TLPM_WAIT_DM(5), + TCDCB_TLPM_WAIT_DM(6), + TCDCB_TLPM_WAIT_DM(7), + TCDCB_TLPM_WAIT_DM(8), + TCDCB_TLPM_WAIT_DM(9), + TCDCB_TLPM_WAIT_DM(10), + TCDCB_TLPM_WAIT_DM(11), + TCDCB_TLPM_WAIT_DM(12), + TCDCB_TLPM_WAIT_DM(13), + TCDCB_TLPM_WAIT_DM(14), + TCDCB_TLPM_WAIT_DM(15), + TCDCB_TLPM_WAIT_DM(16), + TCDCB_TLPM_WAIT_DM(17), + TCDCB_TLPM_WAIT_DM(18), + TCDCB_TLPM_WAIT_DM(19), + TCDCB_TLPM_WAIT_DM(20), + TCDCB_TLPM_WAIT_DM(21), + TCDCB_TLPM_WAIT_DM(22), + TCDCB_TLPM_WAIT_DM(23), + TCDCB_TLPM_WAIT_DM(24), + TCDCB_TLPM_WAIT_DM(25), + TCDCB_TLPM_WAIT_DM(26), + TCDCB_TLPM_WAIT_DM(27), + TCDCB_TLPM_WAIT_DM(28), + TCDCB_TLPM_WAIT_DM(29), + TCDCB_TLPM_WAIT_DM(30), + TCDCB_TLPM_WAIT_DM(31), +#define GLPCI_WATMK_CLNT_PIPEMON 0x000BFD90 + GLPCI_WATMK_CLNT_PIPEMON, +#define GLPCI_CUR_CLNT_COMMON 0x000BFD84 + GLPCI_CUR_CLNT_COMMON, +#define GLPCI_CUR_CLNT_PIPEMON 0x000BFD88 + GLPCI_CUR_CLNT_PIPEMON, +#define GLPCI_PCIERR 0x0009DEB0 + GLPCI_PCIERR, +#define GLPSM_DEBUG_CTL_STATUS 0x000B0600 + GLPSM_DEBUG_CTL_STATUS, +#define GLPSM0_DEBUG_FIFO_OVERFLOW_DETECT 0x000B0680 + GLPSM0_DEBUG_FIFO_OVERFLOW_DETECT, +#define GLPSM0_DEBUG_FIFO_UNDERFLOW_DETECT 0x000B0684 + GLPSM0_DEBUG_FIFO_UNDERFLOW_DETECT, +#define GLPSM0_DEBUG_DT_OUT_OF_WINDOW 0x000B0688 + GLPSM0_DEBUG_DT_OUT_OF_WINDOW, +#define GLPSM0_DEBUG_INTF_HW_ERROR_DETECT 0x000B069C + GLPSM0_DEBUG_INTF_HW_ERROR_DETECT, +#define GLPSM0_DEBUG_MISC_HW_ERROR_DETECT 0x000B06A0 + GLPSM0_DEBUG_MISC_HW_ERROR_DETECT, +#define GLPSM1_DEBUG_FIFO_OVERFLOW_DETECT 0x000B0E80 + GLPSM1_DEBUG_FIFO_OVERFLOW_DETECT, +#define GLPSM1_DEBUG_FIFO_UNDERFLOW_DETECT 0x000B0E84 + GLPSM1_DEBUG_FIFO_UNDERFLOW_DETECT, +#define GLPSM1_DEBUG_SRL_FIFO_OVERFLOW_DETECT 0x000B0E88 + GLPSM1_DEBUG_SRL_FIFO_OVERFLOW_DETECT, +#define GLPSM1_DEBUG_SRL_FIFO_UNDERFLOW_DETECT 0x000B0E8C + GLPSM1_DEBUG_SRL_FIFO_UNDERFLOW_DETECT, +#define GLPSM1_DEBUG_MISC_HW_ERROR_DETECT 0x000B0E90 + GLPSM1_DEBUG_MISC_HW_ERROR_DETECT, +#define GLPSM2_DEBUG_FIFO_OVERFLOW_DETECT 0x000B1680 + GLPSM2_DEBUG_FIFO_OVERFLOW_DETECT, +#define GLPSM2_DEBUG_FIFO_UNDERFLOW_DETECT 0x000B1684 + GLPSM2_DEBUG_FIFO_UNDERFLOW_DETECT, +#define GLPSM2_DEBUG_MISC_HW_ERROR_DETECT 0x000B1688 + GLPSM2_DEBUG_MISC_HW_ERROR_DETECT, +#define GLTDPU_TCLAN_COMP_BOB(_i) (0x00049ADC + ((_i) * 4)) + GLTDPU_TCLAN_COMP_BOB(1), + GLTDPU_TCLAN_COMP_BOB(2), + GLTDPU_TCLAN_COMP_BOB(3), + GLTDPU_TCLAN_COMP_BOB(4), + GLTDPU_TCLAN_COMP_BOB(5), + GLTDPU_TCLAN_COMP_BOB(6), + GLTDPU_TCLAN_COMP_BOB(7), + GLTDPU_TCLAN_COMP_BOB(8), +#define GLTDPU_TCB_CMD_BOB(_i) (0x0004975C + ((_i) * 4)) + GLTDPU_TCB_CMD_BOB(1), + GLTDPU_TCB_CMD_BOB(2), + GLTDPU_TCB_CMD_BOB(3), + GLTDPU_TCB_CMD_BOB(4), + GLTDPU_TCB_CMD_BOB(5), + GLTDPU_TCB_CMD_BOB(6), + GLTDPU_TCB_CMD_BOB(7), + GLTDPU_TCB_CMD_BOB(8), +#define GLTDPU_PSM_UPDATE_BOB(_i) (0x00049B5C + ((_i) * 4)) + GLTDPU_PSM_UPDATE_BOB(1), + GLTDPU_PSM_UPDATE_BOB(2), + GLTDPU_PSM_UPDATE_BOB(3), + GLTDPU_PSM_UPDATE_BOB(4), + GLTDPU_PSM_UPDATE_BOB(5), + GLTDPU_PSM_UPDATE_BOB(6), + GLTDPU_PSM_UPDATE_BOB(7), + GLTDPU_PSM_UPDATE_BOB(8), +#define GLTCB_CMD_IN_BOB(_i) (0x000AE288 + ((_i) * 4)) + GLTCB_CMD_IN_BOB(1), + GLTCB_CMD_IN_BOB(2), + GLTCB_CMD_IN_BOB(3), + GLTCB_CMD_IN_BOB(4), + GLTCB_CMD_IN_BOB(5), + 
GLTCB_CMD_IN_BOB(6), + GLTCB_CMD_IN_BOB(7), + GLTCB_CMD_IN_BOB(8), +#define GLLAN_TCLAN_FETCH_CTL_FBK_BOB_CTL(_i) (0x000FC148 + ((_i) * 4)) + GLLAN_TCLAN_FETCH_CTL_FBK_BOB_CTL(1), + GLLAN_TCLAN_FETCH_CTL_FBK_BOB_CTL(2), + GLLAN_TCLAN_FETCH_CTL_FBK_BOB_CTL(3), + GLLAN_TCLAN_FETCH_CTL_FBK_BOB_CTL(4), + GLLAN_TCLAN_FETCH_CTL_FBK_BOB_CTL(5), + GLLAN_TCLAN_FETCH_CTL_FBK_BOB_CTL(6), + GLLAN_TCLAN_FETCH_CTL_FBK_BOB_CTL(7), + GLLAN_TCLAN_FETCH_CTL_FBK_BOB_CTL(8), +#define GLLAN_TCLAN_FETCH_CTL_SCHED_BOB_CTL(_i) (0x000FC248 + ((_i) * 4)) + GLLAN_TCLAN_FETCH_CTL_SCHED_BOB_CTL(1), + GLLAN_TCLAN_FETCH_CTL_SCHED_BOB_CTL(2), + GLLAN_TCLAN_FETCH_CTL_SCHED_BOB_CTL(3), + GLLAN_TCLAN_FETCH_CTL_SCHED_BOB_CTL(4), + GLLAN_TCLAN_FETCH_CTL_SCHED_BOB_CTL(5), + GLLAN_TCLAN_FETCH_CTL_SCHED_BOB_CTL(6), + GLLAN_TCLAN_FETCH_CTL_SCHED_BOB_CTL(7), + GLLAN_TCLAN_FETCH_CTL_SCHED_BOB_CTL(8), +#define GLLAN_TCLAN_CACHE_CTL_BOB_CTL(_i) (0x000FC1C8 + ((_i) * 4)) + GLLAN_TCLAN_CACHE_CTL_BOB_CTL(1), + GLLAN_TCLAN_CACHE_CTL_BOB_CTL(2), + GLLAN_TCLAN_CACHE_CTL_BOB_CTL(3), + GLLAN_TCLAN_CACHE_CTL_BOB_CTL(4), + GLLAN_TCLAN_CACHE_CTL_BOB_CTL(5), + GLLAN_TCLAN_CACHE_CTL_BOB_CTL(6), + GLLAN_TCLAN_CACHE_CTL_BOB_CTL(7), + GLLAN_TCLAN_CACHE_CTL_BOB_CTL(8), +#define GLLAN_TCLAN_FETCH_CTL_PROC_BOB_CTL(_i) (0x000FC188 + ((_i) * 4)) + GLLAN_TCLAN_FETCH_CTL_PROC_BOB_CTL(1), + GLLAN_TCLAN_FETCH_CTL_PROC_BOB_CTL(2), + GLLAN_TCLAN_FETCH_CTL_PROC_BOB_CTL(3), + GLLAN_TCLAN_FETCH_CTL_PROC_BOB_CTL(4), + GLLAN_TCLAN_FETCH_CTL_PROC_BOB_CTL(5), + GLLAN_TCLAN_FETCH_CTL_PROC_BOB_CTL(6), + GLLAN_TCLAN_FETCH_CTL_PROC_BOB_CTL(7), + GLLAN_TCLAN_FETCH_CTL_PROC_BOB_CTL(8), +#define GLLAN_TCLAN_FETCH_CTL_PCIE_RD_BOB_CTL(_i) (0x000FC288 + ((_i) * 4)) + GLLAN_TCLAN_FETCH_CTL_PCIE_RD_BOB_CTL(1), + GLLAN_TCLAN_FETCH_CTL_PCIE_RD_BOB_CTL(2), + GLLAN_TCLAN_FETCH_CTL_PCIE_RD_BOB_CTL(3), + GLLAN_TCLAN_FETCH_CTL_PCIE_RD_BOB_CTL(4), + GLLAN_TCLAN_FETCH_CTL_PCIE_RD_BOB_CTL(5), + GLLAN_TCLAN_FETCH_CTL_PCIE_RD_BOB_CTL(6), + GLLAN_TCLAN_FETCH_CTL_PCIE_RD_BOB_CTL(7), + GLLAN_TCLAN_FETCH_CTL_PCIE_RD_BOB_CTL(8), +#define PRTDCB_TCUPM_REG_CM(_i) (0x000BC360 + ((_i) * 4)) + PRTDCB_TCUPM_REG_CM(0), + PRTDCB_TCUPM_REG_CM(1), + PRTDCB_TCUPM_REG_CM(2), + PRTDCB_TCUPM_REG_CM(3), +#define PRTDCB_TCUPM_REG_DM(_i) (0x000BC3A0 + ((_i) * 4)) + PRTDCB_TCUPM_REG_DM(0), + PRTDCB_TCUPM_REG_DM(1), + PRTDCB_TCUPM_REG_DM(2), + PRTDCB_TCUPM_REG_DM(3), +#define PRTDCB_TLPM_REG_DM(_i) (0x000A0000 + ((_i) * 4)) + PRTDCB_TLPM_REG_DM(0), + PRTDCB_TLPM_REG_DM(1), + PRTDCB_TLPM_REG_DM(2), + PRTDCB_TLPM_REG_DM(3), }; struct ice_priv_flag { @@ -1375,9 +1544,9 @@ __ice_get_ethtool_stats(struct net_device *netdev, ice_for_each_alloc_txq(vsi, j) { tx_ring = READ_ONCE(vsi->tx_rings[j]); - if (tx_ring) { - data[i++] = tx_ring->stats.pkts; - data[i++] = tx_ring->stats.bytes; + if (tx_ring && tx_ring->ring_stats) { + data[i++] = tx_ring->ring_stats->stats.pkts; + data[i++] = tx_ring->ring_stats->stats.bytes; } else { data[i++] = 0; data[i++] = 0; @@ -1386,9 +1555,9 @@ __ice_get_ethtool_stats(struct net_device *netdev, ice_for_each_alloc_rxq(vsi, j) { rx_ring = READ_ONCE(vsi->rx_rings[j]); - if (rx_ring) { - data[i++] = rx_ring->stats.pkts; - data[i++] = rx_ring->stats.bytes; + if (rx_ring && rx_ring->ring_stats) { + data[i++] = rx_ring->ring_stats->stats.pkts; + data[i++] = rx_ring->ring_stats->stats.bytes; } else { data[i++] = 0; data[i++] = 0; diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index d16738a3d3a7..a92dc9a16035 100644 --- 
a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -110,6 +110,9 @@ #define PRTDCB_TUP2TC 0x001D26C0 #define GL_PREEXT_L2_PMASK0(_i) (0x0020F0FC + ((_i) * 4)) #define GL_PREEXT_L2_PMASK1(_i) (0x0020F108 + ((_i) * 4)) +#define GLFLXP_RXDID_FLAGS(_i, _j) (0x0045D000 + ((_i) * 4 + (_j) * 256)) +#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S 0 +#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M ICE_M(0x3F, 0) #define GLFLXP_RXDID_FLX_WRD_0(_i) (0x0045c800 + ((_i) * 4)) #define GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_S 0 #define GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_M ICE_M(0xFF, 0) diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index b3baf7c3f910..89f986a75cc8 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -908,17 +908,5 @@ static inline struct ice_rx_ptype_decoded ice_decode_rx_desc_ptype(u16 ptype) return ice_ptype_lkup[ptype]; } -#define ICE_LINK_SPEED_UNKNOWN 0 -#define ICE_LINK_SPEED_10MBPS 10 -#define ICE_LINK_SPEED_100MBPS 100 -#define ICE_LINK_SPEED_1000MBPS 1000 -#define ICE_LINK_SPEED_2500MBPS 2500 -#define ICE_LINK_SPEED_5000MBPS 5000 -#define ICE_LINK_SPEED_10000MBPS 10000 -#define ICE_LINK_SPEED_20000MBPS 20000 -#define ICE_LINK_SPEED_25000MBPS 25000 -#define ICE_LINK_SPEED_40000MBPS 40000 -#define ICE_LINK_SPEED_50000MBPS 50000 -#define ICE_LINK_SPEED_100000MBPS 100000 #endif /* _ICE_LAN_TX_RX_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 7276badfa19e..94aa834cd9a6 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -448,6 +448,49 @@ static irqreturn_t ice_eswitch_msix_clean_rings(int __always_unused irq, void *d } /** + * ice_vsi_alloc_stat_arrays - Allocate statistics arrays + * @vsi: VSI pointer + */ +static int ice_vsi_alloc_stat_arrays(struct ice_vsi *vsi) +{ + struct ice_vsi_stats *vsi_stat; + struct ice_pf *pf = vsi->back; + + if (vsi->type == ICE_VSI_CHNL) + return 0; + if (!pf->vsi_stats) + return -ENOENT; + + vsi_stat = kzalloc(sizeof(*vsi_stat), GFP_KERNEL); + if (!vsi_stat) + return -ENOMEM; + + vsi_stat->tx_ring_stats = + kcalloc(vsi->alloc_txq, sizeof(*vsi_stat->tx_ring_stats), + GFP_KERNEL); + if (!vsi_stat->tx_ring_stats) + goto err_alloc_tx; + + vsi_stat->rx_ring_stats = + kcalloc(vsi->alloc_rxq, sizeof(*vsi_stat->rx_ring_stats), + GFP_KERNEL); + if (!vsi_stat->rx_ring_stats) + goto err_alloc_rx; + + pf->vsi_stats[vsi->idx] = vsi_stat; + + return 0; + +err_alloc_rx: + kfree(vsi_stat->rx_ring_stats); +err_alloc_tx: + kfree(vsi_stat->tx_ring_stats); + kfree(vsi_stat); + pf->vsi_stats[vsi->idx] = NULL; + return -ENOMEM; +} + +/** * ice_vsi_alloc - Allocates the next available struct VSI in the PF * @pf: board private structure * @vsi_type: type of VSI @@ -560,6 +603,11 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, if (vsi->type == ICE_VSI_CTRL && vf) vf->ctrl_vsi_idx = vsi->idx; + + /* allocate memory for Tx/Rx ring stat pointers */ + if (ice_vsi_alloc_stat_arrays(vsi)) + goto err_rings; + goto unlock_pf; err_rings: @@ -1536,6 +1584,106 @@ err_out: } /** + * ice_vsi_free_stats - Free the ring statistics structures + * @vsi: VSI pointer + */ +static void ice_vsi_free_stats(struct ice_vsi *vsi) +{ + struct ice_vsi_stats *vsi_stat; + struct ice_pf *pf = vsi->back; + int i; + + if (vsi->type == ICE_VSI_CHNL) + return; + if (!pf->vsi_stats) + return; + + vsi_stat = 
pf->vsi_stats[vsi->idx]; + if (!vsi_stat) + return; + + ice_for_each_alloc_txq(vsi, i) { + if (vsi_stat->tx_ring_stats[i]) { + kfree_rcu(vsi_stat->tx_ring_stats[i], rcu); + WRITE_ONCE(vsi_stat->tx_ring_stats[i], NULL); + } + } + + ice_for_each_alloc_rxq(vsi, i) { + if (vsi_stat->rx_ring_stats[i]) { + kfree_rcu(vsi_stat->rx_ring_stats[i], rcu); + WRITE_ONCE(vsi_stat->rx_ring_stats[i], NULL); + } + } + + kfree(vsi_stat->tx_ring_stats); + kfree(vsi_stat->rx_ring_stats); + kfree(vsi_stat); + pf->vsi_stats[vsi->idx] = NULL; +} + +/** + * ice_vsi_alloc_ring_stats - Allocates Tx and Rx ring stats for the VSI + * @vsi: VSI which is having stats allocated + */ +static int ice_vsi_alloc_ring_stats(struct ice_vsi *vsi) +{ + struct ice_ring_stats **tx_ring_stats; + struct ice_ring_stats **rx_ring_stats; + struct ice_vsi_stats *vsi_stats; + struct ice_pf *pf = vsi->back; + u16 i; + + vsi_stats = pf->vsi_stats[vsi->idx]; + tx_ring_stats = vsi_stats->tx_ring_stats; + rx_ring_stats = vsi_stats->rx_ring_stats; + + /* Allocate Tx ring stats */ + ice_for_each_alloc_txq(vsi, i) { + struct ice_ring_stats *ring_stats; + struct ice_tx_ring *ring; + + ring = vsi->tx_rings[i]; + ring_stats = tx_ring_stats[i]; + + if (!ring_stats) { + ring_stats = kzalloc(sizeof(*ring_stats), GFP_KERNEL); + if (!ring_stats) + goto err_out; + + WRITE_ONCE(tx_ring_stats[i], ring_stats); + } + + ring->ring_stats = ring_stats; + } + + /* Allocate Rx ring stats */ + ice_for_each_alloc_rxq(vsi, i) { + struct ice_ring_stats *ring_stats; + struct ice_rx_ring *ring; + + ring = vsi->rx_rings[i]; + ring_stats = rx_ring_stats[i]; + + if (!ring_stats) { + ring_stats = kzalloc(sizeof(*ring_stats), GFP_KERNEL); + if (!ring_stats) + goto err_out; + + WRITE_ONCE(rx_ring_stats[i], ring_stats); + } + + ring->ring_stats = ring_stats; + } + + return 0; + +err_out: + ice_vsi_free_stats(vsi); + return -ENOMEM; +} + +/** * ice_vsi_manage_rss_lut - disable/enable RSS * @vsi: the VSI being changed * @ena: boolean value indicating if this is an enable or disable request @@ -1795,11 +1943,15 @@ void ice_update_eth_stats(struct ice_vsi *vsi) { struct ice_eth_stats *prev_es, *cur_es; struct ice_hw *hw = &vsi->back->hw; + struct ice_pf *pf = vsi->back; u16 vsi_num = vsi->vsi_num; /* HW absolute index of a VSI */ prev_es = &vsi->eth_stats_prev; cur_es = &vsi->eth_stats; + if (ice_is_reset_in_progress(pf->state)) + vsi->stat_offsets_loaded = false; + ice_stat_update40(hw, GLV_GORCL(vsi_num), vsi->stat_offsets_loaded, &prev_es->rx_bytes, &cur_es->rx_bytes); @@ -2576,6 +2728,10 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, if (ret) goto unroll_vector_base; + ret = ice_vsi_alloc_ring_stats(vsi); + if (ret) + goto unroll_vector_base; + ice_vsi_map_rings_to_vectors(vsi); /* ICE_VSI_CTRL does not need RSS so skip RSS processing */ @@ -2614,6 +2770,9 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, if (ret) goto unroll_vector_base; + ret = ice_vsi_alloc_ring_stats(vsi); + if (ret) + goto unroll_vector_base; /* Do not exit if configuring RSS had an issue, at least * receive traffic on first queue. 
Hence no need to capture * return value @@ -2627,6 +2786,11 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, ret = ice_vsi_alloc_rings(vsi); if (ret) goto unroll_vsi_init; + + ret = ice_vsi_alloc_ring_stats(vsi); + if (ret) + goto unroll_vector_base; + break; default: /* clean up the resources and exit */ @@ -2686,6 +2850,7 @@ unroll_vector_base: unroll_alloc_q_vector: ice_vsi_free_q_vectors(vsi); unroll_vsi_init: + ice_vsi_free_stats(vsi); ice_vsi_delete(vsi); unroll_get_qs: ice_vsi_put_qs(vsi); @@ -3077,7 +3242,7 @@ int ice_vsi_release(struct ice_vsi *vsi) vsi->agg_node && vsi->agg_node->valid) vsi->agg_node->num_vsis--; ice_vsi_clear_rings(vsi); - + ice_vsi_free_stats(vsi); ice_vsi_put_qs(vsi); /* retain SW VSI data structure since it is needed to unregister and @@ -3205,6 +3370,47 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi, } /** + * ice_vsi_realloc_stat_arrays - Frees unused stat structures + * @vsi: VSI pointer + * @prev_txq: Number of Tx rings before ring reallocation + * @prev_rxq: Number of Rx rings before ring reallocation + */ +static int +ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi, int prev_txq, int prev_rxq) +{ + struct ice_vsi_stats *vsi_stat; + struct ice_pf *pf = vsi->back; + int i; + + if (!prev_txq || !prev_rxq) + return 0; + if (vsi->type == ICE_VSI_CHNL) + return 0; + + vsi_stat = pf->vsi_stats[vsi->idx]; + + if (vsi->num_txq < prev_txq) { + for (i = vsi->num_txq; i < prev_txq; i++) { + if (vsi_stat->tx_ring_stats[i]) { + kfree_rcu(vsi_stat->tx_ring_stats[i], rcu); + WRITE_ONCE(vsi_stat->tx_ring_stats[i], NULL); + } + } + } + + if (vsi->num_rxq < prev_rxq) { + for (i = vsi->num_rxq; i < prev_rxq; i++) { + if (vsi_stat->rx_ring_stats[i]) { + kfree_rcu(vsi_stat->rx_ring_stats[i], rcu); + WRITE_ONCE(vsi_stat->rx_ring_stats[i], NULL); + } + } + } + + return 0; +} + +/** * ice_vsi_rebuild - Rebuild VSI after reset * @vsi: VSI to be rebuild * @init_vsi: is this an initialization or a reconfigure of the VSI @@ -3215,10 +3421,10 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; struct ice_coalesce_stored *coalesce; + int ret, i, prev_txq, prev_rxq; int prev_num_q_vectors = 0; enum ice_vsi_type vtype; struct ice_pf *pf; - int ret, i; if (!vsi) return -EINVAL; @@ -3237,6 +3443,9 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce); + prev_txq = vsi->num_txq; + prev_rxq = vsi->num_rxq; + ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx); ret = ice_rm_vsi_rdma_cfg(vsi->port_info, vsi->idx); if (ret) @@ -3303,7 +3512,13 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) if (ret) goto err_vectors; + ret = ice_vsi_alloc_ring_stats(vsi); + if (ret) + goto err_vectors; + ice_vsi_map_rings_to_vectors(vsi); + + vsi->stat_offsets_loaded = false; if (ice_is_xdp_ena_vsi(vsi)) { ret = ice_vsi_determine_xdp_res(vsi); if (ret) @@ -3340,6 +3555,11 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) if (ret) goto err_vectors; + ret = ice_vsi_alloc_ring_stats(vsi); + if (ret) + goto err_vectors; + + vsi->stat_offsets_loaded = false; break; case ICE_VSI_CHNL: if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { @@ -3387,6 +3607,10 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) return ice_schedule_reset(pf, ICE_RESET_PFR); } } + + if (ice_vsi_realloc_stat_arrays(vsi, prev_txq, prev_rxq)) + goto err_vectors; + ice_vsi_rebuild_set_coalesce(vsi, coalesce, prev_num_q_vectors); kfree(coalesce); @@ -3728,9 +3952,9 @@ static void 
ice_update_ring_stats(struct ice_q_stats *stats, u64 pkts, u64 bytes */ void ice_update_tx_ring_stats(struct ice_tx_ring *tx_ring, u64 pkts, u64 bytes) { - u64_stats_update_begin(&tx_ring->syncp); - ice_update_ring_stats(&tx_ring->stats, pkts, bytes); - u64_stats_update_end(&tx_ring->syncp); + u64_stats_update_begin(&tx_ring->ring_stats->syncp); + ice_update_ring_stats(&tx_ring->ring_stats->stats, pkts, bytes); + u64_stats_update_end(&tx_ring->ring_stats->syncp); } /** @@ -3741,9 +3965,9 @@ void ice_update_tx_ring_stats(struct ice_tx_ring *tx_ring, u64 pkts, u64 bytes) */ void ice_update_rx_ring_stats(struct ice_rx_ring *rx_ring, u64 pkts, u64 bytes) { - u64_stats_update_begin(&rx_ring->syncp); - ice_update_ring_stats(&rx_ring->stats, pkts, bytes); - u64_stats_update_end(&rx_ring->syncp); + u64_stats_update_begin(&rx_ring->ring_stats->syncp); + ice_update_ring_stats(&rx_ring->ring_stats->stats, pkts, bytes); + u64_stats_update_end(&rx_ring->ring_stats->syncp); } /** @@ -3850,33 +4074,11 @@ int ice_clear_dflt_vsi(struct ice_vsi *vsi) */ int ice_get_link_speed_mbps(struct ice_vsi *vsi) { - switch (vsi->port_info->phy.link_info.link_speed) { - case ICE_AQ_LINK_SPEED_100GB: - return SPEED_100000; - case ICE_AQ_LINK_SPEED_50GB: - return SPEED_50000; - case ICE_AQ_LINK_SPEED_40GB: - return SPEED_40000; - case ICE_AQ_LINK_SPEED_25GB: - return SPEED_25000; - case ICE_AQ_LINK_SPEED_20GB: - return SPEED_20000; - case ICE_AQ_LINK_SPEED_10GB: - return SPEED_10000; - case ICE_AQ_LINK_SPEED_5GB: - return SPEED_5000; - case ICE_AQ_LINK_SPEED_2500MB: - return SPEED_2500; - case ICE_AQ_LINK_SPEED_1000MB: - return SPEED_1000; - case ICE_AQ_LINK_SPEED_100MB: - return SPEED_100; - case ICE_AQ_LINK_SPEED_10MB: - return SPEED_10; - case ICE_AQ_LINK_SPEED_UNKNOWN: - default: - return 0; - } + unsigned int link_speed; + + link_speed = vsi->port_info->phy.link_info.link_speed; + + return (int)ice_get_link_speed(fls(link_speed) - 1); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index ca2898467dcb..a9a7f8b52140 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -130,12 +130,17 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf) ice_for_each_txq(vsi, i) { struct ice_tx_ring *tx_ring = vsi->tx_rings[i]; + struct ice_ring_stats *ring_stats; if (!tx_ring) continue; if (ice_ring_ch_enabled(tx_ring)) continue; + ring_stats = tx_ring->ring_stats; + if (!ring_stats) + continue; + if (tx_ring->desc) { /* If packet counter has not changed the queue is * likely stalled, so force an interrupt for this @@ -144,8 +149,8 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf) * prev_pkt would be negative if there was no * pending work. */ - packets = tx_ring->stats.pkts & INT_MAX; - if (tx_ring->tx_stats.prev_pkt == packets) { + packets = ring_stats->stats.pkts & INT_MAX; + if (ring_stats->tx_stats.prev_pkt == packets) { /* Trigger sw interrupt to revive the queue */ ice_trigger_sw_intr(hw, tx_ring->q_vector); continue; @@ -155,7 +160,7 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf) * to ice_get_tx_pending() */ smp_rmb(); - tx_ring->tx_stats.prev_pkt = + ring_stats->tx_stats.prev_pkt = ice_get_tx_pending(tx_ring) ? 
packets : -1; } } @@ -299,20 +304,6 @@ static int ice_clear_promisc(struct ice_vsi *vsi, u8 promisc_m) } /** - * ice_get_devlink_port - Get devlink port from netdev - * @netdev: the netdevice structure - */ -static struct devlink_port *ice_get_devlink_port(struct net_device *netdev) -{ - struct ice_pf *pf = ice_netdev_to_pf(netdev); - - if (!ice_is_switchdev_running(pf)) - return NULL; - - return &pf->devlink_port; -} - -/** * ice_vsi_sync_fltr - Update the VSI filter list to the HW * @vsi: ptr to the VSI * @@ -1120,8 +1111,7 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up, if (link_up == old_link && link_speed == old_link_speed) return 0; - if (!ice_is_e810(&pf->hw)) - ice_ptp_link_change(pf, pf->hw.pf_id, link_up); + ice_ptp_link_change(pf, pf->hw.pf_id, link_up); if (ice_is_dcb_active(pf)) { if (test_bit(ICE_FLAG_DCB_ENA, pf->flags)) @@ -2560,13 +2550,20 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) ice_for_each_xdp_txq(vsi, i) { u16 xdp_q_idx = vsi->alloc_txq + i; + struct ice_ring_stats *ring_stats; struct ice_tx_ring *xdp_ring; xdp_ring = kzalloc(sizeof(*xdp_ring), GFP_KERNEL); - if (!xdp_ring) goto free_xdp_rings; + ring_stats = kzalloc(sizeof(*ring_stats), GFP_KERNEL); + if (!ring_stats) { + ice_free_tx_ring(xdp_ring); + goto free_xdp_rings; + } + + xdp_ring->ring_stats = ring_stats; xdp_ring->q_index = xdp_q_idx; xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx]; xdp_ring->vsi = vsi; @@ -2589,9 +2586,13 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) return 0; free_xdp_rings: - for (; i >= 0; i--) - if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc) + for (; i >= 0; i--) { + if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc) { + kfree_rcu(vsi->xdp_rings[i]->ring_stats, rcu); + vsi->xdp_rings[i]->ring_stats = NULL; ice_free_tx_ring(vsi->xdp_rings[i]); + } + } return -ENOMEM; } @@ -2792,6 +2793,8 @@ free_qmap: synchronize_rcu(); ice_free_tx_ring(vsi->xdp_rings[i]); } + kfree_rcu(vsi->xdp_rings[i]->ring_stats, rcu); + vsi->xdp_rings[i]->ring_stats = NULL; kfree_rcu(vsi->xdp_rings[i], rcu); vsi->xdp_rings[i] = NULL; } @@ -4603,6 +4606,7 @@ static int ice_register_netdev(struct ice_pf *pf) if (err) goto err_devlink_create; + SET_NETDEV_DEVLINK_PORT(vsi->netdev, &pf->devlink_port); err = register_netdev(vsi->netdev); if (err) goto err_register_netdev; @@ -4611,8 +4615,6 @@ static int ice_register_netdev(struct ice_pf *pf) netif_carrier_off(vsi->netdev); netif_tx_stop_all_queues(vsi->netdev); - devlink_port_type_eth_set(&pf->devlink_port, vsi->netdev); - return 0; err_register_netdev: ice_devlink_destroy_pf_port(pf); @@ -4771,11 +4773,18 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) goto err_init_pf_unroll; } + pf->vsi_stats = devm_kcalloc(dev, pf->num_alloc_vsi, + sizeof(*pf->vsi_stats), GFP_KERNEL); + if (!pf->vsi_stats) { + err = -ENOMEM; + goto err_init_vsi_unroll; + } + err = ice_init_interrupt_scheme(pf); if (err) { dev_err(dev, "ice_init_interrupt_scheme failed: %d\n", err); err = -EIO; - goto err_init_vsi_unroll; + goto err_init_vsi_stats_unroll; } /* In case of MSIX we are going to setup the misc vector right here @@ -4956,6 +4965,9 @@ err_msix_misc_unroll: ice_free_irq_msix_misc(pf); err_init_interrupt_unroll: ice_clear_interrupt_scheme(pf); +err_init_vsi_stats_unroll: + devm_kfree(dev, pf->vsi_stats); + pf->vsi_stats = NULL; err_init_vsi_unroll: devm_kfree(dev, pf->vsi); err_init_pf_unroll: @@ -5078,6 +5090,8 @@ static void ice_remove(struct pci_dev *pdev) continue; 
ice_vsi_free_q_vectors(pf->vsi[i]); } + devm_kfree(&pdev->dev, pf->vsi_stats); + pf->vsi_stats = NULL; ice_deinit_pf(pf); ice_devlink_destroy_regions(pf); ice_deinit_hw(&pf->hw); @@ -6325,8 +6339,7 @@ static int ice_up_complete(struct ice_vsi *vsi) ice_print_link_msg(vsi, true); netif_tx_start_all_queues(vsi->netdev); netif_carrier_on(vsi->netdev); - if (!ice_is_e810(&pf->hw)) - ice_ptp_link_change(pf, pf->hw.pf_id, true); + ice_ptp_link_change(pf, pf->hw.pf_id, true); } /* Perform an initial read of the statistics registers now to @@ -6370,10 +6383,10 @@ ice_fetch_u64_stats_per_ring(struct u64_stats_sync *syncp, unsigned int start; do { - start = u64_stats_fetch_begin_irq(syncp); + start = u64_stats_fetch_begin(syncp); *pkts = stats.pkts; *bytes = stats.bytes; - } while (u64_stats_fetch_retry_irq(syncp, start)); + } while (u64_stats_fetch_retry(syncp, start)); } /** @@ -6395,14 +6408,16 @@ ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, u64 pkts = 0, bytes = 0; ring = READ_ONCE(rings[i]); - if (!ring) + if (!ring || !ring->ring_stats) continue; - ice_fetch_u64_stats_per_ring(&ring->syncp, ring->stats, &pkts, &bytes); + ice_fetch_u64_stats_per_ring(&ring->ring_stats->syncp, + ring->ring_stats->stats, &pkts, + &bytes); vsi_stats->tx_packets += pkts; vsi_stats->tx_bytes += bytes; - vsi->tx_restart += ring->tx_stats.restart_q; - vsi->tx_busy += ring->tx_stats.tx_busy; - vsi->tx_linearize += ring->tx_stats.tx_linearize; + vsi->tx_restart += ring->ring_stats->tx_stats.restart_q; + vsi->tx_busy += ring->ring_stats->tx_stats.tx_busy; + vsi->tx_linearize += ring->ring_stats->tx_stats.tx_linearize; } } @@ -6412,6 +6427,7 @@ ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, */ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) { + struct rtnl_link_stats64 *net_stats, *stats_prev; struct rtnl_link_stats64 *vsi_stats; u64 pkts, bytes; int i; @@ -6436,12 +6452,16 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) /* update Rx rings counters */ ice_for_each_rxq(vsi, i) { struct ice_rx_ring *ring = READ_ONCE(vsi->rx_rings[i]); + struct ice_ring_stats *ring_stats; - ice_fetch_u64_stats_per_ring(&ring->syncp, ring->stats, &pkts, &bytes); + ring_stats = ring->ring_stats; + ice_fetch_u64_stats_per_ring(&ring_stats->syncp, + ring_stats->stats, &pkts, + &bytes); vsi_stats->rx_packets += pkts; vsi_stats->rx_bytes += bytes; - vsi->rx_buf_failed += ring->rx_stats.alloc_buf_failed; - vsi->rx_page_failed += ring->rx_stats.alloc_page_failed; + vsi->rx_buf_failed += ring_stats->rx_stats.alloc_buf_failed; + vsi->rx_page_failed += ring_stats->rx_stats.alloc_page_failed; } /* update XDP Tx rings counters */ @@ -6451,10 +6471,28 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) rcu_read_unlock(); - vsi->net_stats.tx_packets = vsi_stats->tx_packets; - vsi->net_stats.tx_bytes = vsi_stats->tx_bytes; - vsi->net_stats.rx_packets = vsi_stats->rx_packets; - vsi->net_stats.rx_bytes = vsi_stats->rx_bytes; + net_stats = &vsi->net_stats; + stats_prev = &vsi->net_stats_prev; + + /* clear prev counters after reset */ + if (vsi_stats->tx_packets < stats_prev->tx_packets || + vsi_stats->rx_packets < stats_prev->rx_packets) { + stats_prev->tx_packets = 0; + stats_prev->tx_bytes = 0; + stats_prev->rx_packets = 0; + stats_prev->rx_bytes = 0; + } + + /* update netdev counters */ + net_stats->tx_packets += vsi_stats->tx_packets - stats_prev->tx_packets; + net_stats->tx_bytes += vsi_stats->tx_bytes - stats_prev->tx_bytes; + net_stats->rx_packets += vsi_stats->rx_packets - stats_prev->rx_packets; + 
net_stats->rx_bytes += vsi_stats->rx_bytes - stats_prev->rx_bytes; + + stats_prev->tx_packets = vsi_stats->tx_packets; + stats_prev->tx_bytes = vsi_stats->tx_bytes; + stats_prev->rx_packets = vsi_stats->rx_packets; + stats_prev->rx_bytes = vsi_stats->rx_bytes; kfree(vsi_stats); } @@ -6516,6 +6554,9 @@ void ice_update_pf_stats(struct ice_pf *pf) prev_ps = &pf->stats_prev; cur_ps = &pf->stats; + if (ice_is_reset_in_progress(pf->state)) + pf->stat_prev_loaded = false; + ice_stat_update40(hw, GLPRT_GORCL(port), pf->stat_prev_loaded, &prev_ps->eth.rx_bytes, &cur_ps->eth.rx_bytes); @@ -6730,8 +6771,7 @@ int ice_down(struct ice_vsi *vsi) if (vsi->netdev && vsi->type == ICE_VSI_PF) { vlan_err = ice_vsi_del_vlan_zero(vsi); - if (!ice_is_e810(&vsi->back->hw)) - ice_ptp_link_change(vsi->back, vsi->back->hw.pf_id, false); + ice_ptp_link_change(vsi->back, vsi->back->hw.pf_id, false); netif_carrier_off(vsi->netdev); netif_tx_disable(vsi->netdev); } else if (vsi->type == ICE_VSI_SWITCHDEV_CTRL) { @@ -8283,7 +8323,7 @@ static void ice_rem_all_chnl_fltrs(struct ice_pf *pf) rule.rid = fltr->rid; rule.rule_id = fltr->rule_id; - rule.vsi_handle = fltr->dest_id; + rule.vsi_handle = fltr->dest_vsi_handle; status = ice_rem_adv_rule_by_id(&pf->hw, &rule); if (status) { if (status == -ENOENT) @@ -8595,6 +8635,12 @@ static int ice_setup_tc_mqprio_qdisc(struct net_device *netdev, void *type_data) switch (mode) { case TC_MQPRIO_MODE_CHANNEL: + if (pf->hw.port_info->is_custom_tx_enabled) { + dev_err(dev, "Custom Tx scheduler feature enabled, can't configure ADQ\n"); + return -EBUSY; + } + ice_tear_down_devlink_rate_tree(pf); + ret = ice_validate_mqprio_qopt(vsi, mqprio_qopt); if (ret) { netdev_err(netdev, "failed to validate_mqprio_qopt(), ret %d\n", @@ -9108,5 +9154,4 @@ static const struct net_device_ops ice_netdev_ops = { .ndo_bpf = ice_xdp, .ndo_xdp_xmit = ice_xdp_xmit, .ndo_xsk_wakeup = ice_xsk_wakeup, - .ndo_get_devlink_port = ice_get_devlink_port, }; diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 0f668468d141..d63161d73eb1 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -600,6 +600,23 @@ static u64 ice_ptp_extend_40b_ts(struct ice_pf *pf, u64 in_tstamp) } /** + * ice_ptp_is_tx_tracker_up - Check if Tx tracker is ready for new timestamps + * @tx: the PTP Tx timestamp tracker to check + * + * Check that a given PTP Tx timestamp tracker is up, i.e. that it is ready + * to accept new timestamp requests. + * + * Assumes the tx->lock spinlock is already held. + */ +static bool +ice_ptp_is_tx_tracker_up(struct ice_ptp_tx *tx) +{ + lockdep_assert_held(&tx->lock); + + return tx->init && !tx->calibrating; +} + +/** * ice_ptp_tx_tstamp - Process Tx timestamps for a port * @tx: the PTP Tx timestamp tracker * @@ -608,11 +625,13 @@ static u64 ice_ptp_extend_40b_ts(struct ice_pf *pf, u64 in_tstamp) * * If a given index has a valid timestamp, perform the following steps: * - * 1) copy the timestamp out of the PHY register - * 4) clear the timestamp valid bit in the PHY register - * 5) unlock the index by clearing the associated in_use bit. 
- * 2) extend the 40b timestamp value to get a 64bit timestamp - * 3) send that timestamp to the stack + * 1) check that the timestamp request is not stale + * 2) check that a timestamp is ready and available in the PHY memory bank + * 3) read and copy the timestamp out of the PHY register + * 4) unlock the index by clearing the associated in_use bit + * 5) check if the timestamp is stale, and discard if so + * 6) extend the 40 bit timestamp value to get a 64 bit timestamp value + * 7) send this 64 bit timestamp to the stack * * Returns true if all timestamps were handled, and false if any slots remain * without a timestamp. @@ -623,24 +642,45 @@ static u64 ice_ptp_extend_40b_ts(struct ice_pf *pf, u64 in_tstamp) * interrupt. In some cases hardware might not interrupt us again when the * timestamp is captured. * - * Note that we only take the tracking lock when clearing the bit and when - * checking if we need to re-queue this task. The only place where bits can be - * set is the hard xmit routine where an SKB has a request flag set. The only - * places where we clear bits are this work function, or the periodic cleanup - * thread. If the cleanup thread clears a bit we're processing we catch it - * when we lock to clear the bit and then grab the SKB pointer. If a Tx thread - * starts a new timestamp, we might not begin processing it right away but we - * will notice it at the end when we re-queue the task. If a Tx thread starts - * a new timestamp just after this function exits without re-queuing, - * the interrupt when the timestamp finishes should trigger. Avoiding holding - * the lock for the entire function is important in order to ensure that Tx - * threads do not get blocked while waiting for the lock. + * Note that we do not hold the tracking lock while reading the Tx timestamp. + * This is because reading the timestamp requires taking a mutex that might + * sleep. + * + * The only place where we set in_use is when a new timestamp is initiated + * with a slot index. This is only called in the hard xmit routine where an + * SKB has a request flag set. The only places where we clear this bit is this + * function, or during teardown when the Tx timestamp tracker is being + * removed. A timestamp index will never be re-used until the in_use bit for + * that index is cleared. + * + * If a Tx thread starts a new timestamp, we might not begin processing it + * right away but we will notice it at the end when we re-queue the task. + * + * If a Tx thread starts a new timestamp just after this function exits, the + * interrupt for that timestamp should re-trigger this function once + * a timestamp is ready. + * + * In cases where the PTP hardware clock was directly adjusted, some + * timestamps may not be able to safely use the timestamp extension math. In + * this case, software will set the stale bit for any outstanding Tx + * timestamps when the clock is adjusted. Then this function will discard + * those captured timestamps instead of sending them to the stack. + * + * If a Tx packet has been waiting for more than 2 seconds, it is not possible + * to correctly extend the timestamp using the cached PHC time. It is + * extremely unlikely that a packet will ever take this long to timestamp. If + * we detect a Tx timestamp request that has waited for this long we assume + * the packet will never be sent by hardware and discard it without reading + * the timestamp register. 
*/ static bool ice_ptp_tx_tstamp(struct ice_ptp_tx *tx) { struct ice_ptp_port *ptp_port; - bool ts_handled = true; + bool more_timestamps; struct ice_pf *pf; + struct ice_hw *hw; + u64 tstamp_ready; + int err; u8 idx; if (!tx->init) @@ -648,44 +688,86 @@ static bool ice_ptp_tx_tstamp(struct ice_ptp_tx *tx) ptp_port = container_of(tx, struct ice_ptp_port, tx); pf = ptp_port_to_pf(ptp_port); + hw = &pf->hw; + + /* Read the Tx ready status first */ + err = ice_get_phy_tx_tstamp_ready(hw, tx->block, &tstamp_ready); + if (err) + return false; for_each_set_bit(idx, tx->in_use, tx->len) { struct skb_shared_hwtstamps shhwtstamps = {}; - u8 phy_idx = idx + tx->quad_offset; - u64 raw_tstamp, tstamp; + u8 phy_idx = idx + tx->offset; + u64 raw_tstamp = 0, tstamp; + bool drop_ts = false; struct sk_buff *skb; - int err; + + /* Drop packets which have waited for more than 2 seconds */ + if (time_is_before_jiffies(tx->tstamps[idx].start + 2 * HZ)) { + drop_ts = true; + + /* Count the number of Tx timestamps that timed out */ + pf->ptp.tx_hwtstamp_timeouts++; + } + + /* Only read a timestamp from the PHY if its marked as ready + * by the tstamp_ready register. This avoids unnecessary + * reading of timestamps which are not yet valid. This is + * important as we must read all timestamps which are valid + * and only timestamps which are valid during each interrupt. + * If we do not, the hardware logic for generating a new + * interrupt can get stuck on some devices. + */ + if (!(tstamp_ready & BIT_ULL(phy_idx))) { + if (drop_ts) + goto skip_ts_read; + + continue; + } ice_trace(tx_tstamp_fw_req, tx->tstamps[idx].skb, idx); - err = ice_read_phy_tstamp(&pf->hw, tx->quad, phy_idx, - &raw_tstamp); + err = ice_read_phy_tstamp(hw, tx->block, phy_idx, &raw_tstamp); if (err) continue; ice_trace(tx_tstamp_fw_done, tx->tstamps[idx].skb, idx); - /* Check if the timestamp is invalid or stale */ - if (!(raw_tstamp & ICE_PTP_TS_VALID) || + /* For PHYs which don't implement a proper timestamp ready + * bitmap, verify that the timestamp value is different + * from the last cached timestamp. If it is not, skip this for + * now assuming it hasn't yet been captured by hardware. + */ + if (!drop_ts && tx->verify_cached && raw_tstamp == tx->tstamps[idx].cached_tstamp) continue; - /* The timestamp is valid, so we'll go ahead and clear this - * index and then send the timestamp up to the stack. - */ + /* Discard any timestamp value without the valid bit set */ + if (!(raw_tstamp & ICE_PTP_TS_VALID)) + drop_ts = true; + +skip_ts_read: spin_lock(&tx->lock); - tx->tstamps[idx].cached_tstamp = raw_tstamp; + if (tx->verify_cached && raw_tstamp) + tx->tstamps[idx].cached_tstamp = raw_tstamp; clear_bit(idx, tx->in_use); skb = tx->tstamps[idx].skb; tx->tstamps[idx].skb = NULL; + if (test_and_clear_bit(idx, tx->stale)) + drop_ts = true; spin_unlock(&tx->lock); - /* it's (unlikely but) possible we raced with the cleanup - * thread for discarding old timestamp requests. + /* It is unlikely but possible that the SKB will have been + * flushed at this point due to link change or teardown. */ if (!skb) continue; + if (drop_ts) { + dev_kfree_skb_any(skb); + continue; + } + /* Extend the timestamp using cached PHC time */ tstamp = ice_ptp_extend_40b_ts(pf, raw_tstamp); if (tstamp) { @@ -701,11 +783,10 @@ static bool ice_ptp_tx_tstamp(struct ice_ptp_tx *tx) * poll for remaining timestamps. 
*/ spin_lock(&tx->lock); - if (!bitmap_empty(tx->in_use, tx->len)) - ts_handled = false; + more_timestamps = tx->init && !bitmap_empty(tx->in_use, tx->len); spin_unlock(&tx->lock); - return ts_handled; + return !more_timestamps; } /** @@ -713,26 +794,33 @@ static bool ice_ptp_tx_tstamp(struct ice_ptp_tx *tx) * @tx: Tx tracking structure to initialize * * Assumes that the length has already been initialized. Do not call directly, - * use the ice_ptp_init_tx_e822 or ice_ptp_init_tx_e810 instead. + * use the ice_ptp_init_tx_* instead. */ static int ice_ptp_alloc_tx_tracker(struct ice_ptp_tx *tx) { - tx->tstamps = kcalloc(tx->len, sizeof(*tx->tstamps), GFP_KERNEL); - if (!tx->tstamps) - return -ENOMEM; + unsigned long *in_use, *stale; + struct ice_tx_tstamp *tstamps; + + tstamps = kcalloc(tx->len, sizeof(*tstamps), GFP_KERNEL); + in_use = bitmap_zalloc(tx->len, GFP_KERNEL); + stale = bitmap_zalloc(tx->len, GFP_KERNEL); + + if (!tstamps || !in_use || !stale) { + kfree(tstamps); + bitmap_free(in_use); + bitmap_free(stale); - tx->in_use = bitmap_zalloc(tx->len, GFP_KERNEL); - if (!tx->in_use) { - kfree(tx->tstamps); - tx->tstamps = NULL; return -ENOMEM; } - spin_lock_init(&tx->lock); - + tx->tstamps = tstamps; + tx->in_use = in_use; + tx->stale = stale; tx->init = 1; + spin_lock_init(&tx->lock); + return 0; } @@ -740,31 +828,71 @@ ice_ptp_alloc_tx_tracker(struct ice_ptp_tx *tx) * ice_ptp_flush_tx_tracker - Flush any remaining timestamps from the tracker * @pf: Board private structure * @tx: the tracker to flush + * + * Called during teardown when a Tx tracker is being removed. */ static void ice_ptp_flush_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) { + struct ice_hw *hw = &pf->hw; + u64 tstamp_ready; + int err; u8 idx; - for (idx = 0; idx < tx->len; idx++) { - u8 phy_idx = idx + tx->quad_offset; + err = ice_get_phy_tx_tstamp_ready(hw, tx->block, &tstamp_ready); + if (err) { + dev_dbg(ice_pf_to_dev(pf), "Failed to get the Tx tstamp ready bitmap for block %u, err %d\n", + tx->block, err); + + /* If we fail to read the Tx timestamp ready bitmap just + * skip clearing the PHY timestamps. + */ + tstamp_ready = 0; + } + + for_each_set_bit(idx, tx->in_use, tx->len) { + u8 phy_idx = idx + tx->offset; + struct sk_buff *skb; + + /* In case this timestamp is ready, we need to clear it. */ + if (!hw->reset_ongoing && (tstamp_ready & BIT_ULL(phy_idx))) + ice_clear_phy_tstamp(hw, tx->block, phy_idx); spin_lock(&tx->lock); - if (tx->tstamps[idx].skb) { - dev_kfree_skb_any(tx->tstamps[idx].skb); - tx->tstamps[idx].skb = NULL; - pf->ptp.tx_hwtstamp_flushed++; - } + skb = tx->tstamps[idx].skb; + tx->tstamps[idx].skb = NULL; clear_bit(idx, tx->in_use); + clear_bit(idx, tx->stale); spin_unlock(&tx->lock); - /* Clear any potential residual timestamp in the PHY block */ - if (!pf->hw.reset_ongoing) - ice_clear_phy_tstamp(&pf->hw, tx->quad, phy_idx); + /* Count the number of Tx timestamps flushed */ + pf->ptp.tx_hwtstamp_flushed++; + + /* Free the SKB after we've cleared the bit */ + dev_kfree_skb_any(skb); } } /** + * ice_ptp_mark_tx_tracker_stale - Mark unfinished timestamps as stale + * @tx: the tracker to mark + * + * Mark currently outstanding Tx timestamps as stale. This prevents sending + * their timestamp value to the stack. This is required to prevent extending + * the 40bit hardware timestamp incorrectly. + * + * This should be called when the PTP clock is modified such as after a set + * time request. 
+ */ +static void +ice_ptp_mark_tx_tracker_stale(struct ice_ptp_tx *tx) +{ + spin_lock(&tx->lock); + bitmap_or(tx->stale, tx->stale, tx->in_use, tx->len); + spin_unlock(&tx->lock); +} + +/** * ice_ptp_release_tx_tracker - Release allocated memory for Tx tracker * @pf: Board private structure * @tx: Tx tracking structure to release @@ -774,7 +902,12 @@ ice_ptp_flush_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) static void ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) { + spin_lock(&tx->lock); tx->init = 0; + spin_unlock(&tx->lock); + + /* wait for potentially outstanding interrupt to complete */ + synchronize_irq(pf->msix_entries[pf->oicr_idx].vector); ice_ptp_flush_tx_tracker(pf, tx); @@ -784,6 +917,9 @@ ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) bitmap_free(tx->in_use); tx->in_use = NULL; + bitmap_free(tx->stale); + tx->stale = NULL; + tx->len = 0; } @@ -801,9 +937,10 @@ ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) static int ice_ptp_init_tx_e822(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port) { - tx->quad = port / ICE_PORTS_PER_QUAD; - tx->quad_offset = (port % ICE_PORTS_PER_QUAD) * INDEX_PER_PORT; - tx->len = INDEX_PER_PORT; + tx->block = port / ICE_PORTS_PER_QUAD; + tx->offset = (port % ICE_PORTS_PER_QUAD) * INDEX_PER_PORT_E822; + tx->len = INDEX_PER_PORT_E822; + tx->verify_cached = 0; return ice_ptp_alloc_tx_tracker(tx); } @@ -819,59 +956,19 @@ ice_ptp_init_tx_e822(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port) static int ice_ptp_init_tx_e810(struct ice_pf *pf, struct ice_ptp_tx *tx) { - tx->quad = pf->hw.port_info->lport; - tx->quad_offset = 0; - tx->len = INDEX_PER_QUAD; + tx->block = pf->hw.port_info->lport; + tx->offset = 0; + tx->len = INDEX_PER_PORT_E810; + /* The E810 PHY does not provide a timestamp ready bitmap. Instead, + * verify new timestamps against cached copy of the last read + * timestamp. + */ + tx->verify_cached = 1; return ice_ptp_alloc_tx_tracker(tx); } /** - * ice_ptp_tx_tstamp_cleanup - Cleanup old timestamp requests that got dropped - * @pf: pointer to the PF struct - * @tx: PTP Tx tracker to clean up - * - * Loop through the Tx timestamp requests and see if any of them have been - * waiting for a long time. Discard any SKBs that have been waiting for more - * than 2 seconds. This is long enough to be reasonably sure that the - * timestamp will never be captured. This might happen if the packet gets - * discarded before it reaches the PHY timestamping block. 
- */ -static void ice_ptp_tx_tstamp_cleanup(struct ice_pf *pf, struct ice_ptp_tx *tx) -{ - struct ice_hw *hw = &pf->hw; - u8 idx; - - if (!tx->init) - return; - - for_each_set_bit(idx, tx->in_use, tx->len) { - struct sk_buff *skb; - u64 raw_tstamp; - - /* Check if this SKB has been waiting for too long */ - if (time_is_after_jiffies(tx->tstamps[idx].start + 2 * HZ)) - continue; - - /* Read tstamp to be able to use this register again */ - ice_read_phy_tstamp(hw, tx->quad, idx + tx->quad_offset, - &raw_tstamp); - - spin_lock(&tx->lock); - skb = tx->tstamps[idx].skb; - tx->tstamps[idx].skb = NULL; - clear_bit(idx, tx->in_use); - spin_unlock(&tx->lock); - - /* Count the number of Tx timestamps which have timed out */ - pf->ptp.tx_hwtstamp_timeouts++; - - /* Free the SKB after we've cleared the bit */ - dev_kfree_skb_any(skb); - } -} - -/** * ice_ptp_update_cached_phctime - Update the cached PHC time values * @pf: Board specific private structure * @@ -941,20 +1038,13 @@ static int ice_ptp_update_cached_phctime(struct ice_pf *pf) * @pf: Board specific private structure * * This function must be called when the cached PHC time is no longer valid, - * such as after a time adjustment. It discards any outstanding Tx timestamps, - * and updates the cached PHC time for both the PF and Rx rings. If updating - * the PHC time cannot be done immediately, a warning message is logged and - * the work item is scheduled. - * - * These steps are required in order to ensure that we do not accidentally - * report a timestamp extended by the wrong PHC cached copy. Note that we - * do not directly update the cached timestamp here because it is possible - * this might produce an error when ICE_CFG_BUSY is set. If this occurred, we - * would have to try again. During that time window, timestamps might be - * requested and returned with an invalid extension. Thus, on failure to - * immediately update the cached PHC time we would need to zero the value - * anyways. For this reason, we just zero the value immediately and queue the - * update work item. + * such as after a time adjustment. It marks any currently outstanding Tx + * timestamps as stale and updates the cached PHC time for both the PF and Rx + * rings. + * + * If updating the PHC time cannot be done immediately, a warning message is + * logged and the work item is scheduled immediately to minimize the window + * with a wrong cached timestamp. */ static void ice_ptp_reset_cached_phctime(struct ice_pf *pf) { @@ -978,8 +1068,12 @@ static void ice_ptp_reset_cached_phctime(struct ice_pf *pf) msecs_to_jiffies(10)); } - /* Flush any outstanding Tx timestamps */ - ice_ptp_flush_tx_tracker(pf, &pf->ptp.port.tx); + /* Mark any outstanding timestamps as stale, since they might have + * been captured in hardware before the time update. This could lead + * to us extending them with the wrong cached value resulting in + * incorrect timestamp values. 
+ */ + ice_ptp_mark_tx_tracker_stale(&pf->ptp.port.tx); } /** @@ -1060,19 +1154,6 @@ static u64 ice_base_incval(struct ice_pf *pf) } /** - * ice_ptp_reset_ts_memory_quad - Reset timestamp memory for one quad - * @pf: The PF private data structure - * @quad: The quad (0-4) - */ -static void ice_ptp_reset_ts_memory_quad(struct ice_pf *pf, int quad) -{ - struct ice_hw *hw = &pf->hw; - - ice_write_quad_reg_e822(hw, quad, Q_REG_TS_CTRL, Q_REG_TS_CTRL_M); - ice_write_quad_reg_e822(hw, quad, Q_REG_TS_CTRL, ~(u32)Q_REG_TS_CTRL_M); -} - -/** * ice_ptp_check_tx_fifo - Check whether Tx FIFO is in an OK state * @port: PTP port for which Tx FIFO is checked */ @@ -1124,7 +1205,7 @@ static int ice_ptp_check_tx_fifo(struct ice_ptp_port *port) dev_dbg(ice_pf_to_dev(pf), "Port %d Tx FIFO still not empty; resetting quad %d\n", port->port_num, quad); - ice_ptp_reset_ts_memory_quad(pf, quad); + ice_ptp_reset_ts_memory_quad_e822(hw, quad); port->tx_fifo_busy_cnt = FIFO_OK; return 0; } @@ -1133,130 +1214,49 @@ static int ice_ptp_check_tx_fifo(struct ice_ptp_port *port) } /** - * ice_ptp_check_tx_offset_valid - Check if the Tx PHY offset is valid - * @port: the PTP port to check - * - * Checks whether the Tx offset for the PHY associated with this port is - * valid. Returns 0 if the offset is valid, and a non-zero error code if it is - * not. - */ -static int ice_ptp_check_tx_offset_valid(struct ice_ptp_port *port) -{ - struct ice_pf *pf = ptp_port_to_pf(port); - struct device *dev = ice_pf_to_dev(pf); - struct ice_hw *hw = &pf->hw; - u32 val; - int err; - - err = ice_ptp_check_tx_fifo(port); - if (err) - return err; - - err = ice_read_phy_reg_e822(hw, port->port_num, P_REG_TX_OV_STATUS, - &val); - if (err) { - dev_err(dev, "Failed to read TX_OV_STATUS for port %d, err %d\n", - port->port_num, err); - return -EAGAIN; - } - - if (!(val & P_REG_TX_OV_STATUS_OV_M)) - return -EAGAIN; - - return 0; -} - -/** - * ice_ptp_check_rx_offset_valid - Check if the Rx PHY offset is valid - * @port: the PTP port to check - * - * Checks whether the Rx offset for the PHY associated with this port is - * valid. Returns 0 if the offset is valid, and a non-zero error code if it is - * not. - */ -static int ice_ptp_check_rx_offset_valid(struct ice_ptp_port *port) -{ - struct ice_pf *pf = ptp_port_to_pf(port); - struct device *dev = ice_pf_to_dev(pf); - struct ice_hw *hw = &pf->hw; - int err; - u32 val; - - err = ice_read_phy_reg_e822(hw, port->port_num, P_REG_RX_OV_STATUS, - &val); - if (err) { - dev_err(dev, "Failed to read RX_OV_STATUS for port %d, err %d\n", - port->port_num, err); - return err; - } - - if (!(val & P_REG_RX_OV_STATUS_OV_M)) - return -EAGAIN; - - return 0; -} - -/** - * ice_ptp_check_offset_valid - Check port offset valid bit - * @port: Port for which offset valid bit is checked - * - * Returns 0 if both Tx and Rx offset are valid, and -EAGAIN if one of the - * offset is not ready. - */ -static int ice_ptp_check_offset_valid(struct ice_ptp_port *port) -{ - int tx_err, rx_err; - - /* always check both Tx and Rx offset validity */ - tx_err = ice_ptp_check_tx_offset_valid(port); - rx_err = ice_ptp_check_rx_offset_valid(port); - - if (tx_err || rx_err) - return -EAGAIN; - - return 0; -} - -/** - * ice_ptp_wait_for_offset_valid - Check for valid Tx and Rx offsets + * ice_ptp_wait_for_offsets - Check for valid Tx and Rx offsets * @work: Pointer to the kthread_work structure for this task * - * Check whether both the Tx and Rx offsets are valid for enabling the vernier - * calibration. 
+ * Check whether hardware has completed measuring the Tx and Rx offset values + * used to configure and enable vernier timestamp calibration. + * + * Once the offset in either direction is measured, configure the associated + * registers with the calibrated offset values and enable timestamping. The Tx + * and Rx directions are configured independently as soon as their associated + * offsets are known. * - * Once we have valid offsets from hardware, update the total Tx and Rx - * offsets, and exit bypass mode. This enables more precise timestamps using - * the extra data measured during the vernier calibration process. + * This function reschedules itself until both Tx and Rx calibration have + * completed. */ -static void ice_ptp_wait_for_offset_valid(struct kthread_work *work) +static void ice_ptp_wait_for_offsets(struct kthread_work *work) { struct ice_ptp_port *port; - int err; - struct device *dev; struct ice_pf *pf; struct ice_hw *hw; + int tx_err; + int rx_err; port = container_of(work, struct ice_ptp_port, ov_work.work); pf = ptp_port_to_pf(port); hw = &pf->hw; - dev = ice_pf_to_dev(pf); - if (ice_is_reset_in_progress(pf->state)) - return; - - if (ice_ptp_check_offset_valid(port)) { - /* Offsets not ready yet, try again later */ + if (ice_is_reset_in_progress(pf->state)) { + /* wait for device driver to complete reset */ kthread_queue_delayed_work(pf->ptp.kworker, &port->ov_work, msecs_to_jiffies(100)); return; } - /* Offsets are valid, so it is safe to exit bypass mode */ - err = ice_phy_exit_bypass_e822(hw, port->port_num); - if (err) { - dev_warn(dev, "Failed to exit bypass mode for PHY port %u, err %d\n", - port->port_num, err); + tx_err = ice_ptp_check_tx_fifo(port); + if (!tx_err) + tx_err = ice_phy_cfg_tx_offset_e822(hw, port->port_num); + rx_err = ice_phy_cfg_rx_offset_e822(hw, port->port_num); + if (tx_err || rx_err) { + /* Tx and/or Rx offset not yet configured, try again later */ + kthread_queue_delayed_work(pf->ptp.kworker, + &port->ov_work, + msecs_to_jiffies(100)); return; } } @@ -1317,16 +1317,20 @@ ice_ptp_port_phy_restart(struct ice_ptp_port *ptp_port) kthread_cancel_delayed_work_sync(&ptp_port->ov_work); /* temporarily disable Tx timestamps while calibrating PHY offset */ + spin_lock(&ptp_port->tx.lock); ptp_port->tx.calibrating = true; + spin_unlock(&ptp_port->tx.lock); ptp_port->tx_fifo_busy_cnt = 0; - /* Start the PHY timer in bypass mode */ - err = ice_start_phy_timer_e822(hw, port, true); + /* Start the PHY timer in Vernier mode */ + err = ice_start_phy_timer_e822(hw, port); if (err) goto out_unlock; /* Enable Tx timestamps right away */ + spin_lock(&ptp_port->tx.lock); ptp_port->tx.calibrating = false; + spin_unlock(&ptp_port->tx.lock); kthread_queue_delayed_work(pf->ptp.kworker, &ptp_port->ov_work, 0); @@ -1341,45 +1345,33 @@ out_unlock: } /** - * ice_ptp_link_change - Set or clear port registers for timestamping + * ice_ptp_link_change - Reconfigure PTP after link status change * @pf: Board private structure * @port: Port for which the PHY start is set * @linkup: Link is up or down */ -int ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup) +void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup) { struct ice_ptp_port *ptp_port; - if (!test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) - return 0; + if (!test_bit(ICE_FLAG_PTP, pf->flags)) + return; - if (port >= ICE_NUM_EXTERNAL_PORTS) - return -EINVAL; + if (WARN_ON_ONCE(port >= ICE_NUM_EXTERNAL_PORTS)) + return; ptp_port = &pf->ptp.port; - if (ptp_port->port_num != port) - return -EINVAL; + 
if (WARN_ON_ONCE(ptp_port->port_num != port)) + return; - /* Update cached link err for this port immediately */ + /* Update cached link status for this port immediately */ ptp_port->link_up = linkup; - if (!test_bit(ICE_FLAG_PTP, pf->flags)) - /* PTP is not setup */ - return -EAGAIN; - - return ice_ptp_port_phy_restart(ptp_port); -} - -/** - * ice_ptp_reset_ts_memory - Reset timestamp memory for all quads - * @pf: The PF private data structure - */ -static void ice_ptp_reset_ts_memory(struct ice_pf *pf) -{ - int quad; + /* E810 devices do not need to reconfigure the PHY */ + if (ice_is_e810(&pf->hw)) + return; - quad = pf->hw.port_info->lport / ICE_PORTS_PER_QUAD; - ice_ptp_reset_ts_memory_quad(pf, quad); + ice_ptp_port_phy_restart(ptp_port); } /** @@ -1397,7 +1389,7 @@ static int ice_ptp_tx_ena_intr(struct ice_pf *pf, bool ena, u32 threshold) int quad; u32 val; - ice_ptp_reset_ts_memory(pf); + ice_ptp_reset_ts_memory(hw); for (quad = 0; quad < ICE_MAX_QUAD; quad++) { err = ice_read_quad_reg_e822(hw, quad, Q_REG_TX_MEM_GBL_CFG, @@ -1447,24 +1439,10 @@ static int ice_ptp_adjfine(struct ptp_clock_info *info, long scaled_ppm) { struct ice_pf *pf = ptp_info_to_pf(info); struct ice_hw *hw = &pf->hw; - u64 incval, diff; - int neg_adj = 0; + u64 incval; int err; - incval = ice_base_incval(pf); - - if (scaled_ppm < 0) { - neg_adj = 1; - scaled_ppm = -scaled_ppm; - } - - diff = mul_u64_u64_div_u64(incval, (u64)scaled_ppm, - 1000000ULL << 16); - if (neg_adj) - incval -= diff; - else - incval += diff; - + incval = adjust_by_scaled_ppm(ice_base_incval(pf), scaled_ppm); err = ice_ptp_write_incval_locked(hw, incval); if (err) { dev_err(ice_pf_to_dev(pf), "PTP failed to set incval, err %d\n", @@ -2346,11 +2324,14 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb) { u8 idx; - /* Check if this tracker is initialized */ - if (!tx->init || tx->calibrating) + spin_lock(&tx->lock); + + /* Check that this tracker is accepting new timestamp requests */ + if (!ice_ptp_is_tx_tracker_up(tx)) { + spin_unlock(&tx->lock); return -1; + } - spin_lock(&tx->lock); /* Find and set the first available index */ idx = find_first_zero_bit(tx->in_use, tx->len); if (idx < tx->len) { @@ -2359,6 +2340,7 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb) * requests. */ set_bit(idx, tx->in_use); + clear_bit(idx, tx->stale); tx->tstamps[idx].start = jiffies; tx->tstamps[idx].skb = skb_get(skb); skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; @@ -2373,7 +2355,7 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb) if (idx >= tx->len) return -1; else - return idx + tx->quad_offset; + return idx + tx->offset; } /** @@ -2398,8 +2380,6 @@ static void ice_ptp_periodic_work(struct kthread_work *work) err = ice_ptp_update_cached_phctime(pf); - ice_ptp_tx_tstamp_cleanup(pf, &pf->ptp.port.tx); - /* Run twice a second or reschedule if phc update failed */ kthread_queue_delayed_work(ptp->kworker, &ptp->work, msecs_to_jiffies(err ? 
10 : 500)); @@ -2476,7 +2456,7 @@ pfr: err = ice_ptp_init_tx_e810(pf, &ptp->port.tx); } else { kthread_init_delayed_work(&ptp->port.ov_work, - ice_ptp_wait_for_offset_valid); + ice_ptp_wait_for_offsets); err = ice_ptp_init_tx_e822(pf, &ptp->port.tx, ptp->port.port_num); } @@ -2639,7 +2619,7 @@ static int ice_ptp_init_port(struct ice_pf *pf, struct ice_ptp_port *ptp_port) return ice_ptp_init_tx_e810(pf, &ptp_port->tx); kthread_init_delayed_work(&ptp_port->ov_work, - ice_ptp_wait_for_offset_valid); + ice_ptp_wait_for_offsets); return ice_ptp_init_tx_e822(pf, &ptp_port->tx, ptp_port->port_num); } diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h index 028349295b71..9cda2f43e0e5 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp.h @@ -93,9 +93,14 @@ struct ice_perout_channel { * we discard old requests that were not fulfilled within a 2 second time * window. * Timestamp values in the PHY are read only and do not get cleared except at - * hardware reset or when a new timestamp value is captured. The cached_tstamp - * field is used to detect the case where a new timestamp has not yet been - * captured, ensuring that we avoid sending stale timestamp data to the stack. + * hardware reset or when a new timestamp value is captured. + * + * Some PHY types do not provide a "ready" bitmap indicating which timestamp + * indexes are valid. In these cases, we use a cached_tstamp to keep track of + * the last timestamp we read for a given index. If the current timestamp + * value is the same as the cached value, we assume a new timestamp hasn't + * been captured. This avoids reporting stale timestamps to the stack. This is + * only done if the verify_cached flag is set in ice_ptp_tx structure. */ struct ice_tx_tstamp { struct sk_buff *skb; @@ -105,30 +110,35 @@ struct ice_tx_tstamp { /** * struct ice_ptp_tx - Tracking structure for all Tx timestamp requests on a port - * @lock: lock to prevent concurrent write to in_use bitmap + * @lock: lock to prevent concurrent access to fields of this struct * @tstamps: array of len to store outstanding requests * @in_use: bitmap of len to indicate which slots are in use - * @quad: which quad the timestamps are captured in - * @quad_offset: offset into timestamp block of the quad to get the real index + * @stale: bitmap of len to indicate slots which have stale timestamps + * @block: which memory block (quad or port) the timestamps are captured in + * @offset: offset into timestamp block to get the real index * @len: length of the tstamps and in_use fields. * @init: if true, the tracker is initialized; * @calibrating: if true, the PHY is calibrating the Tx offset. During this * window, timestamps are temporarily disabled. 
+ * @verify_cached: if true, verify new timestamp differs from last read value */ struct ice_ptp_tx { spinlock_t lock; /* lock protecting in_use bitmap */ struct ice_tx_tstamp *tstamps; unsigned long *in_use; - u8 quad; - u8 quad_offset; + unsigned long *stale; + u8 block; + u8 offset; u8 len; - u8 init; - u8 calibrating; + u8 init : 1; + u8 calibrating : 1; + u8 verify_cached : 1; }; /* Quad and port information for initializing timestamp blocks */ #define INDEX_PER_QUAD 64 -#define INDEX_PER_PORT (INDEX_PER_QUAD / ICE_PORTS_PER_QUAD) +#define INDEX_PER_PORT_E822 16 +#define INDEX_PER_PORT_E810 64 /** * struct ice_ptp_port - data used to initialize an external port for PTP @@ -256,7 +266,7 @@ void ice_ptp_reset(struct ice_pf *pf); void ice_ptp_prepare_for_reset(struct ice_pf *pf); void ice_ptp_init(struct ice_pf *pf); void ice_ptp_release(struct ice_pf *pf); -int ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup); +void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup); #else /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */ static inline int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr) { @@ -291,7 +301,8 @@ static inline void ice_ptp_reset(struct ice_pf *pf) { } static inline void ice_ptp_prepare_for_reset(struct ice_pf *pf) { } static inline void ice_ptp_init(struct ice_pf *pf) { } static inline void ice_ptp_release(struct ice_pf *pf) { } -static inline int ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup) -{ return 0; } +static inline void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup) +{ +} #endif /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */ #endif /* _ICE_PTP_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c index 772b1f566d6e..a38614d21ea8 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c @@ -656,6 +656,32 @@ ice_clear_phy_tstamp_e822(struct ice_hw *hw, u8 quad, u8 idx) } /** + * ice_ptp_reset_ts_memory_quad_e822 - Clear all timestamps from the quad block + * @hw: pointer to the HW struct + * @quad: the quad to read from + * + * Clear all timestamps from the PHY quad block that is shared between the + * internal PHYs on the E822 devices. + */ +void ice_ptp_reset_ts_memory_quad_e822(struct ice_hw *hw, u8 quad) +{ + ice_write_quad_reg_e822(hw, quad, Q_REG_TS_CTRL, Q_REG_TS_CTRL_M); + ice_write_quad_reg_e822(hw, quad, Q_REG_TS_CTRL, ~(u32)Q_REG_TS_CTRL_M); +} + +/** + * ice_ptp_reset_ts_memory_e822 - Clear all timestamps from all quad blocks + * @hw: pointer to the HW struct + */ +static void ice_ptp_reset_ts_memory_e822(struct ice_hw *hw) +{ + unsigned int quad; + + for (quad = 0; quad < ICE_MAX_QUAD; quad++) + ice_ptp_reset_ts_memory_quad_e822(hw, quad); +} + +/** * ice_read_cgu_reg_e822 - Read a CGU register * @hw: pointer to the HW struct * @addr: Register address to read @@ -1715,21 +1741,48 @@ ice_calc_fixed_tx_offset_e822(struct ice_hw *hw, enum ice_ptp_link_spd link_spd) * adjust Tx timestamps by. This is calculated by combining some known static * latency along with the Vernier offset computations done by hardware. * - * This function must be called only after the offset registers are valid, - * i.e. after the Vernier calibration wait has passed, to ensure that the PHY - * has measured the offset. + * This function will not return successfully until the Tx offset calculations + * have been completed, which requires waiting until at least one packet has + * been transmitted by the device. 
It is safe to call this function + periodically until calibration succeeds, as it will only program the offset + once. * * To avoid overflow, when calculating the offset based on the known static * latency values, we use measurements in 1/100th of a nanosecond, and divide * the TUs per second up front. This avoids overflow while allowing * calculation of the adjustment using integer arithmetic. + * + * Returns zero on success, -EBUSY if the hardware vernier offset + * calibration has not completed, or another error code on failure. */ -static int ice_phy_cfg_tx_offset_e822(struct ice_hw *hw, u8 port) +int ice_phy_cfg_tx_offset_e822(struct ice_hw *hw, u8 port) { enum ice_ptp_link_spd link_spd; enum ice_ptp_fec_mode fec_mode; u64 total_offset, val; int err; + u32 reg; + + /* Nothing to do if we've already programmed the offset */ + err = ice_read_phy_reg_e822(hw, port, P_REG_TX_OR, &reg); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_OR for port %u, err %d\n", + port, err); + return err; + } + + if (reg) + return 0; + + err = ice_read_phy_reg_e822(hw, port, P_REG_TX_OV_STATUS, &reg); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_OV_STATUS for port %u, err %d\n", + port, err); + return err; + } + + if (!(reg & P_REG_TX_OV_STATUS_OV_M)) + return -EBUSY; err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode); if (err) @@ -1783,46 +1836,8 @@ static int ice_phy_cfg_tx_offset_e822(struct ice_hw *hw, u8 port) if (err) return err; - return 0; -} - -/** - * ice_phy_cfg_fixed_tx_offset_e822 - Configure Tx offset for bypass mode - * @hw: pointer to the HW struct - * @port: the PHY port to configure - * - * Calculate and program the fixed Tx offset, and indicate that the offset is - * ready. This can be used when operating in bypass mode. - */ -static int -ice_phy_cfg_fixed_tx_offset_e822(struct ice_hw *hw, u8 port) -{ - enum ice_ptp_link_spd link_spd; - enum ice_ptp_fec_mode fec_mode; - u64 total_offset; - int err; - - err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode); - if (err) - return err; - - total_offset = ice_calc_fixed_tx_offset_e822(hw, link_spd); - - /* Program the fixed Tx offset into the P_REG_TOTAL_TX_OFFSET_L - * register, then indicate that the Tx offset is ready. After this, - * timestamps will be enabled. - * - * Note that this skips including the more precise offsets generated - * by the Vernier calibration. - */ - err = ice_write_64b_phy_reg_e822(hw, port, P_REG_TOTAL_TX_OFFSET_L, - total_offset); - if (err) - return err; - - err = ice_write_phy_reg_e822(hw, port, P_REG_TX_OR, 1); - if (err) - return err; + dev_info(ice_hw_to_dev(hw), "Port=%d Tx vernier offset calibration complete\n", + port); return 0; } @@ -2026,6 +2041,11 @@ ice_calc_fixed_rx_offset_e822(struct ice_hw *hw, enum ice_ptp_link_spd link_spd) * measurements taken in hardware with some data about known fixed delay as * well as adjusting for multi-lane alignment delay. * + * This function will not return successfully until the Rx offset calculations + * have been completed, which requires waiting until at least one packet has + * been received by the device. It is safe to call this function periodically + * until calibration succeeds, as it will only program the offset once. + * * This function must be called only after the offset registers are valid, * i.e. after the Vernier calibration wait has passed, to ensure that the PHY * has measured the offset.
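An illustrative aside (not part of the patch): the Tx helper above and its Rx counterpart below are now safe to poll — each returns -EBUSY until hardware reports a valid vernier measurement and becomes a no-op once the offset register is programmed. A hedged sketch of that calling pattern, which the driver performs from the ov_work kthread shown earlier; the sketch_* name is hypothetical:

static int sketch_poll_vernier_offsets(struct ice_hw *hw, u8 port)
{
	int tx_err, rx_err;

	/* Each helper programs its offset at most once; -EBUSY simply
	 * means no packet has passed in that direction yet.
	 */
	tx_err = ice_phy_cfg_tx_offset_e822(hw, port);
	rx_err = ice_phy_cfg_rx_offset_e822(hw, port);

	/* Caller reschedules itself while either direction is pending */
	return tx_err ? tx_err : rx_err;
}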
@@ -2034,13 +2054,38 @@ ice_calc_fixed_rx_offset_e822(struct ice_hw *hw, enum ice_ptp_link_spd link_spd) * latency values, we use measurements in 1/100th of a nanosecond, and divide * the TUs per second up front. This avoids overflow while allowing * calculation of the adjustment using integer arithmetic. + * + * Returns zero on success, -EBUSY if the hardware vernier offset + * calibration has not completed, or another error code on failure. */ -static int ice_phy_cfg_rx_offset_e822(struct ice_hw *hw, u8 port) +int ice_phy_cfg_rx_offset_e822(struct ice_hw *hw, u8 port) { enum ice_ptp_link_spd link_spd; enum ice_ptp_fec_mode fec_mode; u64 total_offset, pmd, val; int err; + u32 reg; + + /* Nothing to do if we've already programmed the offset */ + err = ice_read_phy_reg_e822(hw, port, P_REG_RX_OR, &reg); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read RX_OR for port %u, err %d\n", + port, err); + return err; + } + + if (reg) + return 0; + + err = ice_read_phy_reg_e822(hw, port, P_REG_RX_OV_STATUS, &reg); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read RX_OV_STATUS for port %u, err %d\n", + port, err); + return err; + } + + if (!(reg & P_REG_RX_OV_STATUS_OV_M)) + return -EBUSY; err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode); if (err) @@ -2101,46 +2146,8 @@ static int ice_phy_cfg_rx_offset_e822(struct ice_hw *hw, u8 port) if (err) return err; - return 0; -} - -/** - * ice_phy_cfg_fixed_rx_offset_e822 - Configure fixed Rx offset for bypass mode - * @hw: pointer to the HW struct - * @port: the PHY port to configure - * - * Calculate and program the fixed Rx offset, and indicate that the offset is - * ready. This can be used when operating in bypass mode. - */ -static int -ice_phy_cfg_fixed_rx_offset_e822(struct ice_hw *hw, u8 port) -{ - enum ice_ptp_link_spd link_spd; - enum ice_ptp_fec_mode fec_mode; - u64 total_offset; - int err; - - err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode); - if (err) - return err; - - total_offset = ice_calc_fixed_rx_offset_e822(hw, link_spd); - - /* Program the fixed Rx offset into the P_REG_TOTAL_RX_OFFSET_L - * register, then indicate that the Rx offset is ready. After this, - * timestamps will be enabled. - * - * Note that this skips including the more precise offsets generated - * by Vernier calibration. - */ - err = ice_write_64b_phy_reg_e822(hw, port, P_REG_TOTAL_RX_OFFSET_L, - total_offset); - if (err) - return err; - - err = ice_write_phy_reg_e822(hw, port, P_REG_RX_OR, 1); - if (err) - return err; + dev_info(ice_hw_to_dev(hw), "Port=%d Rx vernier offset calibration complete\n", + port); return 0; } @@ -2323,20 +2330,14 @@ ice_stop_phy_timer_e822(struct ice_hw *hw, u8 port, bool soft_reset) * ice_start_phy_timer_e822 - Start the PHY clock timer * @hw: pointer to the HW struct * @port: the PHY port to start - * @bypass: if true, start the PHY in bypass mode * * Start the clock of a PHY port. This must be done as part of the flow to * re-calibrate Tx and Rx timestamping offsets whenever the clock time is * initialized or when link speed changes. * - * Bypass mode enables timestamps immediately without waiting for Vernier - * calibration to complete. Hardware will still continue taking Vernier - * measurements on Tx or Rx of packets, but they will not be applied to - * timestamps. Use ice_phy_exit_bypass_e822 to exit bypass mode once hardware - * has completed offset calculation. + * Hardware will take Vernier measurements on Tx or Rx of packets.
*/ -int -ice_start_phy_timer_e822(struct ice_hw *hw, u8 port, bool bypass) +int ice_start_phy_timer_e822(struct ice_hw *hw, u8 port) { u32 lo, hi, val; u64 incval; @@ -2414,110 +2415,42 @@ ice_start_phy_timer_e822(struct ice_hw *hw, u8 port, bool bypass) if (err) return err; - if (bypass) { - val |= P_REG_PS_BYPASS_MODE_M; - /* Enter BYPASS mode, enabling timestamps immediately. */ - err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val); - if (err) - return err; - - /* Program the fixed Tx offset */ - err = ice_phy_cfg_fixed_tx_offset_e822(hw, port); - if (err) - return err; - - /* Program the fixed Rx offset */ - err = ice_phy_cfg_fixed_rx_offset_e822(hw, port); - if (err) - return err; - } - ice_debug(hw, ICE_DBG_PTP, "Enabled clock on PHY port %u\n", port); return 0; } /** - * ice_phy_exit_bypass_e822 - Exit bypass mode, after vernier calculations + * ice_get_phy_tx_tstamp_ready_e822 - Read Tx memory status register * @hw: pointer to the HW struct - * @port: the PHY port to configure - * - * After hardware finishes vernier calculations for the Tx and Rx offset, this - * function can be used to exit bypass mode by updating the total Tx and Rx - * offsets, and then disabling bypass. This will enable hardware to include - * the more precise offset calibrations, increasing precision of the generated - * timestamps. + * @quad: the timestamp quad to read from + * @tstamp_ready: contents of the Tx memory status register * - * This cannot be done until hardware has measured the offsets, which requires - * waiting until at least one packet has been sent and received by the device. + * Read the Q_REG_TX_MEMORY_STATUS register indicating which timestamps in + * the PHY are ready. A set bit means the corresponding timestamp is valid and + * ready to be captured from the PHY timestamp block. 
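An illustrative aside (not part of the patch): once the ready mask is available, the Tx completion path only needs to touch indexes whose bit is set. A hedged sketch of that consumer flow, using the ice_get_phy_tx_tstamp_ready() wrapper added further down; tracker locking and the 40-bit extension are elided, and the sketch_* name is hypothetical:

static void sketch_complete_ready_tstamps(struct ice_hw *hw,
					  struct ice_ptp_tx *tx)
{
	u64 tstamp_ready;
	u8 idx;

	/* One register pair read yields a 64-bit "ready" mask for the
	 * whole block; tx->offset selects this port's window within it.
	 */
	if (ice_get_phy_tx_tstamp_ready(hw, tx->block, &tstamp_ready))
		return;

	for_each_set_bit(idx, tx->in_use, tx->len) {
		u64 raw;

		if (!(tstamp_ready & BIT_ULL(tx->offset + idx)))
			continue;

		if (ice_read_phy_tstamp(hw, tx->block, tx->offset + idx, &raw))
			continue;

		/* extend the raw value with the cached PHC time and
		 * complete the skb (omitted)
		 */
	}
}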
*/ -int ice_phy_exit_bypass_e822(struct ice_hw *hw, u8 port) +static int +ice_get_phy_tx_tstamp_ready_e822(struct ice_hw *hw, u8 quad, u64 *tstamp_ready) { + u32 hi, lo; int err; - u32 val; - - err = ice_read_phy_reg_e822(hw, port, P_REG_TX_OV_STATUS, &val); - if (err) { - ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_OV_STATUS for port %u, err %d\n", - port, err); - return err; - } - - if (!(val & P_REG_TX_OV_STATUS_OV_M)) { - ice_debug(hw, ICE_DBG_PTP, "Tx offset is not yet valid for port %u\n", - port); - return -EBUSY; - } - - err = ice_read_phy_reg_e822(hw, port, P_REG_RX_OV_STATUS, &val); - if (err) { - ice_debug(hw, ICE_DBG_PTP, "Failed to read RX_OV_STATUS for port %u, err %d\n", - port, err); - return err; - } - - if (!(val & P_REG_TX_OV_STATUS_OV_M)) { - ice_debug(hw, ICE_DBG_PTP, "Rx offset is not yet valid for port %u\n", - port); - return -EBUSY; - } - err = ice_phy_cfg_tx_offset_e822(hw, port); + err = ice_read_quad_reg_e822(hw, quad, Q_REG_TX_MEMORY_STATUS_U, &hi); if (err) { - ice_debug(hw, ICE_DBG_PTP, "Failed to program total Tx offset for port %u, err %d\n", - port, err); + ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_MEMORY_STATUS_U for quad %u, err %d\n", + quad, err); return err; } - err = ice_phy_cfg_rx_offset_e822(hw, port); + err = ice_read_quad_reg_e822(hw, quad, Q_REG_TX_MEMORY_STATUS_L, &lo); if (err) { - ice_debug(hw, ICE_DBG_PTP, "Failed to program total Rx offset for port %u, err %d\n", - port, err); + ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_MEMORY_STATUS_L for quad %u, err %d\n", + quad, err); return err; } - /* Exit bypass mode now that the offset has been updated */ - err = ice_read_phy_reg_e822(hw, port, P_REG_PS, &val); - if (err) { - ice_debug(hw, ICE_DBG_PTP, "Failed to read P_REG_PS for port %u, err %d\n", - port, err); - return err; - } - - if (!(val & P_REG_PS_BYPASS_MODE_M)) - ice_debug(hw, ICE_DBG_PTP, "Port %u not in bypass mode\n", - port); - - val &= ~P_REG_PS_BYPASS_MODE_M; - err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val); - if (err) { - ice_debug(hw, ICE_DBG_PTP, "Failed to disable bypass for port %u, err %d\n", - port, err); - return err; - } - - dev_info(ice_hw_to_dev(hw), "Exiting bypass mode on PHY port %u\n", - port); + *tstamp_ready = (u64)hi << 32 | (u64)lo; return 0; } @@ -2963,16 +2896,18 @@ bool ice_ptp_lock(struct ice_hw *hw) u32 hw_lock; int i; -#define MAX_TRIES 5 +#define MAX_TRIES 15 for (i = 0; i < MAX_TRIES; i++) { hw_lock = rd32(hw, PFTSYN_SEM + (PFTSYN_SEM_BYTES * hw->pf_id)); hw_lock = hw_lock & PFTSYN_SEM_BUSY_M; - if (!hw_lock) - break; + if (hw_lock) { + /* Somebody is holding the lock */ + usleep_range(5000, 6000); + continue; + } - /* Somebody is holding the lock */ - usleep_range(10000, 20000); + break; } return !hw_lock; @@ -3194,6 +3129,22 @@ int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx) return ice_clear_phy_tstamp_e822(hw, block, idx); } +/** + * ice_get_phy_tx_tstamp_ready_e810 - Read Tx memory status register + * @hw: pointer to the HW struct + * @port: the PHY port to read + * @tstamp_ready: contents of the Tx memory status register + * + * E810 devices do not use a Tx memory status register. Instead simply + * indicate that all timestamps are currently ready. 
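An illustrative aside (not part of the patch): because E810 reports every index as ready, newness on that hardware is detected by the verify_cached comparison described in the ice_ptp.h comment earlier in this patch. A minimal sketch, assuming the tracker entry's cached_tstamp field holds the previous raw reading:

static bool sketch_e810_tstamp_is_new(struct ice_tx_tstamp *entry, u64 raw)
{
	/* No per-index ready bit on E810: treat a timestamp as new only
	 * when it differs from the last value captured for this index.
	 */
	if (raw == entry->cached_tstamp)
		return false;

	entry->cached_tstamp = raw;
	return true;
}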
+ */ +static int +ice_get_phy_tx_tstamp_ready_e810(struct ice_hw *hw, u8 port, u64 *tstamp_ready) +{ + *tstamp_ready = 0xFFFFFFFFFFFFFFFF; + return 0; +} + /* E810T SMA functions * * The following functions operate specifically on E810T hardware and are used @@ -3377,6 +3328,18 @@ bool ice_is_pca9575_present(struct ice_hw *hw) } /** + * ice_ptp_reset_ts_memory - Reset timestamp memory for all blocks + * @hw: pointer to the HW struct + */ +void ice_ptp_reset_ts_memory(struct ice_hw *hw) +{ + if (ice_is_e810(hw)) + return; + + ice_ptp_reset_ts_memory_e822(hw); +} + +/** * ice_ptp_init_phc - Initialize PTP hardware clock * @hw: pointer to the HW struct * @@ -3397,3 +3360,24 @@ int ice_ptp_init_phc(struct ice_hw *hw) else return ice_ptp_init_phc_e822(hw); } + +/** + * ice_get_phy_tx_tstamp_ready - Read PHY Tx memory status indication + * @hw: pointer to the HW struct + * @block: the timestamp block to check + * @tstamp_ready: storage for the PHY Tx memory status information + * + * Check the PHY for Tx timestamp memory status. This reports a 64 bit value + * which indicates which timestamps in the block may be captured. A set bit + * means the timestamp can be read. An unset bit means the timestamp is not + * ready and software should avoid reading the register. + */ +int ice_get_phy_tx_tstamp_ready(struct ice_hw *hw, u8 block, u64 *tstamp_ready) +{ + if (ice_is_e810(hw)) + return ice_get_phy_tx_tstamp_ready_e810(hw, block, + tstamp_ready); + else + return ice_get_phy_tx_tstamp_ready_e822(hw, block, + tstamp_ready); +} diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h index 2bda64c76abc..3b68cb91bd81 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h @@ -133,7 +133,9 @@ int ice_ptp_write_incval_locked(struct ice_hw *hw, u64 incval); int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj); int ice_read_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx, u64 *tstamp); int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx); +void ice_ptp_reset_ts_memory(struct ice_hw *hw); int ice_ptp_init_phc(struct ice_hw *hw); +int ice_get_phy_tx_tstamp_ready(struct ice_hw *hw, u8 block, u64 *tstamp_ready); /* E822 family functions */ int ice_read_phy_reg_e822(struct ice_hw *hw, u8 port, u16 offset, u32 *val); @@ -141,6 +143,7 @@ int ice_write_phy_reg_e822(struct ice_hw *hw, u8 port, u16 offset, u32 val); int ice_read_quad_reg_e822(struct ice_hw *hw, u8 quad, u16 offset, u32 *val); int ice_write_quad_reg_e822(struct ice_hw *hw, u8 quad, u16 offset, u32 val); int ice_ptp_prep_port_adj_e822(struct ice_hw *hw, u8 port, s64 time); +void ice_ptp_reset_ts_memory_quad_e822(struct ice_hw *hw, u8 quad); /** * ice_e822_time_ref - Get the current TIME_REF from capabilities @@ -184,8 +187,9 @@ static inline u64 ice_e822_pps_delay(enum ice_time_ref_freq time_ref) /* E822 Vernier calibration functions */ int ice_stop_phy_timer_e822(struct ice_hw *hw, u8 port, bool soft_reset); -int ice_start_phy_timer_e822(struct ice_hw *hw, u8 port, bool bypass); -int ice_phy_exit_bypass_e822(struct ice_hw *hw, u8 port); +int ice_start_phy_timer_e822(struct ice_hw *hw, u8 port); +int ice_phy_cfg_tx_offset_e822(struct ice_hw *hw, u8 port); +int ice_phy_cfg_rx_offset_e822(struct ice_hw *hw, u8 port); /* E810 family functions */ int ice_ptp_init_phy_e810(struct ice_hw *hw); diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c index bd31748aae1b..fd1f8b0ad0ab 100644 --- 
a/drivers/net/ethernet/intel/ice/ice_repr.c +++ b/drivers/net/ethernet/intel/ice/ice_repr.c @@ -6,6 +6,7 @@ #include "ice_devlink.h" #include "ice_sriov.h" #include "ice_tc_lib.h" +#include "ice_dcb_lib.h" /** * ice_repr_get_sw_port_id - get port ID associated with representor @@ -134,14 +135,6 @@ static int ice_repr_stop(struct net_device *netdev) return 0; } -static struct devlink_port * -ice_repr_get_devlink_port(struct net_device *netdev) -{ - struct ice_repr *repr = ice_netdev_to_repr(netdev); - - return &repr->vf->devlink_port; -} - /** * ice_repr_sp_stats64 - get slow path stats for port representor * @dev: network interface device structure @@ -163,18 +156,20 @@ ice_repr_sp_stats64(const struct net_device *dev, u64 pkts, bytes; tx_ring = np->vsi->tx_rings[vf_id]; - ice_fetch_u64_stats_per_ring(&tx_ring->syncp, tx_ring->stats, + ice_fetch_u64_stats_per_ring(&tx_ring->ring_stats->syncp, + tx_ring->ring_stats->stats, &pkts, &bytes); stats->rx_packets = pkts; stats->rx_bytes = bytes; rx_ring = np->vsi->rx_rings[vf_id]; - ice_fetch_u64_stats_per_ring(&rx_ring->syncp, rx_ring->stats, + ice_fetch_u64_stats_per_ring(&rx_ring->ring_stats->syncp, + rx_ring->ring_stats->stats, &pkts, &bytes); stats->tx_packets = pkts; stats->tx_bytes = bytes; - stats->tx_dropped = rx_ring->rx_stats.alloc_page_failed + - rx_ring->rx_stats.alloc_buf_failed; + stats->tx_dropped = rx_ring->ring_stats->rx_stats.alloc_page_failed + + rx_ring->ring_stats->rx_stats.alloc_buf_failed; return 0; } @@ -250,7 +245,6 @@ static const struct net_device_ops ice_repr_netdev_ops = { .ndo_open = ice_repr_open, .ndo_stop = ice_repr_stop, .ndo_start_xmit = ice_eswitch_port_start_xmit, - .ndo_get_devlink_port = ice_repr_get_devlink_port, .ndo_setup_tc = ice_repr_setup_tc, .ndo_has_offload_stats = ice_repr_ndo_has_offload_stats, .ndo_get_offload_stats = ice_repr_ndo_get_offload_stats, @@ -339,12 +333,11 @@ static int ice_repr_add(struct ice_vf *vf) repr->netdev->max_mtu = ICE_MAX_MTU; SET_NETDEV_DEV(repr->netdev, ice_pf_to_dev(vf->pf)); + SET_NETDEV_DEVLINK_PORT(repr->netdev, &vf->devlink_port); err = ice_repr_reg_netdev(repr->netdev); if (err) goto err_netdev; - devlink_port_type_eth_set(&vf->devlink_port, repr->netdev); - ice_virtchnl_set_repr_ops(vf); return 0; @@ -399,6 +392,7 @@ static void ice_repr_rem(struct ice_vf *vf) */ void ice_repr_rem_from_all_vfs(struct ice_pf *pf) { + struct devlink *devlink; struct ice_vf *vf; unsigned int bkt; @@ -406,6 +400,14 @@ void ice_repr_rem_from_all_vfs(struct ice_pf *pf) ice_for_each_vf(pf, bkt, vf) ice_repr_rem(vf); + + /* since all port representors are destroyed, there is + * no point in keeping the nodes + */ + devlink = priv_to_devlink(pf); + devl_lock(devlink); + devl_rate_nodes_destroy(devlink); + devl_unlock(devlink); } /** @@ -414,6 +416,7 @@ void ice_repr_rem_from_all_vfs(struct ice_pf *pf) */ int ice_repr_add_for_all_vfs(struct ice_pf *pf) { + struct devlink *devlink; struct ice_vf *vf; unsigned int bkt; int err; @@ -426,6 +429,13 @@ int ice_repr_add_for_all_vfs(struct ice_pf *pf) goto err; } + /* only export if ADQ and DCB disabled */ + if (ice_is_adq_active(pf) || ice_is_dcb_active(pf)) + return 0; + + devlink = priv_to_devlink(pf); + ice_devlink_rate_init_tx_topology(devlink, ice_get_main_vsi(pf)); + return 0; err: diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index 118595763bba..6d08b397df2a 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -1,6 +1,7 @@ // 
SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2018, Intel Corporation. */ +#include <net/devlink.h> #include "ice_sched.h" /** @@ -142,12 +143,14 @@ ice_aq_query_sched_elems(struct ice_hw *hw, u16 elems_req, * @pi: port information structure * @layer: Scheduler layer of the node * @info: Scheduler element information from firmware + * @prealloc_node: preallocated ice_sched_node struct for SW DB * * This function inserts a scheduler node to the SW DB. */ int ice_sched_add_node(struct ice_port_info *pi, u8 layer, - struct ice_aqc_txsched_elem_data *info) + struct ice_aqc_txsched_elem_data *info, + struct ice_sched_node *prealloc_node) { struct ice_aqc_txsched_elem_data elem; struct ice_sched_node *parent; @@ -176,7 +179,10 @@ ice_sched_add_node(struct ice_port_info *pi, u8 layer, if (status) return status; - node = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*node), GFP_KERNEL); + if (prealloc_node) + node = prealloc_node; + else + node = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*node), GFP_KERNEL); if (!node) return -ENOMEM; if (hw->max_children[layer]) { @@ -355,6 +361,9 @@ void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node) /* leaf nodes have no children */ if (node->children) devm_kfree(ice_hw_to_dev(hw), node->children); + + kfree(node->name); + xa_erase(&pi->sched_node_ids, node->id); devm_kfree(ice_hw_to_dev(hw), node); } @@ -872,13 +881,15 @@ void ice_sched_cleanup_all(struct ice_hw *hw) * @num_nodes: number of nodes * @num_nodes_added: pointer to num nodes added * @first_node_teid: if new nodes are added then return the TEID of first node + * @prealloc_nodes: preallocated nodes struct for software DB * * This function add nodes to HW as well as to SW DB for a given layer */ -static int +int ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node, struct ice_sched_node *parent, u8 layer, u16 num_nodes, - u16 *num_nodes_added, u32 *first_node_teid) + u16 *num_nodes_added, u32 *first_node_teid, + struct ice_sched_node **prealloc_nodes) { struct ice_sched_node *prev, *new_node; struct ice_aqc_add_elem *buf; @@ -924,7 +935,11 @@ ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node, *num_nodes_added = num_nodes; /* add nodes to the SW DB */ for (i = 0; i < num_nodes; i++) { - status = ice_sched_add_node(pi, layer, &buf->generic[i]); + if (prealloc_nodes) + status = ice_sched_add_node(pi, layer, &buf->generic[i], prealloc_nodes[i]); + else + status = ice_sched_add_node(pi, layer, &buf->generic[i], NULL); + if (status) { ice_debug(hw, ICE_DBG_SCHED, "add nodes in SW DB failed status =%d\n", status); @@ -940,6 +955,22 @@ ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node, new_node->sibling = NULL; new_node->tc_num = tc_node->tc_num; + new_node->tx_weight = ICE_SCHED_DFLT_BW_WT; + new_node->tx_share = ICE_SCHED_DFLT_BW; + new_node->tx_max = ICE_SCHED_DFLT_BW; + new_node->name = kzalloc(SCHED_NODE_NAME_MAX_LEN, GFP_KERNEL); + if (!new_node->name) + return -ENOMEM; + + status = xa_alloc(&pi->sched_node_ids, &new_node->id, NULL, XA_LIMIT(0, UINT_MAX), + GFP_KERNEL); + if (status) { + ice_debug(hw, ICE_DBG_SCHED, "xa_alloc failed for sched node status =%d\n", + status); + break; + } + + snprintf(new_node->name, SCHED_NODE_NAME_MAX_LEN, "node_%u", new_node->id); /* add it to previous node sibling pointer */ /* Note: siblings are not linked across branches */ @@ -1003,7 +1034,7 @@ ice_sched_add_nodes_to_hw_layer(struct ice_port_info *pi, } return ice_sched_add_elems(pi, tc_node, parent, layer, num_nodes, 
- num_nodes_added, first_node_teid); + num_nodes_added, first_node_teid, NULL); } /** @@ -1268,7 +1299,7 @@ int ice_sched_init_port(struct ice_port_info *pi) ICE_AQC_ELEM_TYPE_ENTRY_POINT) hw->sw_entry_point_layer = j; - status = ice_sched_add_node(pi, j, &buf[i].generic[j]); + status = ice_sched_add_node(pi, j, &buf[i].generic[j], NULL); if (status) goto err_init_port; } @@ -2154,7 +2185,7 @@ ice_sched_get_free_vsi_parent(struct ice_hw *hw, struct ice_sched_node *node, * This function removes the child from the old parent and adds it to a new * parent */ -static void +void ice_sched_update_parent(struct ice_sched_node *new_parent, struct ice_sched_node *node) { @@ -2188,7 +2219,7 @@ ice_sched_update_parent(struct ice_sched_node *new_parent, * * This function move the child nodes to a given parent. */ -static int +int ice_sched_move_nodes(struct ice_port_info *pi, struct ice_sched_node *parent, u16 num_items, u32 *list) { @@ -3560,7 +3591,7 @@ ice_sched_set_eir_srl_excl(struct ice_port_info *pi, * node's RL profile ID of type CIR, EIR, or SRL, and removes old profile * ID from local database. The caller needs to hold scheduler lock. */ -static int +int ice_sched_set_node_bw(struct ice_port_info *pi, struct ice_sched_node *node, enum ice_rl_type rl_type, u32 bw, u8 layer_num) { @@ -3597,6 +3628,57 @@ ice_sched_set_node_bw(struct ice_port_info *pi, struct ice_sched_node *node, } /** + * ice_sched_set_node_priority - set node's priority + * @pi: port information structure + * @node: tree node + * @priority: number 0-7 representing priority among siblings + * + * This function sets priority of a node among it's siblings. + */ +int +ice_sched_set_node_priority(struct ice_port_info *pi, struct ice_sched_node *node, + u16 priority) +{ + struct ice_aqc_txsched_elem_data buf; + struct ice_aqc_txsched_elem *data; + + buf = node->info; + data = &buf.data; + + data->valid_sections |= ICE_AQC_ELEM_VALID_GENERIC; + data->generic |= FIELD_PREP(ICE_AQC_ELEM_GENERIC_PRIO_M, priority); + + return ice_sched_update_elem(pi->hw, node, &buf); +} + +/** + * ice_sched_set_node_weight - set node's weight + * @pi: port information structure + * @node: tree node + * @weight: number 1-200 representing weight for WFQ + * + * This function sets weight of the node for WFQ algorithm. + */ +int +ice_sched_set_node_weight(struct ice_port_info *pi, struct ice_sched_node *node, u16 weight) +{ + struct ice_aqc_txsched_elem_data buf; + struct ice_aqc_txsched_elem *data; + + buf = node->info; + data = &buf.data; + + data->valid_sections = ICE_AQC_ELEM_VALID_CIR | ICE_AQC_ELEM_VALID_EIR | + ICE_AQC_ELEM_VALID_GENERIC; + data->cir_bw.bw_alloc = cpu_to_le16(weight); + data->eir_bw.bw_alloc = cpu_to_le16(weight); + + data->generic |= FIELD_PREP(ICE_AQC_ELEM_GENERIC_SP_M, 0x0); + + return ice_sched_update_elem(pi->hw, node, &buf); +} + +/** * ice_sched_set_node_bw_lmt - set node's BW limit * @pi: port information structure * @node: tree node @@ -3606,7 +3688,7 @@ ice_sched_set_node_bw(struct ice_port_info *pi, struct ice_sched_node *node, * It updates node's BW limit parameters like BW RL profile ID of type CIR, * EIR, or SRL. The caller needs to hold scheduler lock. 
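An illustrative aside (not part of the patch): these setters are exported so higher layers (presumably the devlink-rate support visible elsewhere in this diff) can shape individual scheduler nodes. A hypothetical combined caller, assuming the "scheduler lock" the kernel-doc refers to is pi->sched_lock:

static int sketch_set_node_qos(struct ice_port_info *pi,
			       struct ice_sched_node *node,
			       u16 priority, u16 weight)
{
	int err;

	mutex_lock(&pi->sched_lock);
	/* priority: 0-7 among siblings; weight: 1-200 for WFQ */
	err = ice_sched_set_node_priority(pi, node, priority);
	if (!err)
		err = ice_sched_set_node_weight(pi, node, weight);
	mutex_unlock(&pi->sched_lock);

	return err;
}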
*/ -static int +int ice_sched_set_node_bw_lmt(struct ice_port_info *pi, struct ice_sched_node *node, enum ice_rl_type rl_type, u32 bw) { diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h index 4f91577fed56..9c100747445a 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.h +++ b/drivers/net/ethernet/intel/ice/ice_sched.h @@ -6,6 +6,8 @@ #include "ice_common.h" +#define SCHED_NODE_NAME_MAX_LEN 32 + #define ICE_QGRP_LAYER_OFFSET 2 #define ICE_VSI_LAYER_OFFSET 4 #define ICE_AGG_LAYER_OFFSET 6 @@ -69,6 +71,29 @@ int ice_aq_query_sched_elems(struct ice_hw *hw, u16 elems_req, struct ice_aqc_txsched_elem_data *buf, u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd); + +int +ice_sched_set_node_bw_lmt(struct ice_port_info *pi, struct ice_sched_node *node, + enum ice_rl_type rl_type, u32 bw); + +int +ice_sched_set_node_bw(struct ice_port_info *pi, struct ice_sched_node *node, + enum ice_rl_type rl_type, u32 bw, u8 layer_num); + +int +ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node, + struct ice_sched_node *parent, u8 layer, u16 num_nodes, + u16 *num_nodes_added, u32 *first_node_teid, + struct ice_sched_node **prealloc_node); + +int +ice_sched_move_nodes(struct ice_port_info *pi, struct ice_sched_node *parent, + u16 num_items, u32 *list); + +int ice_sched_set_node_priority(struct ice_port_info *pi, struct ice_sched_node *node, + u16 priority); +int ice_sched_set_node_weight(struct ice_port_info *pi, struct ice_sched_node *node, u16 weight); + int ice_sched_init_port(struct ice_port_info *pi); int ice_sched_query_res_alloc(struct ice_hw *hw); void ice_sched_get_psm_clk_freq(struct ice_hw *hw); @@ -81,7 +106,11 @@ struct ice_sched_node * ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid); int ice_sched_add_node(struct ice_port_info *pi, u8 layer, - struct ice_aqc_txsched_elem_data *info); + struct ice_aqc_txsched_elem_data *info, + struct ice_sched_node *prealloc_node); +void +ice_sched_update_parent(struct ice_sched_node *new_parent, + struct ice_sched_node *node); void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node); struct ice_sched_node *ice_sched_get_tc_node(struct ice_port_info *pi, u8 tc); struct ice_sched_node * diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index f68c555be4e9..faba0f857cd9 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -724,7 +724,7 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) */ fltr->rid = rule_added.rid; fltr->rule_id = rule_added.rule_id; - fltr->dest_id = rule_added.vsi_handle; + fltr->dest_vsi_handle = rule_added.vsi_handle; exit: kfree(list); @@ -732,6 +732,116 @@ exit: } /** + * ice_locate_vsi_using_queue - locate VSI using queue (forward to queue action) + * @vsi: Pointer to VSI + * @tc_fltr: Pointer to tc_flower_filter + * + * Locate the VSI using specified queue. 
When ADQ is not enabled, always + * return input VSI, otherwise locate corresponding VSI based on per channel + * offset and qcount + */ +static struct ice_vsi * +ice_locate_vsi_using_queue(struct ice_vsi *vsi, + struct ice_tc_flower_fltr *tc_fltr) +{ + int num_tc, tc, queue; + + /* if ADQ is not active, passed VSI is the candidate VSI */ + if (!ice_is_adq_active(vsi->back)) + return vsi; + + /* Locate the VSI (it could still be main PF VSI or CHNL_VSI depending + * upon queue number) + */ + num_tc = vsi->mqprio_qopt.qopt.num_tc; + queue = tc_fltr->action.fwd.q.queue; + + for (tc = 0; tc < num_tc; tc++) { + int qcount = vsi->mqprio_qopt.qopt.count[tc]; + int offset = vsi->mqprio_qopt.qopt.offset[tc]; + + if (queue >= offset && queue < offset + qcount) { + /* for non-ADQ TCs, passed VSI is the candidate VSI */ + if (tc < ICE_CHNL_START_TC) + return vsi; + else + return vsi->tc_map_vsi[tc]; + } + } + return NULL; +} + +static struct ice_rx_ring * +ice_locate_rx_ring_using_queue(struct ice_vsi *vsi, + struct ice_tc_flower_fltr *tc_fltr) +{ + u16 queue = tc_fltr->action.fwd.q.queue; + + return queue < vsi->num_rxq ? vsi->rx_rings[queue] : NULL; +} + +/** + * ice_tc_forward_action - Determine destination VSI and queue for the action + * @vsi: Pointer to VSI + * @tc_fltr: Pointer to TC flower filter structure + * + * Validates the tc forward action and determines the destination VSI and queue + * for the forward action. + */ +static struct ice_vsi * +ice_tc_forward_action(struct ice_vsi *vsi, struct ice_tc_flower_fltr *tc_fltr) +{ + struct ice_rx_ring *ring = NULL; + struct ice_vsi *ch_vsi = NULL; + struct ice_pf *pf = vsi->back; + struct device *dev; + u32 tc_class; + + dev = ice_pf_to_dev(pf); + + /* Get the destination VSI and/or destination queue and validate them */ + switch (tc_fltr->action.fltr_act) { + case ICE_FWD_TO_VSI: + tc_class = tc_fltr->action.fwd.tc.tc_class; + /* Select the destination VSI */ + if (tc_class < ICE_CHNL_START_TC) { + NL_SET_ERR_MSG_MOD(tc_fltr->extack, + "Unable to add filter because of unsupported destination"); + return ERR_PTR(-EOPNOTSUPP); + } + /* Locate ADQ VSI depending on hw_tc number */ + ch_vsi = vsi->tc_map_vsi[tc_class]; + break; + case ICE_FWD_TO_Q: + /* Locate the Rx queue */ + ring = ice_locate_rx_ring_using_queue(vsi, tc_fltr); + if (!ring) { + dev_err(dev, + "Unable to locate Rx queue for action fwd_to_queue: %u\n", + tc_fltr->action.fwd.q.queue); + return ERR_PTR(-EINVAL); + } + /* Determine destination VSI even though the action is + * FWD_TO_QUEUE, because QUEUE is associated with VSI + */ + ch_vsi = tc_fltr->dest_vsi; + break; + default: + dev_err(dev, + "Unable to add filter because of unsupported action %u (supported actions: fwd to tc, fwd to queue)\n", + tc_fltr->action.fltr_act); + return ERR_PTR(-EINVAL); + } + /* Must have valid ch_vsi (it could be main VSI or ADQ VSI) */ + if (!ch_vsi) { + dev_err(dev, + "Unable to add filter because specified destination VSI doesn't exist\n"); + return ERR_PTR(-EINVAL); + } + return ch_vsi; +} + +/** * ice_add_tc_flower_adv_fltr - add appropriate filter rules * @vsi: Pointer to VSI * @tc_fltr: Pointer to TC flower filter structure @@ -772,11 +882,10 @@ ice_add_tc_flower_adv_fltr(struct ice_vsi *vsi, return -EOPNOTSUPP; } - /* get the channel (aka ADQ VSI) */ - if (tc_fltr->dest_vsi) - ch_vsi = tc_fltr->dest_vsi; - else - ch_vsi = vsi->tc_map_vsi[tc_fltr->action.tc_class]; + /* validate forwarding action VSI and queue */ + ch_vsi = ice_tc_forward_action(vsi, tc_fltr); + if (IS_ERR(ch_vsi)) + return 
PTR_ERR(ch_vsi); lkups_cnt = ice_tc_count_lkups(flags, headers, tc_fltr); list = kcalloc(lkups_cnt, sizeof(*list), GFP_ATOMIC); @@ -790,30 +899,40 @@ ice_add_tc_flower_adv_fltr(struct ice_vsi *vsi, } rule_info.sw_act.fltr_act = tc_fltr->action.fltr_act; - if (tc_fltr->action.tc_class >= ICE_CHNL_START_TC) { - if (!ch_vsi) { - NL_SET_ERR_MSG_MOD(tc_fltr->extack, "Unable to add filter because specified destination doesn't exist"); - ret = -EINVAL; - goto exit; - } + /* specify the cookie as filter_rule_id */ + rule_info.fltr_rule_id = tc_fltr->cookie; - rule_info.sw_act.fltr_act = ICE_FWD_TO_VSI; + switch (tc_fltr->action.fltr_act) { + case ICE_FWD_TO_VSI: rule_info.sw_act.vsi_handle = ch_vsi->idx; - rule_info.priority = 7; + rule_info.priority = ICE_SWITCH_FLTR_PRIO_VSI; rule_info.sw_act.src = hw->pf_id; rule_info.rx = true; dev_dbg(dev, "add switch rule for TC:%u vsi_idx:%u, lkups_cnt:%u\n", - tc_fltr->action.tc_class, + tc_fltr->action.fwd.tc.tc_class, rule_info.sw_act.vsi_handle, lkups_cnt); - } else { + break; + case ICE_FWD_TO_Q: + /* HW queue number in global space */ + rule_info.sw_act.fwd_id.q_id = tc_fltr->action.fwd.q.hw_queue; + rule_info.sw_act.vsi_handle = ch_vsi->idx; + rule_info.priority = ICE_SWITCH_FLTR_PRIO_QUEUE; + rule_info.sw_act.src = hw->pf_id; + rule_info.rx = true; + dev_dbg(dev, "add switch rule action to forward to queue:%u (HW queue %u), lkups_cnt:%u\n", + tc_fltr->action.fwd.q.queue, + tc_fltr->action.fwd.q.hw_queue, lkups_cnt); + break; + default: rule_info.sw_act.flag |= ICE_FLTR_TX; + /* In case of Tx (LOOKUP_TX), src needs to be src VSI */ rule_info.sw_act.src = vsi->idx; + /* 'Rx' is false, direction of rule(LOOKUPTRX) */ rule_info.rx = false; + rule_info.priority = ICE_SWITCH_FLTR_PRIO_VSI; + break; } - /* specify the cookie as filter_rule_id */ - rule_info.fltr_rule_id = tc_fltr->cookie; - ret = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info, &rule_added); if (ret == -EEXIST) { NL_SET_ERR_MSG_MOD(tc_fltr->extack, @@ -831,19 +950,14 @@ ice_add_tc_flower_adv_fltr(struct ice_vsi *vsi, */ tc_fltr->rid = rule_added.rid; tc_fltr->rule_id = rule_added.rule_id; - if (tc_fltr->action.tc_class > 0 && ch_vsi) { - /* For PF ADQ, VSI type is set as ICE_VSI_CHNL, and - * for PF ADQ filter, it is not yet set in tc_fltr, - * hence store the dest_vsi ptr in tc_fltr - */ - if (ch_vsi->type == ICE_VSI_CHNL) - tc_fltr->dest_vsi = ch_vsi; + tc_fltr->dest_vsi_handle = rule_added.vsi_handle; + if (tc_fltr->action.fltr_act == ICE_FWD_TO_VSI || + tc_fltr->action.fltr_act == ICE_FWD_TO_Q) { + tc_fltr->dest_vsi = ch_vsi; /* keep track of advanced switch filter for - * destination VSI (channel VSI) + * destination VSI */ ch_vsi->num_chnl_fltr++; - /* in this case, dest_id is VSI handle (sw handle) */ - tc_fltr->dest_id = rule_added.vsi_handle; /* keeps track of channel filters for PF VSI */ if (vsi->type == ICE_VSI_PF && @@ -851,10 +965,22 @@ ice_add_tc_flower_adv_fltr(struct ice_vsi *vsi, ICE_TC_FLWR_FIELD_ENC_DST_MAC))) pf->num_dmac_chnl_fltrs++; } - dev_dbg(dev, "added switch rule (lkups_cnt %u, flags 0x%x) for TC %u, rid %u, rule_id %u, vsi_idx %u\n", - lkups_cnt, flags, - tc_fltr->action.tc_class, rule_added.rid, - rule_added.rule_id, rule_added.vsi_handle); + switch (tc_fltr->action.fltr_act) { + case ICE_FWD_TO_VSI: + dev_dbg(dev, "added switch rule (lkups_cnt %u, flags 0x%x), action is forward to TC %u, rid %u, rule_id %u, vsi_idx %u\n", + lkups_cnt, flags, + tc_fltr->action.fwd.tc.tc_class, rule_added.rid, + rule_added.rule_id, rule_added.vsi_handle); + break; + case 
ICE_FWD_TO_Q: + dev_dbg(dev, "added switch rule (lkups_cnt %u, flags 0x%x), action is forward to queue: %u (HW queue %u) , rid %u, rule_id %u\n", + lkups_cnt, flags, tc_fltr->action.fwd.q.queue, + tc_fltr->action.fwd.q.hw_queue, rule_added.rid, + rule_added.rule_id); + break; + default: + break; + } exit: kfree(list); return ret; @@ -1455,43 +1581,15 @@ ice_add_switch_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) } /** - * ice_handle_tclass_action - Support directing to a traffic class + * ice_prep_adq_filter - Prepare ADQ filter with the required additional headers * @vsi: Pointer to VSI - * @cls_flower: Pointer to TC flower offload structure * @fltr: Pointer to TC flower filter structure * - * Support directing traffic to a traffic class + * Prepare ADQ filter with the required additional header fields */ static int -ice_handle_tclass_action(struct ice_vsi *vsi, - struct flow_cls_offload *cls_flower, - struct ice_tc_flower_fltr *fltr) +ice_prep_adq_filter(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) { - int tc = tc_classid_to_hwtc(vsi->netdev, cls_flower->classid); - struct ice_vsi *main_vsi; - - if (tc < 0) { - NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter because specified destination is invalid"); - return -EINVAL; - } - if (!tc) { - NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter because of invalid destination"); - return -EINVAL; - } - - if (!(vsi->all_enatc & BIT(tc))) { - NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter because of non-existence destination"); - return -EINVAL; - } - - /* Redirect to a TC class or Queue Group */ - main_vsi = ice_get_main_vsi(vsi->back); - if (!main_vsi || !main_vsi->netdev) { - NL_SET_ERR_MSG_MOD(fltr->extack, - "Unable to add filter because of invalid netdevice"); - return -EINVAL; - } - if ((fltr->flags & ICE_TC_FLWR_FIELD_TENANT_ID) && (fltr->flags & (ICE_TC_FLWR_FIELD_DST_MAC | ICE_TC_FLWR_FIELD_SRC_MAC))) { @@ -1503,9 +1601,8 @@ ice_handle_tclass_action(struct ice_vsi *vsi, /* For ADQ, filter must include dest MAC address, otherwise unwanted * packets with unrelated MAC address get delivered to ADQ VSIs as long * as remaining filter criteria is satisfied such as dest IP address - * and dest/src L4 port. Following code is trying to handle: - * 1. For non-tunnel, if user specify MAC addresses, use them (means - * this code won't do anything + * and dest/src L4 port. Below code handles the following cases: + * 1. For non-tunnel, if user specify MAC addresses, use them. * 2. For non-tunnel, if user didn't specify MAC address, add implicit * dest MAC to be lower netdev's active unicast MAC address * 3. For tunnel, as of now TC-filter through flower classifier doesn't @@ -1528,35 +1625,97 @@ ice_handle_tclass_action(struct ice_vsi *vsi, eth_broadcast_addr(fltr->outer_headers.l2_mask.dst_mac); } - /* validate specified dest MAC address, make sure either it belongs to - * lower netdev or any of MACVLAN. MACVLANs MAC address are added as - * unicast MAC filter destined to main VSI. - */ - if (!ice_mac_fltr_exist(&main_vsi->back->hw, - fltr->outer_headers.l2_key.dst_mac, - main_vsi->idx)) { - NL_SET_ERR_MSG_MOD(fltr->extack, - "Unable to add filter because legacy MAC filter for specified destination doesn't exist"); - return -EINVAL; - } - /* Make sure VLAN is already added to main VSI, before allowing ADQ to * add a VLAN based filter such as MAC + VLAN + L4 port. 
*/ if (fltr->flags & ICE_TC_FLWR_FIELD_VLAN) { u16 vlan_id = be16_to_cpu(fltr->outer_headers.vlan_hdr.vlan_id); - if (!ice_vlan_fltr_exist(&main_vsi->back->hw, vlan_id, - main_vsi->idx)) { + if (!ice_vlan_fltr_exist(&vsi->back->hw, vlan_id, vsi->idx)) { NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter because legacy VLAN filter for specified destination doesn't exist"); return -EINVAL; } } + return 0; +} + +/** + * ice_handle_tclass_action - Support directing to a traffic class + * @vsi: Pointer to VSI + * @cls_flower: Pointer to TC flower offload structure + * @fltr: Pointer to TC flower filter structure + * + * Support directing traffic to a traffic class/queue-set + */ +static int +ice_handle_tclass_action(struct ice_vsi *vsi, + struct flow_cls_offload *cls_flower, + struct ice_tc_flower_fltr *fltr) +{ + int tc = tc_classid_to_hwtc(vsi->netdev, cls_flower->classid); + + /* user specified hw_tc (must be non-zero for ADQ TC), action is forward + * to hw_tc (i.e. ADQ channel number) + */ + if (tc < ICE_CHNL_START_TC) { + NL_SET_ERR_MSG_MOD(fltr->extack, + "Unable to add filter because of unsupported destination"); + return -EOPNOTSUPP; + } + if (!(vsi->all_enatc & BIT(tc))) { + NL_SET_ERR_MSG_MOD(fltr->extack, + "Unable to add filter because of non-existence destination"); + return -EINVAL; + } fltr->action.fltr_act = ICE_FWD_TO_VSI; - fltr->action.tc_class = tc; + fltr->action.fwd.tc.tc_class = tc; - return 0; + return ice_prep_adq_filter(vsi, fltr); +} + +static int +ice_tc_forward_to_queue(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr, + struct flow_action_entry *act) +{ + struct ice_vsi *ch_vsi = NULL; + u16 queue = act->rx_queue; + + if (queue > vsi->num_rxq) { + NL_SET_ERR_MSG_MOD(fltr->extack, + "Unable to add filter because specified queue is invalid"); + return -EINVAL; + } + fltr->action.fltr_act = ICE_FWD_TO_Q; + fltr->action.fwd.q.queue = queue; + /* determine corresponding HW queue */ + fltr->action.fwd.q.hw_queue = vsi->rxq_map[queue]; + + /* If ADQ is configured, and the queue belongs to ADQ VSI, then prepare + * ADQ switch filter + */ + ch_vsi = ice_locate_vsi_using_queue(vsi, fltr); + if (!ch_vsi) + return -EINVAL; + fltr->dest_vsi = ch_vsi; + if (!ice_is_chnl_fltr(fltr)) + return 0; + + return ice_prep_adq_filter(vsi, fltr); +} + +static int +ice_tc_parse_action(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr, + struct flow_action_entry *act) +{ + switch (act->id) { + case FLOW_ACTION_RX_QUEUE_MAPPING: + /* forward to queue */ + return ice_tc_forward_to_queue(vsi, fltr, act); + default: + NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported TC action"); + return -EOPNOTSUPP; + } } /** @@ -1575,7 +1734,7 @@ ice_parse_tc_flower_actions(struct ice_vsi *vsi, struct flow_rule *rule = flow_cls_offload_flow_rule(cls_flower); struct flow_action *flow_action = &rule->action; struct flow_action_entry *act; - int i; + int i, err; if (cls_flower->classid) return ice_handle_tclass_action(vsi, cls_flower, fltr); @@ -1584,21 +1743,13 @@ ice_parse_tc_flower_actions(struct ice_vsi *vsi, return -EINVAL; flow_action_for_each(i, act, flow_action) { - if (ice_is_eswitch_mode_switchdev(vsi->back)) { - int err = ice_eswitch_tc_parse_action(fltr, act); - - if (err) - return err; - continue; - } - /* Allow only one rule per filter */ - - /* Drop action */ - if (act->id == FLOW_ACTION_DROP) { - NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported action DROP"); - return -EINVAL; - } - fltr->action.fltr_act = ICE_FWD_TO_VSI; + if (ice_is_eswitch_mode_switchdev(vsi->back)) + err = 
ice_eswitch_tc_parse_action(fltr, act); + else + err = ice_tc_parse_action(vsi, fltr, act); + if (err) + return err; + continue; } return 0; } @@ -1618,7 +1769,7 @@ static int ice_del_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) rule_rem.rid = fltr->rid; rule_rem.rule_id = fltr->rule_id; - rule_rem.vsi_handle = fltr->dest_id; + rule_rem.vsi_handle = fltr->dest_vsi_handle; err = ice_rem_adv_rule_by_id(&pf->hw, &rule_rem); if (err) { if (err == -ENOENT) { diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.h b/drivers/net/ethernet/intel/ice/ice_tc_lib.h index 92642faad595..d916d1e92aa3 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.h @@ -45,7 +45,20 @@ struct ice_indr_block_priv { }; struct ice_tc_flower_action { - u32 tc_class; + /* forward action specific params */ + union { + struct { + u32 tc_class; /* forward to hw_tc */ + u32 rsvd; + } tc; + struct { + u16 queue; /* forward to queue */ + /* To add filter in HW, absolute queue number in global + * space of queues (between 0...N) is needed + */ + u16 hw_queue; + } q; + } fwd; enum ice_sw_fwd_act_type fltr_act; }; @@ -131,11 +144,11 @@ struct ice_tc_flower_fltr { */ u16 rid; u16 rule_id; - /* this could be queue/vsi_idx (sw handle)/queue_group, depending upon - * destination type + /* VSI handle of the destination VSI (it could be main PF VSI, CHNL_VSI, + * VF VSI) */ - u16 dest_id; - /* if dest_id is vsi_idx, then need to store destination VSI ptr */ + u16 dest_vsi_handle; + /* ptr to destination VSI */ struct ice_vsi *dest_vsi; /* direction of fltr for eswitch use case */ enum ice_eswitch_fltr_direction direction; @@ -162,12 +175,23 @@ struct ice_tc_flower_fltr { * @f: Pointer to tc-flower filter * * Criteria to determine of given filter is valid channel filter - * or not is based on its "destination". If destination is hw_tc (aka tc_class) - * and it is non-zero, then it is valid channel (aka ADQ) filter + * or not is based on its destination. + * For forward to VSI action, if destination is valid hw_tc (aka tc_class) + * and in supported range of TCs for ADQ, then return true. + * For forward to queue, as long as dest_vsi is valid and it is of type + * VSI_CHNL (PF ADQ VSI is of type VSI_CHNL), return true. + * NOTE: For forward to queue, correct dest_vsi is still set in tc_fltr based + * on destination queue specified. 
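An illustrative aside (not part of the patch): the two shapes the reworked forward action can take, with hypothetical example values; this is exactly what the helper just below classifies:

static void sketch_fwd_action_shapes(struct ice_tc_flower_fltr *fltr,
				     struct ice_vsi *vsi)
{
	/* "forward to hw_tc 1" style filter (ADQ traffic class) */
	fltr->action.fltr_act = ICE_FWD_TO_VSI;
	fltr->action.fwd.tc.tc_class = 1;

	/* "forward to Rx queue 4" style filter; the absolute HW queue
	 * number is resolved through the VSI's rxq_map
	 */
	fltr->action.fltr_act = ICE_FWD_TO_Q;
	fltr->action.fwd.q.queue = 4;
	fltr->action.fwd.q.hw_queue = vsi->rxq_map[4];
}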
*/ static inline bool ice_is_chnl_fltr(struct ice_tc_flower_fltr *f) { - return !!f->action.tc_class; + if (f->action.fltr_act == ICE_FWD_TO_VSI) + return f->action.fwd.tc.tc_class >= ICE_CHNL_START_TC && + f->action.fwd.tc.tc_class < ICE_CHNL_MAX_TC; + else if (f->action.fltr_act == ICE_FWD_TO_Q) + return f->dest_vsi && f->dest_vsi->type == ICE_VSI_CHNL; + + return false; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index dbe80e5053a8..086f0b3ab68d 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -325,7 +325,7 @@ static bool ice_clean_tx_irq(struct ice_tx_ring *tx_ring, int napi_budget) if (netif_tx_queue_stopped(txring_txq(tx_ring)) && !test_bit(ICE_VSI_DOWN, vsi->state)) { netif_tx_wake_queue(txring_txq(tx_ring)); - ++tx_ring->tx_stats.restart_q; + ++tx_ring->ring_stats->tx_stats.restart_q; } } @@ -367,7 +367,7 @@ int ice_setup_tx_ring(struct ice_tx_ring *tx_ring) tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; - tx_ring->tx_stats.prev_pkt = -1; + tx_ring->ring_stats->tx_stats.prev_pkt = -1; return 0; err: @@ -667,7 +667,7 @@ ice_alloc_mapped_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *bi) /* alloc new page for storage */ page = dev_alloc_pages(ice_rx_pg_order(rx_ring)); if (unlikely(!page)) { - rx_ring->rx_stats.alloc_page_failed++; + rx_ring->ring_stats->rx_stats.alloc_page_failed++; return false; } @@ -680,7 +680,7 @@ ice_alloc_mapped_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *bi) */ if (dma_mapping_error(rx_ring->dev, dma)) { __free_pages(page, ice_rx_pg_order(rx_ring)); - rx_ring->rx_stats.alloc_page_failed++; + rx_ring->ring_stats->rx_stats.alloc_page_failed++; return false; } @@ -1091,7 +1091,7 @@ ice_is_non_eop(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc) if (likely(ice_test_staterr(rx_desc->wb.status_error0, ICE_RXD_EOF))) return false; - rx_ring->rx_stats.non_eop_descs++; + rx_ring->ring_stats->rx_stats.non_eop_descs++; return true; } @@ -1222,7 +1222,7 @@ construct_skb: } /* exit if we failed to retrieve a buffer */ if (!skb) { - rx_ring->rx_stats.alloc_buf_failed++; + rx_ring->ring_stats->rx_stats.alloc_buf_failed++; if (rx_buf) rx_buf->pagecnt_bias++; break; @@ -1275,7 +1275,9 @@ construct_skb: ice_finalize_xdp_rx(xdp_ring, xdp_xmit); rx_ring->skb = skb; - ice_update_rx_ring_stats(rx_ring, total_rx_pkts, total_rx_bytes); + if (rx_ring->ring_stats) + ice_update_rx_ring_stats(rx_ring, total_rx_pkts, + total_rx_bytes); /* guarantee a trip back through this routine if there was a failure */ return failure ? budget : (int)total_rx_pkts; @@ -1292,15 +1294,25 @@ static void __ice_update_sample(struct ice_q_vector *q_vector, struct ice_tx_ring *tx_ring; ice_for_each_tx_ring(tx_ring, *rc) { - packets += tx_ring->stats.pkts; - bytes += tx_ring->stats.bytes; + struct ice_ring_stats *ring_stats; + + ring_stats = tx_ring->ring_stats; + if (!ring_stats) + continue; + packets += ring_stats->stats.pkts; + bytes += ring_stats->stats.bytes; } } else { struct ice_rx_ring *rx_ring; ice_for_each_rx_ring(rx_ring, *rc) { - packets += rx_ring->stats.pkts; - bytes += rx_ring->stats.bytes; + struct ice_ring_stats *ring_stats; + + ring_stats = rx_ring->ring_stats; + if (!ring_stats) + continue; + packets += ring_stats->stats.pkts; + bytes += ring_stats->stats.bytes; } } @@ -1549,7 +1561,7 @@ static int __ice_maybe_stop_tx(struct ice_tx_ring *tx_ring, unsigned int size) /* A reprieve! 
- use start_queue because it doesn't call schedule */ netif_tx_start_queue(txring_txq(tx_ring)); - ++tx_ring->tx_stats.restart_q; + ++tx_ring->ring_stats->tx_stats.restart_q; return 0; } @@ -2293,7 +2305,7 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring) if (__skb_linearize(skb)) goto out_drop; count = ice_txd_use_count(skb->len); - tx_ring->tx_stats.tx_linearize++; + tx_ring->ring_stats->tx_stats.tx_linearize++; } /* need: 1 descriptor per page * PAGE_SIZE/ICE_MAX_DATA_PER_TXD, @@ -2304,7 +2316,7 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring) */ if (ice_maybe_stop_tx(tx_ring, count + ICE_DESCS_PER_CACHE_LINE + ICE_DESCS_FOR_CTX_DESC)) { - tx_ring->tx_stats.tx_busy++; + tx_ring->ring_stats->tx_stats.tx_busy++; return NETDEV_TX_BUSY; } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 932b5661ec4d..4fd0e5d0a313 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -191,6 +191,16 @@ struct ice_rxq_stats { u64 alloc_buf_failed; }; +struct ice_ring_stats { + struct rcu_head rcu; /* to avoid race on free */ + struct ice_q_stats stats; + struct u64_stats_sync syncp; + union { + struct ice_txq_stats tx_stats; + struct ice_rxq_stats rx_stats; + }; +}; + enum ice_ring_state_t { ICE_TX_XPS_INIT_DONE, ICE_TX_NBITS, @@ -283,9 +293,7 @@ struct ice_rx_ring { u16 rx_buf_len; /* stats structs */ - struct ice_rxq_stats rx_stats; - struct ice_q_stats stats; - struct u64_stats_sync syncp; + struct ice_ring_stats *ring_stats; struct rcu_head rcu; /* to avoid race on free */ /* CL4 - 3rd cacheline starts here */ @@ -325,10 +333,8 @@ struct ice_tx_ring { u16 count; /* Number of descriptors */ u16 q_index; /* Queue number of ring */ /* stats structs */ - struct ice_txq_stats tx_stats; + struct ice_ring_stats *ring_stats; /* CL3 - 3rd cacheline starts here */ - struct ice_q_stats stats; - struct u64_stats_sync syncp; struct rcu_head rcu; /* to avoid race on free */ DECLARE_BITMAP(xps_state, ICE_TX_NBITS); /* XPS Config State */ struct ice_channel *ch; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index 7ee38d02d1e5..25f04266c668 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -285,7 +285,7 @@ int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring) ice_clean_xdp_irq(xdp_ring); if (!unlikely(ICE_DESC_UNUSED(xdp_ring))) { - xdp_ring->tx_stats.tx_busy++; + xdp_ring->ring_stats->tx_stats.tx_busy++; return ICE_XDP_CONSUMED; } diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index e1abfcee96dc..e3f622cad425 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -524,7 +524,14 @@ struct ice_sched_node { struct ice_sched_node *sibling; /* next sibling in the same layer */ struct ice_sched_node **children; struct ice_aqc_txsched_elem_data info; + char *name; + struct devlink_rate *rate_node; + u64 tx_max; + u64 tx_share; u32 agg_id; /* aggregator group ID */ + u32 id; + u32 tx_priority; + u32 tx_weight; u16 vsi_handle; u8 in_use; /* suspended or in use */ u8 tx_sched_layer; /* Logical Layer (1-9) */ @@ -706,7 +713,9 @@ struct ice_port_info { /* List contain profile ID(s) and other params per layer */ struct list_head rl_prof_list[ICE_AQC_TOPO_MAX_LEVEL_NUM]; struct ice_qos_cfg qos_cfg; + struct xarray sched_node_ids; u8 
is_vf:1; + u8 is_custom_tx_enabled:1; }; struct ice_switch_info { diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c index 1c51778db951..375eb6493f0f 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c @@ -700,6 +700,30 @@ void ice_dis_vf_qs(struct ice_vf *vf) } /** + * ice_err_to_virt_err - translate errors for VF return code + * @err: error return code + */ +enum virtchnl_status_code ice_err_to_virt_err(int err) +{ + switch (err) { + case 0: + return VIRTCHNL_STATUS_SUCCESS; + case -EINVAL: + case -ENODEV: + return VIRTCHNL_STATUS_ERR_PARAM; + case -ENOMEM: + return VIRTCHNL_STATUS_ERR_NO_MEMORY; + case -EALREADY: + case -EBUSY: + case -EIO: + case -ENOSPC: + return VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR; + default: + return VIRTCHNL_STATUS_ERR_NOT_SUPPORTED; + } +} + +/** * ice_check_vf_init - helper to check if VF init complete * @vf: the pointer to the VF to check */ diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h b/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h index 15887e772c76..9c8ef2b01f0f 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h @@ -25,6 +25,7 @@ void ice_dis_vf_qs(struct ice_vf *vf); int ice_check_vf_init(struct ice_vf *vf); +enum virtchnl_status_code ice_err_to_virt_err(int err); struct ice_port_info *ice_vf_get_port_info(struct ice_vf *vf); int ice_vsi_apply_spoofchk(struct ice_vsi *vsi, bool enable); bool ice_is_vf_trusted(struct ice_vf *vf); diff --git a/drivers/net/ethernet/intel/ice/ice_vf_mbx.c b/drivers/net/ethernet/intel/ice/ice_vf_mbx.c index fc8c93fa4455..d4a4001b6e5d 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_mbx.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_mbx.c @@ -39,6 +39,24 @@ ice_aq_send_msg_to_vf(struct ice_hw *hw, u16 vfid, u32 v_opcode, u32 v_retval, return ice_sq_send_cmd(hw, &hw->mailboxq, &desc, msg, msglen, cd); } +static const u32 ice_legacy_aq_to_vc_speed[15] = { + VIRTCHNL_LINK_SPEED_100MB, /* BIT(0) */ + VIRTCHNL_LINK_SPEED_100MB, + VIRTCHNL_LINK_SPEED_1GB, + VIRTCHNL_LINK_SPEED_1GB, + VIRTCHNL_LINK_SPEED_1GB, + VIRTCHNL_LINK_SPEED_10GB, + VIRTCHNL_LINK_SPEED_20GB, + VIRTCHNL_LINK_SPEED_25GB, + VIRTCHNL_LINK_SPEED_40GB, + VIRTCHNL_LINK_SPEED_40GB, + VIRTCHNL_LINK_SPEED_40GB, + VIRTCHNL_LINK_SPEED_UNKNOWN, + VIRTCHNL_LINK_SPEED_UNKNOWN, + VIRTCHNL_LINK_SPEED_UNKNOWN, + VIRTCHNL_LINK_SPEED_UNKNOWN /* BIT(14) */ +}; + /** * ice_conv_link_speed_to_virtchnl * @adv_link_support: determines the format of the returned link speed @@ -55,79 +73,17 @@ u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed) { u32 speed; - if (adv_link_support) - switch (link_speed) { - case ICE_AQ_LINK_SPEED_10MB: - speed = ICE_LINK_SPEED_10MBPS; - break; - case ICE_AQ_LINK_SPEED_100MB: - speed = ICE_LINK_SPEED_100MBPS; - break; - case ICE_AQ_LINK_SPEED_1000MB: - speed = ICE_LINK_SPEED_1000MBPS; - break; - case ICE_AQ_LINK_SPEED_2500MB: - speed = ICE_LINK_SPEED_2500MBPS; - break; - case ICE_AQ_LINK_SPEED_5GB: - speed = ICE_LINK_SPEED_5000MBPS; - break; - case ICE_AQ_LINK_SPEED_10GB: - speed = ICE_LINK_SPEED_10000MBPS; - break; - case ICE_AQ_LINK_SPEED_20GB: - speed = ICE_LINK_SPEED_20000MBPS; - break; - case ICE_AQ_LINK_SPEED_25GB: - speed = ICE_LINK_SPEED_25000MBPS; - break; - case ICE_AQ_LINK_SPEED_40GB: - speed = ICE_LINK_SPEED_40000MBPS; - break; - case ICE_AQ_LINK_SPEED_50GB: - speed = ICE_LINK_SPEED_50000MBPS; - break; - case 
ICE_AQ_LINK_SPEED_100GB: - speed = ICE_LINK_SPEED_100000MBPS; - break; - default: - speed = ICE_LINK_SPEED_UNKNOWN; - break; - } - else + if (adv_link_support) { + /* convert a BIT() value into an array index */ + speed = ice_get_link_speed(fls(link_speed) - 1); + } else { /* Virtchnl speeds are not defined for every speed supported in * the hardware. To maintain compatibility with older AVF * drivers, while reporting the speed the new speed values are * resolved to the closest known virtchnl speeds */ - switch (link_speed) { - case ICE_AQ_LINK_SPEED_10MB: - case ICE_AQ_LINK_SPEED_100MB: - speed = (u32)VIRTCHNL_LINK_SPEED_100MB; - break; - case ICE_AQ_LINK_SPEED_1000MB: - case ICE_AQ_LINK_SPEED_2500MB: - case ICE_AQ_LINK_SPEED_5GB: - speed = (u32)VIRTCHNL_LINK_SPEED_1GB; - break; - case ICE_AQ_LINK_SPEED_10GB: - speed = (u32)VIRTCHNL_LINK_SPEED_10GB; - break; - case ICE_AQ_LINK_SPEED_20GB: - speed = (u32)VIRTCHNL_LINK_SPEED_20GB; - break; - case ICE_AQ_LINK_SPEED_25GB: - speed = (u32)VIRTCHNL_LINK_SPEED_25GB; - break; - case ICE_AQ_LINK_SPEED_40GB: - case ICE_AQ_LINK_SPEED_50GB: - case ICE_AQ_LINK_SPEED_100GB: - speed = (u32)VIRTCHNL_LINK_SPEED_40GB; - break; - default: - speed = (u32)VIRTCHNL_LINK_SPEED_UNKNOWN; - break; - } + speed = ice_legacy_aq_to_vc_speed[fls(link_speed) - 1]; + } return speed; } diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index 2b4c791b6cba..dab3cd5d300e 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -462,6 +462,9 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_REG; } + if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) + vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC; + if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_FDIR_PF) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_FDIR_PF; @@ -1658,6 +1661,7 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) /* copy Rx queue info from VF into VSI */ if (qpi->rxq.ring_len > 0) { u16 max_frame_size = ice_vc_get_max_frame_size(vf); + u32 rxdid; vsi->rx_rings[i]->dma = qpi->rxq.dma_ring_addr; vsi->rx_rings[i]->count = qpi->rxq.ring_len; @@ -1685,6 +1689,24 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) vf->vf_id, i); goto error_param; } + + /* If Rx flex desc is supported, select RXDID for Rx + * queues. Otherwise, use legacy 32byte descriptor + * format. Legacy 16byte descriptor is not supported. + * If this RXDID is selected, return error. 
+ */ + if (vf->driver_caps & + VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) { + rxdid = qpi->rxq.rxdid; + if (!(BIT(rxdid) & pf->supported_rxdids)) + goto error_param; + } else { + rxdid = ICE_RXDID_LEGACY_1; + } + + ice_write_qrxflxp_cntxt(&vsi->back->hw, + vsi->rxq_map[q_idx], + rxdid, 0x03, false); } } @@ -2457,6 +2479,164 @@ error_param: } /** + * ice_vc_get_rss_hena - return the RSS HENA bits allowed by the hardware + * @vf: pointer to the VF info + */ +static int ice_vc_get_rss_hena(struct ice_vf *vf) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct virtchnl_rss_hena *vrh = NULL; + int len = 0, ret; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) { + dev_err(ice_pf_to_dev(vf->pf), "RSS not supported by PF\n"); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + len = sizeof(struct virtchnl_rss_hena); + vrh = kzalloc(len, GFP_KERNEL); + if (!vrh) { + v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; + len = 0; + goto err; + } + + vrh->hena = ICE_DEFAULT_RSS_HENA; +err: + /* send the response back to the VF */ + ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_RSS_HENA_CAPS, v_ret, + (u8 *)vrh, len); + kfree(vrh); + return ret; +} + +/** + * ice_vc_set_rss_hena - set RSS HENA bits for the VF + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + */ +static int ice_vc_set_rss_hena(struct ice_vf *vf, u8 *msg) +{ + struct virtchnl_rss_hena *vrh = (struct virtchnl_rss_hena *)msg; + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct ice_pf *pf = vf->pf; + struct ice_vsi *vsi; + struct device *dev; + int status; + + dev = ice_pf_to_dev(pf); + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { + dev_err(dev, "RSS not supported by PF\n"); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + /* clear all previously programmed RSS configuration to allow VF drivers + * the ability to customize the RSS configuration and/or completely + * disable RSS + */ + status = ice_rem_vsi_rss_cfg(&pf->hw, vsi->idx); + if (status && !vrh->hena) { + /* only report failure to clear the current RSS configuration if + * that was clearly the VF's intention (i.e. vrh->hena = 0) + */ + v_ret = ice_err_to_virt_err(status); + goto err; + } else if (status) { + /* allow the VF to update the RSS configuration even on failure + * to clear the current RSS confguration in an attempt to keep + * RSS in a working state + */ + dev_warn(dev, "Failed to clear the RSS configuration for VF %u\n", + vf->vf_id); + } + + if (vrh->hena) { + status = ice_add_avf_rss_cfg(&pf->hw, vsi->idx, vrh->hena); + v_ret = ice_err_to_virt_err(status); + } + + /* send the response to the VF */ +err: + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_SET_RSS_HENA, v_ret, + NULL, 0); +} + +/** + * ice_vc_query_rxdid - query RXDID supported by DDP package + * @vf: pointer to VF info + * + * Called from VF to query a bitmap of supported flexible + * descriptor RXDIDs of a DDP package. 
+ */ +static int ice_vc_query_rxdid(struct ice_vf *vf) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct virtchnl_supported_rxdids *rxdid = NULL; + struct ice_hw *hw = &vf->pf->hw; + struct ice_pf *pf = vf->pf; + int len = 0; + int ret, i; + u32 regval; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + len = sizeof(struct virtchnl_supported_rxdids); + rxdid = kzalloc(len, GFP_KERNEL); + if (!rxdid) { + v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; + len = 0; + goto err; + } + + /* Read flexiflag registers to determine whether the + * corresponding RXDID is configured and supported or not. + * Since Legacy 16byte descriptor format is not supported, + * start from Legacy 32byte descriptor. + */ + for (i = ICE_RXDID_LEGACY_1; i < ICE_FLEX_DESC_RXDID_MAX_NUM; i++) { + regval = rd32(hw, GLFLXP_RXDID_FLAGS(i, 0)); + if ((regval >> GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S) + & GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M) + rxdid->supported_rxdids |= BIT(i); + } + + pf->supported_rxdids = rxdid->supported_rxdids; + +err: + ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_SUPPORTED_RXDIDS, + v_ret, (u8 *)rxdid, len); + kfree(rxdid); + return ret; +} + +/** * ice_vf_init_vlan_stripping - enable/disable VLAN stripping on initialization * @vf: VF to enable/disable VLAN stripping for on initialization * @@ -3490,6 +3670,9 @@ static const struct ice_virtchnl_ops ice_virtchnl_dflt_ops = { .cfg_promiscuous_mode_msg = ice_vc_cfg_promiscuous_mode_msg, .add_vlan_msg = ice_vc_add_vlan_msg, .remove_vlan_msg = ice_vc_remove_vlan_msg, + .query_rxdid = ice_vc_query_rxdid, + .get_rss_hena = ice_vc_get_rss_hena, + .set_rss_hena_msg = ice_vc_set_rss_hena, .ena_vlan_stripping = ice_vc_ena_vlan_stripping, .dis_vlan_stripping = ice_vc_dis_vlan_stripping, .handle_rss_cfg_msg = ice_vc_handle_rss_cfg, @@ -3624,6 +3807,9 @@ static const struct ice_virtchnl_ops ice_virtchnl_repr_ops = { .cfg_promiscuous_mode_msg = ice_vc_repr_cfg_promiscuous_mode, .add_vlan_msg = ice_vc_add_vlan_msg, .remove_vlan_msg = ice_vc_remove_vlan_msg, + .query_rxdid = ice_vc_query_rxdid, + .get_rss_hena = ice_vc_get_rss_hena, + .set_rss_hena_msg = ice_vc_set_rss_hena, .ena_vlan_stripping = ice_vc_ena_vlan_stripping, .dis_vlan_stripping = ice_vc_dis_vlan_stripping, .handle_rss_cfg_msg = ice_vc_handle_rss_cfg, @@ -3764,6 +3950,15 @@ error_handler: case VIRTCHNL_OP_DEL_VLAN: err = ops->remove_vlan_msg(vf, msg); break; + case VIRTCHNL_OP_GET_SUPPORTED_RXDIDS: + err = ops->query_rxdid(vf); + break; + case VIRTCHNL_OP_GET_RSS_HENA_CAPS: + err = ops->get_rss_hena(vf); + break; + case VIRTCHNL_OP_SET_RSS_HENA: + err = ops->set_rss_hena_msg(vf, msg); + break; case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING: err = ops->ena_vlan_stripping(vf); break; diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.h b/drivers/net/ethernet/intel/ice/ice_virtchnl.h index b5a3fd8adbb4..b454654d7b0c 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.h @@ -17,6 +17,7 @@ * broadcast, and 16 for additional unicast/multicast filters */ #define ICE_MAX_MACADDR_PER_VF 18 +#define ICE_FLEX_DESC_RXDID_MAX_NUM 64 struct ice_virtchnl_ops { int (*get_ver_msg)(struct ice_vf *vf, u8 *msg); @@ -35,6 +36,9 @@ struct ice_virtchnl_ops { int (*cfg_promiscuous_mode_msg)(struct ice_vf *vf, u8 *msg); int (*add_vlan_msg)(struct ice_vf *vf, u8 *msg); int 
(*remove_vlan_msg)(struct ice_vf *vf, u8 *msg); + int (*query_rxdid)(struct ice_vf *vf); + int (*get_rss_hena)(struct ice_vf *vf); + int (*set_rss_hena_msg)(struct ice_vf *vf, u8 *msg); int (*ena_vlan_stripping)(struct ice_vf *vf); int (*dis_vlan_stripping)(struct ice_vf *vf); int (*handle_rss_cfg_msg)(struct ice_vf *vf, u8 *msg, bool add); diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c index 5a82216e7d03..7d547fa616fa 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c @@ -70,6 +70,11 @@ static const u32 rss_pf_allowlist_opcodes[] = { VIRTCHNL_OP_GET_RSS_HENA_CAPS, VIRTCHNL_OP_SET_RSS_HENA, }; +/* VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC */ +static const u32 rx_flex_desc_allowlist_opcodes[] = { + VIRTCHNL_OP_GET_SUPPORTED_RXDIDS, +}; + /* VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF */ static const u32 adv_rss_pf_allowlist_opcodes[] = { VIRTCHNL_OP_ADD_RSS_CFG, VIRTCHNL_OP_DEL_RSS_CFG, @@ -96,6 +101,7 @@ static const struct allowlist_opcode_info allowlist_opcodes[] = { ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_REQ_QUEUES, req_queues_allowlist_opcodes), ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_VLAN, vlan_allowlist_opcodes), ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_RSS_PF, rss_pf_allowlist_opcodes), + ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC, rx_flex_desc_allowlist_opcodes), ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF, adv_rss_pf_allowlist_opcodes), ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_FDIR_PF, fdir_pf_allowlist_opcodes), ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_VLAN_V2, vlan_v2_allowlist_opcodes), diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 056c904b83cc..907055b77af0 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -24,13 +24,24 @@ static struct xdp_buff **ice_xdp_buf(struct ice_rx_ring *rx_ring, u32 idx) */ static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx) { - memset(&vsi->rx_rings[q_idx]->rx_stats, 0, - sizeof(vsi->rx_rings[q_idx]->rx_stats)); - memset(&vsi->tx_rings[q_idx]->stats, 0, - sizeof(vsi->tx_rings[q_idx]->stats)); + struct ice_vsi_stats *vsi_stat; + struct ice_pf *pf; + + pf = vsi->back; + if (!pf->vsi_stats) + return; + + vsi_stat = pf->vsi_stats[vsi->idx]; + if (!vsi_stat) + return; + + memset(&vsi_stat->rx_ring_stats[q_idx]->rx_stats, 0, + sizeof(vsi_stat->rx_ring_stats[q_idx]->rx_stats)); + memset(&vsi_stat->tx_ring_stats[q_idx]->stats, 0, + sizeof(vsi_stat->tx_ring_stats[q_idx]->stats)); if (ice_is_xdp_ena_vsi(vsi)) - memset(&vsi->xdp_rings[q_idx]->stats, 0, - sizeof(vsi->xdp_rings[q_idx]->stats)); + memset(&vsi->xdp_rings[q_idx]->ring_stats->stats, 0, + sizeof(vsi->xdp_rings[q_idx]->ring_stats->stats)); } /** @@ -722,7 +733,7 @@ construct_skb: /* XDP_PASS path */ skb = ice_construct_skb_zc(rx_ring, xdp); if (!skb) { - rx_ring->rx_stats.alloc_buf_failed++; + rx_ring->ring_stats->rx_stats.alloc_buf_failed++; break; } diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index ff911af16a4b..7d60da1b7bf4 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2313,15 +2313,15 @@ static void igb_get_ethtool_stats(struct net_device *netdev, ring = adapter->tx_ring[j]; do { - start = u64_stats_fetch_begin_irq(&ring->tx_syncp); + start = u64_stats_fetch_begin(&ring->tx_syncp); data[i] = ring->tx_stats.packets; data[i+1] = ring->tx_stats.bytes; data[i+2] = 
ring->tx_stats.restart_queue; - } while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start)); + } while (u64_stats_fetch_retry(&ring->tx_syncp, start)); do { - start = u64_stats_fetch_begin_irq(&ring->tx_syncp2); + start = u64_stats_fetch_begin(&ring->tx_syncp2); restart2 = ring->tx_stats.restart_queue2; - } while (u64_stats_fetch_retry_irq(&ring->tx_syncp2, start)); + } while (u64_stats_fetch_retry(&ring->tx_syncp2, start)); data[i+2] += restart2; i += IGB_TX_QUEUE_STATS_LEN; @@ -2329,13 +2329,13 @@ static void igb_get_ethtool_stats(struct net_device *netdev, for (j = 0; j < adapter->num_rx_queues; j++) { ring = adapter->rx_ring[j]; do { - start = u64_stats_fetch_begin_irq(&ring->rx_syncp); + start = u64_stats_fetch_begin(&ring->rx_syncp); data[i] = ring->rx_stats.packets; data[i+1] = ring->rx_stats.bytes; data[i+2] = ring->rx_stats.drops; data[i+3] = ring->rx_stats.csum_err; data[i+4] = ring->rx_stats.alloc_failed; - } while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start)); + } while (u64_stats_fetch_retry(&ring->rx_syncp, start)); i += IGB_RX_QUEUE_STATS_LEN; } spin_unlock(&adapter->stats64_lock); diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index f8e32833226c..97290fc0fddd 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -1195,15 +1195,19 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter, return -ENOMEM; ring_count = txr_count + rxr_count; - size = struct_size(q_vector, ring, ring_count); + size = kmalloc_size_roundup(struct_size(q_vector, ring, ring_count)); /* allocate q_vector and rings */ q_vector = adapter->q_vector[v_idx]; if (!q_vector) { q_vector = kzalloc(size, GFP_KERNEL); } else if (size > ksize(q_vector)) { - kfree_rcu(q_vector, rcu); - q_vector = kzalloc(size, GFP_KERNEL); + struct igb_q_vector *new_q_vector; + + new_q_vector = kzalloc(size, GFP_KERNEL); + if (new_q_vector) + kfree_rcu(q_vector, rcu); + q_vector = new_q_vector; } else { memset(q_vector, 0, size); } @@ -6632,10 +6636,10 @@ void igb_update_stats(struct igb_adapter *adapter) } do { - start = u64_stats_fetch_begin_irq(&ring->rx_syncp); + start = u64_stats_fetch_begin(&ring->rx_syncp); _bytes = ring->rx_stats.bytes; _packets = ring->rx_stats.packets; - } while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start)); + } while (u64_stats_fetch_retry(&ring->rx_syncp, start)); bytes += _bytes; packets += _packets; } @@ -6648,10 +6652,10 @@ void igb_update_stats(struct igb_adapter *adapter) for (i = 0; i < adapter->num_tx_queues; i++) { struct igb_ring *ring = adapter->tx_ring[i]; do { - start = u64_stats_fetch_begin_irq(&ring->tx_syncp); + start = u64_stats_fetch_begin(&ring->tx_syncp); _bytes = ring->tx_stats.bytes; _packets = ring->tx_stats.packets; - } while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start)); + } while (u64_stats_fetch_retry(&ring->tx_syncp, start)); bytes += _bytes; packets += _packets; } diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 15e57460e19e..6f471b91f562 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -195,23 +195,9 @@ static int igb_ptp_adjfine_82576(struct ptp_clock_info *ptp, long scaled_ppm) struct igb_adapter *igb = container_of(ptp, struct igb_adapter, ptp_caps); struct e1000_hw *hw = &igb->hw; - int neg_adj = 0; - u64 rate; - u32 incvalue; - - if (scaled_ppm < 0) { - neg_adj = 1; - scaled_ppm = -scaled_ppm; - } - - incvalue = INCVALUE_82576; - rate = 
mul_u64_u64_div_u64(incvalue, (u64)scaled_ppm, - 1000000ULL << 16); + u64 incvalue; - if (neg_adj) - incvalue -= rate; - else - incvalue += rate; + incvalue = adjust_by_scaled_ppm(INCVALUE_82576, scaled_ppm); wr32(E1000_TIMINCA, INCPERIOD_82576 | (incvalue & INCVALUE_82576_MASK)); diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index 4f9d7f013a95..f7311aeb293b 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -400,6 +400,15 @@ #define IGC_DTXMXPKTSZ_TSN 0x19 /* 1600 bytes of max TX DMA packet size */ #define IGC_DTXMXPKTSZ_DEFAULT 0x98 /* 9728-byte Jumbo frames */ +/* Transmit Scheduling Latency */ +/* Latency between transmission scheduling (LaunchTime) and the time + * the packet is transmitted to the network in nanosecond. + */ +#define IGC_TXOFFSET_SPEED_10 0x000034BC +#define IGC_TXOFFSET_SPEED_100 0x00000578 +#define IGC_TXOFFSET_SPEED_1000 0x0000012C +#define IGC_TXOFFSET_SPEED_2500 0x00000578 + /* Time Sync Interrupt Causes */ #define IGC_TSICR_SYS_WRAP BIT(0) /* SYSTIM Wrap around. */ #define IGC_TSICR_TXTS BIT(1) /* Transmit Timestamp. */ diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 8cc077b712ad..5a26a7805ef8 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -839,15 +839,15 @@ static void igc_ethtool_get_stats(struct net_device *netdev, ring = adapter->tx_ring[j]; do { - start = u64_stats_fetch_begin_irq(&ring->tx_syncp); + start = u64_stats_fetch_begin(&ring->tx_syncp); data[i] = ring->tx_stats.packets; data[i + 1] = ring->tx_stats.bytes; data[i + 2] = ring->tx_stats.restart_queue; - } while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start)); + } while (u64_stats_fetch_retry(&ring->tx_syncp, start)); do { - start = u64_stats_fetch_begin_irq(&ring->tx_syncp2); + start = u64_stats_fetch_begin(&ring->tx_syncp2); restart2 = ring->tx_stats.restart_queue2; - } while (u64_stats_fetch_retry_irq(&ring->tx_syncp2, start)); + } while (u64_stats_fetch_retry(&ring->tx_syncp2, start)); data[i + 2] += restart2; i += IGC_TX_QUEUE_STATS_LEN; @@ -855,13 +855,13 @@ static void igc_ethtool_get_stats(struct net_device *netdev, for (j = 0; j < adapter->num_rx_queues; j++) { ring = adapter->rx_ring[j]; do { - start = u64_stats_fetch_begin_irq(&ring->rx_syncp); + start = u64_stats_fetch_begin(&ring->rx_syncp); data[i] = ring->rx_stats.packets; data[i + 1] = ring->rx_stats.bytes; data[i + 2] = ring->rx_stats.drops; data[i + 3] = ring->rx_stats.csum_err; data[i + 4] = ring->rx_stats.alloc_failed; - } while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start)); + } while (u64_stats_fetch_retry(&ring->rx_syncp, start)); i += IGC_RX_QUEUE_STATS_LEN; } spin_unlock(&adapter->stats64_lock); diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 34889be63e78..1586e1e435c6 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -4682,10 +4682,10 @@ void igc_update_stats(struct igc_adapter *adapter) } do { - start = u64_stats_fetch_begin_irq(&ring->rx_syncp); + start = u64_stats_fetch_begin(&ring->rx_syncp); _bytes = ring->rx_stats.bytes; _packets = ring->rx_stats.packets; - } while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start)); + } while (u64_stats_fetch_retry(&ring->rx_syncp, start)); bytes += _bytes; packets += _packets; } @@ -4699,10 +4699,10 @@ void 
igc_update_stats(struct igc_adapter *adapter) struct igc_ring *ring = adapter->tx_ring[i]; do { - start = u64_stats_fetch_begin_irq(&ring->tx_syncp); + start = u64_stats_fetch_begin(&ring->tx_syncp); _bytes = ring->tx_stats.bytes; _packets = ring->tx_stats.packets; - } while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start)); + } while (u64_stats_fetch_retry(&ring->tx_syncp, start)); bytes += _bytes; packets += _packets; } @@ -5381,6 +5381,13 @@ static void igc_watchdog_task(struct work_struct *work) break; } + /* Once the launch time has been set on the wire, there + * is a delay before the link speed can be determined + * based on link-up activity. Write into the register + * as soon as we know the correct link speed. + */ + igc_tsn_adjust_txtime_offset(adapter); + if (adapter->link_speed != SPEED_1000) goto no_wait; diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h index c0d8214148d1..01c86d36856d 100644 --- a/drivers/net/ethernet/intel/igc/igc_regs.h +++ b/drivers/net/ethernet/intel/igc/igc_regs.h @@ -224,6 +224,7 @@ /* Transmit Scheduling Registers */ #define IGC_TQAVCTRL 0x3570 #define IGC_TXQCTL(_n) (0x3344 + 0x4 * (_n)) +#define IGC_GTXOFFSET 0x3310 #define IGC_BASET_L 0x3314 #define IGC_BASET_H 0x3318 #define IGC_QBVCYCLET 0x331C diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index 0fce22de2ab8..f975ed807da1 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -48,6 +48,35 @@ static unsigned int igc_tsn_new_flags(struct igc_adapter *adapter) return new_flags; } +void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + u16 txoffset; + + if (!is_any_launchtime(adapter)) + return; + + switch (adapter->link_speed) { + case SPEED_10: + txoffset = IGC_TXOFFSET_SPEED_10; + break; + case SPEED_100: + txoffset = IGC_TXOFFSET_SPEED_100; + break; + case SPEED_1000: + txoffset = IGC_TXOFFSET_SPEED_1000; + break; + case SPEED_2500: + txoffset = IGC_TXOFFSET_SPEED_2500; + break; + default: + txoffset = 0; + break; + } + + wr32(IGC_GTXOFFSET, txoffset); +} + /* Returns the TSN specific registers to their default values after * the adapter is reset. 
*/ @@ -57,6 +86,7 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter) u32 tqavctrl; int i; + wr32(IGC_GTXOFFSET, 0); wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT); wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_DEFAULT); diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.h b/drivers/net/ethernet/intel/igc/igc_tsn.h index 1512307f5a52..b53e6af560b7 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.h +++ b/drivers/net/ethernet/intel/igc/igc_tsn.h @@ -6,5 +6,6 @@ int igc_tsn_offload_apply(struct igc_adapter *adapter); int igc_tsn_reset(struct igc_adapter *adapter); +void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter); #endif /* _IGC_BASE_H */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 5369a97ff5ec..bc68b8f2176d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -39,7 +39,10 @@ /* TX/RX descriptor defines */ #define IXGBE_DEFAULT_TXD 512 #define IXGBE_DEFAULT_TX_WORK 256 -#define IXGBE_MAX_TXD 4096 +#define IXGBE_MAX_TXD_82598 4096 +#define IXGBE_MAX_TXD_82599 8192 +#define IXGBE_MAX_TXD_X540 8192 +#define IXGBE_MAX_TXD_X550 32768 #define IXGBE_MIN_TXD 64 #if (PAGE_SIZE < 8192) @@ -47,7 +50,10 @@ #else #define IXGBE_DEFAULT_RXD 128 #endif -#define IXGBE_MAX_RXD 4096 +#define IXGBE_MAX_RXD_82598 4096 +#define IXGBE_MAX_RXD_82599 8192 +#define IXGBE_MAX_RXD_X540 8192 +#define IXGBE_MAX_RXD_X550 32768 #define IXGBE_MIN_RXD 64 /* flow control */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index e88e3dfac8c2..6cfc9dc16537 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -1117,6 +1117,42 @@ static void ixgbe_get_drvinfo(struct net_device *netdev, drvinfo->n_priv_flags = IXGBE_PRIV_FLAGS_STR_LEN; } +static u32 ixgbe_get_max_rxd(struct ixgbe_adapter *adapter) +{ + switch (adapter->hw.mac.type) { + case ixgbe_mac_82598EB: + return IXGBE_MAX_RXD_82598; + case ixgbe_mac_82599EB: + return IXGBE_MAX_RXD_82599; + case ixgbe_mac_X540: + return IXGBE_MAX_RXD_X540; + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + case ixgbe_mac_x550em_a: + return IXGBE_MAX_RXD_X550; + default: + return IXGBE_MAX_RXD_82598; + } +} + +static u32 ixgbe_get_max_txd(struct ixgbe_adapter *adapter) +{ + switch (adapter->hw.mac.type) { + case ixgbe_mac_82598EB: + return IXGBE_MAX_TXD_82598; + case ixgbe_mac_82599EB: + return IXGBE_MAX_TXD_82599; + case ixgbe_mac_X540: + return IXGBE_MAX_TXD_X540; + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + case ixgbe_mac_x550em_a: + return IXGBE_MAX_TXD_X550; + default: + return IXGBE_MAX_TXD_82598; + } +} + static void ixgbe_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, struct kernel_ethtool_ringparam *kernel_ring, @@ -1126,8 +1162,8 @@ static void ixgbe_get_ringparam(struct net_device *netdev, struct ixgbe_ring *tx_ring = adapter->tx_ring[0]; struct ixgbe_ring *rx_ring = adapter->rx_ring[0]; - ring->rx_max_pending = IXGBE_MAX_RXD; - ring->tx_max_pending = IXGBE_MAX_TXD; + ring->rx_max_pending = ixgbe_get_max_rxd(adapter); + ring->tx_max_pending = ixgbe_get_max_txd(adapter); ring->rx_pending = rx_ring->count; ring->tx_pending = tx_ring->count; } @@ -1146,11 +1182,11 @@ static int ixgbe_set_ringparam(struct net_device *netdev, return -EINVAL; new_tx_count = clamp_t(u32, ring->tx_pending, - IXGBE_MIN_TXD, IXGBE_MAX_TXD); + IXGBE_MIN_TXD, ixgbe_get_max_txd(adapter)); new_tx_count = ALIGN(new_tx_count, 
IXGBE_REQ_TX_DESCRIPTOR_MULTIPLE); new_rx_count = clamp_t(u32, ring->rx_pending, - IXGBE_MIN_RXD, IXGBE_MAX_RXD); + IXGBE_MIN_RXD, ixgbe_get_max_rxd(adapter)); new_rx_count = ALIGN(new_rx_count, IXGBE_REQ_RX_DESCRIPTOR_MULTIPLE); if ((new_tx_count == adapter->tx_ring_count) && @@ -1335,10 +1371,10 @@ static void ixgbe_get_ethtool_stats(struct net_device *netdev, } do { - start = u64_stats_fetch_begin_irq(&ring->syncp); + start = u64_stats_fetch_begin(&ring->syncp); data[i] = ring->stats.packets; data[i+1] = ring->stats.bytes; - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while (u64_stats_fetch_retry(&ring->syncp, start)); i += 2; } for (j = 0; j < IXGBE_NUM_RX_QUEUES; j++) { @@ -1351,10 +1387,10 @@ static void ixgbe_get_ethtool_stats(struct net_device *netdev, } do { - start = u64_stats_fetch_begin_irq(&ring->syncp); + start = u64_stats_fetch_begin(&ring->syncp); data[i] = ring->stats.packets; data[i+1] = ring->stats.bytes; - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while (u64_stats_fetch_retry(&ring->syncp, start)); i += 2; } @@ -1960,18 +1996,13 @@ static bool ixgbe_check_lbtest_frame(struct ixgbe_rx_buffer *rx_buffer, unsigned int frame_size) { unsigned char *data; - bool match = true; frame_size >>= 1; data = page_address(rx_buffer->page) + rx_buffer->page_offset; - if (data[3] != 0xFF || - data[frame_size + 10] != 0xBE || - data[frame_size + 12] != 0xAF) - match = false; - - return match; + return data[3] == 0xFF && data[frame_size + 10] == 0xBE && + data[frame_size + 12] == 0xAF; } static u16 ixgbe_clean_test_rings(struct ixgbe_ring *rx_ring, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c index 774de63dd93a..53a969e34883 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c @@ -585,6 +585,11 @@ static int ixgbe_ipsec_add_sa(struct xfrm_state *xs) return -EINVAL; } + if (xs->xso.type != XFRM_DEV_OFFLOAD_CRYPTO) { + netdev_err(dev, "Unsupported ipsec offload type\n"); + return -EINVAL; + } + if (xs->xso.dir == XFRM_DEV_OFFLOAD_IN) { struct rx_sa rsa; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 298cfbfcb7b6..ab8370c413f3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -9041,10 +9041,10 @@ static void ixgbe_get_ring_stats64(struct rtnl_link_stats64 *stats, if (ring) { do { - start = u64_stats_fetch_begin_irq(&ring->syncp); + start = u64_stats_fetch_begin(&ring->syncp); packets = ring->stats.packets; bytes = ring->stats.bytes; - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while (u64_stats_fetch_retry(&ring->syncp, start)); stats->tx_packets += packets; stats->tx_bytes += bytes; } @@ -9064,10 +9064,10 @@ static void ixgbe_get_stats64(struct net_device *netdev, if (ring) { do { - start = u64_stats_fetch_begin_irq(&ring->syncp); + start = u64_stats_fetch_begin(&ring->syncp); packets = ring->stats.packets; bytes = ring->stats.bytes; - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while (u64_stats_fetch_retry(&ring->syncp, start)); stats->rx_packets += packets; stats->rx_bytes += bytes; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index f8605f57bd06..0310af851086 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -451,21 +451,11 @@ static int 
ixgbe_ptp_adjfine_82599(struct ptp_clock_info *ptp, long scaled_ppm) struct ixgbe_adapter *adapter = container_of(ptp, struct ixgbe_adapter, ptp_caps); struct ixgbe_hw *hw = &adapter->hw; - u64 incval, diff; - int neg_adj = 0; - - if (scaled_ppm < 0) { - neg_adj = 1; - scaled_ppm = -scaled_ppm; - } + u64 incval; smp_mb(); incval = READ_ONCE(adapter->base_incval); - - diff = mul_u64_u64_div_u64(incval, scaled_ppm, - 1000000ULL << 16); - - incval = neg_adj ? (incval - diff) : (incval + diff); + incval = adjust_by_scaled_ppm(incval, scaled_ppm); switch (hw->mac.type) { case ixgbe_mac_X540: @@ -502,17 +492,11 @@ static int ixgbe_ptp_adjfine_X550(struct ptp_clock_info *ptp, long scaled_ppm) struct ixgbe_adapter *adapter = container_of(ptp, struct ixgbe_adapter, ptp_caps); struct ixgbe_hw *hw = &adapter->hw; - int neg_adj = 0; + bool neg_adj; u64 rate; u32 inca; - if (scaled_ppm < 0) { - neg_adj = 1; - scaled_ppm = -scaled_ppm; - } - - rate = mul_u64_u64_div_u64(IXGBE_X550_BASE_PERIOD, scaled_ppm, - 1000000ULL << 16); + neg_adj = diff_by_scaled_ppm(IXGBE_X550_BASE_PERIOD, scaled_ppm, &rate); /* warn if rate is too large */ if (rate >= INCVALUE_MASK) @@ -1318,7 +1302,7 @@ static void ixgbe_ptp_init_systime(struct ixgbe_adapter *adapter) default: /* Other devices aren't supported */ return; - }; + } IXGBE_WRITE_FLUSH(hw); } diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c index ccfa6b91aac6..296915414a7c 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c +++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c @@ -458,10 +458,10 @@ static void ixgbevf_get_ethtool_stats(struct net_device *netdev, } do { - start = u64_stats_fetch_begin_irq(&ring->syncp); + start = u64_stats_fetch_begin(&ring->syncp); data[i] = ring->stats.packets; data[i + 1] = ring->stats.bytes; - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while (u64_stats_fetch_retry(&ring->syncp, start)); i += 2; } @@ -475,10 +475,10 @@ static void ixgbevf_get_ethtool_stats(struct net_device *netdev, } do { - start = u64_stats_fetch_begin_irq(&ring->syncp); + start = u64_stats_fetch_begin(&ring->syncp); data[i] = ring->stats.packets; data[i + 1] = ring->stats.bytes; - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while (u64_stats_fetch_retry(&ring->syncp, start)); i += 2; } @@ -492,10 +492,10 @@ static void ixgbevf_get_ethtool_stats(struct net_device *netdev, } do { - start = u64_stats_fetch_begin_irq(&ring->syncp); + start = u64_stats_fetch_begin(&ring->syncp); data[i] = ring->stats.packets; data[i + 1] = ring->stats.bytes; - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while (u64_stats_fetch_retry(&ring->syncp, start)); i += 2; } } diff --git a/drivers/net/ethernet/intel/ixgbevf/ipsec.c b/drivers/net/ethernet/intel/ixgbevf/ipsec.c index 9984ebc62d78..c1cf540d162a 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ipsec.c +++ b/drivers/net/ethernet/intel/ixgbevf/ipsec.c @@ -280,6 +280,11 @@ static int ixgbevf_ipsec_add_sa(struct xfrm_state *xs) return -EINVAL; } + if (xs->xso.type != XFRM_DEV_OFFLOAD_CRYPTO) { + netdev_err(dev, "Unsupported ipsec offload type\n"); + return -EINVAL; + } + if (xs->xso.dir == XFRM_DEV_OFFLOAD_IN) { struct rx_sa rsa; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index e338fa572793..ea0a230c1153 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -2044,12 +2044,16 @@ static 
int ixgbevf_vlan_rx_add_vid(struct net_device *netdev, spin_unlock_bh(&adapter->mbx_lock); - /* translate error return types so error makes sense */ - if (err == IXGBE_ERR_MBX) - return -EIO; + if (err) { + netdev_err(netdev, "VF could not set VLAN %d\n", vid); + + /* translate error return types so error makes sense */ + if (err == IXGBE_ERR_MBX) + return -EIO; - if (err == IXGBE_ERR_INVALID_ARGUMENT) - return -EACCES; + if (err == IXGBE_ERR_INVALID_ARGUMENT) + return -EACCES; + } set_bit(vid, adapter->active_vlans); @@ -2070,6 +2074,9 @@ static int ixgbevf_vlan_rx_kill_vid(struct net_device *netdev, spin_unlock_bh(&adapter->mbx_lock); + if (err) + netdev_err(netdev, "Could not remove VLAN %d\n", vid); + clear_bit(vid, adapter->active_vlans); return err; @@ -4350,10 +4357,10 @@ static void ixgbevf_get_tx_ring_stats(struct rtnl_link_stats64 *stats, if (ring) { do { - start = u64_stats_fetch_begin_irq(&ring->syncp); + start = u64_stats_fetch_begin(&ring->syncp); bytes = ring->stats.bytes; packets = ring->stats.packets; - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while (u64_stats_fetch_retry(&ring->syncp, start)); stats->tx_bytes += bytes; stats->tx_packets += packets; } @@ -4376,10 +4383,10 @@ static void ixgbevf_get_stats(struct net_device *netdev, for (i = 0; i < adapter->num_rx_queues; i++) { ring = adapter->rx_ring[i]; do { - start = u64_stats_fetch_begin_irq(&ring->syncp); + start = u64_stats_fetch_begin(&ring->syncp); bytes = ring->stats.bytes; packets = ring->stats.packets; - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + } while (u64_stats_fetch_retry(&ring->syncp, start)); stats->rx_bytes += bytes; stats->rx_packets += packets; } |
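Several hunks above (the igb, igc, ixgbe and ixgbevf ethtool and stats paths) switch the per-ring stat readers from u64_stats_fetch_begin_irq()/u64_stats_fetch_retry_irq() to the plain begin/retry helpers. A minimal sketch of that reader pattern is shown below; demo_ring and demo_read_ring_stats are illustrative names, not symbols from any of the drivers in this diff.

#include <linux/u64_stats_sync.h>

struct demo_ring {
	u64 packets;
	u64 bytes;
	struct u64_stats_sync syncp;	/* pairs writers and readers of the counters above */
};

/* Reader side: retry the snapshot if a writer updated the counters mid-read. */
static void demo_read_ring_stats(const struct demo_ring *ring,
				 u64 *packets, u64 *bytes)
{
	unsigned int start;

	do {
		start = u64_stats_fetch_begin(&ring->syncp);
		*packets = ring->packets;
		*bytes = ring->bytes;
	} while (u64_stats_fetch_retry(&ring->syncp, start));
}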
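The igb and ixgbe PTP hunks replace open-coded frequency-adjustment math with the adjust_by_scaled_ppm()/diff_by_scaled_ppm() helpers. A sketch of the arithmetic those helpers stand in for, mirroring the removed lines (demo_adjust_by_scaled_ppm is an illustrative name, not a kernel symbol):

#include <linux/math64.h>

static u64 demo_adjust_by_scaled_ppm(u64 base, long scaled_ppm)
{
	bool neg_adj = scaled_ppm < 0;
	u64 diff;

	if (neg_adj)
		scaled_ppm = -scaled_ppm;

	/* scaled_ppm is parts per million with a 16-bit fractional part */
	diff = mul_u64_u64_div_u64(base, (u64)scaled_ppm, 1000000ULL << 16);

	return neg_adj ? base - diff : base + diff;
}

The ixgbe X550 path keeps diff_by_scaled_ppm(), which returns the magnitude and sign separately, apparently because that device programs direction and increment into distinct register fields rather than taking a single adjusted increment value.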