diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-19 20:05:34 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-19 20:05:34 +0300 |
commit | 1200b6809dfd9d73bc4c7db76d288c35fa4b2ebe (patch) | |
tree | 552e03de245cdbd0780ca1215914edc4a26540f7 /drivers/net/ethernet/intel | |
parent | 6b5f04b6cf8ebab9a65d9c0026c650bb2538fd0f (diff) | |
parent | fe30937b65354c7fec244caebbdaae68e28ca797 (diff) | |
download | linux-1200b6809dfd9d73bc4c7db76d288c35fa4b2ebe.tar.xz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
"Highlights:
1) Support more Realtek wireless chips, from Jes Sorenson.
2) New BPF types for per-cpu hash and arrap maps, from Alexei
Starovoitov.
3) Make several TCP sysctls per-namespace, from Nikolay Borisov.
4) Allow the use of SO_REUSEPORT in order to do per-thread processing
of incoming TCP/UDP connections. The muxing can be done using a
BPF program which hashes the incoming packet. From Craig Gallek.
5) Add a multiplexer for TCP streams, to provide a messaged based
interface. BPF programs can be used to determine the message
boundaries. From Tom Herbert.
6) Add 802.1AE MACSEC support, from Sabrina Dubroca.
7) Avoid factorial complexity when taking down an inetdev interface
with lots of configured addresses. We were doing things like
traversing the entire address less for each address removed, and
flushing the entire netfilter conntrack table for every address as
well.
8) Add and use SKB bulk free infrastructure, from Jesper Brouer.
9) Allow offloading u32 classifiers to hardware, and implement for
ixgbe, from John Fastabend.
10) Allow configuring IRQ coalescing parameters on a per-queue basis,
from Kan Liang.
11) Extend ethtool so that larger link mode masks can be supported.
From David Decotigny.
12) Introduce devlink, which can be used to configure port link types
(ethernet vs Infiniband, etc.), port splitting, and switch device
level attributes as a whole. From Jiri Pirko.
13) Hardware offload support for flower classifiers, from Amir Vadai.
14) Add "Local Checksum Offload". Basically, for a tunneled packet
the checksum of the outer header is 'constant' (because with the
checksum field filled into the inner protocol header, the payload
of the outer frame checksums to 'zero'), and we can take advantage
of that in various ways. From Edward Cree"
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1548 commits)
bonding: fix bond_get_stats()
net: bcmgenet: fix dma api length mismatch
net/mlx4_core: Fix backward compatibility on VFs
phy: mdio-thunder: Fix some Kconfig typos
lan78xx: add ndo_get_stats64
lan78xx: handle statistics counter rollover
RDS: TCP: Remove unused constant
RDS: TCP: Add sysctl tunables for sndbuf/rcvbuf on rds-tcp socket
net: smc911x: convert pxa dma to dmaengine
team: remove duplicate set of flag IFF_MULTICAST
bonding: remove duplicate set of flag IFF_MULTICAST
net: fix a comment typo
ethernet: micrel: fix some error codes
ip_tunnels, bpf: define IP_TUNNEL_OPTS_MAX and use it
bpf, dst: add and use dst_tclassid helper
bpf: make skb->tc_classid also readable
net: mvneta: bm: clarify dependencies
cls_bpf: reset class and reuse major in da
ldmvsw: Checkpatch sunvnet.c and sunvnet_common.c
ldmvsw: Add ldmvsw.c driver code
...
Diffstat (limited to 'drivers/net/ethernet/intel')
52 files changed, 4297 insertions, 2123 deletions
diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h index b3949d5bef5c..4e733bf1a38e 100644 --- a/drivers/net/ethernet/intel/e1000e/hw.h +++ b/drivers/net/ethernet/intel/e1000e/hw.h @@ -92,6 +92,10 @@ struct e1000_hw; #define E1000_DEV_ID_PCH_SPT_I219_LM2 0x15B7 /* SPT-H PCH */ #define E1000_DEV_ID_PCH_SPT_I219_V2 0x15B8 /* SPT-H PCH */ #define E1000_DEV_ID_PCH_LBG_I219_LM3 0x15B9 /* LBG PCH */ +#define E1000_DEV_ID_PCH_SPT_I219_LM4 0x15D7 +#define E1000_DEV_ID_PCH_SPT_I219_V4 0x15D8 +#define E1000_DEV_ID_PCH_SPT_I219_LM5 0x15E3 +#define E1000_DEV_ID_PCH_SPT_I219_V5 0x15D6 #define E1000_REVISION_4 4 diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index a049e30639a1..c0f4887ea44d 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1252,9 +1252,9 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) ew32(H2ME, mac_reg); } - /* Poll up to 100msec for ME to clear ULP_CFG_DONE */ + /* Poll up to 300msec for ME to clear ULP_CFG_DONE. */ while (er32(FWSM) & E1000_FWSM_ULP_CFG_DONE) { - if (i++ == 10) { + if (i++ == 30) { ret_val = -E1000_ERR_PHY; goto out; } @@ -1328,6 +1328,8 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) I218_ULP_CONFIG1_RESET_TO_SMBUS | I218_ULP_CONFIG1_WOL_HOST | I218_ULP_CONFIG1_INBAND_EXIT | + I218_ULP_CONFIG1_EN_ULP_LANPHYPC | + I218_ULP_CONFIG1_DIS_CLR_STICKY_ON_PERST | I218_ULP_CONFIG1_DISABLE_SMB_PERST); e1000_write_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, phy_reg); @@ -1433,6 +1435,18 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) emi_addr = I217_RX_CONFIG; ret_val = e1000_write_emi_reg_locked(hw, emi_addr, emi_val); + if (hw->mac.type == e1000_pch_lpt || + hw->mac.type == e1000_pch_spt) { + u16 phy_reg; + + e1e_rphy_locked(hw, I217_PLL_CLOCK_GATE_REG, &phy_reg); + phy_reg &= ~I217_PLL_CLOCK_GATE_MASK; + if (speed == SPEED_100 || speed == SPEED_10) + phy_reg |= 0x3E8; + else + phy_reg |= 0xFA; + e1e_wphy_locked(hw, I217_PLL_CLOCK_GATE_REG, phy_reg); + } hw->phy.ops.release(hw); if (ret_val) @@ -1467,6 +1481,18 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) hw->phy.ops.release(hw); if (ret_val) return ret_val; + } else { + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return ret_val; + + ret_val = e1e_wphy_locked(hw, + PHY_REG(776, 20), + 0xC023); + hw->phy.ops.release(hw); + if (ret_val) + return ret_val; + } } } diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h index 34c551e322eb..2311f6003f58 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.h +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h @@ -188,6 +188,10 @@ #define I218_ULP_CONFIG1_INBAND_EXIT 0x0020 /* Inband on ULP exit */ #define I218_ULP_CONFIG1_WOL_HOST 0x0040 /* WoL Host on ULP exit */ #define I218_ULP_CONFIG1_RESET_TO_SMBUS 0x0100 /* Reset to SMBus mode */ +/* enable ULP even if when phy powered down via lanphypc */ +#define I218_ULP_CONFIG1_EN_ULP_LANPHYPC 0x0400 +/* disable clear of sticky ULP on PERST */ +#define I218_ULP_CONFIG1_DIS_CLR_STICKY_ON_PERST 0x0800 #define I218_ULP_CONFIG1_DISABLE_SMB_PERST 0x1000 /* Disable on PERST# */ /* SMBus Address Phy Register */ @@ -226,6 +230,9 @@ #define HV_PM_CTRL_PLL_STOP_IN_K1_GIGA 0x100 #define HV_PM_CTRL_K1_ENABLE 0x4000 +#define I217_PLL_CLOCK_GATE_REG PHY_REG(772, 28) +#define I217_PLL_CLOCK_GATE_MASK 0x07FF + #define SW_FLAG_TIMEOUT 1000 /* SW Semaphore flag timeout in ms */ /* Inband Control */ diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index c71ba1bfc1ec..9b4ec13d9161 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -7452,6 +7452,10 @@ static const struct pci_device_id e1000_pci_tbl[] = { { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_SPT_I219_LM2), board_pch_spt }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_SPT_I219_V2), board_pch_spt }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LBG_I219_LM3), board_pch_spt }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_SPT_I219_LM4), board_pch_spt }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_SPT_I219_V4), board_pch_spt }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_SPT_I219_LM5), board_pch_spt }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_SPT_I219_V5), board_pch_spt }, { 0, 0, 0, 0, 0, 0, 0 } /* terminate list */ }; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index b4547ebed774..4de17db3808c 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -1937,8 +1937,10 @@ static void fm10k_init_reta(struct fm10k_intfc *interface) u16 i, rss_i = interface->ring_feature[RING_F_RSS].indices; u32 reta, base; - /* If the netdev is initialized we have to maintain table if possible */ - if (interface->netdev->reg_state != NETREG_UNINITIALIZED) { + /* If the Rx flow indirection table has been configured manually, we + * need to maintain it when possible. + */ + if (netif_is_rxfh_configured(interface->netdev)) { for (i = FM10K_RETA_SIZE; i--;) { reta = interface->reta[i]; if ((((reta << 24) >> 24) < rss_i) && @@ -1946,6 +1948,10 @@ static void fm10k_init_reta(struct fm10k_intfc *interface) (((reta << 8) >> 24) < rss_i) && (((reta) >> 24) < rss_i)) continue; + + /* this should never happen */ + dev_err(&interface->pdev->dev, + "RSS indirection table assigned flows out of queue bounds. Reconfiguring.\n"); goto repopulate_reta; } diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 662569d5b7c0..d09a8dd71fc2 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -1204,6 +1204,15 @@ err_queueing_scheme: return err; } +static int __fm10k_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) +{ + if (tc->type != TC_SETUP_MQPRIO) + return -EINVAL; + + return fm10k_setup_tc(dev, tc->tc); +} + static int fm10k_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { switch (cmd) { @@ -1386,7 +1395,7 @@ static const struct net_device_ops fm10k_netdev_ops = { .ndo_vlan_rx_kill_vid = fm10k_vlan_rx_kill_vid, .ndo_set_rx_mode = fm10k_set_rx_mode, .ndo_get_stats64 = fm10k_get_stats64, - .ndo_setup_tc = fm10k_setup_tc, + .ndo_setup_tc = __fm10k_setup_tc, .ndo_set_vf_mac = fm10k_ndo_set_vf_mac, .ndo_set_vf_vlan = fm10k_ndo_set_vf_vlan, .ndo_set_vf_rate = fm10k_ndo_set_vf_bw, diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 68f2204ec6f3..2f6210ae8ba0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2015 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -64,9 +64,6 @@ #include "i40e_dcb.h" /* Useful i40e defaults */ -#define I40E_BASE_PF_SEID 16 -#define I40E_BASE_VSI_SEID 512 -#define I40E_BASE_VEB_SEID 288 #define I40E_MAX_VEB 16 #define I40E_MAX_NUM_DESCRIPTORS 4096 @@ -104,6 +101,7 @@ #define I40E_PRIV_FLAGS_FD_ATR BIT(2) #define I40E_PRIV_FLAGS_VEB_STATS BIT(3) #define I40E_PRIV_FLAGS_PS BIT(4) +#define I40E_PRIV_FLAGS_HW_ATR_EVICT BIT(5) #define I40E_NVM_VERSION_LO_SHIFT 0 #define I40E_NVM_VERSION_LO_MASK (0xff << I40E_NVM_VERSION_LO_SHIFT) @@ -113,6 +111,7 @@ #define I40E_OEM_VER_PATCH_MASK 0xff #define I40E_OEM_VER_BUILD_SHIFT 8 #define I40E_OEM_VER_SHIFT 24 +#define I40E_PHY_DEBUG_PORT BIT(4) /* The values in here are decimal coded as hex as is the case in the NVM map*/ #define I40E_CURRENT_NVM_VERSION_HI 0x2 @@ -137,6 +136,19 @@ /* default to trying for four seconds */ #define I40E_TRY_LINK_TIMEOUT (4 * HZ) +/** + * i40e_is_mac_710 - Return true if MAC is X710/XL710 + * @hw: ptr to the hardware info + **/ +static inline bool i40e_is_mac_710(struct i40e_hw *hw) +{ + if ((hw->mac.type == I40E_MAC_X710) || + (hw->mac.type == I40E_MAC_XL710)) + return true; + + return false; +} + /* driver state flags */ enum i40e_state_t { __I40E_TESTING, @@ -339,6 +351,12 @@ struct i40e_pf { #define I40E_FLAG_VEB_MODE_ENABLED BIT_ULL(40) #define I40E_FLAG_GENEVE_OFFLOAD_CAPABLE BIT_ULL(41) #define I40E_FLAG_NO_PCI_LINK_CHECK BIT_ULL(42) +#define I40E_FLAG_100M_SGMII_CAPABLE BIT_ULL(43) +#define I40E_FLAG_RESTART_AUTONEG BIT_ULL(44) +#define I40E_FLAG_NO_DCB_SUPPORT BIT_ULL(45) +#define I40E_FLAG_USE_SET_LLDP_MIB BIT_ULL(46) +#define I40E_FLAG_STOP_FW_LLDP BIT_ULL(47) +#define I40E_FLAG_HAVE_10GBASET_PHY BIT_ULL(48) #define I40E_FLAG_PF_MAC BIT_ULL(50) /* tracks features that get auto disabled by errors */ @@ -391,6 +409,7 @@ struct i40e_pf { struct i40e_vf *vf; int num_alloc_vfs; /* actual number of VFs allocated */ u32 vf_aq_requests; + u32 arq_overflows; /* Not fatal, possibly indicative of problems */ /* DCBx/DCBNL capability for PF that indicates * whether DCBx is managed by firmware or host @@ -423,6 +442,7 @@ struct i40e_pf { u32 ioremap_len; u32 fd_inv; + u16 phy_led_val; }; struct i40e_mac_filter { @@ -492,6 +512,7 @@ struct i40e_vsi { u32 tx_busy; u64 tx_linearize; u64 tx_force_wb; + u64 tx_lost_interrupt; u32 rx_buf_failed; u32 rx_page_failed; @@ -500,13 +521,6 @@ struct i40e_vsi { struct i40e_ring **tx_rings; u16 work_limit; - /* high bit set means dynamic, use accessor routines to read/write. - * hardware only supports 2us resolution for the ITR registers. - * these values always store the USER setting, and must be converted - * before programming to a register. - */ - u16 rx_itr_setting; - u16 tx_itr_setting; u16 int_rate_limit; /* value in usecs */ u16 rss_table_size; /* HW RSS table size */ @@ -747,6 +761,9 @@ static inline void i40e_irq_dynamic_enable(struct i40e_vsi *vsi, int vector) struct i40e_hw *hw = &pf->hw; u32 val; + /* definitely clear the PBA here, as this function is meant to + * clean out all previous interrupts AND enable the interrupt + */ val = I40E_PFINT_DYN_CTLN_INTENA_MASK | I40E_PFINT_DYN_CTLN_CLEARPBA_MASK | (I40E_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT); @@ -754,9 +771,8 @@ static inline void i40e_irq_dynamic_enable(struct i40e_vsi *vsi, int vector) /* skip the flush */ } -void i40e_irq_dynamic_disable(struct i40e_vsi *vsi, int vector); void i40e_irq_dynamic_disable_icr0(struct i40e_pf *pf); -void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf); +void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf, bool clearpba); #ifdef I40E_FCOE struct rtnl_link_stats64 *i40e_get_netdev_stats_struct( struct net_device *netdev, @@ -786,7 +802,8 @@ struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, u8 *macaddr, bool is_vf, bool is_netdev); #ifdef I40E_FCOE int i40e_close(struct net_device *netdev); -int i40e_setup_tc(struct net_device *netdev, u8 tc); +int __i40e_setup_tc(struct net_device *netdev, u32 handle, __be16 proto, + struct tc_to_netdev *tc); void i40e_netpoll(struct net_device *netdev); int i40e_fcoe_enable(struct net_device *netdev); int i40e_fcoe_disable(struct net_device *netdev); diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c index 1fd5ea82a9bc..df8e2fd6a649 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -953,6 +953,9 @@ i40e_status i40e_clean_arq_element(struct i40e_hw *hw, u16 flags; u16 ntu; + /* pre-clean the event info */ + memset(&e->desc, 0, sizeof(e->desc)); + /* take the lock before we start messing with the ring */ mutex_lock(&hw->aq.arq_mutex); @@ -1020,14 +1023,6 @@ i40e_status i40e_clean_arq_element(struct i40e_hw *hw, hw->aq.arq.next_to_clean = ntc; hw->aq.arq.next_to_use = ntu; -clean_arq_element_out: - /* Set pending if needed, unlock and return */ - if (pending != NULL) - *pending = (ntc > ntu ? hw->aq.arq.count : 0) + (ntu - ntc); - -clean_arq_element_err: - mutex_unlock(&hw->aq.arq_mutex); - if (i40e_is_nvm_update_op(&e->desc)) { if (hw->aq.nvm_release_on_done) { i40e_release_nvm(hw); @@ -1048,6 +1043,13 @@ clean_arq_element_err: } } +clean_arq_element_out: + /* Set pending if needed, unlock and return */ + if (pending) + *pending = (ntc > ntu ? hw->aq.arq.count : 0) + (ntu - ntc); +clean_arq_element_err: + mutex_unlock(&hw->aq.arq_mutex); + return ret_code; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index b22012a446a6..8d5c65ab6267 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -34,7 +34,7 @@ */ #define I40E_FW_API_VERSION_MAJOR 0x0001 -#define I40E_FW_API_VERSION_MINOR 0x0004 +#define I40E_FW_API_VERSION_MINOR 0x0005 struct i40e_aq_desc { __le16 flags; @@ -145,6 +145,9 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_remove_statistics = 0x0202, i40e_aqc_opc_set_port_parameters = 0x0203, i40e_aqc_opc_get_switch_resource_alloc = 0x0204, + i40e_aqc_opc_set_switch_config = 0x0205, + i40e_aqc_opc_rx_ctl_reg_read = 0x0206, + i40e_aqc_opc_rx_ctl_reg_write = 0x0207, i40e_aqc_opc_add_vsi = 0x0210, i40e_aqc_opc_update_vsi_parameters = 0x0211, @@ -220,6 +223,7 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_get_phy_wol_caps = 0x0621, i40e_aqc_opc_set_phy_debug = 0x0622, i40e_aqc_opc_upload_ext_phy_fm = 0x0625, + i40e_aqc_opc_run_phy_activity = 0x0626, /* NVM commands */ i40e_aqc_opc_nvm_read = 0x0701, @@ -228,6 +232,7 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_nvm_config_read = 0x0704, i40e_aqc_opc_nvm_config_write = 0x0705, i40e_aqc_opc_oem_post_update = 0x0720, + i40e_aqc_opc_thermal_sensor = 0x0721, /* virtualization commands */ i40e_aqc_opc_send_msg_to_pf = 0x0801, @@ -402,6 +407,7 @@ struct i40e_aqc_list_capabilities_element_resp { #define I40E_AQ_CAP_ID_OS2BMC_CAP 0x0004 #define I40E_AQ_CAP_ID_FUNCTIONS_VALID 0x0005 #define I40E_AQ_CAP_ID_ALTERNATE_RAM 0x0006 +#define I40E_AQ_CAP_ID_WOL_AND_PROXY 0x0008 #define I40E_AQ_CAP_ID_SRIOV 0x0012 #define I40E_AQ_CAP_ID_VF 0x0013 #define I40E_AQ_CAP_ID_VMDQ 0x0014 @@ -422,6 +428,7 @@ struct i40e_aqc_list_capabilities_element_resp { #define I40E_AQ_CAP_ID_LED 0x0061 #define I40E_AQ_CAP_ID_SDP 0x0062 #define I40E_AQ_CAP_ID_MDIO 0x0063 +#define I40E_AQ_CAP_ID_WSR_PROT 0x0064 #define I40E_AQ_CAP_ID_FLEX10 0x00F1 #define I40E_AQ_CAP_ID_CEM 0x00F2 @@ -680,6 +687,31 @@ struct i40e_aqc_switch_resource_alloc_element_resp { I40E_CHECK_STRUCT_LEN(0x10, i40e_aqc_switch_resource_alloc_element_resp); +/* Set Switch Configuration (direct 0x0205) */ +struct i40e_aqc_set_switch_config { + __le16 flags; +#define I40E_AQ_SET_SWITCH_CFG_PROMISC 0x0001 +#define I40E_AQ_SET_SWITCH_CFG_L2_FILTER 0x0002 + __le16 valid_flags; + u8 reserved[12]; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_set_switch_config); + +/* Read Receive control registers (direct 0x0206) + * Write Receive control registers (direct 0x0207) + * used for accessing Rx control registers that can be + * slow and need special handling when under high Rx load + */ +struct i40e_aqc_rx_ctl_reg_read_write { + __le32 reserved1; + __le32 address; + __le32 reserved2; + __le32 value; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_rx_ctl_reg_read_write); + /* Add VSI (indirect 0x0210) * this indirect command uses struct i40e_aqc_vsi_properties_data * as the indirect buffer (128 bytes) @@ -906,7 +938,8 @@ struct i40e_aqc_add_veb { I40E_AQC_ADD_VEB_PORT_TYPE_SHIFT) #define I40E_AQC_ADD_VEB_PORT_TYPE_DEFAULT 0x2 #define I40E_AQC_ADD_VEB_PORT_TYPE_DATA 0x4 -#define I40E_AQC_ADD_VEB_ENABLE_L2_FILTER 0x8 +#define I40E_AQC_ADD_VEB_ENABLE_L2_FILTER 0x8 /* deprecated */ +#define I40E_AQC_ADD_VEB_ENABLE_DISABLE_STATS 0x10 u8 enable_tcs; u8 reserved[9]; }; @@ -973,6 +1006,7 @@ struct i40e_aqc_add_macvlan_element_data { #define I40E_AQC_MACVLAN_ADD_HASH_MATCH 0x0002 #define I40E_AQC_MACVLAN_ADD_IGNORE_VLAN 0x0004 #define I40E_AQC_MACVLAN_ADD_TO_QUEUE 0x0008 +#define I40E_AQC_MACVLAN_ADD_USE_SHARED_MAC 0x0010 __le16 queue_number; #define I40E_AQC_MACVLAN_CMD_QUEUE_SHIFT 0 #define I40E_AQC_MACVLAN_CMD_QUEUE_MASK (0x7FF << \ @@ -1069,6 +1103,7 @@ struct i40e_aqc_set_vsi_promiscuous_modes { #define I40E_AQC_SET_VSI_PROMISC_BROADCAST 0x04 #define I40E_AQC_SET_VSI_DEFAULT 0x08 #define I40E_AQC_SET_VSI_PROMISC_VLAN 0x10 +#define I40E_AQC_SET_VSI_PROMISC_TX 0x8000 __le16 seid; #define I40E_AQC_VSI_PROM_CMD_SEID_MASK 0x3FF __le16 vlan_tag; @@ -1257,10 +1292,16 @@ struct i40e_aqc_add_remove_cloud_filters_element_data { #define I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT 9 #define I40E_AQC_ADD_CLOUD_TNL_TYPE_MASK 0x1E00 -#define I40E_AQC_ADD_CLOUD_TNL_TYPE_XVLAN 0 +#define I40E_AQC_ADD_CLOUD_TNL_TYPE_VXLAN 0 #define I40E_AQC_ADD_CLOUD_TNL_TYPE_NVGRE_OMAC 1 -#define I40E_AQC_ADD_CLOUD_TNL_TYPE_NGE 2 +#define I40E_AQC_ADD_CLOUD_TNL_TYPE_GENEVE 2 #define I40E_AQC_ADD_CLOUD_TNL_TYPE_IP 3 +#define I40E_AQC_ADD_CLOUD_TNL_TYPE_RESERVED 4 +#define I40E_AQC_ADD_CLOUD_TNL_TYPE_VXLAN_GPE 5 + +#define I40E_AQC_ADD_CLOUD_FLAGS_SHARED_OUTER_MAC 0x2000 +#define I40E_AQC_ADD_CLOUD_FLAGS_SHARED_INNER_MAC 0x4000 +#define I40E_AQC_ADD_CLOUD_FLAGS_SHARED_OUTER_IP 0x8000 __le32 tenant_id; u8 reserved[4]; @@ -1755,7 +1796,12 @@ struct i40e_aqc_get_link_status { u8 config; #define I40E_AQ_CONFIG_CRC_ENA 0x04 #define I40E_AQ_CONFIG_PACING_MASK 0x78 - u8 reserved[5]; + u8 external_power_ability; +#define I40E_AQ_LINK_POWER_CLASS_1 0x00 +#define I40E_AQ_LINK_POWER_CLASS_2 0x01 +#define I40E_AQ_LINK_POWER_CLASS_3 0x02 +#define I40E_AQ_LINK_POWER_CLASS_4 0x03 + u8 reserved[4]; }; I40E_CHECK_CMD_LENGTH(i40e_aqc_get_link_status); @@ -1823,6 +1869,18 @@ enum i40e_aq_phy_reg_type { I40E_AQC_PHY_REG_EXERNAL_MODULE = 0x3 }; +/* Run PHY Activity (0x0626) */ +struct i40e_aqc_run_phy_activity { + __le16 activity_id; + u8 flags; + u8 reserved1; + __le32 control; + __le32 data; + u8 reserved2[4]; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_run_phy_activity); + /* NVM Read command (indirect 0x0701) * NVM Erase commands (direct 0x0702) * NVM Update commands (indirect 0x0703) @@ -1912,6 +1970,22 @@ struct i40e_aqc_nvm_oem_post_update_buffer { I40E_CHECK_STRUCT_LEN(0x28, i40e_aqc_nvm_oem_post_update_buffer); +/* Thermal Sensor (indirect 0x0721) + * read or set thermal sensor configs and values + * takes a sensor and command specific data buffer, not detailed here + */ +struct i40e_aqc_thermal_sensor { + u8 sensor_action; +#define I40E_AQ_THERMAL_SENSOR_READ_CONFIG 0 +#define I40E_AQ_THERMAL_SENSOR_SET_CONFIG 1 +#define I40E_AQ_THERMAL_SENSOR_READ_TEMP 2 + u8 reserved[7]; + __le32 addr_high; + __le32 addr_low; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_thermal_sensor); + /* Send to PF command (indirect 0x0801) id is only used by PF * Send to VF command (indirect 0x0802) id is only used by PF * Send to Peer PF command (indirect 0x0803) @@ -2191,6 +2265,7 @@ struct i40e_aqc_add_udp_tunnel { #define I40E_AQC_TUNNEL_TYPE_VXLAN 0x00 #define I40E_AQC_TUNNEL_TYPE_NGE 0x01 #define I40E_AQC_TUNNEL_TYPE_TEREDO 0x10 +#define I40E_AQC_TUNNEL_TYPE_VXLAN_GPE 0x11 u8 reserved1[10]; }; diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 6a034ddac36a..4596294c2ab1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2015 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -55,19 +55,13 @@ static i40e_status i40e_set_mac_type(struct i40e_hw *hw) case I40E_DEV_ID_20G_KR2_A: hw->mac.type = I40E_MAC_XL710; break; + case I40E_DEV_ID_KX_X722: + case I40E_DEV_ID_QSFP_X722: case I40E_DEV_ID_SFP_X722: case I40E_DEV_ID_1G_BASE_T_X722: case I40E_DEV_ID_10G_BASE_T_X722: hw->mac.type = I40E_MAC_X722; break; - case I40E_DEV_ID_X722_VF: - case I40E_DEV_ID_X722_VF_HV: - hw->mac.type = I40E_MAC_X722_VF; - break; - case I40E_DEV_ID_VF: - case I40E_DEV_ID_VF_HV: - hw->mac.type = I40E_MAC_VF; - break; default: hw->mac.type = I40E_MAC_GENERIC; break; @@ -1245,7 +1239,13 @@ i40e_status i40e_pf_reset(struct i40e_hw *hw) grst_del = (rd32(hw, I40E_GLGEN_RSTCTL) & I40E_GLGEN_RSTCTL_GRSTDEL_MASK) >> I40E_GLGEN_RSTCTL_GRSTDEL_SHIFT; - for (cnt = 0; cnt < grst_del + 10; cnt++) { + + /* It can take upto 15 secs for GRST steady state. + * Bump it to 16 secs max to be safe. + */ + grst_del = grst_del * 20; + + for (cnt = 0; cnt < grst_del; cnt++) { reg = rd32(hw, I40E_GLGEN_RSTAT); if (!(reg & I40E_GLGEN_RSTAT_DEVSTATE_MASK)) break; @@ -1894,6 +1894,32 @@ i40e_status i40e_aq_set_phy_int_mask(struct i40e_hw *hw, } /** + * i40e_aq_set_phy_debug + * @hw: pointer to the hw struct + * @cmd_flags: debug command flags + * @cmd_details: pointer to command details structure or NULL + * + * Reset the external PHY. + **/ +enum i40e_status_code i40e_aq_set_phy_debug(struct i40e_hw *hw, u8 cmd_flags, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_set_phy_debug *cmd = + (struct i40e_aqc_set_phy_debug *)&desc.params.raw; + enum i40e_status_code status; + + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_set_phy_debug); + + cmd->command_flags = cmd_flags; + + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + return status; +} + +/** * i40e_aq_add_vsi * @hw: pointer to the hw struct * @vsi_ctx: pointer to a vsi context struct @@ -1958,12 +1984,19 @@ i40e_status i40e_aq_set_vsi_unicast_promiscuous(struct i40e_hw *hw, i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_set_vsi_promiscuous_modes); - if (set) + if (set) { flags |= I40E_AQC_SET_VSI_PROMISC_UNICAST; + if (((hw->aq.api_maj_ver == 1) && (hw->aq.api_min_ver >= 5)) || + (hw->aq.api_maj_ver > 1)) + flags |= I40E_AQC_SET_VSI_PROMISC_TX; + } cmd->promiscuous_flags = cpu_to_le16(flags); cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_UNICAST); + if (((hw->aq.api_maj_ver >= 1) && (hw->aq.api_min_ver >= 5)) || + (hw->aq.api_maj_ver > 1)) + cmd->valid_flags |= cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_TX); cmd->seid = cpu_to_le16(seid); status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); @@ -2039,6 +2072,37 @@ i40e_status i40e_aq_set_vsi_broadcast(struct i40e_hw *hw, } /** + * i40e_aq_set_vsi_vlan_promisc - control the VLAN promiscuous setting + * @hw: pointer to the hw struct + * @seid: vsi number + * @enable: set MAC L2 layer unicast promiscuous enable/disable for a given VLAN + * @cmd_details: pointer to command details structure or NULL + **/ +i40e_status i40e_aq_set_vsi_vlan_promisc(struct i40e_hw *hw, + u16 seid, bool enable, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_set_vsi_promiscuous_modes *cmd = + (struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw; + i40e_status status; + u16 flags = 0; + + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_set_vsi_promiscuous_modes); + if (enable) + flags |= I40E_AQC_SET_VSI_PROMISC_VLAN; + + cmd->promiscuous_flags = cpu_to_le16(flags); + cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_VLAN); + cmd->seid = cpu_to_le16(seid); + + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + return status; +} + +/** * i40e_get_vsi_params - get VSI configuration info * @hw: pointer to the hw struct * @vsi_ctx: pointer to a vsi context struct @@ -2283,8 +2347,8 @@ i40e_status i40e_update_link_info(struct i40e_hw *hw) * @downlink_seid: the VSI SEID * @enabled_tc: bitmap of TCs to be enabled * @default_port: true for default port VSI, false for control port - * @enable_l2_filtering: true to add L2 filter table rules to regular forwarding rules for cloud support * @veb_seid: pointer to where to put the resulting VEB SEID + * @enable_stats: true to turn on VEB stats * @cmd_details: pointer to command details structure or NULL * * This asks the FW to add a VEB between the uplink and downlink @@ -2292,8 +2356,8 @@ i40e_status i40e_update_link_info(struct i40e_hw *hw) **/ i40e_status i40e_aq_add_veb(struct i40e_hw *hw, u16 uplink_seid, u16 downlink_seid, u8 enabled_tc, - bool default_port, bool enable_l2_filtering, - u16 *veb_seid, + bool default_port, u16 *veb_seid, + bool enable_stats, struct i40e_asq_cmd_details *cmd_details) { struct i40e_aq_desc desc; @@ -2320,8 +2384,9 @@ i40e_status i40e_aq_add_veb(struct i40e_hw *hw, u16 uplink_seid, else veb_flags |= I40E_AQC_ADD_VEB_PORT_TYPE_DATA; - if (enable_l2_filtering) - veb_flags |= I40E_AQC_ADD_VEB_ENABLE_L2_FILTER; + /* reverse logic here: set the bitflag to disable the stats */ + if (!enable_stats) + veb_flags |= I40E_AQC_ADD_VEB_ENABLE_DISABLE_STATS; cmd->veb_flags = cpu_to_le16(veb_flags); @@ -2410,6 +2475,7 @@ i40e_status i40e_aq_add_macvlan(struct i40e_hw *hw, u16 seid, (struct i40e_aqc_macvlan *)&desc.params.raw; i40e_status status; u16 buf_size; + int i; if (count == 0 || !mv_list || !hw) return I40E_ERR_PARAM; @@ -2423,12 +2489,17 @@ i40e_status i40e_aq_add_macvlan(struct i40e_hw *hw, u16 seid, cmd->seid[1] = 0; cmd->seid[2] = 0; + for (i = 0; i < count; i++) + if (is_multicast_ether_addr(mv_list[i].mac_addr)) + mv_list[i].flags |= + cpu_to_le16(I40E_AQC_MACVLAN_ADD_USE_SHARED_MAC); + desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); if (buf_size > I40E_AQ_LARGE_BUF) desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); status = i40e_asq_send_command(hw, &desc, mv_list, buf_size, - cmd_details); + cmd_details); return status; } @@ -2476,6 +2547,137 @@ i40e_status i40e_aq_remove_macvlan(struct i40e_hw *hw, u16 seid, } /** + * i40e_mirrorrule_op - Internal helper function to add/delete mirror rule + * @hw: pointer to the hw struct + * @opcode: AQ opcode for add or delete mirror rule + * @sw_seid: Switch SEID (to which rule refers) + * @rule_type: Rule Type (ingress/egress/VLAN) + * @id: Destination VSI SEID or Rule ID + * @count: length of the list + * @mr_list: list of mirrored VSI SEIDs or VLAN IDs + * @cmd_details: pointer to command details structure or NULL + * @rule_id: Rule ID returned from FW + * @rule_used: Number of rules used in internal switch + * @rule_free: Number of rules free in internal switch + * + * Add/Delete a mirror rule to a specific switch. Mirror rules are supported for + * VEBs/VEPA elements only + **/ +static i40e_status i40e_mirrorrule_op(struct i40e_hw *hw, + u16 opcode, u16 sw_seid, u16 rule_type, u16 id, + u16 count, __le16 *mr_list, + struct i40e_asq_cmd_details *cmd_details, + u16 *rule_id, u16 *rules_used, u16 *rules_free) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_add_delete_mirror_rule *cmd = + (struct i40e_aqc_add_delete_mirror_rule *)&desc.params.raw; + struct i40e_aqc_add_delete_mirror_rule_completion *resp = + (struct i40e_aqc_add_delete_mirror_rule_completion *)&desc.params.raw; + i40e_status status; + u16 buf_size; + + buf_size = count * sizeof(*mr_list); + + /* prep the rest of the request */ + i40e_fill_default_direct_cmd_desc(&desc, opcode); + cmd->seid = cpu_to_le16(sw_seid); + cmd->rule_type = cpu_to_le16(rule_type & + I40E_AQC_MIRROR_RULE_TYPE_MASK); + cmd->num_entries = cpu_to_le16(count); + /* Dest VSI for add, rule_id for delete */ + cmd->destination = cpu_to_le16(id); + if (mr_list) { + desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | + I40E_AQ_FLAG_RD)); + if (buf_size > I40E_AQ_LARGE_BUF) + desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); + } + + status = i40e_asq_send_command(hw, &desc, mr_list, buf_size, + cmd_details); + if (!status || + hw->aq.asq_last_status == I40E_AQ_RC_ENOSPC) { + if (rule_id) + *rule_id = le16_to_cpu(resp->rule_id); + if (rules_used) + *rules_used = le16_to_cpu(resp->mirror_rules_used); + if (rules_free) + *rules_free = le16_to_cpu(resp->mirror_rules_free); + } + return status; +} + +/** + * i40e_aq_add_mirrorrule - add a mirror rule + * @hw: pointer to the hw struct + * @sw_seid: Switch SEID (to which rule refers) + * @rule_type: Rule Type (ingress/egress/VLAN) + * @dest_vsi: SEID of VSI to which packets will be mirrored + * @count: length of the list + * @mr_list: list of mirrored VSI SEIDs or VLAN IDs + * @cmd_details: pointer to command details structure or NULL + * @rule_id: Rule ID returned from FW + * @rule_used: Number of rules used in internal switch + * @rule_free: Number of rules free in internal switch + * + * Add mirror rule. Mirror rules are supported for VEBs or VEPA elements only + **/ +i40e_status i40e_aq_add_mirrorrule(struct i40e_hw *hw, u16 sw_seid, + u16 rule_type, u16 dest_vsi, u16 count, __le16 *mr_list, + struct i40e_asq_cmd_details *cmd_details, + u16 *rule_id, u16 *rules_used, u16 *rules_free) +{ + if (!(rule_type == I40E_AQC_MIRROR_RULE_TYPE_ALL_INGRESS || + rule_type == I40E_AQC_MIRROR_RULE_TYPE_ALL_EGRESS)) { + if (count == 0 || !mr_list) + return I40E_ERR_PARAM; + } + + return i40e_mirrorrule_op(hw, i40e_aqc_opc_add_mirror_rule, sw_seid, + rule_type, dest_vsi, count, mr_list, + cmd_details, rule_id, rules_used, rules_free); +} + +/** + * i40e_aq_delete_mirrorrule - delete a mirror rule + * @hw: pointer to the hw struct + * @sw_seid: Switch SEID (to which rule refers) + * @rule_type: Rule Type (ingress/egress/VLAN) + * @count: length of the list + * @rule_id: Rule ID that is returned in the receive desc as part of + * add_mirrorrule. + * @mr_list: list of mirrored VLAN IDs to be removed + * @cmd_details: pointer to command details structure or NULL + * @rule_used: Number of rules used in internal switch + * @rule_free: Number of rules free in internal switch + * + * Delete a mirror rule. Mirror rules are supported for VEBs/VEPA elements only + **/ +i40e_status i40e_aq_delete_mirrorrule(struct i40e_hw *hw, u16 sw_seid, + u16 rule_type, u16 rule_id, u16 count, __le16 *mr_list, + struct i40e_asq_cmd_details *cmd_details, + u16 *rules_used, u16 *rules_free) +{ + /* Rule ID has to be valid except rule_type: INGRESS VLAN mirroring */ + if (rule_type != I40E_AQC_MIRROR_RULE_TYPE_VLAN) { + if (!rule_id) + return I40E_ERR_PARAM; + } else { + /* count and mr_list shall be valid for rule_type INGRESS VLAN + * mirroring. For other rule_type, count and rule_type should + * not matter. + */ + if (count == 0 || !mr_list) + return I40E_ERR_PARAM; + } + + return i40e_mirrorrule_op(hw, i40e_aqc_opc_delete_mirror_rule, sw_seid, + rule_type, rule_id, count, mr_list, + cmd_details, NULL, rules_used, rules_free); +} + +/** * i40e_aq_send_msg_to_vf * @hw: pointer to the hardware structure * @vfid: VF id to send msg @@ -2765,35 +2967,6 @@ i40e_aq_erase_nvm_exit: return status; } -#define I40E_DEV_FUNC_CAP_SWITCH_MODE 0x01 -#define I40E_DEV_FUNC_CAP_MGMT_MODE 0x02 -#define I40E_DEV_FUNC_CAP_NPAR 0x03 -#define I40E_DEV_FUNC_CAP_OS2BMC 0x04 -#define I40E_DEV_FUNC_CAP_VALID_FUNC 0x05 -#define I40E_DEV_FUNC_CAP_SRIOV_1_1 0x12 -#define I40E_DEV_FUNC_CAP_VF 0x13 -#define I40E_DEV_FUNC_CAP_VMDQ 0x14 -#define I40E_DEV_FUNC_CAP_802_1_QBG 0x15 -#define I40E_DEV_FUNC_CAP_802_1_QBH 0x16 -#define I40E_DEV_FUNC_CAP_VSI 0x17 -#define I40E_DEV_FUNC_CAP_DCB 0x18 -#define I40E_DEV_FUNC_CAP_FCOE 0x21 -#define I40E_DEV_FUNC_CAP_ISCSI 0x22 -#define I40E_DEV_FUNC_CAP_RSS 0x40 -#define I40E_DEV_FUNC_CAP_RX_QUEUES 0x41 -#define I40E_DEV_FUNC_CAP_TX_QUEUES 0x42 -#define I40E_DEV_FUNC_CAP_MSIX 0x43 -#define I40E_DEV_FUNC_CAP_MSIX_VF 0x44 -#define I40E_DEV_FUNC_CAP_FLOW_DIRECTOR 0x45 -#define I40E_DEV_FUNC_CAP_IEEE_1588 0x46 -#define I40E_DEV_FUNC_CAP_FLEX10 0xF1 -#define I40E_DEV_FUNC_CAP_CEM 0xF2 -#define I40E_DEV_FUNC_CAP_IWARP 0x51 -#define I40E_DEV_FUNC_CAP_LED 0x61 -#define I40E_DEV_FUNC_CAP_SDP 0x62 -#define I40E_DEV_FUNC_CAP_MDIO 0x63 -#define I40E_DEV_FUNC_CAP_WR_CSR_PROT 0x64 - /** * i40e_parse_discover_capabilities * @hw: pointer to the hw struct @@ -2832,79 +3005,79 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff, major_rev = cap->major_rev; switch (id) { - case I40E_DEV_FUNC_CAP_SWITCH_MODE: + case I40E_AQ_CAP_ID_SWITCH_MODE: p->switch_mode = number; break; - case I40E_DEV_FUNC_CAP_MGMT_MODE: + case I40E_AQ_CAP_ID_MNG_MODE: p->management_mode = number; break; - case I40E_DEV_FUNC_CAP_NPAR: + case I40E_AQ_CAP_ID_NPAR_ACTIVE: p->npar_enable = number; break; - case I40E_DEV_FUNC_CAP_OS2BMC: + case I40E_AQ_CAP_ID_OS2BMC_CAP: p->os2bmc = number; break; - case I40E_DEV_FUNC_CAP_VALID_FUNC: + case I40E_AQ_CAP_ID_FUNCTIONS_VALID: p->valid_functions = number; break; - case I40E_DEV_FUNC_CAP_SRIOV_1_1: + case I40E_AQ_CAP_ID_SRIOV: if (number == 1) p->sr_iov_1_1 = true; break; - case I40E_DEV_FUNC_CAP_VF: + case I40E_AQ_CAP_ID_VF: p->num_vfs = number; p->vf_base_id = logical_id; break; - case I40E_DEV_FUNC_CAP_VMDQ: + case I40E_AQ_CAP_ID_VMDQ: if (number == 1) p->vmdq = true; break; - case I40E_DEV_FUNC_CAP_802_1_QBG: + case I40E_AQ_CAP_ID_8021QBG: if (number == 1) p->evb_802_1_qbg = true; break; - case I40E_DEV_FUNC_CAP_802_1_QBH: + case I40E_AQ_CAP_ID_8021QBR: if (number == 1) p->evb_802_1_qbh = true; break; - case I40E_DEV_FUNC_CAP_VSI: + case I40E_AQ_CAP_ID_VSI: p->num_vsis = number; break; - case I40E_DEV_FUNC_CAP_DCB: + case I40E_AQ_CAP_ID_DCB: if (number == 1) { p->dcb = true; p->enabled_tcmap = logical_id; p->maxtc = phys_id; } break; - case I40E_DEV_FUNC_CAP_FCOE: + case I40E_AQ_CAP_ID_FCOE: if (number == 1) p->fcoe = true; break; - case I40E_DEV_FUNC_CAP_ISCSI: + case I40E_AQ_CAP_ID_ISCSI: if (number == 1) p->iscsi = true; break; - case I40E_DEV_FUNC_CAP_RSS: + case I40E_AQ_CAP_ID_RSS: p->rss = true; p->rss_table_size = number; p->rss_table_entry_width = logical_id; break; - case I40E_DEV_FUNC_CAP_RX_QUEUES: + case I40E_AQ_CAP_ID_RXQ: p->num_rx_qp = number; p->base_queue = phys_id; break; - case I40E_DEV_FUNC_CAP_TX_QUEUES: + case I40E_AQ_CAP_ID_TXQ: p->num_tx_qp = number; p->base_queue = phys_id; break; - case I40E_DEV_FUNC_CAP_MSIX: + case I40E_AQ_CAP_ID_MSIX: p->num_msix_vectors = number; break; - case I40E_DEV_FUNC_CAP_MSIX_VF: + case I40E_AQ_CAP_ID_VF_MSIX: p->num_msix_vectors_vf = number; break; - case I40E_DEV_FUNC_CAP_FLEX10: + case I40E_AQ_CAP_ID_FLEX10: if (major_rev == 1) { if (number == 1) { p->flex10_enable = true; @@ -2920,38 +3093,38 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff, p->flex10_mode = logical_id; p->flex10_status = phys_id; break; - case I40E_DEV_FUNC_CAP_CEM: + case I40E_AQ_CAP_ID_CEM: if (number == 1) p->mgmt_cem = true; break; - case I40E_DEV_FUNC_CAP_IWARP: + case I40E_AQ_CAP_ID_IWARP: if (number == 1) p->iwarp = true; break; - case I40E_DEV_FUNC_CAP_LED: + case I40E_AQ_CAP_ID_LED: if (phys_id < I40E_HW_CAP_MAX_GPIO) p->led[phys_id] = true; break; - case I40E_DEV_FUNC_CAP_SDP: + case I40E_AQ_CAP_ID_SDP: if (phys_id < I40E_HW_CAP_MAX_GPIO) p->sdp[phys_id] = true; break; - case I40E_DEV_FUNC_CAP_MDIO: + case I40E_AQ_CAP_ID_MDIO: if (number == 1) { p->mdio_port_num = phys_id; p->mdio_port_mode = logical_id; } break; - case I40E_DEV_FUNC_CAP_IEEE_1588: + case I40E_AQ_CAP_ID_1588: if (number == 1) p->ieee_1588 = true; break; - case I40E_DEV_FUNC_CAP_FLOW_DIRECTOR: + case I40E_AQ_CAP_ID_FLOW_DIRECTOR: p->fd = true; p->fd_filters_guaranteed = number; p->fd_filters_best_effort = logical_id; break; - case I40E_DEV_FUNC_CAP_WR_CSR_PROT: + case I40E_AQ_CAP_ID_WSR_PROT: p->wr_csr_prot = (u64)number; p->wr_csr_prot |= (u64)logical_id << 32; break; @@ -3709,7 +3882,7 @@ i40e_status i40e_set_filter_control(struct i40e_hw *hw, return ret; /* Read the PF Queue Filter control register */ - val = rd32(hw, I40E_PFQF_CTL_0); + val = i40e_read_rx_ctl(hw, I40E_PFQF_CTL_0); /* Program required PE hash buckets for the PF */ val &= ~I40E_PFQF_CTL_0_PEHSIZE_MASK; @@ -3746,7 +3919,7 @@ i40e_status i40e_set_filter_control(struct i40e_hw *hw, if (settings->enable_macvlan) val |= I40E_PFQF_CTL_0_MACVLAN_ENA_MASK; - wr32(hw, I40E_PFQF_CTL_0, val); + i40e_write_rx_ctl(hw, I40E_PFQF_CTL_0, val); return 0; } @@ -4073,3 +4246,454 @@ i40e_status i40e_aq_configure_partition_bw(struct i40e_hw *hw, return status; } + +/** + * i40e_read_phy_register + * @hw: pointer to the HW structure + * @page: registers page number + * @reg: register address in the page + * @phy_adr: PHY address on MDIO interface + * @value: PHY register value + * + * Reads specified PHY register value + **/ +i40e_status i40e_read_phy_register(struct i40e_hw *hw, + u8 page, u16 reg, u8 phy_addr, + u16 *value) +{ + i40e_status status = I40E_ERR_TIMEOUT; + u32 command = 0; + u16 retry = 1000; + u8 port_num = hw->func_caps.mdio_port_num; + + command = (reg << I40E_GLGEN_MSCA_MDIADD_SHIFT) | + (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) | + (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) | + (I40E_MDIO_OPCODE_ADDRESS) | + (I40E_MDIO_STCODE) | + (I40E_GLGEN_MSCA_MDICMD_MASK) | + (I40E_GLGEN_MSCA_MDIINPROGEN_MASK); + wr32(hw, I40E_GLGEN_MSCA(port_num), command); + do { + command = rd32(hw, I40E_GLGEN_MSCA(port_num)); + if (!(command & I40E_GLGEN_MSCA_MDICMD_MASK)) { + status = 0; + break; + } + usleep_range(10, 20); + retry--; + } while (retry); + + if (status) { + i40e_debug(hw, I40E_DEBUG_PHY, + "PHY: Can't write command to external PHY.\n"); + goto phy_read_end; + } + + command = (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) | + (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) | + (I40E_MDIO_OPCODE_READ) | + (I40E_MDIO_STCODE) | + (I40E_GLGEN_MSCA_MDICMD_MASK) | + (I40E_GLGEN_MSCA_MDIINPROGEN_MASK); + status = I40E_ERR_TIMEOUT; + retry = 1000; + wr32(hw, I40E_GLGEN_MSCA(port_num), command); + do { + command = rd32(hw, I40E_GLGEN_MSCA(port_num)); + if (!(command & I40E_GLGEN_MSCA_MDICMD_MASK)) { + status = 0; + break; + } + usleep_range(10, 20); + retry--; + } while (retry); + + if (!status) { + command = rd32(hw, I40E_GLGEN_MSRWD(port_num)); + *value = (command & I40E_GLGEN_MSRWD_MDIRDDATA_MASK) >> + I40E_GLGEN_MSRWD_MDIRDDATA_SHIFT; + } else { + i40e_debug(hw, I40E_DEBUG_PHY, + "PHY: Can't read register value from external PHY.\n"); + } + +phy_read_end: + return status; +} + +/** + * i40e_write_phy_register + * @hw: pointer to the HW structure + * @page: registers page number + * @reg: register address in the page + * @phy_adr: PHY address on MDIO interface + * @value: PHY register value + * + * Writes value to specified PHY register + **/ +i40e_status i40e_write_phy_register(struct i40e_hw *hw, + u8 page, u16 reg, u8 phy_addr, + u16 value) +{ + i40e_status status = I40E_ERR_TIMEOUT; + u32 command = 0; + u16 retry = 1000; + u8 port_num = hw->func_caps.mdio_port_num; + + command = (reg << I40E_GLGEN_MSCA_MDIADD_SHIFT) | + (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) | + (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) | + (I40E_MDIO_OPCODE_ADDRESS) | + (I40E_MDIO_STCODE) | + (I40E_GLGEN_MSCA_MDICMD_MASK) | + (I40E_GLGEN_MSCA_MDIINPROGEN_MASK); + wr32(hw, I40E_GLGEN_MSCA(port_num), command); + do { + command = rd32(hw, I40E_GLGEN_MSCA(port_num)); + if (!(command & I40E_GLGEN_MSCA_MDICMD_MASK)) { + status = 0; + break; + } + usleep_range(10, 20); + retry--; + } while (retry); + if (status) { + i40e_debug(hw, I40E_DEBUG_PHY, + "PHY: Can't write command to external PHY.\n"); + goto phy_write_end; + } + + command = value << I40E_GLGEN_MSRWD_MDIWRDATA_SHIFT; + wr32(hw, I40E_GLGEN_MSRWD(port_num), command); + + command = (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) | + (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) | + (I40E_MDIO_OPCODE_WRITE) | + (I40E_MDIO_STCODE) | + (I40E_GLGEN_MSCA_MDICMD_MASK) | + (I40E_GLGEN_MSCA_MDIINPROGEN_MASK); + status = I40E_ERR_TIMEOUT; + retry = 1000; + wr32(hw, I40E_GLGEN_MSCA(port_num), command); + do { + command = rd32(hw, I40E_GLGEN_MSCA(port_num)); + if (!(command & I40E_GLGEN_MSCA_MDICMD_MASK)) { + status = 0; + break; + } + usleep_range(10, 20); + retry--; + } while (retry); + +phy_write_end: + return status; +} + +/** + * i40e_get_phy_address + * @hw: pointer to the HW structure + * @dev_num: PHY port num that address we want + * @phy_addr: Returned PHY address + * + * Gets PHY address for current port + **/ +u8 i40e_get_phy_address(struct i40e_hw *hw, u8 dev_num) +{ + u8 port_num = hw->func_caps.mdio_port_num; + u32 reg_val = rd32(hw, I40E_GLGEN_MDIO_I2C_SEL(port_num)); + + return (u8)(reg_val >> ((dev_num + 1) * 5)) & 0x1f; +} + +/** + * i40e_blink_phy_led + * @hw: pointer to the HW structure + * @time: time how long led will blinks in secs + * @interval: gap between LED on and off in msecs + * + * Blinks PHY link LED + **/ +i40e_status i40e_blink_phy_link_led(struct i40e_hw *hw, + u32 time, u32 interval) +{ + i40e_status status = 0; + u32 i; + u16 led_ctl; + u16 gpio_led_port; + u16 led_reg; + u16 led_addr = I40E_PHY_LED_PROV_REG_1; + u8 phy_addr = 0; + u8 port_num; + + i = rd32(hw, I40E_PFGEN_PORTNUM); + port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK); + phy_addr = i40e_get_phy_address(hw, port_num); + + for (gpio_led_port = 0; gpio_led_port < 3; gpio_led_port++, + led_addr++) { + status = i40e_read_phy_register(hw, I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, &led_reg); + if (status) + goto phy_blinking_end; + led_ctl = led_reg; + if (led_reg & I40E_PHY_LED_LINK_MODE_MASK) { + led_reg = 0; + status = i40e_write_phy_register(hw, + I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, + led_reg); + if (status) + goto phy_blinking_end; + break; + } + } + + if (time > 0 && interval > 0) { + for (i = 0; i < time * 1000; i += interval) { + status = i40e_read_phy_register(hw, + I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, + &led_reg); + if (status) + goto restore_config; + if (led_reg & I40E_PHY_LED_MANUAL_ON) + led_reg = 0; + else + led_reg = I40E_PHY_LED_MANUAL_ON; + status = i40e_write_phy_register(hw, + I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, + led_reg); + if (status) + goto restore_config; + msleep(interval); + } + } + +restore_config: + status = i40e_write_phy_register(hw, I40E_PHY_COM_REG_PAGE, led_addr, + phy_addr, led_ctl); + +phy_blinking_end: + return status; +} + +/** + * i40e_led_get_phy - return current on/off mode + * @hw: pointer to the hw struct + * @led_addr: address of led register to use + * @val: original value of register to use + * + **/ +i40e_status i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr, + u16 *val) +{ + i40e_status status = 0; + u16 gpio_led_port; + u8 phy_addr = 0; + u16 reg_val; + u16 temp_addr; + u8 port_num; + u32 i; + + temp_addr = I40E_PHY_LED_PROV_REG_1; + i = rd32(hw, I40E_PFGEN_PORTNUM); + port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK); + phy_addr = i40e_get_phy_address(hw, port_num); + + for (gpio_led_port = 0; gpio_led_port < 3; gpio_led_port++, + temp_addr++) { + status = i40e_read_phy_register(hw, I40E_PHY_COM_REG_PAGE, + temp_addr, phy_addr, ®_val); + if (status) + return status; + *val = reg_val; + if (reg_val & I40E_PHY_LED_LINK_MODE_MASK) { + *led_addr = temp_addr; + break; + } + } + return status; +} + +/** + * i40e_led_set_phy + * @hw: pointer to the HW structure + * @on: true or false + * @mode: original val plus bit for set or ignore + * Set led's on or off when controlled by the PHY + * + **/ +i40e_status i40e_led_set_phy(struct i40e_hw *hw, bool on, + u16 led_addr, u32 mode) +{ + i40e_status status = 0; + u16 led_ctl = 0; + u16 led_reg = 0; + u8 phy_addr = 0; + u8 port_num; + u32 i; + + i = rd32(hw, I40E_PFGEN_PORTNUM); + port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK); + phy_addr = i40e_get_phy_address(hw, port_num); + + status = i40e_read_phy_register(hw, I40E_PHY_COM_REG_PAGE, led_addr, + phy_addr, &led_reg); + if (status) + return status; + led_ctl = led_reg; + if (led_reg & I40E_PHY_LED_LINK_MODE_MASK) { + led_reg = 0; + status = i40e_write_phy_register(hw, I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, led_reg); + if (status) + return status; + } + status = i40e_read_phy_register(hw, I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, &led_reg); + if (status) + goto restore_config; + if (on) + led_reg = I40E_PHY_LED_MANUAL_ON; + else + led_reg = 0; + status = i40e_write_phy_register(hw, I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, led_reg); + if (status) + goto restore_config; + if (mode & I40E_PHY_LED_MODE_ORIG) { + led_ctl = (mode & I40E_PHY_LED_MODE_MASK); + status = i40e_write_phy_register(hw, + I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, led_ctl); + } + return status; +restore_config: + status = i40e_write_phy_register(hw, I40E_PHY_COM_REG_PAGE, led_addr, + phy_addr, led_ctl); + return status; +} + +/** + * i40e_aq_rx_ctl_read_register - use FW to read from an Rx control register + * @hw: pointer to the hw struct + * @reg_addr: register address + * @reg_val: ptr to register value + * @cmd_details: pointer to command details structure or NULL + * + * Use the firmware to read the Rx control register, + * especially useful if the Rx unit is under heavy pressure + **/ +i40e_status i40e_aq_rx_ctl_read_register(struct i40e_hw *hw, + u32 reg_addr, u32 *reg_val, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_rx_ctl_reg_read_write *cmd_resp = + (struct i40e_aqc_rx_ctl_reg_read_write *)&desc.params.raw; + i40e_status status; + + if (!reg_val) + return I40E_ERR_PARAM; + + i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_rx_ctl_reg_read); + + cmd_resp->address = cpu_to_le32(reg_addr); + + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + if (status == 0) + *reg_val = le32_to_cpu(cmd_resp->value); + + return status; +} + +/** + * i40e_read_rx_ctl - read from an Rx control register + * @hw: pointer to the hw struct + * @reg_addr: register address + **/ +u32 i40e_read_rx_ctl(struct i40e_hw *hw, u32 reg_addr) +{ + i40e_status status = 0; + bool use_register; + int retry = 5; + u32 val = 0; + + use_register = (hw->aq.api_maj_ver == 1) && (hw->aq.api_min_ver < 5); + if (!use_register) { +do_retry: + status = i40e_aq_rx_ctl_read_register(hw, reg_addr, &val, NULL); + if (hw->aq.asq_last_status == I40E_AQ_RC_EAGAIN && retry) { + usleep_range(1000, 2000); + retry--; + goto do_retry; + } + } + + /* if the AQ access failed, try the old-fashioned way */ + if (status || use_register) + val = rd32(hw, reg_addr); + + return val; +} + +/** + * i40e_aq_rx_ctl_write_register + * @hw: pointer to the hw struct + * @reg_addr: register address + * @reg_val: register value + * @cmd_details: pointer to command details structure or NULL + * + * Use the firmware to write to an Rx control register, + * especially useful if the Rx unit is under heavy pressure + **/ +i40e_status i40e_aq_rx_ctl_write_register(struct i40e_hw *hw, + u32 reg_addr, u32 reg_val, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_rx_ctl_reg_read_write *cmd = + (struct i40e_aqc_rx_ctl_reg_read_write *)&desc.params.raw; + i40e_status status; + + i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_rx_ctl_reg_write); + + cmd->address = cpu_to_le32(reg_addr); + cmd->value = cpu_to_le32(reg_val); + + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + return status; +} + +/** + * i40e_write_rx_ctl - write to an Rx control register + * @hw: pointer to the hw struct + * @reg_addr: register address + * @reg_val: register value + **/ +void i40e_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val) +{ + i40e_status status = 0; + bool use_register; + int retry = 5; + + use_register = (hw->aq.api_maj_ver == 1) && (hw->aq.api_min_ver < 5); + if (!use_register) { +do_retry: + status = i40e_aq_rx_ctl_write_register(hw, reg_addr, + reg_val, NULL); + if (hw->aq.asq_last_status == I40E_AQ_RC_EAGAIN && retry) { + usleep_range(1000, 2000); + retry--; + goto do_retry; + } + } + + /* if the AQ access failed, try the old-fashioned way */ + if (status || use_register) + wr32(hw, reg_addr, reg_val); +} diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c index 2691277c0055..0fab3a9b51d9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c +++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c @@ -380,17 +380,20 @@ static void i40e_parse_cee_app_tlv(struct i40e_cee_feat_tlv *tlv, { u16 length, typelength, offset = 0; struct i40e_cee_app_prio *app; - u8 i, up, selector; + u8 i; typelength = ntohs(tlv->hdr.typelen); length = (u16)((typelength & I40E_LLDP_TLV_LEN_MASK) >> I40E_LLDP_TLV_LEN_SHIFT); dcbcfg->numapps = length / sizeof(*app); + if (!dcbcfg->numapps) return; for (i = 0; i < dcbcfg->numapps; i++) { + u8 up, selector; + app = (struct i40e_cee_app_prio *)(tlv->tlvinfo + offset); for (up = 0; up < I40E_MAX_USER_PRIORITY; up++) { if (app->prio_map & BIT(up)) @@ -400,13 +403,17 @@ static void i40e_parse_cee_app_tlv(struct i40e_cee_feat_tlv *tlv, /* Get Selector from lower 2 bits, and convert to IEEE */ selector = (app->upper_oui_sel & I40E_CEE_APP_SELECTOR_MASK); - if (selector == I40E_CEE_APP_SEL_ETHTYPE) + switch (selector) { + case I40E_CEE_APP_SEL_ETHTYPE: dcbcfg->app[i].selector = I40E_APP_SEL_ETHTYPE; - else if (selector == I40E_CEE_APP_SEL_TCPIP) + break; + case I40E_CEE_APP_SEL_TCPIP: dcbcfg->app[i].selector = I40E_APP_SEL_TCPIP; - else + break; + default: /* Keep selector as it is for unknown types */ dcbcfg->app[i].selector = selector; + } dcbcfg->app[i].protocolid = ntohs(app->protocol); /* Move to next app */ @@ -814,13 +821,15 @@ i40e_status i40e_get_dcb_config(struct i40e_hw *hw) struct i40e_aqc_get_cee_dcb_cfg_resp cee_cfg; struct i40e_aqc_get_cee_dcb_cfg_v1_resp cee_v1_cfg; - /* If Firmware version < v4.33 IEEE only */ - if (((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver < 33)) || - (hw->aq.fw_maj_ver < 4)) + /* If Firmware version < v4.33 on X710/XL710, IEEE only */ + if ((hw->mac.type == I40E_MAC_XL710) && + (((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver < 33)) || + (hw->aq.fw_maj_ver < 4))) return i40e_get_ieee_dcb_config(hw); - /* If Firmware version == v4.33 use old CEE struct */ - if ((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver == 33)) { + /* If Firmware version == v4.33 on X710/XL710, use old CEE struct */ + if ((hw->mac.type == I40E_MAC_XL710) && + ((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver == 33))) { ret = i40e_aq_get_cee_dcb_config(hw, &cee_v1_cfg, sizeof(cee_v1_cfg), NULL); if (!ret) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 10744a698d6f..0c97733d253c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -61,257 +61,13 @@ static struct i40e_veb *i40e_dbg_find_veb(struct i40e_pf *pf, int seid) { int i; - if ((seid < I40E_BASE_VEB_SEID) || - (seid > (I40E_BASE_VEB_SEID + I40E_MAX_VEB))) - dev_info(&pf->pdev->dev, "%d: bad seid\n", seid); - else - for (i = 0; i < I40E_MAX_VEB; i++) - if (pf->veb[i] && pf->veb[i]->seid == seid) - return pf->veb[i]; + for (i = 0; i < I40E_MAX_VEB; i++) + if (pf->veb[i] && pf->veb[i]->seid == seid) + return pf->veb[i]; return NULL; } /************************************************************** - * dump - * The dump entry in debugfs is for getting a data snapshow of - * the driver's current configuration and runtime details. - * When the filesystem entry is written, a snapshot is taken. - * When the entry is read, the most recent snapshot data is dumped. - **************************************************************/ -static char *i40e_dbg_dump_buf; -static ssize_t i40e_dbg_dump_data_len; -static ssize_t i40e_dbg_dump_buffer_len; - -/** - * i40e_dbg_dump_read - read the dump data - * @filp: the opened file - * @buffer: where to write the data for the user to read - * @count: the size of the user's buffer - * @ppos: file position offset - **/ -static ssize_t i40e_dbg_dump_read(struct file *filp, char __user *buffer, - size_t count, loff_t *ppos) -{ - int bytes_not_copied; - int len; - - /* is *ppos bigger than the available data? */ - if (*ppos >= i40e_dbg_dump_data_len || !i40e_dbg_dump_buf) - return 0; - - /* be sure to not read beyond the end of available data */ - len = min_t(int, count, (i40e_dbg_dump_data_len - *ppos)); - - bytes_not_copied = copy_to_user(buffer, &i40e_dbg_dump_buf[*ppos], len); - if (bytes_not_copied) - return -EFAULT; - - *ppos += len; - return len; -} - -/** - * i40e_dbg_prep_dump_buf - * @pf: the PF we're working with - * @buflen: the desired buffer length - * - * Return positive if success, 0 if failed - **/ -static int i40e_dbg_prep_dump_buf(struct i40e_pf *pf, int buflen) -{ - /* if not already big enough, prep for re alloc */ - if (i40e_dbg_dump_buffer_len && i40e_dbg_dump_buffer_len < buflen) { - kfree(i40e_dbg_dump_buf); - i40e_dbg_dump_buffer_len = 0; - i40e_dbg_dump_buf = NULL; - } - - /* get a new buffer if needed */ - if (!i40e_dbg_dump_buf) { - i40e_dbg_dump_buf = kzalloc(buflen, GFP_KERNEL); - if (i40e_dbg_dump_buf != NULL) - i40e_dbg_dump_buffer_len = buflen; - } - - return i40e_dbg_dump_buffer_len; -} - -/** - * i40e_dbg_dump_write - trigger a datadump snapshot - * @filp: the opened file - * @buffer: where to find the user's data - * @count: the length of the user's data - * @ppos: file position offset - * - * Any write clears the stats - **/ -static ssize_t i40e_dbg_dump_write(struct file *filp, - const char __user *buffer, - size_t count, loff_t *ppos) -{ - struct i40e_pf *pf = filp->private_data; - bool seid_found = false; - long seid = -1; - int buflen = 0; - int i, ret; - int len; - u8 *p; - - /* don't allow partial writes */ - if (*ppos != 0) - return 0; - - /* decode the SEID given to be dumped */ - ret = kstrtol_from_user(buffer, count, 0, &seid); - - if (ret) { - dev_info(&pf->pdev->dev, "bad seid value\n"); - } else if (seid == 0) { - seid_found = true; - - kfree(i40e_dbg_dump_buf); - i40e_dbg_dump_buffer_len = 0; - i40e_dbg_dump_data_len = 0; - i40e_dbg_dump_buf = NULL; - dev_info(&pf->pdev->dev, "debug buffer freed\n"); - - } else if (seid == pf->pf_seid || seid == 1) { - seid_found = true; - - buflen = sizeof(struct i40e_pf); - buflen += (sizeof(struct i40e_aq_desc) - * (pf->hw.aq.num_arq_entries + pf->hw.aq.num_asq_entries)); - - if (i40e_dbg_prep_dump_buf(pf, buflen)) { - p = i40e_dbg_dump_buf; - - len = sizeof(struct i40e_pf); - memcpy(p, pf, len); - p += len; - - len = (sizeof(struct i40e_aq_desc) - * pf->hw.aq.num_asq_entries); - memcpy(p, pf->hw.aq.asq.desc_buf.va, len); - p += len; - - len = (sizeof(struct i40e_aq_desc) - * pf->hw.aq.num_arq_entries); - memcpy(p, pf->hw.aq.arq.desc_buf.va, len); - p += len; - - i40e_dbg_dump_data_len = buflen; - dev_info(&pf->pdev->dev, - "PF seid %ld dumped %d bytes\n", - seid, (int)i40e_dbg_dump_data_len); - } - } else if (seid >= I40E_BASE_VSI_SEID) { - struct i40e_vsi *vsi = NULL; - struct i40e_mac_filter *f; - int filter_count = 0; - - mutex_lock(&pf->switch_mutex); - vsi = i40e_dbg_find_vsi(pf, seid); - if (!vsi) { - mutex_unlock(&pf->switch_mutex); - goto write_exit; - } - - buflen = sizeof(struct i40e_vsi); - buflen += sizeof(struct i40e_q_vector) * vsi->num_q_vectors; - buflen += sizeof(struct i40e_ring) * 2 * vsi->num_queue_pairs; - buflen += sizeof(struct i40e_tx_buffer) * vsi->num_queue_pairs; - buflen += sizeof(struct i40e_rx_buffer) * vsi->num_queue_pairs; - list_for_each_entry(f, &vsi->mac_filter_list, list) - filter_count++; - buflen += sizeof(struct i40e_mac_filter) * filter_count; - - if (i40e_dbg_prep_dump_buf(pf, buflen)) { - p = i40e_dbg_dump_buf; - seid_found = true; - - len = sizeof(struct i40e_vsi); - memcpy(p, vsi, len); - p += len; - - if (vsi->num_q_vectors) { - len = (sizeof(struct i40e_q_vector) - * vsi->num_q_vectors); - memcpy(p, vsi->q_vectors, len); - p += len; - } - - if (vsi->num_queue_pairs) { - len = (sizeof(struct i40e_ring) * - vsi->num_queue_pairs); - memcpy(p, vsi->tx_rings, len); - p += len; - memcpy(p, vsi->rx_rings, len); - p += len; - } - - if (vsi->tx_rings[0]) { - len = sizeof(struct i40e_tx_buffer); - for (i = 0; i < vsi->num_queue_pairs; i++) { - memcpy(p, vsi->tx_rings[i]->tx_bi, len); - p += len; - } - len = sizeof(struct i40e_rx_buffer); - for (i = 0; i < vsi->num_queue_pairs; i++) { - memcpy(p, vsi->rx_rings[i]->rx_bi, len); - p += len; - } - } - - /* macvlan filter list */ - len = sizeof(struct i40e_mac_filter); - list_for_each_entry(f, &vsi->mac_filter_list, list) { - memcpy(p, f, len); - p += len; - } - - i40e_dbg_dump_data_len = buflen; - dev_info(&pf->pdev->dev, - "VSI seid %ld dumped %d bytes\n", - seid, (int)i40e_dbg_dump_data_len); - } - mutex_unlock(&pf->switch_mutex); - } else if (seid >= I40E_BASE_VEB_SEID) { - struct i40e_veb *veb = NULL; - - mutex_lock(&pf->switch_mutex); - veb = i40e_dbg_find_veb(pf, seid); - if (!veb) { - mutex_unlock(&pf->switch_mutex); - goto write_exit; - } - - buflen = sizeof(struct i40e_veb); - if (i40e_dbg_prep_dump_buf(pf, buflen)) { - seid_found = true; - memcpy(i40e_dbg_dump_buf, veb, buflen); - i40e_dbg_dump_data_len = buflen; - dev_info(&pf->pdev->dev, - "VEB seid %ld dumped %d bytes\n", - seid, (int)i40e_dbg_dump_data_len); - } - mutex_unlock(&pf->switch_mutex); - } - -write_exit: - if (!seid_found) - dev_info(&pf->pdev->dev, "unknown seid %ld\n", seid); - - return count; -} - -static const struct file_operations i40e_dbg_dump_fops = { - .owner = THIS_MODULE, - .open = simple_open, - .read = i40e_dbg_dump_read, - .write = i40e_dbg_dump_write, -}; - -/************************************************************** * command * The command entry in debugfs is for giving the driver commands * to be executed - these may be for changing the internal switch @@ -379,19 +135,27 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) return; } dev_info(&pf->pdev->dev, "vsi seid %d\n", seid); - if (vsi->netdev) - dev_info(&pf->pdev->dev, - " netdev: name = %s\n", - vsi->netdev->name); + if (vsi->netdev) { + struct net_device *nd = vsi->netdev; + + dev_info(&pf->pdev->dev, " netdev: name = %s, state = %lu, flags = 0x%08x\n", + nd->name, nd->state, nd->flags); + dev_info(&pf->pdev->dev, " features = 0x%08lx\n", + (unsigned long int)nd->features); + dev_info(&pf->pdev->dev, " hw_features = 0x%08lx\n", + (unsigned long int)nd->hw_features); + dev_info(&pf->pdev->dev, " vlan_features = 0x%08lx\n", + (unsigned long int)nd->vlan_features); + } if (vsi->active_vlans) dev_info(&pf->pdev->dev, " vlgrp: & = %p\n", vsi->active_vlans); dev_info(&pf->pdev->dev, - " netdev_registered = %i, current_netdev_flags = 0x%04x, state = %li flags = 0x%08lx\n", - vsi->netdev_registered, - vsi->current_netdev_flags, vsi->state, vsi->flags); + " state = %li flags = 0x%08lx, netdev_registered = %i, current_netdev_flags = 0x%04x\n", + vsi->state, vsi->flags, + vsi->netdev_registered, vsi->current_netdev_flags); if (vsi == pf->vsi[pf->lan_vsi]) - dev_info(&pf->pdev->dev, "MAC address: %pM SAN MAC: %pM Port MAC: %pM\n", + dev_info(&pf->pdev->dev, " MAC address: %pM SAN MAC: %pM Port MAC: %pM\n", pf->hw.mac.addr, pf->hw.mac.san_addr, pf->hw.mac.port_addr); @@ -511,7 +275,7 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) rx_ring->dtype); dev_info(&pf->pdev->dev, " rx_rings[%i]: hsplit = %d, next_to_use = %d, next_to_clean = %d, ring_active = %i\n", - i, rx_ring->hsplit, + i, ring_is_ps_enabled(rx_ring), rx_ring->next_to_use, rx_ring->next_to_clean, rx_ring->ring_active); @@ -526,6 +290,11 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) rx_ring->rx_stats.alloc_page_failed, rx_ring->rx_stats.alloc_buff_failed); dev_info(&pf->pdev->dev, + " rx_rings[%i]: rx_stats: realloc_count = %lld, page_reuse_count = %lld\n", + i, + rx_ring->rx_stats.realloc_count, + rx_ring->rx_stats.page_reuse_count); + dev_info(&pf->pdev->dev, " rx_rings[%i]: size = %i, dma = 0x%08lx\n", i, rx_ring->size, (unsigned long int)rx_ring->dma); @@ -533,6 +302,10 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) " rx_rings[%i]: vsi = %p, q_vector = %p\n", i, rx_ring->vsi, rx_ring->q_vector); + dev_info(&pf->pdev->dev, + " rx_rings[%i]: rx_itr_setting = %d (%s)\n", + i, rx_ring->rx_itr_setting, + ITR_IS_DYNAMIC(rx_ring->rx_itr_setting) ? "dynamic" : "fixed"); } for (i = 0; i < vsi->num_queue_pairs; i++) { struct i40e_ring *tx_ring = ACCESS_ONCE(vsi->tx_rings[i]); @@ -557,8 +330,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) " tx_rings[%i]: dtype = %d\n", i, tx_ring->dtype); dev_info(&pf->pdev->dev, - " tx_rings[%i]: hsplit = %d, next_to_use = %d, next_to_clean = %d, ring_active = %i\n", - i, tx_ring->hsplit, + " tx_rings[%i]: next_to_use = %d, next_to_clean = %d, ring_active = %i\n", + i, tx_ring->next_to_use, tx_ring->next_to_clean, tx_ring->ring_active); @@ -583,14 +356,15 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) dev_info(&pf->pdev->dev, " tx_rings[%i]: DCB tc = %d\n", i, tx_ring->dcb_tc); + dev_info(&pf->pdev->dev, + " tx_rings[%i]: tx_itr_setting = %d (%s)\n", + i, tx_ring->tx_itr_setting, + ITR_IS_DYNAMIC(tx_ring->tx_itr_setting) ? "dynamic" : "fixed"); } rcu_read_unlock(); dev_info(&pf->pdev->dev, - " work_limit = %d, rx_itr_setting = %d (%s), tx_itr_setting = %d (%s)\n", - vsi->work_limit, vsi->rx_itr_setting, - ITR_IS_DYNAMIC(vsi->rx_itr_setting) ? "dynamic" : "fixed", - vsi->tx_itr_setting, - ITR_IS_DYNAMIC(vsi->tx_itr_setting) ? "dynamic" : "fixed"); + " work_limit = %d\n", + vsi->work_limit); dev_info(&pf->pdev->dev, " max_frame = %d, rx_hdr_len = %d, rx_buf_len = %d dtype = %d\n", vsi->max_frame, vsi->rx_hdr_len, vsi->rx_buf_len, vsi->dtype); @@ -815,20 +589,20 @@ static void i40e_dbg_dump_desc(int cnt, int vsi_seid, int ring_id, int desc_n, if (!is_rx_ring) { txd = I40E_TX_DESC(ring, i); dev_info(&pf->pdev->dev, - " d[%03i] = 0x%016llx 0x%016llx\n", + " d[%03x] = 0x%016llx 0x%016llx\n", i, txd->buffer_addr, txd->cmd_type_offset_bsz); } else if (sizeof(union i40e_rx_desc) == sizeof(union i40e_16byte_rx_desc)) { rxd = I40E_RX_DESC(ring, i); dev_info(&pf->pdev->dev, - " d[%03i] = 0x%016llx 0x%016llx\n", + " d[%03x] = 0x%016llx 0x%016llx\n", i, rxd->read.pkt_addr, rxd->read.hdr_addr); } else { rxd = I40E_RX_DESC(ring, i); dev_info(&pf->pdev->dev, - " d[%03i] = 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n", + " d[%03x] = 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n", i, rxd->read.pkt_addr, rxd->read.hdr_addr, rxd->read.rsvd1, rxd->read.rsvd2); @@ -843,20 +617,20 @@ static void i40e_dbg_dump_desc(int cnt, int vsi_seid, int ring_id, int desc_n, if (!is_rx_ring) { txd = I40E_TX_DESC(ring, desc_n); dev_info(&pf->pdev->dev, - "vsi = %02i tx ring = %02i d[%03i] = 0x%016llx 0x%016llx\n", + "vsi = %02i tx ring = %02i d[%03x] = 0x%016llx 0x%016llx\n", vsi_seid, ring_id, desc_n, txd->buffer_addr, txd->cmd_type_offset_bsz); } else if (sizeof(union i40e_rx_desc) == sizeof(union i40e_16byte_rx_desc)) { rxd = I40E_RX_DESC(ring, desc_n); dev_info(&pf->pdev->dev, - "vsi = %02i rx ring = %02i d[%03i] = 0x%016llx 0x%016llx\n", + "vsi = %02i rx ring = %02i d[%03x] = 0x%016llx 0x%016llx\n", vsi_seid, ring_id, desc_n, rxd->read.pkt_addr, rxd->read.hdr_addr); } else { rxd = I40E_RX_DESC(ring, desc_n); dev_info(&pf->pdev->dev, - "vsi = %02i rx ring = %02i d[%03i] = 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n", + "vsi = %02i rx ring = %02i d[%03x] = 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n", vsi_seid, ring_id, desc_n, rxd->read.pkt_addr, rxd->read.hdr_addr, rxd->read.rsvd1, rxd->read.rsvd2); @@ -918,12 +692,6 @@ static void i40e_dbg_dump_veb_seid(struct i40e_pf *pf, int seid) { struct i40e_veb *veb; - if ((seid < I40E_BASE_VEB_SEID) || - (seid >= (I40E_MAX_VEB + I40E_BASE_VEB_SEID))) { - dev_info(&pf->pdev->dev, "%d: bad seid\n", seid); - return; - } - veb = i40e_dbg_find_veb(pf, seid); if (!veb) { dev_info(&pf->pdev->dev, "can't find veb %d\n", seid); @@ -2202,11 +1970,6 @@ void i40e_dbg_pf_init(struct i40e_pf *pf) if (!pfile) goto create_failed; - pfile = debugfs_create_file("dump", 0600, pf->i40e_dbg_pf, pf, - &i40e_dbg_dump_fops); - if (!pfile) - goto create_failed; - pfile = debugfs_create_file("netdev_ops", 0600, pf->i40e_dbg_pf, pf, &i40e_dbg_netdev_ops_fops); if (!pfile) @@ -2227,9 +1990,6 @@ void i40e_dbg_pf_exit(struct i40e_pf *pf) { debugfs_remove_recursive(pf->i40e_dbg_pf); pf->i40e_dbg_pf = NULL; - - kfree(i40e_dbg_dump_buf); - i40e_dbg_dump_buf = NULL; } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_devids.h b/drivers/net/ethernet/intel/i40e/i40e_devids.h index 448ef4c17efb..99257fcd1ef4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_devids.h +++ b/drivers/net/ethernet/intel/i40e/i40e_devids.h @@ -39,13 +39,11 @@ #define I40E_DEV_ID_20G_KR2 0x1587 #define I40E_DEV_ID_20G_KR2_A 0x1588 #define I40E_DEV_ID_10G_BASE_T4 0x1589 -#define I40E_DEV_ID_VF 0x154C -#define I40E_DEV_ID_VF_HV 0x1571 +#define I40E_DEV_ID_KX_X722 0x37CE +#define I40E_DEV_ID_QSFP_X722 0x37CF #define I40E_DEV_ID_SFP_X722 0x37D0 #define I40E_DEV_ID_1G_BASE_T_X722 0x37D1 #define I40E_DEV_ID_10G_BASE_T_X722 0x37D2 -#define I40E_DEV_ID_X722_VF 0x37CD -#define I40E_DEV_ID_X722_VF_HV 0x37D9 #define i40e_is_40G_device(d) ((d) == I40E_DEV_ID_QSFP_A || \ (d) == I40E_DEV_ID_QSFP_B || \ diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 29d5833e24a3..784b1659457a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2015 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -89,6 +89,9 @@ static const struct i40e_stats i40e_gstrings_misc_stats[] = { I40E_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol), I40E_VSI_STAT("tx_linearize", tx_linearize), I40E_VSI_STAT("tx_force_wb", tx_force_wb), + I40E_VSI_STAT("tx_lost_interrupt", tx_lost_interrupt), + I40E_VSI_STAT("rx_alloc_fail", rx_buf_failed), + I40E_VSI_STAT("rx_pg_alloc_fail", rx_page_failed), }; /* These PF_STATs might look like duplicates of some NETDEV_STATs, @@ -143,6 +146,7 @@ static struct i40e_stats i40e_gstrings_stats[] = { I40E_PF_STAT("rx_oversize", stats.rx_oversize), I40E_PF_STAT("rx_jabber", stats.rx_jabber), I40E_PF_STAT("VF_admin_queue_requests", vf_aq_requests), + I40E_PF_STAT("arq_overflows", arq_overflows), I40E_PF_STAT("rx_hwtstamp_cleared", rx_hwtstamp_cleared), I40E_PF_STAT("fdir_flush_cnt", fd_flush_cnt), I40E_PF_STAT("fdir_atr_match", stats.fd_atr_match), @@ -232,6 +236,7 @@ static const char i40e_priv_flags_strings[][ETH_GSTRING_LEN] = { "flow-director-atr", "veb-stats", "packet-split", + "hw-atr-eviction", }; #define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_priv_flags_strings) @@ -340,7 +345,7 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw, SUPPORTED_1000baseT_Full; if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB) ecmd->advertising |= ADVERTISED_1000baseT_Full; - if (pf->hw.mac.type == I40E_MAC_X722) { + if (pf->flags & I40E_FLAG_100M_SGMII_CAPABLE) { ecmd->supported |= SUPPORTED_100baseT_Full; if (hw_link_info->requested_speeds & I40E_LINK_SPEED_100MB) @@ -411,6 +416,10 @@ static void i40e_get_settings_link_down(struct i40e_hw *hw, if (pf->hw.mac.type == I40E_MAC_X722) { ecmd->supported |= SUPPORTED_100baseT_Full; ecmd->advertising |= ADVERTISED_100baseT_Full; + if (pf->flags & I40E_FLAG_100M_SGMII_CAPABLE) { + ecmd->supported |= SUPPORTED_100baseT_Full; + ecmd->advertising |= ADVERTISED_100baseT_Full; + } } } if (phy_types & I40E_CAP_PHY_TYPE_XAUI || @@ -996,16 +1005,19 @@ static int i40e_get_eeprom(struct net_device *netdev, /* check for NVMUpdate access method */ magic = hw->vendor_id | (hw->device_id << 16); if (eeprom->magic && eeprom->magic != magic) { - struct i40e_nvm_access *cmd; - int errno; + struct i40e_nvm_access *cmd = (struct i40e_nvm_access *)eeprom; + int errno = 0; /* make sure it is the right magic for NVMUpdate */ if ((eeprom->magic >> 16) != hw->device_id) - return -EINVAL; + errno = -EINVAL; + else if (test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state) || + test_bit(__I40E_RESET_INTR_RECEIVED, &pf->state)) + errno = -EBUSY; + else + ret_val = i40e_nvmupd_command(hw, cmd, bytes, &errno); - cmd = (struct i40e_nvm_access *)eeprom; - ret_val = i40e_nvmupd_command(hw, cmd, bytes, &errno); - if (ret_val && (hw->debug_mask & I40E_DEBUG_NVM)) + if ((errno || ret_val) && (hw->debug_mask & I40E_DEBUG_NVM)) dev_info(&pf->pdev->dev, "NVMUpdate read failed err=%d status=0x%x errno=%d module=%d offset=0x%x size=%d\n", ret_val, hw->aq.asq_last_status, errno, @@ -1089,27 +1101,25 @@ static int i40e_set_eeprom(struct net_device *netdev, struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_hw *hw = &np->vsi->back->hw; struct i40e_pf *pf = np->vsi->back; - struct i40e_nvm_access *cmd; + struct i40e_nvm_access *cmd = (struct i40e_nvm_access *)eeprom; int ret_val = 0; - int errno; + int errno = 0; u32 magic; /* normal ethtool set_eeprom is not supported */ magic = hw->vendor_id | (hw->device_id << 16); if (eeprom->magic == magic) - return -EOPNOTSUPP; - + errno = -EOPNOTSUPP; /* check for NVMUpdate access method */ - if (!eeprom->magic || (eeprom->magic >> 16) != hw->device_id) - return -EINVAL; - - if (test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state) || - test_bit(__I40E_RESET_INTR_RECEIVED, &pf->state)) - return -EBUSY; + else if (!eeprom->magic || (eeprom->magic >> 16) != hw->device_id) + errno = -EINVAL; + else if (test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state) || + test_bit(__I40E_RESET_INTR_RECEIVED, &pf->state)) + errno = -EBUSY; + else + ret_val = i40e_nvmupd_command(hw, cmd, bytes, &errno); - cmd = (struct i40e_nvm_access *)eeprom; - ret_val = i40e_nvmupd_command(hw, cmd, bytes, &errno); - if (ret_val && (hw->debug_mask & I40E_DEBUG_NVM)) + if ((errno || ret_val) && (hw->debug_mask & I40E_DEBUG_NVM)) dev_info(&pf->pdev->dev, "NVMUpdate write failed err=%d status=0x%x errno=%d module=%d offset=0x%x size=%d\n", ret_val, hw->aq.asq_last_status, errno, @@ -1816,28 +1826,52 @@ static int i40e_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state) { struct i40e_netdev_priv *np = netdev_priv(netdev); + i40e_status ret = 0; struct i40e_pf *pf = np->vsi->back; struct i40e_hw *hw = &pf->hw; int blink_freq = 2; + u16 temp_status; switch (state) { case ETHTOOL_ID_ACTIVE: - pf->led_status = i40e_led_get(hw); + if (!(pf->flags & I40E_FLAG_HAVE_10GBASET_PHY)) { + pf->led_status = i40e_led_get(hw); + } else { + i40e_aq_set_phy_debug(hw, I40E_PHY_DEBUG_PORT, NULL); + ret = i40e_led_get_phy(hw, &temp_status, + &pf->phy_led_val); + pf->led_status = temp_status; + } return blink_freq; case ETHTOOL_ID_ON: - i40e_led_set(hw, 0xF, false); + if (!(pf->flags & I40E_FLAG_HAVE_10GBASET_PHY)) + i40e_led_set(hw, 0xf, false); + else + ret = i40e_led_set_phy(hw, true, pf->led_status, 0); break; case ETHTOOL_ID_OFF: - i40e_led_set(hw, 0x0, false); + if (!(pf->flags & I40E_FLAG_HAVE_10GBASET_PHY)) + i40e_led_set(hw, 0x0, false); + else + ret = i40e_led_set_phy(hw, false, pf->led_status, 0); break; case ETHTOOL_ID_INACTIVE: - i40e_led_set(hw, pf->led_status, false); + if (!(pf->flags & I40E_FLAG_HAVE_10GBASET_PHY)) { + i40e_led_set(hw, false, pf->led_status); + } else { + ret = i40e_led_set_phy(hw, false, pf->led_status, + (pf->phy_led_val | + I40E_PHY_LED_MODE_ORIG)); + i40e_aq_set_phy_debug(hw, 0, NULL); + } break; default: break; } - - return 0; + if (ret) + return -ENOENT; + else + return 0; } /* NOTE: i40e hardware uses a conversion factor of 2 for Interrupt @@ -1845,8 +1879,9 @@ static int i40e_set_phys_id(struct net_device *netdev, * 125us (8000 interrupts per second) == ITR(62) */ -static int i40e_get_coalesce(struct net_device *netdev, - struct ethtool_coalesce *ec) +static int __i40e_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + int queue) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; @@ -1854,14 +1889,24 @@ static int i40e_get_coalesce(struct net_device *netdev, ec->tx_max_coalesced_frames_irq = vsi->work_limit; ec->rx_max_coalesced_frames_irq = vsi->work_limit; - if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) + /* rx and tx usecs has per queue value. If user doesn't specify the queue, + * return queue 0's value to represent. + */ + if (queue < 0) { + queue = 0; + } else if (queue >= vsi->num_queue_pairs) { + return -EINVAL; + } + + if (ITR_IS_DYNAMIC(vsi->rx_rings[queue]->rx_itr_setting)) ec->use_adaptive_rx_coalesce = 1; - if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) + if (ITR_IS_DYNAMIC(vsi->tx_rings[queue]->tx_itr_setting)) ec->use_adaptive_tx_coalesce = 1; - ec->rx_coalesce_usecs = vsi->rx_itr_setting & ~I40E_ITR_DYNAMIC; - ec->tx_coalesce_usecs = vsi->tx_itr_setting & ~I40E_ITR_DYNAMIC; + ec->rx_coalesce_usecs = vsi->rx_rings[queue]->rx_itr_setting & ~I40E_ITR_DYNAMIC; + ec->tx_coalesce_usecs = vsi->tx_rings[queue]->tx_itr_setting & ~I40E_ITR_DYNAMIC; + /* we use the _usecs_high to store/set the interrupt rate limit * that the hardware supports, that almost but not quite * fits the original intent of the ethtool variable, @@ -1874,15 +1919,63 @@ static int i40e_get_coalesce(struct net_device *netdev, return 0; } -static int i40e_set_coalesce(struct net_device *netdev, +static int i40e_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec) { - struct i40e_netdev_priv *np = netdev_priv(netdev); + return __i40e_get_coalesce(netdev, ec, -1); +} + +static int i40e_get_per_queue_coalesce(struct net_device *netdev, u32 queue, + struct ethtool_coalesce *ec) +{ + return __i40e_get_coalesce(netdev, ec, queue); +} + +static void i40e_set_itr_per_queue(struct i40e_vsi *vsi, + struct ethtool_coalesce *ec, + int queue) +{ + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; struct i40e_q_vector *q_vector; + u16 vector, intrl; + + intrl = INTRL_USEC_TO_REG(vsi->int_rate_limit); + + vsi->rx_rings[queue]->rx_itr_setting = ec->rx_coalesce_usecs; + vsi->tx_rings[queue]->tx_itr_setting = ec->tx_coalesce_usecs; + + if (ec->use_adaptive_rx_coalesce) + vsi->rx_rings[queue]->rx_itr_setting |= I40E_ITR_DYNAMIC; + else + vsi->rx_rings[queue]->rx_itr_setting &= ~I40E_ITR_DYNAMIC; + + if (ec->use_adaptive_tx_coalesce) + vsi->tx_rings[queue]->tx_itr_setting |= I40E_ITR_DYNAMIC; + else + vsi->tx_rings[queue]->tx_itr_setting &= ~I40E_ITR_DYNAMIC; + + q_vector = vsi->rx_rings[queue]->q_vector; + q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[queue]->rx_itr_setting); + vector = vsi->base_vector + q_vector->v_idx; + wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1), q_vector->rx.itr); + + q_vector = vsi->tx_rings[queue]->q_vector; + q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[queue]->tx_itr_setting); + vector = vsi->base_vector + q_vector->v_idx; + wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1), q_vector->tx.itr); + + wr32(hw, I40E_PFINT_RATEN(vector - 1), intrl); + i40e_flush(hw); +} + +static int __i40e_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + int queue) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; - struct i40e_hw *hw = &pf->hw; - u16 vector; int i; if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq) @@ -1899,57 +1992,53 @@ static int i40e_set_coalesce(struct net_device *netdev, return -EINVAL; } - vector = vsi->base_vector; - if ((ec->rx_coalesce_usecs >= (I40E_MIN_ITR << 1)) && - (ec->rx_coalesce_usecs <= (I40E_MAX_ITR << 1))) { - vsi->rx_itr_setting = ec->rx_coalesce_usecs; - } else if (ec->rx_coalesce_usecs == 0) { - vsi->rx_itr_setting = ec->rx_coalesce_usecs; + if (ec->rx_coalesce_usecs == 0) { if (ec->use_adaptive_rx_coalesce) netif_info(pf, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n"); - } else { - netif_info(pf, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n"); - return -EINVAL; + } else if ((ec->rx_coalesce_usecs < (I40E_MIN_ITR << 1)) || + (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1))) { + netif_info(pf, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n"); + return -EINVAL; } vsi->int_rate_limit = ec->rx_coalesce_usecs_high; - if ((ec->tx_coalesce_usecs >= (I40E_MIN_ITR << 1)) && - (ec->tx_coalesce_usecs <= (I40E_MAX_ITR << 1))) { - vsi->tx_itr_setting = ec->tx_coalesce_usecs; - } else if (ec->tx_coalesce_usecs == 0) { - vsi->tx_itr_setting = ec->tx_coalesce_usecs; + if (ec->tx_coalesce_usecs == 0) { if (ec->use_adaptive_tx_coalesce) netif_info(pf, drv, netdev, "tx-usecs=0, need to disable adaptive-tx for a complete disable\n"); + } else if ((ec->tx_coalesce_usecs < (I40E_MIN_ITR << 1)) || + (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1))) { + netif_info(pf, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n"); + return -EINVAL; + } + + /* rx and tx usecs has per queue value. If user doesn't specify the queue, + * apply to all queues. + */ + if (queue < 0) { + for (i = 0; i < vsi->num_queue_pairs; i++) + i40e_set_itr_per_queue(vsi, ec, i); + } else if (queue < vsi->num_queue_pairs) { + i40e_set_itr_per_queue(vsi, ec, queue); } else { - netif_info(pf, drv, netdev, - "Invalid value, tx-usecs range is 0-8160\n"); + netif_info(pf, drv, netdev, "Invalid queue value, queue range is 0 - %d\n", + vsi->num_queue_pairs - 1); return -EINVAL; } - if (ec->use_adaptive_rx_coalesce) - vsi->rx_itr_setting |= I40E_ITR_DYNAMIC; - else - vsi->rx_itr_setting &= ~I40E_ITR_DYNAMIC; - - if (ec->use_adaptive_tx_coalesce) - vsi->tx_itr_setting |= I40E_ITR_DYNAMIC; - else - vsi->tx_itr_setting &= ~I40E_ITR_DYNAMIC; - - for (i = 0; i < vsi->num_q_vectors; i++, vector++) { - u16 intrl = INTRL_USEC_TO_REG(vsi->int_rate_limit); + return 0; +} - q_vector = vsi->q_vectors[i]; - q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting); - wr32(hw, I40E_PFINT_ITRN(0, vector - 1), q_vector->rx.itr); - q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting); - wr32(hw, I40E_PFINT_ITRN(1, vector - 1), q_vector->tx.itr); - wr32(hw, I40E_PFINT_RATEN(vector - 1), intrl); - i40e_flush(hw); - } +static int i40e_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec) +{ + return __i40e_set_coalesce(netdev, ec, -1); +} - return 0; +static int i40e_set_per_queue_coalesce(struct net_device *netdev, u32 queue, + struct ethtool_coalesce *ec) +{ + return __i40e_set_coalesce(netdev, ec, queue); } /** @@ -2147,8 +2236,8 @@ static int i40e_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) { struct i40e_hw *hw = &pf->hw; - u64 hena = (u64)rd32(hw, I40E_PFQF_HENA(0)) | - ((u64)rd32(hw, I40E_PFQF_HENA(1)) << 32); + u64 hena = (u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)) | + ((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32); /* RSS does not support anything other than hashing * to queues on src and dst IPs and ports @@ -2166,9 +2255,12 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) case TCP_V4_FLOW: switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { case 0: - hena &= ~BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP); - break; + return -EINVAL; case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK); + hena |= BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP); break; default: @@ -2178,9 +2270,12 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) case TCP_V6_FLOW: switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { case 0: - hena &= ~BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP); - break; + return -EINVAL; case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK); + hena |= BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP); break; default: @@ -2190,10 +2285,13 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) case UDP_V4_FLOW: switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { case 0: - hena &= ~(BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | - BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4)); - break; + return -EINVAL; case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) | + BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP); + hena |= (BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4)); break; @@ -2204,10 +2302,13 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) case UDP_V6_FLOW: switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { case 0: - hena &= ~(BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_UDP) | - BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6)); - break; + return -EINVAL; case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | + BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP); + hena |= (BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_UDP) | BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6)); break; @@ -2245,8 +2346,8 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) return -EINVAL; } - wr32(hw, I40E_PFQF_HENA(0), (u32)hena); - wr32(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32)); + i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (u32)hena); + i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32)); i40e_flush(hw); /* Save setting for future output/update */ @@ -2712,6 +2813,8 @@ static u32 i40e_get_priv_flags(struct net_device *dev) I40E_PRIV_FLAGS_VEB_STATS : 0; ret_flags |= pf->flags & I40E_FLAG_RX_PS_ENABLED ? I40E_PRIV_FLAGS_PS : 0; + ret_flags |= pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE ? + 0 : I40E_PRIV_FLAGS_HW_ATR_EVICT; return ret_flags; } @@ -2763,10 +2866,21 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED; } - if (flags & I40E_PRIV_FLAGS_VEB_STATS) + if ((flags & I40E_PRIV_FLAGS_VEB_STATS) && + !(pf->flags & I40E_FLAG_VEB_STATS_ENABLED)) { pf->flags |= I40E_FLAG_VEB_STATS_ENABLED; - else + reset_required = true; + } else if (!(flags & I40E_PRIV_FLAGS_VEB_STATS) && + (pf->flags & I40E_FLAG_VEB_STATS_ENABLED)) { pf->flags &= ~I40E_FLAG_VEB_STATS_ENABLED; + reset_required = true; + } + + if ((flags & I40E_PRIV_FLAGS_HW_ATR_EVICT) && + (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)) + pf->auto_disable_flags &= ~I40E_FLAG_HW_ATR_EVICT_CAPABLE; + else + pf->auto_disable_flags |= I40E_FLAG_HW_ATR_EVICT_CAPABLE; /* if needed, issue reset to cause things to take effect */ if (reset_required) @@ -2812,6 +2926,8 @@ static const struct ethtool_ops i40e_ethtool_ops = { .get_ts_info = i40e_get_ts_info, .get_priv_flags = i40e_get_priv_flags, .set_priv_flags = i40e_set_priv_flags, + .get_per_queue_coalesce = i40e_get_per_queue_coalesce, + .set_per_queue_coalesce = i40e_set_per_queue_coalesce, }; void i40e_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c index 579a46ca82df..8ad162c16f61 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c +++ b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c @@ -295,11 +295,11 @@ void i40e_init_pf_fcoe(struct i40e_pf *pf) } /* enable FCoE hash filter */ - val = rd32(hw, I40E_PFQF_HENA(1)); + val = i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)); val |= BIT(I40E_FILTER_PCTYPE_FCOE_OX - 32); val |= BIT(I40E_FILTER_PCTYPE_FCOE_RX - 32); val &= I40E_PFQF_HENA_PTYPE_ENA_MASK; - wr32(hw, I40E_PFQF_HENA(1), val); + i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), val); /* enable flag */ pf->flags |= I40E_FLAG_FCOE_ENABLED; @@ -317,11 +317,11 @@ void i40e_init_pf_fcoe(struct i40e_pf *pf) pf->filter_settings.fcoe_cntx_num = I40E_DMA_CNTX_SIZE_4K; /* Setup max frame with FCoE_MTU plus L2 overheads */ - val = rd32(hw, I40E_GLFCOE_RCTL); + val = i40e_read_rx_ctl(hw, I40E_GLFCOE_RCTL); val &= ~I40E_GLFCOE_RCTL_MAX_SIZE_MASK; val |= ((FCOE_MTU + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN) << I40E_GLFCOE_RCTL_MAX_SIZE_SHIFT); - wr32(hw, I40E_GLFCOE_RCTL, val); + i40e_write_rx_ctl(hw, I40E_GLFCOE_RCTL, val); dev_info(&pf->pdev->dev, "FCoE is supported.\n"); } @@ -1359,16 +1359,32 @@ static netdev_tx_t i40e_fcoe_xmit_frame(struct sk_buff *skb, struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping]; struct i40e_tx_buffer *first; u32 tx_flags = 0; + int fso, count; u8 hdr_len = 0; u8 sof = 0; u8 eof = 0; - int fso; if (i40e_fcoe_set_skb_header(skb)) goto out_drop; - if (!i40e_xmit_descriptor_count(skb, tx_ring)) + count = i40e_xmit_descriptor_count(skb); + if (i40e_chk_linearize(skb, count)) { + if (__skb_linearize(skb)) + goto out_drop; + count = TXD_USE_COUNT(skb->len); + tx_ring->tx_stats.tx_linearize++; + } + + /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD, + * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD, + * + 4 desc gap to avoid the cache line where head is, + * + 1 desc for context descriptor, + * otherwise try next time + */ + if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) { + tx_ring->tx_stats.tx_busy++; return NETDEV_TX_BUSY; + } /* prepare the xmit flags */ if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags)) @@ -1457,7 +1473,7 @@ static const struct net_device_ops i40e_fcoe_netdev_ops = { .ndo_tx_timeout = i40e_tx_timeout, .ndo_vlan_rx_add_vid = i40e_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = i40e_vlan_rx_kill_vid, - .ndo_setup_tc = i40e_setup_tc, + .ndo_setup_tc = __i40e_setup_tc, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = i40e_netpoll, diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 8f3b53e0dc46..70d9605a0d9e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2015 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -28,11 +28,6 @@ #include <linux/of_net.h> #include <linux/pci.h> -#ifdef CONFIG_SPARC -#include <asm/idprom.h> -#include <asm/prom.h> -#endif - /* Local includes */ #include "i40e.h" #include "i40e_diag.h" @@ -51,7 +46,7 @@ static const char i40e_driver_string[] = #define DRV_VERSION_MAJOR 1 #define DRV_VERSION_MINOR 4 -#define DRV_VERSION_BUILD 8 +#define DRV_VERSION_BUILD 25 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) DRV_KERN @@ -90,6 +85,8 @@ static const struct pci_device_id i40e_pci_tbl[] = { {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T4), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2), 0}, + {PCI_VDEVICE(INTEL, I40E_DEV_ID_KX_X722), 0}, + {PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_1G_BASE_T_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T_X722), 0}, @@ -110,6 +107,8 @@ MODULE_DESCRIPTION("Intel(R) Ethernet Connection XL710 Network Driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); +static struct workqueue_struct *i40e_wq; + /** * i40e_allocate_dma_mem_d - OS specific memory alloc for shared code * @hw: pointer to the HW structure @@ -295,7 +294,7 @@ static void i40e_service_event_schedule(struct i40e_pf *pf) if (!test_bit(__I40E_DOWN, &pf->state) && !test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state) && !test_and_set_bit(__I40E_SERVICE_SCHED, &pf->state)) - schedule_work(&pf->service_task); + queue_work(i40e_wq, &pf->service_task); } /** @@ -769,7 +768,7 @@ static void i40e_update_fcoe_stats(struct i40e_vsi *vsi) if (vsi->type != I40E_VSI_FCOE) return; - idx = (pf->pf_seid - I40E_BASE_PF_SEID) + I40E_FCOE_PF_STAT_OFFSET; + idx = hw->pf_id + I40E_FCOE_PF_STAT_OFFSET; fs = &vsi->fcoe_stats; ofs = &vsi->fcoe_stats_offsets; @@ -820,6 +819,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) struct i40e_eth_stats *oes; struct i40e_eth_stats *es; /* device's eth stats */ u32 tx_restart, tx_busy; + u64 tx_lost_interrupt; struct i40e_ring *p; u32 rx_page, rx_buf; u64 bytes, packets; @@ -845,6 +845,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) rx_b = rx_p = 0; tx_b = tx_p = 0; tx_restart = tx_busy = tx_linearize = tx_force_wb = 0; + tx_lost_interrupt = 0; rx_page = 0; rx_buf = 0; rcu_read_lock(); @@ -863,6 +864,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) tx_busy += p->tx_stats.tx_busy; tx_linearize += p->tx_stats.tx_linearize; tx_force_wb += p->tx_stats.tx_force_wb; + tx_lost_interrupt += p->tx_stats.tx_lost_interrupt; /* Rx queue is part of the same block as Tx queue */ p = &p[1]; @@ -881,6 +883,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) vsi->tx_busy = tx_busy; vsi->tx_linearize = tx_linearize; vsi->tx_force_wb = tx_force_wb; + vsi->tx_lost_interrupt = tx_lost_interrupt; vsi->rx_page_failed = rx_page; vsi->rx_buf_failed = rx_buf; @@ -1368,7 +1371,7 @@ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi, f->changed = true; INIT_LIST_HEAD(&f->list); - list_add(&f->list, &vsi->mac_filter_list); + list_add_tail(&f->list, &vsi->mac_filter_list); } /* increment counter and add a new flag if needed */ @@ -1538,7 +1541,11 @@ static int i40e_set_mac(struct net_device *netdev, void *p) ether_addr_copy(netdev->dev_addr, addr->sa_data); - return i40e_sync_vsi_filters(vsi); + /* schedule our worker thread which will take care of + * applying the new filter changes + */ + i40e_service_event_schedule(vsi->back); + return 0; } /** @@ -1762,6 +1769,11 @@ bottom_of_search_loop: vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; vsi->back->flags |= I40E_FLAG_FILTER_SYNC; } + + /* schedule our worker thread which will take care of + * applying the new filter changes + */ + i40e_service_event_schedule(vsi->back); } /** @@ -1933,7 +1945,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) sizeof(struct i40e_aqc_remove_macvlan_element_data); del_list_size = filter_list_len * sizeof(struct i40e_aqc_remove_macvlan_element_data); - del_list = kzalloc(del_list_size, GFP_KERNEL); + del_list = kzalloc(del_list_size, GFP_ATOMIC); if (!del_list) { i40e_cleanup_add_list(&tmp_add_list); @@ -2011,7 +2023,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) sizeof(struct i40e_aqc_add_macvlan_element_data), add_list_size = filter_list_len * sizeof(struct i40e_aqc_add_macvlan_element_data); - add_list = kzalloc(add_list_size, GFP_KERNEL); + add_list = kzalloc(add_list_size, GFP_ATOMIC); if (!add_list) { /* Purge element from temporary lists */ i40e_cleanup_add_list(&tmp_add_list); @@ -2110,7 +2122,9 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) cur_promisc = (!!(vsi->current_netdev_flags & IFF_PROMISC) || test_bit(__I40E_FILTER_OVERFLOW_PROMISC, &vsi->state)); - if (vsi->type == I40E_VSI_MAIN && pf->lan_veb != I40E_NO_VEB) { + if ((vsi->type == I40E_VSI_MAIN) && + (pf->lan_veb != I40E_NO_VEB) && + !(pf->flags & I40E_FLAG_MFP_ENABLED)) { /* set defport ON for Main VSI instead of true promisc * this way we will get all unicast/multicast and VLAN * promisc behavior but will not get VF or VMDq traffic @@ -2160,6 +2174,10 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) } } out: + /* if something went wrong then set the changed flag so we try again */ + if (retval) + vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; + clear_bit(__I40E_CONFIG_BUSY, &vsi->state); return retval; } @@ -3106,11 +3124,11 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi) struct i40e_q_vector *q_vector = vsi->q_vectors[i]; q_vector->itr_countdown = ITR_COUNTDOWN_START; - q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting); + q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[i]->rx_itr_setting); q_vector->rx.latency_range = I40E_LOW_LATENCY; wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1), q_vector->rx.itr); - q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting); + q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[i]->tx_itr_setting); q_vector->tx.latency_range = I40E_LOW_LATENCY; wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1), q_vector->tx.itr); @@ -3202,10 +3220,10 @@ static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi) /* set the ITR configuration */ q_vector->itr_countdown = ITR_COUNTDOWN_START; - q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting); + q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[0]->rx_itr_setting); q_vector->rx.latency_range = I40E_LOW_LATENCY; wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.itr); - q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting); + q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[0]->tx_itr_setting); q_vector->tx.latency_range = I40E_LOW_LATENCY; wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.itr); @@ -3245,14 +3263,15 @@ void i40e_irq_dynamic_disable_icr0(struct i40e_pf *pf) /** * i40e_irq_dynamic_enable_icr0 - Enable default interrupt generation for icr0 * @pf: board private structure + * @clearpba: true when all pending interrupt events should be cleared **/ -void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf) +void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf, bool clearpba) { struct i40e_hw *hw = &pf->hw; u32 val; val = I40E_PFINT_DYN_CTL0_INTENA_MASK | - I40E_PFINT_DYN_CTL0_CLEARPBA_MASK | + (clearpba ? I40E_PFINT_DYN_CTL0_CLEARPBA_MASK : 0) | (I40E_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT); wr32(hw, I40E_PFINT_DYN_CTL0, val); @@ -3260,22 +3279,6 @@ void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf) } /** - * i40e_irq_dynamic_disable - Disable default interrupt generation settings - * @vsi: pointer to a vsi - * @vector: disable a particular Hw Interrupt vector - **/ -void i40e_irq_dynamic_disable(struct i40e_vsi *vsi, int vector) -{ - struct i40e_pf *pf = vsi->back; - struct i40e_hw *hw = &pf->hw; - u32 val; - - val = I40E_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT; - wr32(hw, I40E_PFINT_DYN_CTLN(vector - 1), val); - i40e_flush(hw); -} - -/** * i40e_msix_clean_rings - MSIX mode Interrupt Handler * @irq: interrupt number * @data: pointer to a q_vector @@ -3400,7 +3403,7 @@ static int i40e_vsi_enable_irq(struct i40e_vsi *vsi) for (i = 0; i < vsi->num_q_vectors; i++) i40e_irq_dynamic_enable(vsi, i); } else { - i40e_irq_dynamic_enable_icr0(pf); + i40e_irq_dynamic_enable_icr0(pf, true); } i40e_flush(&pf->hw); @@ -3459,16 +3462,12 @@ static irqreturn_t i40e_intr(int irq, void *data) struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; struct i40e_q_vector *q_vector = vsi->q_vectors[0]; - /* temporarily disable queue cause for NAPI processing */ - u32 qval = rd32(hw, I40E_QINT_RQCTL(0)); - - qval &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK; - wr32(hw, I40E_QINT_RQCTL(0), qval); - - qval = rd32(hw, I40E_QINT_TQCTL(0)); - qval &= ~I40E_QINT_TQCTL_CAUSE_ENA_MASK; - wr32(hw, I40E_QINT_TQCTL(0), qval); - + /* We do not have a way to disarm Queue causes while leaving + * interrupt enabled for all other causes, ideally + * interrupt should be disabled while we are in NAPI but + * this is not a performance path and napi_schedule() + * can deal with rescheduling. + */ if (!test_bit(__I40E_DOWN, &pf->state)) napi_schedule_irqoff(&q_vector->napi); } @@ -3476,6 +3475,7 @@ static irqreturn_t i40e_intr(int irq, void *data) if (icr0 & I40E_PFINT_ICR0_ADMINQ_MASK) { ena_mask &= ~I40E_PFINT_ICR0_ENA_ADMINQ_MASK; set_bit(__I40E_ADMINQ_EVENT_PENDING, &pf->state); + i40e_debug(&pf->hw, I40E_DEBUG_NVM, "AdminQ event\n"); } if (icr0 & I40E_PFINT_ICR0_MAL_DETECT_MASK) { @@ -3546,7 +3546,7 @@ enable_intr: wr32(hw, I40E_PFINT_ICR0_ENA, ena_mask); if (!test_bit(__I40E_DOWN, &pf->state)) { i40e_service_event_schedule(pf); - i40e_irq_dynamic_enable_icr0(pf); + i40e_irq_dynamic_enable_icr0(pf, false); } return ret; @@ -3750,7 +3750,7 @@ static int i40e_vsi_request_irq(struct i40e_vsi *vsi, char *basename) #ifdef CONFIG_NET_POLL_CONTROLLER /** - * i40e_netpoll - A Polling 'interrupt'handler + * i40e_netpoll - A Polling 'interrupt' handler * @netdev: network interface device structure * * This is used by netconsole to send skbs without having to re-enable @@ -3929,6 +3929,9 @@ static int i40e_vsi_control_rx(struct i40e_vsi *vsi, bool enable) else rx_reg &= ~I40E_QRX_ENA_QENA_REQ_MASK; wr32(hw, I40E_QRX_ENA(pf_q), rx_reg); + /* No waiting for the Tx queue to disable */ + if (!enable && test_bit(__I40E_PORT_TX_SUSPENDED, &pf->state)) + continue; /* wait for the change to finish */ ret = i40e_pf_rxq_wait(pf, pf_q, enable); @@ -4287,12 +4290,12 @@ static void i40e_pf_unquiesce_all_vsi(struct i40e_pf *pf) #ifdef CONFIG_I40E_DCB /** - * i40e_vsi_wait_txq_disabled - Wait for VSI's queues to be disabled + * i40e_vsi_wait_queues_disabled - Wait for VSI's queues to be disabled * @vsi: the VSI being configured * - * This function waits for the given VSI's Tx queues to be disabled. + * This function waits for the given VSI's queues to be disabled. **/ -static int i40e_vsi_wait_txq_disabled(struct i40e_vsi *vsi) +static int i40e_vsi_wait_queues_disabled(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; int i, pf_q, ret; @@ -4309,24 +4312,36 @@ static int i40e_vsi_wait_txq_disabled(struct i40e_vsi *vsi) } } + pf_q = vsi->base_queue; + for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) { + /* Check and wait for the disable status of the queue */ + ret = i40e_pf_rxq_wait(pf, pf_q, false); + if (ret) { + dev_info(&pf->pdev->dev, + "VSI seid %d Rx ring %d disable timeout\n", + vsi->seid, pf_q); + return ret; + } + } + return 0; } /** - * i40e_pf_wait_txq_disabled - Wait for all queues of PF VSIs to be disabled + * i40e_pf_wait_queues_disabled - Wait for all queues of PF VSIs to be disabled * @pf: the PF * - * This function waits for the Tx queues to be in disabled state for all the + * This function waits for the queues to be in disabled state for all the * VSIs that are managed by this PF. **/ -static int i40e_pf_wait_txq_disabled(struct i40e_pf *pf) +static int i40e_pf_wait_queues_disabled(struct i40e_pf *pf) { int v, ret = 0; for (v = 0; v < pf->hw.func_caps.num_vsis; v++) { /* No need to wait for FCoE VSI queues */ if (pf->vsi[v] && pf->vsi[v]->type != I40E_VSI_FCOE) { - ret = i40e_vsi_wait_txq_disabled(pf->vsi[v]); + ret = i40e_vsi_wait_queues_disabled(pf->vsi[v]); if (ret) break; } @@ -4352,7 +4367,7 @@ static void i40e_detect_recover_hung_queue(int q_idx, struct i40e_vsi *vsi) { struct i40e_ring *tx_ring = NULL; struct i40e_pf *pf; - u32 head, val, tx_pending; + u32 head, val, tx_pending_hw; int i; pf = vsi->back; @@ -4378,16 +4393,9 @@ static void i40e_detect_recover_hung_queue(int q_idx, struct i40e_vsi *vsi) else val = rd32(&pf->hw, I40E_PFINT_DYN_CTL0); - /* Bail out if interrupts are disabled because napi_poll - * execution in-progress or will get scheduled soon. - * napi_poll cleans TX and RX queues and updates 'next_to_clean'. - */ - if (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK)) - return; - head = i40e_get_head(tx_ring); - tx_pending = i40e_get_tx_pending(tx_ring); + tx_pending_hw = i40e_get_tx_pending(tx_ring, false); /* HW is done executing descriptors, updated HEAD write back, * but SW hasn't processed those descriptors. If interrupt is @@ -4395,12 +4403,12 @@ static void i40e_detect_recover_hung_queue(int q_idx, struct i40e_vsi *vsi) * dev_watchdog detecting timeout on those netdev_queue, * hence proactively trigger SW interrupt. */ - if (tx_pending) { + if (tx_pending_hw && (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK))) { /* NAPI Poll didn't run and clear since it was set */ if (test_and_clear_bit(I40E_Q_VECTOR_HUNG_DETECT, &tx_ring->q_vector->hung_detected)) { - netdev_info(vsi->netdev, "VSI_seid %d, Hung TX queue %d, tx_pending: %d, NTC:0x%x, HWB: 0x%x, NTU: 0x%x, TAIL: 0x%x\n", - vsi->seid, q_idx, tx_pending, + netdev_info(vsi->netdev, "VSI_seid %d, Hung TX queue %d, tx_pending_hw: %d, NTC:0x%x, HWB: 0x%x, NTU: 0x%x, TAIL: 0x%x\n", + vsi->seid, q_idx, tx_pending_hw, tx_ring->next_to_clean, head, tx_ring->next_to_use, readl(tx_ring->tail)); @@ -4413,6 +4421,17 @@ static void i40e_detect_recover_hung_queue(int q_idx, struct i40e_vsi *vsi) &tx_ring->q_vector->hung_detected); } } + + /* This is the case where we have interrupts missing, + * so the tx_pending in HW will most likely be 0, but we + * will have tx_pending in SW since the WB happened but the + * interrupt got lost. + */ + if ((!tx_pending_hw) && i40e_get_tx_pending(tx_ring, true) && + (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK))) { + if (napi_reschedule(&tx_ring->q_vector->napi)) + tx_ring->tx_stats.tx_lost_interrupt++; + } } /** @@ -5016,8 +5035,7 @@ static int i40e_init_pf_dcb(struct i40e_pf *pf) int err = 0; /* Do not enable DCB for SW1 and SW2 images even if the FW is capable */ - if (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) || - (pf->hw.aq.fw_maj_ver < 4)) + if (pf->flags & I40E_FLAG_NO_DCB_SUPPORT) goto out; /* Get the initial DCB configuration */ @@ -5249,11 +5267,7 @@ void i40e_down(struct i40e_vsi *vsi) * @netdev: net device to configure * @tc: number of traffic classes to enable **/ -#ifdef I40E_FCOE -int i40e_setup_tc(struct net_device *netdev, u8 tc) -#else static int i40e_setup_tc(struct net_device *netdev, u8 tc) -#endif { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; @@ -5306,6 +5320,19 @@ exit: return ret; } +#ifdef I40E_FCOE +int __i40e_setup_tc(struct net_device *netdev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) +#else +static int __i40e_setup_tc(struct net_device *netdev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) +#endif +{ + if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO) + return -EINVAL; + return i40e_setup_tc(netdev, tc->tc); +} + /** * i40e_open - Called when a network interface is made active * @netdev: network interface device structure @@ -5348,7 +5375,8 @@ int i40e_open(struct net_device *netdev) vxlan_get_rx_port(netdev); #endif #ifdef CONFIG_I40E_GENEVE - geneve_get_rx_port(netdev); + if (pf->flags & I40E_FLAG_GENEVE_OFFLOAD_CAPABLE) + geneve_get_rx_port(netdev); #endif return 0; @@ -5713,8 +5741,8 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf, if (ret) goto exit; - /* Wait for the PF's Tx queues to be disabled */ - ret = i40e_pf_wait_txq_disabled(pf); + /* Wait for the PF's queues to be disabled */ + ret = i40e_pf_wait_queues_disabled(pf); if (ret) { /* Schedule PF reset to recover */ set_bit(__I40E_PF_RESET_REQUESTED, &pf->state); @@ -6244,6 +6272,7 @@ static void i40e_clean_adminq_subtask(struct i40e_pf *pf) if (hw->debug_mask & I40E_DEBUG_AQ) dev_info(&pf->pdev->dev, "ARQ Overflow Error detected\n"); val &= ~I40E_PF_ARQLEN_ARQOVFL_MASK; + pf->arq_overflows++; } if (val & I40E_PF_ARQLEN_ARQCRIT_MASK) { if (hw->debug_mask & I40E_DEBUG_AQ) @@ -6319,7 +6348,9 @@ static void i40e_clean_adminq_subtask(struct i40e_pf *pf) case i40e_aqc_opc_nvm_erase: case i40e_aqc_opc_nvm_update: case i40e_aqc_opc_oem_post_update: - i40e_debug(&pf->hw, I40E_DEBUG_NVM, "ARQ NVM operation completed\n"); + i40e_debug(&pf->hw, I40E_DEBUG_NVM, + "ARQ NVM operation 0x%04x completed\n", + opcode); break; default: dev_info(&pf->pdev->dev, @@ -6803,12 +6834,12 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit) if (ret) goto end_core_reset; - /* driver is only interested in link up/down and module qualification - * reports from firmware + /* The driver only wants link up/down and module qualification + * reports from firmware. Note the negative logic. */ ret = i40e_aq_set_phy_int_mask(&pf->hw, - I40E_AQ_EVENT_LINK_UPDOWN | - I40E_AQ_EVENT_MODULE_QUAL_FAIL, NULL); + ~(I40E_AQ_EVENT_LINK_UPDOWN | + I40E_AQ_EVENT_MODULE_QUAL_FAIL), NULL); if (ret) dev_info(&pf->pdev->dev, "set phy mask fail, err %s aq_err %s\n", i40e_stat_str(&pf->hw, ret), @@ -6889,8 +6920,7 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit) wr32(hw, I40E_REG_MSS, val); } - if (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) || - (pf->hw.aq.fw_maj_ver < 4)) { + if (pf->flags & I40E_FLAG_RESTART_AUTONEG) { msleep(75); ret = i40e_aq_set_link_restart_an(&pf->hw, true, NULL); if (ret) @@ -7079,12 +7109,13 @@ static void i40e_sync_udp_filters_subtask(struct i40e_pf *pf) ret = i40e_aq_del_udp_tunnel(hw, i, NULL); if (ret) { - dev_info(&pf->pdev->dev, - "%s vxlan port %d, index %d failed, err %s aq_err %s\n", - port ? "add" : "delete", - ntohs(port), i, - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, + dev_dbg(&pf->pdev->dev, + "%s %s port %d, index %d failed, err %s aq_err %s\n", + pf->udp_ports[i].type ? "vxlan" : "geneve", + port ? "add" : "delete", + ntohs(port), i, + i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); pf->udp_ports[i].index = 0; } @@ -7111,6 +7142,7 @@ static void i40e_service_task(struct work_struct *work) } i40e_detect_recover_hung(pf); + i40e_sync_filters_subtask(pf); i40e_reset_subtask(pf); i40e_handle_mdd_event(pf); i40e_vc_process_vflr_event(pf); @@ -7290,8 +7322,6 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type) set_bit(__I40E_DOWN, &vsi->state); vsi->flags = 0; vsi->idx = vsi_idx; - vsi->rx_itr_setting = pf->rx_itr_default; - vsi->tx_itr_setting = pf->tx_itr_default; vsi->int_rate_limit = 0; vsi->rss_table_size = (vsi->type == I40E_VSI_MAIN) ? pf->rss_table_size : 64; @@ -7458,8 +7488,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi) tx_ring->dcb_tc = 0; if (vsi->back->flags & I40E_FLAG_WB_ON_ITR_CAPABLE) tx_ring->flags = I40E_TXR_FLAGS_WB_ON_ITR; - if (vsi->back->flags & I40E_FLAG_OUTER_UDP_CSUM_CAPABLE) - tx_ring->flags |= I40E_TXR_FLAGS_OUTER_UDP_CSUM; + tx_ring->tx_itr_setting = pf->tx_itr_default; vsi->tx_rings[i] = tx_ring; rx_ring = &tx_ring[1]; @@ -7476,6 +7505,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi) set_ring_16byte_desc_enabled(rx_ring); else clear_ring_16byte_desc_enabled(rx_ring); + rx_ring->rx_itr_setting = pf->rx_itr_default; vsi->rx_rings[i] = rx_ring; } @@ -7852,7 +7882,7 @@ static int i40e_setup_misc_vector(struct i40e_pf *pf) i40e_flush(hw); - i40e_irq_dynamic_enable_icr0(pf); + i40e_irq_dynamic_enable_icr0(pf, true); return err; } @@ -7936,6 +7966,52 @@ static int i40e_vsi_config_rss(struct i40e_vsi *vsi) } /** + * i40e_get_rss_aq - Get RSS keys and lut by using AQ commands + * @vsi: Pointer to vsi structure + * @seed: Buffter to store the hash keys + * @lut: Buffer to store the lookup table entries + * @lut_size: Size of buffer to store the lookup table entries + * + * Return 0 on success, negative on failure + */ +static int i40e_get_rss_aq(struct i40e_vsi *vsi, const u8 *seed, + u8 *lut, u16 lut_size) +{ + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + int ret = 0; + + if (seed) { + ret = i40e_aq_get_rss_key(hw, vsi->id, + (struct i40e_aqc_get_set_rss_key_data *)seed); + if (ret) { + dev_info(&pf->pdev->dev, + "Cannot get RSS key, err %s aq_err %s\n", + i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, + pf->hw.aq.asq_last_status)); + return ret; + } + } + + if (lut) { + bool pf_lut = vsi->type == I40E_VSI_MAIN ? true : false; + + ret = i40e_aq_get_rss_lut(hw, vsi->id, pf_lut, lut, lut_size); + if (ret) { + dev_info(&pf->pdev->dev, + "Cannot get RSS lut, err %s aq_err %s\n", + i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, + pf->hw.aq.asq_last_status)); + return ret; + } + } + + return ret; +} + +/** * i40e_config_rss_reg - Configure RSS keys and lut by writing registers * @vsi: Pointer to vsi structure * @seed: RSS hash seed @@ -7956,7 +8032,7 @@ static int i40e_config_rss_reg(struct i40e_vsi *vsi, const u8 *seed, u32 *seed_dw = (u32 *)seed; for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++) - wr32(hw, I40E_PFQF_HKEY(i), seed_dw[i]); + i40e_write_rx_ctl(hw, I40E_PFQF_HKEY(i), seed_dw[i]); } if (lut) { @@ -7993,7 +8069,7 @@ static int i40e_get_rss_reg(struct i40e_vsi *vsi, u8 *seed, u32 *seed_dw = (u32 *)seed; for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++) - seed_dw[i] = rd32(hw, I40E_PFQF_HKEY(i)); + seed_dw[i] = i40e_read_rx_ctl(hw, I40E_PFQF_HKEY(i)); } if (lut) { u32 *lut_dw = (u32 *)lut; @@ -8037,7 +8113,12 @@ int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) */ int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) { - return i40e_get_rss_reg(vsi, seed, lut, lut_size); + struct i40e_pf *pf = vsi->back; + + if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) + return i40e_get_rss_aq(vsi, seed, lut, lut_size); + else + return i40e_get_rss_reg(vsi, seed, lut, lut_size); } /** @@ -8071,19 +8152,19 @@ static int i40e_pf_config_rss(struct i40e_pf *pf) int ret; /* By default we enable TCP/UDP with IPv4/IPv6 ptypes */ - hena = (u64)rd32(hw, I40E_PFQF_HENA(0)) | - ((u64)rd32(hw, I40E_PFQF_HENA(1)) << 32); + hena = (u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)) | + ((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32); hena |= i40e_pf_get_default_rss_hena(pf); - wr32(hw, I40E_PFQF_HENA(0), (u32)hena); - wr32(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32)); + i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (u32)hena); + i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32)); /* Determine the RSS table size based on the hardware capabilities */ - reg_val = rd32(hw, I40E_PFQF_CTL_0); + reg_val = i40e_read_rx_ctl(hw, I40E_PFQF_CTL_0); reg_val = (pf->rss_table_size == 512) ? (reg_val | I40E_PFQF_CTL_0_HASHLUTSIZE_512) : (reg_val & ~I40E_PFQF_CTL_0_HASHLUTSIZE_512); - wr32(hw, I40E_PFQF_CTL_0, reg_val); + i40e_write_rx_ctl(hw, I40E_PFQF_CTL_0, reg_val); /* Determine the RSS size of the VSI */ if (!vsi->rss_size) @@ -8367,6 +8448,26 @@ static int i40e_sw_init(struct i40e_pf *pf) pf->hw.func_caps.fd_filters_best_effort; } + if (i40e_is_mac_710(&pf->hw) && + (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) || + (pf->hw.aq.fw_maj_ver < 4))) { + pf->flags |= I40E_FLAG_RESTART_AUTONEG; + /* No DCB support for FW < v4.33 */ + pf->flags |= I40E_FLAG_NO_DCB_SUPPORT; + } + + /* Disable FW LLDP if FW < v4.3 */ + if (i40e_is_mac_710(&pf->hw) && + (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 3)) || + (pf->hw.aq.fw_maj_ver < 4))) + pf->flags |= I40E_FLAG_STOP_FW_LLDP; + + /* Use the FW Set LLDP MIB API if FW > v4.40 */ + if (i40e_is_mac_710(&pf->hw) && + (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver >= 40)) || + (pf->hw.aq.fw_maj_ver >= 5))) + pf->flags |= I40E_FLAG_USE_SET_LLDP_MIB; + if (pf->hw.func_caps.vmdq) { pf->num_vmdq_vsis = I40E_DEFAULT_NUM_VMDQ_VSI; pf->flags |= I40E_FLAG_VMDQ_ENABLED; @@ -8393,8 +8494,19 @@ static int i40e_sw_init(struct i40e_pf *pf) I40E_FLAG_OUTER_UDP_CSUM_CAPABLE | I40E_FLAG_WB_ON_ITR_CAPABLE | I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE | + I40E_FLAG_100M_SGMII_CAPABLE | + I40E_FLAG_USE_SET_LLDP_MIB | I40E_FLAG_GENEVE_OFFLOAD_CAPABLE; + } else if ((pf->hw.aq.api_maj_ver > 1) || + ((pf->hw.aq.api_maj_ver == 1) && + (pf->hw.aq.api_min_ver > 4))) { + /* Supported in FW API version higher than 1.4 */ + pf->flags |= I40E_FLAG_GENEVE_OFFLOAD_CAPABLE; + pf->auto_disable_flags = I40E_FLAG_HW_ATR_EVICT_CAPABLE; + } else { + pf->auto_disable_flags = I40E_FLAG_HW_ATR_EVICT_CAPABLE; } + pf->eeprom_version = 0xDEAD; pf->lan_veb = I40E_NO_VEB; pf->lan_vsi = I40E_NO_VSI; @@ -8530,9 +8642,6 @@ static void i40e_add_vxlan_port(struct net_device *netdev, u8 next_idx; u8 idx; - if (sa_family == AF_INET6) - return; - idx = i40e_get_udp_port_idx(pf, port); /* Check if port already exists */ @@ -8572,9 +8681,6 @@ static void i40e_del_vxlan_port(struct net_device *netdev, struct i40e_pf *pf = vsi->back; u8 idx; - if (sa_family == AF_INET6) - return; - idx = i40e_get_udp_port_idx(pf, port); /* Check if port already exists */ @@ -8608,7 +8714,7 @@ static void i40e_add_geneve_port(struct net_device *netdev, u8 next_idx; u8 idx; - if (sa_family == AF_INET6) + if (!(pf->flags & I40E_FLAG_GENEVE_OFFLOAD_CAPABLE)) return; idx = i40e_get_udp_port_idx(pf, port); @@ -8652,7 +8758,7 @@ static void i40e_del_geneve_port(struct net_device *netdev, struct i40e_pf *pf = vsi->back; u8 idx; - if (sa_family == AF_INET6) + if (!(pf->flags & I40E_FLAG_GENEVE_OFFLOAD_CAPABLE)) return; idx = i40e_get_udp_port_idx(pf, port); @@ -8890,7 +8996,7 @@ static const struct net_device_ops i40e_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = i40e_netpoll, #endif - .ndo_setup_tc = i40e_setup_tc, + .ndo_setup_tc = __i40e_setup_tc, #ifdef I40E_FCOE .ndo_fcoe_enable = i40e_fcoe_enable, .ndo_fcoe_disable = i40e_fcoe_disable, @@ -8942,11 +9048,15 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) np = netdev_priv(netdev); np->vsi = vsi; - netdev->hw_enc_features |= NETIF_F_IP_CSUM | - NETIF_F_RXCSUM | - NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_GRE | - NETIF_F_TSO; + netdev->hw_enc_features |= NETIF_F_IP_CSUM | + NETIF_F_IPV6_CSUM | + NETIF_F_TSO | + NETIF_F_TSO6 | + NETIF_F_TSO_ECN | + NETIF_F_GSO_GRE | + NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM | + 0; netdev->features = NETIF_F_SG | NETIF_F_IP_CSUM | @@ -8967,6 +9077,8 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) if (!(pf->flags & I40E_FLAG_MFP_ENABLED)) netdev->features |= NETIF_F_NTUPLE; + if (pf->flags & I40E_FLAG_OUTER_UDP_CSUM_CAPABLE) + netdev->features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; /* copy netdev features into list of user selectable features */ netdev->hw_features |= netdev->features; @@ -9471,10 +9583,15 @@ vector_setup_out: **/ static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi) { - struct i40e_pf *pf = vsi->back; + struct i40e_pf *pf; u8 enabled_tc; int ret; + if (!vsi) + return NULL; + + pf = vsi->back; + i40e_put_lump(pf->qp_pile, vsi->base_queue, vsi->idx); i40e_vsi_clear_rings(vsi); @@ -9975,13 +10092,13 @@ static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi) { struct i40e_pf *pf = veb->pf; bool is_default = veb->pf->cur_promisc; - bool is_cloud = false; + bool enable_stats = !!(pf->flags & I40E_FLAG_VEB_STATS_ENABLED); int ret; /* get a VEB from the hardware */ ret = i40e_aq_add_veb(&pf->hw, veb->uplink_seid, vsi->seid, veb->enabled_tc, is_default, - is_cloud, &veb->seid, NULL); + &veb->seid, enable_stats, NULL); if (ret) { dev_info(&pf->pdev->dev, "couldn't add VEB, err %s aq_err %s\n", @@ -10538,21 +10655,9 @@ static void i40e_print_features(struct i40e_pf *pf) **/ static void i40e_get_platform_mac_addr(struct pci_dev *pdev, struct i40e_pf *pf) { - struct device_node *dp = pci_device_to_OF_node(pdev); - const unsigned char *addr; - u8 *mac_addr = pf->hw.mac.addr; - pf->flags &= ~I40E_FLAG_PF_MAC; - addr = of_get_mac_address(dp); - if (addr) { - ether_addr_copy(mac_addr, addr); + if (!eth_platform_get_mac_address(&pdev->dev, pf->hw.mac.addr)) pf->flags |= I40E_FLAG_PF_MAC; -#ifdef CONFIG_SPARC - } else { - ether_addr_copy(mac_addr, idprom->id_ethaddr); - pf->flags |= I40E_FLAG_PF_MAC; -#endif /* CONFIG_SPARC */ - } } /** @@ -10575,7 +10680,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) u16 wol_nvm_bits; u16 link_status; int err; - u32 len; u32 val; u32 i; u8 set_fc_aq_fail; @@ -10758,8 +10862,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * Ignore error return codes because if it was already disabled via * hardware settings this will fail */ - if (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 3)) || - (pf->hw.aq.fw_maj_ver < 4)) { + if (pf->flags & I40E_FLAG_STOP_FW_LLDP) { dev_info(&pdev->dev, "Stopping firmware LLDP agent.\n"); i40e_aq_stop_lldp(hw, true, NULL); } @@ -10834,8 +10937,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pf->num_alloc_vsi = pf->hw.func_caps.num_vsis; /* Set up the *vsi struct and our local tracking of the MAIN PF vsi. */ - len = sizeof(struct i40e_vsi *) * pf->num_alloc_vsi; - pf->vsi = kzalloc(len, GFP_KERNEL); + pf->vsi = kcalloc(pf->num_alloc_vsi, sizeof(struct i40e_vsi *), + GFP_KERNEL); if (!pf->vsi) { err = -ENOMEM; goto err_switch_setup; @@ -10882,12 +10985,12 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } } - /* driver is only interested in link up/down and module qualification - * reports from firmware + /* The driver only wants link up/down and module qualification + * reports from firmware. Note the negative logic. */ err = i40e_aq_set_phy_int_mask(&pf->hw, - I40E_AQ_EVENT_LINK_UPDOWN | - I40E_AQ_EVENT_MODULE_QUAL_FAIL, NULL); + ~(I40E_AQ_EVENT_LINK_UPDOWN | + I40E_AQ_EVENT_MODULE_QUAL_FAIL), NULL); if (err) dev_info(&pf->pdev->dev, "set phy mask fail, err %s aq_err %s\n", i40e_stat_str(&pf->hw, err), @@ -10904,8 +11007,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) wr32(hw, I40E_REG_MSS, val); } - if (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) || - (pf->hw.aq.fw_maj_ver < 4)) { + if (pf->flags & I40E_FLAG_RESTART_AUTONEG) { msleep(75); err = i40e_aq_set_link_restart_an(&pf->hw, true, NULL); if (err) @@ -10939,8 +11041,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) && (pf->flags & I40E_FLAG_MSIX_ENABLED) && !test_bit(__I40E_BAD_EEPROM, &pf->state)) { - u32 val; - /* disable link interrupts for VFs */ val = rd32(hw, I40E_PFGEN_PORTMDIO_NUM); val &= ~I40E_PFGEN_PORTMDIO_NUM_VFLINK_STAT_ENA_MASK; @@ -11051,6 +11151,10 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) i40e_add_filter_to_drop_tx_flow_control_frames(&pf->hw, pf->main_vsi_seid); + if ((pf->hw.device_id == I40E_DEV_ID_10G_BASE_T) || + (pf->hw.device_id == I40E_DEV_ID_10G_BASE_T4)) + pf->flags |= I40E_FLAG_HAVE_10GBASET_PHY; + /* print a string summarizing features */ i40e_print_features(pf); @@ -11107,10 +11211,11 @@ static void i40e_remove(struct pci_dev *pdev) i40e_ptp_stop(pf); /* Disable RSS in hw */ - wr32(hw, I40E_PFQF_HENA(0), 0); - wr32(hw, I40E_PFQF_HENA(1), 0); + i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), 0); + i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), 0); /* no more scheduling of any task */ + set_bit(__I40E_SUSPENDED, &pf->state); set_bit(__I40E_DOWN, &pf->state); del_timer_sync(&pf->service_timer); cancel_work_sync(&pf->service_task); @@ -11141,8 +11246,8 @@ static void i40e_remove(struct pci_dev *pdev) i40e_vsi_release(pf->vsi[pf->lan_vsi]); /* shutdown and destroy the HMC */ - if (pf->hw.hmc.hmc_obj) { - ret_code = i40e_shutdown_lan_hmc(&pf->hw); + if (hw->hmc.hmc_obj) { + ret_code = i40e_shutdown_lan_hmc(hw); if (ret_code) dev_warn(&pdev->dev, "Failed to destroy the HMC resources: %d\n", @@ -11150,7 +11255,7 @@ static void i40e_remove(struct pci_dev *pdev) } /* shutdown the adminq */ - ret_code = i40e_shutdown_adminq(&pf->hw); + ret_code = i40e_shutdown_adminq(hw); if (ret_code) dev_warn(&pdev->dev, "Failed to destroy the Admin Queue resources: %d\n", @@ -11178,7 +11283,7 @@ static void i40e_remove(struct pci_dev *pdev) kfree(pf->qp_pile); kfree(pf->vsi); - iounmap(pf->hw.hw_addr); + iounmap(hw->hw_addr); kfree(pf); pci_release_selected_regions(pdev, pci_select_bars(pdev, IORESOURCE_MEM)); @@ -11413,6 +11518,16 @@ static int __init i40e_init_module(void) i40e_driver_string, i40e_driver_version_str); pr_info("%s: %s\n", i40e_driver_name, i40e_copyright); + /* we will see if single thread per module is enough for now, + * it can't be any worse than using the system workqueue which + * was already single threaded + */ + i40e_wq = create_singlethread_workqueue(i40e_driver_name); + if (!i40e_wq) { + pr_err("%s: Failed to create workqueue\n", i40e_driver_name); + return -ENOMEM; + } + i40e_dbg_init(); return pci_register_driver(&i40e_driver); } @@ -11427,6 +11542,7 @@ module_init(i40e_init_module); static void __exit i40e_exit_module(void) { pci_unregister_driver(&i40e_driver); + destroy_workqueue(i40e_wq); i40e_dbg_exit(); } module_exit(i40e_exit_module); diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c index 6100cdd9ad13..5730f8091e1b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c +++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c @@ -693,10 +693,11 @@ i40e_status i40e_nvmupd_command(struct i40e_hw *hw, /* early check for status command and debug msgs */ upd_cmd = i40e_nvmupd_validate_command(hw, cmd, perrno); - i40e_debug(hw, I40E_DEBUG_NVM, "%s state %d nvm_release_on_hold %d\n", + i40e_debug(hw, I40E_DEBUG_NVM, "%s state %d nvm_release_on_hold %d cmd 0x%08x config 0x%08x offset 0x%08x data_size 0x%08x\n", i40e_nvm_update_state_str[upd_cmd], hw->nvmupd_state, - hw->aq.nvm_release_on_done); + hw->aq.nvm_release_on_done, + cmd->command, cmd->config, cmd->offset, cmd->data_size); if (upd_cmd == I40E_NVMUPD_INVALID) { *perrno = -EFAULT; diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index bb9d583e5416..d51eee5bf79a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2015 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -74,6 +74,12 @@ i40e_status i40e_aq_set_rss_key(struct i40e_hw *hw, u32 i40e_led_get(struct i40e_hw *hw); void i40e_led_set(struct i40e_hw *hw, u32 mode, bool blink); +i40e_status i40e_led_set_phy(struct i40e_hw *hw, bool on, + u16 led_addr, u32 mode); +i40e_status i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr, + u16 *val); +i40e_status i40e_blink_phy_link_led(struct i40e_hw *hw, + u32 time, u32 interval); /* admin send queue commands */ @@ -127,6 +133,9 @@ i40e_status i40e_aq_set_vsi_unicast_promiscuous(struct i40e_hw *hw, u16 vsi_id, bool set, struct i40e_asq_cmd_details *cmd_details); i40e_status i40e_aq_set_vsi_multicast_promiscuous(struct i40e_hw *hw, u16 vsi_id, bool set, struct i40e_asq_cmd_details *cmd_details); +i40e_status i40e_aq_set_vsi_vlan_promisc(struct i40e_hw *hw, + u16 seid, bool enable, + struct i40e_asq_cmd_details *cmd_details); i40e_status i40e_aq_get_vsi_params(struct i40e_hw *hw, struct i40e_vsi_context *vsi_ctx, struct i40e_asq_cmd_details *cmd_details); @@ -135,8 +144,8 @@ i40e_status i40e_aq_update_vsi_params(struct i40e_hw *hw, struct i40e_asq_cmd_details *cmd_details); i40e_status i40e_aq_add_veb(struct i40e_hw *hw, u16 uplink_seid, u16 downlink_seid, u8 enabled_tc, - bool default_port, bool enable_l2_filtering, - u16 *pveb_seid, + bool default_port, u16 *pveb_seid, + bool enable_stats, struct i40e_asq_cmd_details *cmd_details); i40e_status i40e_aq_get_veb_parameters(struct i40e_hw *hw, u16 veb_seid, u16 *switch_id, bool *floating, @@ -149,6 +158,15 @@ i40e_status i40e_aq_add_macvlan(struct i40e_hw *hw, u16 vsi_id, i40e_status i40e_aq_remove_macvlan(struct i40e_hw *hw, u16 vsi_id, struct i40e_aqc_remove_macvlan_element_data *mv_list, u16 count, struct i40e_asq_cmd_details *cmd_details); +i40e_status i40e_aq_add_mirrorrule(struct i40e_hw *hw, u16 sw_seid, + u16 rule_type, u16 dest_vsi, u16 count, __le16 *mr_list, + struct i40e_asq_cmd_details *cmd_details, + u16 *rule_id, u16 *rules_used, u16 *rules_free); +i40e_status i40e_aq_delete_mirrorrule(struct i40e_hw *hw, u16 sw_seid, + u16 rule_type, u16 rule_id, u16 count, __le16 *mr_list, + struct i40e_asq_cmd_details *cmd_details, + u16 *rules_used, u16 *rules_free); + i40e_status i40e_aq_send_msg_to_vf(struct i40e_hw *hw, u16 vfid, u32 v_opcode, u32 v_retval, u8 *msg, u16 msglen, struct i40e_asq_cmd_details *cmd_details); @@ -324,4 +342,19 @@ i40e_status i40e_aq_debug_dump(struct i40e_hw *hw, u8 cluster_id, struct i40e_asq_cmd_details *cmd_details); void i40e_add_filter_to_drop_tx_flow_control_frames(struct i40e_hw *hw, u16 vsi_seid); +i40e_status i40e_aq_rx_ctl_read_register(struct i40e_hw *hw, + u32 reg_addr, u32 *reg_val, + struct i40e_asq_cmd_details *cmd_details); +u32 i40e_read_rx_ctl(struct i40e_hw *hw, u32 reg_addr); +i40e_status i40e_aq_rx_ctl_write_register(struct i40e_hw *hw, + u32 reg_addr, u32 reg_val, + struct i40e_asq_cmd_details *cmd_details); +void i40e_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val); +i40e_status i40e_read_phy_register(struct i40e_hw *hw, u8 page, + u16 reg, u8 phy_addr, u16 *value); +i40e_status i40e_write_phy_register(struct i40e_hw *hw, u8 page, + u16 reg, u8 phy_addr, u16 value); +u8 i40e_get_phy_address(struct i40e_hw *hw, u8 dev_num); +i40e_status i40e_blink_phy_link_led(struct i40e_hw *hw, + u32 time, u32 interval); #endif /* _I40E_PROTOTYPE_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h index dc0402fe3370..86ca27f72f02 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_register.h +++ b/drivers/net/ethernet/intel/i40e/i40e_register.h @@ -2045,6 +2045,14 @@ #define I40E_PRTPM_TLPIC 0x001E43C0 /* Reset: GLOBR */ #define I40E_PRTPM_TLPIC_ETLPIC_SHIFT 0 #define I40E_PRTPM_TLPIC_ETLPIC_MASK I40E_MASK(0xFFFFFFFF, I40E_PRTPM_TLPIC_ETLPIC_SHIFT) +#define I40E_GL_PRS_FVBM(_i) (0x00269760 + ((_i) * 4)) /* _i=0...3 */ /* Reset: CORER */ +#define I40E_GL_PRS_FVBM_MAX_INDEX 3 +#define I40E_GL_PRS_FVBM_FV_BYTE_INDX_SHIFT 0 +#define I40E_GL_PRS_FVBM_FV_BYTE_INDX_MASK I40E_MASK(0x7F, I40E_GL_PRS_FVBM_FV_BYTE_INDX_SHIFT) +#define I40E_GL_PRS_FVBM_RULE_BUS_INDX_SHIFT 8 +#define I40E_GL_PRS_FVBM_RULE_BUS_INDX_MASK I40E_MASK(0x3F, I40E_GL_PRS_FVBM_RULE_BUS_INDX_SHIFT) +#define I40E_GL_PRS_FVBM_MSK_ENA_SHIFT 31 +#define I40E_GL_PRS_FVBM_MSK_ENA_MASK I40E_MASK(0x1, I40E_GL_PRS_FVBM_MSK_ENA_SHIFT) #define I40E_GLRPB_DPSS 0x000AC828 /* Reset: CORER */ #define I40E_GLRPB_DPSS_DPS_TCN_SHIFT 0 #define I40E_GLRPB_DPSS_DPS_TCN_MASK I40E_MASK(0xFFFFF, I40E_GLRPB_DPSS_DPS_TCN_SHIFT) @@ -2216,6 +2224,14 @@ #define I40E_PRTQF_FD_FLXINSET_MAX_INDEX 63 #define I40E_PRTQF_FD_FLXINSET_INSET_SHIFT 0 #define I40E_PRTQF_FD_FLXINSET_INSET_MASK I40E_MASK(0xFF, I40E_PRTQF_FD_FLXINSET_INSET_SHIFT) +#define I40E_PRTQF_FD_INSET(_i, _j) (0x00250000 + ((_i) * 64 + (_j) * 32)) /* _i=0...63, _j=0...1 */ /* Reset: CORER */ +#define I40E_PRTQF_FD_INSET_MAX_INDEX 63 +#define I40E_PRTQF_FD_INSET_INSET_SHIFT 0 +#define I40E_PRTQF_FD_INSET_INSET_MASK I40E_MASK(0xFFFFFFFF, I40E_PRTQF_FD_INSET_INSET_SHIFT) +#define I40E_PRTQF_FD_INSET(_i, _j) (0x00250000 + ((_i) * 64 + (_j) * 32)) /* _i=0...63, _j=0...1 */ /* Reset: CORER */ +#define I40E_PRTQF_FD_INSET_MAX_INDEX 63 +#define I40E_PRTQF_FD_INSET_INSET_SHIFT 0 +#define I40E_PRTQF_FD_INSET_INSET_MASK I40E_MASK(0xFFFFFFFF, I40E_PRTQF_FD_INSET_INSET_SHIFT) #define I40E_PRTQF_FD_MSK(_i, _j) (0x00252000 + ((_i) * 64 + (_j) * 32)) /* _i=0...63, _j=0...1 */ /* Reset: CORER */ #define I40E_PRTQF_FD_MSK_MAX_INDEX 63 #define I40E_PRTQF_FD_MSK_MASK_SHIFT 0 @@ -5155,6 +5171,38 @@ #define I40E_GLQF_FD_PCTYPES_MAX_INDEX 63 #define I40E_GLQF_FD_PCTYPES_FD_PCTYPE_SHIFT 0 #define I40E_GLQF_FD_PCTYPES_FD_PCTYPE_MASK I40E_MASK(0x3F, I40E_GLQF_FD_PCTYPES_FD_PCTYPE_SHIFT) +#define I40E_GLQF_FD_MSK(_i, _j) (0x00267200 + ((_i) * 4 + (_j) * 8)) /* _i=0...1, _j=0...63 */ /* Reset: CORER */ +#define I40E_GLQF_FD_MSK_MAX_INDEX 1 +#define I40E_GLQF_FD_MSK_MASK_SHIFT 0 +#define I40E_GLQF_FD_MSK_MASK_MASK I40E_MASK(0xFFFF, I40E_GLQF_FD_MSK_MASK_SHIFT) +#define I40E_GLQF_FD_MSK_OFFSET_SHIFT 16 +#define I40E_GLQF_FD_MSK_OFFSET_MASK I40E_MASK(0x3F, I40E_GLQF_FD_MSK_OFFSET_SHIFT) +#define I40E_GLQF_HASH_INSET(_i, _j) (0x00267600 + ((_i) * 4 + (_j) * 8)) /* _i=0...1, _j=0...63 */ /* Reset: CORER */ +#define I40E_GLQF_HASH_INSET_MAX_INDEX 1 +#define I40E_GLQF_HASH_INSET_INSET_SHIFT 0 +#define I40E_GLQF_HASH_INSET_INSET_MASK I40E_MASK(0xFFFFFFFF, I40E_GLQF_HASH_INSET_INSET_SHIFT) +#define I40E_GLQF_HASH_MSK(_i, _j) (0x00267A00 + ((_i) * 4 + (_j) * 8)) /* _i=0...1, _j=0...63 */ /* Reset: CORER */ +#define I40E_GLQF_HASH_MSK_MAX_INDEX 1 +#define I40E_GLQF_HASH_MSK_MASK_SHIFT 0 +#define I40E_GLQF_HASH_MSK_MASK_MASK I40E_MASK(0xFFFF, I40E_GLQF_HASH_MSK_MASK_SHIFT) +#define I40E_GLQF_HASH_MSK_OFFSET_SHIFT 16 +#define I40E_GLQF_HASH_MSK_OFFSET_MASK I40E_MASK(0x3F, I40E_GLQF_HASH_MSK_OFFSET_SHIFT) +#define I40E_GLQF_ORT(_i) (0x00268900 + ((_i) * 4)) /* _i=0...63 */ /* Reset: CORER */ +#define I40E_GLQF_ORT_MAX_INDEX 63 +#define I40E_GLQF_ORT_PIT_INDX_SHIFT 0 +#define I40E_GLQF_ORT_PIT_INDX_MASK I40E_MASK(0x1F, I40E_GLQF_ORT_PIT_INDX_SHIFT) +#define I40E_GLQF_ORT_FIELD_CNT_SHIFT 5 +#define I40E_GLQF_ORT_FIELD_CNT_MASK I40E_MASK(0x3, I40E_GLQF_ORT_FIELD_CNT_SHIFT) +#define I40E_GLQF_ORT_FLX_PAYLOAD_SHIFT 7 +#define I40E_GLQF_ORT_FLX_PAYLOAD_MASK I40E_MASK(0x1, I40E_GLQF_ORT_FLX_PAYLOAD_SHIFT) +#define I40E_GLQF_PIT(_i) (0x00268C80 + ((_i) * 4)) /* _i=0...23 */ /* Reset: CORER */ +#define I40E_GLQF_PIT_MAX_INDEX 23 +#define I40E_GLQF_PIT_SOURCE_OFF_SHIFT 0 +#define I40E_GLQF_PIT_SOURCE_OFF_MASK I40E_MASK(0x1F, I40E_GLQF_PIT_SOURCE_OFF_SHIFT) +#define I40E_GLQF_PIT_FSIZE_SHIFT 5 +#define I40E_GLQF_PIT_FSIZE_MASK I40E_MASK(0x1F, I40E_GLQF_PIT_FSIZE_SHIFT) +#define I40E_GLQF_PIT_DEST_OFF_SHIFT 10 +#define I40E_GLQF_PIT_DEST_OFF_MASK I40E_MASK(0x3F, I40E_GLQF_PIT_DEST_OFF_SHIFT) #define I40E_GLQF_FDEVICTENA(_i) (0x00270384 + ((_i) * 4)) /* _i=0...1 */ /* Reset: CORER */ #define I40E_GLQF_FDEVICTENA_MAX_INDEX 1 #define I40E_GLQF_FDEVICTENA_GLQF_FDEVICTENA_SHIFT 0 diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 47bd8b3145a7..084d0ab316b7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -610,15 +610,19 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring) /** * i40e_get_tx_pending - how many tx descriptors not processed * @tx_ring: the ring of descriptors + * @in_sw: is tx_pending being checked in SW or HW * * Since there is no access to the ring head register * in XL710, we need to use our local copies **/ -u32 i40e_get_tx_pending(struct i40e_ring *ring) +u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw) { u32 head, tail; - head = i40e_get_head(ring); + if (!in_sw) + head = i40e_get_head(ring); + else + head = ring->next_to_clean; tail = readl(ring->tail); if (head != tail) @@ -741,7 +745,7 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) * them to be written back in case we stay in NAPI. * In this mode on X722 we do not enable Interrupt. */ - j = i40e_get_tx_pending(tx_ring); + j = i40e_get_tx_pending(tx_ring, false); if (budget && ((j / (WB_STRIDE + 1)) == 0) && (j != 0) && @@ -774,29 +778,48 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) } /** - * i40e_force_wb - Arm hardware to do a wb on noncache aligned descriptors + * i40e_enable_wb_on_itr - Arm hardware to do a wb, interrupts are not enabled * @vsi: the VSI we care about - * @q_vector: the vector on which to force writeback + * @q_vector: the vector on which to enable writeback * **/ -void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) +static void i40e_enable_wb_on_itr(struct i40e_vsi *vsi, + struct i40e_q_vector *q_vector) { u16 flags = q_vector->tx.ring[0].flags; + u32 val; - if (flags & I40E_TXR_FLAGS_WB_ON_ITR) { - u32 val; + if (!(flags & I40E_TXR_FLAGS_WB_ON_ITR)) + return; - if (q_vector->arm_wb_state) - return; + if (q_vector->arm_wb_state) + return; - val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK; + if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) { + val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK | + I40E_PFINT_DYN_CTLN_ITR_INDX_MASK; /* set noitr */ wr32(&vsi->back->hw, - I40E_PFINT_DYN_CTLN(q_vector->v_idx + - vsi->base_vector - 1), + I40E_PFINT_DYN_CTLN(q_vector->v_idx + vsi->base_vector - 1), val); - q_vector->arm_wb_state = true; - } else if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) { + } else { + val = I40E_PFINT_DYN_CTL0_WB_ON_ITR_MASK | + I40E_PFINT_DYN_CTL0_ITR_INDX_MASK; /* set noitr */ + + wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val); + } + q_vector->arm_wb_state = true; +} + +/** + * i40e_force_wb - Issue SW Interrupt so HW does a wb + * @vsi: the VSI we care about + * @q_vector: the vector on which to force writeback + * + **/ +void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) +{ + if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) { u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK | I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */ I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK | @@ -1041,7 +1064,7 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring) if (rx_bi->page_dma) { dma_unmap_page(dev, rx_bi->page_dma, - PAGE_SIZE / 2, + PAGE_SIZE, DMA_FROM_DEVICE); rx_bi->page_dma = 0; } @@ -1176,16 +1199,19 @@ static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val) * i40e_alloc_rx_buffers_ps - Replace used receive buffers; packet split * @rx_ring: ring to place buffers on * @cleaned_count: number of buffers to replace + * + * Returns true if any errors on allocation **/ -void i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count) +bool i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count) { u16 i = rx_ring->next_to_use; union i40e_rx_desc *rx_desc; struct i40e_rx_buffer *bi; + const int current_node = numa_node_id(); /* do nothing if no valid netdev defined */ if (!rx_ring->netdev || !cleaned_count) - return; + return false; while (cleaned_count--) { rx_desc = I40E_RX_DESC(rx_ring, i); @@ -1193,56 +1219,79 @@ void i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count) if (bi->skb) /* desc is in use */ goto no_buffers; + + /* If we've been moved to a different NUMA node, release the + * page so we can get a new one on the current node. + */ + if (bi->page && page_to_nid(bi->page) != current_node) { + dma_unmap_page(rx_ring->dev, + bi->page_dma, + PAGE_SIZE, + DMA_FROM_DEVICE); + __free_page(bi->page); + bi->page = NULL; + bi->page_dma = 0; + rx_ring->rx_stats.realloc_count++; + } else if (bi->page) { + rx_ring->rx_stats.page_reuse_count++; + } + if (!bi->page) { bi->page = alloc_page(GFP_ATOMIC); if (!bi->page) { rx_ring->rx_stats.alloc_page_failed++; goto no_buffers; } - } - - if (!bi->page_dma) { - /* use a half page if we're re-using */ - bi->page_offset ^= PAGE_SIZE / 2; bi->page_dma = dma_map_page(rx_ring->dev, bi->page, - bi->page_offset, - PAGE_SIZE / 2, + 0, + PAGE_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(rx_ring->dev, - bi->page_dma)) { + if (dma_mapping_error(rx_ring->dev, bi->page_dma)) { rx_ring->rx_stats.alloc_page_failed++; + __free_page(bi->page); + bi->page = NULL; bi->page_dma = 0; + bi->page_offset = 0; goto no_buffers; } + bi->page_offset = 0; } - dma_sync_single_range_for_device(rx_ring->dev, - bi->dma, - 0, - rx_ring->rx_hdr_len, - DMA_FROM_DEVICE); /* Refresh the desc even if buffer_addrs didn't change * because each write-back erases this info. */ - rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma); + rx_desc->read.pkt_addr = + cpu_to_le64(bi->page_dma + bi->page_offset); rx_desc->read.hdr_addr = cpu_to_le64(bi->dma); i++; if (i == rx_ring->count) i = 0; } + if (rx_ring->next_to_use != i) + i40e_release_rx_desc(rx_ring, i); + + return false; + no_buffers: if (rx_ring->next_to_use != i) i40e_release_rx_desc(rx_ring, i); + + /* make sure to come back via polling to try again after + * allocation failure + */ + return true; } /** * i40e_alloc_rx_buffers_1buf - Replace used receive buffers; single buffer * @rx_ring: ring to place buffers on * @cleaned_count: number of buffers to replace + * + * Returns true if any errors on allocation **/ -void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) +bool i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) { u16 i = rx_ring->next_to_use; union i40e_rx_desc *rx_desc; @@ -1251,7 +1300,7 @@ void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) /* do nothing if no valid netdev defined */ if (!rx_ring->netdev || !cleaned_count) - return; + return false; while (cleaned_count--) { rx_desc = I40E_RX_DESC(rx_ring, i); @@ -1259,8 +1308,10 @@ void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) skb = bi->skb; if (!skb) { - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - rx_ring->rx_buf_len); + skb = __netdev_alloc_skb_ip_align(rx_ring->netdev, + rx_ring->rx_buf_len, + GFP_ATOMIC | + __GFP_NOWARN); if (!skb) { rx_ring->rx_stats.alloc_buff_failed++; goto no_buffers; @@ -1278,6 +1329,8 @@ void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) if (dma_mapping_error(rx_ring->dev, bi->dma)) { rx_ring->rx_stats.alloc_buff_failed++; bi->dma = 0; + dev_kfree_skb(bi->skb); + bi->skb = NULL; goto no_buffers; } } @@ -1289,9 +1342,19 @@ void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) i = 0; } + if (rx_ring->next_to_use != i) + i40e_release_rx_desc(rx_ring, i); + + return false; + no_buffers: if (rx_ring->next_to_use != i) i40e_release_rx_desc(rx_ring, i); + + /* make sure to come back via polling to try again after + * allocation failure + */ + return true; } /** @@ -1326,16 +1389,7 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, u16 rx_ptype) { struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype); - bool ipv4 = false, ipv6 = false; - bool ipv4_tunnel, ipv6_tunnel; - __wsum rx_udp_csum; - struct iphdr *iph; - __sum16 csum; - - ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) && - (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4); - ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) && - (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4); + bool ipv4, ipv6, ipv4_tunnel, ipv6_tunnel; skb->ip_summed = CHECKSUM_NONE; @@ -1351,12 +1405,10 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, if (!(decoded.known && decoded.outer_ip)) return; - if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && - decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) - ipv4 = true; - else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && - decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) - ipv6 = true; + ipv4 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) && + (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4); + ipv6 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) && + (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6); if (ipv4 && (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) | @@ -1380,37 +1432,17 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT)) return; - /* If VXLAN/GENEVE traffic has an outer UDPv4 checksum we need to check - * it in the driver, hardware does not do it for us. - * Since L3L4P bit was set we assume a valid IHL value (>=5) - * so the total length of IPv4 header is IHL*4 bytes - * The UDP_0 bit *may* bet set if the *inner* header is UDP + /* The hardware supported by this driver does not validate outer + * checksums for tunneled VXLAN or GENEVE frames. I don't agree + * with it but the specification states that you "MAY validate", it + * doesn't make it a hard requirement so if we have validated the + * inner checksum report CHECKSUM_UNNECESSARY. */ - if (!(vsi->back->flags & I40E_FLAG_OUTER_UDP_CSUM_CAPABLE) && - (ipv4_tunnel)) { - skb->transport_header = skb->mac_header + - sizeof(struct ethhdr) + - (ip_hdr(skb)->ihl * 4); - - /* Add 4 bytes for VLAN tagged packets */ - skb->transport_header += (skb->protocol == htons(ETH_P_8021Q) || - skb->protocol == htons(ETH_P_8021AD)) - ? VLAN_HLEN : 0; - - if ((ip_hdr(skb)->protocol == IPPROTO_UDP) && - (udp_hdr(skb)->check != 0)) { - rx_udp_csum = udp_csum(skb); - iph = ip_hdr(skb); - csum = csum_tcpudp_magic( - iph->saddr, iph->daddr, - (skb->len - skb_transport_offset(skb)), - IPPROTO_UDP, rx_udp_csum); - - if (udp_hdr(skb)->check != csum) - goto checksum_fail; - - } /* else its GRE and so no outer UDP header */ - } + + ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) && + (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4); + ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) && + (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4); skb->ip_summed = CHECKSUM_UNNECESSARY; skb->csum_level = ipv4_tunnel || ipv6_tunnel; @@ -1475,18 +1507,19 @@ static inline void i40e_rx_hash(struct i40e_ring *ring, * * Returns true if there's any budget left (e.g. the clean is finished) **/ -static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) +static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, const int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo; u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); - const int current_node = numa_mem_id(); struct i40e_vsi *vsi = rx_ring->vsi; u16 i = rx_ring->next_to_clean; union i40e_rx_desc *rx_desc; u32 rx_error, rx_status; + bool failure = false; u8 rx_ptype; u64 qword; + u32 copysize; if (budget <= 0) return 0; @@ -1497,7 +1530,9 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) u16 vlan_tag; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= I40E_RX_BUFFER_WRITE) { - i40e_alloc_rx_buffers_ps(rx_ring, cleaned_count); + failure = failure || + i40e_alloc_rx_buffers_ps(rx_ring, + cleaned_count); cleaned_count = 0; } @@ -1515,6 +1550,12 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) * DD bit is set. */ dma_rmb(); + /* sync header buffer for reading */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_ring->rx_bi[0].dma, + i * rx_ring->rx_hdr_len, + rx_ring->rx_hdr_len, + DMA_FROM_DEVICE); if (i40e_rx_is_programming_status(qword)) { i40e_clean_programming_status(rx_ring, rx_desc); I40E_RX_INCREMENT(rx_ring, i); @@ -1523,10 +1564,13 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) rx_bi = &rx_ring->rx_bi[i]; skb = rx_bi->skb; if (likely(!skb)) { - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - rx_ring->rx_hdr_len); + skb = __netdev_alloc_skb_ip_align(rx_ring->netdev, + rx_ring->rx_hdr_len, + GFP_ATOMIC | + __GFP_NOWARN); if (!skb) { rx_ring->rx_stats.alloc_buff_failed++; + failure = true; break; } @@ -1534,8 +1578,8 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) skb_record_rx_queue(skb, rx_ring->queue_index); /* we are reusing so sync this buffer for CPU use */ dma_sync_single_range_for_cpu(rx_ring->dev, - rx_bi->dma, - 0, + rx_ring->rx_bi[0].dma, + i * rx_ring->rx_hdr_len, rx_ring->rx_hdr_len, DMA_FROM_DEVICE); } @@ -1553,9 +1597,16 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT; - prefetch(rx_bi->page); + /* sync half-page for reading */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_bi->page_dma, + rx_bi->page_offset, + PAGE_SIZE / 2, + DMA_FROM_DEVICE); + prefetch(page_address(rx_bi->page) + rx_bi->page_offset); rx_bi->skb = NULL; cleaned_count++; + copysize = 0; if (rx_hbo || rx_sph) { int len; @@ -1566,38 +1617,50 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) memcpy(__skb_put(skb, len), rx_bi->hdr_buf, len); } else if (skb->len == 0) { int len; + unsigned char *va = page_address(rx_bi->page) + + rx_bi->page_offset; - len = (rx_packet_len > skb_headlen(skb) ? - skb_headlen(skb) : rx_packet_len); - memcpy(__skb_put(skb, len), - rx_bi->page + rx_bi->page_offset, - len); - rx_bi->page_offset += len; + len = min(rx_packet_len, rx_ring->rx_hdr_len); + memcpy(__skb_put(skb, len), va, len); + copysize = len; rx_packet_len -= len; } - /* Get the rest of the data if this was a header split */ if (rx_packet_len) { - skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, - rx_bi->page, - rx_bi->page_offset, - rx_packet_len); - - skb->len += rx_packet_len; - skb->data_len += rx_packet_len; - skb->truesize += rx_packet_len; - - if ((page_count(rx_bi->page) == 1) && - (page_to_nid(rx_bi->page) == current_node)) - get_page(rx_bi->page); - else + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + rx_bi->page, + rx_bi->page_offset + copysize, + rx_packet_len, I40E_RXBUFFER_2048); + + /* If the page count is more than 2, then both halves + * of the page are used and we need to free it. Do it + * here instead of in the alloc code. Otherwise one + * of the half-pages might be released between now and + * then, and we wouldn't know which one to use. + * Don't call get_page and free_page since those are + * both expensive atomic operations that just change + * the refcount in opposite directions. Just give the + * page to the stack; he can have our refcount. + */ + if (page_count(rx_bi->page) > 2) { + dma_unmap_page(rx_ring->dev, + rx_bi->page_dma, + PAGE_SIZE, + DMA_FROM_DEVICE); rx_bi->page = NULL; + rx_bi->page_dma = 0; + rx_ring->rx_stats.realloc_count++; + } else { + get_page(rx_bi->page); + /* switch to the other half-page here; the + * allocation code programs the right addr + * into HW. If we haven't used this half-page, + * the address won't be changed, and HW can + * just use it next time through. + */ + rx_bi->page_offset ^= PAGE_SIZE / 2; + } - dma_unmap_page(rx_ring->dev, - rx_bi->page_dma, - PAGE_SIZE / 2, - DMA_FROM_DEVICE); - rx_bi->page_dma = 0; } I40E_RX_INCREMENT(rx_ring, i); @@ -1656,7 +1719,7 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) rx_ring->q_vector->rx.total_packets += total_rx_packets; rx_ring->q_vector->rx.total_bytes += total_rx_bytes; - return total_rx_packets; + return failure ? budget : total_rx_packets; } /** @@ -1674,6 +1737,7 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) union i40e_rx_desc *rx_desc; u32 rx_error, rx_status; u16 rx_packet_len; + bool failure = false; u8 rx_ptype; u64 qword; u16 i; @@ -1684,7 +1748,9 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) u16 vlan_tag; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= I40E_RX_BUFFER_WRITE) { - i40e_alloc_rx_buffers_1buf(rx_ring, cleaned_count); + failure = failure || + i40e_alloc_rx_buffers_1buf(rx_ring, + cleaned_count); cleaned_count = 0; } @@ -1783,7 +1849,7 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) rx_ring->q_vector->rx.total_packets += total_rx_packets; rx_ring->q_vector->rx.total_bytes += total_rx_bytes; - return total_rx_packets; + return failure ? budget : total_rx_packets; } static u32 i40e_buildreg_itr(const int type, const u16 itr) @@ -1791,7 +1857,9 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr) u32 val; val = I40E_PFINT_DYN_CTLN_INTENA_MASK | - I40E_PFINT_DYN_CTLN_CLEARPBA_MASK | + /* Don't clear PBA because that can cause lost interrupts that + * came in while we were cleaning/polling + */ (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) | (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT); @@ -1814,6 +1882,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, bool rx = false, tx = false; u32 rxval, txval; int vector; + int idx = q_vector->v_idx; vector = (q_vector->v_idx + vsi->base_vector); @@ -1823,17 +1892,17 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0); if (q_vector->itr_countdown > 0 || - (!ITR_IS_DYNAMIC(vsi->rx_itr_setting) && - !ITR_IS_DYNAMIC(vsi->tx_itr_setting))) { + (!ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting) && + !ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting))) { goto enable_int; } - if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) { + if (ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting)) { rx = i40e_set_new_dynamic_itr(&q_vector->rx); rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); } - if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) { + if (ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting)) { tx = i40e_set_new_dynamic_itr(&q_vector->tx); txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr); } @@ -1906,7 +1975,8 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) * budget and be more aggressive about cleaning up the Tx descriptors. */ i40e_for_each_ring(ring, q_vector->tx) { - clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit); + clean_complete = clean_complete && + i40e_clean_tx_irq(ring, vsi->work_limit); arm_wb = arm_wb || ring->arm_wb; ring->arm_wb = false; } @@ -1930,7 +2000,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) work_done += cleaned; /* if we didn't clean as many as budgeted, we must be done */ - clean_complete &= (budget_per_ring != cleaned); + clean_complete = clean_complete && (budget_per_ring > cleaned); } /* If work not completed, return budget and polling will return */ @@ -1938,7 +2008,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) tx_only: if (arm_wb) { q_vector->tx.ring[0].tx_stats.tx_force_wb++; - i40e_force_wb(vsi, q_vector); + i40e_enable_wb_on_itr(vsi, q_vector); } return budget; } @@ -1951,20 +2021,7 @@ tx_only: if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) { i40e_update_enable_itr(vsi, q_vector); } else { /* Legacy mode */ - struct i40e_hw *hw = &vsi->back->hw; - /* We re-enable the queue 0 cause, but - * don't worry about dynamic_enable - * because we left it on for the other - * possible interrupts during napi - */ - u32 qval = rd32(hw, I40E_QINT_RQCTL(0)) | - I40E_QINT_RQCTL_CAUSE_ENA_MASK; - - wr32(hw, I40E_QINT_RQCTL(0), qval); - qval = rd32(hw, I40E_QINT_TQCTL(0)) | - I40E_QINT_TQCTL_CAUSE_ENA_MASK; - wr32(hw, I40E_QINT_TQCTL(0), qval); - i40e_irq_dynamic_enable_icr0(vsi->back); + i40e_irq_dynamic_enable_icr0(vsi->back, false); } return 0; } @@ -1974,10 +2031,9 @@ tx_only: * @tx_ring: ring to add programming descriptor to * @skb: send buffer * @tx_flags: send tx flags - * @protocol: wire protocol **/ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, - u32 tx_flags, __be16 protocol) + u32 tx_flags) { struct i40e_filter_program_desc *fdir_desc; struct i40e_pf *pf = tx_ring->vsi->back; @@ -1989,6 +2045,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, struct tcphdr *th; unsigned int hlen; u32 flex_ptype, dtype_cmd; + int l4_proto; u16 i; /* make sure ATR is enabled */ @@ -2002,36 +2059,28 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, if (!tx_ring->atr_sample_rate) return; + /* Currently only IPv4/IPv6 with TCP is supported */ if (!(tx_flags & (I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6))) return; - if (!(tx_flags & I40E_TX_FLAGS_UDP_TUNNEL)) { - /* snag network header to get L4 type and address */ - hdr.network = skb_network_header(skb); + /* snag network header to get L4 type and address */ + hdr.network = (tx_flags & I40E_TX_FLAGS_UDP_TUNNEL) ? + skb_inner_network_header(skb) : skb_network_header(skb); - /* Currently only IPv4/IPv6 with TCP is supported - * access ihl as u8 to avoid unaligned access on ia64 - */ - if (tx_flags & I40E_TX_FLAGS_IPV4) - hlen = (hdr.network[0] & 0x0F) << 2; - else if (protocol == htons(ETH_P_IPV6)) - hlen = sizeof(struct ipv6hdr); - else - return; + /* Note: tx_flags gets modified to reflect inner protocols in + * tx_enable_csum function if encap is enabled. + */ + if (tx_flags & I40E_TX_FLAGS_IPV4) { + /* access ihl as u8 to avoid unaligned access on ia64 */ + hlen = (hdr.network[0] & 0x0F) << 2; + l4_proto = hdr.ipv4->protocol; } else { - hdr.network = skb_inner_network_header(skb); - hlen = skb_inner_network_header_len(skb); + hlen = hdr.network - skb->data; + l4_proto = ipv6_find_hdr(skb, &hlen, IPPROTO_TCP, NULL, NULL); + hlen -= hdr.network - skb->data; } - /* Currently only IPv4/IPv6 with TCP is supported - * Note: tx_flags gets modified to reflect inner protocols in - * tx_enable_csum function if encap is enabled. - */ - if ((tx_flags & I40E_TX_FLAGS_IPV4) && - (hdr.ipv4->protocol != IPPROTO_TCP)) - return; - else if ((tx_flags & I40E_TX_FLAGS_IPV6) && - (hdr.ipv6->nexthdr != IPPROTO_TCP)) + if (l4_proto != IPPROTO_TCP) return; th = (struct tcphdr *)(hdr.network + hlen); @@ -2039,7 +2088,8 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, /* Due to lack of space, no more new filters can be programmed */ if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED)) return; - if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) { + if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) && + (!(pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE))) { /* HW ATR eviction will take care of removing filters on FIN * and RST packets. */ @@ -2067,7 +2117,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) & I40E_TXD_FLTR_QW0_QINDEX_MASK; - flex_ptype |= (protocol == htons(ETH_P_IP)) ? + flex_ptype |= (tx_flags & I40E_TX_FLAGS_IPV4) ? (I40E_FILTER_PCTYPE_NONF_IPV4_TCP << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) : (I40E_FILTER_PCTYPE_NONF_IPV6_TCP << @@ -2101,7 +2151,8 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) & I40E_TXD_FLTR_QW1_CNTINDEX_MASK; - if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) + if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) && + (!(pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE))) dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK; fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype); @@ -2206,13 +2257,23 @@ out: static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss) { - u32 cd_cmd, cd_tso_len, cd_mss; - struct ipv6hdr *ipv6h; - struct tcphdr *tcph; - struct iphdr *iph; - u32 l4len; + u64 cd_cmd, cd_tso_len, cd_mss; + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + union { + struct tcphdr *tcp; + struct udphdr *udp; + unsigned char *hdr; + } l4; + u32 paylen, l4_offset; int err; + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + if (!skb_is_gso(skb)) return 0; @@ -2220,35 +2281,60 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, if (err < 0) return err; - iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb); - ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb); - - if (iph->version == 4) { - tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb); - iph->tot_len = 0; - iph->check = 0; - tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, - 0, IPPROTO_TCP, 0); - } else if (ipv6h->version == 6) { - tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb); - ipv6h->payload_len = 0; - tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, - 0, IPPROTO_TCP, 0); + ip.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + + /* initialize outer IP header fields */ + if (ip.v4->version == 4) { + ip.v4->tot_len = 0; + ip.v4->check = 0; + } else { + ip.v6->payload_len = 0; } - l4len = skb->encapsulation ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb); - *hdr_len = (skb->encapsulation - ? (skb_inner_transport_header(skb) - skb->data) - : skb_transport_offset(skb)) + l4len; + if (skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL | SKB_GSO_GRE | + SKB_GSO_UDP_TUNNEL_CSUM)) { + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM) { + /* determine offset of outer transport header */ + l4_offset = l4.hdr - skb->data; + + /* remove payload length from outer checksum */ + paylen = (__force u16)l4.udp->check; + paylen += ntohs(1) * (u16)~(skb->len - l4_offset); + l4.udp->check = ~csum_fold((__force __wsum)paylen); + } + + /* reset pointers to inner headers */ + ip.hdr = skb_inner_network_header(skb); + l4.hdr = skb_inner_transport_header(skb); + + /* initialize inner IP header fields */ + if (ip.v4->version == 4) { + ip.v4->tot_len = 0; + ip.v4->check = 0; + } else { + ip.v6->payload_len = 0; + } + } + + /* determine offset of inner transport header */ + l4_offset = l4.hdr - skb->data; + + /* remove payload length from inner checksum */ + paylen = (__force u16)l4.tcp->check; + paylen += ntohs(1) * (u16)~(skb->len - l4_offset); + l4.tcp->check = ~csum_fold((__force __wsum)paylen); + + /* compute length of segmentation header */ + *hdr_len = (l4.tcp->doff * 4) + l4_offset; /* find the field values */ cd_cmd = I40E_TX_CTX_DESC_TSO; cd_tso_len = skb->len - *hdr_len; cd_mss = skb_shinfo(skb)->gso_size; - *cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) | - ((u64)cd_tso_len << - I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | - ((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT); + *cd_type_cmd_tso_mss |= (cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) | + (cd_tso_len << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | + (cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT); return 1; } @@ -2303,129 +2389,154 @@ static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb, * @tx_ring: Tx descriptor ring * @cd_tunneling: ptr to context desc bits **/ -static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, - u32 *td_cmd, u32 *td_offset, - struct i40e_ring *tx_ring, - u32 *cd_tunneling) +static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, + u32 *td_cmd, u32 *td_offset, + struct i40e_ring *tx_ring, + u32 *cd_tunneling) { - struct ipv6hdr *this_ipv6_hdr; - unsigned int this_tcp_hdrlen; - struct iphdr *this_ip_hdr; - u32 network_hdr_len; - u8 l4_hdr = 0; - struct udphdr *oudph = NULL; - struct iphdr *oiph = NULL; - u32 l4_tunnel = 0; + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + union { + struct tcphdr *tcp; + struct udphdr *udp; + unsigned char *hdr; + } l4; + unsigned char *exthdr; + u32 offset, cmd = 0, tunnel = 0; + __be16 frag_off; + u8 l4_proto = 0; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + ip.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + + /* compute outer L2 header size */ + offset = ((ip.hdr - skb->data) / 2) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT; if (skb->encapsulation) { - switch (ip_hdr(skb)->protocol) { + /* define outer network header type */ + if (*tx_flags & I40E_TX_FLAGS_IPV4) { + tunnel |= (*tx_flags & I40E_TX_FLAGS_TSO) ? + I40E_TX_CTX_EXT_IP_IPV4 : + I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; + + l4_proto = ip.v4->protocol; + } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { + tunnel |= I40E_TX_CTX_EXT_IP_IPV6; + + exthdr = ip.hdr + sizeof(*ip.v6); + l4_proto = ip.v6->nexthdr; + if (l4.hdr != exthdr) + ipv6_skip_exthdr(skb, exthdr - skb->data, + &l4_proto, &frag_off); + } + + /* compute outer L3 header size */ + tunnel |= ((l4.hdr - ip.hdr) / 4) << + I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT; + + /* switch IP header pointer from outer to inner header */ + ip.hdr = skb_inner_network_header(skb); + + /* define outer transport */ + switch (l4_proto) { case IPPROTO_UDP: - oudph = udp_hdr(skb); - oiph = ip_hdr(skb); - l4_tunnel = I40E_TXD_CTX_UDP_TUNNELING; + tunnel |= I40E_TXD_CTX_UDP_TUNNELING; *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL; break; case IPPROTO_GRE: - l4_tunnel = I40E_TXD_CTX_GRE_TUNNELING; + tunnel |= I40E_TXD_CTX_GRE_TUNNELING; + *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL; break; default: - return; - } - network_hdr_len = skb_inner_network_header_len(skb); - this_ip_hdr = inner_ip_hdr(skb); - this_ipv6_hdr = inner_ipv6_hdr(skb); - this_tcp_hdrlen = inner_tcp_hdrlen(skb); - - if (*tx_flags & I40E_TX_FLAGS_IPV4) { - if (*tx_flags & I40E_TX_FLAGS_TSO) { - *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4; - ip_hdr(skb)->check = 0; - } else { - *cd_tunneling |= - I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; - } - } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { - *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6; if (*tx_flags & I40E_TX_FLAGS_TSO) - ip_hdr(skb)->check = 0; + return -1; + + skb_checksum_help(skb); + return 0; } - /* Now set the ctx descriptor fields */ - *cd_tunneling |= (skb_network_header_len(skb) >> 2) << - I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT | - l4_tunnel | - ((skb_inner_network_offset(skb) - - skb_transport_offset(skb)) >> 1) << - I40E_TXD_CTX_QW0_NATLEN_SHIFT; - if (this_ip_hdr->version == 6) { - *tx_flags &= ~I40E_TX_FLAGS_IPV4; + /* compute tunnel header size */ + tunnel |= ((ip.hdr - l4.hdr) / 2) << + I40E_TXD_CTX_QW0_NATLEN_SHIFT; + + /* indicate if we need to offload outer UDP header */ + if ((*tx_flags & I40E_TX_FLAGS_TSO) && + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) + tunnel |= I40E_TXD_CTX_QW0_L4T_CS_MASK; + + /* record tunnel offload values */ + *cd_tunneling |= tunnel; + + /* switch L4 header pointer from outer to inner */ + l4.hdr = skb_inner_transport_header(skb); + l4_proto = 0; + + /* reset type as we transition from outer to inner headers */ + *tx_flags &= ~(I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6); + if (ip.v4->version == 4) + *tx_flags |= I40E_TX_FLAGS_IPV4; + if (ip.v6->version == 6) *tx_flags |= I40E_TX_FLAGS_IPV6; - } - if ((tx_ring->flags & I40E_TXR_FLAGS_OUTER_UDP_CSUM) && - (l4_tunnel == I40E_TXD_CTX_UDP_TUNNELING) && - (*cd_tunneling & I40E_TXD_CTX_QW0_EXT_IP_MASK)) { - oudph->check = ~csum_tcpudp_magic(oiph->saddr, - oiph->daddr, - (skb->len - skb_transport_offset(skb)), - IPPROTO_UDP, 0); - *cd_tunneling |= I40E_TXD_CTX_QW0_L4T_CS_MASK; - } - } else { - network_hdr_len = skb_network_header_len(skb); - this_ip_hdr = ip_hdr(skb); - this_ipv6_hdr = ipv6_hdr(skb); - this_tcp_hdrlen = tcp_hdrlen(skb); } /* Enable IP checksum offloads */ if (*tx_flags & I40E_TX_FLAGS_IPV4) { - l4_hdr = this_ip_hdr->protocol; + l4_proto = ip.v4->protocol; /* the stack computes the IP header already, the only time we * need the hardware to recompute it is in the case of TSO. */ - if (*tx_flags & I40E_TX_FLAGS_TSO) { - *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM; - this_ip_hdr->check = 0; - } else { - *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4; - } - /* Now set the td_offset for IP header length */ - *td_offset = (network_hdr_len >> 2) << - I40E_TX_DESC_LENGTH_IPLEN_SHIFT; + cmd |= (*tx_flags & I40E_TX_FLAGS_TSO) ? + I40E_TX_DESC_CMD_IIPT_IPV4_CSUM : + I40E_TX_DESC_CMD_IIPT_IPV4; } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { - l4_hdr = this_ipv6_hdr->nexthdr; - *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6; - /* Now set the td_offset for IP header length */ - *td_offset = (network_hdr_len >> 2) << - I40E_TX_DESC_LENGTH_IPLEN_SHIFT; + cmd |= I40E_TX_DESC_CMD_IIPT_IPV6; + + exthdr = ip.hdr + sizeof(*ip.v6); + l4_proto = ip.v6->nexthdr; + if (l4.hdr != exthdr) + ipv6_skip_exthdr(skb, exthdr - skb->data, + &l4_proto, &frag_off); } - /* words in MACLEN + dwords in IPLEN + dwords in L4Len */ - *td_offset |= (skb_network_offset(skb) >> 1) << - I40E_TX_DESC_LENGTH_MACLEN_SHIFT; + + /* compute inner L3 header size */ + offset |= ((l4.hdr - ip.hdr) / 4) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT; /* Enable L4 checksum offloads */ - switch (l4_hdr) { + switch (l4_proto) { case IPPROTO_TCP: /* enable checksum offloads */ - *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP; - *td_offset |= (this_tcp_hdrlen >> 2) << - I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; + cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP; + offset |= l4.tcp->doff << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; break; case IPPROTO_SCTP: /* enable SCTP checksum offload */ - *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP; - *td_offset |= (sizeof(struct sctphdr) >> 2) << - I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; + cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP; + offset |= (sizeof(struct sctphdr) >> 2) << + I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; break; case IPPROTO_UDP: /* enable UDP checksum offload */ - *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP; - *td_offset |= (sizeof(struct udphdr) >> 2) << - I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; + cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP; + offset |= (sizeof(struct udphdr) >> 2) << + I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; break; default: - break; + if (*tx_flags & I40E_TX_FLAGS_TSO) + return -1; + skb_checksum_help(skb); + return 0; } + + *td_cmd |= cmd; + *td_offset |= offset; + + return 1; } /** @@ -2466,7 +2577,7 @@ static void i40e_create_tx_ctx(struct i40e_ring *tx_ring, * * Returns -EBUSY if a stop is needed, else 0 **/ -static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) +int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) { netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); /* Memory barrier before checking head and tail */ @@ -2483,77 +2594,71 @@ static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) } /** - * i40e_maybe_stop_tx - 1st level check for tx stop conditions - * @tx_ring: the ring to be checked - * @size: the size buffer we want to assure is available - * - * Returns 0 if stop is not needed - **/ -#ifdef I40E_FCOE -inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) -#else -static inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) -#endif -{ - if (likely(I40E_DESC_UNUSED(tx_ring) >= size)) - return 0; - return __i40e_maybe_stop_tx(tx_ring, size); -} - -/** - * i40e_chk_linearize - Check if there are more than 8 fragments per packet + * __i40e_chk_linearize - Check if there are more than 8 fragments per packet * @skb: send buffer - * @tx_flags: collected send information * * Note: Our HW can't scatter-gather more than 8 fragments to build * a packet on the wire and so we need to figure out the cases where we * need to linearize the skb. **/ -static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags) +bool __i40e_chk_linearize(struct sk_buff *skb) { - struct skb_frag_struct *frag; - bool linearize = false; - unsigned int size = 0; - u16 num_frags; - u16 gso_segs; + const struct skb_frag_struct *frag, *stale; + int gso_size, nr_frags, sum; - num_frags = skb_shinfo(skb)->nr_frags; - gso_segs = skb_shinfo(skb)->gso_segs; + /* check to see if TSO is enabled, if so we may get a repreive */ + gso_size = skb_shinfo(skb)->gso_size; + if (unlikely(!gso_size)) + return true; - if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) { - u16 j = 0; + /* no need to check if number of frags is less than 8 */ + nr_frags = skb_shinfo(skb)->nr_frags; + if (nr_frags < I40E_MAX_BUFFER_TXD) + return false; - if (num_frags < (I40E_MAX_BUFFER_TXD)) - goto linearize_chk_done; - /* try the simple math, if we have too many frags per segment */ - if (DIV_ROUND_UP((num_frags + gso_segs), gso_segs) > - I40E_MAX_BUFFER_TXD) { - linearize = true; - goto linearize_chk_done; - } - frag = &skb_shinfo(skb)->frags[0]; - /* we might still have more fragments per segment */ - do { - size += skb_frag_size(frag); - frag++; j++; - if ((size >= skb_shinfo(skb)->gso_size) && - (j < I40E_MAX_BUFFER_TXD)) { - size = (size % skb_shinfo(skb)->gso_size); - j = (size) ? 1 : 0; - } - if (j == I40E_MAX_BUFFER_TXD) { - linearize = true; - break; - } - num_frags--; - } while (num_frags); - } else { - if (num_frags >= I40E_MAX_BUFFER_TXD) - linearize = true; + /* We need to walk through the list and validate that each group + * of 6 fragments totals at least gso_size. However we don't need + * to perform such validation on the first or last 6 since the first + * 6 cannot inherit any data from a descriptor before them, and the + * last 6 cannot inherit any data from a descriptor after them. + */ + nr_frags -= I40E_MAX_BUFFER_TXD - 1; + frag = &skb_shinfo(skb)->frags[0]; + + /* Initialize size to the negative value of gso_size minus 1. We + * use this as the worst case scenerio in which the frag ahead + * of us only provides one byte which is why we are limited to 6 + * descriptors for a single transmit as the header and previous + * fragment are already consuming 2 descriptors. + */ + sum = 1 - gso_size; + + /* Add size of frags 1 through 5 to create our initial sum */ + sum += skb_frag_size(++frag); + sum += skb_frag_size(++frag); + sum += skb_frag_size(++frag); + sum += skb_frag_size(++frag); + sum += skb_frag_size(++frag); + + /* Walk through fragments adding latest fragment, testing it, and + * then removing stale fragments from the sum. + */ + stale = &skb_shinfo(skb)->frags[0]; + for (;;) { + sum += skb_frag_size(++frag); + + /* if sum is negative we failed to make sufficient progress */ + if (sum < 0) + return true; + + /* use pre-decrement to avoid processing last fragment */ + if (!--nr_frags) + break; + + sum -= skb_frag_size(++stale); } -linearize_chk_done: - return linearize; + return false; } /** @@ -2760,43 +2865,6 @@ dma_error: } /** - * i40e_xmit_descriptor_count - calculate number of tx descriptors needed - * @skb: send buffer - * @tx_ring: ring to send buffer on - * - * Returns number of data descriptors needed for this skb. Returns 0 to indicate - * there is not enough descriptors available in this ring since we need at least - * one descriptor. - **/ -#ifdef I40E_FCOE -inline int i40e_xmit_descriptor_count(struct sk_buff *skb, - struct i40e_ring *tx_ring) -#else -static inline int i40e_xmit_descriptor_count(struct sk_buff *skb, - struct i40e_ring *tx_ring) -#endif -{ - unsigned int f; - int count = 0; - - /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD, - * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD, - * + 4 desc gap to avoid the cache line where head is, - * + 1 desc for context descriptor, - * otherwise try next time - */ - for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) - count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); - - count += TXD_USE_COUNT(skb_headlen(skb)); - if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) { - tx_ring->tx_stats.tx_busy++; - return 0; - } - return count; -} - -/** * i40e_xmit_frame_ring - Sends buffer on Tx ring * @skb: send buffer * @tx_ring: ring to send buffer on @@ -2814,14 +2882,30 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, __be16 protocol; u32 td_cmd = 0; u8 hdr_len = 0; + int tso, count; int tsyn; - int tso; /* prefetch the data, we'll need it later */ prefetch(skb->data); - if (0 == i40e_xmit_descriptor_count(skb, tx_ring)) + count = i40e_xmit_descriptor_count(skb); + if (i40e_chk_linearize(skb, count)) { + if (__skb_linearize(skb)) + goto out_drop; + count = TXD_USE_COUNT(skb->len); + tx_ring->tx_stats.tx_linearize++; + } + + /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD, + * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD, + * + 4 desc gap to avoid the cache line where head is, + * + 1 desc for context descriptor, + * otherwise try next time + */ + if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) { + tx_ring->tx_stats.tx_busy++; return NETDEV_TX_BUSY; + } /* prepare the xmit flags */ if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags)) @@ -2846,29 +2930,22 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, else if (tso) tx_flags |= I40E_TX_FLAGS_TSO; + /* Always offload the checksum, since it's in the data descriptor */ + tso = i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset, + tx_ring, &cd_tunneling); + if (tso < 0) + goto out_drop; + tsyn = i40e_tsyn(tx_ring, skb, tx_flags, &cd_type_cmd_tso_mss); if (tsyn) tx_flags |= I40E_TX_FLAGS_TSYN; - if (i40e_chk_linearize(skb, tx_flags)) { - if (skb_linearize(skb)) - goto out_drop; - tx_ring->tx_stats.tx_linearize++; - } skb_tx_timestamp(skb); /* always enable CRC insertion offload */ td_cmd |= I40E_TX_DESC_CMD_ICRC; - /* Always offload the checksum, since it's in the data descriptor */ - if (skb->ip_summed == CHECKSUM_PARTIAL) { - tx_flags |= I40E_TX_FLAGS_CSUM; - - i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset, - tx_ring, &cd_tunneling); - } - i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss, cd_tunneling, cd_l2tag2); @@ -2876,7 +2953,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, * * NOTE: this must always be directly before the data descriptor. */ - i40e_atr(tx_ring, skb, tx_flags, protocol); + i40e_atr(tx_ring, skb, tx_flags); i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len, td_cmd, td_offset); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 3f081e25e097..cdd5dc00aec5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -153,7 +153,6 @@ enum i40e_dyn_idx_t { #define DESC_NEEDED (MAX_SKB_FRAGS + 4) #define I40E_MIN_DESC_PENDING 4 -#define I40E_TX_FLAGS_CSUM BIT(0) #define I40E_TX_FLAGS_HW_VLAN BIT(1) #define I40E_TX_FLAGS_SW_VLAN BIT(2) #define I40E_TX_FLAGS_TSO BIT(3) @@ -203,12 +202,15 @@ struct i40e_tx_queue_stats { u64 tx_done_old; u64 tx_linearize; u64 tx_force_wb; + u64 tx_lost_interrupt; }; struct i40e_rx_queue_stats { u64 non_eop_descs; u64 alloc_page_failed; u64 alloc_buff_failed; + u64 page_reuse_count; + u64 realloc_count; }; enum i40e_ring_state_t { @@ -246,6 +248,14 @@ struct i40e_ring { u8 dcb_tc; /* Traffic class of ring */ u8 __iomem *tail; + /* high bit set means dynamic, use accessor routines to read/write. + * hardware only supports 2us resolution for the ITR registers. + * these values always store the USER setting, and must be converted + * before programming to a register. + */ + u16 rx_itr_setting; + u16 tx_itr_setting; + u16 count; /* Number of descriptors */ u16 reg_idx; /* HW register index of the ring */ u16 rx_hdr_len; @@ -254,7 +264,6 @@ struct i40e_ring { #define I40E_RX_DTYPE_NO_SPLIT 0 #define I40E_RX_DTYPE_HEADER_SPLIT 1 #define I40E_RX_DTYPE_SPLIT_ALWAYS 2 - u8 hsplit; #define I40E_RX_SPLIT_L2 0x1 #define I40E_RX_SPLIT_IP 0x2 #define I40E_RX_SPLIT_TCP_UDP 0x4 @@ -275,7 +284,6 @@ struct i40e_ring { u16 flags; #define I40E_TXR_FLAGS_WB_ON_ITR BIT(0) -#define I40E_TXR_FLAGS_OUTER_UDP_CSUM BIT(1) #define I40E_TXR_FLAGS_LAST_XMIT_MORE_SET BIT(2) /* stats structs */ @@ -316,8 +324,8 @@ struct i40e_ring_container { #define i40e_for_each_ring(pos, head) \ for (pos = (head).ring; pos != NULL; pos = pos->next) -void i40e_alloc_rx_buffers_ps(struct i40e_ring *rxr, u16 cleaned_count); -void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rxr, u16 cleaned_count); +bool i40e_alloc_rx_buffers_ps(struct i40e_ring *rxr, u16 cleaned_count); +bool i40e_alloc_rx_buffers_1buf(struct i40e_ring *rxr, u16 cleaned_count); void i40e_alloc_rx_headers(struct i40e_ring *rxr); netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev); void i40e_clean_tx_ring(struct i40e_ring *tx_ring); @@ -331,13 +339,13 @@ int i40e_napi_poll(struct napi_struct *napi, int budget); void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, struct i40e_tx_buffer *first, u32 tx_flags, const u8 hdr_len, u32 td_cmd, u32 td_offset); -int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size); -int i40e_xmit_descriptor_count(struct sk_buff *skb, struct i40e_ring *tx_ring); int i40e_tx_prepare_vlan_flags(struct sk_buff *skb, struct i40e_ring *tx_ring, u32 *flags); #endif void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector); -u32 i40e_get_tx_pending(struct i40e_ring *ring); +u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw); +int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size); +bool __i40e_chk_linearize(struct sk_buff *skb); /** * i40e_get_head - Retrieve head from head writeback @@ -352,4 +360,63 @@ static inline u32 i40e_get_head(struct i40e_ring *tx_ring) return le32_to_cpu(*(volatile __le32 *)head); } + +/** + * i40e_xmit_descriptor_count - calculate number of Tx descriptors needed + * @skb: send buffer + * @tx_ring: ring to send buffer on + * + * Returns number of data descriptors needed for this skb. Returns 0 to indicate + * there is not enough descriptors available in this ring since we need at least + * one descriptor. + **/ +static inline int i40e_xmit_descriptor_count(struct sk_buff *skb) +{ + const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + unsigned int nr_frags = skb_shinfo(skb)->nr_frags; + int count = 0, size = skb_headlen(skb); + + for (;;) { + count += TXD_USE_COUNT(size); + + if (!nr_frags--) + break; + + size = skb_frag_size(frag++); + } + + return count; +} + +/** + * i40e_maybe_stop_tx - 1st level check for Tx stop conditions + * @tx_ring: the ring to be checked + * @size: the size buffer we want to assure is available + * + * Returns 0 if stop is not needed + **/ +static inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) +{ + if (likely(I40E_DESC_UNUSED(tx_ring) >= size)) + return 0; + return __i40e_maybe_stop_tx(tx_ring, size); +} + +/** + * i40e_chk_linearize - Check if there are more than 8 fragments per packet + * @skb: send buffer + * @count: number of buffers used + * + * Note: Our HW can't scatter-gather more than 8 fragments to build + * a packet on the wire and so we need to figure out the cases where we + * need to linearize the skb. + **/ +static inline bool i40e_chk_linearize(struct sk_buff *skb, int count) +{ + /* we can only support up to 8 data buffers for a single send */ + if (likely(count <= I40E_MAX_BUFFER_TXD)) + return false; + + return __i40e_chk_linearize(skb); +} #endif /* _I40E_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index dd2da356d9a1..0a0baf71041b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -90,6 +90,22 @@ enum i40e_debug_mask { I40E_DEBUG_ALL = 0xFFFFFFFF }; +#define I40E_MDIO_STCODE 0 +#define I40E_MDIO_OPCODE_ADDRESS 0 +#define I40E_MDIO_OPCODE_WRITE I40E_MASK(1, \ + I40E_GLGEN_MSCA_OPCODE_SHIFT) +#define I40E_MDIO_OPCODE_READ_INC_ADDR I40E_MASK(2, \ + I40E_GLGEN_MSCA_OPCODE_SHIFT) +#define I40E_MDIO_OPCODE_READ I40E_MASK(3, \ + I40E_GLGEN_MSCA_OPCODE_SHIFT) + +#define I40E_PHY_COM_REG_PAGE 0x1E +#define I40E_PHY_LED_LINK_MODE_MASK 0xF0 +#define I40E_PHY_LED_MANUAL_ON 0x100 +#define I40E_PHY_LED_PROV_REG_1 0xC430 +#define I40E_PHY_LED_MODE_MASK 0xFFFF +#define I40E_PHY_LED_MODE_ORIG 0x80000000 + /* These are structs for managing the hardware information and the operations. * The structures of function pointers are filled out at init time when we * know for sure exactly which hardware we're working with. This gives us the @@ -1098,6 +1114,10 @@ enum i40e_filter_program_desc_pcmd { I40E_TXD_FLTR_QW1_CMD_SHIFT) #define I40E_TXD_FLTR_QW1_ATR_MASK BIT_ULL(I40E_TXD_FLTR_QW1_ATR_SHIFT) +#define I40E_TXD_FLTR_QW1_ATR_SHIFT (0xEULL + \ + I40E_TXD_FLTR_QW1_CMD_SHIFT) +#define I40E_TXD_FLTR_QW1_ATR_MASK BIT_ULL(I40E_TXD_FLTR_QW1_ATR_SHIFT) + #define I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT 20 #define I40E_TXD_FLTR_QW1_CNTINDEX_MASK (0x1FFUL << \ I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 63e62f9aec6e..acd2693a4e97 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2015 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -461,7 +461,7 @@ static int i40e_config_vsi_rx_queue(struct i40e_vf *vf, u16 vsi_id, rx_ctx.hbuff = info->hdr_size >> I40E_RXQ_CTX_HBUFF_SHIFT; /* set splitalways mode 10b */ - rx_ctx.dtype = 0x2; + rx_ctx.dtype = I40E_RX_DTYPE_HEADER_SPLIT; } /* databuffer length validation */ @@ -602,8 +602,8 @@ static void i40e_enable_vf_mappings(struct i40e_vf *vf) * that VF queues be mapped using this method, even when they are * contiguous in real life */ - wr32(hw, I40E_VSILAN_QBASE(vf->lan_vsi_id), - I40E_VSILAN_QBASE_VSIQTABLE_ENA_MASK); + i40e_write_rx_ctl(hw, I40E_VSILAN_QBASE(vf->lan_vsi_id), + I40E_VSILAN_QBASE_VSIQTABLE_ENA_MASK); /* enable VF vplan_qtable mappings */ reg = I40E_VPLAN_MAPENA_TXRX_ENA_MASK; @@ -630,7 +630,8 @@ static void i40e_enable_vf_mappings(struct i40e_vf *vf) (j * 2) + 1); reg |= qid << 16; } - wr32(hw, I40E_VSILAN_QTABLE(j, vf->lan_vsi_id), reg); + i40e_write_rx_ctl(hw, I40E_VSILAN_QTABLE(j, vf->lan_vsi_id), + reg); } i40e_flush(hw); @@ -980,7 +981,7 @@ err_alloc: i40e_free_vfs(pf); err_iov: /* Re-enable interrupt 0. */ - i40e_irq_dynamic_enable_icr0(pf); + i40e_irq_dynamic_enable_icr0(pf, false); return ret; } @@ -1213,9 +1214,21 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_RSS_REG; } + if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) { + if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2) + vfres->vf_offload_flags |= + I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2; + } + if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING) vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING; + if (pf->flags & I40E_FLAG_WB_ON_ITR_CAPABLE) { + if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR) + vfres->vf_offload_flags |= + I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR; + } + vfres->num_vsis = num_vsis; vfres->num_queue_pairs = vf->num_queue_pairs; vfres->max_vectors = pf->hw.func_caps.num_msix_vectors_vf; @@ -2025,7 +2038,11 @@ int i40e_vc_process_vflr_event(struct i40e_pf *pf) if (!test_bit(__I40E_VFLR_EVENT_PENDING, &pf->state)) return 0; - /* re-enable vflr interrupt cause */ + /* Re-enable the VFLR interrupt cause here, before looking for which + * VF got reset. Otherwise, if another VF gets a reset while the + * first one is being processed, that interrupt will be lost, and + * that VF will be stuck in reset forever. + */ reg = rd32(hw, I40E_PFINT_ICR0_ENA); reg |= I40E_PFINT_ICR0_ENA_VFLR_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, reg); @@ -2186,6 +2203,8 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, * and then reloading the VF driver. */ i40e_vc_disable_vf(pf, vf); + /* During reset the VF got a new VSI, so refresh the pointer. */ + vsi = pf->vsi[vf->lan_vsi_idx]; } /* Check for condition where there was already a port VLAN ID @@ -2294,6 +2313,9 @@ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, case I40E_LINK_SPEED_40GB: speed = 40000; break; + case I40E_LINK_SPEED_20GB: + speed = 20000; + break; case I40E_LINK_SPEED_10GB: speed = 10000; break; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index da44995def42..e74642a0c42e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -91,8 +91,8 @@ struct i40e_vf { * When assigned, these will be non-zero, because VSI 0 is always * the main LAN VSI for the PF. */ - u8 lan_vsi_idx; /* index into PF struct */ - u8 lan_vsi_id; /* ID as used by firmware */ + u16 lan_vsi_idx; /* index into PF struct */ + u16 lan_vsi_id; /* ID as used by firmware */ u8 num_queue_pairs; /* num of qps assigned to VF vsis */ u64 num_mdd_events; /* num of mdd events detected */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c index 3f65e39b3fe4..44f7ed7583dd 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -887,6 +887,9 @@ i40e_status i40evf_clean_arq_element(struct i40e_hw *hw, u16 flags; u16 ntu; + /* pre-clean the event info */ + memset(&e->desc, 0, sizeof(e->desc)); + /* take the lock before we start messing with the ring */ mutex_lock(&hw->aq.arq_mutex); diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h index f5b2b369dc7c..aad8d6277110 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -34,7 +34,7 @@ */ #define I40E_FW_API_VERSION_MAJOR 0x0001 -#define I40E_FW_API_VERSION_MINOR 0x0004 +#define I40E_FW_API_VERSION_MINOR 0x0005 struct i40e_aq_desc { __le16 flags; @@ -145,6 +145,9 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_remove_statistics = 0x0202, i40e_aqc_opc_set_port_parameters = 0x0203, i40e_aqc_opc_get_switch_resource_alloc = 0x0204, + i40e_aqc_opc_set_switch_config = 0x0205, + i40e_aqc_opc_rx_ctl_reg_read = 0x0206, + i40e_aqc_opc_rx_ctl_reg_write = 0x0207, i40e_aqc_opc_add_vsi = 0x0210, i40e_aqc_opc_update_vsi_parameters = 0x0211, @@ -220,6 +223,7 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_get_phy_wol_caps = 0x0621, i40e_aqc_opc_set_phy_debug = 0x0622, i40e_aqc_opc_upload_ext_phy_fm = 0x0625, + i40e_aqc_opc_run_phy_activity = 0x0626, /* NVM commands */ i40e_aqc_opc_nvm_read = 0x0701, @@ -228,6 +232,7 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_nvm_config_read = 0x0704, i40e_aqc_opc_nvm_config_write = 0x0705, i40e_aqc_opc_oem_post_update = 0x0720, + i40e_aqc_opc_thermal_sensor = 0x0721, /* virtualization commands */ i40e_aqc_opc_send_msg_to_pf = 0x0801, @@ -399,6 +404,7 @@ struct i40e_aqc_list_capabilities_element_resp { #define I40E_AQ_CAP_ID_OS2BMC_CAP 0x0004 #define I40E_AQ_CAP_ID_FUNCTIONS_VALID 0x0005 #define I40E_AQ_CAP_ID_ALTERNATE_RAM 0x0006 +#define I40E_AQ_CAP_ID_WOL_AND_PROXY 0x0008 #define I40E_AQ_CAP_ID_SRIOV 0x0012 #define I40E_AQ_CAP_ID_VF 0x0013 #define I40E_AQ_CAP_ID_VMDQ 0x0014 @@ -419,6 +425,7 @@ struct i40e_aqc_list_capabilities_element_resp { #define I40E_AQ_CAP_ID_LED 0x0061 #define I40E_AQ_CAP_ID_SDP 0x0062 #define I40E_AQ_CAP_ID_MDIO 0x0063 +#define I40E_AQ_CAP_ID_WSR_PROT 0x0064 #define I40E_AQ_CAP_ID_FLEX10 0x00F1 #define I40E_AQ_CAP_ID_CEM 0x00F2 @@ -677,6 +684,31 @@ struct i40e_aqc_switch_resource_alloc_element_resp { I40E_CHECK_STRUCT_LEN(0x10, i40e_aqc_switch_resource_alloc_element_resp); +/* Set Switch Configuration (direct 0x0205) */ +struct i40e_aqc_set_switch_config { + __le16 flags; +#define I40E_AQ_SET_SWITCH_CFG_PROMISC 0x0001 +#define I40E_AQ_SET_SWITCH_CFG_L2_FILTER 0x0002 + __le16 valid_flags; + u8 reserved[12]; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_set_switch_config); + +/* Read Receive control registers (direct 0x0206) + * Write Receive control registers (direct 0x0207) + * used for accessing Rx control registers that can be + * slow and need special handling when under high Rx load + */ +struct i40e_aqc_rx_ctl_reg_read_write { + __le32 reserved1; + __le32 address; + __le32 reserved2; + __le32 value; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_rx_ctl_reg_read_write); + /* Add VSI (indirect 0x0210) * this indirect command uses struct i40e_aqc_vsi_properties_data * as the indirect buffer (128 bytes) @@ -903,7 +935,8 @@ struct i40e_aqc_add_veb { I40E_AQC_ADD_VEB_PORT_TYPE_SHIFT) #define I40E_AQC_ADD_VEB_PORT_TYPE_DEFAULT 0x2 #define I40E_AQC_ADD_VEB_PORT_TYPE_DATA 0x4 -#define I40E_AQC_ADD_VEB_ENABLE_L2_FILTER 0x8 +#define I40E_AQC_ADD_VEB_ENABLE_L2_FILTER 0x8 /* deprecated */ +#define I40E_AQC_ADD_VEB_ENABLE_DISABLE_STATS 0x10 u8 enable_tcs; u8 reserved[9]; }; @@ -970,6 +1003,7 @@ struct i40e_aqc_add_macvlan_element_data { #define I40E_AQC_MACVLAN_ADD_HASH_MATCH 0x0002 #define I40E_AQC_MACVLAN_ADD_IGNORE_VLAN 0x0004 #define I40E_AQC_MACVLAN_ADD_TO_QUEUE 0x0008 +#define I40E_AQC_MACVLAN_ADD_USE_SHARED_MAC 0x0010 __le16 queue_number; #define I40E_AQC_MACVLAN_CMD_QUEUE_SHIFT 0 #define I40E_AQC_MACVLAN_CMD_QUEUE_MASK (0x7FF << \ @@ -1066,6 +1100,7 @@ struct i40e_aqc_set_vsi_promiscuous_modes { #define I40E_AQC_SET_VSI_PROMISC_BROADCAST 0x04 #define I40E_AQC_SET_VSI_DEFAULT 0x08 #define I40E_AQC_SET_VSI_PROMISC_VLAN 0x10 +#define I40E_AQC_SET_VSI_PROMISC_TX 0x8000 __le16 seid; #define I40E_AQC_VSI_PROM_CMD_SEID_MASK 0x3FF __le16 vlan_tag; @@ -1254,10 +1289,16 @@ struct i40e_aqc_add_remove_cloud_filters_element_data { #define I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT 9 #define I40E_AQC_ADD_CLOUD_TNL_TYPE_MASK 0x1E00 -#define I40E_AQC_ADD_CLOUD_TNL_TYPE_XVLAN 0 +#define I40E_AQC_ADD_CLOUD_TNL_TYPE_VXLAN 0 #define I40E_AQC_ADD_CLOUD_TNL_TYPE_NVGRE_OMAC 1 -#define I40E_AQC_ADD_CLOUD_TNL_TYPE_NGE 2 +#define I40E_AQC_ADD_CLOUD_TNL_TYPE_GENEVE 2 #define I40E_AQC_ADD_CLOUD_TNL_TYPE_IP 3 +#define I40E_AQC_ADD_CLOUD_TNL_TYPE_RESERVED 4 +#define I40E_AQC_ADD_CLOUD_TNL_TYPE_VXLAN_GPE 5 + +#define I40E_AQC_ADD_CLOUD_FLAGS_SHARED_OUTER_MAC 0x2000 +#define I40E_AQC_ADD_CLOUD_FLAGS_SHARED_INNER_MAC 0x4000 +#define I40E_AQC_ADD_CLOUD_FLAGS_SHARED_OUTER_IP 0x8000 __le32 tenant_id; u8 reserved[4]; @@ -1752,7 +1793,12 @@ struct i40e_aqc_get_link_status { u8 config; #define I40E_AQ_CONFIG_CRC_ENA 0x04 #define I40E_AQ_CONFIG_PACING_MASK 0x78 - u8 reserved[5]; + u8 external_power_ability; +#define I40E_AQ_LINK_POWER_CLASS_1 0x00 +#define I40E_AQ_LINK_POWER_CLASS_2 0x01 +#define I40E_AQ_LINK_POWER_CLASS_3 0x02 +#define I40E_AQ_LINK_POWER_CLASS_4 0x03 + u8 reserved[4]; }; I40E_CHECK_CMD_LENGTH(i40e_aqc_get_link_status); @@ -1820,6 +1866,18 @@ enum i40e_aq_phy_reg_type { I40E_AQC_PHY_REG_EXERNAL_MODULE = 0x3 }; +/* Run PHY Activity (0x0626) */ +struct i40e_aqc_run_phy_activity { + __le16 activity_id; + u8 flags; + u8 reserved1; + __le32 control; + __le32 data; + u8 reserved2[4]; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_run_phy_activity); + /* NVM Read command (indirect 0x0701) * NVM Erase commands (direct 0x0702) * NVM Update commands (indirect 0x0703) @@ -1909,6 +1967,22 @@ struct i40e_aqc_nvm_oem_post_update_buffer { I40E_CHECK_STRUCT_LEN(0x28, i40e_aqc_nvm_oem_post_update_buffer); +/* Thermal Sensor (indirect 0x0721) + * read or set thermal sensor configs and values + * takes a sensor and command specific data buffer, not detailed here + */ +struct i40e_aqc_thermal_sensor { + u8 sensor_action; +#define I40E_AQ_THERMAL_SENSOR_READ_CONFIG 0 +#define I40E_AQ_THERMAL_SENSOR_SET_CONFIG 1 +#define I40E_AQ_THERMAL_SENSOR_READ_TEMP 2 + u8 reserved[7]; + __le32 addr_high; + __le32 addr_low; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_thermal_sensor); + /* Send to PF command (indirect 0x0801) id is only used by PF * Send to VF command (indirect 0x0802) id is only used by PF * Send to Peer PF command (indirect 0x0803) @@ -2083,6 +2157,7 @@ struct i40e_aqc_add_udp_tunnel { #define I40E_AQC_TUNNEL_TYPE_VXLAN 0x00 #define I40E_AQC_TUNNEL_TYPE_NGE 0x01 #define I40E_AQC_TUNNEL_TYPE_TEREDO 0x10 +#define I40E_AQC_TUNNEL_TYPE_VXLAN_GPE 0x11 u8 reserved1[10]; }; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c index 938783e0baac..771ac6ad8cda 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_common.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c @@ -904,6 +904,131 @@ struct i40e_rx_ptype_decoded i40evf_ptype_lookup[] = { }; /** + * i40evf_aq_rx_ctl_read_register - use FW to read from an Rx control register + * @hw: pointer to the hw struct + * @reg_addr: register address + * @reg_val: ptr to register value + * @cmd_details: pointer to command details structure or NULL + * + * Use the firmware to read the Rx control register, + * especially useful if the Rx unit is under heavy pressure + **/ +i40e_status i40evf_aq_rx_ctl_read_register(struct i40e_hw *hw, + u32 reg_addr, u32 *reg_val, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_rx_ctl_reg_read_write *cmd_resp = + (struct i40e_aqc_rx_ctl_reg_read_write *)&desc.params.raw; + i40e_status status; + + if (!reg_val) + return I40E_ERR_PARAM; + + i40evf_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_rx_ctl_reg_read); + + cmd_resp->address = cpu_to_le32(reg_addr); + + status = i40evf_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + if (status == 0) + *reg_val = le32_to_cpu(cmd_resp->value); + + return status; +} + +/** + * i40evf_read_rx_ctl - read from an Rx control register + * @hw: pointer to the hw struct + * @reg_addr: register address + **/ +u32 i40evf_read_rx_ctl(struct i40e_hw *hw, u32 reg_addr) +{ + i40e_status status = 0; + bool use_register; + int retry = 5; + u32 val = 0; + + use_register = (hw->aq.api_maj_ver == 1) && (hw->aq.api_min_ver < 5); + if (!use_register) { +do_retry: + status = i40evf_aq_rx_ctl_read_register(hw, reg_addr, + &val, NULL); + if (hw->aq.asq_last_status == I40E_AQ_RC_EAGAIN && retry) { + usleep_range(1000, 2000); + retry--; + goto do_retry; + } + } + + /* if the AQ access failed, try the old-fashioned way */ + if (status || use_register) + val = rd32(hw, reg_addr); + + return val; +} + +/** + * i40evf_aq_rx_ctl_write_register + * @hw: pointer to the hw struct + * @reg_addr: register address + * @reg_val: register value + * @cmd_details: pointer to command details structure or NULL + * + * Use the firmware to write to an Rx control register, + * especially useful if the Rx unit is under heavy pressure + **/ +i40e_status i40evf_aq_rx_ctl_write_register(struct i40e_hw *hw, + u32 reg_addr, u32 reg_val, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_rx_ctl_reg_read_write *cmd = + (struct i40e_aqc_rx_ctl_reg_read_write *)&desc.params.raw; + i40e_status status; + + i40evf_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_rx_ctl_reg_write); + + cmd->address = cpu_to_le32(reg_addr); + cmd->value = cpu_to_le32(reg_val); + + status = i40evf_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + return status; +} + +/** + * i40evf_write_rx_ctl - write to an Rx control register + * @hw: pointer to the hw struct + * @reg_addr: register address + * @reg_val: register value + **/ +void i40evf_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val) +{ + i40e_status status = 0; + bool use_register; + int retry = 5; + + use_register = (hw->aq.api_maj_ver == 1) && (hw->aq.api_min_ver < 5); + if (!use_register) { +do_retry: + status = i40evf_aq_rx_ctl_write_register(hw, reg_addr, + reg_val, NULL); + if (hw->aq.asq_last_status == I40E_AQ_RC_EAGAIN && retry) { + usleep_range(1000, 2000); + retry--; + goto do_retry; + } + } + + /* if the AQ access failed, try the old-fashioned way */ + if (status || use_register) + wr32(hw, reg_addr, reg_val); +} + +/** * i40e_aq_send_msg_to_pf * @hw: pointer to the hardware structure * @v_opcode: opcodes for VF-PF communication diff --git a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h index cbd9a1b078ab..d89d52109efa 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h @@ -103,4 +103,19 @@ i40e_status i40e_aq_add_rem_control_packet_filter(struct i40e_hw *hw, struct i40e_asq_cmd_details *cmd_details); void i40e_add_filter_to_drop_tx_flow_control_frames(struct i40e_hw *hw, u16 vsi_seid); +i40e_status i40evf_aq_rx_ctl_read_register(struct i40e_hw *hw, + u32 reg_addr, u32 *reg_val, + struct i40e_asq_cmd_details *cmd_details); +u32 i40evf_read_rx_ctl(struct i40e_hw *hw, u32 reg_addr); +i40e_status i40evf_aq_rx_ctl_write_register(struct i40e_hw *hw, + u32 reg_addr, u32 reg_val, + struct i40e_asq_cmd_details *cmd_details); +void i40evf_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val); +i40e_status i40e_read_phy_register(struct i40e_hw *hw, u8 page, + u16 reg, u8 phy_addr, u16 *value); +i40e_status i40e_write_phy_register(struct i40e_hw *hw, u8 page, + u16 reg, u8 phy_addr, u16 value); +u8 i40e_get_phy_address(struct i40e_hw *hw, u8 dev_num); +i40e_status i40e_blink_phy_link_led(struct i40e_hw *hw, + u32 time, u32 interval); #endif /* _I40E_PROTOTYPE_H_ */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 7a00657dacda..ebcc25c05796 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -129,15 +129,19 @@ void i40evf_free_tx_resources(struct i40e_ring *tx_ring) /** * i40evf_get_tx_pending - how many Tx descriptors not processed * @tx_ring: the ring of descriptors + * @in_sw: is tx_pending being checked in SW or HW * * Since there is no access to the ring head register * in XL710, we need to use our local copies **/ -u32 i40evf_get_tx_pending(struct i40e_ring *ring) +u32 i40evf_get_tx_pending(struct i40e_ring *ring, bool in_sw) { u32 head, tail; - head = i40e_get_head(ring); + if (!in_sw) + head = i40e_get_head(ring); + else + head = ring->next_to_clean; tail = readl(ring->tail); if (head != tail) @@ -252,6 +256,22 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) tx_ring->q_vector->tx.total_bytes += total_bytes; tx_ring->q_vector->tx.total_packets += total_packets; + if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) { + unsigned int j = 0; + /* check to see if there are < 4 descriptors + * waiting to be written back, then kick the hardware to force + * them to be written back in case we stay in NAPI. + * In this mode on X722 we do not enable Interrupt. + */ + j = i40evf_get_tx_pending(tx_ring, false); + + if (budget && + ((j / (WB_STRIDE + 1)) == 0) && (j > 0) && + !test_bit(__I40E_DOWN, &tx_ring->vsi->state) && + (I40E_DESC_UNUSED(tx_ring) != tx_ring->count)) + tx_ring->arm_wb = true; + } + netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index), total_packets, total_bytes); @@ -276,39 +296,49 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) } /** - * i40evf_force_wb -Arm hardware to do a wb on noncache aligned descriptors + * i40evf_enable_wb_on_itr - Arm hardware to do a wb, interrupts are not enabled * @vsi: the VSI we care about - * @q_vector: the vector on which to force writeback + * @q_vector: the vector on which to enable writeback * **/ -static void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) +static void i40e_enable_wb_on_itr(struct i40e_vsi *vsi, + struct i40e_q_vector *q_vector) { u16 flags = q_vector->tx.ring[0].flags; + u32 val; - if (flags & I40E_TXR_FLAGS_WB_ON_ITR) { - u32 val; + if (!(flags & I40E_TXR_FLAGS_WB_ON_ITR)) + return; - if (q_vector->arm_wb_state) - return; + if (q_vector->arm_wb_state) + return; - val = I40E_VFINT_DYN_CTLN1_WB_ON_ITR_MASK; + val = I40E_VFINT_DYN_CTLN1_WB_ON_ITR_MASK | + I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK; /* set noitr */ - wr32(&vsi->back->hw, - I40E_VFINT_DYN_CTLN1(q_vector->v_idx + - vsi->base_vector - 1), - val); - q_vector->arm_wb_state = true; - } else { - u32 val = I40E_VFINT_DYN_CTLN1_INTENA_MASK | - I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK | /* set noitr */ - I40E_VFINT_DYN_CTLN1_SWINT_TRIG_MASK | - I40E_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK; - /* allow 00 to be written to the index */ - - wr32(&vsi->back->hw, - I40E_VFINT_DYN_CTLN1(q_vector->v_idx + - vsi->base_vector - 1), val); - } + wr32(&vsi->back->hw, + I40E_VFINT_DYN_CTLN1(q_vector->v_idx + + vsi->base_vector - 1), val); + q_vector->arm_wb_state = true; +} + +/** + * i40evf_force_wb - Issue SW Interrupt so HW does a wb + * @vsi: the VSI we care about + * @q_vector: the vector on which to force writeback + * + **/ +void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) +{ + u32 val = I40E_VFINT_DYN_CTLN1_INTENA_MASK | + I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK | /* set noitr */ + I40E_VFINT_DYN_CTLN1_SWINT_TRIG_MASK | + I40E_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK + /* allow 00 to be written to the index */; + + wr32(&vsi->back->hw, + I40E_VFINT_DYN_CTLN1(q_vector->v_idx + vsi->base_vector - 1), + val); } /** @@ -506,7 +536,7 @@ void i40evf_clean_rx_ring(struct i40e_ring *rx_ring) if (rx_bi->page_dma) { dma_unmap_page(dev, rx_bi->page_dma, - PAGE_SIZE / 2, + PAGE_SIZE, DMA_FROM_DEVICE); rx_bi->page_dma = 0; } @@ -641,16 +671,19 @@ static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val) * i40evf_alloc_rx_buffers_ps - Replace used receive buffers; packet split * @rx_ring: ring to place buffers on * @cleaned_count: number of buffers to replace + * + * Returns true if any errors on allocation **/ -void i40evf_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count) +bool i40evf_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count) { u16 i = rx_ring->next_to_use; union i40e_rx_desc *rx_desc; struct i40e_rx_buffer *bi; + const int current_node = numa_node_id(); /* do nothing if no valid netdev defined */ if (!rx_ring->netdev || !cleaned_count) - return; + return false; while (cleaned_count--) { rx_desc = I40E_RX_DESC(rx_ring, i); @@ -658,56 +691,79 @@ void i40evf_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count) if (bi->skb) /* desc is in use */ goto no_buffers; + + /* If we've been moved to a different NUMA node, release the + * page so we can get a new one on the current node. + */ + if (bi->page && page_to_nid(bi->page) != current_node) { + dma_unmap_page(rx_ring->dev, + bi->page_dma, + PAGE_SIZE, + DMA_FROM_DEVICE); + __free_page(bi->page); + bi->page = NULL; + bi->page_dma = 0; + rx_ring->rx_stats.realloc_count++; + } else if (bi->page) { + rx_ring->rx_stats.page_reuse_count++; + } + if (!bi->page) { bi->page = alloc_page(GFP_ATOMIC); if (!bi->page) { rx_ring->rx_stats.alloc_page_failed++; goto no_buffers; } - } - - if (!bi->page_dma) { - /* use a half page if we're re-using */ - bi->page_offset ^= PAGE_SIZE / 2; bi->page_dma = dma_map_page(rx_ring->dev, bi->page, - bi->page_offset, - PAGE_SIZE / 2, + 0, + PAGE_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(rx_ring->dev, - bi->page_dma)) { + if (dma_mapping_error(rx_ring->dev, bi->page_dma)) { rx_ring->rx_stats.alloc_page_failed++; + __free_page(bi->page); + bi->page = NULL; bi->page_dma = 0; + bi->page_offset = 0; goto no_buffers; } + bi->page_offset = 0; } - dma_sync_single_range_for_device(rx_ring->dev, - bi->dma, - 0, - rx_ring->rx_hdr_len, - DMA_FROM_DEVICE); /* Refresh the desc even if buffer_addrs didn't change * because each write-back erases this info. */ - rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma); + rx_desc->read.pkt_addr = + cpu_to_le64(bi->page_dma + bi->page_offset); rx_desc->read.hdr_addr = cpu_to_le64(bi->dma); i++; if (i == rx_ring->count) i = 0; } + if (rx_ring->next_to_use != i) + i40e_release_rx_desc(rx_ring, i); + + return false; + no_buffers: if (rx_ring->next_to_use != i) i40e_release_rx_desc(rx_ring, i); + + /* make sure to come back via polling to try again after + * allocation failure + */ + return true; } /** * i40evf_alloc_rx_buffers_1buf - Replace used receive buffers; single buffer * @rx_ring: ring to place buffers on * @cleaned_count: number of buffers to replace + * + * Returns true if any errors on allocation **/ -void i40evf_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) +bool i40evf_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) { u16 i = rx_ring->next_to_use; union i40e_rx_desc *rx_desc; @@ -716,7 +772,7 @@ void i40evf_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) /* do nothing if no valid netdev defined */ if (!rx_ring->netdev || !cleaned_count) - return; + return false; while (cleaned_count--) { rx_desc = I40E_RX_DESC(rx_ring, i); @@ -724,8 +780,10 @@ void i40evf_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) skb = bi->skb; if (!skb) { - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - rx_ring->rx_buf_len); + skb = __netdev_alloc_skb_ip_align(rx_ring->netdev, + rx_ring->rx_buf_len, + GFP_ATOMIC | + __GFP_NOWARN); if (!skb) { rx_ring->rx_stats.alloc_buff_failed++; goto no_buffers; @@ -743,6 +801,8 @@ void i40evf_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) if (dma_mapping_error(rx_ring->dev, bi->dma)) { rx_ring->rx_stats.alloc_buff_failed++; bi->dma = 0; + dev_kfree_skb(bi->skb); + bi->skb = NULL; goto no_buffers; } } @@ -754,9 +814,19 @@ void i40evf_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count) i = 0; } + if (rx_ring->next_to_use != i) + i40e_release_rx_desc(rx_ring, i); + + return false; + no_buffers: if (rx_ring->next_to_use != i) i40e_release_rx_desc(rx_ring, i); + + /* make sure to come back via polling to try again after + * allocation failure + */ + return true; } /** @@ -791,16 +861,7 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, u16 rx_ptype) { struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype); - bool ipv4 = false, ipv6 = false; - bool ipv4_tunnel, ipv6_tunnel; - __wsum rx_udp_csum; - struct iphdr *iph; - __sum16 csum; - - ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) && - (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4); - ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) && - (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4); + bool ipv4, ipv6, ipv4_tunnel, ipv6_tunnel; skb->ip_summed = CHECKSUM_NONE; @@ -816,12 +877,10 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, if (!(decoded.known && decoded.outer_ip)) return; - if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && - decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) - ipv4 = true; - else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && - decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) - ipv6 = true; + ipv4 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) && + (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4); + ipv6 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) && + (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6); if (ipv4 && (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) | @@ -845,36 +904,17 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT)) return; - /* If VXLAN traffic has an outer UDPv4 checksum we need to check - * it in the driver, hardware does not do it for us. - * Since L3L4P bit was set we assume a valid IHL value (>=5) - * so the total length of IPv4 header is IHL*4 bytes - * The UDP_0 bit *may* bet set if the *inner* header is UDP + /* The hardware supported by this driver does not validate outer + * checksums for tunneled VXLAN or GENEVE frames. I don't agree + * with it but the specification states that you "MAY validate", it + * doesn't make it a hard requirement so if we have validated the + * inner checksum report CHECKSUM_UNNECESSARY. */ - if (ipv4_tunnel) { - skb->transport_header = skb->mac_header + - sizeof(struct ethhdr) + - (ip_hdr(skb)->ihl * 4); - - /* Add 4 bytes for VLAN tagged packets */ - skb->transport_header += (skb->protocol == htons(ETH_P_8021Q) || - skb->protocol == htons(ETH_P_8021AD)) - ? VLAN_HLEN : 0; - - if ((ip_hdr(skb)->protocol == IPPROTO_UDP) && - (udp_hdr(skb)->check != 0)) { - rx_udp_csum = udp_csum(skb); - iph = ip_hdr(skb); - csum = csum_tcpudp_magic(iph->saddr, iph->daddr, - (skb->len - - skb_transport_offset(skb)), - IPPROTO_UDP, rx_udp_csum); - - if (udp_hdr(skb)->check != csum) - goto checksum_fail; - - } /* else its GRE and so no outer UDP header */ - } + + ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) && + (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4); + ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) && + (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4); skb->ip_summed = CHECKSUM_UNNECESSARY; skb->csum_level = ipv4_tunnel || ipv6_tunnel; @@ -939,18 +979,19 @@ static inline void i40e_rx_hash(struct i40e_ring *ring, * * Returns true if there's any budget left (e.g. the clean is finished) **/ -static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) +static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, const int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo; u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); - const int current_node = numa_mem_id(); struct i40e_vsi *vsi = rx_ring->vsi; u16 i = rx_ring->next_to_clean; union i40e_rx_desc *rx_desc; u32 rx_error, rx_status; + bool failure = false; u8 rx_ptype; u64 qword; + u32 copysize; do { struct i40e_rx_buffer *rx_bi; @@ -958,7 +999,9 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) u16 vlan_tag; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= I40E_RX_BUFFER_WRITE) { - i40evf_alloc_rx_buffers_ps(rx_ring, cleaned_count); + failure = failure || + i40evf_alloc_rx_buffers_ps(rx_ring, + cleaned_count); cleaned_count = 0; } @@ -976,13 +1019,22 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) * DD bit is set. */ dma_rmb(); + /* sync header buffer for reading */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_ring->rx_bi[0].dma, + i * rx_ring->rx_hdr_len, + rx_ring->rx_hdr_len, + DMA_FROM_DEVICE); rx_bi = &rx_ring->rx_bi[i]; skb = rx_bi->skb; if (likely(!skb)) { - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - rx_ring->rx_hdr_len); + skb = __netdev_alloc_skb_ip_align(rx_ring->netdev, + rx_ring->rx_hdr_len, + GFP_ATOMIC | + __GFP_NOWARN); if (!skb) { rx_ring->rx_stats.alloc_buff_failed++; + failure = true; break; } @@ -990,8 +1042,8 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) skb_record_rx_queue(skb, rx_ring->queue_index); /* we are reusing so sync this buffer for CPU use */ dma_sync_single_range_for_cpu(rx_ring->dev, - rx_bi->dma, - 0, + rx_ring->rx_bi[0].dma, + i * rx_ring->rx_hdr_len, rx_ring->rx_hdr_len, DMA_FROM_DEVICE); } @@ -1009,9 +1061,16 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT; - prefetch(rx_bi->page); + /* sync half-page for reading */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_bi->page_dma, + rx_bi->page_offset, + PAGE_SIZE / 2, + DMA_FROM_DEVICE); + prefetch(page_address(rx_bi->page) + rx_bi->page_offset); rx_bi->skb = NULL; cleaned_count++; + copysize = 0; if (rx_hbo || rx_sph) { int len; @@ -1022,38 +1081,50 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) memcpy(__skb_put(skb, len), rx_bi->hdr_buf, len); } else if (skb->len == 0) { int len; + unsigned char *va = page_address(rx_bi->page) + + rx_bi->page_offset; - len = (rx_packet_len > skb_headlen(skb) ? - skb_headlen(skb) : rx_packet_len); - memcpy(__skb_put(skb, len), - rx_bi->page + rx_bi->page_offset, - len); - rx_bi->page_offset += len; + len = min(rx_packet_len, rx_ring->rx_hdr_len); + memcpy(__skb_put(skb, len), va, len); + copysize = len; rx_packet_len -= len; } - /* Get the rest of the data if this was a header split */ if (rx_packet_len) { - skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, - rx_bi->page, - rx_bi->page_offset, - rx_packet_len); - - skb->len += rx_packet_len; - skb->data_len += rx_packet_len; - skb->truesize += rx_packet_len; - - if ((page_count(rx_bi->page) == 1) && - (page_to_nid(rx_bi->page) == current_node)) - get_page(rx_bi->page); - else + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + rx_bi->page, + rx_bi->page_offset + copysize, + rx_packet_len, I40E_RXBUFFER_2048); + + /* If the page count is more than 2, then both halves + * of the page are used and we need to free it. Do it + * here instead of in the alloc code. Otherwise one + * of the half-pages might be released between now and + * then, and we wouldn't know which one to use. + * Don't call get_page and free_page since those are + * both expensive atomic operations that just change + * the refcount in opposite directions. Just give the + * page to the stack; he can have our refcount. + */ + if (page_count(rx_bi->page) > 2) { + dma_unmap_page(rx_ring->dev, + rx_bi->page_dma, + PAGE_SIZE, + DMA_FROM_DEVICE); rx_bi->page = NULL; + rx_bi->page_dma = 0; + rx_ring->rx_stats.realloc_count++; + } else { + get_page(rx_bi->page); + /* switch to the other half-page here; the + * allocation code programs the right addr + * into HW. If we haven't used this half-page, + * the address won't be changed, and HW can + * just use it next time through. + */ + rx_bi->page_offset ^= PAGE_SIZE / 2; + } - dma_unmap_page(rx_ring->dev, - rx_bi->page_dma, - PAGE_SIZE / 2, - DMA_FROM_DEVICE); - rx_bi->page_dma = 0; } I40E_RX_INCREMENT(rx_ring, i); @@ -1105,7 +1176,7 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) rx_ring->q_vector->rx.total_packets += total_rx_packets; rx_ring->q_vector->rx.total_bytes += total_rx_bytes; - return total_rx_packets; + return failure ? budget : total_rx_packets; } /** @@ -1123,6 +1194,7 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) union i40e_rx_desc *rx_desc; u32 rx_error, rx_status; u16 rx_packet_len; + bool failure = false; u8 rx_ptype; u64 qword; u16 i; @@ -1133,7 +1205,9 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) u16 vlan_tag; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= I40E_RX_BUFFER_WRITE) { - i40evf_alloc_rx_buffers_1buf(rx_ring, cleaned_count); + failure = failure || + i40evf_alloc_rx_buffers_1buf(rx_ring, + cleaned_count); cleaned_count = 0; } @@ -1214,7 +1288,7 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) rx_ring->q_vector->rx.total_packets += total_rx_packets; rx_ring->q_vector->rx.total_bytes += total_rx_bytes; - return total_rx_packets; + return failure ? budget : total_rx_packets; } static u32 i40e_buildreg_itr(const int type, const u16 itr) @@ -1222,7 +1296,9 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr) u32 val; val = I40E_VFINT_DYN_CTLN1_INTENA_MASK | - I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK | + /* Don't clear PBA because that can cause lost interrupts that + * came in while we were cleaning/polling + */ (type << I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) | (itr << I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT); @@ -1335,7 +1411,8 @@ int i40evf_napi_poll(struct napi_struct *napi, int budget) * budget and be more aggressive about cleaning up the Tx descriptors. */ i40e_for_each_ring(ring, q_vector->tx) { - clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit); + clean_complete = clean_complete && + i40e_clean_tx_irq(ring, vsi->work_limit); arm_wb = arm_wb || ring->arm_wb; ring->arm_wb = false; } @@ -1359,7 +1436,7 @@ int i40evf_napi_poll(struct napi_struct *napi, int budget) work_done += cleaned; /* if we didn't clean as many as budgeted, we must be done */ - clean_complete &= (budget_per_ring != cleaned); + clean_complete = clean_complete && (budget_per_ring > cleaned); } /* If work not completed, return budget and polling will return */ @@ -1367,7 +1444,7 @@ int i40evf_napi_poll(struct napi_struct *napi, int budget) tx_only: if (arm_wb) { q_vector->tx.ring[0].tx_stats.tx_force_wb++; - i40evf_force_wb(vsi, q_vector); + i40e_enable_wb_on_itr(vsi, q_vector); } return budget; } @@ -1447,13 +1524,23 @@ out: static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss) { - u32 cd_cmd, cd_tso_len, cd_mss; - struct ipv6hdr *ipv6h; - struct tcphdr *tcph; - struct iphdr *iph; - u32 l4len; + u64 cd_cmd, cd_tso_len, cd_mss; + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + union { + struct tcphdr *tcp; + struct udphdr *udp; + unsigned char *hdr; + } l4; + u32 paylen, l4_offset; int err; + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + if (!skb_is_gso(skb)) return 0; @@ -1461,35 +1548,60 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, if (err < 0) return err; - iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb); - ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb); - - if (iph->version == 4) { - tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb); - iph->tot_len = 0; - iph->check = 0; - tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, - 0, IPPROTO_TCP, 0); - } else if (ipv6h->version == 6) { - tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb); - ipv6h->payload_len = 0; - tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, - 0, IPPROTO_TCP, 0); + ip.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + + /* initialize outer IP header fields */ + if (ip.v4->version == 4) { + ip.v4->tot_len = 0; + ip.v4->check = 0; + } else { + ip.v6->payload_len = 0; + } + + if (skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL | SKB_GSO_GRE | + SKB_GSO_UDP_TUNNEL_CSUM)) { + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM) { + /* determine offset of outer transport header */ + l4_offset = l4.hdr - skb->data; + + /* remove payload length from outer checksum */ + paylen = (__force u16)l4.udp->check; + paylen += ntohs(1) * (u16)~(skb->len - l4_offset); + l4.udp->check = ~csum_fold((__force __wsum)paylen); + } + + /* reset pointers to inner headers */ + ip.hdr = skb_inner_network_header(skb); + l4.hdr = skb_inner_transport_header(skb); + + /* initialize inner IP header fields */ + if (ip.v4->version == 4) { + ip.v4->tot_len = 0; + ip.v4->check = 0; + } else { + ip.v6->payload_len = 0; + } } - l4len = skb->encapsulation ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb); - *hdr_len = (skb->encapsulation - ? (skb_inner_transport_header(skb) - skb->data) - : skb_transport_offset(skb)) + l4len; + /* determine offset of inner transport header */ + l4_offset = l4.hdr - skb->data; + + /* remove payload length from inner checksum */ + paylen = (__force u16)l4.tcp->check; + paylen += ntohs(1) * (u16)~(skb->len - l4_offset); + l4.tcp->check = ~csum_fold((__force __wsum)paylen); + + /* compute length of segmentation header */ + *hdr_len = (l4.tcp->doff * 4) + l4_offset; /* find the field values */ cd_cmd = I40E_TX_CTX_DESC_TSO; cd_tso_len = skb->len - *hdr_len; cd_mss = skb_shinfo(skb)->gso_size; - *cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) | - ((u64)cd_tso_len << - I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | - ((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT); + *cd_type_cmd_tso_mss |= (cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) | + (cd_tso_len << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | + (cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT); return 1; } @@ -1499,129 +1611,157 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, * @tx_flags: pointer to Tx flags currently set * @td_cmd: Tx descriptor command bits to set * @td_offset: Tx descriptor header offsets to set + * @tx_ring: Tx descriptor ring * @cd_tunneling: ptr to context desc bits **/ -static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, - u32 *td_cmd, u32 *td_offset, - struct i40e_ring *tx_ring, - u32 *cd_tunneling) +static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, + u32 *td_cmd, u32 *td_offset, + struct i40e_ring *tx_ring, + u32 *cd_tunneling) { - struct ipv6hdr *this_ipv6_hdr; - unsigned int this_tcp_hdrlen; - struct iphdr *this_ip_hdr; - u32 network_hdr_len; - u8 l4_hdr = 0; - struct udphdr *oudph; - struct iphdr *oiph; - u32 l4_tunnel = 0; + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + union { + struct tcphdr *tcp; + struct udphdr *udp; + unsigned char *hdr; + } l4; + unsigned char *exthdr; + u32 offset, cmd = 0, tunnel = 0; + __be16 frag_off; + u8 l4_proto = 0; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + ip.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + + /* compute outer L2 header size */ + offset = ((ip.hdr - skb->data) / 2) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT; if (skb->encapsulation) { - switch (ip_hdr(skb)->protocol) { + /* define outer network header type */ + if (*tx_flags & I40E_TX_FLAGS_IPV4) { + tunnel |= (*tx_flags & I40E_TX_FLAGS_TSO) ? + I40E_TX_CTX_EXT_IP_IPV4 : + I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; + + l4_proto = ip.v4->protocol; + } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { + tunnel |= I40E_TX_CTX_EXT_IP_IPV6; + + exthdr = ip.hdr + sizeof(*ip.v6); + l4_proto = ip.v6->nexthdr; + if (l4.hdr != exthdr) + ipv6_skip_exthdr(skb, exthdr - skb->data, + &l4_proto, &frag_off); + } + + /* compute outer L3 header size */ + tunnel |= ((l4.hdr - ip.hdr) / 4) << + I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT; + + /* switch IP header pointer from outer to inner header */ + ip.hdr = skb_inner_network_header(skb); + + /* define outer transport */ + switch (l4_proto) { case IPPROTO_UDP: - oudph = udp_hdr(skb); - oiph = ip_hdr(skb); - l4_tunnel = I40E_TXD_CTX_UDP_TUNNELING; + tunnel |= I40E_TXD_CTX_UDP_TUNNELING; + *tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL; + break; + case IPPROTO_GRE: + tunnel |= I40E_TXD_CTX_GRE_TUNNELING; *tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL; break; default: - return; - } - network_hdr_len = skb_inner_network_header_len(skb); - this_ip_hdr = inner_ip_hdr(skb); - this_ipv6_hdr = inner_ipv6_hdr(skb); - this_tcp_hdrlen = inner_tcp_hdrlen(skb); - - if (*tx_flags & I40E_TX_FLAGS_IPV4) { - if (*tx_flags & I40E_TX_FLAGS_TSO) { - *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4; - ip_hdr(skb)->check = 0; - } else { - *cd_tunneling |= - I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; - } - } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { - *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6; if (*tx_flags & I40E_TX_FLAGS_TSO) - ip_hdr(skb)->check = 0; - } + return -1; - /* Now set the ctx descriptor fields */ - *cd_tunneling |= (skb_network_header_len(skb) >> 2) << - I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT | - l4_tunnel | - ((skb_inner_network_offset(skb) - - skb_transport_offset(skb)) >> 1) << - I40E_TXD_CTX_QW0_NATLEN_SHIFT; - if (this_ip_hdr->version == 6) { - *tx_flags &= ~I40E_TX_FLAGS_IPV4; - *tx_flags |= I40E_TX_FLAGS_IPV6; + skb_checksum_help(skb); + return 0; } - if ((tx_ring->flags & I40E_TXR_FLAGS_OUTER_UDP_CSUM) && - (l4_tunnel == I40E_TXD_CTX_UDP_TUNNELING) && - (*cd_tunneling & I40E_TXD_CTX_QW0_EXT_IP_MASK)) { - oudph->check = ~csum_tcpudp_magic(oiph->saddr, - oiph->daddr, - (skb->len - skb_transport_offset(skb)), - IPPROTO_UDP, 0); - *cd_tunneling |= I40E_TXD_CTX_QW0_L4T_CS_MASK; - } - } else { - network_hdr_len = skb_network_header_len(skb); - this_ip_hdr = ip_hdr(skb); - this_ipv6_hdr = ipv6_hdr(skb); - this_tcp_hdrlen = tcp_hdrlen(skb); + /* compute tunnel header size */ + tunnel |= ((ip.hdr - l4.hdr) / 2) << + I40E_TXD_CTX_QW0_NATLEN_SHIFT; + + /* indicate if we need to offload outer UDP header */ + if ((*tx_flags & I40E_TX_FLAGS_TSO) && + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) + tunnel |= I40E_TXD_CTX_QW0_L4T_CS_MASK; + + /* record tunnel offload values */ + *cd_tunneling |= tunnel; + + /* switch L4 header pointer from outer to inner */ + l4.hdr = skb_inner_transport_header(skb); + l4_proto = 0; + + /* reset type as we transition from outer to inner headers */ + *tx_flags &= ~(I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6); + if (ip.v4->version == 4) + *tx_flags |= I40E_TX_FLAGS_IPV4; + if (ip.v6->version == 6) + *tx_flags |= I40E_TX_FLAGS_IPV6; } /* Enable IP checksum offloads */ if (*tx_flags & I40E_TX_FLAGS_IPV4) { - l4_hdr = this_ip_hdr->protocol; + l4_proto = ip.v4->protocol; /* the stack computes the IP header already, the only time we * need the hardware to recompute it is in the case of TSO. */ - if (*tx_flags & I40E_TX_FLAGS_TSO) { - *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM; - this_ip_hdr->check = 0; - } else { - *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4; - } - /* Now set the td_offset for IP header length */ - *td_offset = (network_hdr_len >> 2) << - I40E_TX_DESC_LENGTH_IPLEN_SHIFT; + cmd |= (*tx_flags & I40E_TX_FLAGS_TSO) ? + I40E_TX_DESC_CMD_IIPT_IPV4_CSUM : + I40E_TX_DESC_CMD_IIPT_IPV4; } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { - l4_hdr = this_ipv6_hdr->nexthdr; - *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6; - /* Now set the td_offset for IP header length */ - *td_offset = (network_hdr_len >> 2) << - I40E_TX_DESC_LENGTH_IPLEN_SHIFT; + cmd |= I40E_TX_DESC_CMD_IIPT_IPV6; + + exthdr = ip.hdr + sizeof(*ip.v6); + l4_proto = ip.v6->nexthdr; + if (l4.hdr != exthdr) + ipv6_skip_exthdr(skb, exthdr - skb->data, + &l4_proto, &frag_off); } - /* words in MACLEN + dwords in IPLEN + dwords in L4Len */ - *td_offset |= (skb_network_offset(skb) >> 1) << - I40E_TX_DESC_LENGTH_MACLEN_SHIFT; + + /* compute inner L3 header size */ + offset |= ((l4.hdr - ip.hdr) / 4) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT; /* Enable L4 checksum offloads */ - switch (l4_hdr) { + switch (l4_proto) { case IPPROTO_TCP: /* enable checksum offloads */ - *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP; - *td_offset |= (this_tcp_hdrlen >> 2) << - I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; + cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP; + offset |= l4.tcp->doff << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; break; case IPPROTO_SCTP: /* enable SCTP checksum offload */ - *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP; - *td_offset |= (sizeof(struct sctphdr) >> 2) << - I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; + cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP; + offset |= (sizeof(struct sctphdr) >> 2) << + I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; break; case IPPROTO_UDP: /* enable UDP checksum offload */ - *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP; - *td_offset |= (sizeof(struct udphdr) >> 2) << - I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; + cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP; + offset |= (sizeof(struct udphdr) >> 2) << + I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; break; default: - break; + if (*tx_flags & I40E_TX_FLAGS_TSO) + return -1; + skb_checksum_help(skb); + return 0; } + + *td_cmd |= cmd; + *td_offset |= offset; + + return 1; } /** @@ -1656,59 +1796,71 @@ static void i40e_create_tx_ctx(struct i40e_ring *tx_ring, } /** - * i40e_chk_linearize - Check if there are more than 8 fragments per packet + * __i40evf_chk_linearize - Check if there are more than 8 fragments per packet * @skb: send buffer - * @tx_flags: collected send information * * Note: Our HW can't scatter-gather more than 8 fragments to build * a packet on the wire and so we need to figure out the cases where we * need to linearize the skb. **/ -static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags) +bool __i40evf_chk_linearize(struct sk_buff *skb) { - struct skb_frag_struct *frag; - bool linearize = false; - unsigned int size = 0; - u16 num_frags; - u16 gso_segs; + const struct skb_frag_struct *frag, *stale; + int gso_size, nr_frags, sum; - num_frags = skb_shinfo(skb)->nr_frags; - gso_segs = skb_shinfo(skb)->gso_segs; + /* check to see if TSO is enabled, if so we may get a repreive */ + gso_size = skb_shinfo(skb)->gso_size; + if (unlikely(!gso_size)) + return true; - if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) { - u16 j = 0; + /* no need to check if number of frags is less than 8 */ + nr_frags = skb_shinfo(skb)->nr_frags; + if (nr_frags < I40E_MAX_BUFFER_TXD) + return false; - if (num_frags < (I40E_MAX_BUFFER_TXD)) - goto linearize_chk_done; - /* try the simple math, if we have too many frags per segment */ - if (DIV_ROUND_UP((num_frags + gso_segs), gso_segs) > - I40E_MAX_BUFFER_TXD) { - linearize = true; - goto linearize_chk_done; - } - frag = &skb_shinfo(skb)->frags[0]; - /* we might still have more fragments per segment */ - do { - size += skb_frag_size(frag); - frag++; j++; - if ((size >= skb_shinfo(skb)->gso_size) && - (j < I40E_MAX_BUFFER_TXD)) { - size = (size % skb_shinfo(skb)->gso_size); - j = (size) ? 1 : 0; - } - if (j == I40E_MAX_BUFFER_TXD) { - linearize = true; - break; - } - num_frags--; - } while (num_frags); - } else { - if (num_frags >= I40E_MAX_BUFFER_TXD) - linearize = true; + /* We need to walk through the list and validate that each group + * of 6 fragments totals at least gso_size. However we don't need + * to perform such validation on the first or last 6 since the first + * 6 cannot inherit any data from a descriptor before them, and the + * last 6 cannot inherit any data from a descriptor after them. + */ + nr_frags -= I40E_MAX_BUFFER_TXD - 1; + frag = &skb_shinfo(skb)->frags[0]; + + /* Initialize size to the negative value of gso_size minus 1. We + * use this as the worst case scenerio in which the frag ahead + * of us only provides one byte which is why we are limited to 6 + * descriptors for a single transmit as the header and previous + * fragment are already consuming 2 descriptors. + */ + sum = 1 - gso_size; + + /* Add size of frags 1 through 5 to create our initial sum */ + sum += skb_frag_size(++frag); + sum += skb_frag_size(++frag); + sum += skb_frag_size(++frag); + sum += skb_frag_size(++frag); + sum += skb_frag_size(++frag); + + /* Walk through fragments adding latest fragment, testing it, and + * then removing stale fragments from the sum. + */ + stale = &skb_shinfo(skb)->frags[0]; + for (;;) { + sum += skb_frag_size(++frag); + + /* if sum is negative we failed to make sufficient progress */ + if (sum < 0) + return true; + + /* use pre-decrement to avoid processing last fragment */ + if (!--nr_frags) + break; + + sum -= skb_frag_size(++stale); } -linearize_chk_done: - return linearize; + return false; } /** @@ -1718,7 +1870,7 @@ linearize_chk_done: * * Returns -EBUSY if a stop is needed, else 0 **/ -static inline int __i40evf_maybe_stop_tx(struct i40e_ring *tx_ring, int size) +int __i40evf_maybe_stop_tx(struct i40e_ring *tx_ring, int size) { netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); /* Memory barrier before checking head and tail */ @@ -1735,20 +1887,6 @@ static inline int __i40evf_maybe_stop_tx(struct i40e_ring *tx_ring, int size) } /** - * i40evf_maybe_stop_tx - 1st level check for tx stop conditions - * @tx_ring: the ring to be checked - * @size: the size buffer we want to assure is available - * - * Returns 0 if stop is not needed - **/ -static inline int i40evf_maybe_stop_tx(struct i40e_ring *tx_ring, int size) -{ - if (likely(I40E_DESC_UNUSED(tx_ring) >= size)) - return 0; - return __i40evf_maybe_stop_tx(tx_ring, size); -} - -/** * i40evf_tx_map - Build the Tx descriptor * @tx_ring: ring to send buffer on * @skb: send buffer @@ -1863,7 +2001,7 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index), first->bytecount); - i40evf_maybe_stop_tx(tx_ring, DESC_NEEDED); + i40e_maybe_stop_tx(tx_ring, DESC_NEEDED); /* Algorithm to optimize tail and RS bit setting: * if xmit_more is supported @@ -1946,38 +2084,6 @@ dma_error: } /** - * i40evf_xmit_descriptor_count - calculate number of tx descriptors needed - * @skb: send buffer - * @tx_ring: ring to send buffer on - * - * Returns number of data descriptors needed for this skb. Returns 0 to indicate - * there is not enough descriptors available in this ring since we need at least - * one descriptor. - **/ -static inline int i40evf_xmit_descriptor_count(struct sk_buff *skb, - struct i40e_ring *tx_ring) -{ - unsigned int f; - int count = 0; - - /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD, - * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD, - * + 4 desc gap to avoid the cache line where head is, - * + 1 desc for context descriptor, - * otherwise try next time - */ - for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) - count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); - - count += TXD_USE_COUNT(skb_headlen(skb)); - if (i40evf_maybe_stop_tx(tx_ring, count + 4 + 1)) { - tx_ring->tx_stats.tx_busy++; - return 0; - } - return count; -} - -/** * i40e_xmit_frame_ring - Sends buffer on Tx ring * @skb: send buffer * @tx_ring: ring to send buffer on @@ -1995,13 +2101,29 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, __be16 protocol; u32 td_cmd = 0; u8 hdr_len = 0; - int tso; + int tso, count; /* prefetch the data, we'll need it later */ prefetch(skb->data); - if (0 == i40evf_xmit_descriptor_count(skb, tx_ring)) + count = i40e_xmit_descriptor_count(skb); + if (i40e_chk_linearize(skb, count)) { + if (__skb_linearize(skb)) + goto out_drop; + count = TXD_USE_COUNT(skb->len); + tx_ring->tx_stats.tx_linearize++; + } + + /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD, + * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD, + * + 4 desc gap to avoid the cache line where head is, + * + 1 desc for context descriptor, + * otherwise try next time + */ + if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) { + tx_ring->tx_stats.tx_busy++; return NETDEV_TX_BUSY; + } /* prepare the xmit flags */ if (i40evf_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags)) @@ -2026,24 +2148,17 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, else if (tso) tx_flags |= I40E_TX_FLAGS_TSO; - if (i40e_chk_linearize(skb, tx_flags)) { - if (skb_linearize(skb)) - goto out_drop; - tx_ring->tx_stats.tx_linearize++; - } + /* Always offload the checksum, since it's in the data descriptor */ + tso = i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset, + tx_ring, &cd_tunneling); + if (tso < 0) + goto out_drop; + skb_tx_timestamp(skb); /* always enable CRC insertion offload */ td_cmd |= I40E_TX_DESC_CMD_ICRC; - /* Always offload the checksum, since it's in the data descriptor */ - if (skb->ip_summed == CHECKSUM_PARTIAL) { - tx_flags |= I40E_TX_FLAGS_CSUM; - - i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset, - tx_ring, &cd_tunneling); - } - i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss, cd_tunneling, cd_l2tag2); diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index e29bb3e86cfd..c1dd8c5c9666 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -153,7 +153,6 @@ enum i40e_dyn_idx_t { #define DESC_NEEDED (MAX_SKB_FRAGS + 4) #define I40E_MIN_DESC_PENDING 4 -#define I40E_TX_FLAGS_CSUM BIT(0) #define I40E_TX_FLAGS_HW_VLAN BIT(1) #define I40E_TX_FLAGS_SW_VLAN BIT(2) #define I40E_TX_FLAGS_TSO BIT(3) @@ -202,12 +201,15 @@ struct i40e_tx_queue_stats { u64 tx_done_old; u64 tx_linearize; u64 tx_force_wb; + u64 tx_lost_interrupt; }; struct i40e_rx_queue_stats { u64 non_eop_descs; u64 alloc_page_failed; u64 alloc_buff_failed; + u64 page_reuse_count; + u64 realloc_count; }; enum i40e_ring_state_t { @@ -253,7 +255,6 @@ struct i40e_ring { #define I40E_RX_DTYPE_NO_SPLIT 0 #define I40E_RX_DTYPE_HEADER_SPLIT 1 #define I40E_RX_DTYPE_SPLIT_ALWAYS 2 - u8 hsplit; #define I40E_RX_SPLIT_L2 0x1 #define I40E_RX_SPLIT_IP 0x2 #define I40E_RX_SPLIT_TCP_UDP 0x4 @@ -273,7 +274,6 @@ struct i40e_ring { u16 flags; #define I40E_TXR_FLAGS_WB_ON_ITR BIT(0) -#define I40E_TXR_FLAGS_OUTER_UDP_CSUM BIT(1) /* stats structs */ struct i40e_queue_stats stats; @@ -313,8 +313,8 @@ struct i40e_ring_container { #define i40e_for_each_ring(pos, head) \ for (pos = (head).ring; pos != NULL; pos = pos->next) -void i40evf_alloc_rx_buffers_ps(struct i40e_ring *rxr, u16 cleaned_count); -void i40evf_alloc_rx_buffers_1buf(struct i40e_ring *rxr, u16 cleaned_count); +bool i40evf_alloc_rx_buffers_ps(struct i40e_ring *rxr, u16 cleaned_count); +bool i40evf_alloc_rx_buffers_1buf(struct i40e_ring *rxr, u16 cleaned_count); void i40evf_alloc_rx_headers(struct i40e_ring *rxr); netdev_tx_t i40evf_xmit_frame(struct sk_buff *skb, struct net_device *netdev); void i40evf_clean_tx_ring(struct i40e_ring *tx_ring); @@ -324,7 +324,10 @@ int i40evf_setup_rx_descriptors(struct i40e_ring *rx_ring); void i40evf_free_tx_resources(struct i40e_ring *tx_ring); void i40evf_free_rx_resources(struct i40e_ring *rx_ring); int i40evf_napi_poll(struct napi_struct *napi, int budget); -u32 i40evf_get_tx_pending(struct i40e_ring *ring); +void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector); +u32 i40evf_get_tx_pending(struct i40e_ring *ring, bool in_sw); +int __i40evf_maybe_stop_tx(struct i40e_ring *tx_ring, int size); +bool __i40evf_chk_linearize(struct sk_buff *skb); /** * i40e_get_head - Retrieve head from head writeback @@ -339,4 +342,63 @@ static inline u32 i40e_get_head(struct i40e_ring *tx_ring) return le32_to_cpu(*(volatile __le32 *)head); } + +/** + * i40e_xmit_descriptor_count - calculate number of Tx descriptors needed + * @skb: send buffer + * @tx_ring: ring to send buffer on + * + * Returns number of data descriptors needed for this skb. Returns 0 to indicate + * there is not enough descriptors available in this ring since we need at least + * one descriptor. + **/ +static inline int i40e_xmit_descriptor_count(struct sk_buff *skb) +{ + const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + unsigned int nr_frags = skb_shinfo(skb)->nr_frags; + int count = 0, size = skb_headlen(skb); + + for (;;) { + count += TXD_USE_COUNT(size); + + if (!nr_frags--) + break; + + size = skb_frag_size(frag++); + } + + return count; +} + +/** + * i40e_maybe_stop_tx - 1st level check for Tx stop conditions + * @tx_ring: the ring to be checked + * @size: the size buffer we want to assure is available + * + * Returns 0 if stop is not needed + **/ +static inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) +{ + if (likely(I40E_DESC_UNUSED(tx_ring) >= size)) + return 0; + return __i40evf_maybe_stop_tx(tx_ring, size); +} + +/** + * i40e_chk_linearize - Check if there are more than 8 fragments per packet + * @skb: send buffer + * @count: number of buffers used + * + * Note: Our HW can't scatter-gather more than 8 fragments to build + * a packet on the wire and so we need to figure out the cases where we + * need to linearize the skb. + **/ +static inline bool i40e_chk_linearize(struct sk_buff *skb, int count) +{ + /* we can only support up to 8 data buffers for a single send */ + if (likely(count <= I40E_MAX_BUFFER_TXD)) + return false; + + return __i40evf_chk_linearize(skb); +} #endif /* _I40E_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index be1b72b93888..e657eccd232c 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2014 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -173,6 +173,7 @@ enum i40evf_state_t { __I40EVF_RESETTING, /* in reset */ /* Below here, watchdog is running */ __I40EVF_DOWN, /* ready, can be opened */ + __I40EVF_DOWN_PENDING, /* descending, waiting for watchdog */ __I40EVF_TESTING, /* in ethtool self-test */ __I40EVF_RUNNING, /* opened, working */ }; @@ -273,6 +274,9 @@ struct i40evf_adapter { }; +/* Ethtool Private Flags */ +#define I40EVF_PRIV_FLAGS_PS BIT(0) + /* needed by i40evf_ethtool.c */ extern char i40evf_driver_name[]; extern const char i40evf_driver_version[]; @@ -280,6 +284,7 @@ extern const char i40evf_driver_version[]; int i40evf_up(struct i40evf_adapter *adapter); void i40evf_down(struct i40evf_adapter *adapter); int i40evf_process_config(struct i40evf_adapter *adapter); +void i40evf_schedule_reset(struct i40evf_adapter *adapter); void i40evf_reset(struct i40evf_adapter *adapter); void i40evf_set_ethtool_ops(struct net_device *netdev); void i40evf_update_stats(struct i40evf_adapter *adapter); diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c index a4c9feb589e7..dd4430aae7fa 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2015 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -63,6 +63,12 @@ static const struct i40evf_stats i40evf_gstrings_stats[] = { #define I40EVF_STATS_LEN(_dev) \ (I40EVF_GLOBAL_STATS_LEN + I40EVF_QUEUE_STATS_LEN(_dev)) +static const char i40evf_priv_flags_strings[][ETH_GSTRING_LEN] = { + "packet-split", +}; + +#define I40EVF_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40evf_priv_flags_strings) + /** * i40evf_get_settings - Get Link Speed and Duplex settings * @netdev: network interface device structure @@ -97,6 +103,8 @@ static int i40evf_get_sset_count(struct net_device *netdev, int sset) { if (sset == ETH_SS_STATS) return I40EVF_STATS_LEN(netdev); + else if (sset == ETH_SS_PRIV_FLAGS) + return I40EVF_PRIV_FLAGS_STR_LEN; else return -EINVAL; } @@ -162,6 +170,12 @@ static void i40evf_get_strings(struct net_device *netdev, u32 sset, u8 *data) snprintf(p, ETH_GSTRING_LEN, "rx-%u.bytes", i); p += ETH_GSTRING_LEN; } + } else if (sset == ETH_SS_PRIV_FLAGS) { + for (i = 0; i < I40EVF_PRIV_FLAGS_STR_LEN; i++) { + memcpy(data, i40evf_priv_flags_strings[i], + ETH_GSTRING_LEN); + data += ETH_GSTRING_LEN; + } } } @@ -211,6 +225,7 @@ static void i40evf_get_drvinfo(struct net_device *netdev, strlcpy(drvinfo->version, i40evf_driver_version, 32); strlcpy(drvinfo->fw_version, "N/A", 4); strlcpy(drvinfo->bus_info, pci_name(adapter->pdev), 32); + drvinfo->n_priv_flags = I40EVF_PRIV_FLAGS_STR_LEN; } /** @@ -459,6 +474,7 @@ static int i40evf_set_rss_hash_opt(struct i40evf_adapter *adapter, struct ethtool_rxnfc *nfc) { struct i40e_hw *hw = &adapter->hw; + u32 flags = adapter->vf_res->vf_offload_flags; u64 hena = (u64)rd32(hw, I40E_VFQF_HENA(0)) | ((u64)rd32(hw, I40E_VFQF_HENA(1)) << 32); @@ -477,54 +493,50 @@ static int i40evf_set_rss_hash_opt(struct i40evf_adapter *adapter, switch (nfc->flow_type) { case TCP_V4_FLOW: - switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { - case 0: - hena &= ~BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP); - break; - case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + if (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + if (flags & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK); + hena |= BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP); - break; - default: + } else { return -EINVAL; } break; case TCP_V6_FLOW: - switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { - case 0: - hena &= ~BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP); - break; - case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + if (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + if (flags & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK); + hena |= BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP); - break; - default: + } else { return -EINVAL; } break; case UDP_V4_FLOW: - switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { - case 0: - hena &= ~(BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | - BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4)); - break; - case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + if (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + if (flags & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) | + BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP); + hena |= (BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4)); - break; - default: + } else { return -EINVAL; } break; case UDP_V6_FLOW: - switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { - case 0: - hena &= ~(BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_UDP) | - BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6)); - break; - case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + if (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + if (flags & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | + BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP); + hena |= (BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_UDP) | BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6)); - break; - default: + } else { return -EINVAL; } break; @@ -713,6 +725,54 @@ static int i40evf_set_rxfh(struct net_device *netdev, const u32 *indir, I40EVF_HLUT_ARRAY_SIZE); } +/** + * i40evf_get_priv_flags - report device private flags + * @dev: network interface device structure + * + * The get string set count and the string set should be matched for each + * flag returned. Add new strings for each flag to the i40e_priv_flags_strings + * array. + * + * Returns a u32 bitmap of flags. + **/ +static u32 i40evf_get_priv_flags(struct net_device *dev) +{ + struct i40evf_adapter *adapter = netdev_priv(dev); + u32 ret_flags = 0; + + ret_flags |= adapter->flags & I40EVF_FLAG_RX_PS_ENABLED ? + I40EVF_PRIV_FLAGS_PS : 0; + + return ret_flags; +} + +/** + * i40evf_set_priv_flags - set private flags + * @dev: network interface device structure + * @flags: bit flags to be set + **/ +static int i40evf_set_priv_flags(struct net_device *dev, u32 flags) +{ + struct i40evf_adapter *adapter = netdev_priv(dev); + bool reset_required = false; + + if ((flags & I40EVF_PRIV_FLAGS_PS) && + !(adapter->flags & I40EVF_FLAG_RX_PS_ENABLED)) { + adapter->flags |= I40EVF_FLAG_RX_PS_ENABLED; + reset_required = true; + } else if (!(flags & I40EVF_PRIV_FLAGS_PS) && + (adapter->flags & I40EVF_FLAG_RX_PS_ENABLED)) { + adapter->flags &= ~I40EVF_FLAG_RX_PS_ENABLED; + reset_required = true; + } + + /* if needed, issue reset to cause things to take effect */ + if (reset_required) + i40evf_schedule_reset(adapter); + + return 0; +} + static const struct ethtool_ops i40evf_ethtool_ops = { .get_settings = i40evf_get_settings, .get_drvinfo = i40evf_get_drvinfo, @@ -722,6 +782,8 @@ static const struct ethtool_ops i40evf_ethtool_ops = { .get_strings = i40evf_get_strings, .get_ethtool_stats = i40evf_get_ethtool_stats, .get_sset_count = i40evf_get_sset_count, + .get_priv_flags = i40evf_get_priv_flags, + .set_priv_flags = i40evf_set_priv_flags, .get_msglevel = i40evf_get_msglevel, .set_msglevel = i40evf_set_msglevel, .get_coalesce = i40evf_get_coalesce, diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 94da913b151d..4b70aae2fa84 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver - * Copyright(c) 2013 - 2015 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -32,13 +32,13 @@ static int i40evf_close(struct net_device *netdev); char i40evf_driver_name[] = "i40evf"; static const char i40evf_driver_string[] = - "Intel(R) XL710/X710 Virtual Function Network Driver"; + "Intel(R) 40-10 Gigabit Virtual Function Network Driver"; #define DRV_KERN "-k" #define DRV_VERSION_MAJOR 1 #define DRV_VERSION_MINOR 4 -#define DRV_VERSION_BUILD 4 +#define DRV_VERSION_BUILD 15 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) \ @@ -69,6 +69,8 @@ MODULE_DESCRIPTION("Intel(R) XL710 X710 Virtual Function Network Driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); +static struct workqueue_struct *i40evf_wq; + /** * i40evf_allocate_dma_mem_d - OS specific memory alloc for shared code * @hw: pointer to the HW structure @@ -171,6 +173,19 @@ void i40evf_debug_d(void *hw, u32 mask, char *fmt_str, ...) } /** + * i40evf_schedule_reset - Set the flags and schedule a reset event + * @adapter: board private structure + **/ +void i40evf_schedule_reset(struct i40evf_adapter *adapter) +{ + if (!(adapter->flags & + (I40EVF_FLAG_RESET_PENDING | I40EVF_FLAG_RESET_NEEDED))) { + adapter->flags |= I40EVF_FLAG_RESET_NEEDED; + schedule_work(&adapter->reset_task); + } +} + +/** * i40evf_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ @@ -179,11 +194,7 @@ static void i40evf_tx_timeout(struct net_device *netdev) struct i40evf_adapter *adapter = netdev_priv(netdev); adapter->tx_timeout_count++; - if (!(adapter->flags & (I40EVF_FLAG_RESET_PENDING | - I40EVF_FLAG_RESET_NEEDED))) { - adapter->flags |= I40EVF_FLAG_RESET_NEEDED; - schedule_work(&adapter->reset_task); - } + i40evf_schedule_reset(adapter); } /** @@ -636,35 +647,22 @@ static void i40evf_configure_rx(struct i40evf_adapter *adapter) int rx_buf_len; - adapter->flags &= ~I40EVF_FLAG_RX_PS_CAPABLE; - adapter->flags |= I40EVF_FLAG_RX_1BUF_CAPABLE; - - /* Decide whether to use packet split mode or not */ - if (netdev->mtu > ETH_DATA_LEN) { - if (adapter->flags & I40EVF_FLAG_RX_PS_CAPABLE) - adapter->flags |= I40EVF_FLAG_RX_PS_ENABLED; - else - adapter->flags &= ~I40EVF_FLAG_RX_PS_ENABLED; - } else { - if (adapter->flags & I40EVF_FLAG_RX_1BUF_CAPABLE) - adapter->flags &= ~I40EVF_FLAG_RX_PS_ENABLED; - else - adapter->flags |= I40EVF_FLAG_RX_PS_ENABLED; - } - /* Set the RX buffer length according to the mode */ - if (adapter->flags & I40EVF_FLAG_RX_PS_ENABLED) { - rx_buf_len = I40E_RX_HDR_SIZE; - } else { - if (netdev->mtu <= ETH_DATA_LEN) - rx_buf_len = I40EVF_RXBUFFER_2048; - else - rx_buf_len = ALIGN(max_frame, 1024); - } + if (adapter->flags & I40EVF_FLAG_RX_PS_ENABLED || + netdev->mtu <= ETH_DATA_LEN) + rx_buf_len = I40EVF_RXBUFFER_2048; + else + rx_buf_len = ALIGN(max_frame, 1024); for (i = 0; i < adapter->num_active_queues; i++) { adapter->rx_rings[i].tail = hw->hw_addr + I40E_QRX_TAIL1(i); adapter->rx_rings[i].rx_buf_len = rx_buf_len; + if (adapter->flags & I40EVF_FLAG_RX_PS_ENABLED) { + set_ring_ps_enabled(&adapter->rx_rings[i]); + adapter->rx_rings[i].rx_hdr_len = I40E_RX_HDR_SIZE; + } else { + clear_ring_ps_enabled(&adapter->rx_rings[i]); + } } } @@ -1001,7 +999,12 @@ static void i40evf_configure(struct i40evf_adapter *adapter) for (i = 0; i < adapter->num_active_queues; i++) { struct i40e_ring *ring = &adapter->rx_rings[i]; + if (adapter->flags & I40EVF_FLAG_RX_PS_ENABLED) { + i40evf_alloc_rx_headers(ring); + i40evf_alloc_rx_buffers_ps(ring, ring->count); + } else { i40evf_alloc_rx_buffers_1buf(ring, ring->count); + } ring->next_to_use = ring->count - 1; writel(ring->next_to_use, ring->tail); } @@ -1032,7 +1035,7 @@ void i40evf_down(struct i40evf_adapter *adapter) struct net_device *netdev = adapter->netdev; struct i40evf_mac_filter *f; - if (adapter->state == __I40EVF_DOWN) + if (adapter->state <= __I40EVF_DOWN_PENDING) return; while (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK, @@ -1122,7 +1125,9 @@ static void i40evf_free_queues(struct i40evf_adapter *adapter) if (!adapter->vsi_res) return; kfree(adapter->tx_rings); + adapter->tx_rings = NULL; kfree(adapter->rx_rings); + adapter->rx_rings = NULL; } /** @@ -1454,7 +1459,11 @@ static int i40evf_init_rss(struct i40evf_adapter *adapter) int ret; /* Enable PCTYPES for RSS, TCP/UDP with IPv4/IPv6 */ - hena = I40E_DEFAULT_RSS_HENA; + if (adapter->vf_res->vf_offload_flags & + I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2) + hena = I40E_DEFAULT_RSS_HENA_EXPANDED; + else + hena = I40E_DEFAULT_RSS_HENA; wr32(hw, I40E_VFQF_HENA(0), (u32)hena); wr32(hw, I40E_VFQF_HENA(1), (u32)(hena >> 32)); @@ -1829,6 +1838,7 @@ static void i40evf_reset_task(struct work_struct *work) break; msleep(I40EVF_RESET_WAIT_MS); } + pci_set_master(adapter->pdev); /* extra wait to make sure minimum wait is met */ msleep(I40EVF_RESET_WAIT_MS); if (i == I40EVF_RESET_WAIT_COUNT) { @@ -1873,6 +1883,7 @@ static void i40evf_reset_task(struct work_struct *work) adapter->netdev->flags &= ~IFF_UP; clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section); adapter->flags &= ~I40EVF_FLAG_RESET_PENDING; + adapter->state = __I40EVF_DOWN; dev_info(&adapter->pdev->dev, "Reset task did not complete, VF disabled\n"); return; /* Do not attempt to reinit. It's dead, Jim. */ } @@ -2142,7 +2153,8 @@ static int i40evf_open(struct net_device *netdev) dev_err(&adapter->pdev->dev, "Unable to open device due to PF driver failure.\n"); return -EIO; } - if (adapter->state != __I40EVF_DOWN || adapter->aq_required) + + if (adapter->state != __I40EVF_DOWN) return -EBUSY; /* allocate transmit descriptors */ @@ -2197,14 +2209,14 @@ static int i40evf_close(struct net_device *netdev) { struct i40evf_adapter *adapter = netdev_priv(netdev); - if (adapter->state <= __I40EVF_DOWN) + if (adapter->state <= __I40EVF_DOWN_PENDING) return 0; set_bit(__I40E_DOWN, &adapter->vsi.state); i40evf_down(adapter); - adapter->state = __I40EVF_DOWN; + adapter->state = __I40EVF_DOWN_PENDING; i40evf_free_traffic_irqs(adapter); return 0; @@ -2325,9 +2337,24 @@ int i40evf_process_config(struct i40evf_adapter *adapter) NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6 | + NETIF_F_TSO_ECN | + NETIF_F_GSO_GRE | + NETIF_F_GSO_UDP_TUNNEL | NETIF_F_RXCSUM | NETIF_F_GRO; + netdev->hw_enc_features |= NETIF_F_IP_CSUM | + NETIF_F_IPV6_CSUM | + NETIF_F_TSO | + NETIF_F_TSO6 | + NETIF_F_TSO_ECN | + NETIF_F_GSO_GRE | + NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + + if (adapter->flags & I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE) + netdev->features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; + /* copy netdev features into list of user selectable features */ netdev->hw_features |= netdev->features; netdev->hw_features &= ~NETIF_F_RXCSUM; @@ -2466,11 +2493,20 @@ static void i40evf_init_task(struct work_struct *work) default: goto err_alloc; } + + if (hw->mac.type == I40E_MAC_X722_VF) + adapter->flags |= I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE; + if (i40evf_process_config(adapter)) goto err_alloc; adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN; adapter->flags |= I40EVF_FLAG_RX_CSUM_ENABLED; + adapter->flags |= I40EVF_FLAG_RX_1BUF_CAPABLE; + adapter->flags |= I40EVF_FLAG_RX_PS_CAPABLE; + + /* Default to single buffer rx, can be changed through ethtool. */ + adapter->flags &= ~I40EVF_FLAG_RX_PS_ENABLED; netdev->netdev_ops = &i40evf_netdev_ops; i40evf_set_ethtool_ops(netdev); @@ -2502,10 +2538,9 @@ static void i40evf_init_task(struct work_struct *work) goto err_sw_init; i40evf_map_rings_to_vectors(adapter); if (adapter->vf_res->vf_offload_flags & - I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR) + I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR) adapter->flags |= I40EVF_FLAG_WB_ON_ITR_CAPABLE; - if (!RSS_AQ(adapter)) - i40evf_init_rss(adapter); + err = i40evf_request_misc_irq(adapter); if (err) goto err_sw_init; @@ -2885,6 +2920,11 @@ static int __init i40evf_init_module(void) pr_info("%s\n", i40evf_copyright); + i40evf_wq = create_singlethread_workqueue(i40evf_driver_name); + if (!i40evf_wq) { + pr_err("%s: Failed to create workqueue\n", i40evf_driver_name); + return -ENOMEM; + } ret = pci_register_driver(&i40evf_driver); return ret; } @@ -2900,6 +2940,7 @@ module_init(i40evf_init_module); static void __exit i40evf_exit_module(void) { pci_unregister_driver(&i40evf_driver); + destroy_workqueue(i40evf_wq); } module_exit(i40evf_exit_module); diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c index c1c526283757..488e738f76c6 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c @@ -270,6 +270,10 @@ void i40evf_configure_queues(struct i40evf_adapter *adapter) vqpi->rxq.max_pkt_size = adapter->netdev->mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN; vqpi->rxq.databuffer_size = adapter->rx_rings[i].rx_buf_len; + if (adapter->flags & I40EVF_FLAG_RX_PS_ENABLED) { + vqpi->rxq.splithdr_enabled = true; + vqpi->rxq.hdr_size = I40E_RX_HDR_SIZE; + } vqpi++; } @@ -804,6 +808,8 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, case I40E_VIRTCHNL_OP_DISABLE_QUEUES: i40evf_free_all_tx_resources(adapter); i40evf_free_all_rx_resources(adapter); + if (adapter->state == __I40EVF_DOWN_PENDING) + adapter->state = __I40EVF_DOWN; break; case I40E_VIRTCHNL_OP_VERSION: case I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP: diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c index adb33e2a0137..a23aa6704394 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.c +++ b/drivers/net/ethernet/intel/igb/e1000_82575.c @@ -34,6 +34,7 @@ #include "e1000_mac.h" #include "e1000_82575.h" #include "e1000_i210.h" +#include "igb.h" static s32 igb_get_invariants_82575(struct e1000_hw *); static s32 igb_acquire_phy_82575(struct e1000_hw *); @@ -71,6 +72,32 @@ static s32 igb_update_nvm_checksum_i350(struct e1000_hw *hw); static const u16 e1000_82580_rxpbs_table[] = { 36, 72, 144, 1, 2, 4, 8, 16, 35, 70, 140 }; +/* Due to a hw errata, if the host tries to configure the VFTA register + * while performing queries from the BMC or DMA, then the VFTA in some + * cases won't be written. + */ + +/** + * igb_write_vfta_i350 - Write value to VLAN filter table + * @hw: pointer to the HW structure + * @offset: register offset in VLAN filter table + * @value: register value written to VLAN filter table + * + * Writes value at the given offset in the register array which stores + * the VLAN filter table. + **/ +static void igb_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value) +{ + struct igb_adapter *adapter = hw->back; + int i; + + for (i = 10; i--;) + array_wr32(E1000_VFTA, offset, value); + + wrfl(); + adapter->shadow_vfta[offset] = value; +} + /** * igb_sgmii_uses_mdio_82575 - Determine if I2C pins are for external MDIO * @hw: pointer to the HW structure @@ -398,6 +425,8 @@ static s32 igb_init_mac_params_82575(struct e1000_hw *hw) /* Set mta register count */ mac->mta_reg_count = 128; + /* Set uta register count */ + mac->uta_reg_count = (hw->mac.type == e1000_82575) ? 0 : 128; /* Set rar entry count */ switch (mac->type) { case e1000_82576: @@ -429,6 +458,11 @@ static s32 igb_init_mac_params_82575(struct e1000_hw *hw) mac->ops.release_swfw_sync = igb_release_swfw_sync_82575; } + if ((hw->mac.type == e1000_i350) || (hw->mac.type == e1000_i354)) + mac->ops.write_vfta = igb_write_vfta_i350; + else + mac->ops.write_vfta = igb_write_vfta; + /* Set if part includes ASF firmware */ mac->asf_firmware_present = true; /* Set if manageability features are enabled. */ @@ -1517,10 +1551,7 @@ static s32 igb_init_hw_82575(struct e1000_hw *hw) /* Disabling VLAN filtering */ hw_dbg("Initializing the IEEE VLAN\n"); - if ((hw->mac.type == e1000_i350) || (hw->mac.type == e1000_i354)) - igb_clear_vfta_i350(hw); - else - igb_clear_vfta(hw); + igb_clear_vfta(hw); /* Setup the receive address */ igb_init_rx_addrs(hw, rar_count); @@ -2889,7 +2920,7 @@ static struct e1000_mac_operations e1000_mac_ops_82575 = { #endif }; -static struct e1000_phy_operations e1000_phy_ops_82575 = { +static const struct e1000_phy_operations e1000_phy_ops_82575 = { .acquire = igb_acquire_phy_82575, .get_cfg_done = igb_get_cfg_done_82575, .release = igb_release_phy_82575, diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.h b/drivers/net/ethernet/intel/igb/e1000_82575.h index 2154aea7aa7e..de8805a2a2fe 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.h +++ b/drivers/net/ethernet/intel/igb/e1000_82575.h @@ -56,10 +56,10 @@ s32 igb_write_i2c_byte(struct e1000_hw *hw, u8 byte_offset, u8 dev_addr, #define E1000_SRRCTL_TIMESTAMP 0x40000000 -#define E1000_MRQC_ENABLE_RSS_4Q 0x00000002 +#define E1000_MRQC_ENABLE_RSS_MQ 0x00000002 #define E1000_MRQC_ENABLE_VMDQ 0x00000003 #define E1000_MRQC_RSS_FIELD_IPV4_UDP 0x00400000 -#define E1000_MRQC_ENABLE_VMDQ_RSS_2Q 0x00000005 +#define E1000_MRQC_ENABLE_VMDQ_RSS_MQ 0x00000005 #define E1000_MRQC_RSS_FIELD_IPV6_UDP 0x00800000 #define E1000_MRQC_RSS_FIELD_IPV6_UDP_EX 0x01000000 diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h index c3c598c347a9..e9f23ee8f15e 100644 --- a/drivers/net/ethernet/intel/igb/e1000_defines.h +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -356,7 +356,8 @@ /* Ethertype field values */ #define ETHERNET_IEEE_VLAN_TYPE 0x8100 /* 802.3ac packet */ -#define MAX_JUMBO_FRAME_SIZE 0x3F00 +/* As per the EAS the maximum supported size is 9.5KB (9728 bytes) */ +#define MAX_JUMBO_FRAME_SIZE 0x2600 /* PBA constants */ #define E1000_PBA_34K 0x0022 diff --git a/drivers/net/ethernet/intel/igb/e1000_hw.h b/drivers/net/ethernet/intel/igb/e1000_hw.h index 4034207eb5cc..2fb2213cd562 100644 --- a/drivers/net/ethernet/intel/igb/e1000_hw.h +++ b/drivers/net/ethernet/intel/igb/e1000_hw.h @@ -325,7 +325,7 @@ struct e1000_mac_operations { s32 (*get_thermal_sensor_data)(struct e1000_hw *); s32 (*init_thermal_sensor_thresh)(struct e1000_hw *); #endif - + void (*write_vfta)(struct e1000_hw *, u32, u32); }; struct e1000_phy_operations { @@ -372,7 +372,7 @@ struct e1000_thermal_sensor_data { struct e1000_info { s32 (*get_invariants)(struct e1000_hw *); struct e1000_mac_operations *mac_ops; - struct e1000_phy_operations *phy_ops; + const struct e1000_phy_operations *phy_ops; struct e1000_nvm_operations *nvm_ops; }; diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c index 2a88595f956c..07cf4fe58338 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mac.c +++ b/drivers/net/ethernet/intel/igb/e1000_mac.c @@ -92,10 +92,8 @@ void igb_clear_vfta(struct e1000_hw *hw) { u32 offset; - for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { - array_wr32(E1000_VFTA, offset, 0); - wrfl(); - } + for (offset = E1000_VLAN_FILTER_TBL_SIZE; offset--;) + hw->mac.ops.write_vfta(hw, offset, 0); } /** @@ -107,54 +105,14 @@ void igb_clear_vfta(struct e1000_hw *hw) * Writes value at the given offset in the register array which stores * the VLAN filter table. **/ -static void igb_write_vfta(struct e1000_hw *hw, u32 offset, u32 value) +void igb_write_vfta(struct e1000_hw *hw, u32 offset, u32 value) { + struct igb_adapter *adapter = hw->back; + array_wr32(E1000_VFTA, offset, value); wrfl(); -} - -/* Due to a hw errata, if the host tries to configure the VFTA register - * while performing queries from the BMC or DMA, then the VFTA in some - * cases won't be written. - */ -/** - * igb_clear_vfta_i350 - Clear VLAN filter table - * @hw: pointer to the HW structure - * - * Clears the register array which contains the VLAN filter table by - * setting all the values to 0. - **/ -void igb_clear_vfta_i350(struct e1000_hw *hw) -{ - u32 offset; - int i; - - for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { - for (i = 0; i < 10; i++) - array_wr32(E1000_VFTA, offset, 0); - - wrfl(); - } -} - -/** - * igb_write_vfta_i350 - Write value to VLAN filter table - * @hw: pointer to the HW structure - * @offset: register offset in VLAN filter table - * @value: register value written to VLAN filter table - * - * Writes value at the given offset in the register array which stores - * the VLAN filter table. - **/ -static void igb_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value) -{ - int i; - - for (i = 0; i < 10; i++) - array_wr32(E1000_VFTA, offset, value); - - wrfl(); + adapter->shadow_vfta[offset] = value; } /** @@ -183,40 +141,155 @@ void igb_init_rx_addrs(struct e1000_hw *hw, u16 rar_count) } /** + * igb_find_vlvf_slot - find the VLAN id or the first empty slot + * @hw: pointer to hardware structure + * @vlan: VLAN id to write to VLAN filter + * @vlvf_bypass: skip VLVF if no match is found + * + * return the VLVF index where this VLAN id should be placed + * + **/ +static s32 igb_find_vlvf_slot(struct e1000_hw *hw, u32 vlan, bool vlvf_bypass) +{ + s32 regindex, first_empty_slot; + u32 bits; + + /* short cut the special case */ + if (vlan == 0) + return 0; + + /* if vlvf_bypass is set we don't want to use an empty slot, we + * will simply bypass the VLVF if there are no entries present in the + * VLVF that contain our VLAN + */ + first_empty_slot = vlvf_bypass ? -E1000_ERR_NO_SPACE : 0; + + /* Search for the VLAN id in the VLVF entries. Save off the first empty + * slot found along the way. + * + * pre-decrement loop covering (IXGBE_VLVF_ENTRIES - 1) .. 1 + */ + for (regindex = E1000_VLVF_ARRAY_SIZE; --regindex > 0;) { + bits = rd32(E1000_VLVF(regindex)) & E1000_VLVF_VLANID_MASK; + if (bits == vlan) + return regindex; + if (!first_empty_slot && !bits) + first_empty_slot = regindex; + } + + return first_empty_slot ? : -E1000_ERR_NO_SPACE; +} + +/** * igb_vfta_set - enable or disable vlan in VLAN filter table * @hw: pointer to the HW structure - * @vid: VLAN id to add or remove - * @add: if true add filter, if false remove + * @vlan: VLAN id to add or remove + * @vind: VMDq output index that maps queue to VLAN id + * @vlan_on: if true add filter, if false remove * * Sets or clears a bit in the VLAN filter table array based on VLAN id * and if we are adding or removing the filter **/ -s32 igb_vfta_set(struct e1000_hw *hw, u32 vid, bool add) +s32 igb_vfta_set(struct e1000_hw *hw, u32 vlan, u32 vind, + bool vlan_on, bool vlvf_bypass) { - u32 index = (vid >> E1000_VFTA_ENTRY_SHIFT) & E1000_VFTA_ENTRY_MASK; - u32 mask = 1 << (vid & E1000_VFTA_ENTRY_BIT_SHIFT_MASK); - u32 vfta; struct igb_adapter *adapter = hw->back; - s32 ret_val = 0; + u32 regidx, vfta_delta, vfta, bits; + s32 vlvf_index; - vfta = adapter->shadow_vfta[index]; + if ((vlan > 4095) || (vind > 7)) + return -E1000_ERR_PARAM; - /* bit was set/cleared before we started */ - if ((!!(vfta & mask)) == add) { - ret_val = -E1000_ERR_CONFIG; - } else { - if (add) - vfta |= mask; - else - vfta &= ~mask; + /* this is a 2 part operation - first the VFTA, then the + * VLVF and VLVFB if VT Mode is set + * We don't write the VFTA until we know the VLVF part succeeded. + */ + + /* Part 1 + * The VFTA is a bitstring made up of 128 32-bit registers + * that enable the particular VLAN id, much like the MTA: + * bits[11-5]: which register + * bits[4-0]: which bit in the register + */ + regidx = vlan / 32; + vfta_delta = 1 << (vlan % 32); + vfta = adapter->shadow_vfta[regidx]; + + /* vfta_delta represents the difference between the current value + * of vfta and the value we want in the register. Since the diff + * is an XOR mask we can just update vfta using an XOR. + */ + vfta_delta &= vlan_on ? ~vfta : vfta; + vfta ^= vfta_delta; + + /* Part 2 + * If VT Mode is set + * Either vlan_on + * make sure the VLAN is in VLVF + * set the vind bit in the matching VLVFB + * Or !vlan_on + * clear the pool bit and possibly the vind + */ + if (!adapter->vfs_allocated_count) + goto vfta_update; + + vlvf_index = igb_find_vlvf_slot(hw, vlan, vlvf_bypass); + if (vlvf_index < 0) { + if (vlvf_bypass) + goto vfta_update; + return vlvf_index; } - if ((hw->mac.type == e1000_i350) || (hw->mac.type == e1000_i354)) - igb_write_vfta_i350(hw, index, vfta); - else - igb_write_vfta(hw, index, vfta); - adapter->shadow_vfta[index] = vfta; - return ret_val; + bits = rd32(E1000_VLVF(vlvf_index)); + + /* set the pool bit */ + bits |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vind); + if (vlan_on) + goto vlvf_update; + + /* clear the pool bit */ + bits ^= 1 << (E1000_VLVF_POOLSEL_SHIFT + vind); + + if (!(bits & E1000_VLVF_POOLSEL_MASK)) { + /* Clear VFTA first, then disable VLVF. Otherwise + * we run the risk of stray packets leaking into + * the PF via the default pool + */ + if (vfta_delta) + hw->mac.ops.write_vfta(hw, regidx, vfta); + + /* disable VLVF and clear remaining bit from pool */ + wr32(E1000_VLVF(vlvf_index), 0); + + return 0; + } + + /* If there are still bits set in the VLVFB registers + * for the VLAN ID indicated we need to see if the + * caller is requesting that we clear the VFTA entry bit. + * If the caller has requested that we clear the VFTA + * entry bit but there are still pools/VFs using this VLAN + * ID entry then ignore the request. We're not worried + * about the case where we're turning the VFTA VLAN ID + * entry bit on, only when requested to turn it off as + * there may be multiple pools and/or VFs using the + * VLAN ID entry. In that case we cannot clear the + * VFTA bit until all pools/VFs using that VLAN ID have also + * been cleared. This will be indicated by "bits" being + * zero. + */ + vfta_delta = 0; + +vlvf_update: + /* record pool change and enable VLAN ID if not already enabled */ + wr32(E1000_VLVF(vlvf_index), bits | vlan | E1000_VLVF_VLANID_ENABLE); + +vfta_update: + /* bit was set/cleared before we started */ + if (vfta_delta) + hw->mac.ops.write_vfta(hw, regidx, vfta); + + return 0; } /** diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.h b/drivers/net/ethernet/intel/igb/e1000_mac.h index ea24961b0d70..90c8893c3eed 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mac.h +++ b/drivers/net/ethernet/intel/igb/e1000_mac.h @@ -56,8 +56,9 @@ s32 igb_write_8bit_ctrl_reg(struct e1000_hw *hw, u32 reg, void igb_clear_hw_cntrs_base(struct e1000_hw *hw); void igb_clear_vfta(struct e1000_hw *hw); -void igb_clear_vfta_i350(struct e1000_hw *hw); -s32 igb_vfta_set(struct e1000_hw *hw, u32 vid, bool add); +void igb_write_vfta(struct e1000_hw *hw, u32 offset, u32 value); +s32 igb_vfta_set(struct e1000_hw *hw, u32 vid, u32 vind, + bool vlan_on, bool vlvf_bypass); void igb_config_collision_dist(struct e1000_hw *hw); void igb_init_rx_addrs(struct e1000_hw *hw, u16 rar_count); void igb_mta_set(struct e1000_hw *hw, u32 hash_value); diff --git a/drivers/net/ethernet/intel/igb/e1000_mbx.c b/drivers/net/ethernet/intel/igb/e1000_mbx.c index 162cc49345d0..10f5c9e016a9 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mbx.c +++ b/drivers/net/ethernet/intel/igb/e1000_mbx.c @@ -322,14 +322,20 @@ static s32 igb_obtain_mbx_lock_pf(struct e1000_hw *hw, u16 vf_number) { s32 ret_val = -E1000_ERR_MBX; u32 p2v_mailbox; + int count = 10; - /* Take ownership of the buffer */ - wr32(E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_PFU); + do { + /* Take ownership of the buffer */ + wr32(E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_PFU); - /* reserve mailbox for vf use */ - p2v_mailbox = rd32(E1000_P2VMAILBOX(vf_number)); - if (p2v_mailbox & E1000_P2VMAILBOX_PFU) - ret_val = 0; + /* reserve mailbox for vf use */ + p2v_mailbox = rd32(E1000_P2VMAILBOX(vf_number)); + if (p2v_mailbox & E1000_P2VMAILBOX_PFU) { + ret_val = 0; + break; + } + udelay(1000); + } while (count-- > 0); return ret_val; } diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index e3cb93bdb21a..9413fa61392f 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -95,7 +95,6 @@ struct vf_data_storage { unsigned char vf_mac_addresses[ETH_ALEN]; u16 vf_mc_hashes[IGB_MAX_VF_MC_ENTRIES]; u16 num_vf_mc_hashes; - u16 vlans_enabled; u32 flags; unsigned long last_nack; u16 pf_vlan; /* When set, guest VLAN config not allowed. */ @@ -482,6 +481,7 @@ struct igb_adapter { #define IGB_FLAG_MAS_ENABLE (1 << 12) #define IGB_FLAG_HAS_MSIX (1 << 13) #define IGB_FLAG_EEE (1 << 14) +#define IGB_FLAG_VLAN_PROMISC BIT(15) /* Media Auto Sense */ #define IGB_MAS_ENABLE_0 0X0001 @@ -510,6 +510,8 @@ enum igb_boards { extern char igb_driver_name[]; extern char igb_driver_version[]; +int igb_open(struct net_device *netdev); +int igb_close(struct net_device *netdev); int igb_up(struct igb_adapter *); void igb_down(struct igb_adapter *); void igb_reinit_locked(struct igb_adapter *); diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 1d329f1d047b..7982243d1f9b 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2017,7 +2017,7 @@ static void igb_diag_test(struct net_device *netdev, if (if_running) /* indicate we're in test mode */ - dev_close(netdev); + igb_close(netdev); else igb_reset(adapter); @@ -2050,7 +2050,7 @@ static void igb_diag_test(struct net_device *netdev, clear_bit(__IGB_TESTING, &adapter->state); if (if_running) - dev_open(netdev); + igb_open(netdev); } else { dev_info(&adapter->pdev->dev, "online testing starting\n"); diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 5b4ad1ad4d5f..55a1405cb2a1 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -122,8 +122,8 @@ static void igb_setup_mrqc(struct igb_adapter *); static int igb_probe(struct pci_dev *, const struct pci_device_id *); static void igb_remove(struct pci_dev *pdev); static int igb_sw_init(struct igb_adapter *); -static int igb_open(struct net_device *); -static int igb_close(struct net_device *); +int igb_open(struct net_device *); +int igb_close(struct net_device *); static void igb_configure(struct igb_adapter *); static void igb_configure_tx(struct igb_adapter *); static void igb_configure_rx(struct igb_adapter *); @@ -140,7 +140,7 @@ static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats); static int igb_change_mtu(struct net_device *, int); static int igb_set_mac(struct net_device *, void *); -static void igb_set_uta(struct igb_adapter *adapter); +static void igb_set_uta(struct igb_adapter *adapter, bool set); static irqreturn_t igb_intr(int irq, void *); static irqreturn_t igb_intr_msi(int irq, void *); static irqreturn_t igb_msix_other(int irq, void *); @@ -1534,12 +1534,13 @@ static void igb_irq_enable(struct igb_adapter *adapter) static void igb_update_mng_vlan(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; + u16 pf_id = adapter->vfs_allocated_count; u16 vid = adapter->hw.mng_cookie.vlan_id; u16 old_vid = adapter->mng_vlan_id; if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) { /* add VID to filter table */ - igb_vfta_set(hw, vid, true); + igb_vfta_set(hw, vid, pf_id, true, true); adapter->mng_vlan_id = vid; } else { adapter->mng_vlan_id = IGB_MNG_VLAN_NONE; @@ -1549,7 +1550,7 @@ static void igb_update_mng_vlan(struct igb_adapter *adapter) (vid != old_vid) && !test_bit(old_vid, adapter->active_vlans)) { /* remove VID from filter table */ - igb_vfta_set(hw, old_vid, false); + igb_vfta_set(hw, vid, pf_id, false, true); } } @@ -1818,6 +1819,10 @@ void igb_down(struct igb_adapter *adapter) if (!pci_channel_offline(adapter->pdev)) igb_reset(adapter); + + /* clear VLAN promisc flag so VFTA will be updated if necessary */ + adapter->flags &= ~IGB_FLAG_VLAN_PROMISC; + igb_clean_all_tx_rings(adapter); igb_clean_all_rx_rings(adapter); #ifdef CONFIG_IGB_DCA @@ -1862,7 +1867,7 @@ void igb_reset(struct igb_adapter *adapter) struct e1000_hw *hw = &adapter->hw; struct e1000_mac_info *mac = &hw->mac; struct e1000_fc_info *fc = &hw->fc; - u32 pba = 0, tx_space, min_tx_space, min_rx_space, hwm; + u32 pba, hwm; /* Repartition Pba for greater than 9k mtu * To take effect CTRL.RST is required. @@ -1886,9 +1891,10 @@ void igb_reset(struct igb_adapter *adapter) break; } - if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) && - (mac->type < e1000_82576)) { - /* adjust PBA for jumbo frames */ + if (mac->type == e1000_82575) { + u32 min_rx_space, min_tx_space, needed_tx_space; + + /* write Rx PBA so that hardware can report correct Tx PBA */ wr32(E1000_PBA, pba); /* To maintain wire speed transmits, the Tx FIFO should be @@ -1898,31 +1904,26 @@ void igb_reset(struct igb_adapter *adapter) * one full receive packet and is similarly rounded up and * expressed in KB. */ - pba = rd32(E1000_PBA); - /* upper 16 bits has Tx packet buffer allocation size in KB */ - tx_space = pba >> 16; - /* lower 16 bits has Rx packet buffer allocation size in KB */ - pba &= 0xffff; - /* the Tx fifo also stores 16 bytes of information about the Tx - * but don't include ethernet FCS because hardware appends it + min_rx_space = DIV_ROUND_UP(MAX_JUMBO_FRAME_SIZE, 1024); + + /* The Tx FIFO also stores 16 bytes of information about the Tx + * but don't include Ethernet FCS because hardware appends it. + * We only need to round down to the nearest 512 byte block + * count since the value we care about is 2 frames, not 1. */ - min_tx_space = (adapter->max_frame_size + - sizeof(union e1000_adv_tx_desc) - - ETH_FCS_LEN) * 2; - min_tx_space = ALIGN(min_tx_space, 1024); - min_tx_space >>= 10; - /* software strips receive CRC, so leave room for it */ - min_rx_space = adapter->max_frame_size; - min_rx_space = ALIGN(min_rx_space, 1024); - min_rx_space >>= 10; + min_tx_space = adapter->max_frame_size; + min_tx_space += sizeof(union e1000_adv_tx_desc) - ETH_FCS_LEN; + min_tx_space = DIV_ROUND_UP(min_tx_space, 512); + + /* upper 16 bits has Tx packet buffer allocation size in KB */ + needed_tx_space = min_tx_space - (rd32(E1000_PBA) >> 16); /* If current Tx allocation is less than the min Tx FIFO size, * and the min Tx FIFO size is less than the current Rx FIFO - * allocation, take space away from current Rx allocation + * allocation, take space away from current Rx allocation. */ - if (tx_space < min_tx_space && - ((min_tx_space - tx_space) < pba)) { - pba = pba - (min_tx_space - tx_space); + if (needed_tx_space < pba) { + pba -= needed_tx_space; /* if short on Rx space, Rx wins and must trump Tx * adjustment @@ -1930,18 +1931,20 @@ void igb_reset(struct igb_adapter *adapter) if (pba < min_rx_space) pba = min_rx_space; } + + /* adjust PBA for jumbo frames */ wr32(E1000_PBA, pba); } - /* flow control settings */ - /* The high water mark must be low enough to fit one full frame - * (or the size used for early receive) above it in the Rx FIFO. - * Set it to the lower of: - * - 90% of the Rx FIFO size, or - * - the full Rx FIFO size minus one full frame + /* flow control settings + * The high water mark must be low enough to fit one full frame + * after transmitting the pause frame. As such we must have enough + * space to allow for us to complete our current transmit and then + * receive the frame that is in progress from the link partner. + * Set it to: + * - the full Rx FIFO size minus one full Tx plus one full Rx frame */ - hwm = min(((pba << 10) * 9 / 10), - ((pba << 10) - 2 * adapter->max_frame_size)); + hwm = (pba << 10) - (adapter->max_frame_size + MAX_JUMBO_FRAME_SIZE); fc->high_water = hwm & 0xFFFFFFF0; /* 16-byte granularity */ fc->low_water = fc->high_water - 16; @@ -2051,7 +2054,7 @@ static int igb_set_features(struct net_device *netdev, if (changed & NETIF_F_HW_VLAN_CTAG_RX) igb_vlan_mode(netdev, features); - if (!(changed & NETIF_F_RXALL)) + if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE))) return 0; netdev->features = features; @@ -2064,6 +2067,25 @@ static int igb_set_features(struct net_device *netdev, return 0; } +static int igb_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 vid, + u16 flags) +{ + /* guarantee we can provide a unique filter for the unicast address */ + if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) { + struct igb_adapter *adapter = netdev_priv(dev); + struct e1000_hw *hw = &adapter->hw; + int vfn = adapter->vfs_allocated_count; + int rar_entries = hw->mac.rar_entry_count - (vfn + 1); + + if (netdev_uc_count(dev) >= rar_entries) + return -ENOMEM; + } + + return ndo_dflt_fdb_add(ndm, tb, dev, addr, vid, flags); +} + static const struct net_device_ops igb_netdev_ops = { .ndo_open = igb_open, .ndo_stop = igb_close, @@ -2087,6 +2109,7 @@ static const struct net_device_ops igb_netdev_ops = { #endif .ndo_fix_features = igb_fix_features, .ndo_set_features = igb_set_features, + .ndo_fdb_add = igb_ndo_fdb_add, .ndo_features_check = passthru_features_check, }; @@ -2349,27 +2372,35 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * assignment. */ netdev->features |= NETIF_F_SG | - NETIF_F_IP_CSUM | - NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_RXHASH | NETIF_F_RXCSUM | + NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_TX; + if (hw->mac.type >= e1000_82576) + netdev->features |= NETIF_F_SCTP_CRC; + /* copy netdev features into list of user selectable features */ netdev->hw_features |= netdev->features; netdev->hw_features |= NETIF_F_RXALL; + if (hw->mac.type >= e1000_i350) + netdev->hw_features |= NETIF_F_NTUPLE; + /* set this bit last since it cannot be part of hw_features */ netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; - netdev->vlan_features |= NETIF_F_TSO | + netdev->vlan_features |= NETIF_F_SG | + NETIF_F_TSO | NETIF_F_TSO6 | - NETIF_F_IP_CSUM | - NETIF_F_IPV6_CSUM | - NETIF_F_SG; + NETIF_F_HW_CSUM | + NETIF_F_SCTP_CRC; + + netdev->mpls_features |= NETIF_F_HW_CSUM; + netdev->hw_enc_features |= NETIF_F_HW_CSUM; netdev->priv_flags |= IFF_SUPP_NOFCS; @@ -2378,11 +2409,6 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->vlan_features |= NETIF_F_HIGHDMA; } - if (hw->mac.type >= e1000_82576) { - netdev->hw_features |= NETIF_F_SCTP_CRC; - netdev->features |= NETIF_F_SCTP_CRC; - } - netdev->priv_flags |= IFF_UNICAST_FLT; adapter->en_mng_pt = igb_enable_mng_pass_thru(hw); @@ -2515,6 +2541,26 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->wol = 0; } + /* Some vendors want the ability to Use the EEPROM setting as + * enable/disable only, and not for capability + */ + if (((hw->mac.type == e1000_i350) || + (hw->mac.type == e1000_i354)) && + (pdev->subsystem_vendor == PCI_VENDOR_ID_DELL)) { + adapter->flags |= IGB_FLAG_WOL_SUPPORTED; + adapter->wol = 0; + } + if (hw->mac.type == e1000_i350) { + if (((pdev->subsystem_device == 0x5001) || + (pdev->subsystem_device == 0x5002)) && + (hw->bus.func == 0)) { + adapter->flags |= IGB_FLAG_WOL_SUPPORTED; + adapter->wol = 0; + } + if (pdev->subsystem_device == 0x1F52) + adapter->flags |= IGB_FLAG_WOL_SUPPORTED; + } + device_set_wakeup_enable(&adapter->pdev->dev, adapter->flags & IGB_FLAG_WOL_SUPPORTED); @@ -2921,14 +2967,6 @@ void igb_set_flag_queue_pairs(struct igb_adapter *adapter, /* Device supports enough interrupts without queue pairing. */ break; case e1000_82576: - /* If VFs are going to be allocated with RSS queues then we - * should pair the queues in order to conserve interrupts due - * to limited supply. - */ - if ((adapter->rss_queues > 1) && - (adapter->vfs_allocated_count > 6)) - adapter->flags |= IGB_FLAG_QUEUE_PAIRS; - /* fall through */ case e1000_82580: case e1000_i350: case e1000_i354: @@ -2939,6 +2977,8 @@ void igb_set_flag_queue_pairs(struct igb_adapter *adapter, */ if (adapter->rss_queues > (max_rss_queues / 2)) adapter->flags |= IGB_FLAG_QUEUE_PAIRS; + else + adapter->flags &= ~IGB_FLAG_QUEUE_PAIRS; break; } } @@ -3132,7 +3172,7 @@ err_setup_tx: return err; } -static int igb_open(struct net_device *netdev) +int igb_open(struct net_device *netdev) { return __igb_open(netdev, false); } @@ -3169,7 +3209,7 @@ static int __igb_close(struct net_device *netdev, bool suspending) return 0; } -static int igb_close(struct net_device *netdev) +int igb_close(struct net_device *netdev) { return __igb_close(netdev, false); } @@ -3460,12 +3500,12 @@ static void igb_setup_mrqc(struct igb_adapter *adapter) wr32(E1000_VT_CTL, vtctl); } if (adapter->rss_queues > 1) - mrqc |= E1000_MRQC_ENABLE_VMDQ_RSS_2Q; + mrqc |= E1000_MRQC_ENABLE_VMDQ_RSS_MQ; else mrqc |= E1000_MRQC_ENABLE_VMDQ; } else { if (hw->mac.type != e1000_i211) - mrqc |= E1000_MRQC_ENABLE_RSS_4Q; + mrqc |= E1000_MRQC_ENABLE_RSS_MQ; } igb_vmm_control(adapter); @@ -3498,7 +3538,7 @@ void igb_setup_rctl(struct igb_adapter *adapter) /* disable store bad packets and clear size bits. */ rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256); - /* enable LPE to prevent packets larger than max_frame_size */ + /* enable LPE to allow for reception of jumbo frames */ rctl |= E1000_RCTL_LPE; /* disable queue 0 to prevent tail write w/o re-config */ @@ -3522,8 +3562,7 @@ void igb_setup_rctl(struct igb_adapter *adapter) E1000_RCTL_BAM | /* RX All Bcast Pkts */ E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */ - rctl &= ~(E1000_RCTL_VFE | /* Disable VLAN filter */ - E1000_RCTL_DPF | /* Allow filtered pause */ + rctl &= ~(E1000_RCTL_DPF | /* Allow filtered pause */ E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */ /* Do not mess with E1000_CTRL_VME, it affects transmit as well, * and that breaks VLANs. @@ -3539,12 +3578,8 @@ static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size, struct e1000_hw *hw = &adapter->hw; u32 vmolr; - /* if it isn't the PF check to see if VFs are enabled and - * increase the size to support vlan tags - */ - if (vfn < adapter->vfs_allocated_count && - adapter->vf_data[vfn].vlans_enabled) - size += VLAN_TAG_SIZE; + if (size > MAX_JUMBO_FRAME_SIZE) + size = MAX_JUMBO_FRAME_SIZE; vmolr = rd32(E1000_VMOLR(vfn)); vmolr &= ~E1000_VMOLR_RLPML_MASK; @@ -3554,30 +3589,26 @@ static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size, return 0; } -/** - * igb_rlpml_set - set maximum receive packet size - * @adapter: board private structure - * - * Configure maximum receivable packet size. - **/ -static void igb_rlpml_set(struct igb_adapter *adapter) +static inline void igb_set_vf_vlan_strip(struct igb_adapter *adapter, + int vfn, bool enable) { - u32 max_frame_size = adapter->max_frame_size; struct e1000_hw *hw = &adapter->hw; - u16 pf_id = adapter->vfs_allocated_count; + u32 val, reg; - if (pf_id) { - igb_set_vf_rlpml(adapter, max_frame_size, pf_id); - /* If we're in VMDQ or SR-IOV mode, then set global RLPML - * to our max jumbo frame size, in case we need to enable - * jumbo frames on one of the rings later. - * This will not pass over-length frames into the default - * queue because it's gated by the VMOLR.RLPML. - */ - max_frame_size = MAX_JUMBO_FRAME_SIZE; - } + if (hw->mac.type < e1000_82576) + return; - wr32(E1000_RLPML, max_frame_size); + if (hw->mac.type == e1000_i350) + reg = E1000_DVMOLR(vfn); + else + reg = E1000_VMOLR(vfn); + + val = rd32(reg); + if (enable) + val |= E1000_VMOLR_STRVLAN; + else + val &= ~(E1000_VMOLR_STRVLAN); + wr32(reg, val); } static inline void igb_set_vmolr(struct igb_adapter *adapter, @@ -3593,14 +3624,6 @@ static inline void igb_set_vmolr(struct igb_adapter *adapter, return; vmolr = rd32(E1000_VMOLR(vfn)); - vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */ - if (hw->mac.type == e1000_i350) { - u32 dvmolr; - - dvmolr = rd32(E1000_DVMOLR(vfn)); - dvmolr |= E1000_DVMOLR_STRVLAN; - wr32(E1000_DVMOLR(vfn), dvmolr); - } if (aupe) vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */ else @@ -3684,9 +3707,6 @@ static void igb_configure_rx(struct igb_adapter *adapter) { int i; - /* set UTA to appropriate mode */ - igb_set_uta(adapter); - /* set the correct pool for the PF default MAC address in entry 0 */ igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0, adapter->vfs_allocated_count); @@ -4004,6 +4024,130 @@ static int igb_write_uc_addr_list(struct net_device *netdev) return count; } +static int igb_vlan_promisc_enable(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 i, pf_id; + + switch (hw->mac.type) { + case e1000_i210: + case e1000_i211: + case e1000_i350: + /* VLAN filtering needed for VLAN prio filter */ + if (adapter->netdev->features & NETIF_F_NTUPLE) + break; + /* fall through */ + case e1000_82576: + case e1000_82580: + case e1000_i354: + /* VLAN filtering needed for pool filtering */ + if (adapter->vfs_allocated_count) + break; + /* fall through */ + default: + return 1; + } + + /* We are already in VLAN promisc, nothing to do */ + if (adapter->flags & IGB_FLAG_VLAN_PROMISC) + return 0; + + if (!adapter->vfs_allocated_count) + goto set_vfta; + + /* Add PF to all active pools */ + pf_id = adapter->vfs_allocated_count + E1000_VLVF_POOLSEL_SHIFT; + + for (i = E1000_VLVF_ARRAY_SIZE; --i;) { + u32 vlvf = rd32(E1000_VLVF(i)); + + vlvf |= 1 << pf_id; + wr32(E1000_VLVF(i), vlvf); + } + +set_vfta: + /* Set all bits in the VLAN filter table array */ + for (i = E1000_VLAN_FILTER_TBL_SIZE; i--;) + hw->mac.ops.write_vfta(hw, i, ~0U); + + /* Set flag so we don't redo unnecessary work */ + adapter->flags |= IGB_FLAG_VLAN_PROMISC; + + return 0; +} + +#define VFTA_BLOCK_SIZE 8 +static void igb_scrub_vfta(struct igb_adapter *adapter, u32 vfta_offset) +{ + struct e1000_hw *hw = &adapter->hw; + u32 vfta[VFTA_BLOCK_SIZE] = { 0 }; + u32 vid_start = vfta_offset * 32; + u32 vid_end = vid_start + (VFTA_BLOCK_SIZE * 32); + u32 i, vid, word, bits, pf_id; + + /* guarantee that we don't scrub out management VLAN */ + vid = adapter->mng_vlan_id; + if (vid >= vid_start && vid < vid_end) + vfta[(vid - vid_start) / 32] |= 1 << (vid % 32); + + if (!adapter->vfs_allocated_count) + goto set_vfta; + + pf_id = adapter->vfs_allocated_count + E1000_VLVF_POOLSEL_SHIFT; + + for (i = E1000_VLVF_ARRAY_SIZE; --i;) { + u32 vlvf = rd32(E1000_VLVF(i)); + + /* pull VLAN ID from VLVF */ + vid = vlvf & VLAN_VID_MASK; + + /* only concern ourselves with a certain range */ + if (vid < vid_start || vid >= vid_end) + continue; + + if (vlvf & E1000_VLVF_VLANID_ENABLE) { + /* record VLAN ID in VFTA */ + vfta[(vid - vid_start) / 32] |= 1 << (vid % 32); + + /* if PF is part of this then continue */ + if (test_bit(vid, adapter->active_vlans)) + continue; + } + + /* remove PF from the pool */ + bits = ~(1 << pf_id); + bits &= rd32(E1000_VLVF(i)); + wr32(E1000_VLVF(i), bits); + } + +set_vfta: + /* extract values from active_vlans and write back to VFTA */ + for (i = VFTA_BLOCK_SIZE; i--;) { + vid = (vfta_offset + i) * 32; + word = vid / BITS_PER_LONG; + bits = vid % BITS_PER_LONG; + + vfta[i] |= adapter->active_vlans[word] >> bits; + + hw->mac.ops.write_vfta(hw, vfta_offset + i, vfta[i]); + } +} + +static void igb_vlan_promisc_disable(struct igb_adapter *adapter) +{ + u32 i; + + /* We are not in VLAN promisc, nothing to do */ + if (!(adapter->flags & IGB_FLAG_VLAN_PROMISC)) + return; + + /* Set flag so we don't redo unnecessary work */ + adapter->flags &= ~IGB_FLAG_VLAN_PROMISC; + + for (i = 0; i < E1000_VLAN_FILTER_TBL_SIZE; i += VFTA_BLOCK_SIZE) + igb_scrub_vfta(adapter, i); +} + /** * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set * @netdev: network interface device structure @@ -4018,21 +4162,17 @@ static void igb_set_rx_mode(struct net_device *netdev) struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; unsigned int vfn = adapter->vfs_allocated_count; - u32 rctl, vmolr = 0; + u32 rctl = 0, vmolr = 0; int count; /* Check for Promiscuous and All Multicast modes */ - rctl = rd32(E1000_RCTL); - - /* clear the effected bits */ - rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE); - if (netdev->flags & IFF_PROMISC) { - /* retain VLAN HW filtering if in VT mode */ - if (adapter->vfs_allocated_count) - rctl |= E1000_RCTL_VFE; - rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); - vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME); + rctl |= E1000_RCTL_UPE | E1000_RCTL_MPE; + vmolr |= E1000_VMOLR_MPME; + + /* enable use of UTA filter to force packets to default pool */ + if (hw->mac.type == e1000_82576) + vmolr |= E1000_VMOLR_ROPE; } else { if (netdev->flags & IFF_ALLMULTI) { rctl |= E1000_RCTL_MPE; @@ -4050,17 +4190,34 @@ static void igb_set_rx_mode(struct net_device *netdev) vmolr |= E1000_VMOLR_ROMPE; } } - /* Write addresses to available RAR registers, if there is not - * sufficient space to store all the addresses then enable - * unicast promiscuous mode - */ - count = igb_write_uc_addr_list(netdev); - if (count < 0) { - rctl |= E1000_RCTL_UPE; - vmolr |= E1000_VMOLR_ROPE; - } - rctl |= E1000_RCTL_VFE; } + + /* Write addresses to available RAR registers, if there is not + * sufficient space to store all the addresses then enable + * unicast promiscuous mode + */ + count = igb_write_uc_addr_list(netdev); + if (count < 0) { + rctl |= E1000_RCTL_UPE; + vmolr |= E1000_VMOLR_ROPE; + } + + /* enable VLAN filtering by default */ + rctl |= E1000_RCTL_VFE; + + /* disable VLAN filtering for modes that require it */ + if ((netdev->flags & IFF_PROMISC) || + (netdev->features & NETIF_F_RXALL)) { + /* if we fail to set all rules then just clear VFE */ + if (igb_vlan_promisc_enable(adapter)) + rctl &= ~E1000_RCTL_VFE; + } else { + igb_vlan_promisc_disable(adapter); + } + + /* update state of unicast, multicast, and VLAN filtering modes */ + rctl |= rd32(E1000_RCTL) & ~(E1000_RCTL_UPE | E1000_RCTL_MPE | + E1000_RCTL_VFE); wr32(E1000_RCTL, rctl); /* In order to support SR-IOV and eventually VMDq it is necessary to set @@ -4071,9 +4228,19 @@ static void igb_set_rx_mode(struct net_device *netdev) if ((hw->mac.type < e1000_82576) || (hw->mac.type > e1000_i350)) return; + /* set UTA to appropriate mode */ + igb_set_uta(adapter, !!(vmolr & E1000_VMOLR_ROPE)); + vmolr |= rd32(E1000_VMOLR(vfn)) & ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE); + + /* enable Rx jumbo frames, no need for restriction */ + vmolr &= ~E1000_VMOLR_RLPML_MASK; + vmolr |= MAX_JUMBO_FRAME_SIZE | E1000_VMOLR_LPE; + wr32(E1000_VMOLR(vfn), vmolr); + wr32(E1000_RLPML, MAX_JUMBO_FRAME_SIZE); + igb_restore_vf_multicasts(adapter); } @@ -4227,6 +4394,7 @@ static void igb_watchdog_task(struct work_struct *work) u32 link; int i; u32 connsw; + u16 phy_data, retry_count = 20; link = igb_has_link(adapter); @@ -4305,6 +4473,25 @@ static void igb_watchdog_task(struct work_struct *work) break; } + if (adapter->link_speed != SPEED_1000) + goto no_wait; + + /* wait for Remote receiver status OK */ +retry_read_status: + if (!igb_read_phy_reg(hw, PHY_1000T_STATUS, + &phy_data)) { + if (!(phy_data & SR_1000T_REMOTE_RX_STATUS) && + retry_count) { + msleep(100); + retry_count--; + goto retry_read_status; + } else if (!retry_count) { + dev_err(&adapter->pdev->dev, "exceed max 2 second\n"); + } + } else { + dev_err(&adapter->pdev->dev, "read 1000Base-T Status Reg\n"); + } +no_wait: netif_carrier_on(netdev); igb_ping_all_vfs(adapter); @@ -4713,70 +4900,57 @@ static int igb_tso(struct igb_ring *tx_ring, return 1; } +static inline bool igb_ipv6_csum_is_sctp(struct sk_buff *skb) +{ + unsigned int offset = 0; + + ipv6_find_hdr(skb, &offset, IPPROTO_SCTP, NULL, NULL); + + return offset == skb_checksum_start_offset(skb); +} + static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first) { struct sk_buff *skb = first->skb; u32 vlan_macip_lens = 0; - u32 mss_l4len_idx = 0; u32 type_tucmd = 0; if (skb->ip_summed != CHECKSUM_PARTIAL) { +csum_failed: if (!(first->tx_flags & IGB_TX_FLAGS_VLAN)) return; - } else { - u8 l4_hdr = 0; - - switch (first->protocol) { - case htons(ETH_P_IP): - vlan_macip_lens |= skb_network_header_len(skb); - type_tucmd |= E1000_ADVTXD_TUCMD_IPV4; - l4_hdr = ip_hdr(skb)->protocol; - break; - case htons(ETH_P_IPV6): - vlan_macip_lens |= skb_network_header_len(skb); - l4_hdr = ipv6_hdr(skb)->nexthdr; - break; - default: - if (unlikely(net_ratelimit())) { - dev_warn(tx_ring->dev, - "partial checksum but proto=%x!\n", - first->protocol); - } - break; - } + goto no_csum; + } - switch (l4_hdr) { - case IPPROTO_TCP: - type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP; - mss_l4len_idx = tcp_hdrlen(skb) << - E1000_ADVTXD_L4LEN_SHIFT; - break; - case IPPROTO_SCTP: - type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP; - mss_l4len_idx = sizeof(struct sctphdr) << - E1000_ADVTXD_L4LEN_SHIFT; - break; - case IPPROTO_UDP: - mss_l4len_idx = sizeof(struct udphdr) << - E1000_ADVTXD_L4LEN_SHIFT; - break; - default: - if (unlikely(net_ratelimit())) { - dev_warn(tx_ring->dev, - "partial checksum but l4 proto=%x!\n", - l4_hdr); - } + switch (skb->csum_offset) { + case offsetof(struct tcphdr, check): + type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP; + /* fall through */ + case offsetof(struct udphdr, check): + break; + case offsetof(struct sctphdr, checksum): + /* validate that this is actually an SCTP request */ + if (((first->protocol == htons(ETH_P_IP)) && + (ip_hdr(skb)->protocol == IPPROTO_SCTP)) || + ((first->protocol == htons(ETH_P_IPV6)) && + igb_ipv6_csum_is_sctp(skb))) { + type_tucmd = E1000_ADVTXD_TUCMD_L4T_SCTP; break; } - - /* update TX checksum flag */ - first->tx_flags |= IGB_TX_FLAGS_CSUM; + default: + skb_checksum_help(skb); + goto csum_failed; } + /* update TX checksum flag */ + first->tx_flags |= IGB_TX_FLAGS_CSUM; + vlan_macip_lens = skb_checksum_start_offset(skb) - + skb_network_offset(skb); +no_csum: vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT; vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK; - igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx); + igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, 0); } #define IGB_SET_FLAG(_input, _flag, _result) \ @@ -5088,16 +5262,6 @@ static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, { struct igb_adapter *adapter = netdev_priv(netdev); - if (test_bit(__IGB_DOWN, &adapter->state)) { - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; - } - - if (skb->len <= 0) { - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; - } - /* The minimum packet size with TCTL.PSP set is 17 so pad the skb * in order to meet this minimum size requirement. */ @@ -5792,125 +5956,132 @@ static void igb_restore_vf_multicasts(struct igb_adapter *adapter) static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf) { struct e1000_hw *hw = &adapter->hw; - u32 pool_mask, reg, vid; - int i; + u32 pool_mask, vlvf_mask, i; - pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf); + /* create mask for VF and other pools */ + pool_mask = E1000_VLVF_POOLSEL_MASK; + vlvf_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf); + + /* drop PF from pool bits */ + pool_mask &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + + adapter->vfs_allocated_count)); /* Find the vlan filter for this id */ - for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { - reg = rd32(E1000_VLVF(i)); + for (i = E1000_VLVF_ARRAY_SIZE; i--;) { + u32 vlvf = rd32(E1000_VLVF(i)); + u32 vfta_mask, vid, vfta; /* remove the vf from the pool */ - reg &= ~pool_mask; - - /* if pool is empty then remove entry from vfta */ - if (!(reg & E1000_VLVF_POOLSEL_MASK) && - (reg & E1000_VLVF_VLANID_ENABLE)) { - reg = 0; - vid = reg & E1000_VLVF_VLANID_MASK; - igb_vfta_set(hw, vid, false); - } + if (!(vlvf & vlvf_mask)) + continue; + + /* clear out bit from VLVF */ + vlvf ^= vlvf_mask; + + /* if other pools are present, just remove ourselves */ + if (vlvf & pool_mask) + goto update_vlvfb; - wr32(E1000_VLVF(i), reg); + /* if PF is present, leave VFTA */ + if (vlvf & E1000_VLVF_POOLSEL_MASK) + goto update_vlvf; + + vid = vlvf & E1000_VLVF_VLANID_MASK; + vfta_mask = 1 << (vid % 32); + + /* clear bit from VFTA */ + vfta = adapter->shadow_vfta[vid / 32]; + if (vfta & vfta_mask) + hw->mac.ops.write_vfta(hw, vid / 32, vfta ^ vfta_mask); +update_vlvf: + /* clear pool selection enable */ + if (adapter->flags & IGB_FLAG_VLAN_PROMISC) + vlvf &= E1000_VLVF_POOLSEL_MASK; + else + vlvf = 0; +update_vlvfb: + /* clear pool bits */ + wr32(E1000_VLVF(i), vlvf); } +} - adapter->vf_data[vf].vlans_enabled = 0; +static int igb_find_vlvf_entry(struct e1000_hw *hw, u32 vlan) +{ + u32 vlvf; + int idx; + + /* short cut the special case */ + if (vlan == 0) + return 0; + + /* Search for the VLAN id in the VLVF entries */ + for (idx = E1000_VLVF_ARRAY_SIZE; --idx;) { + vlvf = rd32(E1000_VLVF(idx)); + if ((vlvf & VLAN_VID_MASK) == vlan) + break; + } + + return idx; } -static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf) +void igb_update_pf_vlvf(struct igb_adapter *adapter, u32 vid) { struct e1000_hw *hw = &adapter->hw; - u32 reg, i; - - /* The vlvf table only exists on 82576 hardware and newer */ - if (hw->mac.type < e1000_82576) - return -1; + u32 bits, pf_id; + int idx; - /* we only need to do this if VMDq is enabled */ - if (!adapter->vfs_allocated_count) - return -1; + idx = igb_find_vlvf_entry(hw, vid); + if (!idx) + return; - /* Find the vlan filter for this id */ - for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { - reg = rd32(E1000_VLVF(i)); - if ((reg & E1000_VLVF_VLANID_ENABLE) && - vid == (reg & E1000_VLVF_VLANID_MASK)) - break; + /* See if any other pools are set for this VLAN filter + * entry other than the PF. + */ + pf_id = adapter->vfs_allocated_count + E1000_VLVF_POOLSEL_SHIFT; + bits = ~(1 << pf_id) & E1000_VLVF_POOLSEL_MASK; + bits &= rd32(E1000_VLVF(idx)); + + /* Disable the filter so this falls into the default pool. */ + if (!bits) { + if (adapter->flags & IGB_FLAG_VLAN_PROMISC) + wr32(E1000_VLVF(idx), 1 << pf_id); + else + wr32(E1000_VLVF(idx), 0); } +} - if (add) { - if (i == E1000_VLVF_ARRAY_SIZE) { - /* Did not find a matching VLAN ID entry that was - * enabled. Search for a free filter entry, i.e. - * one without the enable bit set - */ - for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { - reg = rd32(E1000_VLVF(i)); - if (!(reg & E1000_VLVF_VLANID_ENABLE)) - break; - } - } - if (i < E1000_VLVF_ARRAY_SIZE) { - /* Found an enabled/available entry */ - reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf); - - /* if !enabled we need to set this up in vfta */ - if (!(reg & E1000_VLVF_VLANID_ENABLE)) { - /* add VID to filter table */ - igb_vfta_set(hw, vid, true); - reg |= E1000_VLVF_VLANID_ENABLE; - } - reg &= ~E1000_VLVF_VLANID_MASK; - reg |= vid; - wr32(E1000_VLVF(i), reg); - - /* do not modify RLPML for PF devices */ - if (vf >= adapter->vfs_allocated_count) - return 0; - - if (!adapter->vf_data[vf].vlans_enabled) { - u32 size; - - reg = rd32(E1000_VMOLR(vf)); - size = reg & E1000_VMOLR_RLPML_MASK; - size += 4; - reg &= ~E1000_VMOLR_RLPML_MASK; - reg |= size; - wr32(E1000_VMOLR(vf), reg); - } +static s32 igb_set_vf_vlan(struct igb_adapter *adapter, u32 vid, + bool add, u32 vf) +{ + int pf_id = adapter->vfs_allocated_count; + struct e1000_hw *hw = &adapter->hw; + int err; - adapter->vf_data[vf].vlans_enabled++; - } - } else { - if (i < E1000_VLVF_ARRAY_SIZE) { - /* remove vf from the pool */ - reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf)); - /* if pool is empty then remove entry from vfta */ - if (!(reg & E1000_VLVF_POOLSEL_MASK)) { - reg = 0; - igb_vfta_set(hw, vid, false); - } - wr32(E1000_VLVF(i), reg); - - /* do not modify RLPML for PF devices */ - if (vf >= adapter->vfs_allocated_count) - return 0; - - adapter->vf_data[vf].vlans_enabled--; - if (!adapter->vf_data[vf].vlans_enabled) { - u32 size; - - reg = rd32(E1000_VMOLR(vf)); - size = reg & E1000_VMOLR_RLPML_MASK; - size -= 4; - reg &= ~E1000_VMOLR_RLPML_MASK; - reg |= size; - wr32(E1000_VMOLR(vf), reg); - } - } + /* If VLAN overlaps with one the PF is currently monitoring make + * sure that we are able to allocate a VLVF entry. This may be + * redundant but it guarantees PF will maintain visibility to + * the VLAN. + */ + if (add && test_bit(vid, adapter->active_vlans)) { + err = igb_vfta_set(hw, vid, pf_id, true, false); + if (err) + return err; } - return 0; + + err = igb_vfta_set(hw, vid, vf, add, false); + + if (add && !err) + return err; + + /* If we failed to add the VF VLAN or we are removing the VF VLAN + * we may need to drop the PF pool bit in order to allow us to free + * up the VLVF resources. + */ + if (test_bit(vid, adapter->active_vlans) || + (adapter->flags & IGB_FLAG_VLAN_PROMISC)) + igb_update_pf_vlvf(adapter, vid); + + return err; } static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf) @@ -5923,130 +6094,104 @@ static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf) wr32(E1000_VMVIR(vf), 0); } -static int igb_ndo_set_vf_vlan(struct net_device *netdev, - int vf, u16 vlan, u8 qos) +static int igb_enable_port_vlan(struct igb_adapter *adapter, int vf, + u16 vlan, u8 qos) { - int err = 0; - struct igb_adapter *adapter = netdev_priv(netdev); + int err; - if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7)) - return -EINVAL; - if (vlan || qos) { - err = igb_vlvf_set(adapter, vlan, !!vlan, vf); - if (err) - goto out; - igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf); - igb_set_vmolr(adapter, vf, !vlan); - adapter->vf_data[vf].pf_vlan = vlan; - adapter->vf_data[vf].pf_qos = qos; - dev_info(&adapter->pdev->dev, - "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf); - if (test_bit(__IGB_DOWN, &adapter->state)) { - dev_warn(&adapter->pdev->dev, - "The VF VLAN has been set, but the PF device is not up.\n"); - dev_warn(&adapter->pdev->dev, - "Bring the PF device up before attempting to use the VF device.\n"); - } - } else { - igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan, - false, vf); - igb_set_vmvir(adapter, vlan, vf); - igb_set_vmolr(adapter, vf, true); - adapter->vf_data[vf].pf_vlan = 0; - adapter->vf_data[vf].pf_qos = 0; + err = igb_set_vf_vlan(adapter, vlan, true, vf); + if (err) + return err; + + igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf); + igb_set_vmolr(adapter, vf, !vlan); + + /* revoke access to previous VLAN */ + if (vlan != adapter->vf_data[vf].pf_vlan) + igb_set_vf_vlan(adapter, adapter->vf_data[vf].pf_vlan, + false, vf); + + adapter->vf_data[vf].pf_vlan = vlan; + adapter->vf_data[vf].pf_qos = qos; + igb_set_vf_vlan_strip(adapter, vf, true); + dev_info(&adapter->pdev->dev, + "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf); + if (test_bit(__IGB_DOWN, &adapter->state)) { + dev_warn(&adapter->pdev->dev, + "The VF VLAN has been set, but the PF device is not up.\n"); + dev_warn(&adapter->pdev->dev, + "Bring the PF device up before attempting to use the VF device.\n"); } -out: + return err; } -static int igb_find_vlvf_entry(struct igb_adapter *adapter, int vid) +static int igb_disable_port_vlan(struct igb_adapter *adapter, int vf) { - struct e1000_hw *hw = &adapter->hw; - int i; - u32 reg; + /* Restore tagless access via VLAN 0 */ + igb_set_vf_vlan(adapter, 0, true, vf); - /* Find the vlan filter for this id */ - for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { - reg = rd32(E1000_VLVF(i)); - if ((reg & E1000_VLVF_VLANID_ENABLE) && - vid == (reg & E1000_VLVF_VLANID_MASK)) - break; - } + igb_set_vmvir(adapter, 0, vf); + igb_set_vmolr(adapter, vf, true); + + /* Remove any PF assigned VLAN */ + if (adapter->vf_data[vf].pf_vlan) + igb_set_vf_vlan(adapter, adapter->vf_data[vf].pf_vlan, + false, vf); - if (i >= E1000_VLVF_ARRAY_SIZE) - i = -1; + adapter->vf_data[vf].pf_vlan = 0; + adapter->vf_data[vf].pf_qos = 0; + igb_set_vf_vlan_strip(adapter, vf, false); - return i; + return 0; } -static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) +static int igb_ndo_set_vf_vlan(struct net_device *netdev, + int vf, u16 vlan, u8 qos) { - struct e1000_hw *hw = &adapter->hw; - int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT; - int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK); - int err = 0; + struct igb_adapter *adapter = netdev_priv(netdev); - /* If in promiscuous mode we need to make sure the PF also has - * the VLAN filter set. - */ - if (add && (adapter->netdev->flags & IFF_PROMISC)) - err = igb_vlvf_set(adapter, vid, add, - adapter->vfs_allocated_count); - if (err) - goto out; + if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7)) + return -EINVAL; - err = igb_vlvf_set(adapter, vid, add, vf); + return (vlan || qos) ? igb_enable_port_vlan(adapter, vf, vlan, qos) : + igb_disable_port_vlan(adapter, vf); +} - if (err) - goto out; +static int igb_set_vf_vlan_msg(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) +{ + int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT; + int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK); + int ret; - /* Go through all the checks to see if the VLAN filter should - * be wiped completely. - */ - if (!add && (adapter->netdev->flags & IFF_PROMISC)) { - u32 vlvf, bits; - int regndx = igb_find_vlvf_entry(adapter, vid); - - if (regndx < 0) - goto out; - /* See if any other pools are set for this VLAN filter - * entry other than the PF. - */ - vlvf = bits = rd32(E1000_VLVF(regndx)); - bits &= 1 << (E1000_VLVF_POOLSEL_SHIFT + - adapter->vfs_allocated_count); - /* If the filter was removed then ensure PF pool bit - * is cleared if the PF only added itself to the pool - * because the PF is in promiscuous mode. - */ - if ((vlvf & VLAN_VID_MASK) == vid && - !test_bit(vid, adapter->active_vlans) && - !bits) - igb_vlvf_set(adapter, vid, add, - adapter->vfs_allocated_count); - } + if (adapter->vf_data[vf].pf_vlan) + return -1; -out: - return err; + /* VLAN 0 is a special case, don't allow it to be removed */ + if (!vid && !add) + return 0; + + ret = igb_set_vf_vlan(adapter, vid, !!add, vf); + if (!ret) + igb_set_vf_vlan_strip(adapter, vf, !!vid); + return ret; } static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf) { - /* clear flags - except flag that indicates PF has set the MAC */ - adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC; - adapter->vf_data[vf].last_nack = jiffies; + struct vf_data_storage *vf_data = &adapter->vf_data[vf]; - /* reset offloads to defaults */ - igb_set_vmolr(adapter, vf, true); + /* clear flags - except flag that indicates PF has set the MAC */ + vf_data->flags &= IGB_VF_FLAG_PF_SET_MAC; + vf_data->last_nack = jiffies; /* reset vlans for device */ igb_clear_vf_vfta(adapter, vf); - if (adapter->vf_data[vf].pf_vlan) - igb_ndo_set_vf_vlan(adapter->netdev, vf, - adapter->vf_data[vf].pf_vlan, - adapter->vf_data[vf].pf_qos); - else - igb_clear_vf_vfta(adapter, vf); + igb_set_vf_vlan(adapter, vf_data->pf_vlan, true, vf); + igb_set_vmvir(adapter, vf_data->pf_vlan | + (vf_data->pf_qos << VLAN_PRIO_SHIFT), vf); + igb_set_vmolr(adapter, vf, !vf_data->pf_vlan); + igb_set_vf_vlan_strip(adapter, vf, !!(vf_data->pf_vlan)); /* reset multicast table array for vf */ adapter->vf_data[vf].num_vf_mc_hashes = 0; @@ -6191,7 +6336,7 @@ static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf) "VF %d attempted to override administratively set VLAN tag\nReload the VF driver to resume operations\n", vf); else - retval = igb_set_vf_vlan(adapter, msgbuf, vf); + retval = igb_set_vf_vlan_msg(adapter, msgbuf, vf); break; default: dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]); @@ -6233,6 +6378,7 @@ static void igb_msg_task(struct igb_adapter *adapter) /** * igb_set_uta - Set unicast filter table address * @adapter: board private structure + * @set: boolean indicating if we are setting or clearing bits * * The unicast table address is a register array of 32-bit registers. * The table is meant to be used in a way similar to how the MTA is used @@ -6240,21 +6386,18 @@ static void igb_msg_task(struct igb_adapter *adapter) * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous * enable bit to allow vlan tag stripping when promiscuous mode is enabled **/ -static void igb_set_uta(struct igb_adapter *adapter) +static void igb_set_uta(struct igb_adapter *adapter, bool set) { struct e1000_hw *hw = &adapter->hw; + u32 uta = set ? ~0 : 0; int i; - /* The UTA table only exists on 82576 hardware and newer */ - if (hw->mac.type < e1000_82576) - return; - /* we only need to do this if VMDq is enabled */ if (!adapter->vfs_allocated_count) return; - for (i = 0; i < hw->mac.uta_reg_count; i++) - array_wr32(E1000_UTA, i, ~0); + for (i = hw->mac.uta_reg_count; i--;) + array_wr32(E1000_UTA, i, uta); } /** @@ -7202,7 +7345,7 @@ static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features) wr32(E1000_CTRL, ctrl); } - igb_rlpml_set(adapter); + igb_set_vf_vlan_strip(adapter, adapter->vfs_allocated_count, enable); } static int igb_vlan_rx_add_vid(struct net_device *netdev, @@ -7212,11 +7355,9 @@ static int igb_vlan_rx_add_vid(struct net_device *netdev, struct e1000_hw *hw = &adapter->hw; int pf_id = adapter->vfs_allocated_count; - /* attempt to add filter to vlvf array */ - igb_vlvf_set(adapter, vid, true, pf_id); - /* add the filter since PF can receive vlans w/o entry in vlvf */ - igb_vfta_set(hw, vid, true); + if (!vid || !(adapter->flags & IGB_FLAG_VLAN_PROMISC)) + igb_vfta_set(hw, vid, pf_id, true, !!vid); set_bit(vid, adapter->active_vlans); @@ -7227,16 +7368,12 @@ static int igb_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; int pf_id = adapter->vfs_allocated_count; - s32 err; - - /* remove vlan from VLVF table array */ - err = igb_vlvf_set(adapter, vid, false, pf_id); + struct e1000_hw *hw = &adapter->hw; - /* if vid was not present in VLVF just remove it from table */ - if (err) - igb_vfta_set(hw, vid, false); + /* remove VID from filter table */ + if (vid && !(adapter->flags & IGB_FLAG_VLAN_PROMISC)) + igb_vfta_set(hw, vid, pf_id, false, true); clear_bit(vid, adapter->active_vlans); @@ -7245,11 +7382,12 @@ static int igb_vlan_rx_kill_vid(struct net_device *netdev, static void igb_restore_vlan(struct igb_adapter *adapter) { - u16 vid; + u16 vid = 1; igb_vlan_mode(adapter->netdev, adapter->netdev->features); + igb_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), 0); - for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID) + for_each_set_bit_from(vid, adapter->active_vlans, VLAN_N_VID) igb_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid); } @@ -7704,15 +7842,14 @@ static void igb_io_resume(struct pci_dev *pdev) static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index, u8 qsel) { - u32 rar_low, rar_high; struct e1000_hw *hw = &adapter->hw; + u32 rar_low, rar_high; /* HW expects these in little endian so we reverse the byte order - * from network order (big endian) to little endian + * from network order (big endian) to CPU endian */ - rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) | - ((u32) addr[2] << 16) | ((u32) addr[3] << 24)); - rar_high = ((u32) addr[4] | ((u32) addr[5] << 8)); + rar_low = le32_to_cpup((__be32 *)(addr)); + rar_high = le16_to_cpup((__be16 *)(addr + 4)); /* Indicate to hardware the Address is Valid. */ rar_high |= E1000_RAH_AV; @@ -7959,9 +8096,7 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) * than the Rx threshold. Set hwm to PBA - max frame * size in 16B units, capping it at PBA - 6KB. */ - hwm = 64 * pba - adapter->max_frame_size / 16; - if (hwm < 64 * (pba - 6)) - hwm = 64 * (pba - 6); + hwm = 64 * (pba - 6); reg = rd32(E1000_FCRTC); reg &= ~E1000_FCRTC_RTH_COAL_MASK; reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT) @@ -7971,9 +8106,7 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) /* Set the DMA Coalescing Rx threshold to PBA - 2 * max * frame size, capping it at PBA - 10KB. */ - dmac_thr = pba - adapter->max_frame_size / 512; - if (dmac_thr < pba - 10) - dmac_thr = pba - 10; + dmac_thr = pba - 10; reg = rd32(E1000_DMACR); reg &= ~E1000_DMACR_DMACTHR_MASK; reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT) diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index c44df87c38de..22a8a29895b4 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -525,7 +525,8 @@ static int igb_ptp_feature_enable_i210(struct ptp_clock_info *ptp, ts.tv_nsec = rq->perout.period.nsec; ns = timespec64_to_ns(&ts); ns = ns >> 1; - if (on && ns <= 70000000LL) { + if (on && ((ns <= 70000000LL) || (ns == 125000000LL) || + (ns == 250000000LL) || (ns == 500000000LL))) { if (ns < 8LL) return -EINVAL; use_freq = 1; diff --git a/drivers/net/ethernet/intel/igbvf/mbx.c b/drivers/net/ethernet/intel/igbvf/mbx.c index 7b6cb4c3764c..01752f44ace2 100644 --- a/drivers/net/ethernet/intel/igbvf/mbx.c +++ b/drivers/net/ethernet/intel/igbvf/mbx.c @@ -234,13 +234,19 @@ static s32 e1000_check_for_rst_vf(struct e1000_hw *hw) static s32 e1000_obtain_mbx_lock_vf(struct e1000_hw *hw) { s32 ret_val = -E1000_ERR_MBX; - - /* Take ownership of the buffer */ - ew32(V2PMAILBOX(0), E1000_V2PMAILBOX_VFU); - - /* reserve mailbox for VF use */ - if (e1000_read_v2p_mailbox(hw) & E1000_V2PMAILBOX_VFU) - ret_val = E1000_SUCCESS; + int count = 10; + + do { + /* Take ownership of the buffer */ + ew32(V2PMAILBOX(0), E1000_V2PMAILBOX_VFU); + + /* reserve mailbox for VF use */ + if (e1000_read_v2p_mailbox(hw) & E1000_V2PMAILBOX_VFU) { + ret_val = 0; + break; + } + udelay(1000); + } while (count-- > 0); return ret_val; } diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 297af801f051..c12442252adb 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -43,6 +43,7 @@ #include <linux/ethtool.h> #include <linux/if_vlan.h> #include <linux/prefetch.h> +#include <linux/sctp.h> #include "igbvf.h" @@ -876,7 +877,6 @@ static irqreturn_t igbvf_msix_other(int irq, void *data) adapter->int_counter1++; - netif_carrier_off(netdev); hw->mac.get_link_status = 1; if (!test_bit(__IGBVF_DOWN, &adapter->state)) mod_timer(&adapter->watchdog_timer, jiffies + 1); @@ -1908,6 +1908,31 @@ static void igbvf_watchdog_task(struct work_struct *work) #define IGBVF_TX_FLAGS_VLAN_MASK 0xffff0000 #define IGBVF_TX_FLAGS_VLAN_SHIFT 16 +static void igbvf_tx_ctxtdesc(struct igbvf_ring *tx_ring, u32 vlan_macip_lens, + u32 type_tucmd, u32 mss_l4len_idx) +{ + struct e1000_adv_tx_context_desc *context_desc; + struct igbvf_buffer *buffer_info; + u16 i = tx_ring->next_to_use; + + context_desc = IGBVF_TX_CTXTDESC_ADV(*tx_ring, i); + buffer_info = &tx_ring->buffer_info[i]; + + i++; + tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; + + /* set bits to identify this as an advanced context descriptor */ + type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT; + + context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens); + context_desc->seqnum_seed = 0; + context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd); + context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx); + + buffer_info->time_stamp = jiffies; + buffer_info->dma = 0; +} + static int igbvf_tso(struct igbvf_adapter *adapter, struct igbvf_ring *tx_ring, struct sk_buff *skb, u32 tx_flags, u8 *hdr_len, @@ -1987,65 +2012,56 @@ static int igbvf_tso(struct igbvf_adapter *adapter, return true; } -static inline bool igbvf_tx_csum(struct igbvf_adapter *adapter, - struct igbvf_ring *tx_ring, - struct sk_buff *skb, u32 tx_flags, - __be16 protocol) +static inline bool igbvf_ipv6_csum_is_sctp(struct sk_buff *skb) { - struct e1000_adv_tx_context_desc *context_desc; - unsigned int i; - struct igbvf_buffer *buffer_info; - u32 info = 0, tu_cmd = 0; - - if ((skb->ip_summed == CHECKSUM_PARTIAL) || - (tx_flags & IGBVF_TX_FLAGS_VLAN)) { - i = tx_ring->next_to_use; - buffer_info = &tx_ring->buffer_info[i]; - context_desc = IGBVF_TX_CTXTDESC_ADV(*tx_ring, i); + unsigned int offset = 0; - if (tx_flags & IGBVF_TX_FLAGS_VLAN) - info |= (tx_flags & IGBVF_TX_FLAGS_VLAN_MASK); + ipv6_find_hdr(skb, &offset, IPPROTO_SCTP, NULL, NULL); - info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT); - if (skb->ip_summed == CHECKSUM_PARTIAL) - info |= (skb_transport_header(skb) - - skb_network_header(skb)); + return offset == skb_checksum_start_offset(skb); +} - context_desc->vlan_macip_lens = cpu_to_le32(info); +static bool igbvf_tx_csum(struct igbvf_ring *tx_ring, struct sk_buff *skb, + u32 tx_flags, __be16 protocol) +{ + u32 vlan_macip_lens = 0; + u32 type_tucmd = 0; - tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT); + if (skb->ip_summed != CHECKSUM_PARTIAL) { +csum_failed: + if (!(tx_flags & IGBVF_TX_FLAGS_VLAN)) + return false; + goto no_csum; + } - if (skb->ip_summed == CHECKSUM_PARTIAL) { - switch (protocol) { - case htons(ETH_P_IP): - tu_cmd |= E1000_ADVTXD_TUCMD_IPV4; - if (ip_hdr(skb)->protocol == IPPROTO_TCP) - tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP; - break; - case htons(ETH_P_IPV6): - if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP) - tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP; - break; - default: - break; - } + switch (skb->csum_offset) { + case offsetof(struct tcphdr, check): + type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP; + /* fall through */ + case offsetof(struct udphdr, check): + break; + case offsetof(struct sctphdr, checksum): + /* validate that this is actually an SCTP request */ + if (((protocol == htons(ETH_P_IP)) && + (ip_hdr(skb)->protocol == IPPROTO_SCTP)) || + ((protocol == htons(ETH_P_IPV6)) && + igbvf_ipv6_csum_is_sctp(skb))) { + type_tucmd = E1000_ADVTXD_TUCMD_L4T_SCTP; + break; } - - context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd); - context_desc->seqnum_seed = 0; - context_desc->mss_l4len_idx = 0; - - buffer_info->time_stamp = jiffies; - buffer_info->dma = 0; - i++; - if (i == tx_ring->count) - i = 0; - tx_ring->next_to_use = i; - - return true; + default: + skb_checksum_help(skb); + goto csum_failed; } - return false; + vlan_macip_lens = skb_checksum_start_offset(skb) - + skb_network_offset(skb); +no_csum: + vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT; + vlan_macip_lens |= tx_flags & IGBVF_TX_FLAGS_VLAN_MASK; + + igbvf_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, 0); + return true; } static int igbvf_maybe_stop_tx(struct net_device *netdev, int size) @@ -2264,7 +2280,7 @@ static netdev_tx_t igbvf_xmit_frame_ring_adv(struct sk_buff *skb, if (tso) tx_flags |= IGBVF_TX_FLAGS_TSO; - else if (igbvf_tx_csum(adapter, tx_ring, skb, tx_flags, protocol) && + else if (igbvf_tx_csum(tx_ring, skb, tx_flags, protocol) && (skb->ip_summed == CHECKSUM_PARTIAL)) tx_flags |= IGBVF_TX_FLAGS_CSUM; @@ -2717,11 +2733,11 @@ static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->bd_number = cards_found++; netdev->hw_features = NETIF_F_SG | - NETIF_F_IP_CSUM | - NETIF_F_IPV6_CSUM | - NETIF_F_TSO | - NETIF_F_TSO6 | - NETIF_F_RXCSUM; + NETIF_F_TSO | + NETIF_F_TSO6 | + NETIF_F_RXCSUM | + NETIF_F_HW_CSUM | + NETIF_F_SCTP_CRC; netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_CTAG_TX | @@ -2731,11 +2747,14 @@ static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; - netdev->vlan_features |= NETIF_F_TSO; - netdev->vlan_features |= NETIF_F_TSO6; - netdev->vlan_features |= NETIF_F_IP_CSUM; - netdev->vlan_features |= NETIF_F_IPV6_CSUM; - netdev->vlan_features |= NETIF_F_SG; + netdev->vlan_features |= NETIF_F_SG | + NETIF_F_TSO | + NETIF_F_TSO6 | + NETIF_F_HW_CSUM | + NETIF_F_SCTP_CRC; + + netdev->mpls_features |= NETIF_F_HW_CSUM; + netdev->hw_enc_features |= NETIF_F_HW_CSUM; /*reset the controller to put the device in a known good state */ err = hw->mac.ops.reset_hw(hw); diff --git a/drivers/net/ethernet/intel/igbvf/vf.h b/drivers/net/ethernet/intel/igbvf/vf.h index 0f1eca639f68..f00a41d9a1ca 100644 --- a/drivers/net/ethernet/intel/igbvf/vf.h +++ b/drivers/net/ethernet/intel/igbvf/vf.h @@ -126,6 +126,7 @@ struct e1000_adv_tx_context_desc { #define E1000_ADVTXD_MACLEN_SHIFT 9 /* Adv ctxt desc mac len shift */ #define E1000_ADVTXD_TUCMD_IPV4 0x00000400 /* IP Packet Type: 1=IPv4 */ #define E1000_ADVTXD_TUCMD_L4T_TCP 0x00000800 /* L4 Packet TYPE of TCP */ +#define E1000_ADVTXD_TUCMD_L4T_SCTP 0x00001000 /* L4 packet TYPE of SCTP */ #define E1000_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */ #define E1000_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 4b9156cd8b93..84fa28ceb200 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -796,6 +796,10 @@ struct ixgbe_adapter { u8 default_up; unsigned long fwd_bitmask; /* Bitmask indicating in use pools */ +#define IXGBE_MAX_LINK_HANDLE 10 + struct ixgbe_mat_field *jump_tables[IXGBE_MAX_LINK_HANDLE]; + unsigned long tables; + /* maximum number of RETA entries among all devices supported by ixgbe * driver: currently it's x550 device in non-SRIOV mode */ @@ -925,6 +929,9 @@ s32 ixgbe_fdir_erase_perfect_filter_82599(struct ixgbe_hw *hw, u16 soft_id); void ixgbe_atr_compute_perfect_hash_82599(union ixgbe_atr_input *input, union ixgbe_atr_input *mask); +int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter, + struct ixgbe_fdir_filter *input, + u16 sw_idx); void ixgbe_set_rx_mode(struct net_device *netdev); #ifdef CONFIG_IXGBE_DCB void ixgbe_set_rx_drop_en(struct ixgbe_adapter *adapter); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index bea96b3bc90c..726e0eeee63b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -2520,9 +2520,9 @@ static int ixgbe_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, return ret; } -static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter, - struct ixgbe_fdir_filter *input, - u16 sw_idx) +int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter, + struct ixgbe_fdir_filter *input, + u16 sw_idx) { struct ixgbe_hw *hw = &adapter->hw; struct hlist_node *node2; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index e6035ff6b861..569cb0757c93 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -51,6 +51,8 @@ #include <linux/prefetch.h> #include <scsi/fc/fc_fcoe.h> #include <net/vxlan.h> +#include <net/pkt_cls.h> +#include <net/tc_act/tc_gact.h> #ifdef CONFIG_OF #include <linux/of_net.h> @@ -65,6 +67,7 @@ #include "ixgbe_common.h" #include "ixgbe_dcb_82599.h" #include "ixgbe_sriov.h" +#include "ixgbe_model.h" char ixgbe_driver_name[] = "ixgbe"; static const char ixgbe_driver_string[] = @@ -1089,7 +1092,7 @@ static void ixgbe_tx_timeout_reset(struct ixgbe_adapter *adapter) * @tx_ring: tx ring to clean **/ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, - struct ixgbe_ring *tx_ring) + struct ixgbe_ring *tx_ring, int napi_budget) { struct ixgbe_adapter *adapter = q_vector->adapter; struct ixgbe_tx_buffer *tx_buffer; @@ -1127,7 +1130,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, total_packets += tx_buffer->gso_segs; /* free the skb */ - dev_consume_skb_any(tx_buffer->skb); + napi_consume_skb(tx_buffer->skb, napi_budget); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -2784,7 +2787,7 @@ int ixgbe_poll(struct napi_struct *napi, int budget) #endif ixgbe_for_each_ring(ring, q_vector->tx) - clean_complete &= !!ixgbe_clean_tx_irq(q_vector, ring); + clean_complete &= !!ixgbe_clean_tx_irq(q_vector, ring, budget); /* Exit if we are called by netpoll or busy polling is active */ if ((budget <= 0) || !ixgbe_qv_lock_napi(q_vector)) @@ -5545,6 +5548,9 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter) #endif /* CONFIG_IXGBE_DCB */ #endif /* IXGBE_FCOE */ + /* initialize static ixgbe jump table entries */ + adapter->jump_tables[0] = ixgbe_ipv4_fields; + adapter->mac_table = kzalloc(sizeof(struct ixgbe_mac_addr) * hw->mac.num_rar_entries, GFP_ATOMIC); @@ -8200,6 +8206,225 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc) return 0; } +static int ixgbe_delete_clsu32(struct ixgbe_adapter *adapter, + struct tc_cls_u32_offload *cls) +{ + int err; + + spin_lock(&adapter->fdir_perfect_lock); + err = ixgbe_update_ethtool_fdir_entry(adapter, NULL, cls->knode.handle); + spin_unlock(&adapter->fdir_perfect_lock); + return err; +} + +static int ixgbe_configure_clsu32_add_hnode(struct ixgbe_adapter *adapter, + __be16 protocol, + struct tc_cls_u32_offload *cls) +{ + /* This ixgbe devices do not support hash tables at the moment + * so abort when given hash tables. + */ + if (cls->hnode.divisor > 0) + return -EINVAL; + + set_bit(TC_U32_USERHTID(cls->hnode.handle), &adapter->tables); + return 0; +} + +static int ixgbe_configure_clsu32_del_hnode(struct ixgbe_adapter *adapter, + struct tc_cls_u32_offload *cls) +{ + clear_bit(TC_U32_USERHTID(cls->hnode.handle), &adapter->tables); + return 0; +} + +static int ixgbe_configure_clsu32(struct ixgbe_adapter *adapter, + __be16 protocol, + struct tc_cls_u32_offload *cls) +{ + u32 loc = cls->knode.handle & 0xfffff; + struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_mat_field *field_ptr; + struct ixgbe_fdir_filter *input; + union ixgbe_atr_input mask; +#ifdef CONFIG_NET_CLS_ACT + const struct tc_action *a; +#endif + int i, err = 0; + u8 queue; + u32 handle; + + memset(&mask, 0, sizeof(union ixgbe_atr_input)); + handle = cls->knode.handle; + + /* At the moment cls_u32 jumps to transport layer and skips past + * L2 headers. The canonical method to match L2 frames is to use + * negative values. However this is error prone at best but really + * just broken because there is no way to "know" what sort of hdr + * is in front of the transport layer. Fix cls_u32 to support L2 + * headers when needed. + */ + if (protocol != htons(ETH_P_IP)) + return -EINVAL; + + if (cls->knode.link_handle || + cls->knode.link_handle >= IXGBE_MAX_LINK_HANDLE) { + struct ixgbe_nexthdr *nexthdr = ixgbe_ipv4_jumps; + u32 uhtid = TC_U32_USERHTID(cls->knode.link_handle); + + if (!test_bit(uhtid, &adapter->tables)) + return -EINVAL; + + for (i = 0; nexthdr[i].jump; i++) { + if (nexthdr->o != cls->knode.sel->offoff || + nexthdr->s != cls->knode.sel->offshift || + nexthdr->m != cls->knode.sel->offmask || + /* do not support multiple key jumps its just mad */ + cls->knode.sel->nkeys > 1) + return -EINVAL; + + if (nexthdr->off != cls->knode.sel->keys[0].off || + nexthdr->val != cls->knode.sel->keys[0].val || + nexthdr->mask != cls->knode.sel->keys[0].mask) + return -EINVAL; + + if (uhtid >= IXGBE_MAX_LINK_HANDLE) + return -EINVAL; + + adapter->jump_tables[uhtid] = nexthdr->jump; + } + return 0; + } + + if (loc >= ((1024 << adapter->fdir_pballoc) - 2)) { + e_err(drv, "Location out of range\n"); + return -EINVAL; + } + + /* cls u32 is a graph starting at root node 0x800. The driver tracks + * links and also the fields used to advance the parser across each + * link (e.g. nexthdr/eat parameters from 'tc'). This way we can map + * the u32 graph onto the hardware parse graph denoted in ixgbe_model.h + * To add support for new nodes update ixgbe_model.h parse structures + * this function _should_ be generic try not to hardcode values here. + */ + if (TC_U32_USERHTID(handle) == 0x800) { + field_ptr = adapter->jump_tables[0]; + } else { + if (TC_U32_USERHTID(handle) >= ARRAY_SIZE(adapter->jump_tables)) + return -EINVAL; + + field_ptr = adapter->jump_tables[TC_U32_USERHTID(handle)]; + } + + if (!field_ptr) + return -EINVAL; + + input = kzalloc(sizeof(*input), GFP_KERNEL); + if (!input) + return -ENOMEM; + + for (i = 0; i < cls->knode.sel->nkeys; i++) { + int off = cls->knode.sel->keys[i].off; + __be32 val = cls->knode.sel->keys[i].val; + __be32 m = cls->knode.sel->keys[i].mask; + bool found_entry = false; + int j; + + for (j = 0; field_ptr[j].val; j++) { + if (field_ptr[j].off == off && + field_ptr[j].mask == m) { + field_ptr[j].val(input, &mask, val, m); + input->filter.formatted.flow_type |= + field_ptr[j].type; + found_entry = true; + break; + } + } + + if (!found_entry) + goto err_out; + } + + mask.formatted.flow_type = IXGBE_ATR_L4TYPE_IPV6_MASK | + IXGBE_ATR_L4TYPE_MASK; + + if (input->filter.formatted.flow_type == IXGBE_ATR_FLOW_TYPE_IPV4) + mask.formatted.flow_type &= IXGBE_ATR_L4TYPE_IPV6_MASK; + +#ifdef CONFIG_NET_CLS_ACT + if (list_empty(&cls->knode.exts->actions)) + goto err_out; + + list_for_each_entry(a, &cls->knode.exts->actions, list) { + if (!is_tcf_gact_shot(a)) + goto err_out; + } +#endif + + input->action = IXGBE_FDIR_DROP_QUEUE; + queue = IXGBE_FDIR_DROP_QUEUE; + input->sw_idx = loc; + + spin_lock(&adapter->fdir_perfect_lock); + + if (hlist_empty(&adapter->fdir_filter_list)) { + memcpy(&adapter->fdir_mask, &mask, sizeof(mask)); + err = ixgbe_fdir_set_input_mask_82599(hw, &mask); + if (err) + goto err_out_w_lock; + } else if (memcmp(&adapter->fdir_mask, &mask, sizeof(mask))) { + err = -EINVAL; + goto err_out_w_lock; + } + + ixgbe_atr_compute_perfect_hash_82599(&input->filter, &mask); + err = ixgbe_fdir_write_perfect_filter_82599(hw, &input->filter, + input->sw_idx, queue); + if (!err) + ixgbe_update_ethtool_fdir_entry(adapter, input, input->sw_idx); + spin_unlock(&adapter->fdir_perfect_lock); + + return err; +err_out_w_lock: + spin_unlock(&adapter->fdir_perfect_lock); +err_out: + kfree(input); + return -EINVAL; +} + +int __ixgbe_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) +{ + struct ixgbe_adapter *adapter = netdev_priv(dev); + + if (TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS) && + tc->type == TC_SETUP_CLSU32) { + switch (tc->cls_u32->command) { + case TC_CLSU32_NEW_KNODE: + case TC_CLSU32_REPLACE_KNODE: + return ixgbe_configure_clsu32(adapter, + proto, tc->cls_u32); + case TC_CLSU32_DELETE_KNODE: + return ixgbe_delete_clsu32(adapter, tc->cls_u32); + case TC_CLSU32_NEW_HNODE: + case TC_CLSU32_REPLACE_HNODE: + return ixgbe_configure_clsu32_add_hnode(adapter, proto, + tc->cls_u32); + case TC_CLSU32_DELETE_HNODE: + return ixgbe_configure_clsu32_del_hnode(adapter, + tc->cls_u32); + default: + return -EINVAL; + } + } + + if (tc->type != TC_SETUP_MQPRIO) + return -EINVAL; + + return ixgbe_setup_tc(dev, tc->tc); +} + #ifdef CONFIG_PCI_IOV void ixgbe_sriov_reinit(struct ixgbe_adapter *adapter) { @@ -8262,19 +8487,17 @@ static int ixgbe_set_features(struct net_device *netdev, } /* - * Check if Flow Director n-tuple support was enabled or disabled. If - * the state changed, we need to reset. + * Check if Flow Director n-tuple support or hw_tc support was + * enabled or disabled. If the state changed, we need to reset. */ - switch (features & NETIF_F_NTUPLE) { - case NETIF_F_NTUPLE: + if ((features & NETIF_F_NTUPLE) || (features & NETIF_F_HW_TC)) { /* turn off ATR, enable perfect filters and reset */ if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) need_reset = true; adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE; adapter->flags |= IXGBE_FLAG_FDIR_PERFECT_CAPABLE; - break; - default: + } else { /* turn off perfect filters, enable ATR and reset */ if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) need_reset = true; @@ -8282,23 +8505,16 @@ static int ixgbe_set_features(struct net_device *netdev, adapter->flags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE; /* We cannot enable ATR if SR-IOV is enabled */ - if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) - break; - - /* We cannot enable ATR if we have 2 or more traffic classes */ - if (netdev_get_num_tc(netdev) > 1) - break; - - /* We cannot enable ATR if RSS is disabled */ - if (adapter->ring_feature[RING_F_RSS].limit <= 1) - break; - - /* A sample rate of 0 indicates ATR disabled */ - if (!adapter->atr_sample_rate) - break; - - adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE; - break; + if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED || + /* We cannot enable ATR if we have 2 or more tcs */ + (netdev_get_num_tc(netdev) > 1) || + /* We cannot enable ATR if RSS is disabled */ + (adapter->ring_feature[RING_F_RSS].limit <= 1) || + /* A sample rate of 0 indicates ATR disabled */ + (!adapter->atr_sample_rate)) + ; /* do nothing not supported */ + else /* otherwise supported and set the flag */ + adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE; } if (features & NETIF_F_HW_VLAN_CTAG_RX) @@ -8657,9 +8873,7 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_set_vf_trust = ixgbe_ndo_set_vf_trust, .ndo_get_vf_config = ixgbe_ndo_get_vf_config, .ndo_get_stats64 = ixgbe_get_stats64, -#ifdef CONFIG_IXGBE_DCB - .ndo_setup_tc = ixgbe_setup_tc, -#endif + .ndo_setup_tc = __ixgbe_setup_tc, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = ixgbe_netpoll, #endif @@ -9030,7 +9244,8 @@ skip_sriov: case ixgbe_mac_X550EM_x: netdev->features |= NETIF_F_SCTP_CRC; netdev->hw_features |= NETIF_F_SCTP_CRC | - NETIF_F_NTUPLE; + NETIF_F_NTUPLE | + NETIF_F_HW_TC; break; default: break; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h new file mode 100644 index 000000000000..ce48872d4782 --- /dev/null +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h @@ -0,0 +1,112 @@ +/******************************************************************************* + * + * Intel 10 Gigabit PCI Express Linux drive + * Copyright(c) 2016 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see <http://www.gnu.org/licenses/>. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Contact Information: + * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> + * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + * + ******************************************************************************/ + +#ifndef _IXGBE_MODEL_H_ +#define _IXGBE_MODEL_H_ + +#include "ixgbe.h" +#include "ixgbe_type.h" + +struct ixgbe_mat_field { + unsigned int off; + unsigned int mask; + int (*val)(struct ixgbe_fdir_filter *input, + union ixgbe_atr_input *mask, + u32 val, u32 m); + unsigned int type; +}; + +static inline int ixgbe_mat_prgm_sip(struct ixgbe_fdir_filter *input, + union ixgbe_atr_input *mask, + u32 val, u32 m) +{ + input->filter.formatted.src_ip[0] = val; + mask->formatted.src_ip[0] = m; + return 0; +} + +static inline int ixgbe_mat_prgm_dip(struct ixgbe_fdir_filter *input, + union ixgbe_atr_input *mask, + u32 val, u32 m) +{ + input->filter.formatted.dst_ip[0] = val; + mask->formatted.dst_ip[0] = m; + return 0; +} + +static struct ixgbe_mat_field ixgbe_ipv4_fields[] = { + { .off = 12, .mask = -1, .val = ixgbe_mat_prgm_sip, + .type = IXGBE_ATR_FLOW_TYPE_IPV4}, + { .off = 16, .mask = -1, .val = ixgbe_mat_prgm_dip, + .type = IXGBE_ATR_FLOW_TYPE_IPV4}, + { .val = NULL } /* terminal node */ +}; + +static inline int ixgbe_mat_prgm_sport(struct ixgbe_fdir_filter *input, + union ixgbe_atr_input *mask, + u32 val, u32 m) +{ + input->filter.formatted.src_port = val & 0xffff; + mask->formatted.src_port = m & 0xffff; + return 0; +}; + +static inline int ixgbe_mat_prgm_dport(struct ixgbe_fdir_filter *input, + union ixgbe_atr_input *mask, + u32 val, u32 m) +{ + input->filter.formatted.dst_port = val & 0xffff; + mask->formatted.dst_port = m & 0xffff; + return 0; +}; + +static struct ixgbe_mat_field ixgbe_tcp_fields[] = { + {.off = 0, .mask = 0xffff, .val = ixgbe_mat_prgm_sport, + .type = IXGBE_ATR_FLOW_TYPE_TCPV4}, + {.off = 2, .mask = 0xffff, .val = ixgbe_mat_prgm_dport, + .type = IXGBE_ATR_FLOW_TYPE_TCPV4}, + { .val = NULL } /* terminal node */ +}; + +struct ixgbe_nexthdr { + /* offset, shift, and mask of position to next header */ + unsigned int o; + u32 s; + u32 m; + /* match criteria to make this jump*/ + unsigned int off; + u32 val; + u32 mask; + /* location of jump to make */ + struct ixgbe_mat_field *jump; +}; + +static struct ixgbe_nexthdr ixgbe_ipv4_jumps[] = { + { .o = 0, .s = 6, .m = 0xf, + .off = 8, .val = 0x600, .mask = 0xff00, .jump = ixgbe_tcp_fields}, + { .jump = NULL } /* terminal node */ +}; +#endif /* _IXGBE_MODEL_H_ */ |