Diffstat (limited to 'drivers/net/ethernet')
132 files changed, 12238 insertions, 2095 deletions
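A recurring cleanup in this commit replaces open-coded chains of platform_driver_register()/platform_driver_unregister() calls (see bfin_mac.c, bcm63xx_enet.c and fec_mpc52xx.c below) with the platform_register_drivers()/platform_unregister_drivers() helpers. The following is a minimal sketch of the resulting pattern, not code taken from this commit; the foo_* names are hypothetical:

/*
 * Illustrative sketch only.  platform_register_drivers() registers the
 * drivers in array order and unregisters the already-registered ones if
 * a later registration fails; platform_unregister_drivers() removes
 * them in reverse order.
 */
static struct platform_driver * const foo_drivers[] = {
	&foo_bus_driver,	/* hypothetical: must come before the MAC driver */
	&foo_mac_driver,
};

static int __init foo_init(void)
{
	return platform_register_drivers(foo_drivers, ARRAY_SIZE(foo_drivers));
}
module_init(foo_init);

static void __exit foo_exit(void)
{
	platform_unregister_drivers(foo_drivers, ARRAY_SIZE(foo_drivers));
}
module_exit(foo_exit);

The other broad theme visible in the hunks below is NAPI/busy-poll plumbing: explicit napi_hash_add() calls are dropped (netif_napi_add() now hashes the instance itself), Tx-only poll contexts switch to netif_tx_napi_add(), and per-driver skb_mark_napi_id()/ndo_busy_poll code such as the bnx2x low-latency receive path is removed in favour of the core busy-poll implementation.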
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index 31c5e476fd64..0b13af8e4070 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -122,6 +122,7 @@ config FEALNX cards. <http://www.myson.com.tw/> source "drivers/net/ethernet/natsemi/Kconfig" +source "drivers/net/ethernet/netronome/Kconfig" source "drivers/net/ethernet/8390/Kconfig" config NET_NETX diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index 071f84eb6f3f..38dc1a776a2b 100644 --- a/drivers/net/ethernet/Makefile +++ b/drivers/net/ethernet/Makefile @@ -53,6 +53,7 @@ obj-$(CONFIG_NET_VENDOR_MOXART) += moxa/ obj-$(CONFIG_NET_VENDOR_MYRI) += myricom/ obj-$(CONFIG_FEALNX) += fealnx.o obj-$(CONFIG_NET_VENDOR_NATSEMI) += natsemi/ +obj-$(CONFIG_NET_VENDOR_NETRONOME) += netronome/ obj-$(CONFIG_NET_NETX) += netx-eth.o obj-$(CONFIG_NET_VENDOR_NUVOTON) += nuvoton/ obj-$(CONFIG_NET_VENDOR_NVIDIA) += nvidia/ diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c index 096531a73124..e0e95a15cab0 100644 --- a/drivers/net/ethernet/adi/bfin_mac.c +++ b/drivers/net/ethernet/adi/bfin_mac.c @@ -1912,21 +1912,21 @@ static struct platform_driver bfin_mac_driver = { }, }; +static struct platform_driver * const drivers[] = { + &bfin_mii_bus_driver, + &bfin_mac_driver, +}; + static int __init bfin_mac_init(void) { - int ret; - ret = platform_driver_register(&bfin_mii_bus_driver); - if (!ret) - return platform_driver_register(&bfin_mac_driver); - return -ENODEV; + return platform_register_drivers(drivers, ARRAY_SIZE(drivers)); } module_init(bfin_mac_init); static void __exit bfin_mac_cleanup(void) { - platform_driver_unregister(&bfin_mac_driver); - platform_driver_unregister(&bfin_mii_bus_driver); + platform_unregister_drivers(drivers, ARRAY_SIZE(drivers)); } module_exit(bfin_mac_cleanup); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 53ce1222b11d..8a9b493566c9 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -2024,7 +2024,6 @@ read_again: skb->dev = netdev; skb->protocol = eth_type_trans(skb, netdev); skb_record_rx_queue(skb, channel->queue_index); - skb_mark_napi_id(skb, napi); napi_gro_receive(napi, skb); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 9147a0107c44..d21ee8767c2d 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -1084,7 +1084,7 @@ static const struct net_device_ops xgene_ndev_ops = { }; #ifdef CONFIG_ACPI -static int xgene_get_port_id_acpi(struct device *dev, +static void xgene_get_port_id_acpi(struct device *dev, struct xgene_enet_pdata *pdata) { acpi_status status; @@ -1097,24 +1097,19 @@ static int xgene_get_port_id_acpi(struct device *dev, pdata->port_id = temp; } - return 0; + return; } #endif -static int xgene_get_port_id_dt(struct device *dev, struct xgene_enet_pdata *pdata) +static void xgene_get_port_id_dt(struct device *dev, struct xgene_enet_pdata *pdata) { u32 id = 0; - int ret; - ret = of_property_read_u32(dev->of_node, "port-id", &id); - if (ret) { - pdata->port_id = 0; - ret = 0; - } else { - pdata->port_id = id & BIT(0); - } + of_property_read_u32(dev->of_node, "port-id", &id); - return ret; + pdata->port_id = id & BIT(0); + + return; } static int xgene_get_tx_delay(struct xgene_enet_pdata *pdata) @@ -1209,13 +1204,11 @@ static int xgene_enet_get_resources(struct 
xgene_enet_pdata *pdata) } if (dev->of_node) - ret = xgene_get_port_id_dt(dev, pdata); + xgene_get_port_id_dt(dev, pdata); #ifdef CONFIG_ACPI else - ret = xgene_get_port_id_acpi(dev, pdata); + xgene_get_port_id_acpi(dev, pdata); #endif - if (ret) - return ret; if (!device_get_mac_address(dev, ndev->dev_addr, ETH_ALEN)) eth_hw_addr_random(ndev); diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index bd377a6b067d..d3763bc2c561 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -577,7 +577,6 @@ static int alx_alloc_rings(struct alx_priv *alx) alx->int_mask &= ~ALX_ISR_ALL_QUEUES; alx->int_mask |= ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0; - alx->tx_ringsz = alx->tx_ringsz; netif_napi_add(alx->dev, &alx->napi, alx_poll, 64); diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 8b1929e9f698..a54bafad3538 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -2884,33 +2884,21 @@ struct platform_driver bcm63xx_enet_shared_driver = { }, }; +static struct platform_driver * const drivers[] = { + &bcm63xx_enet_shared_driver, + &bcm63xx_enet_driver, + &bcm63xx_enetsw_driver, +}; + /* entry point */ static int __init bcm_enet_init(void) { - int ret; - - ret = platform_driver_register(&bcm63xx_enet_shared_driver); - if (ret) - return ret; - - ret = platform_driver_register(&bcm63xx_enet_driver); - if (ret) - platform_driver_unregister(&bcm63xx_enet_shared_driver); - - ret = platform_driver_register(&bcm63xx_enetsw_driver); - if (ret) { - platform_driver_unregister(&bcm63xx_enet_driver); - platform_driver_unregister(&bcm63xx_enet_shared_driver); - } - - return ret; + return platform_register_drivers(drivers, ARRAY_SIZE(drivers)); } static void __exit bcm_enet_exit(void) { - platform_driver_unregister(&bcm63xx_enet_driver); - platform_driver_unregister(&bcm63xx_enetsw_driver); - platform_driver_unregister(&bcm63xx_enet_shared_driver); + platform_unregister_drivers(drivers, ARRAY_SIZE(drivers)); } diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 858106352ce9..993c780bdfab 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1216,7 +1216,7 @@ static int bcm_sysport_init_tx_ring(struct bcm_sysport_priv *priv, /* Initialize SW view of the ring */ spin_lock_init(&ring->lock); ring->priv = priv; - netif_napi_add(priv->netdev, &ring->napi, bcm_sysport_tx_poll, 64); + netif_tx_napi_add(priv->netdev, &ring->napi, bcm_sysport_tx_poll, 64); ring->index = index; ring->size = size; ring->alloc_size = ring->size; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index b5e64b02200c..0b214b5d944a 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -540,10 +540,6 @@ struct bnx2x_fastpath { struct napi_struct napi; -#ifdef CONFIG_NET_RX_BUSY_POLL - unsigned long busy_poll_state; -#endif - union host_hc_status_block status_blk; /* chip independent shortcuts into sb structure */ __le16 *sb_index_values; @@ -617,115 +613,6 @@ struct bnx2x_fastpath { #define bnx2x_fp_stats(bp, fp) (&((bp)->fp_stats[(fp)->index])) #define bnx2x_fp_qstats(bp, fp) (&((bp)->fp_stats[(fp)->index].eth_q_stats)) -#ifdef CONFIG_NET_RX_BUSY_POLL - -enum bnx2x_fp_state { - BNX2X_STATE_FP_NAPI = BIT(0), /* NAPI handler owns the queue */ 
- - BNX2X_STATE_FP_NAPI_REQ_BIT = 1, /* NAPI would like to own the queue */ - BNX2X_STATE_FP_NAPI_REQ = BIT(1), - - BNX2X_STATE_FP_POLL_BIT = 2, - BNX2X_STATE_FP_POLL = BIT(2), /* busy_poll owns the queue */ - - BNX2X_STATE_FP_DISABLE_BIT = 3, /* queue is dismantled */ -}; - -static inline void bnx2x_fp_busy_poll_init(struct bnx2x_fastpath *fp) -{ - WRITE_ONCE(fp->busy_poll_state, 0); -} - -/* called from the device poll routine to get ownership of a FP */ -static inline bool bnx2x_fp_lock_napi(struct bnx2x_fastpath *fp) -{ - unsigned long prev, old = READ_ONCE(fp->busy_poll_state); - - while (1) { - switch (old) { - case BNX2X_STATE_FP_POLL: - /* make sure bnx2x_fp_lock_poll() wont starve us */ - set_bit(BNX2X_STATE_FP_NAPI_REQ_BIT, - &fp->busy_poll_state); - /* fallthrough */ - case BNX2X_STATE_FP_POLL | BNX2X_STATE_FP_NAPI_REQ: - return false; - default: - break; - } - prev = cmpxchg(&fp->busy_poll_state, old, BNX2X_STATE_FP_NAPI); - if (unlikely(prev != old)) { - old = prev; - continue; - } - return true; - } -} - -static inline void bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp) -{ - smp_wmb(); - fp->busy_poll_state = 0; -} - -/* called from bnx2x_low_latency_poll() */ -static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp) -{ - return cmpxchg(&fp->busy_poll_state, 0, BNX2X_STATE_FP_POLL) == 0; -} - -static inline void bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp) -{ - smp_mb__before_atomic(); - clear_bit(BNX2X_STATE_FP_POLL_BIT, &fp->busy_poll_state); -} - -/* true if a socket is polling */ -static inline bool bnx2x_fp_ll_polling(struct bnx2x_fastpath *fp) -{ - return READ_ONCE(fp->busy_poll_state) & BNX2X_STATE_FP_POLL; -} - -/* false if fp is currently owned */ -static inline bool bnx2x_fp_ll_disable(struct bnx2x_fastpath *fp) -{ - set_bit(BNX2X_STATE_FP_DISABLE_BIT, &fp->busy_poll_state); - return !bnx2x_fp_ll_polling(fp); - -} -#else -static inline void bnx2x_fp_busy_poll_init(struct bnx2x_fastpath *fp) -{ -} - -static inline bool bnx2x_fp_lock_napi(struct bnx2x_fastpath *fp) -{ - return true; -} - -static inline void bnx2x_fp_unlock_napi(struct bnx2x_fastpath *fp) -{ -} - -static inline bool bnx2x_fp_lock_poll(struct bnx2x_fastpath *fp) -{ - return false; -} - -static inline void bnx2x_fp_unlock_poll(struct bnx2x_fastpath *fp) -{ -} - -static inline bool bnx2x_fp_ll_polling(struct bnx2x_fastpath *fp) -{ - return false; -} -static inline bool bnx2x_fp_ll_disable(struct bnx2x_fastpath *fp) -{ - return true; -} -#endif /* CONFIG_NET_RX_BUSY_POLL */ - /* Use 2500 as a mini-jumbo MTU for FCoE */ #define BNX2X_FCOE_MINI_JUMBO_MTU 2500 diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index f8d7a2f06950..d9add7c02e42 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -46,7 +46,6 @@ static void bnx2x_add_all_napi_cnic(struct bnx2x *bp) for_each_rx_queue_cnic(bp, i) { netif_napi_add(bp->dev, &bnx2x_fp(bp, i, napi), bnx2x_poll, NAPI_POLL_WEIGHT); - napi_hash_add(&bnx2x_fp(bp, i, napi)); } } @@ -58,7 +57,6 @@ static void bnx2x_add_all_napi(struct bnx2x *bp) for_each_eth_queue(bp, i) { netif_napi_add(bp->dev, &bnx2x_fp(bp, i, napi), bnx2x_poll, NAPI_POLL_WEIGHT); - napi_hash_add(&bnx2x_fp(bp, i, napi)); } } @@ -1094,12 +1092,7 @@ reuse_rx: __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), le16_to_cpu(cqe_fp->vlan_tag)); - skb_mark_napi_id(skb, &fp->napi); - - if (bnx2x_fp_ll_polling(fp)) - netif_receive_skb(skb); - else - napi_gro_receive(&fp->napi, 
skb); + napi_gro_receive(&fp->napi, skb); next_rx: rx_buf->data = NULL; @@ -1869,7 +1862,6 @@ static void bnx2x_napi_enable_cnic(struct bnx2x *bp) int i; for_each_rx_queue_cnic(bp, i) { - bnx2x_fp_busy_poll_init(&bp->fp[i]); napi_enable(&bnx2x_fp(bp, i, napi)); } } @@ -1879,7 +1871,6 @@ static void bnx2x_napi_enable(struct bnx2x *bp) int i; for_each_eth_queue(bp, i) { - bnx2x_fp_busy_poll_init(&bp->fp[i]); napi_enable(&bnx2x_fp(bp, i, napi)); } } @@ -1890,8 +1881,6 @@ static void bnx2x_napi_disable_cnic(struct bnx2x *bp) for_each_rx_queue_cnic(bp, i) { napi_disable(&bnx2x_fp(bp, i, napi)); - while (!bnx2x_fp_ll_disable(&bp->fp[i])) - usleep_range(1000, 2000); } } @@ -1901,8 +1890,6 @@ static void bnx2x_napi_disable(struct bnx2x *bp) for_each_eth_queue(bp, i) { napi_disable(&bnx2x_fp(bp, i, napi)); - while (!bnx2x_fp_ll_disable(&bp->fp[i])) - usleep_range(1000, 2000); } } @@ -3232,9 +3219,6 @@ static int bnx2x_poll(struct napi_struct *napi, int budget) return 0; } #endif - if (!bnx2x_fp_lock_napi(fp)) - return budget; - for_each_cos_in_tx_queue(fp, cos) if (bnx2x_tx_queue_has_work(fp->txdata_ptr[cos])) bnx2x_tx_int(bp, fp->txdata_ptr[cos]); @@ -3243,14 +3227,10 @@ static int bnx2x_poll(struct napi_struct *napi, int budget) work_done += bnx2x_rx_int(fp, budget - work_done); /* must not complete if we consumed full budget */ - if (work_done >= budget) { - bnx2x_fp_unlock_napi(fp); + if (work_done >= budget) break; - } } - bnx2x_fp_unlock_napi(fp); - /* Fall out from the NAPI loop if needed */ if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) { @@ -3294,32 +3274,6 @@ static int bnx2x_poll(struct napi_struct *napi, int budget) return work_done; } -#ifdef CONFIG_NET_RX_BUSY_POLL -/* must be called with local_bh_disable()d */ -int bnx2x_low_latency_recv(struct napi_struct *napi) -{ - struct bnx2x_fastpath *fp = container_of(napi, struct bnx2x_fastpath, - napi); - struct bnx2x *bp = fp->bp; - int found = 0; - - if ((bp->state == BNX2X_STATE_CLOSED) || - (bp->state == BNX2X_STATE_ERROR) || - (bp->dev->features & (NETIF_F_LRO | NETIF_F_GRO))) - return LL_FLUSH_FAILED; - - if (!bnx2x_fp_lock_poll(fp)) - return LL_FLUSH_BUSY; - - if (bnx2x_has_rx_work(fp)) - found = bnx2x_rx_int(fp, 4); - - bnx2x_fp_unlock_poll(fp); - - return found; -} -#endif - /* we split the first BD into headers and data BDs * to ease the pain of our fellow microcode engineers * we use one mapping for both BDs diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index b7d32e8412f1..4cbb03f87b5a 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -570,13 +570,6 @@ int bnx2x_enable_msix(struct bnx2x *bp); int bnx2x_enable_msi(struct bnx2x *bp); /** - * bnx2x_low_latency_recv - LL callback - * - * @napi: napi structure - */ -int bnx2x_low_latency_recv(struct napi_struct *napi); - -/** * bnx2x_alloc_mem_bp - allocate memories outsize main driver structure * * @bp: driver handle diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index d84efcd34fac..a3ce9f2a2335 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -52,7 +52,7 @@ static const struct { { Q_STATS_OFFSET32(rx_skb_alloc_failed), 4, "[%s]: rx_skb_alloc_discard" }, { Q_STATS_OFFSET32(hw_csum_err), 4, "[%s]: rx_csum_offload_errors" }, - + { Q_STATS_OFFSET32(driver_xoff), 4, "[%s]: tx_exhaustion_events" }, { 
Q_STATS_OFFSET32(total_bytes_transmitted_hi), 8, "[%s]: tx_bytes" }, /* 10 */{ Q_STATS_OFFSET32(total_unicast_packets_transmitted_hi), 8, "[%s]: tx_ucast_packets" }, @@ -128,7 +128,8 @@ static const struct { 4, STATS_FLAGS_BOTH, "rx_skb_alloc_discard" }, { STATS_OFFSET32(hw_csum_err), 4, STATS_FLAGS_BOTH, "rx_csum_offload_errors" }, - + { STATS_OFFSET32(driver_xoff), + 4, STATS_FLAGS_BOTH, "tx_exhaustion_events" }, { STATS_OFFSET32(total_bytes_transmitted_hi), 8, STATS_FLAGS_BOTH, "tx_bytes" }, { STATS_OFFSET32(tx_stat_ifhcoutbadoctets_hi), @@ -3068,9 +3069,7 @@ static void bnx2x_self_test(struct net_device *dev, #define IS_PORT_STAT(i) \ ((bnx2x_stats_arr[i].flags & STATS_FLAGS_BOTH) == STATS_FLAGS_PORT) #define IS_FUNC_STAT(i) (bnx2x_stats_arr[i].flags & STATS_FLAGS_FUNC) -#define HIDE_PORT_STAT(bp) \ - ((IS_MF(bp) && !(bp->msg_enable & BNX2X_MSG_STATS)) || \ - IS_VF(bp)) +#define HIDE_PORT_STAT(bp) IS_VF(bp) /* ethtool statistics are displayed for all regular ethernet queues and the * fcoe L2 queue if not disabled diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h index cafd5de675cf..27aa0802d87d 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h @@ -3013,8 +3013,8 @@ struct afex_stats { }; #define BCM_5710_FW_MAJOR_VERSION 7 -#define BCM_5710_FW_MINOR_VERSION 12 -#define BCM_5710_FW_REVISION_VERSION 30 +#define BCM_5710_FW_MINOR_VERSION 13 +#define BCM_5710_FW_REVISION_VERSION 1 #define BCM_5710_FW_ENGINEERING_VERSION 0 #define BCM_5710_FW_COMPILE_FLAGS 1 @@ -3583,7 +3583,7 @@ enum classify_rule { CLASSIFY_RULE_OPCODE_MAC, CLASSIFY_RULE_OPCODE_VLAN, CLASSIFY_RULE_OPCODE_PAIR, - CLASSIFY_RULE_OPCODE_VXLAN, + CLASSIFY_RULE_OPCODE_IMAC_VNI, MAX_CLASSIFY_RULE }; @@ -3826,6 +3826,17 @@ struct eth_classify_header { __le32 echo; }; +/* + * Command for adding/removing a Inner-MAC/VNI classification rule + */ +struct eth_classify_imac_vni_cmd { + struct eth_classify_cmd_header header; + __le32 vni; + __le16 imac_lsb; + __le16 imac_mid; + __le16 imac_msb; + __le16 reserved1; +}; /* * Command for adding/removing a MAC classification rule @@ -3869,14 +3880,6 @@ struct eth_classify_vlan_cmd { /* * Command for adding/removing a VXLAN classification rule */ -struct eth_classify_vxlan_cmd { - struct eth_classify_cmd_header header; - __le32 vni; - __le16 inner_mac_lsb; - __le16 inner_mac_mid; - __le16 inner_mac_msb; - __le16 reserved1; -}; /* * union for eth classification rule @@ -3885,7 +3888,7 @@ union eth_classify_rule_cmd { struct eth_classify_mac_cmd mac; struct eth_classify_vlan_cmd vlan; struct eth_classify_pair_cmd pair; - struct eth_classify_vxlan_cmd vxlan; + struct eth_classify_imac_vni_cmd imac_vni; }; /* @@ -5623,6 +5626,14 @@ enum igu_mode { MAX_IGU_MODE }; +/* + * Inner Headers Classification Type + */ +enum inner_clss_type { + INNER_CLSS_DISABLED, + INNER_CLSS_USE_VLAN, + INNER_CLSS_USE_VNI, + MAX_INNER_CLSS_TYPE}; /* * IP versions @@ -5953,14 +5964,6 @@ enum ts_offset_cmd { MAX_TS_OFFSET_CMD }; -/* Tunnel Mode */ -enum tunnel_mode { - TUNN_MODE_NONE, - TUNN_MODE_VXLAN, - TUNN_MODE_GRE, - MAX_TUNNEL_MODE -}; - /* zone A per-queue data */ struct ustorm_queue_zone_data { struct ustorm_eth_rx_producers eth_rx_producers; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 2e611dc5f162..6c4e3a69976f 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ 
b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -13004,9 +13004,6 @@ static const struct net_device_ops bnx2x_netdev_ops = { .ndo_fcoe_get_wwn = bnx2x_fcoe_get_wwn, #endif -#ifdef CONFIG_NET_RX_BUSY_POLL - .ndo_busy_poll = bnx2x_low_latency_recv, -#endif .ndo_get_phys_port_id = bnx2x_get_phys_port_id, .ndo_set_vf_link_state = bnx2x_set_vf_link_state, .ndo_features_check = bnx2x_features_check, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index bdf094fb6ef9..11446adc03cc 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4231,12 +4231,10 @@ static void bnxt_init_napi(struct bnxt *bp) bnapi = bp->bnapi[i]; netif_napi_add(bp->dev, &bnapi->napi, bnxt_poll, 64); - napi_hash_add(&bnapi->napi); } } else { bnapi = bp->bnapi[0]; netif_napi_add(bp->dev, &bnapi->napi, bnxt_poll, 64); - napi_hash_add(&bnapi->napi); } } diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 17f017ab4dac..b15a60d787c7 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2041,11 +2041,11 @@ static void bcmgenet_init_tx_napi(struct bcmgenet_priv *priv) for (i = 0; i < priv->hw_params->tx_queues; ++i) { ring = &priv->tx_rings[i]; - netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64); + netif_tx_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64); } ring = &priv->tx_rings[DESC_INDEX]; - netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64); + netif_tx_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64); } static void bcmgenet_enable_tx_napi(struct bcmgenet_priv *priv) diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index b7b93e7a643d..48d8fbb1c220 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -1864,7 +1864,6 @@ static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl, skb->truesize += skb->data_len; skb->ip_summed = CHECKSUM_UNNECESSARY; skb_record_rx_queue(skb, rxq->rspq.idx); - skb_mark_napi_id(skb, &rxq->rspq.napi); pi = netdev_priv(skb->dev); if (pi->rxtstamp) cxgb4_sgetim_to_hwtstamp(adapter, skb_hwtstamps(skb), @@ -2528,7 +2527,6 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, goto err; netif_napi_add(dev, &iq->napi, napi_rx_handler, 64); - napi_hash_add(&iq->napi); iq->cur_desc = iq->desc; iq->cidx = 0; iq->gen = 1; diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index b36643ef0593..b2182d3ba3cc 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -2458,13 +2458,11 @@ static int enic_dev_init(struct enic *enic) switch (vnic_dev_get_intr_mode(enic->vdev)) { default: netif_napi_add(netdev, &enic->napi[0], enic_poll, 64); - napi_hash_add(&enic->napi[0]); break; case VNIC_DEV_INTR_MODE_MSIX: for (i = 0; i < enic->rq_count; i++) { netif_napi_add(netdev, &enic->napi[i], enic_poll_msix_rq, NAPI_POLL_WEIGHT); - napi_hash_add(&enic->napi[i]); } for (i = 0; i < enic->wq_count; i++) netif_napi_add(netdev, &enic->napi[enic_cq_wq(enic, i)], diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index ccca4799c27b..f92b6d948398 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -70,7 +70,6 @@ static const int multicast_filter_limit = 0x40; static int 
rio_open (struct net_device *dev); static void rio_timer (unsigned long data); static void rio_tx_timeout (struct net_device *dev); -static void alloc_list (struct net_device *dev); static netdev_tx_t start_xmit (struct sk_buff *skb, struct net_device *dev); static irqreturn_t rio_interrupt (int irq, void *dev_instance); static void rio_free_tx (struct net_device *dev, int irq); @@ -253,19 +252,6 @@ rio_probe1 (struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto err_out_unmap_rx; - if (np->chip_id == CHIP_IP1000A && - (np->pdev->revision == 0x40 || np->pdev->revision == 0x41)) { - /* PHY magic taken from ipg driver, undocumented registers */ - mii_write(dev, np->phy_addr, 31, 0x0001); - mii_write(dev, np->phy_addr, 27, 0x01e0); - mii_write(dev, np->phy_addr, 31, 0x0002); - mii_write(dev, np->phy_addr, 27, 0xeb8e); - mii_write(dev, np->phy_addr, 31, 0x0000); - mii_write(dev, np->phy_addr, 30, 0x005e); - /* advertise 1000BASE-T half & full duplex, prefer MASTER */ - mii_write(dev, np->phy_addr, MII_CTRL1000, 0x0700); - } - /* Fiber device? */ np->phy_media = (dr16(ASICCtrl) & PhyMedia) ? 1 : 0; np->link_status = 0; @@ -275,13 +261,11 @@ rio_probe1 (struct pci_dev *pdev, const struct pci_device_id *ent) if (np->an_enable == 2) { np->an_enable = 1; } - mii_set_media_pcs (dev); } else { /* Auto-Negotiation is mandatory for 1000BASE-T, IEEE 802.3ab Annex 28D page 14 */ if (np->speed == 1000) np->an_enable = 1; - mii_set_media (dev); } err = register_netdev (dev); @@ -446,19 +430,106 @@ static void rio_set_led_mode(struct net_device *dev) dw32(ASICCtrl, mode); } -static int -rio_open (struct net_device *dev) +static inline dma_addr_t desc_to_dma(struct netdev_desc *desc) +{ + return le64_to_cpu(desc->fraginfo) & DMA_BIT_MASK(48); +} + +static void free_list(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + struct sk_buff *skb; + int i; + + /* Free all the skbuffs in the queue. */ + for (i = 0; i < RX_RING_SIZE; i++) { + skb = np->rx_skbuff[i]; + if (skb) { + pci_unmap_single(np->pdev, desc_to_dma(&np->rx_ring[i]), + skb->len, PCI_DMA_FROMDEVICE); + dev_kfree_skb(skb); + np->rx_skbuff[i] = NULL; + } + np->rx_ring[i].status = 0; + np->rx_ring[i].fraginfo = 0; + } + for (i = 0; i < TX_RING_SIZE; i++) { + skb = np->tx_skbuff[i]; + if (skb) { + pci_unmap_single(np->pdev, desc_to_dma(&np->tx_ring[i]), + skb->len, PCI_DMA_TODEVICE); + dev_kfree_skb(skb); + np->tx_skbuff[i] = NULL; + } + } +} + +static void rio_reset_ring(struct netdev_private *np) +{ + int i; + + np->cur_rx = 0; + np->cur_tx = 0; + np->old_rx = 0; + np->old_tx = 0; + + for (i = 0; i < TX_RING_SIZE; i++) + np->tx_ring[i].status = cpu_to_le64(TFDDone); + + for (i = 0; i < RX_RING_SIZE; i++) + np->rx_ring[i].status = 0; +} + + /* allocate and initialize Tx and Rx descriptors */ +static int alloc_list(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + int i; + + rio_reset_ring(np); + np->rx_buf_sz = (dev->mtu <= 1500 ? PACKET_SIZE : dev->mtu + 32); + + /* Initialize Tx descriptors, TFDListPtr leaves in start_xmit(). 
*/ + for (i = 0; i < TX_RING_SIZE; i++) { + np->tx_skbuff[i] = NULL; + np->tx_ring[i].next_desc = cpu_to_le64(np->tx_ring_dma + + ((i + 1) % TX_RING_SIZE) * + sizeof(struct netdev_desc)); + } + + /* Initialize Rx descriptors & allocate buffers */ + for (i = 0; i < RX_RING_SIZE; i++) { + /* Allocated fixed size of skbuff */ + struct sk_buff *skb; + + skb = netdev_alloc_skb_ip_align(dev, np->rx_buf_sz); + np->rx_skbuff[i] = skb; + if (!skb) { + free_list(dev); + return -ENOMEM; + } + + np->rx_ring[i].next_desc = cpu_to_le64(np->rx_ring_dma + + ((i + 1) % RX_RING_SIZE) * + sizeof(struct netdev_desc)); + /* Rubicon now supports 40 bits of addressing space. */ + np->rx_ring[i].fraginfo = + cpu_to_le64(pci_map_single( + np->pdev, skb->data, np->rx_buf_sz, + PCI_DMA_FROMDEVICE)); + np->rx_ring[i].fraginfo |= cpu_to_le64((u64)np->rx_buf_sz << 48); + } + + return 0; +} + +static void rio_hw_init(struct net_device *dev) { struct netdev_private *np = netdev_priv(dev); void __iomem *ioaddr = np->ioaddr; - const int irq = np->pdev->irq; int i; u16 macctrl; - i = request_irq(irq, rio_interrupt, IRQF_SHARED, dev->name, dev); - if (i) - return i; - /* Reset all logic functions */ dw16(ASICCtrl + 2, GlobalReset | DMAReset | FIFOReset | NetworkReset | HostReset); @@ -469,11 +540,31 @@ rio_open (struct net_device *dev) /* DebugCtrl bit 4, 5, 9 must set */ dw32(DebugCtrl, dr32(DebugCtrl) | 0x0230); + if (np->chip_id == CHIP_IP1000A && + (np->pdev->revision == 0x40 || np->pdev->revision == 0x41)) { + /* PHY magic taken from ipg driver, undocumented registers */ + mii_write(dev, np->phy_addr, 31, 0x0001); + mii_write(dev, np->phy_addr, 27, 0x01e0); + mii_write(dev, np->phy_addr, 31, 0x0002); + mii_write(dev, np->phy_addr, 27, 0xeb8e); + mii_write(dev, np->phy_addr, 31, 0x0000); + mii_write(dev, np->phy_addr, 30, 0x005e); + /* advertise 1000BASE-T half & full duplex, prefer MASTER */ + mii_write(dev, np->phy_addr, MII_CTRL1000, 0x0700); + } + + if (np->phy_media) + mii_set_media_pcs(dev); + else + mii_set_media(dev); + /* Jumbo frame */ if (np->jumbo != 0) dw16(MaxFrameSize, MAX_JUMBO+14); - alloc_list (dev); + /* Set RFDListPtr */ + dw32(RFDListPtr0, np->rx_ring_dma); + dw32(RFDListPtr1, 0); /* Set station address */ /* 16 or 32-bit access is required by TC9020 datasheet but 8-bit works @@ -509,10 +600,6 @@ rio_open (struct net_device *dev) dw32(MACCtrl, dr32(MACCtrl) | AutoVLANuntagging); } - setup_timer(&np->timer, rio_timer, (unsigned long)dev); - np->timer.expires = jiffies + 1*HZ; - add_timer (&np->timer); - /* Start Tx/Rx */ dw32(MACCtrl, dr32(MACCtrl) | StatsEnable | RxEnable | TxEnable); @@ -522,6 +609,42 @@ rio_open (struct net_device *dev) macctrl |= (np->tx_flow) ? TxFlowControlEnable : 0; macctrl |= (np->rx_flow) ? 
RxFlowControlEnable : 0; dw16(MACCtrl, macctrl); +} + +static void rio_hw_stop(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->ioaddr; + + /* Disable interrupts */ + dw16(IntEnable, 0); + + /* Stop Tx and Rx logics */ + dw32(MACCtrl, TxDisable | RxDisable | StatsDisable); +} + +static int rio_open(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + const int irq = np->pdev->irq; + int i; + + i = alloc_list(dev); + if (i) + return i; + + rio_hw_init(dev); + + i = request_irq(irq, rio_interrupt, IRQF_SHARED, dev->name, dev); + if (i) { + rio_hw_stop(dev); + free_list(dev); + return i; + } + + setup_timer(&np->timer, rio_timer, (unsigned long)dev); + np->timer.expires = jiffies + 1 * HZ; + add_timer(&np->timer); netif_start_queue (dev); @@ -586,60 +709,6 @@ rio_tx_timeout (struct net_device *dev) dev->trans_start = jiffies; /* prevent tx timeout */ } - /* allocate and initialize Tx and Rx descriptors */ -static void -alloc_list (struct net_device *dev) -{ - struct netdev_private *np = netdev_priv(dev); - void __iomem *ioaddr = np->ioaddr; - int i; - - np->cur_rx = np->cur_tx = 0; - np->old_rx = np->old_tx = 0; - np->rx_buf_sz = (dev->mtu <= 1500 ? PACKET_SIZE : dev->mtu + 32); - - /* Initialize Tx descriptors, TFDListPtr leaves in start_xmit(). */ - for (i = 0; i < TX_RING_SIZE; i++) { - np->tx_skbuff[i] = NULL; - np->tx_ring[i].status = cpu_to_le64 (TFDDone); - np->tx_ring[i].next_desc = cpu_to_le64 (np->tx_ring_dma + - ((i+1)%TX_RING_SIZE) * - sizeof (struct netdev_desc)); - } - - /* Initialize Rx descriptors */ - for (i = 0; i < RX_RING_SIZE; i++) { - np->rx_ring[i].next_desc = cpu_to_le64 (np->rx_ring_dma + - ((i + 1) % RX_RING_SIZE) * - sizeof (struct netdev_desc)); - np->rx_ring[i].status = 0; - np->rx_ring[i].fraginfo = 0; - np->rx_skbuff[i] = NULL; - } - - /* Allocate the rx buffers */ - for (i = 0; i < RX_RING_SIZE; i++) { - /* Allocated fixed size of skbuff */ - struct sk_buff *skb; - - skb = netdev_alloc_skb_ip_align(dev, np->rx_buf_sz); - np->rx_skbuff[i] = skb; - if (skb == NULL) - break; - - /* Rubicon now supports 40 bits of addressing space. */ - np->rx_ring[i].fraginfo = - cpu_to_le64 ( pci_map_single ( - np->pdev, skb->data, np->rx_buf_sz, - PCI_DMA_FROMDEVICE)); - np->rx_ring[i].fraginfo |= cpu_to_le64((u64)np->rx_buf_sz << 48); - } - - /* Set RFDListPtr */ - dw32(RFDListPtr0, np->rx_ring_dma); - dw32(RFDListPtr1, 0); -} - static netdev_tx_t start_xmit (struct sk_buff *skb, struct net_device *dev) { @@ -748,11 +817,6 @@ rio_interrupt (int irq, void *dev_instance) return IRQ_RETVAL(handled); } -static inline dma_addr_t desc_to_dma(struct netdev_desc *desc) -{ - return le64_to_cpu(desc->fraginfo) & DMA_BIT_MASK(48); -} - static void rio_free_tx (struct net_device *dev, int irq) { @@ -1730,44 +1794,16 @@ static int rio_close (struct net_device *dev) { struct netdev_private *np = netdev_priv(dev); - void __iomem *ioaddr = np->ioaddr; - struct pci_dev *pdev = np->pdev; - struct sk_buff *skb; - int i; netif_stop_queue (dev); - /* Disable interrupts */ - dw16(IntEnable, 0); - - /* Stop Tx and Rx logics */ - dw32(MACCtrl, TxDisable | RxDisable | StatsDisable); + rio_hw_stop(dev); free_irq(pdev->irq, dev); del_timer_sync (&np->timer); - /* Free all the skbuffs in the queue. 
*/ - for (i = 0; i < RX_RING_SIZE; i++) { - skb = np->rx_skbuff[i]; - if (skb) { - pci_unmap_single(pdev, desc_to_dma(&np->rx_ring[i]), - skb->len, PCI_DMA_FROMDEVICE); - dev_kfree_skb (skb); - np->rx_skbuff[i] = NULL; - } - np->rx_ring[i].status = 0; - np->rx_ring[i].fraginfo = 0; - } - for (i = 0; i < TX_RING_SIZE; i++) { - skb = np->tx_skbuff[i]; - if (skb) { - pci_unmap_single(pdev, desc_to_dma(&np->tx_ring[i]), - skb->len, PCI_DMA_TODEVICE); - dev_kfree_skb (skb); - np->tx_skbuff[i] = NULL; - } - } + free_list(dev); return 0; } @@ -1795,11 +1831,55 @@ rio_remove1 (struct pci_dev *pdev) } } +#ifdef CONFIG_PM_SLEEP +static int rio_suspend(struct device *device) +{ + struct net_device *dev = dev_get_drvdata(device); + struct netdev_private *np = netdev_priv(dev); + + if (!netif_running(dev)) + return 0; + + netif_device_detach(dev); + del_timer_sync(&np->timer); + rio_hw_stop(dev); + + return 0; +} + +static int rio_resume(struct device *device) +{ + struct net_device *dev = dev_get_drvdata(device); + struct netdev_private *np = netdev_priv(dev); + + if (!netif_running(dev)) + return 0; + + rio_reset_ring(np); + rio_hw_init(dev); + np->timer.expires = jiffies + 1 * HZ; + add_timer(&np->timer); + netif_device_attach(dev); + dl2k_enable_int(np); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(rio_pm_ops, rio_suspend, rio_resume); +#define RIO_PM_OPS (&rio_pm_ops) + +#else + +#define RIO_PM_OPS NULL + +#endif /* CONFIG_PM_SLEEP */ + static struct pci_driver rio_driver = { .name = "dl2k", .id_table = rio_pci_tbl, .probe = rio_probe1, .remove = rio_remove1, + .driver.pm = RIO_PM_OPS, }; module_pci_driver(rio_driver); diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index 734f655c99c1..d2a5baf019ab 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -241,13 +241,11 @@ static u32 lancer_cmd_get_file_len(struct be_adapter *adapter, u8 *file_name) u32 data_read = 0, eof; u8 addn_status; struct be_dma_mem data_len_cmd; - int status; memset(&data_len_cmd, 0, sizeof(data_len_cmd)); /* data_offset and data_size should be 0 to get reg len */ - status = lancer_cmd_read_object(adapter, &data_len_cmd, 0, 0, - file_name, &data_read, &eof, - &addn_status); + lancer_cmd_read_object(adapter, &data_len_cmd, 0, 0, file_name, + &data_read, &eof, &addn_status); return data_read; } diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index b6ad02909d6b..4cab8879f5ae 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2184,7 +2184,6 @@ static void be_rx_compl_process_gro(struct be_rx_obj *rxo, skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3); skb->csum_level = rxcp->tunneled; - skb_mark_napi_id(skb, napi); if (rxcp->vlanf) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag); @@ -2631,7 +2630,6 @@ static int be_evt_queues_create(struct be_adapter *adapter) eqo->affinity_mask); netif_napi_add(adapter->netdev, &eqo->napi, be_poll, BE_NAPI_WEIGHT); - napi_hash_add(&eqo->napi); } return 0; } diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index b2a32209ffbf..d2328fc5da57 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3277,7 +3277,6 @@ static void fec_enet_get_queue_num(struct platform_device *pdev, int *num_tx, int *num_rx) { struct device_node *np = pdev->dev.of_node; 
- int err; *num_tx = *num_rx = 1; @@ -3285,13 +3284,9 @@ fec_enet_get_queue_num(struct platform_device *pdev, int *num_tx, int *num_rx) return; /* parse the num of tx and rx queues */ - err = of_property_read_u32(np, "fsl,num-tx-queues", num_tx); - if (err) - *num_tx = 1; + of_property_read_u32(np, "fsl,num-tx-queues", num_tx); - err = of_property_read_u32(np, "fsl,num-rx-queues", num_rx); - if (err) - *num_rx = 1; + of_property_read_u32(np, "fsl,num-rx-queues", num_rx); if (*num_tx < 1 || *num_tx > FEC_ENET_MAX_TX_QS) { dev_warn(&pdev->dev, "Invalid num_tx(=%d), fall back to 1\n", diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c index afe7f39cdd7c..25553ee857b4 100644 --- a/drivers/net/ethernet/freescale/fec_mpc52xx.c +++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c @@ -1084,27 +1084,23 @@ static struct platform_driver mpc52xx_fec_driver = { /* Module */ /* ======================================================================== */ +static struct platform_driver * const drivers[] = { +#ifdef CONFIG_FEC_MPC52xx_MDIO + &mpc52xx_fec_mdio_driver, +#endif + &mpc52xx_fec_driver, +}; + static int __init mpc52xx_fec_init(void) { -#ifdef CONFIG_FEC_MPC52xx_MDIO - int ret; - ret = platform_driver_register(&mpc52xx_fec_mdio_driver); - if (ret) { - pr_err("failed to register mdio driver\n"); - return ret; - } -#endif - return platform_driver_register(&mpc52xx_fec_driver); + return platform_register_drivers(drivers, ARRAY_SIZE(drivers)); } static void __exit mpc52xx_fec_exit(void) { - platform_driver_unregister(&mpc52xx_fec_driver); -#ifdef CONFIG_FEC_MPC52xx_MDIO - platform_driver_unregister(&mpc52xx_fec_mdio_driver); -#endif + platform_unregister_drivers(drivers, ARRAY_SIZE(drivers)); } diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index cf8e54652df9..48a9c176e0d1 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -1050,7 +1050,7 @@ static int fs_enet_probe(struct platform_device *ofdev) ndev->netdev_ops = &fs_enet_netdev_ops; ndev->watchdog_timeo = 2 * HZ; netif_napi_add(ndev, &fep->napi, fs_enet_rx_napi, fpi->napi_weight); - netif_napi_add(ndev, &fep->napi_tx, fs_enet_tx_napi, 2); + netif_tx_napi_add(ndev, &fep->napi_tx, fs_enet_tx_napi, 2); ndev->ethtool_ops = &fs_ethtool_ops; diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 7cf898455e60..4ce60e0c8341 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -738,7 +738,6 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev) struct gfar_private *priv = NULL; struct device_node *np = ofdev->dev.of_node; struct device_node *child = NULL; - struct property *stash; u32 stash_len = 0; u32 stash_idx = 0; unsigned int num_tx_qs, num_rx_qs; @@ -854,9 +853,7 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev) goto err_grp_init; } - stash = of_find_property(np, "bd-stash", NULL); - - if (stash) { + if (of_property_read_bool(np, "bd-stash")) { priv->device_flags |= FSL_GIANFAR_DEV_HAS_BD_STASHING; priv->bd_stash_en = 1; } @@ -1347,12 +1344,12 @@ static int gfar_probe(struct platform_device *ofdev) if (priv->poll_mode == GFAR_SQ_POLLING) { netif_napi_add(dev, &priv->gfargrp[i].napi_rx, gfar_poll_rx_sq, GFAR_DEV_WEIGHT); - netif_napi_add(dev, &priv->gfargrp[i].napi_tx, + 
netif_tx_napi_add(dev, &priv->gfargrp[i].napi_tx, gfar_poll_tx_sq, 2); } else { netif_napi_add(dev, &priv->gfargrp[i].napi_rx, gfar_poll_rx, GFAR_DEV_WEIGHT); - netif_napi_add(dev, &priv->gfargrp[i].napi_tx, + netif_tx_napi_add(dev, &priv->gfargrp[i].napi_tx, gfar_poll_tx, 2); } } diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index 4163b16489b3..061e4e04e561 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -269,17 +269,6 @@ config I40E To compile this driver as a module, choose M here. The module will be called i40e. -config I40E_VXLAN - bool "Virtual eXtensible Local Area Network Support" - default n - depends on I40E && VXLAN && !(I40E=y && VXLAN=m) - ---help--- - This allows one to create VXLAN virtual interfaces that provide - Layer 2 Networks over Layer 3 Networks. VXLAN is often used - to tunnel virtual network infrastructure in virtualized environments. - Say Y here if you want to use Virtual eXtensible Local Area Network - (VXLAN) in the driver. - config I40E_DCB bool "Data Center Bridging (DCB) Support" default n diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h index 14440200499b..48809e5d3f79 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h @@ -33,7 +33,7 @@ #include "fm10k_pf.h" #include "fm10k_vf.h" -#define FM10K_MAX_JUMBO_FRAME_SIZE 15358 /* Maximum supported size 15K */ +#define FM10K_MAX_JUMBO_FRAME_SIZE 15342 /* Maximum supported size 15K */ #define MAX_QUEUES FM10K_MAX_QUEUES_PF diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index e76a44cf330c..746a1986690b 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -1428,6 +1428,10 @@ static int fm10k_poll(struct napi_struct *napi, int budget) fm10k_for_each_ring(ring, q_vector->tx) clean_complete &= fm10k_clean_tx_irq(q_vector, ring); + /* Handle case where we are called by netpoll with a budget of 0 */ + if (budget <= 0) + return budget; + /* attempt to distribute budget to each queue fairly, but don't * allow the budget to go below 1 because we'll exit polling */ diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 74be792f3f1b..5fbffbaefe32 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -846,7 +846,7 @@ static irqreturn_t fm10k_msix_clean_rings(int __always_unused irq, void *data) struct fm10k_q_vector *q_vector = data; if (q_vector->rx.count || q_vector->tx.count) - napi_schedule(&q_vector->napi); + napi_schedule_irqoff(&q_vector->napi); return IRQ_HANDLED; } diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_type.h b/drivers/net/ethernet/intel/fm10k/fm10k_type.h index 318a212f0a78..35afd711d144 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_type.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k_type.h @@ -77,6 +77,7 @@ struct fm10k_hw; #define FM10K_PCIE_SRIOV_CTRL_VFARI 0x10 #define FM10K_ERR_PARAM -2 +#define FM10K_ERR_NO_RESOURCES -3 #define FM10K_ERR_REQUESTS_PENDING -4 #define FM10K_ERR_RESET_REQUESTED -5 #define FM10K_ERR_DMA_PENDING -6 diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c index 36c8b0aa08fd..3a18ef1cc017 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c @@ -103,7 +103,12 @@ 
static s32 fm10k_init_hw_vf(struct fm10k_hw *hw) s32 err; u16 i; - /* assume we always have at least 1 queue */ + /* verify we have at least 1 queue */ + if (!~fm10k_read_reg(hw, FM10K_TXQCTL(0)) || + !~fm10k_read_reg(hw, FM10K_RXQCTL(0))) + return FM10K_ERR_NO_RESOURCES; + + /* determine how many queues we have */ for (i = 1; tqdloc0 && (i < FM10K_MAX_QUEUES_POOL); i++) { /* verify the Descriptor cache offsets are increasing */ tqdloc = ~fm10k_read_reg(hw, FM10K_TQDLOC(i)); diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 4dd3e26129b4..23b4580616b7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -42,7 +42,6 @@ #include <linux/string.h> #include <linux/in.h> #include <linux/ip.h> -#include <linux/tcp.h> #include <linux/sctp.h> #include <linux/pkt_sched.h> #include <linux/ipv6.h> @@ -104,6 +103,7 @@ #define I40E_PRIV_FLAGS_LINKPOLL_FLAG BIT(1) #define I40E_PRIV_FLAGS_FD_ATR BIT(2) #define I40E_PRIV_FLAGS_VEB_STATS BIT(3) +#define I40E_PRIV_FLAGS_PS BIT(4) #define I40E_NVM_VERSION_LO_SHIFT 0 #define I40E_NVM_VERSION_LO_MASK (0xff << I40E_NVM_VERSION_LO_SHIFT) @@ -187,6 +187,7 @@ struct i40e_lump_tracking { #define I40E_FDIR_BUFFER_HEAD_ROOM_FOR_ATR (I40E_FDIR_BUFFER_HEAD_ROOM * 4) #define I40E_HKEY_ARRAY_SIZE ((I40E_PFQF_HKEY_MAX_INDEX + 1) * 4) +#define I40E_HLUT_ARRAY_SIZE ((I40E_PFQF_HLUT_MAX_INDEX + 1) * 4) enum i40e_fd_stat_idx { I40E_FD_STAT_ATR, @@ -265,7 +266,7 @@ struct i40e_pf { u16 num_lan_qps; /* num lan queues this PF has set up */ u16 num_lan_msix; /* num queue vectors for the base PF vsi */ int queues_left; /* queues left unclaimed */ - u16 rss_size; /* num queues in the RSS array */ + u16 alloc_rss_size; /* allocated RSS queues */ u16 rss_size_max; /* HW defined max RSS queues */ u16 fdir_pf_filter_count; /* num of guaranteed filters for this PF */ u16 num_alloc_vsi; /* num VSIs this driver supports */ @@ -280,7 +281,7 @@ struct i40e_pf { u32 fd_atr_cnt; u32 fd_tcp_rule; -#ifdef CONFIG_I40E_VXLAN +#if IS_ENABLED(CONFIG_VXLAN) __be16 vxlan_ports[I40E_MAX_PF_UDP_OFFLOAD_PORTS]; u16 pending_vxlan_bitmap; @@ -321,9 +322,7 @@ struct i40e_pf { #define I40E_FLAG_FD_ATR_ENABLED BIT_ULL(22) #define I40E_FLAG_PTP BIT_ULL(25) #define I40E_FLAG_MFP_ENABLED BIT_ULL(26) -#ifdef CONFIG_I40E_VXLAN #define I40E_FLAG_VXLAN_FILTER_SYNC BIT_ULL(27) -#endif #define I40E_FLAG_PORT_ID_VALID BIT_ULL(28) #define I40E_FLAG_DCB_CAPABLE BIT_ULL(29) #define I40E_FLAG_RSS_AQ_CAPABLE BIT_ULL(31) @@ -412,7 +411,7 @@ struct i40e_pf { u32 rx_hwtstamp_cleared; bool ptp_tx; bool ptp_rx; - u16 rss_table_size; + u16 rss_table_size; /* HW RSS table size */ /* These are only valid in NPAR modes */ u32 npar_max_bw; u32 npar_min_bw; @@ -487,6 +486,7 @@ struct i40e_vsi { u32 tx_restart; u32 tx_busy; u64 tx_linearize; + u64 tx_force_wb; u32 rx_buf_failed; u32 rx_page_failed; @@ -504,8 +504,10 @@ struct i40e_vsi { u16 tx_itr_setting; u16 int_rate_limit; /* value in usecs */ - u16 rss_table_size; - u16 rss_size; + u16 rss_table_size; /* HW RSS table size */ + u16 rss_size; /* Allocated RSS queues */ + u8 *rss_hkey_user; /* User configured hash keys */ + u8 *rss_lut_user; /* User configured lookup table entries */ u16 max_frame; u16 rx_hdr_len; @@ -575,6 +577,9 @@ struct i40e_q_vector { u8 num_ringpairs; /* total number of ring pairs in vector */ +#define I40E_Q_VECTOR_HUNG_DETECT 0 /* Bit Index for hung detection logic */ + unsigned long hung_detected; /* Set/Reset for hung_detection logic */ + cpumask_t affinity_mask; 
struct rcu_head rcu; /* to avoid race with update stats on free */ char name[I40E_INT_NAME_STR_LEN]; @@ -602,8 +607,8 @@ static inline char *i40e_nvm_version_str(struct i40e_hw *hw) full_ver = hw->nvm.oem_ver; ver = (u8)(full_ver >> I40E_OEM_VER_SHIFT); - build = (u16)((full_ver >> I40E_OEM_VER_BUILD_SHIFT) - & I40E_OEM_VER_BUILD_MASK); + build = (u16)((full_ver >> I40E_OEM_VER_BUILD_SHIFT) & + I40E_OEM_VER_BUILD_MASK); patch = (u8)(full_ver & I40E_OEM_VER_PATCH_MASK); snprintf(buf, sizeof(buf), @@ -668,6 +673,8 @@ extern const char i40e_driver_name[]; extern const char i40e_driver_version_str[]; void i40e_do_reset_safe(struct i40e_pf *pf, u32 reset_flags); void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags); +int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); +int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id); void i40e_update_stats(struct i40e_vsi *vsi); void i40e_update_eth_stats(struct i40e_vsi *vsi); @@ -691,7 +698,7 @@ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi, bool is_vf, bool is_netdev); void i40e_del_filter(struct i40e_vsi *vsi, u8 *macaddr, s16 vlan, bool is_vf, bool is_netdev); -int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl); +int i40e_sync_vsi_filters(struct i40e_vsi *vsi); struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type, u16 uplink, u32 param1); int i40e_vsi_release(struct i40e_vsi *vsi); @@ -709,7 +716,7 @@ struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 flags, u16 uplink_seid, void i40e_veb_release(struct i40e_veb *veb); int i40e_veb_config_tc(struct i40e_veb *veb, u8 enabled_tc); -i40e_status i40e_vsi_add_pvid(struct i40e_vsi *vsi, u16 vid); +int i40e_vsi_add_pvid(struct i40e_vsi *vsi, u16 vid); void i40e_vsi_remove_pvid(struct i40e_vsi *vsi); void i40e_vsi_reset_stats(struct i40e_vsi *vsi); void i40e_pf_reset_stats(struct i40e_pf *pf); diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index 6584b6cd73fd..61a497935941 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -2403,4 +2403,4 @@ struct i40e_aqc_debug_modify_internals { I40E_CHECK_CMD_LENGTH(i40e_aqc_debug_modify_internals); -#endif +#endif /* _I40E_ADMINQ_CMD_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index d4b7af9a2fc8..10744a698d6f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -103,8 +103,8 @@ static ssize_t i40e_dbg_dump_read(struct file *filp, char __user *buffer, len = min_t(int, count, (i40e_dbg_dump_data_len - *ppos)); bytes_not_copied = copy_to_user(buffer, &i40e_dbg_dump_buf[*ppos], len); - if (bytes_not_copied < 0) - return bytes_not_copied; + if (bytes_not_copied) + return -EFAULT; *ppos += len; return len; @@ -353,8 +353,8 @@ static ssize_t i40e_dbg_command_read(struct file *filp, char __user *buffer, bytes_not_copied = copy_to_user(buffer, buf, len); kfree(buf); - if (bytes_not_copied < 0) - return bytes_not_copied; + if (bytes_not_copied) + return -EFAULT; *ppos = len; return len; @@ -981,12 +981,10 @@ static ssize_t i40e_dbg_command_write(struct file *filp, if (!cmd_buf) return count; bytes_not_copied = copy_from_user(cmd_buf, buffer, count); - if (bytes_not_copied < 0) { + if (bytes_not_copied) { kfree(cmd_buf); - return bytes_not_copied; + return -EFAULT; 
} - if (bytes_not_copied > 0) - count -= bytes_not_copied; cmd_buf[count] = '\0'; cmd_buf_tmp = strchr(cmd_buf, '\n'); @@ -1140,7 +1138,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp, spin_lock_bh(&vsi->mac_filter_list_lock); f = i40e_add_filter(vsi, ma, vlan, false, false); spin_unlock_bh(&vsi->mac_filter_list_lock); - ret = i40e_sync_vsi_filters(vsi, true); + ret = i40e_sync_vsi_filters(vsi); if (f && !ret) dev_info(&pf->pdev->dev, "add macaddr: %pM vlan=%d added to VSI %d\n", @@ -1179,7 +1177,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp, spin_lock_bh(&vsi->mac_filter_list_lock); i40e_del_filter(vsi, ma, vlan, false, false); spin_unlock_bh(&vsi->mac_filter_list_lock); - ret = i40e_sync_vsi_filters(vsi, true); + ret = i40e_sync_vsi_filters(vsi); if (!ret) dev_info(&pf->pdev->dev, "del macaddr: %pM vlan=%d removed from VSI %d\n", @@ -2034,8 +2032,8 @@ static ssize_t i40e_dbg_netdev_ops_read(struct file *filp, char __user *buffer, bytes_not_copied = copy_to_user(buffer, buf, len); kfree(buf); - if (bytes_not_copied < 0) - return bytes_not_copied; + if (bytes_not_copied) + return -EFAULT; *ppos = len; return len; @@ -2068,10 +2066,8 @@ static ssize_t i40e_dbg_netdev_ops_write(struct file *filp, memset(i40e_dbg_netdev_ops_buf, 0, sizeof(i40e_dbg_netdev_ops_buf)); bytes_not_copied = copy_from_user(i40e_dbg_netdev_ops_buf, buffer, count); - if (bytes_not_copied < 0) - return bytes_not_copied; - else if (bytes_not_copied > 0) - count -= bytes_not_copied; + if (bytes_not_copied) + return -EFAULT; i40e_dbg_netdev_ops_buf[count] = '\0'; buf_tmp = strchr(i40e_dbg_netdev_ops_buf, '\n'); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 3f385ffe420f..29d5833e24a3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -88,6 +88,7 @@ static const struct i40e_stats i40e_gstrings_misc_stats[] = { I40E_VSI_STAT("tx_broadcast", eth_stats.tx_broadcast), I40E_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol), I40E_VSI_STAT("tx_linearize", tx_linearize), + I40E_VSI_STAT("tx_force_wb", tx_force_wb), }; /* These PF_STATs might look like duplicates of some NETDEV_STATs, @@ -230,6 +231,7 @@ static const char i40e_priv_flags_strings[][ETH_GSTRING_LEN] = { "LinkPolling", "flow-director-atr", "veb-stats", + "packet-split", }; #define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_priv_flags_strings) @@ -2110,7 +2112,7 @@ static int i40e_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, switch (cmd->cmd) { case ETHTOOL_GRXRINGS: - cmd->data = vsi->alloc_queue_pairs; + cmd->data = vsi->num_queue_pairs; ret = 0; break; case ETHTOOL_GRXFH: @@ -2583,7 +2585,6 @@ static int i40e_set_channels(struct net_device *dev, return -EINVAL; } -#define I40E_HLUT_ARRAY_SIZE ((I40E_PFQF_HLUT_MAX_INDEX + 1) * 4) /** * i40e_get_rxfh_key_size - get the RSS hash key size * @netdev: network interface device structure @@ -2611,10 +2612,9 @@ static int i40e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; - struct i40e_pf *pf = vsi->back; - struct i40e_hw *hw = &pf->hw; - u32 reg_val; - int i, j; + u8 *lut, *seed = NULL; + int ret; + u16 i; if (hfunc) *hfunc = ETH_RSS_HASH_TOP; @@ -2622,24 +2622,20 @@ static int i40e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, if (!indir) return 0; - for (i = 0, j = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++) { - reg_val = rd32(hw, 
I40E_PFQF_HLUT(i)); - indir[j++] = reg_val & 0xff; - indir[j++] = (reg_val >> 8) & 0xff; - indir[j++] = (reg_val >> 16) & 0xff; - indir[j++] = (reg_val >> 24) & 0xff; - } + seed = key; + lut = kzalloc(I40E_HLUT_ARRAY_SIZE, GFP_KERNEL); + if (!lut) + return -ENOMEM; + ret = i40e_get_rss(vsi, seed, lut, I40E_HLUT_ARRAY_SIZE); + if (ret) + goto out; + for (i = 0; i < I40E_HLUT_ARRAY_SIZE; i++) + indir[i] = (u32)(lut[i]); - if (key) { - for (i = 0, j = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++) { - reg_val = rd32(hw, I40E_PFQF_HKEY(i)); - key[j++] = (u8)(reg_val & 0xff); - key[j++] = (u8)((reg_val >> 8) & 0xff); - key[j++] = (u8)((reg_val >> 16) & 0xff); - key[j++] = (u8)((reg_val >> 24) & 0xff); - } - } - return 0; +out: + kfree(lut); + + return ret; } /** @@ -2656,10 +2652,8 @@ static int i40e_set_rxfh(struct net_device *netdev, const u32 *indir, { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; - struct i40e_pf *pf = vsi->back; - struct i40e_hw *hw = &pf->hw; - u32 reg_val; - int i, j; + u8 *seed = NULL; + u16 i; if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP; @@ -2667,24 +2661,28 @@ static int i40e_set_rxfh(struct net_device *netdev, const u32 *indir, if (!indir) return 0; - for (i = 0, j = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++) { - reg_val = indir[j++]; - reg_val |= indir[j++] << 8; - reg_val |= indir[j++] << 16; - reg_val |= indir[j++] << 24; - wr32(hw, I40E_PFQF_HLUT(i), reg_val); - } - if (key) { - for (i = 0, j = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++) { - reg_val = key[j++]; - reg_val |= key[j++] << 8; - reg_val |= key[j++] << 16; - reg_val |= key[j++] << 24; - wr32(hw, I40E_PFQF_HKEY(i), reg_val); + if (!vsi->rss_hkey_user) { + vsi->rss_hkey_user = kzalloc(I40E_HKEY_ARRAY_SIZE, + GFP_KERNEL); + if (!vsi->rss_hkey_user) + return -ENOMEM; } + memcpy(vsi->rss_hkey_user, key, I40E_HKEY_ARRAY_SIZE); + seed = vsi->rss_hkey_user; } - return 0; + if (!vsi->rss_lut_user) { + vsi->rss_lut_user = kzalloc(I40E_HLUT_ARRAY_SIZE, GFP_KERNEL); + if (!vsi->rss_lut_user) + return -ENOMEM; + } + + /* Each 32 bits pointed by 'indir' is stored with a lut entry */ + for (i = 0; i < I40E_HLUT_ARRAY_SIZE; i++) + vsi->rss_lut_user[i] = (u8)(indir[i]); + + return i40e_config_rss(vsi, seed, vsi->rss_lut_user, + I40E_HLUT_ARRAY_SIZE); } /** @@ -2712,6 +2710,8 @@ static u32 i40e_get_priv_flags(struct net_device *dev) I40E_PRIV_FLAGS_FD_ATR : 0; ret_flags |= pf->flags & I40E_FLAG_VEB_STATS_ENABLED ? I40E_PRIV_FLAGS_VEB_STATS : 0; + ret_flags |= pf->flags & I40E_FLAG_RX_PS_ENABLED ? + I40E_PRIV_FLAGS_PS : 0; return ret_flags; } @@ -2726,6 +2726,26 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) struct i40e_netdev_priv *np = netdev_priv(dev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; + bool reset_required = false; + + /* NOTE: MFP is not settable */ + + /* allow the user to control the method of receive + * buffer DMA, whether the packet is split at header + * boundaries into two separate buffers. In some cases + * one routine or the other will perform better. 
+ */ + if ((flags & I40E_PRIV_FLAGS_PS) && + !(pf->flags & I40E_FLAG_RX_PS_ENABLED)) { + pf->flags |= I40E_FLAG_RX_PS_ENABLED; + pf->flags &= ~I40E_FLAG_RX_1BUF_ENABLED; + reset_required = true; + } else if (!(flags & I40E_PRIV_FLAGS_PS) && + (pf->flags & I40E_FLAG_RX_PS_ENABLED)) { + pf->flags &= ~I40E_FLAG_RX_PS_ENABLED; + pf->flags |= I40E_FLAG_RX_1BUF_ENABLED; + reset_required = true; + } if (flags & I40E_PRIV_FLAGS_LINKPOLL_FLAG) pf->flags |= I40E_FLAG_LINK_POLLING_ENABLED; @@ -2748,6 +2768,10 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) else pf->flags &= ~I40E_FLAG_VEB_STATS_ENABLED; + /* if needed, issue reset to cause things to take effect */ + if (reset_required) + i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED)); + return 0; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c index fe5d9bf3ed6d..579a46ca82df 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c +++ b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c @@ -1544,8 +1544,6 @@ void i40e_fcoe_vsi_setup(struct i40e_pf *pf) if (!(pf->flags & I40E_FLAG_FCOE_ENABLED)) return; - BUG_ON(!pf->vsi[pf->lan_vsi]); - for (i = 0; i < pf->num_alloc_vsi; i++) { vsi = pf->vsi[i]; if (vsi && vsi->type == I40E_VSI_FCOE) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index b825f978d441..8cd395d1cd09 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -27,7 +27,7 @@ /* Local includes */ #include "i40e.h" #include "i40e_diag.h" -#ifdef CONFIG_I40E_VXLAN +#if IS_ENABLED(CONFIG_VXLAN) #include <net/vxlan.h> #endif @@ -38,8 +38,8 @@ static const char i40e_driver_string[] = #define DRV_KERN "-k" #define DRV_VERSION_MAJOR 1 -#define DRV_VERSION_MINOR 3 -#define DRV_VERSION_BUILD 46 +#define DRV_VERSION_MINOR 4 +#define DRV_VERSION_BUILD 7 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." 
\ __stringify(DRV_VERSION_BUILD) DRV_KERN @@ -55,6 +55,8 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit); static int i40e_setup_misc_vector(struct i40e_pf *pf); static void i40e_determine_queue_usage(struct i40e_pf *pf); static int i40e_setup_pf_filter_control(struct i40e_pf *pf); +static void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut, + u16 rss_table_size, u16 rss_size); static void i40e_fdir_sb_setup(struct i40e_pf *pf); static int i40e_veb_get_bw_info(struct i40e_veb *veb); @@ -790,75 +792,6 @@ static void i40e_update_fcoe_stats(struct i40e_vsi *vsi) #endif /** - * i40e_update_link_xoff_rx - Update XOFF received in link flow control mode - * @pf: the corresponding PF - * - * Update the Rx XOFF counter (PAUSE frames) in link flow control mode - **/ -static void i40e_update_link_xoff_rx(struct i40e_pf *pf) -{ - struct i40e_hw_port_stats *osd = &pf->stats_offsets; - struct i40e_hw_port_stats *nsd = &pf->stats; - struct i40e_hw *hw = &pf->hw; - u64 xoff = 0; - - if ((hw->fc.current_mode != I40E_FC_FULL) && - (hw->fc.current_mode != I40E_FC_RX_PAUSE)) - return; - - xoff = nsd->link_xoff_rx; - i40e_stat_update32(hw, I40E_GLPRT_LXOFFRXC(hw->port), - pf->stat_offsets_loaded, - &osd->link_xoff_rx, &nsd->link_xoff_rx); - - /* No new LFC xoff rx */ - if (!(nsd->link_xoff_rx - xoff)) - return; - -} - -/** - * i40e_update_prio_xoff_rx - Update XOFF received in PFC mode - * @pf: the corresponding PF - * - * Update the Rx XOFF counter (PAUSE frames) in PFC mode - **/ -static void i40e_update_prio_xoff_rx(struct i40e_pf *pf) -{ - struct i40e_hw_port_stats *osd = &pf->stats_offsets; - struct i40e_hw_port_stats *nsd = &pf->stats; - bool xoff[I40E_MAX_TRAFFIC_CLASS] = {false}; - struct i40e_dcbx_config *dcb_cfg; - struct i40e_hw *hw = &pf->hw; - u16 i; - u8 tc; - - dcb_cfg = &hw->local_dcbx_config; - - /* Collect Link XOFF stats when PFC is disabled */ - if (!dcb_cfg->pfc.pfcenable) { - i40e_update_link_xoff_rx(pf); - return; - } - - for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) { - u64 prio_xoff = nsd->priority_xoff_rx[i]; - - i40e_stat_update32(hw, I40E_GLPRT_PXOFFRXC(hw->port, i), - pf->stat_offsets_loaded, - &osd->priority_xoff_rx[i], - &nsd->priority_xoff_rx[i]); - - /* No new PFC xoff rx */ - if (!(nsd->priority_xoff_rx[i] - prio_xoff)) - continue; - /* Get the TC for given priority */ - tc = dcb_cfg->etscfg.prioritytable[i]; - xoff[tc] = true; - } -} - -/** * i40e_update_vsi_stats - Update the vsi statistics counters. 
* @vsi: the VSI to be updated * @@ -881,6 +814,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) u64 bytes, packets; unsigned int start; u64 tx_linearize; + u64 tx_force_wb; u64 rx_p, rx_b; u64 tx_p, tx_b; u16 q; @@ -899,7 +833,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) */ rx_b = rx_p = 0; tx_b = tx_p = 0; - tx_restart = tx_busy = tx_linearize = 0; + tx_restart = tx_busy = tx_linearize = tx_force_wb = 0; rx_page = 0; rx_buf = 0; rcu_read_lock(); @@ -917,6 +851,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) tx_restart += p->tx_stats.restart_queue; tx_busy += p->tx_stats.tx_busy; tx_linearize += p->tx_stats.tx_linearize; + tx_force_wb += p->tx_stats.tx_force_wb; /* Rx queue is part of the same block as Tx queue */ p = &p[1]; @@ -934,6 +869,7 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) vsi->tx_restart = tx_restart; vsi->tx_busy = tx_busy; vsi->tx_linearize = tx_linearize; + vsi->tx_force_wb = tx_force_wb; vsi->rx_page_failed = rx_page; vsi->rx_buf_failed = rx_buf; @@ -1049,12 +985,18 @@ static void i40e_update_pf_stats(struct i40e_pf *pf) i40e_stat_update32(hw, I40E_GLPRT_LXONTXC(hw->port), pf->stat_offsets_loaded, &osd->link_xon_tx, &nsd->link_xon_tx); - i40e_update_prio_xoff_rx(pf); /* handles I40E_GLPRT_LXOFFRXC */ + i40e_stat_update32(hw, I40E_GLPRT_LXOFFRXC(hw->port), + pf->stat_offsets_loaded, + &osd->link_xoff_rx, &nsd->link_xoff_rx); i40e_stat_update32(hw, I40E_GLPRT_LXOFFTXC(hw->port), pf->stat_offsets_loaded, &osd->link_xoff_tx, &nsd->link_xoff_tx); for (i = 0; i < 8; i++) { + i40e_stat_update32(hw, I40E_GLPRT_PXOFFRXC(hw->port, i), + pf->stat_offsets_loaded, + &osd->priority_xoff_rx[i], + &nsd->priority_xoff_rx[i]); i40e_stat_update32(hw, I40E_GLPRT_PXONRXC(hw->port, i), pf->stat_offsets_loaded, &osd->priority_xon_rx[i], @@ -1547,10 +1489,9 @@ static int i40e_set_mac(struct net_device *netdev, void *p) spin_unlock_bh(&vsi->mac_filter_list_lock); } - i40e_sync_vsi_filters(vsi, false); ether_addr_copy(netdev->dev_addr, addr->sa_data); - return 0; + return i40e_sync_vsi_filters(vsi); } /** @@ -1625,7 +1566,8 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, switch (vsi->type) { case I40E_VSI_MAIN: - qcount = min_t(int, pf->rss_size, num_tc_qps); + qcount = min_t(int, pf->alloc_rss_size, + num_tc_qps); break; #ifdef I40E_FCOE case I40E_VSI_FCOE: @@ -1851,13 +1793,12 @@ static void i40e_cleanup_add_list(struct list_head *add_list) /** * i40e_sync_vsi_filters - Update the VSI filter list to the HW * @vsi: ptr to the VSI - * @grab_rtnl: whether RTNL needs to be grabbed * * Push any outstanding VSI filter changes through the AdminQ. 
* * Returns 0 or error value **/ -int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) +int i40e_sync_vsi_filters(struct i40e_vsi *vsi) { struct list_head tmp_del_list, tmp_add_list; struct i40e_mac_filter *f, *ftmp, *fclone; @@ -1865,8 +1806,9 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) bool add_happened = false; int filter_list_len = 0; u32 changed_flags = 0; + i40e_status aq_ret = 0; bool err_cond = false; - i40e_status ret = 0; + int retval = 0; struct i40e_pf *pf; int num_add = 0; int num_del = 0; @@ -1929,8 +1871,11 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) } spin_unlock_bh(&vsi->mac_filter_list_lock); - if (err_cond) + if (err_cond) { i40e_cleanup_add_list(&tmp_add_list); + retval = -ENOMEM; + goto out; + } } /* Now process 'del_list' outside the lock */ @@ -1948,7 +1893,8 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) i40e_undo_del_filter_entries(vsi, &tmp_del_list); i40e_undo_add_filter_entries(vsi); spin_unlock_bh(&vsi->mac_filter_list_lock); - return -ENOMEM; + retval = -ENOMEM; + goto out; } list_for_each_entry_safe(f, ftmp, &tmp_del_list, list) { @@ -1966,18 +1912,22 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) /* flush a full buffer */ if (num_del == filter_list_len) { - ret = i40e_aq_remove_macvlan(&pf->hw, - vsi->seid, del_list, num_del, - NULL); + aq_ret = i40e_aq_remove_macvlan(&pf->hw, + vsi->seid, + del_list, + num_del, + NULL); aq_err = pf->hw.aq.asq_last_status; num_del = 0; memset(del_list, 0, sizeof(*del_list)); - if (ret && aq_err != I40E_AQ_RC_ENOENT) + if (aq_ret && aq_err != I40E_AQ_RC_ENOENT) { + retval = -EIO; dev_err(&pf->pdev->dev, "ignoring delete macvlan error, err %s, aq_err %s while flushing a full buffer\n", - i40e_stat_str(&pf->hw, ret), + i40e_stat_str(&pf->hw, aq_ret), i40e_aq_str(&pf->hw, aq_err)); + } } /* Release memory for MAC filter entries which were * synced up with HW. 
@@ -1987,15 +1937,16 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) } if (num_del) { - ret = i40e_aq_remove_macvlan(&pf->hw, vsi->seid, - del_list, num_del, NULL); + aq_ret = i40e_aq_remove_macvlan(&pf->hw, vsi->seid, + del_list, num_del, + NULL); aq_err = pf->hw.aq.asq_last_status; num_del = 0; - if (ret && aq_err != I40E_AQ_RC_ENOENT) + if (aq_ret && aq_err != I40E_AQ_RC_ENOENT) dev_info(&pf->pdev->dev, "ignoring delete macvlan error, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), + i40e_stat_str(&pf->hw, aq_ret), i40e_aq_str(&pf->hw, aq_err)); } @@ -2019,7 +1970,8 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) spin_lock_bh(&vsi->mac_filter_list_lock); i40e_undo_add_filter_entries(vsi); spin_unlock_bh(&vsi->mac_filter_list_lock); - return -ENOMEM; + retval = -ENOMEM; + goto out; } list_for_each_entry_safe(f, ftmp, &tmp_add_list, list) { @@ -2040,13 +1992,13 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) /* flush a full buffer */ if (num_add == filter_list_len) { - ret = i40e_aq_add_macvlan(&pf->hw, vsi->seid, - add_list, num_add, - NULL); + aq_ret = i40e_aq_add_macvlan(&pf->hw, vsi->seid, + add_list, num_add, + NULL); aq_err = pf->hw.aq.asq_last_status; num_add = 0; - if (ret) + if (aq_ret) break; memset(add_list, 0, sizeof(*add_list)); } @@ -2058,18 +2010,19 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) } if (num_add) { - ret = i40e_aq_add_macvlan(&pf->hw, vsi->seid, - add_list, num_add, NULL); + aq_ret = i40e_aq_add_macvlan(&pf->hw, vsi->seid, + add_list, num_add, NULL); aq_err = pf->hw.aq.asq_last_status; num_add = 0; } kfree(add_list); add_list = NULL; - if (add_happened && ret && aq_err != I40E_AQ_RC_EINVAL) { + if (add_happened && aq_ret && aq_err != I40E_AQ_RC_EINVAL) { + retval = i40e_aq_rc_to_posix(aq_ret, aq_err); dev_info(&pf->pdev->dev, "add filter failed, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), + i40e_stat_str(&pf->hw, aq_ret), i40e_aq_str(&pf->hw, aq_err)); if ((pf->hw.aq.asq_last_status == I40E_AQ_RC_ENOSPC) && !test_bit(__I40E_FILTER_OVERFLOW_PROMISC, @@ -2087,16 +2040,19 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) bool cur_multipromisc; cur_multipromisc = !!(vsi->current_netdev_flags & IFF_ALLMULTI); - ret = i40e_aq_set_vsi_multicast_promiscuous(&vsi->back->hw, - vsi->seid, - cur_multipromisc, - NULL); - if (ret) + aq_ret = i40e_aq_set_vsi_multicast_promiscuous(&vsi->back->hw, + vsi->seid, + cur_multipromisc, + NULL); + if (aq_ret) { + retval = i40e_aq_rc_to_posix(aq_ret, + pf->hw.aq.asq_last_status); dev_info(&pf->pdev->dev, "set multi promisc failed, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), + i40e_stat_str(&pf->hw, aq_ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + } } if ((changed_flags & IFF_PROMISC) || promisc_forced_on) { bool cur_promisc; @@ -2112,44 +2068,50 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi, bool grab_rtnl) */ if (pf->cur_promisc != cur_promisc) { pf->cur_promisc = cur_promisc; - if (grab_rtnl) - i40e_do_reset_safe(pf, - BIT(__I40E_PF_RESET_REQUESTED)); - else - i40e_do_reset(pf, - BIT(__I40E_PF_RESET_REQUESTED)); + set_bit(__I40E_PF_RESET_REQUESTED, &pf->state); } } else { - ret = i40e_aq_set_vsi_unicast_promiscuous( + aq_ret = i40e_aq_set_vsi_unicast_promiscuous( &vsi->back->hw, vsi->seid, cur_promisc, NULL); - if (ret) + if (aq_ret) { + retval = + i40e_aq_rc_to_posix(aq_ret, + pf->hw.aq.asq_last_status); dev_info(&pf->pdev->dev, "set unicast promisc failed, err %d, aq_err %d\n", - ret, 
pf->hw.aq.asq_last_status); - ret = i40e_aq_set_vsi_multicast_promiscuous( + aq_ret, pf->hw.aq.asq_last_status); + } + aq_ret = i40e_aq_set_vsi_multicast_promiscuous( &vsi->back->hw, vsi->seid, cur_promisc, NULL); - if (ret) + if (aq_ret) { + retval = + i40e_aq_rc_to_posix(aq_ret, + pf->hw.aq.asq_last_status); dev_info(&pf->pdev->dev, "set multicast promisc failed, err %d, aq_err %d\n", - ret, pf->hw.aq.asq_last_status); + aq_ret, pf->hw.aq.asq_last_status); + } } - ret = i40e_aq_set_vsi_broadcast(&vsi->back->hw, - vsi->seid, - cur_promisc, NULL); - if (ret) + aq_ret = i40e_aq_set_vsi_broadcast(&vsi->back->hw, + vsi->seid, + cur_promisc, NULL); + if (aq_ret) { + retval = i40e_aq_rc_to_posix(aq_ret, + pf->hw.aq.asq_last_status); dev_info(&pf->pdev->dev, "set brdcast promisc failed, err %s, aq_err %s\n", - i40e_stat_str(&pf->hw, ret), + i40e_stat_str(&pf->hw, aq_ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + } } - +out: clear_bit(__I40E_CONFIG_BUSY, &vsi->state); - return 0; + return retval; } /** @@ -2166,8 +2128,15 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf) for (v = 0; v < pf->num_alloc_vsi; v++) { if (pf->vsi[v] && - (pf->vsi[v]->flags & I40E_VSI_FLAG_FILTER_CHANGED)) - i40e_sync_vsi_filters(pf->vsi[v], true); + (pf->vsi[v]->flags & I40E_VSI_FLAG_FILTER_CHANGED)) { + int ret = i40e_sync_vsi_filters(pf->vsi[v]); + + if (ret) { + /* come back and try again later */ + pf->flags |= I40E_FLAG_FILTER_SYNC; + break; + } + } } } @@ -2377,16 +2346,13 @@ int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid) } } - /* Make sure to release before sync_vsi_filter because that - * function will lock/unlock as necessary - */ spin_unlock_bh(&vsi->mac_filter_list_lock); - if (test_bit(__I40E_DOWN, &vsi->back->state) || - test_bit(__I40E_RESET_RECOVERY_PENDING, &vsi->back->state)) - return 0; - - return i40e_sync_vsi_filters(vsi, false); + /* schedule our worker thread which will take care of + * applying the new filter changes + */ + i40e_service_event_schedule(vsi->back); + return 0; } /** @@ -2459,16 +2425,13 @@ int i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid) } } - /* Make sure to release before sync_vsi_filter because that - * function with lock/unlock as necessary - */ spin_unlock_bh(&vsi->mac_filter_list_lock); - if (test_bit(__I40E_DOWN, &vsi->back->state) || - test_bit(__I40E_RESET_RECOVERY_PENDING, &vsi->back->state)) - return 0; - - return i40e_sync_vsi_filters(vsi, false); + /* schedule our worker thread which will take care of + * applying the new filter changes + */ + i40e_service_event_schedule(vsi->back); + return 0; } /** @@ -2711,6 +2674,11 @@ static void i40e_config_xps_tx_ring(struct i40e_ring *ring) netif_set_xps_queue(ring->netdev, mask, ring->queue_index); free_cpumask_var(mask); } + + /* schedule our worker thread which will take care of + * applying the new filter changes + */ + i40e_service_event_schedule(vsi->back); } /** @@ -4360,17 +4328,41 @@ static void i40e_detect_recover_hung_queue(int q_idx, struct i40e_vsi *vsi) else val = rd32(&pf->hw, I40E_PFINT_DYN_CTL0); + /* Bail out if interrupts are disabled because napi_poll + * execution in-progress or will get scheduled soon. + * napi_poll cleans TX and RX queues and updates 'next_to_clean'. + */ + if (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK)) + return; + head = i40e_get_head(tx_ring); tx_pending = i40e_get_tx_pending(tx_ring); - /* Interrupts are disabled and TX pending is non-zero, - * trigger the SW interrupt (don't wait). 
Worst case - * there will be one extra interrupt which may result - * into not cleaning any queues because queues are cleaned. + /* HW is done executing descriptors, updated HEAD write back, + * but SW hasn't processed those descriptors. If interrupt is + * not generated from this point ON, it could result into + * dev_watchdog detecting timeout on those netdev_queue, + * hence proactively trigger SW interrupt. */ - if (tx_pending && (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK))) - i40e_force_wb(vsi, tx_ring->q_vector); + if (tx_pending) { + /* NAPI Poll didn't run and clear since it was set */ + if (test_and_clear_bit(I40E_Q_VECTOR_HUNG_DETECT, + &tx_ring->q_vector->hung_detected)) { + netdev_info(vsi->netdev, "VSI_seid %d, Hung TX queue %d, tx_pending: %d, NTC:0x%x, HWB: 0x%x, NTU: 0x%x, TAIL: 0x%x\n", + vsi->seid, q_idx, tx_pending, + tx_ring->next_to_clean, head, + tx_ring->next_to_use, + readl(tx_ring->tail)); + netdev_info(vsi->netdev, "VSI_seid %d, Issuing force_wb for TX queue %d, Interrupt Reg: 0x%x\n", + vsi->seid, q_idx, val); + i40e_force_wb(vsi, tx_ring->q_vector); + } else { + /* First Chance - detected possible hung */ + set_bit(I40E_Q_VECTOR_HUNG_DETECT, + &tx_ring->q_vector->hung_detected); + } + } } /** @@ -5302,7 +5294,7 @@ int i40e_open(struct net_device *netdev) TCP_FLAG_CWR) >> 16); wr32(&pf->hw, I40E_GLLAN_TSOMSK_L, be32_to_cpu(TCP_FLAG_CWR) >> 16); -#ifdef CONFIG_I40E_VXLAN +#if IS_ENABLED(CONFIG_VXLAN) vxlan_get_rx_port(netdev); #endif @@ -5738,7 +5730,7 @@ static void i40e_handle_lan_overflow_event(struct i40e_pf *pf, **/ static void i40e_service_event_complete(struct i40e_pf *pf) { - BUG_ON(!test_bit(__I40E_SERVICE_SCHED, &pf->state)); + WARN_ON(!test_bit(__I40E_SERVICE_SCHED, &pf->state)); /* flush memory to make sure state is correct before next watchog */ smp_mb__before_atomic(); @@ -6013,6 +6005,9 @@ static void i40e_link_event(struct i40e_pf *pf) i40e_status status; bool new_link, old_link; + /* save off old link status information */ + pf->hw.phy.link_info_old = pf->hw.phy.link_info; + /* set this to force the get_link_status call to refresh state */ pf->hw.phy.get_link_info = true; @@ -6147,13 +6142,9 @@ unlock: static void i40e_handle_link_event(struct i40e_pf *pf, struct i40e_arq_event_info *e) { - struct i40e_hw *hw = &pf->hw; struct i40e_aqc_get_link_status *status = (struct i40e_aqc_get_link_status *)&e->desc.params.raw; - /* save off old link status information */ - hw->phy.link_info_old = hw->phy.link_info; - /* Do a new status request to re-enable LSE reporting * and load new status information into the hw struct * This completely ignores any state information @@ -6685,6 +6676,7 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit) struct i40e_hw *hw = &pf->hw; u8 set_fc_aq_fail = 0; i40e_status ret; + u32 val; u32 v; /* Now we wait for GRST to settle out. @@ -6823,6 +6815,20 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit) } } + /* Reconfigure hardware for allowing smaller MSS in the case + * of TSO, so that we avoid the MDD being fired and causing + * a reset in the case of small MSS+TSO. 
+ */ +#define I40E_REG_MSS 0x000E64DC +#define I40E_REG_MSS_MIN_MASK 0x3FF0000 +#define I40E_64BYTE_MSS 0x400000 + val = rd32(hw, I40E_REG_MSS); + if ((val & I40E_REG_MSS_MIN_MASK) > I40E_64BYTE_MSS) { + val &= ~I40E_REG_MSS_MIN_MASK; + val |= I40E_64BYTE_MSS; + wr32(hw, I40E_REG_MSS, val); + } + if (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) || (pf->hw.aq.fw_maj_ver < 4)) { msleep(75); @@ -6984,7 +6990,7 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf) i40e_flush(hw); } -#ifdef CONFIG_I40E_VXLAN +#if IS_ENABLED(CONFIG_VXLAN) /** * i40e_sync_vxlan_filters_subtask - Sync the VSI filter list with HW * @pf: board private structure @@ -7051,7 +7057,7 @@ static void i40e_service_task(struct work_struct *work) i40e_watchdog_subtask(pf); i40e_fdir_reinit_subtask(pf); i40e_sync_filters_subtask(pf); -#ifdef CONFIG_I40E_VXLAN +#if IS_ENABLED(CONFIG_VXLAN) i40e_sync_vxlan_filters_subtask(pf); #endif i40e_clean_adminq_subtask(pf); @@ -7282,6 +7288,23 @@ static void i40e_vsi_free_arrays(struct i40e_vsi *vsi, bool free_qvectors) } /** + * i40e_clear_rss_config_user - clear the user configured RSS hash keys + * and lookup table + * @vsi: Pointer to VSI structure + */ +static void i40e_clear_rss_config_user(struct i40e_vsi *vsi) +{ + if (!vsi) + return; + + kfree(vsi->rss_hkey_user); + vsi->rss_hkey_user = NULL; + + kfree(vsi->rss_lut_user); + vsi->rss_lut_user = NULL; +} + +/** * i40e_vsi_clear - Deallocate the VSI provided * @vsi: the VSI being un-configured **/ @@ -7318,6 +7341,7 @@ static int i40e_vsi_clear(struct i40e_vsi *vsi) i40e_put_lump(pf->irq_pile, vsi->base_vector, vsi->idx); i40e_vsi_free_arrays(vsi, true); + i40e_clear_rss_config_user(vsi); pf->vsi[vsi->idx] = NULL; if (vsi->idx < pf->next_vsi) @@ -7780,7 +7804,8 @@ static int i40e_setup_misc_vector(struct i40e_pf *pf) * @vsi: vsi structure * @seed: RSS hash seed **/ -static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed) +static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed, + u8 *lut, u16 lut_size) { struct i40e_aqc_get_set_rss_key_data rss_key; struct i40e_pf *pf = vsi->back; @@ -7833,43 +7858,57 @@ static int i40e_vsi_config_rss(struct i40e_vsi *vsi) { u8 seed[I40E_HKEY_ARRAY_SIZE]; struct i40e_pf *pf = vsi->back; + u8 *lut; + int ret; - netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE); - vsi->rss_size = min_t(int, pf->rss_size, vsi->num_queue_pairs); + if (!(pf->flags & I40E_FLAG_RSS_AQ_CAPABLE)) + return 0; - if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) - return i40e_config_rss_aq(vsi, seed); + lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); + if (!lut) + return -ENOMEM; - return 0; + i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size); + netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE); + vsi->rss_size = min_t(int, pf->alloc_rss_size, vsi->num_queue_pairs); + ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size); + kfree(lut); + + return ret; } /** - * i40e_config_rss_reg - Prepare for RSS if used - * @pf: board private structure + * i40e_config_rss_reg - Configure RSS keys and lut by writing registers + * @vsi: Pointer to vsi structure * @seed: RSS hash seed + * @lut: Lookup table + * @lut_size: Lookup table size + * + * Returns 0 on success, negative on failure **/ -static int i40e_config_rss_reg(struct i40e_pf *pf, const u8 *seed) +static int i40e_config_rss_reg(struct i40e_vsi *vsi, const u8 *seed, + const u8 *lut, u16 lut_size) { - struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; + struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; - u32 
*seed_dw = (u32 *)seed; - u32 current_queue = 0; - u32 lut = 0; - int i, j; + u8 i; /* Fill out hash function seed */ - for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++) - wr32(hw, I40E_PFQF_HKEY(i), seed_dw[i]); + if (seed) { + u32 *seed_dw = (u32 *)seed; - for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++) { - lut = 0; - for (j = 0; j < 4; j++) { - if (current_queue == vsi->rss_size) - current_queue = 0; - lut |= ((current_queue) << (8 * j)); - current_queue++; - } - wr32(&pf->hw, I40E_PFQF_HLUT(i), lut); + for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++) + wr32(hw, I40E_PFQF_HKEY(i), seed_dw[i]); + } + + if (lut) { + u32 *lut_dw = (u32 *)lut; + + if (lut_size != I40E_HLUT_ARRAY_SIZE) + return -EINVAL; + + for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++) + wr32(hw, I40E_PFQF_HLUT(i), lut_dw[i]); } i40e_flush(hw); @@ -7877,18 +7916,101 @@ static int i40e_config_rss_reg(struct i40e_pf *pf, const u8 *seed) } /** - * i40e_config_rss - Prepare for RSS if used + * i40e_get_rss_reg - Get the RSS keys and lut by reading registers + * @vsi: Pointer to VSI structure + * @seed: Buffer to store the keys + * @lut: Buffer to store the lookup table entries + * @lut_size: Size of buffer to store the lookup table entries + * + * Returns 0 on success, negative on failure + */ +static int i40e_get_rss_reg(struct i40e_vsi *vsi, u8 *seed, + u8 *lut, u16 lut_size) +{ + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + u16 i; + + if (seed) { + u32 *seed_dw = (u32 *)seed; + + for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++) + seed_dw[i] = rd32(hw, I40E_PFQF_HKEY(i)); + } + if (lut) { + u32 *lut_dw = (u32 *)lut; + + if (lut_size != I40E_HLUT_ARRAY_SIZE) + return -EINVAL; + for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++) + lut_dw[i] = rd32(hw, I40E_PFQF_HLUT(i)); + } + + return 0; +} + +/** + * i40e_config_rss - Configure RSS keys and lut + * @vsi: Pointer to VSI structure + * @seed: RSS hash seed + * @lut: Lookup table + * @lut_size: Lookup table size + * + * Returns 0 on success, negative on failure + */ +int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) +{ + struct i40e_pf *pf = vsi->back; + + if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) + return i40e_config_rss_aq(vsi, seed, lut, lut_size); + else + return i40e_config_rss_reg(vsi, seed, lut, lut_size); +} + +/** + * i40e_get_rss - Get RSS keys and lut + * @vsi: Pointer to VSI structure + * @seed: Buffer to store the keys + * @lut: Buffer to store the lookup table entries + * lut_size: Size of buffer to store the lookup table entries + * + * Returns 0 on success, negative on failure + */ +int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) +{ + return i40e_get_rss_reg(vsi, seed, lut, lut_size); +} + +/** + * i40e_fill_rss_lut - Fill the RSS lookup table with default values + * @pf: Pointer to board private structure + * @lut: Lookup table + * @rss_table_size: Lookup table size + * @rss_size: Range of queue number for hashing + */ +static void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut, + u16 rss_table_size, u16 rss_size) +{ + u16 i; + + for (i = 0; i < rss_table_size; i++) + lut[i] = i % rss_size; +} + +/** + * i40e_pf_config_rss - Prepare for RSS if used * @pf: board private structure **/ -static int i40e_config_rss(struct i40e_pf *pf) +static int i40e_pf_config_rss(struct i40e_pf *pf) { struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; u8 seed[I40E_HKEY_ARRAY_SIZE]; + u8 *lut; struct i40e_hw *hw = &pf->hw; u32 reg_val; u64 hena; - - netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE); + int ret; /* By 
default we enable TCP/UDP with IPv4/IPv6 ptypes */ hena = (u64)rd32(hw, I40E_PFQF_HENA(0)) | @@ -7898,8 +8020,6 @@ static int i40e_config_rss(struct i40e_pf *pf) wr32(hw, I40E_PFQF_HENA(0), (u32)hena); wr32(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32)); - vsi->rss_size = min_t(int, pf->rss_size, vsi->num_queue_pairs); - /* Determine the RSS table size based on the hardware capabilities */ reg_val = rd32(hw, I40E_PFQF_CTL_0); reg_val = (pf->rss_table_size == 512) ? @@ -7907,10 +8027,32 @@ static int i40e_config_rss(struct i40e_pf *pf) (reg_val & ~I40E_PFQF_CTL_0_HASHLUTSIZE_512); wr32(hw, I40E_PFQF_CTL_0, reg_val); - if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) - return i40e_config_rss_aq(pf->vsi[pf->lan_vsi], seed); + /* Determine the RSS size of the VSI */ + if (!vsi->rss_size) + vsi->rss_size = min_t(int, pf->alloc_rss_size, + vsi->num_queue_pairs); + + lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); + if (!lut) + return -ENOMEM; + + /* Use user configured lut if there is one, otherwise use default */ + if (vsi->rss_lut_user) + memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size); else - return i40e_config_rss_reg(pf, seed); + i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size); + + /* Use user configured hash key if there is one, otherwise + * use default. + */ + if (vsi->rss_hkey_user) + memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE); + else + netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE); + ret = i40e_config_rss(vsi, seed, lut, vsi->rss_table_size); + kfree(lut); + + return ret; } /** @@ -7935,13 +8077,28 @@ int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count) vsi->req_queue_pairs = queue_count; i40e_prep_for_reset(pf); - pf->rss_size = new_rss_size; + pf->alloc_rss_size = new_rss_size; i40e_reset_and_rebuild(pf, true); - i40e_config_rss(pf); + + /* Discard the user configured hash keys and lut, if less + * queues are enabled. 
+ */ + if (queue_count < vsi->rss_size) { + i40e_clear_rss_config_user(vsi); + dev_dbg(&pf->pdev->dev, + "discard user configured hash keys and lut\n"); + } + + /* Reset vsi->rss_size, as number of enabled queues changed */ + vsi->rss_size = min_t(int, pf->alloc_rss_size, + vsi->num_queue_pairs); + + i40e_pf_config_rss(pf); } - dev_info(&pf->pdev->dev, "RSS count: %d\n", pf->rss_size); - return pf->rss_size; + dev_info(&pf->pdev->dev, "RSS count/HW max RSS count: %d/%d\n", + pf->alloc_rss_size, pf->rss_size_max); + return pf->alloc_rss_size; } /** @@ -8112,13 +8269,14 @@ static int i40e_sw_init(struct i40e_pf *pf) * maximum might end up larger than the available queues */ pf->rss_size_max = BIT(pf->hw.func_caps.rss_table_entry_width); - pf->rss_size = 1; + pf->alloc_rss_size = 1; pf->rss_table_size = pf->hw.func_caps.rss_table_size; pf->rss_size_max = min_t(int, pf->rss_size_max, pf->hw.func_caps.num_tx_qp); if (pf->hw.func_caps.rss) { pf->flags |= I40E_FLAG_RSS_ENABLED; - pf->rss_size = min_t(int, pf->rss_size_max, num_online_cpus()); + pf->alloc_rss_size = min_t(int, pf->rss_size_max, + num_online_cpus()); } /* MFP mode enabled */ @@ -8275,7 +8433,7 @@ static int i40e_set_features(struct net_device *netdev, return 0; } -#ifdef CONFIG_I40E_VXLAN +#if IS_ENABLED(CONFIG_VXLAN) /** * i40e_get_vxlan_port_idx - Lookup a possibly offloaded for Rx UDP port * @pf: board private structure @@ -8595,7 +8753,7 @@ static const struct net_device_ops i40e_netdev_ops = { .ndo_get_vf_config = i40e_ndo_get_vf_config, .ndo_set_vf_link_state = i40e_ndo_set_vf_link_state, .ndo_set_vf_spoofchk = i40e_ndo_set_vf_spoofchk, -#ifdef CONFIG_I40E_VXLAN +#if IS_ENABLED(CONFIG_VXLAN) .ndo_add_vxlan_port = i40e_add_vxlan_port, .ndo_del_vxlan_port = i40e_del_vxlan_port, #endif @@ -9051,7 +9209,7 @@ int i40e_vsi_release(struct i40e_vsi *vsi) f->is_vf, f->is_netdev); spin_unlock_bh(&vsi->mac_filter_list_lock); - i40e_sync_vsi_filters(vsi, false); + i40e_sync_vsi_filters(vsi); i40e_vsi_delete(vsi); i40e_vsi_free_q_vectors(vsi); @@ -9947,7 +10105,7 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit) * the hash */ if ((pf->flags & I40E_FLAG_RSS_ENABLED)) - i40e_config_rss(pf); + i40e_pf_config_rss(pf); /* fill in link information and enable LSE reporting */ i40e_update_link_info(&pf->hw); @@ -9985,7 +10143,7 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf) !(pf->flags & I40E_FLAG_MSIX_ENABLED)) { /* one qp for PF, no queues for anything else */ queues_left = 0; - pf->rss_size = pf->num_lan_qps = 1; + pf->alloc_rss_size = pf->num_lan_qps = 1; /* make sure all the fancies are disabled */ pf->flags &= ~(I40E_FLAG_RSS_ENABLED | @@ -10002,7 +10160,7 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf) I40E_FLAG_FD_ATR_ENABLED | I40E_FLAG_DCB_CAPABLE))) { /* one qp for PF */ - pf->rss_size = pf->num_lan_qps = 1; + pf->alloc_rss_size = pf->num_lan_qps = 1; queues_left -= pf->num_lan_qps; pf->flags &= ~(I40E_FLAG_RSS_ENABLED | @@ -10072,8 +10230,9 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf) "qs_avail=%d FD SB=%d lan_qs=%d lan_tc0=%d vf=%d*%d vmdq=%d*%d, remaining=%d\n", pf->hw.func_caps.num_tx_qp, !!(pf->flags & I40E_FLAG_FD_SB_ENABLED), - pf->num_lan_qps, pf->rss_size, pf->num_req_vfs, pf->num_vf_qps, - pf->num_vmdq_vsis, pf->num_vmdq_qps, queues_left); + pf->num_lan_qps, pf->alloc_rss_size, pf->num_req_vfs, + pf->num_vf_qps, pf->num_vmdq_vsis, pf->num_vmdq_qps, + queues_left); #ifdef I40E_FCOE dev_dbg(&pf->pdev->dev, "fcoe queues = %d\n", pf->num_fcoe_qps); #endif @@ 
-10111,55 +10270,53 @@ static int i40e_setup_pf_filter_control(struct i40e_pf *pf) } #define INFO_STRING_LEN 255 +#define REMAIN(__x) (INFO_STRING_LEN - (__x)) static void i40e_print_features(struct i40e_pf *pf) { struct i40e_hw *hw = &pf->hw; - char *buf, *string; + char *buf; + int i; - string = kzalloc(INFO_STRING_LEN, GFP_KERNEL); - if (!string) { - dev_err(&pf->pdev->dev, "Features string allocation failed\n"); + buf = kmalloc(INFO_STRING_LEN, GFP_KERNEL); + if (!buf) return; - } - buf = string; - - buf += sprintf(string, "Features: PF-id[%d] ", hw->pf_id); + i = snprintf(buf, INFO_STRING_LEN, "Features: PF-id[%d]", hw->pf_id); #ifdef CONFIG_PCI_IOV - buf += sprintf(buf, "VFs: %d ", pf->num_req_vfs); + i += snprintf(&buf[i], REMAIN(i), " VFs: %d", pf->num_req_vfs); #endif - buf += sprintf(buf, "VSIs: %d QP: %d RX: %s ", - pf->hw.func_caps.num_vsis, - pf->vsi[pf->lan_vsi]->num_queue_pairs, - pf->flags & I40E_FLAG_RX_PS_ENABLED ? "PS" : "1BUF"); + i += snprintf(&buf[i], REMAIN(i), " VSIs: %d QP: %d RX: %s", + pf->hw.func_caps.num_vsis, + pf->vsi[pf->lan_vsi]->num_queue_pairs, + pf->flags & I40E_FLAG_RX_PS_ENABLED ? "PS" : "1BUF"); if (pf->flags & I40E_FLAG_RSS_ENABLED) - buf += sprintf(buf, "RSS "); + i += snprintf(&buf[i], REMAIN(i), " RSS"); if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) - buf += sprintf(buf, "FD_ATR "); + i += snprintf(&buf[i], REMAIN(i), " FD_ATR"); if (pf->flags & I40E_FLAG_FD_SB_ENABLED) { - buf += sprintf(buf, "FD_SB "); - buf += sprintf(buf, "NTUPLE "); + i += snprintf(&buf[i], REMAIN(i), " FD_SB"); + i += snprintf(&buf[i], REMAIN(i), " NTUPLE"); } if (pf->flags & I40E_FLAG_DCB_CAPABLE) - buf += sprintf(buf, "DCB "); + i += snprintf(&buf[i], REMAIN(i), " DCB"); #if IS_ENABLED(CONFIG_VXLAN) - buf += sprintf(buf, "VxLAN "); + i += snprintf(&buf[i], REMAIN(i), " VxLAN"); #endif if (pf->flags & I40E_FLAG_PTP) - buf += sprintf(buf, "PTP "); + i += snprintf(&buf[i], REMAIN(i), " PTP"); #ifdef I40E_FCOE if (pf->flags & I40E_FLAG_FCOE_ENABLED) - buf += sprintf(buf, "FCOE "); + i += snprintf(&buf[i], REMAIN(i), " FCOE"); #endif if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED) - buf += sprintf(buf, "VEB "); + i += snprintf(&buf[i], REMAIN(i), " VEB"); else - buf += sprintf(buf, "VEPA "); + i += snprintf(&buf[i], REMAIN(i), " VEPA"); - BUG_ON(buf > (string + INFO_STRING_LEN)); - dev_info(&pf->pdev->dev, "%s\n", string); - kfree(string); + dev_info(&pf->pdev->dev, "%s\n", buf); + kfree(buf); + WARN_ON(i > INFO_STRING_LEN); } /** @@ -10183,6 +10340,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) u16 link_status; int err; u32 len; + u32 val; u32 i; u8 set_fc_aq_fail; @@ -10296,6 +10454,16 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pf->hw.fc.requested_mode = I40E_FC_NONE; err = i40e_init_adminq(hw); + if (err) { + if (err == I40E_ERR_FIRMWARE_API_VERSION) + dev_info(&pdev->dev, + "The driver for the device stopped because the NVM image is newer than expected. You must install the most recent version of the network driver.\n"); + else + dev_info(&pdev->dev, + "The driver for the device stopped because the device firmware failed to init. 
Try updating your NVM image.\n"); + + goto err_pf_reset; + } /* provide nvm, fw, api versions */ dev_info(&pdev->dev, "fw %d.%d.%05d api %d.%d nvm %s\n", @@ -10303,12 +10471,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->aq.api_maj_ver, hw->aq.api_min_ver, i40e_nvm_version_str(hw)); - if (err) { - dev_info(&pdev->dev, - "The driver for the device stopped because the NVM image is newer than expected. You must install the most recent version of the network driver.\n"); - goto err_pf_reset; - } - if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && hw->aq.api_min_ver > I40E_FW_API_VERSION_MINOR) dev_info(&pdev->dev, @@ -10487,6 +10649,17 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) i40e_stat_str(&pf->hw, err), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + /* Reconfigure hardware for allowing smaller MSS in the case + * of TSO, so that we avoid the MDD being fired and causing + * a reset in the case of small MSS+TSO. + */ + val = rd32(hw, I40E_REG_MSS); + if ((val & I40E_REG_MSS_MIN_MASK) > I40E_64BYTE_MSS) { + val &= ~I40E_REG_MSS_MIN_MASK; + val |= I40E_64BYTE_MSS; + wr32(hw, I40E_REG_MSS, val); + } + if (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) || (pf->hw.aq.fw_maj_ver < 4)) { msleep(75); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 635b3ac17877..b0ae3e695783 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -235,6 +235,9 @@ static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi, "Filter deleted for PCTYPE %d loc = %d\n", fd_data->pctype, fd_data->fd_id); } + if (err) + kfree(raw_packet); + return err ? -EOPNOTSUPP : 0; } @@ -312,6 +315,9 @@ static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi, fd_data->pctype, fd_data->fd_id); } + if (err) + kfree(raw_packet); + return err ? -EOPNOTSUPP : 0; } @@ -322,7 +328,7 @@ static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi, * @fd_data: the flow director data required for the FDir descriptor * @add: true adds a filter, false removes it * - * Always returns -EOPNOTSUPP + * Returns 0 if the filters were successfully added or removed **/ static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi, struct i40e_fdir_filter *fd_data, @@ -387,6 +393,9 @@ static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi, } } + if (err) + kfree(raw_packet); + return err ? 
-EOPNOTSUPP : 0; } @@ -506,9 +515,6 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring, pf->auto_disable_flags |= I40E_FLAG_FD_SB_ENABLED; } - } else { - dev_info(&pdev->dev, - "FD filter programming failed due to incorrect filter parameters\n"); } } else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) { if (I40E_DEBUG_FD & pf->hw.debug_mask) @@ -526,11 +532,7 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, struct i40e_tx_buffer *tx_buffer) { if (tx_buffer->skb) { - if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) - kfree(tx_buffer->raw_buf); - else - dev_kfree_skb_any(tx_buffer->skb); - + dev_kfree_skb_any(tx_buffer->skb); if (dma_unmap_len(tx_buffer, len)) dma_unmap_single(ring->dev, dma_unmap_addr(tx_buffer, dma), @@ -542,6 +544,10 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, dma_unmap_len(tx_buffer, len), DMA_TO_DEVICE); } + + if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) + kfree(tx_buffer->raw_buf); + tx_buffer->next_to_watch = NULL; tx_buffer->skb = NULL; dma_unmap_len_set(tx_buffer, len, 0); @@ -1632,7 +1638,6 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) continue; } #endif - skb_mark_napi_id(skb, &rx_ring->q_vector->napi); i40e_receive_skb(rx_ring, skb, vlan_tag); rx_desc->wb.qword1.status_error_len = 0; @@ -1864,7 +1869,6 @@ enable_int: q_vector->itr_countdown--; else q_vector->itr_countdown = ITR_COUNTDOWN_START; - } /** @@ -1892,12 +1896,14 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) return 0; } + /* Clear hung_detected bit */ + clear_bit(I40E_Q_VECTOR_HUNG_DETECT, &q_vector->hung_detected); /* Since the actual Tx work is minimal, we can give the Tx a larger * budget and be more aggressive about cleaning up the Tx descriptors. 
*/ i40e_for_each_ring(ring, q_vector->tx) { clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit); - arm_wb |= ring->arm_wb; + arm_wb = arm_wb || ring->arm_wb; ring->arm_wb = false; } @@ -1926,8 +1932,10 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) /* If work not completed, return budget and polling will return */ if (!clean_complete) { tx_only: - if (arm_wb) + if (arm_wb) { + q_vector->tx.ring[0].tx_stats.tx_force_wb++; i40e_force_wb(vsi, q_vector); + } return budget; } @@ -2187,14 +2195,12 @@ out: * @tx_ring: ptr to the ring to send * @skb: ptr to the skb we're sending * @hdr_len: ptr to the size of the packet header - * @cd_type_cmd_tso_mss: ptr to u64 object - * @cd_tunneling: ptr to context descriptor bits + * @cd_type_cmd_tso_mss: Quad Word 1 * * Returns 0 if no TSO can happen, 1 if tso is going, or error **/ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, - u8 *hdr_len, u64 *cd_type_cmd_tso_mss, - u32 *cd_tunneling) + u8 *hdr_len, u64 *cd_type_cmd_tso_mss) { u32 cd_cmd, cd_tso_len, cd_mss; struct ipv6hdr *ipv6h; @@ -2247,7 +2253,7 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, * @tx_ring: ptr to the ring to send * @skb: ptr to the skb we're sending * @tx_flags: the collected send information - * @cd_type_cmd_tso_mss: ptr to u64 object + * @cd_type_cmd_tso_mss: Quad Word 1 * * Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen **/ @@ -2807,6 +2813,9 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, int tsyn; int tso; + /* prefetch the data, we'll need it later */ + prefetch(skb->data); + if (0 == i40e_xmit_descriptor_count(skb, tx_ring)) return NETDEV_TX_BUSY; @@ -2826,8 +2835,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, else if (protocol == htons(ETH_P_IPV6)) tx_flags |= I40E_TX_FLAGS_IPV6; - tso = i40e_tso(tx_ring, skb, &hdr_len, - &cd_type_cmd_tso_mss, &cd_tunneling); + tso = i40e_tso(tx_ring, skb, &hdr_len, &cd_type_cmd_tso_mss); if (tso < 0) goto out_drop; diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 6779fb771d6a..dccc1eb576f2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -202,6 +202,7 @@ struct i40e_tx_queue_stats { u64 tx_busy; u64 tx_done_old; u64 tx_linearize; + u64 tx_force_wb; }; struct i40e_rx_queue_stats { diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 44462b40f2d7..b3bd81c3e1ce 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -290,8 +290,8 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id, next_q = find_first_bit(&linklistmap, (I40E_MAX_VSI_QP * I40E_VIRTCHNL_SUPPORTED_QTYPES)); - vsi_queue_id = next_q/I40E_VIRTCHNL_SUPPORTED_QTYPES; - qtype = next_q%I40E_VIRTCHNL_SUPPORTED_QTYPES; + vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES; + qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES; pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id); reg = ((qtype << I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT) | pf_queue_id); @@ -565,7 +565,7 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type) } /* program mac filter */ - ret = i40e_sync_vsi_filters(vsi, false); + ret = i40e_sync_vsi_filters(vsi); if (ret) dev_err(&pf->pdev->dev, "Unable to program ucast filters\n"); @@ -1094,8 +1094,8 @@ static int i40e_vc_send_msg_to_vf(struct 
i40e_vf *vf, u32 v_opcode, /* single place to detect unsuccessful return values */ if (v_retval) { vf->num_invalid_msgs++; - dev_err(&pf->pdev->dev, "Failed opcode %d Error: %d\n", - v_opcode, v_retval); + dev_err(&pf->pdev->dev, "VF %d failed opcode %d, error: %d\n", + vf->vf_id, v_opcode, v_retval); if (vf->num_invalid_msgs > I40E_DEFAULT_NUM_INVALID_MSGS_ALLOWED) { dev_err(&pf->pdev->dev, @@ -1623,7 +1623,8 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) if (!f) { dev_err(&pf->pdev->dev, - "Unable to add VF MAC filter\n"); + "Unable to add MAC filter %pM for VF %d\n", + al->list[i].addr, vf->vf_id); ret = I40E_ERR_PARAM; spin_unlock_bh(&vsi->mac_filter_list_lock); goto error_param; @@ -1632,8 +1633,10 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) spin_unlock_bh(&vsi->mac_filter_list_lock); /* program the updated filter list */ - if (i40e_sync_vsi_filters(vsi, false)) - dev_err(&pf->pdev->dev, "Unable to program VF MAC filters\n"); + ret = i40e_sync_vsi_filters(vsi); + if (ret) + dev_err(&pf->pdev->dev, "Unable to program VF %d MAC filters, error %d\n", + vf->vf_id, ret); error_param: /* send the response to the VF */ @@ -1669,8 +1672,8 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) for (i = 0; i < al->num_elements; i++) { if (is_broadcast_ether_addr(al->list[i].addr) || is_zero_ether_addr(al->list[i].addr)) { - dev_err(&pf->pdev->dev, "invalid VF MAC addr %pM\n", - al->list[i].addr); + dev_err(&pf->pdev->dev, "Invalid MAC addr %pM for VF %d\n", + al->list[i].addr, vf->vf_id); ret = I40E_ERR_INVALID_MAC_ADDR; goto error_param; } @@ -1685,8 +1688,10 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) spin_unlock_bh(&vsi->mac_filter_list_lock); /* program the updated filter list */ - if (i40e_sync_vsi_filters(vsi, false)) - dev_err(&pf->pdev->dev, "Unable to program VF MAC filters\n"); + ret = i40e_sync_vsi_filters(vsi); + if (ret) + dev_err(&pf->pdev->dev, "Unable to program VF %d MAC filters, error %d\n", + vf->vf_id, ret); error_param: /* send the response to the VF */ @@ -1740,8 +1745,8 @@ static int i40e_vc_add_vlan_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) if (ret) dev_err(&pf->pdev->dev, - "Unable to add VF vlan filter %d, error %d\n", - vfl->vlan_id[i], ret); + "Unable to add VLAN filter %d for VF %d, error %d\n", + vfl->vlan_id[i], vf->vf_id, ret); } error_param: @@ -1792,8 +1797,8 @@ static int i40e_vc_remove_vlan_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) if (ret) dev_err(&pf->pdev->dev, - "Unable to delete VF vlan filter %d, error %d\n", - vfl->vlan_id[i], ret); + "Unable to delete VLAN filter %d for VF %d, error %d\n", + vfl->vlan_id[i], vf->vf_id, ret); } error_param: @@ -2099,7 +2104,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) dev_info(&pf->pdev->dev, "Setting MAC %pM on VF %d\n", mac, vf_id); /* program mac filter */ - if (i40e_sync_vsi_filters(vsi, false)) { + if (i40e_sync_vsi_filters(vsi)) { dev_err(&pf->pdev->dev, "Unable to program ucast filters\n"); ret = -EIO; goto error_param; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h index fcb9ef34cc7a..1c76389bd888 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h @@ -2311,4 +2311,4 @@ struct i40e_aqc_debug_modify_internals { I40E_CHECK_CMD_LENGTH(i40e_aqc_debug_modify_internals); -#endif +#endif /* _I40E_ADMINQ_CMD_H_ */ 
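/*
 * A minimal illustrative sketch (not part of the patch itself): the RSS
 * rework earlier in this series, and the i40evf ethtool changes further
 * below, both move from poking the HLUT/HKEY registers directly to passing
 * a byte-per-entry lookup table into i40e_config_rss()/i40evf_config_rss().
 * The two ideas involved are filling a default LUT round-robin over the
 * active queues (as i40e_fill_rss_lut() does) and packing four 8-bit LUT
 * entries into one 32-bit register word. fill_lut() and pack_lut_word()
 * below are hypothetical helper names, not functions from the driver, and
 * the sketch uses stdint types rather than kernel u8/u32.
 */
#include <stdint.h>
#include <stddef.h>

/* Spread hash buckets evenly across the enabled queues; assumes
 * rss_size is non-zero. */
static void fill_lut(uint8_t *lut, size_t lut_size, uint16_t rss_size)
{
	size_t i;

	for (i = 0; i < lut_size; i++)
		lut[i] = (uint8_t)(i % rss_size);
}

/* Four consecutive 8-bit LUT entries occupy one 32-bit HLUT register,
 * which is why the driver can treat the LUT byte array as an array of
 * 32-bit words when writing the registers. */
static uint32_t pack_lut_word(const uint8_t *lut, size_t idx)
{
	return (uint32_t)lut[idx] |
	       ((uint32_t)lut[idx + 1] << 8) |
	       ((uint32_t)lut[idx + 2] << 16) |
	       ((uint32_t)lut[idx + 3] << 24);
}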
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 47e9a90d6b10..4ca40651a228 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -51,11 +51,7 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, struct i40e_tx_buffer *tx_buffer) { if (tx_buffer->skb) { - if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) - kfree(tx_buffer->raw_buf); - else - dev_kfree_skb_any(tx_buffer->skb); - + dev_kfree_skb_any(tx_buffer->skb); if (dma_unmap_len(tx_buffer, len)) dma_unmap_single(ring->dev, dma_unmap_addr(tx_buffer, dma), @@ -67,6 +63,10 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, dma_unmap_len(tx_buffer, len), DMA_TO_DEVICE); } + + if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) + kfree(tx_buffer->raw_buf); + tx_buffer->next_to_watch = NULL; tx_buffer->skb = NULL; dma_unmap_len_set(tx_buffer, len, 0); @@ -127,17 +127,24 @@ void i40evf_free_tx_resources(struct i40e_ring *tx_ring) } /** - * i40e_get_head - Retrieve head from head writeback - * @tx_ring: tx ring to fetch head of + * i40evf_get_tx_pending - how many Tx descriptors not processed + * @tx_ring: the ring of descriptors * - * Returns value of Tx ring head based on value stored - * in head write-back location + * Since there is no access to the ring head register + * in XL710, we need to use our local copies **/ -static inline u32 i40e_get_head(struct i40e_ring *tx_ring) +u32 i40evf_get_tx_pending(struct i40e_ring *ring) { - void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count; + u32 head, tail; - return le32_to_cpu(*(volatile __le32 *)head); + head = i40e_get_head(ring); + tail = readl(ring->tail); + + if (head != tail) + return (head < tail) ? 
+ tail - head : (tail + ring->count - head); + + return 0; } #define WB_STRIDE 0x3 @@ -245,16 +252,6 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) tx_ring->q_vector->tx.total_bytes += total_bytes; tx_ring->q_vector->tx.total_packets += total_packets; - /* check to see if there are any non-cache aligned descriptors - * waiting to be written back, and kick the hardware to force - * them to be written back in case of napi polling - */ - if (budget && - !((i & WB_STRIDE) == WB_STRIDE) && - !test_bit(__I40E_DOWN, &tx_ring->vsi->state) && - (I40E_DESC_UNUSED(tx_ring) != tx_ring->count)) - tx_ring->arm_wb = true; - netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index), total_packets, total_bytes); @@ -414,7 +411,7 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) return false; } -/* +/** * i40evf_setup_tx_descriptors - Allocate the Tx descriptors * @tx_ring: the tx ring to set up * @@ -1090,7 +1087,6 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget) continue; } #endif - skb_mark_napi_id(skb, &rx_ring->q_vector->napi); i40e_receive_skb(rx_ring, skb, vlan_tag); rx_desc->wb.qword1.status_error_len = 0; @@ -1263,10 +1259,12 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, rx = i40e_set_new_dynamic_itr(&q_vector->rx); rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); } + if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) { tx = i40e_set_new_dynamic_itr(&q_vector->tx); txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr); } + if (rx || tx) { /* get the higher of the two ITR adjustments and * use the same value for both ITR registers @@ -1302,7 +1300,6 @@ enable_int: q_vector->itr_countdown--; else q_vector->itr_countdown = ITR_COUNTDOWN_START; - } /** @@ -1335,7 +1332,7 @@ int i40evf_napi_poll(struct napi_struct *napi, int budget) */ i40e_for_each_ring(ring, q_vector->tx) { clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit); - arm_wb |= ring->arm_wb; + arm_wb = arm_wb || ring->arm_wb; ring->arm_wb = false; } @@ -1364,8 +1361,10 @@ int i40evf_napi_poll(struct napi_struct *napi, int budget) /* If work not completed, return budget and polling will return */ if (!clean_complete) { tx_only: - if (arm_wb) + if (arm_wb) { + q_vector->tx.ring[0].tx_stats.tx_force_wb++; i40evf_force_wb(vsi, q_vector); + } return budget; } @@ -1437,13 +1436,12 @@ out: * @tx_ring: ptr to the ring to send * @skb: ptr to the skb we're sending * @hdr_len: ptr to the size of the packet header - * @cd_tunneling: ptr to context descriptor bits + * @cd_type_cmd_tso_mss: Quad Word 1 * * Returns 0 if no TSO can happen, 1 if tso is going, or error **/ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, - u8 *hdr_len, u64 *cd_type_cmd_tso_mss, - u32 *cd_tunneling) + u8 *hdr_len, u64 *cd_type_cmd_tso_mss) { u32 cd_cmd, cd_tso_len, cd_mss; struct ipv6hdr *ipv6h; @@ -1555,7 +1553,6 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, *tx_flags |= I40E_TX_FLAGS_IPV6; } - if ((tx_ring->flags & I40E_TXR_FLAGS_OUTER_UDP_CSUM) && (l4_tunnel == I40E_TXD_CTX_UDP_TUNNELING) && (*cd_tunneling & I40E_TXD_CTX_QW0_EXT_IP_MASK)) { @@ -1654,7 +1651,7 @@ static void i40e_create_tx_ctx(struct i40e_ring *tx_ring, context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss); } - /** +/** * i40e_chk_linearize - Check if there are more than 8 fragments per packet * @skb: send buffer * @tx_flags: collected send information @@ -1770,6 +1767,9 @@ static inline void i40evf_tx_map(struct i40e_ring 
*tx_ring, struct sk_buff *skb, u32 td_tag = 0; dma_addr_t dma; u16 gso_segs; + u16 desc_count = 0; + bool tail_bump = true; + bool do_rs = false; if (tx_flags & I40E_TX_FLAGS_HW_VLAN) { td_cmd |= I40E_TX_DESC_CMD_IL2TAG1; @@ -1810,6 +1810,8 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_desc++; i++; + desc_count++; + if (i == tx_ring->count) { tx_desc = I40E_TX_DESC(tx_ring, 0); i = 0; @@ -1829,6 +1831,8 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_desc++; i++; + desc_count++; + if (i == tx_ring->count) { tx_desc = I40E_TX_DESC(tx_ring, 0); i = 0; @@ -1843,35 +1847,6 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_bi = &tx_ring->tx_bi[i]; } - /* Place RS bit on last descriptor of any packet that spans across the - * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline. - */ -#define WB_STRIDE 0x3 - if (((i & WB_STRIDE) != WB_STRIDE) && - (first <= &tx_ring->tx_bi[i]) && - (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) { - tx_desc->cmd_type_offset_bsz = - build_ctob(td_cmd, td_offset, size, td_tag) | - cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP << - I40E_TXD_QW1_CMD_SHIFT); - } else { - tx_desc->cmd_type_offset_bsz = - build_ctob(td_cmd, td_offset, size, td_tag) | - cpu_to_le64((u64)I40E_TXD_CMD << - I40E_TXD_QW1_CMD_SHIFT); - } - - netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index), - first->bytecount); - - /* Force memory writes to complete before letting h/w - * know there are new descriptors to fetch. (Only - * applicable for weak-ordered memory model archs, - * such as IA-64). - */ - wmb(); - /* set next_to_watch value indicating a packet is present */ first->next_to_watch = tx_desc; @@ -1881,15 +1856,72 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_ring->next_to_use = i; + netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->queue_index), + first->bytecount); i40evf_maybe_stop_tx(tx_ring, DESC_NEEDED); + + /* Algorithm to optimize tail and RS bit setting: + * if xmit_more is supported + * if xmit_more is true + * do not update tail and do not mark RS bit. + * if xmit_more is false and last xmit_more was false + * if every packet spanned less than 4 desc + * then set RS bit on 4th packet and update tail + * on every packet + * else + * update tail and set RS bit on every packet. + * if xmit_more is false and last_xmit_more was true + * update tail and set RS bit. + * + * Optimization: wmb to be issued only in case of tail update. + * Also optimize the Descriptor WB path for RS bit with the same + * algorithm. + * + * Note: If there are less than 4 packets + * pending and interrupts were disabled the service task will + * trigger a force WB. + */ + if (skb->xmit_more && + !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->queue_index))) { + tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET; + tail_bump = false; + } else if (!skb->xmit_more && + !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->queue_index)) && + (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) && + (tx_ring->packet_stride < WB_STRIDE) && + (desc_count < WB_STRIDE)) { + tx_ring->packet_stride++; + } else { + tx_ring->packet_stride = 0; + tx_ring->flags &= ~I40E_TXR_FLAGS_LAST_XMIT_MORE_SET; + do_rs = true; + } + if (do_rs) + tx_ring->packet_stride = 0; + + tx_desc->cmd_type_offset_bsz = + build_ctob(td_cmd, td_offset, size, td_tag) | + cpu_to_le64((u64)(do_rs ? 
I40E_TXD_CMD : + I40E_TX_DESC_CMD_EOP) << + I40E_TXD_QW1_CMD_SHIFT); + /* notify HW of packet */ - if (!skb->xmit_more || - netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index))) - writel(i, tx_ring->tail); - else + if (!tail_bump) prefetchw(tx_desc + 1); + if (tail_bump) { + /* Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. (Only + * applicable for weak-ordered memory model archs, + * such as IA-64). + */ + wmb(); + writel(i, tx_ring->tail); + } + return; dma_error: @@ -1961,6 +1993,9 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, u8 hdr_len = 0; int tso; + /* prefetch the data, we'll need it later */ + prefetch(skb->data); + if (0 == i40evf_xmit_descriptor_count(skb, tx_ring)) return NETDEV_TX_BUSY; @@ -1980,8 +2015,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, else if (protocol == htons(ETH_P_IPV6)) tx_flags |= I40E_TX_FLAGS_IPV6; - tso = i40e_tso(tx_ring, skb, &hdr_len, - &cd_type_cmd_tso_mss, &cd_tunneling); + tso = i40e_tso(tx_ring, skb, &hdr_len, &cd_type_cmd_tso_mss); if (tso < 0) goto out_drop; @@ -2029,7 +2063,7 @@ out_drop: netdev_tx_t i40evf_xmit_frame(struct sk_buff *skb, struct net_device *netdev) { struct i40evf_adapter *adapter = netdev_priv(netdev); - struct i40e_ring *tx_ring = adapter->tx_rings[skb->queue_mapping]; + struct i40e_ring *tx_ring = &adapter->tx_rings[skb->queue_mapping]; /* hardware can't handle really short frames, hardware padding works * beyond this point diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index ebc1bf77f036..e29bb3e86cfd 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -201,6 +201,7 @@ struct i40e_tx_queue_stats { u64 tx_busy; u64 tx_done_old; u64 tx_linearize; + u64 tx_force_wb; }; struct i40e_rx_queue_stats { @@ -267,6 +268,8 @@ struct i40e_ring { bool ring_active; /* is ring online or not */ bool arm_wb; /* do something to arm write back */ + u8 packet_stride; +#define I40E_TXR_FLAGS_LAST_XMIT_MORE_SET BIT(2) u16 flags; #define I40E_TXR_FLAGS_WB_ON_ITR BIT(0) @@ -321,4 +324,19 @@ int i40evf_setup_rx_descriptors(struct i40e_ring *rx_ring); void i40evf_free_tx_resources(struct i40e_ring *tx_ring); void i40evf_free_rx_resources(struct i40e_ring *rx_ring); int i40evf_napi_poll(struct napi_struct *napi, int budget); +u32 i40evf_get_tx_pending(struct i40e_ring *ring); + +/** + * i40e_get_head - Retrieve head from head writeback + * @tx_ring: Tx ring to fetch head of + * + * Returns value of Tx ring head based on value stored + * in head write-back location + **/ +static inline u32 i40e_get_head(struct i40e_ring *tx_ring) +{ + void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count; + + return le32_to_cpu(*(volatile __le32 *)head); +} #endif /* _I40E_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index 22fc3d49c4b9..be1b72b93888 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -67,6 +67,8 @@ struct i40e_vsi { u16 rx_itr_setting; u16 tx_itr_setting; u16 qs_handle; + u8 *rss_hkey_user; /* User configured hash keys */ + u8 *rss_lut_user; /* User configured lookup table entries */ }; /* How many Rx Buffers do we bundle into one write to the hardware ? 
*/ @@ -95,10 +97,10 @@ struct i40e_vsi { #define I40E_TX_DESC(R, i) (&(((struct i40e_tx_desc *)((R)->desc))[i])) #define I40E_TX_CTXTDESC(R, i) \ (&(((struct i40e_tx_context_desc *)((R)->desc))[i])) -#define MAX_RX_QUEUES 8 -#define MAX_TX_QUEUES MAX_RX_QUEUES +#define MAX_QUEUES 16 #define I40EVF_HKEY_ARRAY_SIZE ((I40E_VFQF_HKEY_MAX_INDEX + 1) * 4) +#define I40EVF_HLUT_ARRAY_SIZE ((I40E_VFQF_HLUT_MAX_INDEX + 1) * 4) /* MAX_MSIX_Q_VECTORS of these are allocated, * but we only use one per queue-specific vector. @@ -142,9 +144,6 @@ struct i40e_q_vector { #define OTHER_VECTOR 1 #define NONQ_VECS (OTHER_VECTOR) -#define MAX_MSIX_Q_VECTORS 4 -#define MAX_MSIX_COUNT 5 - #define MIN_MSIX_Q_VECTORS 1 #define MIN_MSIX_COUNT (MIN_MSIX_Q_VECTORS + NONQ_VECS) @@ -190,19 +189,19 @@ struct i40evf_adapter { struct work_struct reset_task; struct work_struct adminq_task; struct delayed_work init_task; - struct i40e_q_vector *q_vector[MAX_MSIX_Q_VECTORS]; + struct i40e_q_vector *q_vectors; struct list_head vlan_filter_list; char misc_vector_name[IFNAMSIZ + 9]; int num_active_queues; /* TX */ - struct i40e_ring *tx_rings[I40E_MAX_VSI_QP]; + struct i40e_ring *tx_rings; u32 tx_timeout_count; struct list_head mac_filter_list; u32 tx_desc_count; /* RX */ - struct i40e_ring *rx_rings[I40E_MAX_VSI_QP]; + struct i40e_ring *rx_rings; u64 hw_csum_rx_error; u32 rx_desc_count; int num_msix_vectors; @@ -313,4 +312,8 @@ void i40evf_request_reset(struct i40evf_adapter *adapter); void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, enum i40e_virtchnl_ops v_opcode, i40e_status v_retval, u8 *msg, u16 msglen); +int i40evf_config_rss(struct i40e_vsi *vsi, const u8 *seed, u8 *lut, + u16 lut_size); +int i40evf_get_rss(struct i40e_vsi *vsi, const u8 *seed, u8 *lut, + u16 lut_size); #endif /* _I40EVF_H_ */ diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c index 4790437a50ac..a4c9feb589e7 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c @@ -121,12 +121,12 @@ static void i40evf_get_ethtool_stats(struct net_device *netdev, data[i] = *(u64 *)p; } for (j = 0; j < adapter->num_active_queues; j++) { - data[i++] = adapter->tx_rings[j]->stats.packets; - data[i++] = adapter->tx_rings[j]->stats.bytes; + data[i++] = adapter->tx_rings[j].stats.packets; + data[i++] = adapter->tx_rings[j].stats.bytes; } for (j = 0; j < adapter->num_active_queues; j++) { - data[i++] = adapter->rx_rings[j]->stats.packets; - data[i++] = adapter->rx_rings[j]->stats.bytes; + data[i++] = adapter->rx_rings[j].stats.packets; + data[i++] = adapter->rx_rings[j].stats.bytes; } } @@ -351,7 +351,7 @@ static int i40evf_set_coalesce(struct net_device *netdev, vsi->tx_itr_setting &= ~I40E_ITR_DYNAMIC; for (i = 0; i < adapter->num_msix_vectors - NONQ_VECS; i++) { - q_vector = adapter->q_vector[i]; + q_vector = &adapter->q_vectors[i]; q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting); wr32(hw, I40E_VFINT_ITRN1(0, i), q_vector->rx.itr); q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting); @@ -634,25 +634,34 @@ static int i40evf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) { struct i40evf_adapter *adapter = netdev_priv(netdev); - struct i40e_hw *hw = &adapter->hw; - u32 hlut_val; - int i, j; + struct i40e_vsi *vsi = &adapter->vsi; + u8 *seed = NULL, *lut; + int ret; + u16 i; if (hfunc) *hfunc = ETH_RSS_HASH_TOP; if (!indir) return 0; - if (indir) { - for (i = 0, j = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) { - 
hlut_val = rd32(hw, I40E_VFQF_HLUT(i)); - indir[j++] = hlut_val & 0xff; - indir[j++] = (hlut_val >> 8) & 0xff; - indir[j++] = (hlut_val >> 16) & 0xff; - indir[j++] = (hlut_val >> 24) & 0xff; - } - } - return 0; + seed = key; + + lut = kzalloc(I40EVF_HLUT_ARRAY_SIZE, GFP_KERNEL); + if (!lut) + return -ENOMEM; + + ret = i40evf_get_rss(vsi, seed, lut, I40EVF_HLUT_ARRAY_SIZE); + if (ret) + goto out; + + /* Each 32 bits pointed by 'indir' is stored with a lut entry */ + for (i = 0; i < I40EVF_HLUT_ARRAY_SIZE; i++) + indir[i] = (u32)lut[i]; + +out: + kfree(lut); + + return ret; } /** @@ -668,9 +677,9 @@ static int i40evf_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key, const u8 hfunc) { struct i40evf_adapter *adapter = netdev_priv(netdev); - struct i40e_hw *hw = &adapter->hw; - u32 hlut_val; - int i, j; + struct i40e_vsi *vsi = &adapter->vsi; + u8 *seed = NULL; + u16 i; /* We do not allow change in unsupported parameters */ if (key || @@ -679,15 +688,29 @@ static int i40evf_set_rxfh(struct net_device *netdev, const u32 *indir, if (!indir) return 0; - for (i = 0, j = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) { - hlut_val = indir[j++]; - hlut_val |= indir[j++] << 8; - hlut_val |= indir[j++] << 16; - hlut_val |= indir[j++] << 24; - wr32(hw, I40E_VFQF_HLUT(i), hlut_val); + if (key) { + if (!vsi->rss_hkey_user) { + vsi->rss_hkey_user = kzalloc(I40EVF_HKEY_ARRAY_SIZE, + GFP_KERNEL); + if (!vsi->rss_hkey_user) + return -ENOMEM; + } + memcpy(vsi->rss_hkey_user, key, I40EVF_HKEY_ARRAY_SIZE); + seed = vsi->rss_hkey_user; + } + if (!vsi->rss_lut_user) { + vsi->rss_lut_user = kzalloc(I40EVF_HLUT_ARRAY_SIZE, + GFP_KERNEL); + if (!vsi->rss_lut_user) + return -ENOMEM; } - return 0; + /* Each 32 bits pointed by 'indir' is stored with a lut entry */ + for (i = 0; i < I40EVF_HLUT_ARRAY_SIZE; i++) + vsi->rss_lut_user[i] = (u8)(indir[i]); + + return i40evf_config_rss(vsi, seed, vsi->rss_lut_user, + I40EVF_HLUT_ARRAY_SIZE); } static const struct ethtool_ops i40evf_ethtool_ops = { diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index d962164dfb0f..b4c632f417f6 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -34,7 +34,7 @@ char i40evf_driver_name[] = "i40evf"; static const char i40evf_driver_string[] = "Intel(R) XL710/X710 Virtual Function Network Driver"; -#define DRV_VERSION "1.3.33" +#define DRV_VERSION "1.4.3" const char i40evf_driver_version[] = DRV_VERSION; static const char i40evf_copyright[] = "Copyright (c) 2013 - 2015 Intel Corporation."; @@ -259,7 +259,7 @@ static void i40evf_fire_sw_int(struct i40evf_adapter *adapter, u32 mask) { struct i40e_hw *hw = &adapter->hw; int i; - uint32_t dyn_ctl; + u32 dyn_ctl; if (mask & 1) { dyn_ctl = rd32(hw, I40E_VFINT_DYN_CTL01); @@ -307,10 +307,9 @@ static irqreturn_t i40evf_msix_aq(int irq, void *data) struct i40e_hw *hw = &adapter->hw; u32 val; - /* handle non-queue interrupts */ - rd32(hw, I40E_VFINT_ICR01); - rd32(hw, I40E_VFINT_ICR0_ENA1); - + /* handle non-queue interrupts, these reads clear the registers */ + val = rd32(hw, I40E_VFINT_ICR01); + val = rd32(hw, I40E_VFINT_ICR0_ENA1); val = rd32(hw, I40E_VFINT_DYN_CTL01) | I40E_VFINT_DYN_CTL01_CLEARPBA_MASK; @@ -348,8 +347,8 @@ static irqreturn_t i40evf_msix_clean_rings(int irq, void *data) static void i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx) { - struct i40e_q_vector *q_vector = adapter->q_vector[v_idx]; - struct i40e_ring *rx_ring = 
adapter->rx_rings[r_idx]; + struct i40e_q_vector *q_vector = &adapter->q_vectors[v_idx]; + struct i40e_ring *rx_ring = &adapter->rx_rings[r_idx]; rx_ring->q_vector = q_vector; rx_ring->next = q_vector->rx.ring; @@ -369,8 +368,8 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx) static void i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx) { - struct i40e_q_vector *q_vector = adapter->q_vector[v_idx]; - struct i40e_ring *tx_ring = adapter->tx_rings[t_idx]; + struct i40e_q_vector *q_vector = &adapter->q_vectors[v_idx]; + struct i40e_ring *tx_ring = &adapter->tx_rings[t_idx]; tx_ring->q_vector = q_vector; tx_ring->next = q_vector->tx.ring; @@ -465,7 +464,7 @@ static void i40evf_netpoll(struct net_device *netdev) return; for (i = 0; i < q_vectors; i++) - i40evf_msix_clean_rings(0, adapter->q_vector[i]); + i40evf_msix_clean_rings(0, &adapter->q_vectors[i]); } #endif @@ -487,7 +486,7 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename) q_vectors = adapter->num_msix_vectors - NONQ_VECS; for (vector = 0; vector < q_vectors; vector++) { - struct i40e_q_vector *q_vector = adapter->q_vector[vector]; + struct i40e_q_vector *q_vector = &adapter->q_vectors[vector]; if (q_vector->tx.ring && q_vector->rx.ring) { snprintf(q_vector->name, sizeof(q_vector->name) - 1, @@ -532,7 +531,7 @@ free_queue_irqs: adapter->msix_entries[vector + NONQ_VECS].vector, NULL); free_irq(adapter->msix_entries[vector + NONQ_VECS].vector, - adapter->q_vector[vector]); + &adapter->q_vectors[vector]); } return err; } @@ -582,7 +581,7 @@ static void i40evf_free_traffic_irqs(struct i40evf_adapter *adapter) irq_set_affinity_hint(adapter->msix_entries[i+1].vector, NULL); free_irq(adapter->msix_entries[i+1].vector, - adapter->q_vector[i]); + &adapter->q_vectors[i]); } } @@ -611,7 +610,7 @@ static void i40evf_configure_tx(struct i40evf_adapter *adapter) int i; for (i = 0; i < adapter->num_active_queues; i++) - adapter->tx_rings[i]->tail = hw->hw_addr + I40E_QTX_TAIL1(i); + adapter->tx_rings[i].tail = hw->hw_addr + I40E_QTX_TAIL1(i); } /** @@ -656,8 +655,8 @@ static void i40evf_configure_rx(struct i40evf_adapter *adapter) } for (i = 0; i < adapter->num_active_queues; i++) { - adapter->rx_rings[i]->tail = hw->hw_addr + I40E_QRX_TAIL1(i); - adapter->rx_rings[i]->rx_buf_len = rx_buf_len; + adapter->rx_rings[i].tail = hw->hw_addr + I40E_QRX_TAIL1(i); + adapter->rx_rings[i].rx_buf_len = rx_buf_len; } } @@ -954,7 +953,7 @@ static void i40evf_napi_enable_all(struct i40evf_adapter *adapter) for (q_idx = 0; q_idx < q_vectors; q_idx++) { struct napi_struct *napi; - q_vector = adapter->q_vector[q_idx]; + q_vector = &adapter->q_vectors[q_idx]; napi = &q_vector->napi; napi_enable(napi); } @@ -971,7 +970,7 @@ static void i40evf_napi_disable_all(struct i40evf_adapter *adapter) int q_vectors = adapter->num_msix_vectors - NONQ_VECS; for (q_idx = 0; q_idx < q_vectors; q_idx++) { - q_vector = adapter->q_vector[q_idx]; + q_vector = &adapter->q_vectors[q_idx]; napi_disable(&q_vector->napi); } } @@ -992,7 +991,7 @@ static void i40evf_configure(struct i40evf_adapter *adapter) adapter->aq_required |= I40EVF_FLAG_AQ_CONFIGURE_QUEUES; for (i = 0; i < adapter->num_active_queues; i++) { - struct i40e_ring *ring = adapter->rx_rings[i]; + struct i40e_ring *ring = &adapter->rx_rings[i]; i40evf_alloc_rx_buffers_1buf(ring, ring->count); ring->next_to_use = ring->count - 1; @@ -1112,16 +1111,10 @@ i40evf_acquire_msix_vectors(struct i40evf_adapter *adapter, int vectors) **/ static void 
i40evf_free_queues(struct i40evf_adapter *adapter) { - int i; - if (!adapter->vsi_res) return; - for (i = 0; i < adapter->num_active_queues; i++) { - if (adapter->tx_rings[i]) - kfree_rcu(adapter->tx_rings[i], rcu); - adapter->tx_rings[i] = NULL; - adapter->rx_rings[i] = NULL; - } + kfree(adapter->tx_rings); + kfree(adapter->rx_rings); } /** @@ -1136,13 +1129,20 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) { int i; + adapter->tx_rings = kcalloc(adapter->num_active_queues, + sizeof(struct i40e_ring), GFP_KERNEL); + if (!adapter->tx_rings) + goto err_out; + adapter->rx_rings = kcalloc(adapter->num_active_queues, + sizeof(struct i40e_ring), GFP_KERNEL); + if (!adapter->rx_rings) + goto err_out; + for (i = 0; i < adapter->num_active_queues; i++) { struct i40e_ring *tx_ring; struct i40e_ring *rx_ring; - tx_ring = kzalloc(sizeof(*tx_ring) * 2, GFP_KERNEL); - if (!tx_ring) - goto err_out; + tx_ring = &adapter->tx_rings[i]; tx_ring->queue_index = i; tx_ring->netdev = adapter->netdev; @@ -1150,14 +1150,12 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) tx_ring->count = adapter->tx_desc_count; if (adapter->flags & I40E_FLAG_WB_ON_ITR_CAPABLE) tx_ring->flags |= I40E_TXR_FLAGS_WB_ON_ITR; - adapter->tx_rings[i] = tx_ring; - rx_ring = &tx_ring[1]; + rx_ring = &adapter->rx_rings[i]; rx_ring->queue_index = i; rx_ring->netdev = adapter->netdev; rx_ring->dev = &adapter->pdev->dev; rx_ring->count = adapter->rx_desc_count; - adapter->rx_rings[i] = rx_ring; } return 0; @@ -1207,115 +1205,273 @@ static int i40evf_set_interrupt_capability(struct i40evf_adapter *adapter) err = i40evf_acquire_msix_vectors(adapter, v_budget); out: - adapter->netdev->real_num_tx_queues = pairs; + netif_set_real_num_rx_queues(adapter->netdev, pairs); + netif_set_real_num_tx_queues(adapter->netdev, pairs); return err; } /** - * i40e_configure_rss_aq - Prepare for RSS using AQ commands + * i40e_config_rss_aq - Prepare for RSS using AQ commands * @vsi: vsi structure * @seed: RSS hash seed + * @lut: Lookup table + * @lut_size: Lookup table size + * + * Return 0 on success, negative on failure **/ -static void i40evf_configure_rss_aq(struct i40e_vsi *vsi, const u8 *seed) +static int i40evf_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed, + u8 *lut, u16 lut_size) { - struct i40e_aqc_get_set_rss_key_data rss_key; struct i40evf_adapter *adapter = vsi->back; struct i40e_hw *hw = &adapter->hw; - int ret = 0, i; - u8 *rss_lut; + int ret = 0; if (!vsi->id) - return; + return -EINVAL; if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ dev_err(&adapter->pdev->dev, "Cannot confiure RSS, command %d pending\n", adapter->current_op); - return; + return -EBUSY; } - memset(&rss_key, 0, sizeof(rss_key)); - memcpy(&rss_key, seed, sizeof(rss_key)); + if (seed) { + struct i40e_aqc_get_set_rss_key_data *rss_key = + (struct i40e_aqc_get_set_rss_key_data *)seed; + ret = i40evf_aq_set_rss_key(hw, vsi->id, rss_key); + if (ret) { + dev_err(&adapter->pdev->dev, "Cannot set RSS key, err %s aq_err %s\n", + i40evf_stat_str(hw, ret), + i40evf_aq_str(hw, hw->aq.asq_last_status)); + return ret; + } + } - rss_lut = kzalloc(((I40E_VFQF_HLUT_MAX_INDEX + 1) * 4), GFP_KERNEL); - if (!rss_lut) - return; + if (lut) { + ret = i40evf_aq_set_rss_lut(hw, vsi->id, false, lut, lut_size); + if (ret) { + dev_err(&adapter->pdev->dev, + "Cannot set RSS lut, err %s aq_err %s\n", + i40evf_stat_str(hw, ret), + i40evf_aq_str(hw, hw->aq.asq_last_status)); + return ret; + } + } - /* Populate the 
LUT with max no. PF queues in round robin fashion */ - for (i = 0; i <= (I40E_VFQF_HLUT_MAX_INDEX * 4); i++) - rss_lut[i] = i % adapter->num_active_queues; + return ret; +} - ret = i40evf_aq_set_rss_key(hw, vsi->id, &rss_key); - if (ret) { - dev_err(&adapter->pdev->dev, - "Cannot set RSS key, err %s aq_err %s\n", - i40evf_stat_str(hw, ret), - i40evf_aq_str(hw, hw->aq.asq_last_status)); - return; +/** + * i40evf_config_rss_reg - Configure RSS keys and lut by writing registers + * @vsi: Pointer to vsi structure + * @seed: RSS hash seed + * @lut: Lookup table + * @lut_size: Lookup table size + * + * Returns 0 on success, negative on failure + **/ +static int i40evf_config_rss_reg(struct i40e_vsi *vsi, const u8 *seed, + const u8 *lut, u16 lut_size) +{ + struct i40evf_adapter *adapter = vsi->back; + struct i40e_hw *hw = &adapter->hw; + u16 i; + + if (seed) { + u32 *seed_dw = (u32 *)seed; + + for (i = 0; i <= I40E_VFQF_HKEY_MAX_INDEX; i++) + wr32(hw, I40E_VFQF_HKEY(i), seed_dw[i]); } - ret = i40evf_aq_set_rss_lut(hw, vsi->id, false, rss_lut, - (I40E_VFQF_HLUT_MAX_INDEX + 1) * 4); - if (ret) - dev_err(&adapter->pdev->dev, - "Cannot set RSS lut, err %s aq_err %s\n", - i40evf_stat_str(hw, ret), - i40evf_aq_str(hw, hw->aq.asq_last_status)); + if (lut) { + u32 *lut_dw = (u32 *)lut; + + if (lut_size != I40EVF_HLUT_ARRAY_SIZE) + return -EINVAL; + + for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) + wr32(hw, I40E_VFQF_HLUT(i), lut_dw[i]); + } + i40e_flush(hw); + + return 0; } /** - * i40e_configure_rss_reg - Prepare for RSS if used - * @adapter: board private structure - * @seed: RSS hash seed + * * i40evf_get_rss_aq - Get RSS keys and lut by using AQ commands + * @vsi: Pointer to vsi structure + * @seed: RSS hash seed + * @lut: Lookup table + * @lut_size: Lookup table size + * + * Return 0 on success, negative on failure **/ -static void i40evf_configure_rss_reg(struct i40evf_adapter *adapter, - const u8 *seed) +static int i40evf_get_rss_aq(struct i40e_vsi *vsi, const u8 *seed, + u8 *lut, u16 lut_size) { + struct i40evf_adapter *adapter = vsi->back; struct i40e_hw *hw = &adapter->hw; - u32 *seed_dw = (u32 *)seed; - u32 cqueue = 0; - u32 lut = 0; - int i, j; + int ret = 0; - /* Fill out hash function seed */ - for (i = 0; i <= I40E_VFQF_HKEY_MAX_INDEX; i++) - wr32(hw, I40E_VFQF_HKEY(i), seed_dw[i]); - - /* Populate the LUT with max no. 
PF queues in round robin fashion */ - for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) { - lut = 0; - for (j = 0; j < 4; j++) { - if (cqueue == adapter->num_active_queues) - cqueue = 0; - lut |= ((cqueue) << (8 * j)); - cqueue++; + if (seed) { + ret = i40evf_aq_get_rss_key(hw, vsi->id, + (struct i40e_aqc_get_set_rss_key_data *)seed); + if (ret) { + dev_err(&adapter->pdev->dev, + "Cannot get RSS key, err %s aq_err %s\n", + i40evf_stat_str(hw, ret), + i40evf_aq_str(hw, hw->aq.asq_last_status)); + return ret; } - wr32(hw, I40E_VFQF_HLUT(i), lut); } - i40e_flush(hw); + + if (lut) { + ret = i40evf_aq_get_rss_lut(hw, vsi->id, seed, lut, lut_size); + if (ret) { + dev_err(&adapter->pdev->dev, + "Cannot get RSS lut, err %s aq_err %s\n", + i40evf_stat_str(hw, ret), + i40evf_aq_str(hw, hw->aq.asq_last_status)); + return ret; + } + } + + return ret; } /** - * i40evf_configure_rss - Prepare for RSS + * * i40evf_get_rss_reg - Get RSS keys and lut by reading registers + * @vsi: Pointer to vsi structure + * @seed: RSS hash seed + * @lut: Lookup table + * @lut_size: Lookup table size + * + * Returns 0 on success, negative on failure + **/ +static int i40evf_get_rss_reg(struct i40e_vsi *vsi, const u8 *seed, + const u8 *lut, u16 lut_size) +{ + struct i40evf_adapter *adapter = vsi->back; + struct i40e_hw *hw = &adapter->hw; + u16 i; + + if (seed) { + u32 *seed_dw = (u32 *)seed; + + for (i = 0; i <= I40E_VFQF_HKEY_MAX_INDEX; i++) + seed_dw[i] = rd32(hw, I40E_VFQF_HKEY(i)); + } + + if (lut) { + u32 *lut_dw = (u32 *)lut; + + if (lut_size != I40EVF_HLUT_ARRAY_SIZE) + return -EINVAL; + + for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) + lut_dw[i] = rd32(hw, I40E_VFQF_HLUT(i)); + } + + return 0; +} + +/** + * i40evf_config_rss - Configure RSS keys and lut + * @vsi: Pointer to vsi structure + * @seed: RSS hash seed + * @lut: Lookup table + * @lut_size: Lookup table size + * + * Returns 0 on success, negative on failure + **/ +int i40evf_config_rss(struct i40e_vsi *vsi, const u8 *seed, + u8 *lut, u16 lut_size) +{ + struct i40evf_adapter *adapter = vsi->back; + + if (RSS_AQ(adapter)) + return i40evf_config_rss_aq(vsi, seed, lut, lut_size); + else + return i40evf_config_rss_reg(vsi, seed, lut, lut_size); +} + +/** + * i40evf_get_rss - Get RSS keys and lut + * @vsi: Pointer to vsi structure + * @seed: RSS hash seed + * @lut: Lookup table + * @lut_size: Lookup table size + * + * Returns 0 on success, negative on failure + **/ +int i40evf_get_rss(struct i40e_vsi *vsi, const u8 *seed, u8 *lut, u16 lut_size) +{ + struct i40evf_adapter *adapter = vsi->back; + + if (RSS_AQ(adapter)) + return i40evf_get_rss_aq(vsi, seed, lut, lut_size); + else + return i40evf_get_rss_reg(vsi, seed, lut, lut_size); +} + +/** + * i40evf_fill_rss_lut - Fill the lut with default values + * @lut: Lookup table to be filled with + * @rss_table_size: Lookup table size + * @rss_size: Range of queue number for hashing + **/ +static void i40evf_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size) +{ + u16 i; + + for (i = 0; i < rss_table_size; i++) + lut[i] = i % rss_size; +} + +/** + * i40evf_init_rss - Prepare for RSS * @adapter: board private structure + * + * Return 0 on success, negative on failure **/ -static void i40evf_configure_rss(struct i40evf_adapter *adapter) +static int i40evf_init_rss(struct i40evf_adapter *adapter) { + struct i40e_vsi *vsi = &adapter->vsi; struct i40e_hw *hw = &adapter->hw; u8 seed[I40EVF_HKEY_ARRAY_SIZE]; u64 hena; - - netdev_rss_key_fill((void *)seed, I40EVF_HKEY_ARRAY_SIZE); + u8 *lut; + int ret; /* Enable PCTYPES 
for RSS, TCP/UDP with IPv4/IPv6 */ hena = I40E_DEFAULT_RSS_HENA; wr32(hw, I40E_VFQF_HENA(0), (u32)hena); wr32(hw, I40E_VFQF_HENA(1), (u32)(hena >> 32)); - if (RSS_AQ(adapter)) - i40evf_configure_rss_aq(&adapter->vsi, seed); + lut = kzalloc(I40EVF_HLUT_ARRAY_SIZE, GFP_KERNEL); + if (!lut) + return -ENOMEM; + + /* Use user configured lut if there is one, otherwise use default */ + if (vsi->rss_lut_user) + memcpy(lut, vsi->rss_lut_user, I40EVF_HLUT_ARRAY_SIZE); else - i40evf_configure_rss_reg(adapter, seed); + i40evf_fill_rss_lut(lut, I40EVF_HLUT_ARRAY_SIZE, + adapter->num_active_queues); + + /* Use user configured hash key if there is one, otherwise + * user default. + */ + if (vsi->rss_hkey_user) + memcpy(seed, vsi->rss_hkey_user, I40EVF_HKEY_ARRAY_SIZE); + else + netdev_rss_key_fill((void *)seed, I40EVF_HKEY_ARRAY_SIZE); + ret = i40evf_config_rss(vsi, seed, lut, I40EVF_HLUT_ARRAY_SIZE); + kfree(lut); + + return ret; } /** @@ -1327,21 +1483,22 @@ static void i40evf_configure_rss(struct i40evf_adapter *adapter) **/ static int i40evf_alloc_q_vectors(struct i40evf_adapter *adapter) { - int q_idx, num_q_vectors; + int q_idx = 0, num_q_vectors; struct i40e_q_vector *q_vector; num_q_vectors = adapter->num_msix_vectors - NONQ_VECS; + adapter->q_vectors = kcalloc(num_q_vectors, sizeof(*q_vector), + GFP_KERNEL); + if (!adapter->q_vectors) + goto err_out; for (q_idx = 0; q_idx < num_q_vectors; q_idx++) { - q_vector = kzalloc(sizeof(*q_vector), GFP_KERNEL); - if (!q_vector) - goto err_out; + q_vector = &adapter->q_vectors[q_idx]; q_vector->adapter = adapter; q_vector->vsi = &adapter->vsi; q_vector->v_idx = q_idx; netif_napi_add(adapter->netdev, &q_vector->napi, i40evf_napi_poll, NAPI_POLL_WEIGHT); - adapter->q_vector[q_idx] = q_vector; } return 0; @@ -1349,11 +1506,10 @@ static int i40evf_alloc_q_vectors(struct i40evf_adapter *adapter) err_out: while (q_idx) { q_idx--; - q_vector = adapter->q_vector[q_idx]; + q_vector = &adapter->q_vectors[q_idx]; netif_napi_del(&q_vector->napi); - kfree(q_vector); - adapter->q_vector[q_idx] = NULL; } + kfree(adapter->q_vectors); return -ENOMEM; } @@ -1374,13 +1530,11 @@ static void i40evf_free_q_vectors(struct i40evf_adapter *adapter) napi_vectors = adapter->num_active_queues; for (q_idx = 0; q_idx < num_q_vectors; q_idx++) { - struct i40e_q_vector *q_vector = adapter->q_vector[q_idx]; - - adapter->q_vector[q_idx] = NULL; + struct i40e_q_vector *q_vector = &adapter->q_vectors[q_idx]; if (q_idx < napi_vectors) netif_napi_del(&q_vector->napi); - kfree(q_vector); } + kfree(adapter->q_vectors); } /** @@ -1439,6 +1593,22 @@ err_set_interrupt: } /** + * i40evf_clear_rss_config_user - Clear user configurations of RSS + * @vsi: Pointer to VSI structure + **/ +static void i40evf_clear_rss_config_user(struct i40e_vsi *vsi) +{ + if (!vsi) + return; + + kfree(vsi->rss_hkey_user); + vsi->rss_hkey_user = NULL; + + kfree(vsi->rss_lut_user); + vsi->rss_lut_user = NULL; +} + +/** * i40evf_watchdog_timer - Periodic call-back timer * @data: pointer to adapter disguised as unsigned long **/ @@ -1565,7 +1735,7 @@ static void i40evf_watchdog_task(struct work_struct *work) * PF, so we don't have to set current_op as we will * not get a response through the ARQ. 
*/ - i40evf_configure_rss(adapter); + i40evf_init_rss(adapter); adapter->aq_required &= ~I40EVF_FLAG_AQ_CONFIGURE_RSS; goto watchdog_done; } @@ -1865,8 +2035,8 @@ void i40evf_free_all_tx_resources(struct i40evf_adapter *adapter) int i; for (i = 0; i < adapter->num_active_queues; i++) - if (adapter->tx_rings[i]->desc) - i40evf_free_tx_resources(adapter->tx_rings[i]); + if (adapter->tx_rings[i].desc) + i40evf_free_tx_resources(&adapter->tx_rings[i]); } /** @@ -1884,8 +2054,8 @@ static int i40evf_setup_all_tx_resources(struct i40evf_adapter *adapter) int i, err = 0; for (i = 0; i < adapter->num_active_queues; i++) { - adapter->tx_rings[i]->count = adapter->tx_desc_count; - err = i40evf_setup_tx_descriptors(adapter->tx_rings[i]); + adapter->tx_rings[i].count = adapter->tx_desc_count; + err = i40evf_setup_tx_descriptors(&adapter->tx_rings[i]); if (!err) continue; dev_err(&adapter->pdev->dev, @@ -1911,8 +2081,8 @@ static int i40evf_setup_all_rx_resources(struct i40evf_adapter *adapter) int i, err = 0; for (i = 0; i < adapter->num_active_queues; i++) { - adapter->rx_rings[i]->count = adapter->rx_desc_count; - err = i40evf_setup_rx_descriptors(adapter->rx_rings[i]); + adapter->rx_rings[i].count = adapter->rx_desc_count; + err = i40evf_setup_rx_descriptors(&adapter->rx_rings[i]); if (!err) continue; dev_err(&adapter->pdev->dev, @@ -1933,8 +2103,8 @@ void i40evf_free_all_rx_resources(struct i40evf_adapter *adapter) int i; for (i = 0; i < adapter->num_active_queues; i++) - if (adapter->rx_rings[i]->desc) - i40evf_free_rx_resources(adapter->rx_rings[i]); + if (adapter->rx_rings[i].desc) + i40evf_free_rx_resources(&adapter->rx_rings[i]); } /** @@ -2263,6 +2433,14 @@ static void i40evf_init_task(struct work_struct *work) if (err == I40E_ERR_ADMIN_QUEUE_NO_WORK) { err = i40evf_send_vf_config_msg(adapter); goto err; + } else if (err == I40E_ERR_PARAM) { + /* We only get ERR_PARAM if the device is in a very bad + * state or if we've been disabled for previous bad + * behavior. Either way, we're done now. 
+ */ + i40evf_shutdown_adminq(hw); + dev_err(&pdev->dev, "Unable to get VF config due to PF error condition, not retrying\n"); + return; } if (err) { dev_err(&pdev->dev, "Unable to get VF config (%d)\n", @@ -2313,7 +2491,7 @@ static void i40evf_init_task(struct work_struct *work) I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR) adapter->flags |= I40EVF_FLAG_WB_ON_ITR_CAPABLE; if (!RSS_AQ(adapter)) - i40evf_configure_rss(adapter); + i40evf_init_rss(adapter); err = i40evf_request_misc_irq(adapter); if (err) goto err_sw_init; @@ -2334,7 +2512,6 @@ static void i40evf_init_task(struct work_struct *work) if (netdev->features & NETIF_F_GRO) dev_info(&pdev->dev, "GRO is enabled\n"); - dev_info(&pdev->dev, "%s\n", i40evf_driver_string); adapter->state = __I40EVF_DOWN; set_bit(__I40E_DOWN, &adapter->vsi.state); i40evf_misc_irq_enable(adapter); @@ -2343,7 +2520,7 @@ static void i40evf_init_task(struct work_struct *work) adapter->aq_required |= I40EVF_FLAG_AQ_CONFIGURE_RSS; mod_timer_pending(&adapter->watchdog_timer, jiffies + 1); } else { - i40evf_configure_rss(adapter); + i40evf_init_rss(adapter); } return; restart: @@ -2438,8 +2615,7 @@ static int i40evf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_master(pdev); - netdev = alloc_etherdev_mq(sizeof(struct i40evf_adapter), - MAX_TX_QUEUES); + netdev = alloc_etherdev_mq(sizeof(struct i40evf_adapter), MAX_QUEUES); if (!netdev) { err = -ENOMEM; goto err_alloc_etherdev; @@ -2626,6 +2802,9 @@ static void i40evf_remove(struct pci_dev *pdev) flush_scheduled_work(); + /* Clear user configurations for RSS */ + i40evf_clear_rss_config_user(&adapter->vsi); + if (hw->aq.asq.count) i40evf_shutdown_adminq(hw); diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c index 32e620e1eb5c..3c9c008b168b 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c @@ -242,7 +242,7 @@ void i40evf_configure_queues(struct i40evf_adapter *adapter) adapter->current_op = I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES; len = sizeof(struct i40e_virtchnl_vsi_queue_config_info) + (sizeof(struct i40e_virtchnl_queue_pair_info) * pairs); - vqci = kzalloc(len, GFP_ATOMIC); + vqci = kzalloc(len, GFP_KERNEL); if (!vqci) return; @@ -255,19 +255,19 @@ void i40evf_configure_queues(struct i40evf_adapter *adapter) for (i = 0; i < pairs; i++) { vqpi->txq.vsi_id = vqci->vsi_id; vqpi->txq.queue_id = i; - vqpi->txq.ring_len = adapter->tx_rings[i]->count; - vqpi->txq.dma_ring_addr = adapter->tx_rings[i]->dma; + vqpi->txq.ring_len = adapter->tx_rings[i].count; + vqpi->txq.dma_ring_addr = adapter->tx_rings[i].dma; vqpi->txq.headwb_enabled = 1; vqpi->txq.dma_headwb_addr = vqpi->txq.dma_ring_addr + (vqpi->txq.ring_len * sizeof(struct i40e_tx_desc)); vqpi->rxq.vsi_id = vqci->vsi_id; vqpi->rxq.queue_id = i; - vqpi->rxq.ring_len = adapter->rx_rings[i]->count; - vqpi->rxq.dma_ring_addr = adapter->rx_rings[i]->dma; + vqpi->rxq.ring_len = adapter->rx_rings[i].count; + vqpi->rxq.dma_ring_addr = adapter->rx_rings[i].dma; vqpi->rxq.max_pkt_size = adapter->netdev->mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN; - vqpi->rxq.databuffer_size = adapter->rx_rings[i]->rx_buf_len; + vqpi->rxq.databuffer_size = adapter->rx_rings[i].rx_buf_len; vqpi++; } @@ -353,14 +353,14 @@ void i40evf_map_queues(struct i40evf_adapter *adapter) len = sizeof(struct i40e_virtchnl_irq_map_info) + (adapter->num_msix_vectors * sizeof(struct i40e_virtchnl_vector_map)); - vimi = kzalloc(len, GFP_ATOMIC); + vimi = 
kzalloc(len, GFP_KERNEL); if (!vimi) return; vimi->num_vectors = adapter->num_msix_vectors; /* Queue vectors first */ for (v_idx = 0; v_idx < q_vectors; v_idx++) { - q_vector = adapter->q_vector[v_idx]; + q_vector = adapter->q_vectors + v_idx; vimi->vecmap[v_idx].vsi_id = adapter->vsi_res->vsi_id; vimi->vecmap[v_idx].vector_id = v_idx + NONQ_VECS; vimi->vecmap[v_idx].txq_map = q_vector->ring_mask; @@ -391,6 +391,7 @@ void i40evf_add_ether_addrs(struct i40evf_adapter *adapter) struct i40e_virtchnl_ether_addr_list *veal; int len, i = 0, count = 0; struct i40evf_mac_filter *f; + bool more = false; if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ @@ -415,10 +416,12 @@ void i40evf_add_ether_addrs(struct i40evf_adapter *adapter) count = (I40EVF_MAX_AQ_BUF_SIZE - sizeof(struct i40e_virtchnl_ether_addr_list)) / sizeof(struct i40e_virtchnl_ether_addr); - len = I40EVF_MAX_AQ_BUF_SIZE; + len = sizeof(struct i40e_virtchnl_ether_addr_list) + + (count * sizeof(struct i40e_virtchnl_ether_addr)); + more = true; } - veal = kzalloc(len, GFP_ATOMIC); + veal = kzalloc(len, GFP_KERNEL); if (!veal) return; @@ -431,7 +434,8 @@ void i40evf_add_ether_addrs(struct i40evf_adapter *adapter) f->add = false; } } - adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_MAC_FILTER; + if (!more) + adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_MAC_FILTER; i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS, (u8 *)veal, len); kfree(veal); @@ -450,6 +454,7 @@ void i40evf_del_ether_addrs(struct i40evf_adapter *adapter) struct i40e_virtchnl_ether_addr_list *veal; struct i40evf_mac_filter *f, *ftmp; int len, i = 0, count = 0; + bool more = false; if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ @@ -474,9 +479,11 @@ void i40evf_del_ether_addrs(struct i40evf_adapter *adapter) count = (I40EVF_MAX_AQ_BUF_SIZE - sizeof(struct i40e_virtchnl_ether_addr_list)) / sizeof(struct i40e_virtchnl_ether_addr); - len = I40EVF_MAX_AQ_BUF_SIZE; + len = sizeof(struct i40e_virtchnl_ether_addr_list) + + (count * sizeof(struct i40e_virtchnl_ether_addr)); + more = true; } - veal = kzalloc(len, GFP_ATOMIC); + veal = kzalloc(len, GFP_KERNEL); if (!veal) return; @@ -490,7 +497,8 @@ void i40evf_del_ether_addrs(struct i40evf_adapter *adapter) kfree(f); } } - adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_MAC_FILTER; + if (!more) + adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_MAC_FILTER; i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS, (u8 *)veal, len); kfree(veal); @@ -509,6 +517,7 @@ void i40evf_add_vlans(struct i40evf_adapter *adapter) struct i40e_virtchnl_vlan_filter_list *vvfl; int len, i = 0, count = 0; struct i40evf_vlan_filter *f; + bool more = false; if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ @@ -534,9 +543,11 @@ void i40evf_add_vlans(struct i40evf_adapter *adapter) count = (I40EVF_MAX_AQ_BUF_SIZE - sizeof(struct i40e_virtchnl_vlan_filter_list)) / sizeof(u16); - len = I40EVF_MAX_AQ_BUF_SIZE; + len = sizeof(struct i40e_virtchnl_vlan_filter_list) + + (count * sizeof(u16)); + more = true; } - vvfl = kzalloc(len, GFP_ATOMIC); + vvfl = kzalloc(len, GFP_KERNEL); if (!vvfl) return; @@ -549,7 +560,8 @@ void i40evf_add_vlans(struct i40evf_adapter *adapter) f->add = false; } } - adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_VLAN_FILTER; + if (!more) + adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_VLAN_FILTER; i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_ADD_VLAN, (u8 
*)vvfl, len); kfree(vvfl); } @@ -567,6 +579,7 @@ void i40evf_del_vlans(struct i40evf_adapter *adapter) struct i40e_virtchnl_vlan_filter_list *vvfl; struct i40evf_vlan_filter *f, *ftmp; int len, i = 0, count = 0; + bool more = false; if (adapter->current_op != I40E_VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ @@ -592,9 +605,11 @@ void i40evf_del_vlans(struct i40evf_adapter *adapter) count = (I40EVF_MAX_AQ_BUF_SIZE - sizeof(struct i40e_virtchnl_vlan_filter_list)) / sizeof(u16); - len = I40EVF_MAX_AQ_BUF_SIZE; + len = sizeof(struct i40e_virtchnl_vlan_filter_list) + + (count * sizeof(u16)); + more = true; } - vvfl = kzalloc(len, GFP_ATOMIC); + vvfl = kzalloc(len, GFP_KERNEL); if (!vvfl) return; @@ -608,7 +623,8 @@ void i40evf_del_vlans(struct i40evf_adapter *adapter) kfree(f); } } - adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_VLAN_FILTER; + if (!more) + adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_VLAN_FILTER; i40evf_send_pf_msg(adapter, I40E_VIRTCHNL_OP_DEL_VLAN, (u8 *)vvfl, len); kfree(vvfl); } @@ -724,9 +740,29 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, return; } if (v_retval) { - dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n", - v_retval, i40evf_stat_str(&adapter->hw, v_retval), - v_opcode); + switch (v_opcode) { + case I40E_VIRTCHNL_OP_ADD_VLAN: + dev_err(&adapter->pdev->dev, "Failed to add VLAN filter, error %s\n", + i40evf_stat_str(&adapter->hw, v_retval)); + break; + case I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS: + dev_err(&adapter->pdev->dev, "Failed to add MAC filter, error %s\n", + i40evf_stat_str(&adapter->hw, v_retval)); + break; + case I40E_VIRTCHNL_OP_DEL_VLAN: + dev_err(&adapter->pdev->dev, "Failed to delete VLAN filter, error %s\n", + i40evf_stat_str(&adapter->hw, v_retval)); + break; + case I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS: + dev_err(&adapter->pdev->dev, "Failed to delete MAC filter, error %s\n", + i40evf_stat_str(&adapter->hw, v_retval)); + break; + default: + dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n", + v_retval, + i40evf_stat_str(&adapter->hw, v_retval), + v_opcode); + } } switch (v_opcode) { case I40E_VIRTCHNL_OP_GET_STATS: { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 1d2174526a4c..445b4c9169b6 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -139,6 +139,7 @@ enum ixgbe_tx_flags { #define IXGBE_X540_VF_DEVICE_ID 0x1515 struct vf_data_storage { + struct pci_dev *vfdev; unsigned char vf_mac_addresses[ETH_ALEN]; u16 vf_mc_hashes[IXGBE_MAX_VF_MC_ENTRIES]; u16 num_vf_mc_hashes; @@ -224,6 +225,8 @@ struct ixgbe_rx_queue_stats { u64 csum_err; }; +#define IXGBE_TS_HDR_LEN 8 + enum ixgbe_ring_state_t { __IXGBE_TX_FDIR_INIT_DONE, __IXGBE_TX_XPS_INIT_DONE, @@ -282,6 +285,8 @@ struct ixgbe_ring { u16 next_to_use; u16 next_to_clean; + unsigned long last_rx_timestamp; + union { u16 next_to_alloc; struct { @@ -587,9 +592,10 @@ static inline u16 ixgbe_desc_unused(struct ixgbe_ring *ring) struct ixgbe_mac_addr { u8 addr[ETH_ALEN]; - u16 queue; + u16 pool; u16 state; /* bitmask */ }; + #define IXGBE_MAC_STATE_DEFAULT 0x1 #define IXGBE_MAC_STATE_MODIFIED 0x2 #define IXGBE_MAC_STATE_IN_USE 0x4 @@ -639,6 +645,8 @@ struct ixgbe_adapter { #define IXGBE_FLAG_SRIOV_CAPABLE (u32)(1 << 22) #define IXGBE_FLAG_SRIOV_ENABLED (u32)(1 << 23) #define IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE BIT(24) +#define IXGBE_FLAG_RX_HWTSTAMP_ENABLED BIT(25) +#define 
IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER BIT(26) u32 flags2; #define IXGBE_FLAG2_RSC_CAPABLE (u32)(1 << 0) @@ -755,9 +763,12 @@ struct ixgbe_adapter { unsigned long last_rx_ptp_check; unsigned long last_rx_timestamp; spinlock_t tmreg_lock; - struct cyclecounter cc; - struct timecounter tc; + struct cyclecounter hw_cc; + struct timecounter hw_tc; u32 base_incval; + u32 tx_hwtstamp_timeouts; + u32 rx_hwtstamp_cleared; + void (*ptp_setup_sdp)(struct ixgbe_adapter *); /* SR-IOV */ DECLARE_BITMAP(active_vfs, IXGBE_MAX_VF_FUNCTIONS); @@ -883,9 +894,9 @@ int ixgbe_wol_supported(struct ixgbe_adapter *adapter, u16 device_id, void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter); #endif int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter, - u8 *addr, u16 queue); + const u8 *addr, u16 queue); int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter, - u8 *addr, u16 queue); + const u8 *addr, u16 queue); void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter); netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *, struct ixgbe_adapter *, struct ixgbe_ring *); @@ -968,12 +979,33 @@ void ixgbe_ptp_suspend(struct ixgbe_adapter *adapter); void ixgbe_ptp_stop(struct ixgbe_adapter *adapter); void ixgbe_ptp_overflow_check(struct ixgbe_adapter *adapter); void ixgbe_ptp_rx_hang(struct ixgbe_adapter *adapter); -void ixgbe_ptp_rx_hwtstamp(struct ixgbe_adapter *adapter, struct sk_buff *skb); +void ixgbe_ptp_rx_pktstamp(struct ixgbe_q_vector *, struct sk_buff *); +void ixgbe_ptp_rx_rgtstamp(struct ixgbe_q_vector *, struct sk_buff *skb); +static inline void ixgbe_ptp_rx_hwtstamp(struct ixgbe_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + if (unlikely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_TSIP))) { + ixgbe_ptp_rx_pktstamp(rx_ring->q_vector, skb); + return; + } + + if (unlikely(!ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_TS))) + return; + + ixgbe_ptp_rx_rgtstamp(rx_ring->q_vector, skb); + + /* Update the last_rx_timestamp timer in order to enable watchdog check + * for error case of latched timestamp on a dropped packet. + */ + rx_ring->last_rx_timestamp = jiffies; +} + int ixgbe_ptp_set_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr); int ixgbe_ptp_get_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr); void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter); void ixgbe_ptp_reset(struct ixgbe_adapter *adapter); -void ixgbe_ptp_check_pps_event(struct ixgbe_adapter *adapter, u32 eicr); +void ixgbe_ptp_check_pps_event(struct ixgbe_adapter *adapter); #ifdef CONFIG_PCI_IOV void ixgbe_sriov_reinit(struct ixgbe_adapter *adapter); #endif diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c index 65db69b862fb..8f09d291a043 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2014 Intel Corporation. + Copyright(c) 1999 - 2015 Intel Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -765,13 +765,14 @@ mac_reset_top: ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL) | IXGBE_CTRL_RST; IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl); IXGBE_WRITE_FLUSH(hw); + usleep_range(1000, 1200); /* Poll for reset bit to self-clear indicating reset is complete */ for (i = 0; i < 10; i++) { - udelay(1); ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL); if (!(ctrl & IXGBE_CTRL_RST)) break; + udelay(1); } if (ctrl & IXGBE_CTRL_RST) { status = IXGBE_ERR_RESET_FAILED; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c index a39afcf03e2c..b8bd72589f72 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c @@ -990,13 +990,14 @@ mac_reset_top: ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL); IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl); IXGBE_WRITE_FLUSH(hw); + usleep_range(1000, 1200); /* Poll for reset bit to self-clear indicating reset is complete */ for (i = 0; i < 10; i++) { - udelay(1); ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL); if (!(ctrl & IXGBE_CTRL_RST_MASK)) break; + udelay(1); } if (ctrl & IXGBE_CTRL_RST_MASK) { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index ce61b36b94f1..daec6aef5dc8 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2014 Intel Corporation. + Copyright(c) 1999 - 2015 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -2454,6 +2454,17 @@ static s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw) /* Always set this bit to ensure any future transactions are blocked */ IXGBE_WRITE_REG(hw, IXGBE_CTRL, IXGBE_CTRL_GIO_DIS); + /* Poll for bit to read as set */ + for (i = 0; i < IXGBE_PCI_MASTER_DISABLE_TIMEOUT; i++) { + if (IXGBE_READ_REG(hw, IXGBE_CTRL) & IXGBE_CTRL_GIO_DIS) + break; + usleep_range(100, 120); + } + if (i >= IXGBE_PCI_MASTER_DISABLE_TIMEOUT) { + hw_dbg(hw, "GIO disable did not set - requesting resets\n"); + goto gio_disable_fail; + } + /* Exit if master requests are blocked */ if (!(IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_GIO) || ixgbe_removed(hw->hw_addr)) @@ -2475,6 +2486,7 @@ static s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw) * again to clear out any effects they may have had on our device. 
*/ hw_dbg(hw, "GIO Master Disable bit didn't clear - requesting resets\n"); +gio_disable_fail: hw->mac.flags |= IXGBE_FLAGS_DOUBLE_RESET_REQUIRED; if (hw->mac.type >= ixgbe_mac_X550) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c index 631c603fc966..5f988703e1b7 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c @@ -620,8 +620,7 @@ static void ixgbe_fcoe_dma_pool_free(struct ixgbe_fcoe *fcoe, unsigned int cpu) struct ixgbe_fcoe_ddp_pool *ddp_pool; ddp_pool = per_cpu_ptr(fcoe->ddp_pool, cpu); - if (ddp_pool->pool) - dma_pool_destroy(ddp_pool->pool); + dma_pool_destroy(ddp_pool->pool); ddp_pool->pool = NULL; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index f3168bcc7d87..e771e764daa3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -844,7 +844,6 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter, /* initialize NAPI */ netif_napi_add(adapter->netdev, &q_vector->napi, ixgbe_poll, 64); - napi_hash_add(&q_vector->napi); #ifdef CONFIG_NET_RX_BUSY_POLL /* initialize busy poll */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 47395ff5d908..ebd4522e7879 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -65,9 +65,6 @@ #include "ixgbe_common.h" #include "ixgbe_dcb_82599.h" #include "ixgbe_sriov.h" -#ifdef CONFIG_IXGBE_VXLAN -#include <net/vxlan.h> -#endif char ixgbe_driver_name[] = "ixgbe"; static const char ixgbe_driver_string[] = @@ -175,6 +172,8 @@ MODULE_DESCRIPTION("Intel(R) 10 Gigabit PCI Express Network Driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); +static struct workqueue_struct *ixgbe_wq; + static bool ixgbe_check_cfg_remove(struct ixgbe_hw *hw, struct pci_dev *pdev); static int ixgbe_read_pci_cfg_word_parent(struct ixgbe_adapter *adapter, @@ -316,7 +315,7 @@ static void ixgbe_service_event_schedule(struct ixgbe_adapter *adapter) if (!test_bit(__IXGBE_DOWN, &adapter->state) && !test_bit(__IXGBE_REMOVING, &adapter->state) && !test_and_set_bit(__IXGBE_SERVICE_SCHED, &adapter->state)) - schedule_work(&adapter->service_task); + queue_work(ixgbe_wq, &adapter->service_task); } static void ixgbe_remove_adapter(struct ixgbe_hw *hw) @@ -1635,6 +1634,7 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring, struct sk_buff *skb) { struct net_device *dev = rx_ring->netdev; + u32 flags = rx_ring->q_vector->adapter->flags; ixgbe_update_rsc_stats(rx_ring, skb); @@ -1642,8 +1642,8 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring, ixgbe_rx_checksum(rx_ring, rx_desc, skb); - if (unlikely(ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_TS))) - ixgbe_ptp_rx_hwtstamp(rx_ring->q_vector->adapter, skb); + if (unlikely(flags & IXGBE_FLAG_RX_HWTSTAMP_ENABLED)) + ixgbe_ptp_rx_hwtstamp(rx_ring, rx_desc, skb); if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) && ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_VP)) { @@ -1659,6 +1659,7 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring, static void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector, struct sk_buff *skb) { + skb_mark_napi_id(skb, &q_vector->napi); if (ixgbe_qv_busy_polling(q_vector)) netif_receive_skb(skb); else @@ -2123,7 +2124,6 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, } #endif /* IXGBE_FCOE */ - 
skb_mark_napi_id(skb, &q_vector->napi); ixgbe_rx_skb(q_vector, skb); /* update budget accounting */ @@ -2741,7 +2741,7 @@ static irqreturn_t ixgbe_msix_other(int irq, void *data) ixgbe_check_fan_failure(adapter, eicr); if (unlikely(eicr & IXGBE_EICR_TIMESYNC)) - ixgbe_ptp_check_pps_event(adapter, eicr); + ixgbe_ptp_check_pps_event(adapter); /* re-enable the original interrupt state, no lsc, no queues */ if (!test_bit(__IXGBE_DOWN, &adapter->state)) @@ -2757,7 +2757,7 @@ static irqreturn_t ixgbe_msix_clean_rings(int irq, void *data) /* EIAM disabled interrupts (on this vector) for us */ if (q_vector->rx.ring || q_vector->tx.ring) - napi_schedule(&q_vector->napi); + napi_schedule_irqoff(&q_vector->napi); return IRQ_HANDLED; } @@ -2786,7 +2786,8 @@ int ixgbe_poll(struct napi_struct *napi, int budget) ixgbe_for_each_ring(ring, q_vector->tx) clean_complete &= !!ixgbe_clean_tx_irq(q_vector, ring); - if (!ixgbe_qv_lock_napi(q_vector)) + /* Exit if we are called by netpoll or busy polling is active */ + if ((budget <= 0) || !ixgbe_qv_lock_napi(q_vector)) return budget; /* attempt to distribute budget to each queue fairly, but don't allow @@ -2947,10 +2948,10 @@ static irqreturn_t ixgbe_intr(int irq, void *data) ixgbe_check_fan_failure(adapter, eicr); if (unlikely(eicr & IXGBE_EICR_TIMESYNC)) - ixgbe_ptp_check_pps_event(adapter, eicr); + ixgbe_ptp_check_pps_event(adapter); /* would disable interrupts here but EIAM disabled it */ - napi_schedule(&q_vector->napi); + napi_schedule_irqoff(&q_vector->napi); /* * re-enable link(maybe) and non-queue interrupts, no flush. @@ -3315,8 +3316,7 @@ static void ixgbe_configure_srrctl(struct ixgbe_adapter *adapter, } /** - * Return a number of entries in the RSS indirection table - * + * ixgbe_rss_indir_tbl_entries - Return RSS indirection table entries * @adapter: device handle * * - 82598/82599/X540: 128 @@ -3334,8 +3334,7 @@ u32 ixgbe_rss_indir_tbl_entries(struct ixgbe_adapter *adapter) } /** - * Write the RETA table to HW - * + * ixgbe_store_reta - Write the RETA table to HW * @adapter: device handle * * Write the RSS redirection table stored in adapter.rss_indir_tbl[] to HW. @@ -3374,8 +3373,7 @@ void ixgbe_store_reta(struct ixgbe_adapter *adapter) } /** - * Write the RETA table to HW (for x550 devices in SRIOV mode) - * + * ixgbe_store_vfreta - Write the RETA table to HW (x550 devices in SRIOV mode) * @adapter: device handle * * Write the RSS redirection table stored in adapter.rss_indir_tbl[] to HW. 
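[Editor's note] The ixgbe_store_reta kernel-doc above says the table held in adapter->rss_indir_tbl[] is written out to hardware. As a rough illustration of the packing this implies (four 8-bit queue indices per 32-bit RETA register, low byte first), a minimal sketch follows; the function name is hypothetical, it only covers the first 128 entries, and it is not the code added by this patch:

	/* Sketch only: pack rss_indir_tbl[] into RETA registers,
	 * four 8-bit queue indices per 32-bit register.
	 */
	static void example_store_reta(struct ixgbe_adapter *adapter)
	{
		u32 reta_entries = ixgbe_rss_indir_tbl_entries(adapter);
		struct ixgbe_hw *hw = &adapter->hw;
		u32 i, reta = 0;

		for (i = 0; i < reta_entries && i < 128; i++) {
			reta |= (u32)adapter->rss_indir_tbl[i] << ((i & 3) * 8);
			if ((i & 3) == 3) {
				IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
				reta = 0;
			}
		}
	}
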
@@ -4034,124 +4032,156 @@ static int ixgbe_write_mc_addr_list(struct net_device *netdev) #ifdef CONFIG_PCI_IOV void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter) { + struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0]; struct ixgbe_hw *hw = &adapter->hw; int i; - for (i = 0; i < hw->mac.num_rar_entries; i++) { - if (adapter->mac_table[i].state & IXGBE_MAC_STATE_IN_USE) - hw->mac.ops.set_rar(hw, i, adapter->mac_table[i].addr, - adapter->mac_table[i].queue, + + for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) { + mac_table->state &= ~IXGBE_MAC_STATE_MODIFIED; + + if (mac_table->state & IXGBE_MAC_STATE_IN_USE) + hw->mac.ops.set_rar(hw, i, + mac_table->addr, + mac_table->pool, IXGBE_RAH_AV); else hw->mac.ops.clear_rar(hw, i); - - adapter->mac_table[i].state &= ~(IXGBE_MAC_STATE_MODIFIED); } } -#endif +#endif static void ixgbe_sync_mac_table(struct ixgbe_adapter *adapter) { + struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0]; struct ixgbe_hw *hw = &adapter->hw; int i; - for (i = 0; i < hw->mac.num_rar_entries; i++) { - if (adapter->mac_table[i].state & IXGBE_MAC_STATE_MODIFIED) { - if (adapter->mac_table[i].state & - IXGBE_MAC_STATE_IN_USE) - hw->mac.ops.set_rar(hw, i, - adapter->mac_table[i].addr, - adapter->mac_table[i].queue, - IXGBE_RAH_AV); - else - hw->mac.ops.clear_rar(hw, i); - adapter->mac_table[i].state &= - ~(IXGBE_MAC_STATE_MODIFIED); - } + for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) { + if (!(mac_table->state & IXGBE_MAC_STATE_MODIFIED)) + continue; + + mac_table->state &= ~IXGBE_MAC_STATE_MODIFIED; + + if (mac_table->state & IXGBE_MAC_STATE_IN_USE) + hw->mac.ops.set_rar(hw, i, + mac_table->addr, + mac_table->pool, + IXGBE_RAH_AV); + else + hw->mac.ops.clear_rar(hw, i); } } static void ixgbe_flush_sw_mac_table(struct ixgbe_adapter *adapter) { - int i; + struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0]; struct ixgbe_hw *hw = &adapter->hw; + int i; - for (i = 0; i < hw->mac.num_rar_entries; i++) { - adapter->mac_table[i].state |= IXGBE_MAC_STATE_MODIFIED; - adapter->mac_table[i].state &= ~IXGBE_MAC_STATE_IN_USE; - eth_zero_addr(adapter->mac_table[i].addr); - adapter->mac_table[i].queue = 0; + for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) { + mac_table->state |= IXGBE_MAC_STATE_MODIFIED; + mac_table->state &= ~IXGBE_MAC_STATE_IN_USE; } + ixgbe_sync_mac_table(adapter); } -static int ixgbe_available_rars(struct ixgbe_adapter *adapter) +static int ixgbe_available_rars(struct ixgbe_adapter *adapter, u16 pool) { + struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0]; struct ixgbe_hw *hw = &adapter->hw; int i, count = 0; - for (i = 0; i < hw->mac.num_rar_entries; i++) { - if (adapter->mac_table[i].state == 0) - count++; + for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) { + /* do not count default RAR as available */ + if (mac_table->state & IXGBE_MAC_STATE_DEFAULT) + continue; + + /* only count unused and addresses that belong to us */ + if (mac_table->state & IXGBE_MAC_STATE_IN_USE) { + if (mac_table->pool != pool) + continue; + } + + count++; } + return count; } /* this function destroys the first RAR entry */ -static void ixgbe_mac_set_default_filter(struct ixgbe_adapter *adapter, - u8 *addr) +static void ixgbe_mac_set_default_filter(struct ixgbe_adapter *adapter) { + struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0]; struct ixgbe_hw *hw = &adapter->hw; - memcpy(&adapter->mac_table[0].addr, addr, ETH_ALEN); - adapter->mac_table[0].queue = VMDQ_P(0); - adapter->mac_table[0].state = 
(IXGBE_MAC_STATE_DEFAULT | - IXGBE_MAC_STATE_IN_USE); - hw->mac.ops.set_rar(hw, 0, adapter->mac_table[0].addr, - adapter->mac_table[0].queue, + memcpy(&mac_table->addr, hw->mac.addr, ETH_ALEN); + mac_table->pool = VMDQ_P(0); + + mac_table->state = IXGBE_MAC_STATE_DEFAULT | IXGBE_MAC_STATE_IN_USE; + + hw->mac.ops.set_rar(hw, 0, mac_table->addr, mac_table->pool, IXGBE_RAH_AV); } -int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter, u8 *addr, u16 queue) +int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter, + const u8 *addr, u16 pool) { + struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0]; struct ixgbe_hw *hw = &adapter->hw; int i; if (is_zero_ether_addr(addr)) return -EINVAL; - for (i = 0; i < hw->mac.num_rar_entries; i++) { - if (adapter->mac_table[i].state & IXGBE_MAC_STATE_IN_USE) + for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) { + if (mac_table->state & IXGBE_MAC_STATE_IN_USE) continue; - adapter->mac_table[i].state |= (IXGBE_MAC_STATE_MODIFIED | - IXGBE_MAC_STATE_IN_USE); - ether_addr_copy(adapter->mac_table[i].addr, addr); - adapter->mac_table[i].queue = queue; + + ether_addr_copy(mac_table->addr, addr); + mac_table->pool = pool; + + mac_table->state |= IXGBE_MAC_STATE_MODIFIED | + IXGBE_MAC_STATE_IN_USE; + ixgbe_sync_mac_table(adapter); + return i; } + return -ENOMEM; } -int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter, u8 *addr, u16 queue) +int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter, + const u8 *addr, u16 pool) { - /* search table for addr, if found, set to 0 and sync */ - int i; + struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0]; struct ixgbe_hw *hw = &adapter->hw; + int i; if (is_zero_ether_addr(addr)) return -EINVAL; - for (i = 0; i < hw->mac.num_rar_entries; i++) { - if (ether_addr_equal(addr, adapter->mac_table[i].addr) && - adapter->mac_table[i].queue == queue) { - adapter->mac_table[i].state |= IXGBE_MAC_STATE_MODIFIED; - adapter->mac_table[i].state &= ~IXGBE_MAC_STATE_IN_USE; - eth_zero_addr(adapter->mac_table[i].addr); - adapter->mac_table[i].queue = 0; - ixgbe_sync_mac_table(adapter); - return 0; - } + /* search table for addr, if found clear IN_USE flag and sync */ + for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) { + /* we can only delete an entry if it is in use */ + if (!(mac_table->state & IXGBE_MAC_STATE_IN_USE)) + continue; + /* we only care about entries that belong to the given pool */ + if (mac_table->pool != pool) + continue; + /* we only care about a specific MAC address */ + if (!ether_addr_equal(addr, mac_table->addr)) + continue; + + mac_table->state |= IXGBE_MAC_STATE_MODIFIED; + mac_table->state &= ~IXGBE_MAC_STATE_IN_USE; + + ixgbe_sync_mac_table(adapter); + + return 0; } + return -ENOMEM; } /** @@ -4169,7 +4199,7 @@ static int ixgbe_write_uc_addr_list(struct net_device *netdev, int vfn) int count = 0; /* return ENOMEM indicating insufficient memory for addresses */ - if (netdev_uc_count(netdev) > ixgbe_available_rars(adapter)) + if (netdev_uc_count(netdev) > ixgbe_available_rars(adapter, vfn)) return -ENOMEM; if (!netdev_uc_empty(netdev)) { @@ -4183,6 +4213,25 @@ static int ixgbe_write_uc_addr_list(struct net_device *netdev, int vfn) return count; } +static int ixgbe_uc_sync(struct net_device *netdev, const unsigned char *addr) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + int ret; + + ret = ixgbe_add_mac_filter(adapter, addr, VMDQ_P(0)); + + return min_t(int, ret, 0); +} + +static int ixgbe_uc_unsync(struct net_device *netdev, const unsigned char *addr) +{ + struct ixgbe_adapter 
*adapter = netdev_priv(netdev); + + ixgbe_del_mac_filter(adapter, addr, VMDQ_P(0)); + + return 0; +} + /** * ixgbe_set_rx_mode - Unicast, Multicast and Promiscuous mode set * @netdev: network interface device structure @@ -4238,8 +4287,7 @@ void ixgbe_set_rx_mode(struct net_device *netdev) * sufficient space to store all the addresses then enable * unicast promiscuous mode */ - count = ixgbe_write_uc_addr_list(netdev, VMDQ_P(0)); - if (count < 0) { + if (__dev_uc_sync(netdev, ixgbe_uc_sync, ixgbe_uc_unsync)) { fctrl |= IXGBE_FCTRL_UPE; vmolr |= IXGBE_VMOLR_ROPE; } @@ -5042,7 +5090,6 @@ void ixgbe_reset(struct ixgbe_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; struct net_device *netdev = adapter->netdev; int err; - u8 old_addr[ETH_ALEN]; if (ixgbe_removed(hw->hw_addr)) return; @@ -5078,10 +5125,13 @@ void ixgbe_reset(struct ixgbe_adapter *adapter) } clear_bit(__IXGBE_IN_SFP_INIT, &adapter->state); - /* do not flush user set addresses */ - memcpy(old_addr, &adapter->mac_table[0].addr, netdev->addr_len); + + /* flush entries out of MAC table */ ixgbe_flush_sw_mac_table(adapter); - ixgbe_mac_set_default_filter(adapter, old_addr); + __dev_uc_unsync(netdev, NULL); + + /* do not flush user set addresses */ + ixgbe_mac_set_default_filter(adapter); /* update SAN MAC vmdq pool selection */ if (hw->mac.san_mac_rar_index) @@ -6616,10 +6666,8 @@ static void ixgbe_check_for_bad_vf(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct pci_dev *pdev = adapter->pdev; - struct pci_dev *vfdev; + unsigned int vf; u32 gpc; - int pos; - unsigned short vf_id; if (!(netif_carrier_ok(adapter->netdev))) return; @@ -6636,26 +6684,17 @@ static void ixgbe_check_for_bad_vf(struct ixgbe_adapter *adapter) if (!pdev) return; - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); - if (!pos) - return; - - /* get the device ID for the VF */ - pci_read_config_word(pdev, pos + PCI_SRIOV_VF_DID, &vf_id); - /* check status reg for all VFs owned by this PF */ - vfdev = pci_get_device(pdev->vendor, vf_id, NULL); - while (vfdev) { - if (vfdev->is_virtfn && (vfdev->physfn == pdev)) { - u16 status_reg; - - pci_read_config_word(vfdev, PCI_STATUS, &status_reg); - if (status_reg & PCI_STATUS_REC_MASTER_ABORT) - /* issue VFLR */ - ixgbe_issue_vf_flr(adapter, vfdev); - } + for (vf = 0; vf < adapter->num_vfs; ++vf) { + struct pci_dev *vfdev = adapter->vfinfo[vf].vfdev; + u16 status_reg; - vfdev = pci_get_device(pdev->vendor, vf_id, vfdev); + if (!vfdev) + continue; + pci_read_config_word(vfdev, PCI_STATUS, &status_reg); + if (status_reg != IXGBE_FAILED_READ_CFG_WORD && + status_reg & PCI_STATUS_REC_MASTER_ABORT) + ixgbe_issue_vf_flr(adapter, vfdev); } } @@ -7024,6 +7063,7 @@ static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring, struct tcphdr *tcphdr; u8 *raw; } transport_hdr; + __be16 frag_off; if (skb->encapsulation) { network_hdr.raw = skb_inner_network_header(skb); @@ -7047,13 +7087,17 @@ static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring, case 6: vlan_macip_lens |= transport_hdr.raw - network_hdr.raw; l4_hdr = network_hdr.ipv6->nexthdr; + if (likely((transport_hdr.raw - network_hdr.raw) == + sizeof(struct ipv6hdr))) + break; + ipv6_skip_exthdr(skb, network_hdr.raw - skb->data + + sizeof(struct ipv6hdr), + &l4_hdr, &frag_off); + if (unlikely(frag_off)) + l4_hdr = NEXTHDR_FRAGMENT; break; default: - if (unlikely(net_ratelimit())) { - dev_warn(tx_ring->dev, - "partial checksum but version=%d\n", - network_hdr.ipv4->version); - } + break; } switch (l4_hdr) { @@ -7074,16 +7118,18 @@ static void 
ixgbe_tx_csum(struct ixgbe_ring *tx_ring, default: if (unlikely(net_ratelimit())) { dev_warn(tx_ring->dev, - "partial checksum but l4 proto=%x!\n", - l4_hdr); + "partial checksum, version=%d, l4 proto=%x\n", + network_hdr.ipv4->version, l4_hdr); } - break; + skb_checksum_help(skb); + goto no_csum; } /* update TX checksum flag */ first->tx_flags |= IXGBE_TX_FLAGS_CSUM; } +no_csum: /* vlan_macip_lens: MACLEN, VLAN tag */ vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK; @@ -7659,17 +7705,16 @@ static int ixgbe_set_mac(struct net_device *netdev, void *p) struct ixgbe_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; struct sockaddr *addr = p; - int ret; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - ixgbe_del_mac_filter(adapter, hw->mac.addr, VMDQ_P(0)); memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); - ret = ixgbe_add_mac_filter(adapter, hw->mac.addr, VMDQ_P(0)); - return ret > 0 ? 0 : ret; + ixgbe_mac_set_default_filter(adapter); + + return 0; } static int @@ -8152,7 +8197,10 @@ static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], { /* guarantee we can provide a unique filter for the unicast address */ if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) { - if (IXGBE_MAX_PF_MACVLANS <= netdev_uc_count(dev)) + struct ixgbe_adapter *adapter = netdev_priv(dev); + u16 pool = VMDQ_P(0); + + if (netdev_uc_count(dev) >= ixgbe_available_rars(adapter, pool)) return -ENOMEM; } @@ -8870,7 +8918,7 @@ skip_sriov: goto err_sw_init; } - ixgbe_mac_set_default_filter(adapter, hw->mac.perm_addr); + ixgbe_mac_set_default_filter(adapter); setup_timer(&adapter->service_timer, &ixgbe_service_timer, (unsigned long) adapter); @@ -9325,6 +9373,12 @@ static int __init ixgbe_init_module(void) pr_info("%s - version %s\n", ixgbe_driver_string, ixgbe_driver_version); pr_info("%s\n", ixgbe_copyright); + ixgbe_wq = create_singlethread_workqueue(ixgbe_driver_name); + if (!ixgbe_wq) { + pr_err("%s: Failed to create workqueue\n", ixgbe_driver_name); + return -ENOMEM; + } + ixgbe_dbg_init(); ret = pci_register_driver(&ixgbe_driver); @@ -9356,6 +9410,10 @@ static void __exit ixgbe_exit_module(void) pci_unregister_driver(&ixgbe_driver); ixgbe_dbg_exit(); + if (ixgbe_wq) { + destroy_workqueue(ixgbe_wq); + ixgbe_wq = NULL; + } } #ifdef CONFIG_IXGBE_DCA diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index fb8673d63806..db0731e05401 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -2393,6 +2393,9 @@ s32 ixgbe_set_copper_phy_power(struct ixgbe_hw *hw, bool on) if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper) return 0; + if (!on && ixgbe_mng_present(hw)) + return 0; + status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_VENDOR_SPECIFIC_1_CONTROL, IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, ®); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index e5ba04025e2b..ef1504d41890 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2013 Intel Corporation. + Copyright(c) 1999 - 2015 Intel Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -27,6 +27,7 @@ *******************************************************************************/ #include "ixgbe.h" #include <linux/ptp_classify.h> +#include <linux/clocksource.h> /* * The 82599 and the X540 do not have true 64bit nanosecond scale @@ -93,7 +94,6 @@ #define IXGBE_INCVAL_SHIFT_82599 7 #define IXGBE_INCPER_SHIFT_82599 24 -#define IXGBE_MAX_TIMEADJ_VALUE 0x7FFFFFFFFFFFFFFFULL #define IXGBE_OVERFLOW_PERIOD (HZ * 30) #define IXGBE_PTP_TX_TIMEOUT (HZ * 15) @@ -104,8 +104,68 @@ */ #define IXGBE_PTP_PPS_HALF_SECOND 500000000ULL +/* In contrast, the X550 controller has two registers, SYSTIMEH and SYSTIMEL + * which contain measurements of seconds and nanoseconds respectively. This + * matches the standard linux representation of time in the kernel. In addition, + * the X550 also has a SYSTIMER register which represents residue, or + * subnanosecond overflow adjustments. To control clock adjustment, the TIMINCA + * register is used, but it is unlike the X540 and 82599 devices. TIMINCA + * represents units of 2^-32 nanoseconds, and uses 31 bits for this, with the + * high bit representing whether the adjustent is positive or negative. Every + * clock cycle, the X550 will add 12.5 ns + TIMINCA which can result in a range + * of 12 to 13 nanoseconds adjustment. Unlike the 82599 and X540 devices, the + * X550's clock for purposes of SYSTIME generation is constant and not dependent + * on the link speed. + * + * SYSTIMEH SYSTIMEL SYSTIMER + * +--------------+ +--------------+ +-------------+ + * X550 | 32 | | 32 | | 32 | + * *--------------+ +--------------+ +-------------+ + * \____seconds___/ \_nanoseconds_/ \__2^-32 ns__/ + * + * This results in a full 96 bits to represent the clock, with 32 bits for + * seconds, 32 bits for nanoseconds (largest value is 0d999999999 or just under + * 1 second) and an additional 32 bits to measure sub nanosecond adjustments for + * underflow of adjustments. + * + * The 32 bits of seconds for the X550 overflows every + * 2^32 / ( 365.25 * 24 * 60 * 60 ) = ~136 years. + * + * In order to adjust the clock frequency for the X550, the TIMINCA register is + * provided. This register represents a + or minus nearly 0.5 ns adjustment to + * the base frequency. It is measured in 2^-32 ns units, with the high bit being + * the sign bit. This register enables software to calculate frequency + * adjustments and apply them directly to the clock rate. + * + * The math for converting ppb into TIMINCA values is fairly straightforward. + * TIMINCA value = ( Base_Frequency * ppb ) / 1000000000ULL + * + * This assumes that ppb is never high enough to create a value bigger than + * TIMINCA's 31 bits can store. This is ensured by the stack. Calculating this + * value is also simple. + * Max ppb = ( Max Adjustment / Base Frequency ) / 1000000000ULL + * + * For the X550, the Max adjustment is +/- 0.5 ns, and the base frequency is + * 12.5 nanoseconds. This means that the Max ppb is 39999999 + * Note: We subtract one in order to ensure no overflow, because the TIMINCA + * register can only hold slightly under 0.5 nanoseconds. + * + * Because TIMINCA is measured in 2^-32 ns units, we have to convert 12.5 ns + * into 2^-32 units, which is + * + * 12.5 * 2^32 = C80000000 + * + * Some revisions of hardware have a faster base frequency than the registers + * were defined for. 
To fix this, we use a timecounter structure with the + * proper mult and shift to convert the cycles into nanoseconds of time. + */ +#define IXGBE_X550_BASE_PERIOD 0xC80000000ULL +#define INCVALUE_MASK 0x7FFFFFFF +#define ISGN 0x80000000 +#define MAX_TIMADJ 0x7FFFFFFF + /** - * ixgbe_ptp_setup_sdp + * ixgbe_ptp_setup_sdp_x540 * @hw: the hardware private structure * * this function enables or disables the clock out feature on SDP0 for @@ -116,83 +176,116 @@ * aligns the start of the PPS signal to that value. The shift is * necessary because it can change based on the link speed. */ -static void ixgbe_ptp_setup_sdp(struct ixgbe_adapter *adapter) +static void ixgbe_ptp_setup_sdp_x540(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; - int shift = adapter->cc.shift; + int shift = adapter->hw_cc.shift; u32 esdp, tsauxc, clktiml, clktimh, trgttiml, trgttimh, rem; u64 ns = 0, clock_edge = 0; - if ((adapter->flags2 & IXGBE_FLAG2_PTP_PPS_ENABLED) && - (hw->mac.type == ixgbe_mac_X540)) { + /* disable the pin first */ + IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, 0x0); + IXGBE_WRITE_FLUSH(hw); - /* disable the pin first */ - IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, 0x0); - IXGBE_WRITE_FLUSH(hw); + if (!(adapter->flags2 & IXGBE_FLAG2_PTP_PPS_ENABLED)) + return; - esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); + esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); - /* - * enable the SDP0 pin as output, and connected to the - * native function for Timesync (ClockOut) - */ - esdp |= (IXGBE_ESDP_SDP0_DIR | - IXGBE_ESDP_SDP0_NATIVE); + /* enable the SDP0 pin as output, and connected to the + * native function for Timesync (ClockOut) + */ + esdp |= IXGBE_ESDP_SDP0_DIR | + IXGBE_ESDP_SDP0_NATIVE; - /* - * enable the Clock Out feature on SDP0, and allow - * interrupts to occur when the pin changes - */ - tsauxc = (IXGBE_TSAUXC_EN_CLK | - IXGBE_TSAUXC_SYNCLK | - IXGBE_TSAUXC_SDP0_INT); + /* enable the Clock Out feature on SDP0, and allow + * interrupts to occur when the pin changes + */ + tsauxc = IXGBE_TSAUXC_EN_CLK | + IXGBE_TSAUXC_SYNCLK | + IXGBE_TSAUXC_SDP0_INT; - /* clock period (or pulse length) */ - clktiml = (u32)(IXGBE_PTP_PPS_HALF_SECOND << shift); - clktimh = (u32)((IXGBE_PTP_PPS_HALF_SECOND << shift) >> 32); + /* clock period (or pulse length) */ + clktiml = (u32)(IXGBE_PTP_PPS_HALF_SECOND << shift); + clktimh = (u32)((IXGBE_PTP_PPS_HALF_SECOND << shift) >> 32); - /* - * Account for the cyclecounter wrap-around value by - * using the converted ns value of the current time to - * check for when the next aligned second would occur. - */ - clock_edge |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIML); - clock_edge |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIMH) << 32; - ns = timecounter_cyc2time(&adapter->tc, clock_edge); + /* Account for the cyclecounter wrap-around value by + * using the converted ns value of the current time to + * check for when the next aligned second would occur. 
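For illustration only (this sketch is not taken from the driver, and the names are placeholders): the div_u64_rem() step in this hunk simply rounds the current time up to the next half-second boundary before the target-time registers are programmed. In plain C the rounding is:

	#include <stdint.h>

	/* Sketch only: round a nanosecond timestamp up to the next half-second
	 * boundary, mirroring the div_u64_rem() arithmetic in the driver.
	 */
	static uint64_t next_half_second_edge(uint64_t now_ns)
	{
		const uint64_t half_second_ns = 500000000ULL;
		uint64_t rem = now_ns % half_second_ns;

		return now_ns + (half_second_ns - rem);
	}

The driver then shifts the remainder left by the cyclecounter shift so the correction lands back in SYSTIME units before being added to clock_edge.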
+ */ + clock_edge |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIML); + clock_edge |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIMH) << 32; + ns = timecounter_cyc2time(&adapter->hw_tc, clock_edge); - div_u64_rem(ns, IXGBE_PTP_PPS_HALF_SECOND, &rem); - clock_edge += ((IXGBE_PTP_PPS_HALF_SECOND - (u64)rem) << shift); + div_u64_rem(ns, IXGBE_PTP_PPS_HALF_SECOND, &rem); + clock_edge += ((IXGBE_PTP_PPS_HALF_SECOND - (u64)rem) << shift); - /* specify the initial clock start time */ - trgttiml = (u32)clock_edge; - trgttimh = (u32)(clock_edge >> 32); + /* specify the initial clock start time */ + trgttiml = (u32)clock_edge; + trgttimh = (u32)(clock_edge >> 32); - IXGBE_WRITE_REG(hw, IXGBE_CLKTIML, clktiml); - IXGBE_WRITE_REG(hw, IXGBE_CLKTIMH, clktimh); - IXGBE_WRITE_REG(hw, IXGBE_TRGTTIML0, trgttiml); - IXGBE_WRITE_REG(hw, IXGBE_TRGTTIMH0, trgttimh); + IXGBE_WRITE_REG(hw, IXGBE_CLKTIML, clktiml); + IXGBE_WRITE_REG(hw, IXGBE_CLKTIMH, clktimh); + IXGBE_WRITE_REG(hw, IXGBE_TRGTTIML0, trgttiml); + IXGBE_WRITE_REG(hw, IXGBE_TRGTTIMH0, trgttimh); - IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp); - IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, tsauxc); - } else { - IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, 0x0); - } + IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp); + IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, tsauxc); IXGBE_WRITE_FLUSH(hw); } /** - * ixgbe_ptp_read - read raw cycle counter (to be used by time counter) + * ixgbe_ptp_read_X550 - read cycle counter value + * @hw_cc: cyclecounter structure + * + * This function reads SYSTIME registers. It is called by the cyclecounter + * structure to convert from internal representation into nanoseconds. We need + * this for X550 since some skews do not have expected clock frequency and + * result of SYSTIME is 32bits of "billions of cycles" and 32 bits of + * "cycles", rather than seconds and nanoseconds. + */ +static cycle_t ixgbe_ptp_read_X550(const struct cyclecounter *hw_cc) +{ + struct ixgbe_adapter *adapter = + container_of(hw_cc, struct ixgbe_adapter, hw_cc); + struct ixgbe_hw *hw = &adapter->hw; + struct timespec64 ts; + + /* storage is 32 bits of 'billions of cycles' and 32 bits of 'cycles'. + * Some revisions of hardware run at a higher frequency and so the + * cycles are not guaranteed to be nanoseconds. The timespec64 created + * here is used for its math/conversions but does not necessarily + * represent nominal time. + * + * It should be noted that this cyclecounter will overflow at a + * non-bitmask field since we have to convert our billions of cycles + * into an actual cycles count. This results in some possible weird + * situations at high cycle counter stamps. However given that 32 bits + * of "seconds" is ~138 years this isn't a problem. Even at the + * increased frequency of some revisions, this is still ~103 years. + * Since the SYSTIME values start at 0 and we never write them, it is + * highly unlikely for the cyclecounter to overflow in practice. 
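As a rough illustration (not from the driver; names are hypothetical): the conversion described above collapses the two 32-bit SYSTIME words, a count of "billions of cycles" and a count of "cycles", into one 64-bit cycle value, which is exactly the arithmetic that timespec64_to_ns() performs on the driver's behalf:

	#include <stdint.h>

	/* Sketch only: combine SYSTIMH ("billions of cycles") and SYSTIML
	 * ("cycles") into a single 64-bit cycle count, the same math that
	 * timespec64_to_ns() does for ixgbe_ptp_read_X550().
	 */
	static uint64_t systime_words_to_cycles(uint32_t systimh, uint32_t systiml)
	{
		return (uint64_t)systimh * 1000000000ULL + systiml;
	}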
+ */ + IXGBE_READ_REG(hw, IXGBE_SYSTIMR); + ts.tv_nsec = IXGBE_READ_REG(hw, IXGBE_SYSTIML); + ts.tv_sec = IXGBE_READ_REG(hw, IXGBE_SYSTIMH); + + return (u64)timespec64_to_ns(&ts); +} + +/** + * ixgbe_ptp_read_82599 - read raw cycle counter (to be used by time counter) * @cc: the cyclecounter structure * * this function reads the cyclecounter registers and is called by the * cyclecounter structure used to construct a ns counter from the * arbitrary fixed point registers */ -static cycle_t ixgbe_ptp_read(const struct cyclecounter *cc) +static cycle_t ixgbe_ptp_read_82599(const struct cyclecounter *cc) { struct ixgbe_adapter *adapter = - container_of(cc, struct ixgbe_adapter, cc); + container_of(cc, struct ixgbe_adapter, hw_cc); struct ixgbe_hw *hw = &adapter->hw; u64 stamp = 0; @@ -203,20 +296,79 @@ static cycle_t ixgbe_ptp_read(const struct cyclecounter *cc) } /** - * ixgbe_ptp_adjfreq + * ixgbe_ptp_convert_to_hwtstamp - convert register value to hw timestamp + * @adapter: private adapter structure + * @hwtstamp: stack timestamp structure + * @systim: unsigned 64bit system time value + * + * We need to convert the adapter's RX/TXSTMP registers into a hwtstamp value + * which can be used by the stack's ptp functions. + * + * The lock is used to protect consistency of the cyclecounter and the SYSTIME + * registers. However, it does not need to protect against the Rx or Tx + * timestamp registers, as there can't be a new timestamp until the old one is + * unlatched by reading. + * + * In addition to the timestamp in hardware, some controllers need a software + * overflow cyclecounter, and this function takes this into account as well. + **/ +static void ixgbe_ptp_convert_to_hwtstamp(struct ixgbe_adapter *adapter, + struct skb_shared_hwtstamps *hwtstamp, + u64 timestamp) +{ + unsigned long flags; + struct timespec64 systime; + u64 ns; + + memset(hwtstamp, 0, sizeof(*hwtstamp)); + + switch (adapter->hw.mac.type) { + /* X550 and later hardware supposedly represent time using a seconds + * and nanoseconds counter, instead of raw 64bits nanoseconds. We need + * to convert the timestamp into cycles before it can be fed to the + * cyclecounter. We need an actual cyclecounter because some revisions + * of hardware run at a higher frequency and thus the counter does + * not represent seconds/nanoseconds. Instead it can be thought of as + * cycles and billions of cycles. + */ + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + /* Upper 32 bits represent billions of cycles, lower 32 bits + * represent cycles. However, we use timespec64_to_ns for the + * correct math even though the units haven't been corrected + * yet. + */ + systime.tv_sec = timestamp >> 32; + systime.tv_nsec = timestamp & 0xFFFFFFFF; + + timestamp = timespec64_to_ns(&systime); + break; + default: + break; + } + + spin_lock_irqsave(&adapter->tmreg_lock, flags); + ns = timecounter_cyc2time(&adapter->hw_tc, timestamp); + spin_unlock_irqrestore(&adapter->tmreg_lock, flags); + + hwtstamp->hwtstamp = ns_to_ktime(ns); +} + +/** + * ixgbe_ptp_adjfreq_82599 * @ptp: the ptp clock structure * @ppb: parts per billion adjustment from base * * adjust the frequency of the ptp cycle counter by the * indicated ppb from the base frequency. 
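As an illustration of the arithmetic only (not code from the patch; the names are hypothetical, though the constants mirror the new defines): the ppb-to-TIMINCA conversion described in the X550 comment block, TIMINCA = (base period * ppb) / 10^9 with the top bit acting as the sign, can be sketched as:

	#include <stdint.h>

	#define X550_BASE_PERIOD	0xC80000000ULL	/* 12.5 ns in 2^-32 ns units */
	#define X550_INCVALUE_MASK	0x7FFFFFFFU
	#define X550_ISGN		0x80000000U

	/* Sketch only: convert a signed ppb adjustment into an X550-style
	 * TIMINCA value, following the formula in the comment block above.
	 */
	static uint32_t x550_ppb_to_timinca(int32_t ppb)
	{
		uint64_t rate = X550_BASE_PERIOD;
		uint32_t sign = 0;

		if (ppb < 0) {
			sign = X550_ISGN;	/* high bit requests a negative adjustment */
			ppb = -ppb;
		}

		rate = (rate * (uint64_t)ppb) / 1000000000ULL;

		return sign | ((uint32_t)rate & X550_INCVALUE_MASK);
	}

For example, ppb = 1000 gives 0xC80000000 * 1000 / 10^9, roughly 0xD1B7, i.e. about 12.5 millionths of a nanosecond of extra increment per cycle, which is 1000 ppb of the 12.5 ns base period.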
*/ -static int ixgbe_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) +static int ixgbe_ptp_adjfreq_82599(struct ptp_clock_info *ptp, s32 ppb) { struct ixgbe_adapter *adapter = container_of(ptp, struct ixgbe_adapter, ptp_caps); struct ixgbe_hw *hw = &adapter->hw; - u64 freq; - u32 diff, incval; + u64 freq, incval; + u32 diff; int neg_adj = 0; if (ppb < 0) { @@ -235,12 +387,16 @@ static int ixgbe_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) switch (hw->mac.type) { case ixgbe_mac_X540: - IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, incval); + if (incval > 0xFFFFFFFFULL) + e_dev_warn("PTP ppb adjusted SYSTIME rate overflowed!\n"); + IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, (u32)incval); break; case ixgbe_mac_82599EB: + if (incval > 0x00FFFFFFULL) + e_dev_warn("PTP ppb adjusted SYSTIME rate overflowed!\n"); IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, (1 << IXGBE_INCPER_SHIFT_82599) | - incval); + ((u32)incval & 0x00FFFFFFUL)); break; default: break; @@ -250,6 +406,43 @@ static int ixgbe_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) } /** + * ixgbe_ptp_adjfreq_X550 + * @ptp: the ptp clock structure + * @ppb: parts per billion adjustment from base + * + * adjust the frequency of the SYSTIME registers by the indicated ppb from base + * frequency + */ +static int ixgbe_ptp_adjfreq_X550(struct ptp_clock_info *ptp, s32 ppb) +{ + struct ixgbe_adapter *adapter = + container_of(ptp, struct ixgbe_adapter, ptp_caps); + struct ixgbe_hw *hw = &adapter->hw; + int neg_adj = 0; + u64 rate = IXGBE_X550_BASE_PERIOD; + u32 inca; + + if (ppb < 0) { + neg_adj = 1; + ppb = -ppb; + } + rate *= ppb; + rate = div_u64(rate, 1000000000ULL); + + /* warn if rate is too large */ + if (rate >= INCVALUE_MASK) + e_dev_warn("PTP ppb adjusted SYSTIME rate overflowed!\n"); + + inca = rate & INCVALUE_MASK; + if (neg_adj) + inca |= ISGN; + + IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, inca); + + return 0; +} + +/** * ixgbe_ptp_adjtime * @ptp: the ptp clock structure * @delta: offset to adjust the cycle counter by @@ -263,10 +456,11 @@ static int ixgbe_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) unsigned long flags; spin_lock_irqsave(&adapter->tmreg_lock, flags); - timecounter_adjtime(&adapter->tc, delta); + timecounter_adjtime(&adapter->hw_tc, delta); spin_unlock_irqrestore(&adapter->tmreg_lock, flags); - ixgbe_ptp_setup_sdp(adapter); + if (adapter->ptp_setup_sdp) + adapter->ptp_setup_sdp(adapter); return 0; } @@ -283,11 +477,11 @@ static int ixgbe_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) { struct ixgbe_adapter *adapter = container_of(ptp, struct ixgbe_adapter, ptp_caps); - u64 ns; unsigned long flags; + u64 ns; spin_lock_irqsave(&adapter->tmreg_lock, flags); - ns = timecounter_read(&adapter->tc); + ns = timecounter_read(&adapter->hw_tc); spin_unlock_irqrestore(&adapter->tmreg_lock, flags); *ts = ns_to_timespec64(ns); @@ -308,17 +502,16 @@ static int ixgbe_ptp_settime(struct ptp_clock_info *ptp, { struct ixgbe_adapter *adapter = container_of(ptp, struct ixgbe_adapter, ptp_caps); - u64 ns; unsigned long flags; - - ns = timespec64_to_ns(ts); + u64 ns = timespec64_to_ns(ts); /* reset the timecounter */ spin_lock_irqsave(&adapter->tmreg_lock, flags); - timecounter_init(&adapter->tc, &adapter->cc, ns); + timecounter_init(&adapter->hw_tc, &adapter->hw_cc, ns); spin_unlock_irqrestore(&adapter->tmreg_lock, flags); - ixgbe_ptp_setup_sdp(adapter); + if (adapter->ptp_setup_sdp) + adapter->ptp_setup_sdp(adapter); return 0; } @@ -343,33 +536,26 @@ static int ixgbe_ptp_feature_enable(struct ptp_clock_info *ptp, * event when the clock SDP 
triggers. Clear mask when PPS is * disabled */ - if (rq->type == PTP_CLK_REQ_PPS) { - switch (adapter->hw.mac.type) { - case ixgbe_mac_X540: - if (on) - adapter->flags2 |= IXGBE_FLAG2_PTP_PPS_ENABLED; - else - adapter->flags2 &= ~IXGBE_FLAG2_PTP_PPS_ENABLED; - - ixgbe_ptp_setup_sdp(adapter); - return 0; - default: - break; - } - } + if (rq->type != PTP_CLK_REQ_PPS || !adapter->ptp_setup_sdp) + return -ENOTSUPP; + + if (on) + adapter->flags2 |= IXGBE_FLAG2_PTP_PPS_ENABLED; + else + adapter->flags2 &= ~IXGBE_FLAG2_PTP_PPS_ENABLED; - return -ENOTSUPP; + adapter->ptp_setup_sdp(adapter); + return 0; } /** * ixgbe_ptp_check_pps_event * @adapter: the private adapter structure - * @eicr: the interrupt cause register value * * This function is called by the interrupt routine when checking for * interrupts. It will check and handle a pps event. */ -void ixgbe_ptp_check_pps_event(struct ixgbe_adapter *adapter, u32 eicr) +void ixgbe_ptp_check_pps_event(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct ptp_clock_event event; @@ -425,7 +611,9 @@ void ixgbe_ptp_rx_hang(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 tsyncrxctl = IXGBE_READ_REG(hw, IXGBE_TSYNCRXCTL); + struct ixgbe_ring *rx_ring; unsigned long rx_event; + int n; /* if we don't have a valid timestamp in the registers, just update the * timeout counter and exit @@ -437,19 +625,43 @@ void ixgbe_ptp_rx_hang(struct ixgbe_adapter *adapter) /* determine the most recent watchdog or rx_timestamp event */ rx_event = adapter->last_rx_ptp_check; - if (time_after(adapter->last_rx_timestamp, rx_event)) - rx_event = adapter->last_rx_timestamp; + for (n = 0; n < adapter->num_rx_queues; n++) { + rx_ring = adapter->rx_ring[n]; + if (time_after(rx_ring->last_rx_timestamp, rx_event)) + rx_event = rx_ring->last_rx_timestamp; + } /* only need to read the high RXSTMP register to clear the lock */ - if (time_is_before_jiffies(rx_event + 5*HZ)) { + if (time_is_before_jiffies(rx_event + 5 * HZ)) { IXGBE_READ_REG(hw, IXGBE_RXSTMPH); adapter->last_rx_ptp_check = jiffies; + adapter->rx_hwtstamp_cleared++; e_warn(drv, "clearing RX Timestamp hang\n"); } } /** + * ixgbe_ptp_clear_tx_timestamp - utility function to clear Tx timestamp state + * @adapter: the private adapter structure + * + * This function should be called whenever the state related to a Tx timestamp + * needs to be cleared. This helps ensure that all related bits are reset for + * the next Tx timestamp event. 
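For context only (a userspace sketch, not part of the patch; the socket creation and interface binding are assumed to exist elsewhere, and header availability varies slightly between libc versions): the Tx timestamps that the reworked ixgbe_ptp_tx_hwtstamp() path hands to the stack via skb_tstamp_tx() are what an application eventually reads back from the socket error queue after enabling SO_TIMESTAMPING.

	#include <time.h>
	#include <sys/socket.h>
	#include <linux/net_tstamp.h>

	/* Sketch only: enable hardware Tx timestamps on an already-created
	 * socket, then pull one timestamp back off the error queue after a send.
	 */
	static int enable_hw_tx_timestamps(int fd)
	{
		int flags = SOF_TIMESTAMPING_TX_HARDWARE |
			    SOF_TIMESTAMPING_RAW_HARDWARE;

		return setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
				  &flags, sizeof(flags));
	}

	static int read_hw_tx_timestamp(int fd, struct timespec *hwts)
	{
		char data[256], control[512];
		struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
		struct msghdr msg = {
			.msg_iov = &iov, .msg_iovlen = 1,
			.msg_control = control, .msg_controllen = sizeof(control),
		};
		struct cmsghdr *cmsg;

		if (recvmsg(fd, &msg, MSG_ERRQUEUE) < 0)
			return -1;

		for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
			if (cmsg->cmsg_level == SOL_SOCKET &&
			    cmsg->cmsg_type == SCM_TIMESTAMPING) {
				/* payload is three struct timespec; index 2 is
				 * the raw hardware timestamp
				 */
				struct timespec *ts = (struct timespec *)CMSG_DATA(cmsg);

				*hwts = ts[2];
				return 0;
			}
		}
		return -1;
	}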
+ */ +static void ixgbe_ptp_clear_tx_timestamp(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + + IXGBE_READ_REG(hw, IXGBE_TXSTMPH); + if (adapter->ptp_tx_skb) { + dev_kfree_skb_any(adapter->ptp_tx_skb); + adapter->ptp_tx_skb = NULL; + } + clear_bit_unlock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state); +} + +/** * ixgbe_ptp_tx_hwtstamp - utility function which checks for TX time stamp * @adapter: the private adapter struct * @@ -461,23 +673,15 @@ static void ixgbe_ptp_tx_hwtstamp(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct skb_shared_hwtstamps shhwtstamps; - u64 regval = 0, ns; - unsigned long flags; + u64 regval = 0; regval |= (u64)IXGBE_READ_REG(hw, IXGBE_TXSTMPL); regval |= (u64)IXGBE_READ_REG(hw, IXGBE_TXSTMPH) << 32; - spin_lock_irqsave(&adapter->tmreg_lock, flags); - ns = timecounter_cyc2time(&adapter->tc, regval); - spin_unlock_irqrestore(&adapter->tmreg_lock, flags); - - memset(&shhwtstamps, 0, sizeof(shhwtstamps)); - shhwtstamps.hwtstamp = ns_to_ktime(ns); + ixgbe_ptp_convert_to_hwtstamp(adapter, &shhwtstamps, regval); skb_tstamp_tx(adapter->ptp_tx_skb, &shhwtstamps); - dev_kfree_skb_any(adapter->ptp_tx_skb); - adapter->ptp_tx_skb = NULL; - clear_bit_unlock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state); + ixgbe_ptp_clear_tx_timestamp(adapter); } /** @@ -497,38 +701,85 @@ static void ixgbe_ptp_tx_hwtstamp_work(struct work_struct *work) IXGBE_PTP_TX_TIMEOUT); u32 tsynctxctl; - if (timeout) { - dev_kfree_skb_any(adapter->ptp_tx_skb); - adapter->ptp_tx_skb = NULL; - clear_bit_unlock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state); - e_warn(drv, "clearing Tx Timestamp hang\n"); + /* we have to have a valid skb to poll for a timestamp */ + if (!adapter->ptp_tx_skb) { + ixgbe_ptp_clear_tx_timestamp(adapter); return; } + /* stop polling once we have a valid timestamp */ tsynctxctl = IXGBE_READ_REG(hw, IXGBE_TSYNCTXCTL); - if (tsynctxctl & IXGBE_TSYNCTXCTL_VALID) + if (tsynctxctl & IXGBE_TSYNCTXCTL_VALID) { ixgbe_ptp_tx_hwtstamp(adapter); - else + return; + } + + if (timeout) { + ixgbe_ptp_clear_tx_timestamp(adapter); + adapter->tx_hwtstamp_timeouts++; + e_warn(drv, "clearing Tx Timestamp hang\n"); + } else { /* reschedule to keep checking if it's not available yet */ schedule_work(&adapter->ptp_tx_work); + } } /** - * ixgbe_ptp_rx_hwtstamp - utility function which checks for RX time stamp - * @adapter: pointer to adapter struct + * ixgbe_ptp_rx_pktstamp - utility function to get RX time stamp from buffer + * @q_vector: structure containing interrupt and ring information + * @skb: the packet + * + * This function will be called by the Rx routine of the timestamp for this + * packet is stored in the buffer. The value is stored in little endian format + * starting at the end of the packet data. + */ +void ixgbe_ptp_rx_pktstamp(struct ixgbe_q_vector *q_vector, + struct sk_buff *skb) +{ + __le64 regval; + + /* copy the bits out of the skb, and then trim the skb length */ + skb_copy_bits(skb, skb->len - IXGBE_TS_HDR_LEN, ®val, + IXGBE_TS_HDR_LEN); + __pskb_trim(skb, skb->len - IXGBE_TS_HDR_LEN); + + /* The timestamp is recorded in little endian format, and is stored at + * the end of the packet. 
+ * + * DWORD: N N + 1 N + 2 + * Field: End of Packet SYSTIMH SYSTIML + */ + ixgbe_ptp_convert_to_hwtstamp(q_vector->adapter, skb_hwtstamps(skb), + le64_to_cpu(regval)); +} + +/** + * ixgbe_ptp_rx_rgtstamp - utility function which checks for RX time stamp + * @q_vector: structure containing interrupt and ring information * @skb: particular skb to send timestamp with * * if the timestamp is valid, we convert it into the timecounter ns * value, then store that result into the shhwtstamps structure which * is passed up the network stack */ -void ixgbe_ptp_rx_hwtstamp(struct ixgbe_adapter *adapter, struct sk_buff *skb) +void ixgbe_ptp_rx_rgtstamp(struct ixgbe_q_vector *q_vector, + struct sk_buff *skb) { - struct ixgbe_hw *hw = &adapter->hw; - struct skb_shared_hwtstamps *shhwtstamps; - u64 regval = 0, ns; + struct ixgbe_adapter *adapter; + struct ixgbe_hw *hw; + u64 regval = 0; u32 tsyncrxctl; - unsigned long flags; + + /* we cannot process timestamps on a ring without a q_vector */ + if (!q_vector || !q_vector->adapter) + return; + + adapter = q_vector->adapter; + hw = &adapter->hw; + + /* Read the tsyncrxctl register afterwards in order to prevent taking an + * I/O hit on every packet. + */ tsyncrxctl = IXGBE_READ_REG(hw, IXGBE_TSYNCRXCTL); if (!(tsyncrxctl & IXGBE_TSYNCRXCTL_VALID)) @@ -537,17 +788,7 @@ void ixgbe_ptp_rx_hwtstamp(struct ixgbe_adapter *adapter, struct sk_buff *skb) regval |= (u64)IXGBE_READ_REG(hw, IXGBE_RXSTMPL); regval |= (u64)IXGBE_READ_REG(hw, IXGBE_RXSTMPH) << 32; - spin_lock_irqsave(&adapter->tmreg_lock, flags); - ns = timecounter_cyc2time(&adapter->tc, regval); - spin_unlock_irqrestore(&adapter->tmreg_lock, flags); - - shhwtstamps = skb_hwtstamps(skb); - shhwtstamps->hwtstamp = ns_to_ktime(ns); - - /* Update the last_rx_timestamp timer in order to enable watchdog check - * for error case of latched timestamp on a dropped packet. - */ - adapter->last_rx_timestamp = jiffies; + ixgbe_ptp_convert_to_hwtstamp(adapter, skb_hwtstamps(skb), regval); } int ixgbe_ptp_get_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr) @@ -610,14 +851,20 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, case HWTSTAMP_FILTER_NONE: tsync_rx_ctl = 0; tsync_rx_mtrl = 0; + adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED | + IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); break; case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L4_V1; tsync_rx_mtrl |= IXGBE_RXMTRL_V1_SYNC_MSG; + adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED | + IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); break; case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L4_V1; tsync_rx_mtrl |= IXGBE_RXMTRL_V1_DELAY_REQ_MSG; + adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED | + IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); break; case HWTSTAMP_FILTER_PTP_V2_EVENT: case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: @@ -631,9 +878,21 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_EVENT_V2; is_l2 = true; config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED | + IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); break; case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: case HWTSTAMP_FILTER_ALL: + /* The X550 controller is capable of timestamping all packets, + * which allows it to accept any filter. 
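For context only (a userspace sketch, not part of the patch; "eth0" and the pre-existing socket fd are placeholders): the HWTSTAMP_FILTER_ALL mode discussed above is what an application requests through the SIOCSHWTSTAMP ioctl, and the driver reports back in the config whichever filter it actually applied, which on X550 is the accept-everything behaviour added in this hunk.

	#include <string.h>
	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <net/if.h>
	#include <linux/net_tstamp.h>
	#include <linux/sockios.h>

	/* Sketch only: request Tx timestamping plus an "all packets" Rx filter.
	 * On return the driver reports the filter it really applied in cfg.
	 */
	static int request_hw_timestamping(int fd)
	{
		struct hwtstamp_config cfg;
		struct ifreq ifr;

		memset(&cfg, 0, sizeof(cfg));
		cfg.tx_type = HWTSTAMP_TX_ON;
		cfg.rx_filter = HWTSTAMP_FILTER_ALL;

		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
		ifr.ifr_data = (char *)&cfg;

		return ioctl(fd, SIOCSHWTSTAMP, &ifr);
	}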
+ */ + if (hw->mac.type >= ixgbe_mac_X550) { + tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_ALL; + config->rx_filter = HWTSTAMP_FILTER_ALL; + adapter->flags |= IXGBE_FLAG_RX_HWTSTAMP_ENABLED; + break; + } + /* fall through */ default: /* * register RXMTRL must be set in order to do V1 packets, @@ -641,16 +900,46 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, * Delay_Req messages and hardware does not support * timestamping all packets => return error */ + adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED | + IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); config->rx_filter = HWTSTAMP_FILTER_NONE; return -ERANGE; } if (hw->mac.type == ixgbe_mac_82598EB) { + adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED | + IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER); if (tsync_rx_ctl | tsync_tx_ctl) return -ERANGE; return 0; } + /* Per-packet timestamping only works if the filter is set to all + * packets. Since this is desired, always timestamp all packets as long + * as any Rx filter was configured. + */ + switch (hw->mac.type) { + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + /* enable timestamping all packets only if at least some + * packets were requested. Otherwise, play nice and disable + * timestamping + */ + if (config->rx_filter == HWTSTAMP_FILTER_NONE) + break; + + tsync_rx_ctl = IXGBE_TSYNCRXCTL_ENABLED | + IXGBE_TSYNCRXCTL_TYPE_ALL | + IXGBE_TSYNCRXCTL_TSIP_UT_EN; + config->rx_filter = HWTSTAMP_FILTER_ALL; + adapter->flags |= IXGBE_FLAG_RX_HWTSTAMP_ENABLED; + adapter->flags &= ~IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER; + is_l2 = true; + break; + default: + break; + } + /* define ethertype filter for timestamping L2 packets */ if (is_l2) IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_1588), @@ -678,8 +967,8 @@ static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter, IXGBE_WRITE_FLUSH(hw); /* clear TX/RX time stamp registers, just to be sure */ - regval = IXGBE_READ_REG(hw, IXGBE_TXSTMPH); - regval = IXGBE_READ_REG(hw, IXGBE_RXSTMPH); + ixgbe_ptp_clear_tx_timestamp(adapter); + IXGBE_READ_REG(hw, IXGBE_RXSTMPH); return 0; } @@ -712,23 +1001,9 @@ int ixgbe_ptp_set_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr) -EFAULT : 0; } -/** - * ixgbe_ptp_start_cyclecounter - create the cycle counter from hw - * @adapter: pointer to the adapter structure - * - * This function should be called to set the proper values for the TIMINCA - * register and tell the cyclecounter structure what the tick rate of SYSTIME - * is. It does not directly modify SYSTIME registers or the timecounter - * structure. It should be called whenever a new TIMINCA value is necessary, - * such as during initialization or when the link speed changes. 
- */ -void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter) +static void ixgbe_ptp_link_speed_adjust(struct ixgbe_adapter *adapter, + u32 *shift, u32 *incval) { - struct ixgbe_hw *hw = &adapter->hw; - u32 incval = 0; - u32 shift = 0; - unsigned long flags; - /** * Scale the NIC cycle counter by a large factor so that * relatively small corrections to the frequency can be added @@ -745,36 +1020,98 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter) */ switch (adapter->link_speed) { case IXGBE_LINK_SPEED_100_FULL: - incval = IXGBE_INCVAL_100; - shift = IXGBE_INCVAL_SHIFT_100; + *shift = IXGBE_INCVAL_SHIFT_100; + *incval = IXGBE_INCVAL_100; break; case IXGBE_LINK_SPEED_1GB_FULL: - incval = IXGBE_INCVAL_1GB; - shift = IXGBE_INCVAL_SHIFT_1GB; + *shift = IXGBE_INCVAL_SHIFT_1GB; + *incval = IXGBE_INCVAL_1GB; break; case IXGBE_LINK_SPEED_10GB_FULL: default: - incval = IXGBE_INCVAL_10GB; - shift = IXGBE_INCVAL_SHIFT_10GB; + *shift = IXGBE_INCVAL_SHIFT_10GB; + *incval = IXGBE_INCVAL_10GB; break; } +} - /** - * Modify the calculated values to fit within the correct - * number of bits specified by the hardware. The 82599 doesn't - * have the same space as the X540, so bitshift the calculated - * values to fit. +/** + * ixgbe_ptp_start_cyclecounter - create the cycle counter from hw + * @adapter: pointer to the adapter structure + * + * This function should be called to set the proper values for the TIMINCA + * register and tell the cyclecounter structure what the tick rate of SYSTIME + * is. It does not directly modify SYSTIME registers or the timecounter + * structure. It should be called whenever a new TIMINCA value is necessary, + * such as during initialization or when the link speed changes. + */ +void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct cyclecounter cc; + unsigned long flags; + u32 incval = 0; + u32 tsauxc = 0; + u32 fuse0 = 0; + + /* For some of the boards below this mask is technically incorrect. + * The timestamp mask overflows at approximately 61bits. However the + * particular hardware does not overflow on an even bitmask value. + * Instead, it overflows due to conversion of upper 32bits billions of + * cycles. Timecounters are not really intended for this purpose so + * they do not properly function if the overflow point isn't 2^N-1. + * However, the actual SYSTIME values in question take ~138 years to + * overflow. In practice this means they won't actually overflow. A + * proper fix to this problem would require modification of the + * timecounter delta calculations. */ + cc.mask = CLOCKSOURCE_MASK(64); + cc.mult = 1; + cc.shift = 0; + switch (hw->mac.type) { + case ixgbe_mac_X550EM_x: + /* SYSTIME assumes X550EM_x board frequency is 300Mhz, and is + * designed to represent seconds and nanoseconds when this is + * the case. However, some revisions of hardware have a 400Mhz + * clock and we have to compensate for this frequency + * variation using corrected mult and shift values. 
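As a rough illustration (not from the driver): a timecounter turns raw counter deltas into nanoseconds as ns = (delta * mult) >> shift, so the mult = 3, shift = 2 pair selected below scales readings by 3/4, which, per the comment above, is what brings a 400 MHz part's faster-running SYSTIME back to the nominal 300 MHz based layout.

	#include <stdint.h>

	/* Sketch only: the cyclecounter conversion used by the timecounter.
	 * With mult = 3 and shift = 2 the result is delta * 3 / 4; mask and
	 * fractional-cycle carry handling are omitted here.
	 */
	static uint64_t cycles_to_ns(uint64_t cycle_delta, uint32_t mult, uint32_t shift)
	{
		return (cycle_delta * mult) >> shift;
	}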
+ */ + fuse0 = IXGBE_READ_REG(hw, IXGBE_FUSES0_GROUP(0)); + if (!(fuse0 & IXGBE_FUSES0_300MHZ)) { + cc.mult = 3; + cc.shift = 2; + } + /* fallthrough */ + case ixgbe_mac_X550: + cc.read = ixgbe_ptp_read_X550; + + /* enable SYSTIME counter */ + IXGBE_WRITE_REG(hw, IXGBE_SYSTIMR, 0); + IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0); + IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0); + tsauxc = IXGBE_READ_REG(hw, IXGBE_TSAUXC); + IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, + tsauxc & ~IXGBE_TSAUXC_DISABLE_SYSTIME); + IXGBE_WRITE_REG(hw, IXGBE_TSIM, IXGBE_TSIM_TXTS); + IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_TIMESYNC); + + IXGBE_WRITE_FLUSH(hw); + break; case ixgbe_mac_X540: + cc.read = ixgbe_ptp_read_82599; + + ixgbe_ptp_link_speed_adjust(adapter, &cc.shift, &incval); IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, incval); break; case ixgbe_mac_82599EB: + cc.read = ixgbe_ptp_read_82599; + + ixgbe_ptp_link_speed_adjust(adapter, &cc.shift, &incval); incval >>= IXGBE_INCVAL_SHIFT_82599; - shift -= IXGBE_INCVAL_SHIFT_82599; + cc.shift -= IXGBE_INCVAL_SHIFT_82599; IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, - (1 << IXGBE_INCPER_SHIFT_82599) | - incval); + (1 << IXGBE_INCPER_SHIFT_82599) | incval); break; default: /* other devices aren't supported */ @@ -787,13 +1124,7 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter) /* need lock to prevent incorrect read while modifying cyclecounter */ spin_lock_irqsave(&adapter->tmreg_lock, flags); - - memset(&adapter->cc, 0, sizeof(adapter->cc)); - adapter->cc.read = ixgbe_ptp_read; - adapter->cc.mask = CYCLECOUNTER_MASK(64); - adapter->cc.shift = shift; - adapter->cc.mult = 1; - + memcpy(&adapter->hw_cc, &cc, sizeof(adapter->hw_cc)); spin_unlock_irqrestore(&adapter->tmreg_lock, flags); } @@ -814,29 +1145,27 @@ void ixgbe_ptp_reset(struct ixgbe_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; unsigned long flags; - /* set SYSTIME registers to 0 just in case */ - IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0x00000000); - IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0x00000000); - IXGBE_WRITE_FLUSH(hw); - /* reset the hardware timestamping mode */ ixgbe_ptp_set_timestamp_mode(adapter, &adapter->tstamp_config); + /* 82598 does not support PTP */ + if (hw->mac.type == ixgbe_mac_82598EB) + return; + ixgbe_ptp_start_cyclecounter(adapter); spin_lock_irqsave(&adapter->tmreg_lock, flags); - - /* reset the ns time counter */ - timecounter_init(&adapter->tc, &adapter->cc, + timecounter_init(&adapter->hw_tc, &adapter->hw_cc, ktime_to_ns(ktime_get_real())); - spin_unlock_irqrestore(&adapter->tmreg_lock, flags); - /* - * Now that the shift has been calculated and the systime + adapter->last_overflow_check = jiffies; + + /* Now that the shift has been calculated and the systime * registers reset, (re-)enable the Clock out feature */ - ixgbe_ptp_setup_sdp(adapter); + if (adapter->ptp_setup_sdp) + adapter->ptp_setup_sdp(adapter); } /** @@ -845,11 +1174,11 @@ void ixgbe_ptp_reset(struct ixgbe_adapter *adapter) * * This function performs setup of the user entry point function table and * initializes the PTP clock device, which is used to access the clock-like - * features of the PTP core. It will be called by ixgbe_ptp_init, only if - * there isn't already a clock device (such as after a suspend/resume cycle, - * where the clock device wasn't destroyed). + * features of the PTP core. It will be called by ixgbe_ptp_init, and may + * reuse a previously initialized clock (such as during a suspend/resume + * cycle). 
*/ -static int ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter) +static long ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter) { struct net_device *netdev = adapter->netdev; long err; @@ -869,11 +1198,12 @@ static int ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter) adapter->ptp_caps.n_ext_ts = 0; adapter->ptp_caps.n_per_out = 0; adapter->ptp_caps.pps = 1; - adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq; + adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq_82599; adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime; adapter->ptp_caps.gettime64 = ixgbe_ptp_gettime; adapter->ptp_caps.settime64 = ixgbe_ptp_settime; adapter->ptp_caps.enable = ixgbe_ptp_feature_enable; + adapter->ptp_setup_sdp = ixgbe_ptp_setup_sdp_x540; break; case ixgbe_mac_82599EB: snprintf(adapter->ptp_caps.name, @@ -885,14 +1215,31 @@ static int ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter) adapter->ptp_caps.n_ext_ts = 0; adapter->ptp_caps.n_per_out = 0; adapter->ptp_caps.pps = 0; - adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq; + adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq_82599; + adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime; + adapter->ptp_caps.gettime64 = ixgbe_ptp_gettime; + adapter->ptp_caps.settime64 = ixgbe_ptp_settime; + adapter->ptp_caps.enable = ixgbe_ptp_feature_enable; + break; + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + snprintf(adapter->ptp_caps.name, 16, "%s", netdev->name); + adapter->ptp_caps.owner = THIS_MODULE; + adapter->ptp_caps.max_adj = 30000000; + adapter->ptp_caps.n_alarm = 0; + adapter->ptp_caps.n_ext_ts = 0; + adapter->ptp_caps.n_per_out = 0; + adapter->ptp_caps.pps = 0; + adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq_X550; adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime; adapter->ptp_caps.gettime64 = ixgbe_ptp_gettime; adapter->ptp_caps.settime64 = ixgbe_ptp_settime; adapter->ptp_caps.enable = ixgbe_ptp_feature_enable; + adapter->ptp_setup_sdp = NULL; break; default: adapter->ptp_clock = NULL; + adapter->ptp_setup_sdp = NULL; return -EOPNOTSUPP; } @@ -961,18 +1308,13 @@ void ixgbe_ptp_suspend(struct ixgbe_adapter *adapter) if (!test_and_clear_bit(__IXGBE_PTP_RUNNING, &adapter->state)) return; - /* since this might be called in suspend, we don't clear the state, - * but simply reset the auxiliary PPS signal control register - */ - IXGBE_WRITE_REG(&adapter->hw, IXGBE_TSAUXC, 0x0); + adapter->flags2 &= ~IXGBE_FLAG2_PTP_PPS_ENABLED; + if (adapter->ptp_setup_sdp) + adapter->ptp_setup_sdp(adapter); /* ensure that we cancel any pending PTP Tx work item in progress */ cancel_work_sync(&adapter->ptp_tx_work); - if (adapter->ptp_tx_skb) { - dev_kfree_skb_any(adapter->ptp_tx_skb); - adapter->ptp_tx_skb = NULL; - clear_bit_unlock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state); - } + ixgbe_ptp_clear_tx_timestamp(adapter); } /** diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index fcd8b27a0ccb..31de6cf7adb0 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2014 Intel Corporation. + Copyright(c) 1999 - 2015 Intel Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -130,6 +130,38 @@ static int __ixgbe_enable_sriov(struct ixgbe_adapter *adapter) return -ENOMEM; } +/** + * ixgbe_get_vfs - Find and take references to all vf devices + * @adapter: Pointer to adapter struct + */ +static void ixgbe_get_vfs(struct ixgbe_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + u16 vendor = pdev->vendor; + struct pci_dev *vfdev; + int vf = 0; + u16 vf_id; + int pos; + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); + if (!pos) + return; + pci_read_config_word(pdev, pos + PCI_SRIOV_VF_DID, &vf_id); + + vfdev = pci_get_device(vendor, vf_id, NULL); + for (; vfdev; vfdev = pci_get_device(vendor, vf_id, vfdev)) { + if (!vfdev->is_virtfn) + continue; + if (vfdev->physfn != pdev) + continue; + if (vf >= adapter->num_vfs) + continue; + pci_dev_get(vfdev); + adapter->vfinfo[vf].vfdev = vfdev; + ++vf; + } +} + /* Note this function is called when the user wants to enable SR-IOV * VFs using the now deprecated module parameter */ @@ -170,8 +202,10 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter) } } - if (!__ixgbe_enable_sriov(adapter)) + if (!__ixgbe_enable_sriov(adapter)) { + ixgbe_get_vfs(adapter); return; + } /* If we have gotten to this point then there is no memory available * to manage the VF devices - print message and bail. @@ -184,6 +218,7 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter) #endif /* #ifdef CONFIG_PCI_IOV */ int ixgbe_disable_sriov(struct ixgbe_adapter *adapter) { + unsigned int num_vfs = adapter->num_vfs, vf; struct ixgbe_hw *hw = &adapter->hw; u32 gpie; u32 vmdctl; @@ -192,6 +227,16 @@ int ixgbe_disable_sriov(struct ixgbe_adapter *adapter) /* set num VFs to 0 to prevent access to vfinfo */ adapter->num_vfs = 0; + /* put the reference to all of the vf devices */ + for (vf = 0; vf < num_vfs; ++vf) { + struct pci_dev *vfdev = adapter->vfinfo[vf].vfdev; + + if (!vfdev) + continue; + adapter->vfinfo[vf].vfdev = NULL; + pci_dev_put(vfdev); + } + /* free VF control structures */ kfree(adapter->vfinfo); adapter->vfinfo = NULL; @@ -289,6 +334,7 @@ static int ixgbe_pci_sriov_enable(struct pci_dev *dev, int num_vfs) e_dev_warn("Failed to enable PCI sriov: %d\n", err); return err; } + ixgbe_get_vfs(adapter); ixgbe_sriov_reinit(adapter); return num_vfs; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 995f03107eac..1329eddfc9ce 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -1020,6 +1020,7 @@ struct ixgbe_thermal_sensor_data { #define IXGBE_TXSTMPH 0x08C08 /* Tx timestamp value High - RO */ #define IXGBE_SYSTIML 0x08C0C /* System time register Low - RO */ #define IXGBE_SYSTIMH 0x08C10 /* System time register High - RO */ +#define IXGBE_SYSTIMR 0x08C58 /* System time register Residue - RO */ #define IXGBE_TIMINCA 0x08C14 /* Increment attributes register - RW */ #define IXGBE_TIMADJL 0x08C18 /* Time Adjustment Offset register Low - RW */ #define IXGBE_TIMADJH 0x08C1C /* Time Adjustment Offset register High - RW */ @@ -1036,6 +1037,7 @@ struct ixgbe_thermal_sensor_data { #define IXGBE_AUXSTMPH0 0x08C40 /* Auxiliary Time Stamp 0 register High - RO */ #define IXGBE_AUXSTMPL1 0x08C44 /* Auxiliary Time Stamp 1 register Low - RO */ #define IXGBE_AUXSTMPH1 0x08C48 /* Auxiliary Time Stamp 1 register High - RO */ +#define IXGBE_TSIM 0x08C68 /* TimeSync Interrupt Mask 
Register - RW */ /* Diagnostic Registers */ #define IXGBE_RDSTATCTL 0x02C20 @@ -1345,7 +1347,10 @@ struct ixgbe_thermal_sensor_data { #define IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_MASK 0xFF01 /* int chip-wide mask */ #define IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_FLAG 0xFC01 /* int chip-wide mask */ #define IXGBE_MDIO_GLOBAL_ALARM_1 0xCC00 /* Global alarm 1 */ +#define IXGBE_MDIO_GLOBAL_ALM_1_DEV_FAULT 0x0010 /* device fault */ #define IXGBE_MDIO_GLOBAL_ALM_1_HI_TMP_FAIL 0x4000 /* high temp failure */ +#define IXGBE_MDIO_GLOBAL_FAULT_MSG 0xC850 /* global fault msg */ +#define IXGBE_MDIO_GLOBAL_FAULT_MSG_HI_TMP 0x8007 /* high temp failure */ #define IXGBE_MDIO_GLOBAL_INT_MASK 0xD400 /* Global int mask */ /* autoneg vendor alarm int enable */ #define IXGBE_MDIO_GLOBAL_AN_VEN_ALM_INT_EN 0x1000 @@ -1353,6 +1358,7 @@ struct ixgbe_thermal_sensor_data { #define IXGBE_MDIO_GLOBAL_VEN_ALM_INT_EN 0x1 /* vendor alarm int enable */ #define IXGBE_MDIO_GLOBAL_STD_ALM2_INT 0x200 /* vendor alarm2 int mask */ #define IXGBE_MDIO_GLOBAL_INT_HI_TEMP_EN 0x4000 /* int high temp enable */ +#define IXGBE_MDIO_GLOBAL_INT_DEV_FAULT_EN 0x0010 /*int dev fault enable */ #define IXGBE_MDIO_PMA_PMD_SDA_SCL_ADDR 0xC30A /* PHY_XS SDA/SCL Addr Reg */ #define IXGBE_MDIO_PMA_PMD_SDA_SCL_DATA 0xC30B /* PHY_XS SDA/SCL Data Reg */ @@ -2209,6 +2215,7 @@ enum { #define IXGBE_TSAUXC_EN_CLK 0x00000004 #define IXGBE_TSAUXC_SYNCLK 0x00000008 #define IXGBE_TSAUXC_SDP0_INT 0x00000040 +#define IXGBE_TSAUXC_DISABLE_SYSTIME 0x80000000 #define IXGBE_TSYNCTXCTL_VALID 0x00000001 /* Tx timestamp valid */ #define IXGBE_TSYNCTXCTL_ENABLED 0x00000010 /* Tx timestamping enabled */ @@ -2218,8 +2225,12 @@ enum { #define IXGBE_TSYNCRXCTL_TYPE_L2_V2 0x00 #define IXGBE_TSYNCRXCTL_TYPE_L4_V1 0x02 #define IXGBE_TSYNCRXCTL_TYPE_L2_L4_V2 0x04 +#define IXGBE_TSYNCRXCTL_TYPE_ALL 0x08 #define IXGBE_TSYNCRXCTL_TYPE_EVENT_V2 0x0A #define IXGBE_TSYNCRXCTL_ENABLED 0x00000010 /* Rx Timestamping enabled */ +#define IXGBE_TSYNCRXCTL_TSIP_UT_EN 0x00800000 /* Rx Timestamp in Packet */ + +#define IXGBE_TSIM_TXTS 0x00000002 #define IXGBE_RXMTRL_V1_CTRLT_MASK 0x000000FF #define IXGBE_RXMTRL_V1_SYNC_MSG 0x00 @@ -2332,6 +2343,7 @@ enum { #define IXGBE_RXD_STAT_UDPV 0x400 /* Valid UDP checksum */ #define IXGBE_RXD_STAT_DYNINT 0x800 /* Pkt caused INT via DYNINT */ #define IXGBE_RXD_STAT_LLINT 0x800 /* Pkt caused Low Latency Interrupt */ +#define IXGBE_RXD_STAT_TSIP 0x08000 /* Time Stamp in packet buffer */ #define IXGBE_RXD_STAT_TS 0x10000 /* Time Stamp */ #define IXGBE_RXD_STAT_SECP 0x20000 /* Security Processing */ #define IXGBE_RXD_STAT_LB 0x40000 /* Loopback Status */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c index c1d4584f6469..bf8225ceab8e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c @@ -57,8 +57,7 @@ s32 ixgbe_get_invariants_X540(struct ixgbe_hw *hw) struct ixgbe_phy_info *phy = &hw->phy; /* set_phy_power was set by default to NULL */ - if (!ixgbe_mng_present(hw)) - phy->ops.set_phy_power = ixgbe_set_copper_phy_power; + phy->ops.set_phy_power = ixgbe_set_copper_phy_power; mac->mcft_size = IXGBE_X540_MC_TBL_SIZE; mac->vft_size = IXGBE_X540_VFT_TBL_SIZE; @@ -110,13 +109,14 @@ mac_reset_top: ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL); IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl); IXGBE_WRITE_FLUSH(hw); + usleep_range(1000, 1200); /* Poll for reset bit to self-clear indicating reset is complete */ for (i = 0; i < 10; i++) { - udelay(1); ctrl = IXGBE_READ_REG(hw, 
IXGBE_CTRL); if (!(ctrl & IXGBE_CTRL_RST_MASK)) break; + udelay(1); } if (ctrl & IXGBE_CTRL_RST_MASK) { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index ebe0ac950b14..f4ef0d1a5dbe 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -26,6 +26,8 @@ #include "ixgbe_common.h" #include "ixgbe_phy.h" +static s32 ixgbe_setup_kr_speed_x550em(struct ixgbe_hw *, ixgbe_link_speed); + static s32 ixgbe_get_invariants_X550_x(struct ixgbe_hw *hw) { struct ixgbe_mac_info *mac = &hw->mac; @@ -85,79 +87,6 @@ static s32 ixgbe_write_cs4227(struct ixgbe_hw *hw, u16 reg, u16 value) } /** - * ixgbe_check_cs4227_reg - Perform diag on a CS4227 register - * @hw: pointer to hardware structure - * @reg: the register to check - * - * Performs a diagnostic on a register in the CS4227 chip. Returns an error - * if it is not operating correctly. - * This function assumes that the caller has acquired the proper semaphore. - */ -static s32 ixgbe_check_cs4227_reg(struct ixgbe_hw *hw, u16 reg) -{ - s32 status; - u32 retry; - u16 reg_val; - - reg_val = (IXGBE_CS4227_EDC_MODE_DIAG << 1) | 1; - status = ixgbe_write_cs4227(hw, reg, reg_val); - if (status) - return status; - for (retry = 0; retry < IXGBE_CS4227_RETRIES; retry++) { - msleep(IXGBE_CS4227_CHECK_DELAY); - reg_val = 0xFFFF; - ixgbe_read_cs4227(hw, reg, ®_val); - if (!reg_val) - break; - } - if (reg_val) { - hw_err(hw, "CS4227 reg 0x%04X failed diagnostic\n", reg); - return status; - } - - return 0; -} - -/** - * ixgbe_get_cs4227_status - Return CS4227 status - * @hw: pointer to hardware structure - * - * Performs a diagnostic on the CS4227 chip. Returns an error if it is - * not operating correctly. - * This function assumes that the caller has acquired the proper semaphore. - */ -static s32 ixgbe_get_cs4227_status(struct ixgbe_hw *hw) -{ - s32 status; - u16 value = 0; - - /* Exit if the diagnostic has already been performed. */ - status = ixgbe_read_cs4227(hw, IXGBE_CS4227_SCRATCH, &value); - if (status) - return status; - if (value == IXGBE_CS4227_RESET_COMPLETE) - return 0; - - /* Check port 0. */ - status = ixgbe_check_cs4227_reg(hw, IXGBE_CS4227_LINE_SPARE24_LSB); - if (status) - return status; - - status = ixgbe_check_cs4227_reg(hw, IXGBE_CS4227_HOST_SPARE24_LSB); - if (status) - return status; - - /* Check port 1. */ - status = ixgbe_check_cs4227_reg(hw, IXGBE_CS4227_LINE_SPARE24_LSB + - (1 << 12)); - if (status) - return status; - - return ixgbe_check_cs4227_reg(hw, IXGBE_CS4227_HOST_SPARE24_LSB + - (1 << 12)); -} - -/** * ixgbe_read_pe - Read register from port expander * @hw: pointer to hardware structure * @reg: register number to read @@ -326,13 +255,6 @@ static void ixgbe_check_cs4227(struct ixgbe_hw *hw) return; } - /* Is the CS4227 working correctly? */ - status = ixgbe_get_cs4227_status(hw); - if (status) { - hw_err(hw, "CS4227 status failed: %d", status); - goto out; - } - /* Record completion for next time. */ status = ixgbe_write_cs4227(hw, IXGBE_CS4227_SCRATCH, IXGBE_CS4227_RESET_COMPLETE); @@ -1257,31 +1179,71 @@ ixgbe_setup_mac_link_sfp_x550em(struct ixgbe_hw *hw, if (status) return status; - /* Configure CS4227 LINE side to 10G SR. */ - slice = IXGBE_CS4227_LINE_SPARE22_MSB + (hw->bus.lan_id << 12); - value = IXGBE_CS4227_SPEED_10G; - status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, slice, - value); - - /* Configure CS4227 for HOST connection rate then type. 
*/ - slice = IXGBE_CS4227_HOST_SPARE22_MSB + (hw->bus.lan_id << 12); - value = speed & IXGBE_LINK_SPEED_10GB_FULL ? - IXGBE_CS4227_SPEED_10G : IXGBE_CS4227_SPEED_1G; - status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, slice, - value); + if (!(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) { + /* Configure CS4227 LINE side to 10G SR. */ + slice = IXGBE_CS4227_LINE_SPARE22_MSB + (hw->bus.lan_id << 12); + value = IXGBE_CS4227_SPEED_10G; + status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, + slice, value); + if (status) + goto i2c_err; - slice = IXGBE_CS4227_HOST_SPARE24_LSB + (hw->bus.lan_id << 12); - if (setup_linear) - value = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1; - else + slice = IXGBE_CS4227_LINE_SPARE24_LSB + (hw->bus.lan_id << 12); value = (IXGBE_CS4227_EDC_MODE_SR << 1) | 1; - status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, slice, - value); + status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, + slice, value); + if (status) + goto i2c_err; + + /* Configure CS4227 for HOST connection rate then type. */ + slice = IXGBE_CS4227_HOST_SPARE22_MSB + (hw->bus.lan_id << 12); + value = speed & IXGBE_LINK_SPEED_10GB_FULL ? + IXGBE_CS4227_SPEED_10G : IXGBE_CS4227_SPEED_1G; + status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, + slice, value); + if (status) + goto i2c_err; - /* If internal link mode is XFI, then setup XFI internal link. */ - if (!(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) + slice = IXGBE_CS4227_HOST_SPARE24_LSB + (hw->bus.lan_id << 12); + if (setup_linear) + value = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1; + else + value = (IXGBE_CS4227_EDC_MODE_SR << 1) | 1; + status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, + slice, value); + if (status) + goto i2c_err; + + /* Setup XFI internal link. */ status = ixgbe_setup_ixfi_x550em(hw, &speed); + if (status) { + hw_dbg(hw, "setup_ixfi failed with %d\n", status); + return status; + } + } else { + /* Configure internal PHY for KR/KX. */ + status = ixgbe_setup_kr_speed_x550em(hw, speed); + if (status) { + hw_dbg(hw, "setup_kr_speed failed with %d\n", status); + return status; + } + + /* Configure CS4227 LINE side to proper mode. 
*/ + slice = IXGBE_CS4227_LINE_SPARE24_LSB + (hw->bus.lan_id << 12); + if (setup_linear) + value = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1; + else + value = (IXGBE_CS4227_EDC_MODE_SR << 1) | 1; + status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227, + slice, value); + if (status) + goto i2c_err; + } + return 0; + +i2c_err: + hw_dbg(hw, "combined i2c access failed with %d\n", status); return status; } @@ -1482,7 +1444,7 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc) IXGBE_MDIO_GLOBAL_ALARM_1_INT))) return status; - /* High temperature failure alarm triggered */ + /* Global alarm triggered */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_ALARM_1, IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, ®); @@ -1496,6 +1458,21 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc) ixgbe_set_copper_phy_power(hw, false); return IXGBE_ERR_OVERTEMP; } + if (reg & IXGBE_MDIO_GLOBAL_ALM_1_DEV_FAULT) { + /* device fault alarm triggered */ + status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_FAULT_MSG, + IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + ®); + if (status) + return status; + + /* if device fault was due to high temp alarm handle and exit */ + if (reg == IXGBE_MDIO_GLOBAL_FAULT_MSG_HI_TMP) { + /* power down the PHY in case the PHY FW didn't */ + ixgbe_set_copper_phy_power(hw, false); + return IXGBE_ERR_OVERTEMP; + } + } /* Vendor alarm 2 triggered */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_CHIP_STD_INT_FLAG, @@ -1549,14 +1526,15 @@ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw) if (status) return status; - /* Enables high temperature failure alarm */ + /* Enable high temperature failure and global fault alarms */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK, IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, ®); if (status) return status; - reg |= IXGBE_MDIO_GLOBAL_INT_HI_TEMP_EN; + reg |= (IXGBE_MDIO_GLOBAL_INT_HI_TEMP_EN | + IXGBE_MDIO_GLOBAL_INT_DEV_FAULT_EN); status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK, IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, @@ -1765,6 +1743,12 @@ static s32 ixgbe_setup_internal_phy_t_x550em(struct ixgbe_hw *hw) if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper) return IXGBE_ERR_CONFIG; + if (hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE) { + speed = IXGBE_LINK_SPEED_10GB_FULL | + IXGBE_LINK_SPEED_1GB_FULL; + return ixgbe_setup_kr_speed_x550em(hw, speed); + } + /* If link is not up, then there is no setup necessary so return */ status = ixgbe_ext_phy_t_x550em_get_link(hw, &link_up); if (status) @@ -1969,7 +1953,6 @@ static s32 ixgbe_enter_lplu_t_x550em(struct ixgbe_hw *hw) static s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw) { struct ixgbe_phy_info *phy = &hw->phy; - ixgbe_link_speed speed; s32 ret_val; hw->mac.ops.set_lan_id(hw); @@ -1982,13 +1965,6 @@ static s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw) * to determine internal PHY mode. */ phy->nw_mng_if_sel = IXGBE_READ_REG(hw, IXGBE_NW_MNG_IF_SEL); - - /* If internal PHY mode is KR, then initialize KR link */ - if (phy->nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE) { - speed = IXGBE_LINK_SPEED_10GB_FULL | - IXGBE_LINK_SPEED_1GB_FULL; - ret_val = ixgbe_setup_kr_speed_x550em(hw, speed); - } } /* Identify the PHY or SFP module */ @@ -2020,14 +1996,8 @@ static s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw) /* If internal link mode is XFI, then setup iXFI internal link, * else setup KR now. 
*/ - if (!(phy->nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) { - phy->ops.setup_internal_link = - ixgbe_setup_internal_phy_t_x550em; - } else { - speed = IXGBE_LINK_SPEED_10GB_FULL | - IXGBE_LINK_SPEED_1GB_FULL; - ret_val = ixgbe_setup_kr_speed_x550em(hw, speed); - } + phy->ops.setup_internal_link = + ixgbe_setup_internal_phy_t_x550em; /* setup SW LPLU only for first revision */ if (!(IXGBE_FUSES0_REV1 & IXGBE_READ_REG(hw, @@ -2176,13 +2146,14 @@ mac_reset_top: ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL); IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl); IXGBE_WRITE_FLUSH(hw); + usleep_range(1000, 1200); /* Poll for reset bit to self-clear meaning reset is complete */ for (i = 0; i < 10; i++) { - udelay(1); ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL); if (!(ctrl & IXGBE_CTRL_RST_MASK)) break; + udelay(1); } if (ctrl & IXGBE_CTRL_RST_MASK) { diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c index d3e5f5b37999..c48aef613b0a 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c +++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c @@ -774,7 +774,7 @@ static int ixgbevf_set_coalesce(struct net_device *netdev, adapter->tx_itr_setting = ec->tx_coalesce_usecs; if (adapter->tx_itr_setting == 1) - tx_itr_param = IXGBE_10K_ITR; + tx_itr_param = IXGBE_12K_ITR; else tx_itr_param = adapter->tx_itr_setting; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index ec3147279621..68ec7daa04fd 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -326,8 +326,7 @@ static inline bool ixgbevf_qv_disable(struct ixgbevf_q_vector *q_vector) #define IXGBE_MIN_RSC_ITR 24 #define IXGBE_100K_ITR 40 #define IXGBE_20K_ITR 200 -#define IXGBE_10K_ITR 400 -#define IXGBE_8K_ITR 500 +#define IXGBE_12K_ITR 336 /* Helper macros to switch between ints/sec and what the register uses. * And yes, it's the same math going both ways. 
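As a rough illustration (not from the driver): these ITR constants are interval values in quarter-microsecond units, which is why a later hunk computes timepassed_us = q_vector->itr >> 2. The new IXGBE_12K_ITR of 336 therefore means an interrupt gap of about 84 us, roughly 12,000 interrupts per second, while 200 corresponds to 20K and 40 to 100K:

	#include <stdint.h>

	/* Sketch only: convert an ITR constant (0.25 us units) into an
	 * approximate interrupt rate. 336 -> ~11.9K, 200 -> 20K, 40 -> 100K.
	 */
	static uint32_t itr_to_ints_per_sec(uint32_t itr)
	{
		uint32_t interval_us = itr >> 2;

		return interval_us ? 1000000U / interval_us : 0;
	}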
The lowest value diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 592ff237d692..f098952d4fb4 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -59,7 +59,7 @@ static const char ixgbevf_driver_string[] = #define DRV_VERSION "2.12.1-k" const char ixgbevf_driver_version[] = DRV_VERSION; static char ixgbevf_copyright[] = - "Copyright (c) 2009 - 2012 Intel Corporation."; + "Copyright (c) 2009 - 2015 Intel Corporation."; static const struct ixgbevf_info *ixgbevf_info_tbl[] = { [board_82599_vf] = &ixgbevf_82599_vf_info, @@ -96,12 +96,14 @@ static int debug = -1; module_param(debug, int, 0); MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); +static struct workqueue_struct *ixgbevf_wq; + static void ixgbevf_service_event_schedule(struct ixgbevf_adapter *adapter) { if (!test_bit(__IXGBEVF_DOWN, &adapter->state) && !test_bit(__IXGBEVF_REMOVING, &adapter->state) && !test_and_set_bit(__IXGBEVF_SERVICE_SCHED, &adapter->state)) - schedule_work(&adapter->service_task); + queue_work(ixgbevf_wq, &adapter->service_task); } static void ixgbevf_service_event_complete(struct ixgbevf_adapter *adapter) @@ -1138,7 +1140,7 @@ static void ixgbevf_configure_msix(struct ixgbevf_adapter *adapter) if (q_vector->tx.ring && !q_vector->rx.ring) { /* Tx only vector */ if (adapter->tx_itr_setting == 1) - q_vector->itr = IXGBE_10K_ITR; + q_vector->itr = IXGBE_12K_ITR; else q_vector->itr = adapter->tx_itr_setting; } else { @@ -1196,7 +1198,7 @@ static void ixgbevf_update_itr(struct ixgbevf_q_vector *q_vector, /* simple throttle rate management * 0-20MB/s lowest (100000 ints/s) * 20-100MB/s low (20000 ints/s) - * 100-1249MB/s bulk (8000 ints/s) + * 100-1249MB/s bulk (12000 ints/s) */ /* what was last interrupt timeslice? 
*/ timepassed_us = q_vector->itr >> 2; @@ -1247,7 +1249,7 @@ static void ixgbevf_set_itr(struct ixgbevf_q_vector *q_vector) break; case bulk_latency: default: - new_itr = IXGBE_8K_ITR; + new_itr = IXGBE_12K_ITR; break; } @@ -1288,7 +1290,7 @@ static irqreturn_t ixgbevf_msix_clean_rings(int irq, void *data) /* EIAM disabled interrupts (on this vector) for us */ if (q_vector->rx.ring || q_vector->tx.ring) - napi_schedule(&q_vector->napi); + napi_schedule_irqoff(&q_vector->napi); return IRQ_HANDLED; } @@ -1332,7 +1334,6 @@ static int ixgbevf_map_rings_to_vectors(struct ixgbevf_adapter *adapter) int txr_remaining = adapter->num_tx_queues; int i, j; int rqpv, tqpv; - int err = 0; q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; @@ -1345,7 +1346,7 @@ static int ixgbevf_map_rings_to_vectors(struct ixgbevf_adapter *adapter) for (; txr_idx < txr_remaining; v_start++, txr_idx++) map_vector_to_txq(adapter, v_start, txr_idx); - goto out; + return 0; } /* If we don't have enough vectors for a 1-to-1 @@ -1370,8 +1371,7 @@ static int ixgbevf_map_rings_to_vectors(struct ixgbevf_adapter *adapter) } } -out: - return err; + return 0; } /** @@ -1469,9 +1469,7 @@ static inline void ixgbevf_reset_q_vectors(struct ixgbevf_adapter *adapter) **/ static int ixgbevf_request_irq(struct ixgbevf_adapter *adapter) { - int err = 0; - - err = ixgbevf_request_msix_irqs(adapter); + int err = ixgbevf_request_msix_irqs(adapter); if (err) hw_dbg(&adapter->hw, "request_irq failed, Error %d\n", err); @@ -1830,7 +1828,7 @@ static int ixgbevf_vlan_rx_kill_vid(struct net_device *netdev, { struct ixgbevf_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; - int err = -EOPNOTSUPP; + int err; spin_lock_bh(&adapter->mbx_lock); @@ -2046,7 +2044,7 @@ static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter) ixgbe_mbox_api_11, ixgbe_mbox_api_10, ixgbe_mbox_api_unknown }; - int err = 0, idx = 0; + int err, idx = 0; spin_lock_bh(&adapter->mbx_lock); @@ -2260,10 +2258,8 @@ void ixgbevf_reset(struct ixgbevf_adapter *adapter) } if (is_valid_ether_addr(adapter->hw.mac.addr)) { - memcpy(netdev->dev_addr, adapter->hw.mac.addr, - netdev->addr_len); - memcpy(netdev->perm_addr, adapter->hw.mac.addr, - netdev->addr_len); + ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr); + ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr); } adapter->last_reset = jiffies; @@ -2421,7 +2417,7 @@ err_allocation: static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter) { struct net_device *netdev = adapter->netdev; - int err = 0; + int err; int vector, v_budget; /* It's easy to be greedy for MSI-X vectors, but it really @@ -2439,26 +2435,21 @@ static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter) */ adapter->msix_entries = kcalloc(v_budget, sizeof(struct msix_entry), GFP_KERNEL); - if (!adapter->msix_entries) { - err = -ENOMEM; - goto out; - } + if (!adapter->msix_entries) + return -ENOMEM; for (vector = 0; vector < v_budget; vector++) adapter->msix_entries[vector].entry = vector; err = ixgbevf_acquire_msix_vectors(adapter, v_budget); if (err) - goto out; + return err; err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues); if (err) - goto out; - - err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues); + return err; -out: - return err; + return netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues); } /** @@ -2483,9 +2474,6 @@ static int ixgbevf_alloc_q_vectors(struct ixgbevf_adapter *adapter) q_vector->v_idx = q_idx; 
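For context on the ITR consolidation above: the update_itr path shown earlier treats the stored ITR value as quarter-microseconds (timepassed_us = q_vector->itr >> 2), so the register-style constants map to interrupt rates by simple arithmetic. A minimal sketch, with a hypothetical helper name that is not part of the driver:

/* Hypothetical helper, not in the driver: relate the ITR constants above
 * to interrupts per second.  The stored value is in 0.25 us units, as the
 * "itr >> 2" conversion in ixgbevf_update_itr() shows.
 */
static inline unsigned int itr_to_ints_per_sec(unsigned int itr)
{
	unsigned int usecs = itr >> 2;		/* 336 >> 2 = 84 us between interrupts */

	return usecs ? 1000000 / usecs : 0;	/* 84 us -> ~11900 ints/s, i.e. "12K" */
}

/* IXGBE_100K_ITR (40) -> 100000 ints/s, IXGBE_20K_ITR (200) -> 20000 ints/s,
 * IXGBE_12K_ITR (336) -> ~11900 ints/s, the single bulk rate that replaces
 * the old 8K/10K defines in this series.
 */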
netif_napi_add(adapter->netdev, &q_vector->napi, ixgbevf_poll, 64); -#ifdef CONFIG_NET_RX_BUSY_POLL - napi_hash_add(&q_vector->napi); -#endif adapter->q_vector[q_idx] = q_vector; } @@ -2662,13 +2650,14 @@ static int ixgbevf_sw_init(struct ixgbevf_adapter *adapter) else if (is_zero_ether_addr(adapter->hw.mac.addr)) dev_info(&pdev->dev, "MAC address not assigned by administrator.\n"); - memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len); + ether_addr_copy(netdev->dev_addr, hw->mac.addr); } if (!is_valid_ether_addr(netdev->dev_addr)) { dev_info(&pdev->dev, "Assigning random MAC address\n"); eth_hw_addr_random(netdev); - memcpy(hw->mac.addr, netdev->dev_addr, netdev->addr_len); + ether_addr_copy(hw->mac.addr, netdev->dev_addr); + ether_addr_copy(hw->mac.perm_addr, netdev->dev_addr); } /* Enable dynamic interrupt throttling rates */ @@ -3355,6 +3344,7 @@ static void ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring, if (skb->ip_summed == CHECKSUM_PARTIAL) { u8 l4_hdr = 0; + __be16 frag_off; switch (first->protocol) { case htons(ETH_P_IP): @@ -3365,13 +3355,16 @@ static void ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring, case htons(ETH_P_IPV6): vlan_macip_lens |= skb_network_header_len(skb); l4_hdr = ipv6_hdr(skb)->nexthdr; + if (likely(skb_network_header_len(skb) == + sizeof(struct ipv6hdr))) + break; + ipv6_skip_exthdr(skb, skb_network_offset(skb) + + sizeof(struct ipv6hdr), + &l4_hdr, &frag_off); + if (unlikely(frag_off)) + l4_hdr = NEXTHDR_FRAGMENT; break; default: - if (unlikely(net_ratelimit())) { - dev_warn(tx_ring->dev, - "partial checksum but proto=%x!\n", - first->protocol); - } break; } @@ -3393,16 +3386,18 @@ static void ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring, default: if (unlikely(net_ratelimit())) { dev_warn(tx_ring->dev, - "partial checksum but l4 proto=%x!\n", - l4_hdr); + "partial checksum, l3 proto=%x, l4 proto=%x\n", + first->protocol, l4_hdr); } - break; + skb_checksum_help(skb); + goto no_csum; } /* update TX checksum flag */ first->tx_flags |= IXGBE_TX_FLAGS_CSUM; } +no_csum: /* vlan_macip_lens: MACLEN, VLAN tag */ vlan_macip_lens |= skb_network_offset(skb) << IXGBE_ADVTXD_MACLEN_SHIFT; vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK; @@ -3698,8 +3693,8 @@ static int ixgbevf_set_mac(struct net_device *netdev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); - memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); + ether_addr_copy(netdev->dev_addr, addr->sa_data); + ether_addr_copy(hw->mac.addr, addr->sa_data); spin_lock_bh(&adapter->mbx_lock); @@ -4248,15 +4243,17 @@ static struct pci_driver ixgbevf_driver = { **/ static int __init ixgbevf_init_module(void) { - int ret; - pr_info("%s - version %s\n", ixgbevf_driver_string, ixgbevf_driver_version); pr_info("%s\n", ixgbevf_copyright); + ixgbevf_wq = create_singlethread_workqueue(ixgbevf_driver_name); + if (!ixgbevf_wq) { + pr_err("%s: Failed to create workqueue\n", ixgbevf_driver_name); + return -ENOMEM; + } - ret = pci_register_driver(&ixgbevf_driver); - return ret; + return pci_register_driver(&ixgbevf_driver); } module_init(ixgbevf_init_module); @@ -4270,6 +4267,10 @@ module_init(ixgbevf_init_module); static void __exit ixgbevf_exit_module(void) { pci_unregister_driver(&ixgbevf_driver); + if (ixgbevf_wq) { + destroy_workqueue(ixgbevf_wq); + ixgbevf_wq = NULL; + } } #ifdef DEBUG diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c index 427f3605cbfc..61a98f4c5746 100644 --- 
a/drivers/net/ethernet/intel/ixgbevf/vf.c +++ b/drivers/net/ethernet/intel/ixgbevf/vf.c @@ -117,7 +117,9 @@ static s32 ixgbevf_reset_hw_vf(struct ixgbe_hw *hw) msgbuf[0] != (IXGBE_VF_RESET | IXGBE_VT_MSGTYPE_NACK)) return IXGBE_ERR_INVALID_MAC_ADDR; - ether_addr_copy(hw->mac.perm_addr, addr); + if (msgbuf[0] == (IXGBE_VF_RESET | IXGBE_VT_MSGTYPE_ACK)) + ether_addr_copy(hw->mac.perm_addr, addr); + hw->mac.mc_filter_type = msgbuf[IXGBE_VF_MC_TYPE_WORD]; return 0; diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 4182290fdbcf..4eba2ed53052 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -3257,25 +3257,20 @@ static struct platform_driver mv643xx_eth_driver = { }, }; +static struct platform_driver * const drivers[] = { + &mv643xx_eth_shared_driver, + &mv643xx_eth_driver, +}; + static int __init mv643xx_eth_init_module(void) { - int rc; - - rc = platform_driver_register(&mv643xx_eth_shared_driver); - if (!rc) { - rc = platform_driver_register(&mv643xx_eth_driver); - if (rc) - platform_driver_unregister(&mv643xx_eth_shared_driver); - } - - return rc; + return platform_register_drivers(drivers, ARRAY_SIZE(drivers)); } module_init(mv643xx_eth_init_module); static void __exit mv643xx_eth_cleanup_module(void) { - platform_driver_unregister(&mv643xx_eth_driver); - platform_driver_unregister(&mv643xx_eth_shared_driver); + platform_unregister_drivers(drivers, ARRAY_SIZE(drivers)); } module_exit(mv643xx_eth_cleanup_module); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index ed622fa29dfa..528c2544389b 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -371,7 +371,7 @@ struct mvneta_port { unsigned int duplex; unsigned int speed; unsigned int tx_csum_limit; - int use_inband_status:1; + unsigned int use_inband_status:1; u64 ethtool_stats[ARRAY_SIZE(mvneta_statistics)]; }; @@ -973,6 +973,44 @@ static void mvneta_set_other_mcast_table(struct mvneta_port *pp, int queue) mvreg_write(pp, MVNETA_DA_FILT_OTH_MCAST + offset, val); } +static void mvneta_set_autoneg(struct mvneta_port *pp, int enable) +{ + u32 val; + + if (enable) { + val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); + val &= ~(MVNETA_GMAC_FORCE_LINK_PASS | + MVNETA_GMAC_FORCE_LINK_DOWN | + MVNETA_GMAC_AN_FLOW_CTRL_EN); + val |= MVNETA_GMAC_INBAND_AN_ENABLE | + MVNETA_GMAC_AN_SPEED_EN | + MVNETA_GMAC_AN_DUPLEX_EN; + mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); + + val = mvreg_read(pp, MVNETA_GMAC_CLOCK_DIVIDER); + val |= MVNETA_GMAC_1MS_CLOCK_ENABLE; + mvreg_write(pp, MVNETA_GMAC_CLOCK_DIVIDER, val); + + val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); + val |= MVNETA_GMAC2_INBAND_AN_ENABLE; + mvreg_write(pp, MVNETA_GMAC_CTRL_2, val); + } else { + val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); + val &= ~(MVNETA_GMAC_INBAND_AN_ENABLE | + MVNETA_GMAC_AN_SPEED_EN | + MVNETA_GMAC_AN_DUPLEX_EN); + mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); + + val = mvreg_read(pp, MVNETA_GMAC_CLOCK_DIVIDER); + val &= ~MVNETA_GMAC_1MS_CLOCK_ENABLE; + mvreg_write(pp, MVNETA_GMAC_CLOCK_DIVIDER, val); + + val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); + val &= ~MVNETA_GMAC2_INBAND_AN_ENABLE; + mvreg_write(pp, MVNETA_GMAC_CTRL_2, val); + } +} + /* This method sets defaults to the NETA port: * Clears interrupt Cause and Mask registers. * Clears all MAC tables. 
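The mv643xx_eth module init/exit conversion above leans on the platform_register_drivers()/platform_unregister_drivers() helpers, which take an array of drivers, unwind the entries that already registered if a later registration fails, and unregister in reverse order. A minimal sketch of the pattern, with hypothetical driver names:

/* Hypothetical drivers; only the registration pattern matters here. */
static struct platform_driver * const example_drivers[] = {
	&example_shared_driver,
	&example_port_driver,
};

static int __init example_init(void)
{
	/* Registers each driver in array order; on failure the helper
	 * unregisters the ones that already succeeded and returns the error.
	 */
	return platform_register_drivers(example_drivers,
					 ARRAY_SIZE(example_drivers));
}
module_init(example_init);

static void __exit example_exit(void)
{
	/* Unregisters in reverse order of registration. */
	platform_unregister_drivers(example_drivers, ARRAY_SIZE(example_drivers));
}
module_exit(example_exit);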
@@ -1058,26 +1096,7 @@ static void mvneta_defaults_set(struct mvneta_port *pp) val &= ~MVNETA_PHY_POLLING_ENABLE; mvreg_write(pp, MVNETA_UNIT_CONTROL, val); - if (pp->use_inband_status) { - val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); - val &= ~(MVNETA_GMAC_FORCE_LINK_PASS | - MVNETA_GMAC_FORCE_LINK_DOWN | - MVNETA_GMAC_AN_FLOW_CTRL_EN); - val |= MVNETA_GMAC_INBAND_AN_ENABLE | - MVNETA_GMAC_AN_SPEED_EN | - MVNETA_GMAC_AN_DUPLEX_EN; - mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); - val = mvreg_read(pp, MVNETA_GMAC_CLOCK_DIVIDER); - val |= MVNETA_GMAC_1MS_CLOCK_ENABLE; - mvreg_write(pp, MVNETA_GMAC_CLOCK_DIVIDER, val); - } else { - val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); - val &= ~(MVNETA_GMAC_INBAND_AN_ENABLE | - MVNETA_GMAC_AN_SPEED_EN | - MVNETA_GMAC_AN_DUPLEX_EN); - mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); - } - + mvneta_set_autoneg(pp, pp->use_inband_status); mvneta_set_ucast_table(pp, -1); mvneta_set_special_mcast_table(pp, -1); mvneta_set_other_mcast_table(pp, -1); @@ -2943,10 +2962,43 @@ int mvneta_ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) int mvneta_ethtool_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) { struct mvneta_port *pp = netdev_priv(dev); + struct phy_device *phydev = pp->phy_dev; - if (!pp->phy_dev) + if (!phydev) return -ENODEV; + if ((cmd->autoneg == AUTONEG_ENABLE) != pp->use_inband_status) { + u32 val; + + mvneta_set_autoneg(pp, cmd->autoneg == AUTONEG_ENABLE); + + if (cmd->autoneg == AUTONEG_DISABLE) { + val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); + val &= ~(MVNETA_GMAC_CONFIG_MII_SPEED | + MVNETA_GMAC_CONFIG_GMII_SPEED | + MVNETA_GMAC_CONFIG_FULL_DUPLEX); + + if (phydev->duplex) + val |= MVNETA_GMAC_CONFIG_FULL_DUPLEX; + + if (phydev->speed == SPEED_1000) + val |= MVNETA_GMAC_CONFIG_GMII_SPEED; + else if (phydev->speed == SPEED_100) + val |= MVNETA_GMAC_CONFIG_MII_SPEED; + + mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); + } + + pp->use_inband_status = (cmd->autoneg == AUTONEG_ENABLE); + netdev_info(pp->dev, "autoneg status set to %i\n", + pp->use_inband_status); + + if (netif_running(dev)) { + mvneta_port_down(pp); + mvneta_port_up(pp); + } + } + return phy_ethtool_sset(pp->phy_dev, cmd); } @@ -3230,9 +3282,6 @@ static int mvneta_port_power_up(struct mvneta_port *pp, int phy_mode) return -EINVAL; } - if (pp->use_inband_status) - ctrl |= MVNETA_GMAC2_INBAND_AN_ENABLE; - /* Cancel Port Reset */ ctrl &= ~MVNETA_GMAC2_PORT_RESET; mvreg_write(pp, MVNETA_GMAC_CTRL_2, ctrl); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index eb8a4988de63..af975a2b74c6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -155,13 +155,11 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, cq->mcq.comp = cq->is_tx ? 
mlx4_en_tx_irq : mlx4_en_rx_irq; cq->mcq.event = mlx4_en_cq_event; - if (cq->is_tx) { - netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq, - NAPI_POLL_WEIGHT); - } else { + if (cq->is_tx) + netif_tx_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq, + NAPI_POLL_WEIGHT); + else netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_rx_cq, 64); - napi_hash_add(&cq->napi); - } napi_enable(&cq->napi); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index ddb5541882f5..dd84cabb2a51 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -337,11 +337,7 @@ static int mlx4_en_get_sset_count(struct net_device *dev, int sset) case ETH_SS_STATS: return bitmap_iterator_count(&it) + (priv->tx_ring_num * 2) + -#ifdef CONFIG_NET_RX_BUSY_POLL - (priv->rx_ring_num * 5); -#else (priv->rx_ring_num * 2); -#endif case ETH_SS_TEST: return MLX4_EN_NUM_SELF_TEST - !(priv->mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UC_LOOPBACK) * 2; @@ -408,11 +404,6 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev, for (i = 0; i < priv->rx_ring_num; i++) { data[index++] = priv->rx_ring[i]->packets; data[index++] = priv->rx_ring[i]->bytes; -#ifdef CONFIG_NET_RX_BUSY_POLL - data[index++] = priv->rx_ring[i]->yields; - data[index++] = priv->rx_ring[i]->misses; - data[index++] = priv->rx_ring[i]->cleaned; -#endif } spin_unlock_bh(&priv->stats_lock); @@ -486,14 +477,6 @@ static void mlx4_en_get_strings(struct net_device *dev, "rx%d_packets", i); sprintf(data + (index++) * ETH_GSTRING_LEN, "rx%d_bytes", i); -#ifdef CONFIG_NET_RX_BUSY_POLL - sprintf(data + (index++) * ETH_GSTRING_LEN, - "rx%d_napi_yield", i); - sprintf(data + (index++) * ETH_GSTRING_LEN, - "rx%d_misses", i); - sprintf(data + (index++) * ETH_GSTRING_LEN, - "rx%d_cleaned", i); -#endif } break; case ETH_SS_PRIV_FLAGS: diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 886e1bc86374..659209ff7af6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -69,34 +69,6 @@ int mlx4_en_setup_tc(struct net_device *dev, u8 up) return 0; } -#ifdef CONFIG_NET_RX_BUSY_POLL -/* must be called with local_bh_disable()d */ -static int mlx4_en_low_latency_recv(struct napi_struct *napi) -{ - struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi); - struct net_device *dev = cq->dev; - struct mlx4_en_priv *priv = netdev_priv(dev); - struct mlx4_en_rx_ring *rx_ring = priv->rx_ring[cq->ring]; - int done; - - if (!priv->port_up) - return LL_FLUSH_FAILED; - - if (!mlx4_en_cq_lock_poll(cq)) - return LL_FLUSH_BUSY; - - done = mlx4_en_process_rx_cq(dev, cq, 4); - if (likely(done)) - rx_ring->cleaned += done; - else - rx_ring->misses++; - - mlx4_en_cq_unlock_poll(cq); - - return done; -} -#endif /* CONFIG_NET_RX_BUSY_POLL */ - #ifdef CONFIG_RFS_ACCEL struct mlx4_en_filter { @@ -1561,8 +1533,6 @@ int mlx4_en_start_port(struct net_device *dev) for (i = 0; i < priv->rx_ring_num; i++) { cq = priv->rx_cq[i]; - mlx4_en_cq_init_lock(cq); - err = mlx4_en_init_affinity_hint(priv, i); if (err) { en_err(priv, "Failed preparing IRQ affinity hint\n"); @@ -1859,13 +1829,6 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) for (i = 0; i < priv->rx_ring_num; i++) { struct mlx4_en_cq *cq = priv->rx_cq[i]; - local_bh_disable(); - while (!mlx4_en_cq_lock_napi(cq)) { - pr_info("CQ %d locked\n", i); - mdelay(1); - } - local_bh_enable(); - 
napi_synchronize(&cq->napi); mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]); mlx4_en_deactivate_cq(priv, cq); @@ -2504,9 +2467,6 @@ static const struct net_device_ops mlx4_netdev_ops = { #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = mlx4_en_filter_rfs, #endif -#ifdef CONFIG_NET_RX_BUSY_POLL - .ndo_busy_poll = mlx4_en_low_latency_recv, -#endif .ndo_get_phys_port_id = mlx4_en_get_phys_port_id, #ifdef CONFIG_MLX4_EN_VXLAN .ndo_add_vxlan_port = mlx4_en_add_vxlan_port, diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index e7a5000aa12c..41440b2b20a3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -873,10 +873,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud * - TCP/IP (v4) * - without IP options * - not an IP fragment - * - no LLS polling in progress */ - if (!mlx4_en_cq_busy_polling(cq) && - (dev->features & NETIF_F_GRO)) { + if (dev->features & NETIF_F_GRO) { struct sk_buff *gro_skb = napi_get_frags(&cq->napi); if (!gro_skb) goto next; @@ -927,7 +925,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud PKT_HASH_TYPE_L3); skb_record_rx_queue(gro_skb, cq->ring); - skb_mark_napi_id(gro_skb, &cq->napi); if (ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL) { timestamp = mlx4_en_get_cqe_ts(cqe); @@ -990,13 +987,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud timestamp); } - skb_mark_napi_id(skb, &cq->napi); - - if (!mlx4_en_cq_busy_polling(cq)) - napi_gro_receive(&cq->napi, skb); - else - netif_receive_skb(skb); - + napi_gro_receive(&cq->napi, skb); next: for (nr = 0; nr < priv->num_frags; nr++) mlx4_en_free_frag(priv, frags, nr); @@ -1038,13 +1029,8 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget) struct mlx4_en_priv *priv = netdev_priv(dev); int done; - if (!mlx4_en_cq_lock_napi(cq)) - return budget; - done = mlx4_en_process_rx_cq(dev, cq, budget); - mlx4_en_cq_unlock_napi(cq); - /* If we used up all the quota - we're probably not done yet... 
*/ if (done == budget) { const struct cpumask *aff; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index c41f15102ae0..35de7d2e6b34 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -320,11 +320,6 @@ struct mlx4_en_rx_ring { void *rx_info; unsigned long bytes; unsigned long packets; -#ifdef CONFIG_NET_RX_BUSY_POLL - unsigned long yields; - unsigned long misses; - unsigned long cleaned; -#endif unsigned long csum_ok; unsigned long csum_none; unsigned long csum_complete; @@ -347,18 +342,6 @@ struct mlx4_en_cq { struct mlx4_cqe *buf; #define MLX4_EN_OPCODE_ERROR 0x1e -#ifdef CONFIG_NET_RX_BUSY_POLL - unsigned int state; -#define MLX4_EN_CQ_STATE_IDLE 0 -#define MLX4_EN_CQ_STATE_NAPI 1 /* NAPI owns this CQ */ -#define MLX4_EN_CQ_STATE_POLL 2 /* poll owns this CQ */ -#define MLX4_CQ_LOCKED (MLX4_EN_CQ_STATE_NAPI | MLX4_EN_CQ_STATE_POLL) -#define MLX4_EN_CQ_STATE_NAPI_YIELD 4 /* NAPI yielded this CQ */ -#define MLX4_EN_CQ_STATE_POLL_YIELD 8 /* poll yielded this CQ */ -#define CQ_YIELD (MLX4_EN_CQ_STATE_NAPI_YIELD | MLX4_EN_CQ_STATE_POLL_YIELD) -#define CQ_USER_PEND (MLX4_EN_CQ_STATE_POLL | MLX4_EN_CQ_STATE_POLL_YIELD) - spinlock_t poll_lock; /* protects from LLS/napi conflicts */ -#endif /* CONFIG_NET_RX_BUSY_POLL */ struct irq_desc *irq_desc; }; @@ -622,115 +605,6 @@ static inline struct mlx4_cqe *mlx4_en_get_cqe(void *buf, int idx, int cqe_sz) return buf + idx * cqe_sz; } -#ifdef CONFIG_NET_RX_BUSY_POLL -static inline void mlx4_en_cq_init_lock(struct mlx4_en_cq *cq) -{ - spin_lock_init(&cq->poll_lock); - cq->state = MLX4_EN_CQ_STATE_IDLE; -} - -/* called from the device poll rutine to get ownership of a cq */ -static inline bool mlx4_en_cq_lock_napi(struct mlx4_en_cq *cq) -{ - int rc = true; - spin_lock(&cq->poll_lock); - if (cq->state & MLX4_CQ_LOCKED) { - WARN_ON(cq->state & MLX4_EN_CQ_STATE_NAPI); - cq->state |= MLX4_EN_CQ_STATE_NAPI_YIELD; - rc = false; - } else - /* we don't care if someone yielded */ - cq->state = MLX4_EN_CQ_STATE_NAPI; - spin_unlock(&cq->poll_lock); - return rc; -} - -/* returns true is someone tried to get the cq while napi had it */ -static inline bool mlx4_en_cq_unlock_napi(struct mlx4_en_cq *cq) -{ - int rc = false; - spin_lock(&cq->poll_lock); - WARN_ON(cq->state & (MLX4_EN_CQ_STATE_POLL | - MLX4_EN_CQ_STATE_NAPI_YIELD)); - - if (cq->state & MLX4_EN_CQ_STATE_POLL_YIELD) - rc = true; - cq->state = MLX4_EN_CQ_STATE_IDLE; - spin_unlock(&cq->poll_lock); - return rc; -} - -/* called from mlx4_en_low_latency_poll() */ -static inline bool mlx4_en_cq_lock_poll(struct mlx4_en_cq *cq) -{ - int rc = true; - spin_lock_bh(&cq->poll_lock); - if ((cq->state & MLX4_CQ_LOCKED)) { - struct net_device *dev = cq->dev; - struct mlx4_en_priv *priv = netdev_priv(dev); - struct mlx4_en_rx_ring *rx_ring = priv->rx_ring[cq->ring]; - - cq->state |= MLX4_EN_CQ_STATE_POLL_YIELD; - rc = false; - rx_ring->yields++; - } else - /* preserve yield marks */ - cq->state |= MLX4_EN_CQ_STATE_POLL; - spin_unlock_bh(&cq->poll_lock); - return rc; -} - -/* returns true if someone tried to get the cq while it was locked */ -static inline bool mlx4_en_cq_unlock_poll(struct mlx4_en_cq *cq) -{ - int rc = false; - spin_lock_bh(&cq->poll_lock); - WARN_ON(cq->state & (MLX4_EN_CQ_STATE_NAPI)); - - if (cq->state & MLX4_EN_CQ_STATE_POLL_YIELD) - rc = true; - cq->state = MLX4_EN_CQ_STATE_IDLE; - spin_unlock_bh(&cq->poll_lock); - return rc; -} - -/* true if a socket is polling, even if it did not get 
the lock */ -static inline bool mlx4_en_cq_busy_polling(struct mlx4_en_cq *cq) -{ - WARN_ON(!(cq->state & MLX4_CQ_LOCKED)); - return cq->state & CQ_USER_PEND; -} -#else -static inline void mlx4_en_cq_init_lock(struct mlx4_en_cq *cq) -{ -} - -static inline bool mlx4_en_cq_lock_napi(struct mlx4_en_cq *cq) -{ - return true; -} - -static inline bool mlx4_en_cq_unlock_napi(struct mlx4_en_cq *cq) -{ - return false; -} - -static inline bool mlx4_en_cq_lock_poll(struct mlx4_en_cq *cq) -{ - return false; -} - -static inline bool mlx4_en_cq_unlock_poll(struct mlx4_en_cq *cq) -{ - return false; -} - -static inline bool mlx4_en_cq_busy_polling(struct mlx4_en_cq *cq) -{ - return false; -} -#endif /* CONFIG_NET_RX_BUSY_POLL */ - #define MLX4_EN_WOL_DO_MODIFY (1ULL << 63) void mlx4_en_update_loopback_state(struct net_device *dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 26a68b8af2c5..a0755919ccaf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ - mad.o transobj.o vport.o -mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o flow_table.o \ + mad.o transobj.o vport.o sriov.o +mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o flow_table.o eswitch.o \ en_main.o en_flow_table.o en_ethtool.o en_tx.o en_rx.o \ en_txrx.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 22e72bf1ae48..89313d46952d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -465,6 +465,7 @@ enum { }; struct mlx5e_vlan_db { + unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; u32 active_vlans_ft_ix[VLAN_N_VID]; u32 untagged_rule_ft_ix; u32 any_vlan_rule_ft_ix; @@ -564,7 +565,7 @@ void mlx5e_completion_event(struct mlx5_core_cq *mcq); void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); int mlx5e_napi_poll(struct napi_struct *napi, int budget); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq); -bool mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); +int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c b/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c index 22d603f78273..5b93c9c6e341 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c @@ -502,6 +502,49 @@ add_eth_addr_rule_out: return err; } +static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv) +{ + struct net_device *ndev = priv->netdev; + int max_list_size; + int list_size; + u16 *vlans; + int vlan; + int err; + int i; + + list_size = 0; + for_each_set_bit(vlan, priv->vlan.active_vlans, VLAN_N_VID) + list_size++; + + max_list_size = 1 << MLX5_CAP_GEN(priv->mdev, log_max_vlan_list); + + if (list_size > max_list_size) { + netdev_warn(ndev, + "netdev vlans list size (%d) > (%d) max vport list size, some vlans will be dropped\n", + list_size, max_list_size); + list_size = max_list_size; + } + + vlans = kcalloc(list_size, sizeof(*vlans), GFP_KERNEL); + if (!vlans) + return -ENOMEM; + + i = 0; + for_each_set_bit(vlan, priv->vlan.active_vlans, VLAN_N_VID) { + if (i >= list_size) + 
break; + vlans[i++] = vlan; + } + + err = mlx5_modify_nic_vport_vlans(priv->mdev, vlans, list_size); + if (err) + netdev_err(ndev, "Failed to modify vport vlans list err(%d)\n", + err); + + kfree(vlans); + return err; +} + enum mlx5e_vlan_rule_type { MLX5E_VLAN_RULE_TYPE_UNTAGGED, MLX5E_VLAN_RULE_TYPE_ANY_VID, @@ -552,6 +595,10 @@ static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv, 1); break; default: /* MLX5E_VLAN_RULE_TYPE_MATCH_VID */ + err = mlx5e_vport_context_update_vlans(priv); + if (err) + goto add_vlan_rule_out; + ft_ix = &priv->vlan.active_vlans_ft_ix[vid]; MLX5_SET(fte_match_param, match_value, outer_headers.vlan_tag, 1); @@ -588,6 +635,7 @@ static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv, case MLX5E_VLAN_RULE_TYPE_MATCH_VID: mlx5_del_flow_table_entry(priv->ft.vlan, priv->vlan.active_vlans_ft_ix[vid]); + mlx5e_vport_context_update_vlans(priv); break; } } @@ -619,6 +667,8 @@ int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, { struct mlx5e_priv *priv = netdev_priv(dev); + set_bit(vid, priv->vlan.active_vlans); + return mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); } @@ -627,6 +677,8 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, { struct mlx5e_priv *priv = netdev_priv(dev); + clear_bit(vid, priv->vlan.active_vlans); + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); return 0; @@ -671,6 +723,91 @@ static void mlx5e_sync_netdev_addr(struct mlx5e_priv *priv) netif_addr_unlock_bh(netdev); } +static void mlx5e_fill_addr_array(struct mlx5e_priv *priv, int list_type, + u8 addr_array[][ETH_ALEN], int size) +{ + bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC); + struct net_device *ndev = priv->netdev; + struct mlx5e_eth_addr_hash_node *hn; + struct hlist_head *addr_list; + struct hlist_node *tmp; + int i = 0; + int hi; + + addr_list = is_uc ? priv->eth_addr.netdev_uc : priv->eth_addr.netdev_mc; + + if (is_uc) /* Make sure our own address is pushed first */ + ether_addr_copy(addr_array[i++], ndev->dev_addr); + else if (priv->eth_addr.broadcast_enabled) + ether_addr_copy(addr_array[i++], ndev->broadcast); + + mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) { + if (ether_addr_equal(ndev->dev_addr, hn->ai.addr)) + continue; + if (i >= size) + break; + ether_addr_copy(addr_array[i++], hn->ai.addr); + } +} + +static void mlx5e_vport_context_update_addr_list(struct mlx5e_priv *priv, + int list_type) +{ + bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC); + struct mlx5e_eth_addr_hash_node *hn; + u8 (*addr_array)[ETH_ALEN] = NULL; + struct hlist_head *addr_list; + struct hlist_node *tmp; + int max_size; + int size; + int err; + int hi; + + size = is_uc ? 0 : (priv->eth_addr.broadcast_enabled ? 1 : 0); + max_size = is_uc ? + 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_mc_list); + + addr_list = is_uc ? priv->eth_addr.netdev_uc : priv->eth_addr.netdev_mc; + mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) + size++; + + if (size > max_size) { + netdev_warn(priv->netdev, + "netdev %s list size (%d) > (%d) max vport list size, some addresses will be dropped\n", + is_uc ? 
"UC" : "MC", size, max_size); + size = max_size; + } + + if (size) { + addr_array = kcalloc(size, ETH_ALEN, GFP_KERNEL); + if (!addr_array) { + err = -ENOMEM; + goto out; + } + mlx5e_fill_addr_array(priv, list_type, addr_array, size); + } + + err = mlx5_modify_nic_vport_mac_list(priv->mdev, list_type, addr_array, size); +out: + if (err) + netdev_err(priv->netdev, + "Failed to modify vport %s list err(%d)\n", + is_uc ? "UC" : "MC", err); + kfree(addr_array); +} + +static void mlx5e_vport_context_update(struct mlx5e_priv *priv) +{ + struct mlx5e_eth_addr_db *ea = &priv->eth_addr; + + mlx5e_vport_context_update_addr_list(priv, MLX5_NVPRT_LIST_TYPE_UC); + mlx5e_vport_context_update_addr_list(priv, MLX5_NVPRT_LIST_TYPE_MC); + mlx5_modify_nic_vport_promisc(priv->mdev, 0, + ea->allmulti_enabled, + ea->promisc_enabled); +} + static void mlx5e_apply_netdev_addr(struct mlx5e_priv *priv) { struct mlx5e_eth_addr_hash_node *hn; @@ -748,6 +885,8 @@ void mlx5e_set_rx_mode_work(struct work_struct *work) ea->promisc_enabled = promisc_enabled; ea->allmulti_enabled = allmulti_enabled; ea->broadcast_enabled = broadcast_enabled; + + mlx5e_vport_context_update(priv); } void mlx5e_init_eth_addr(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 1e52db32c73d..d67058afe87e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -32,6 +32,7 @@ #include <linux/mlx5/flow_table.h> #include "en.h" +#include "eswitch.h" struct mlx5e_rq_param { u32 rqc[MLX5_ST_SZ_DW(rqc)]; @@ -63,7 +64,7 @@ static void mlx5e_update_carrier(struct mlx5e_priv *priv) u8 port_state; port_state = mlx5_query_vport_state(mdev, - MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT); + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0); if (port_state == VPORT_STATE_UP) netif_carrier_on(priv->netdev); @@ -1020,6 +1021,7 @@ err_close_tx_cqs: err_napi_del: netif_napi_del(&c->napi); + napi_hash_del(&c->napi); kfree(c); return err; @@ -1033,6 +1035,10 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) mlx5e_close_cq(&c->rq.cq); mlx5e_close_tx_cqs(c); netif_napi_del(&c->napi); + + napi_hash_del(&c->napi); + synchronize_rcu(); + kfree(c); } @@ -1926,6 +1932,79 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu) return err; } +static int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_set_vport_mac(mdev->priv.eswitch, vf + 1, mac); +} + +static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_set_vport_vlan(mdev->priv.eswitch, vf + 1, + vlan, qos); +} + +static int mlx5_vport_link2ifla(u8 esw_link) +{ + switch (esw_link) { + case MLX5_ESW_VPORT_ADMIN_STATE_DOWN: + return IFLA_VF_LINK_STATE_DISABLE; + case MLX5_ESW_VPORT_ADMIN_STATE_UP: + return IFLA_VF_LINK_STATE_ENABLE; + } + return IFLA_VF_LINK_STATE_AUTO; +} + +static int mlx5_ifla_link2vport(u8 ifla_link) +{ + switch (ifla_link) { + case IFLA_VF_LINK_STATE_DISABLE: + return MLX5_ESW_VPORT_ADMIN_STATE_DOWN; + case IFLA_VF_LINK_STATE_ENABLE: + return MLX5_ESW_VPORT_ADMIN_STATE_UP; + } + return MLX5_ESW_VPORT_ADMIN_STATE_AUTO; +} + +static int mlx5e_set_vf_link_state(struct net_device *dev, int vf, + int link_state) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + 
struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_set_vport_state(mdev->priv.eswitch, vf + 1, + mlx5_ifla_link2vport(link_state)); +} + +static int mlx5e_get_vf_config(struct net_device *dev, + int vf, struct ifla_vf_info *ivi) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + err = mlx5_eswitch_get_vport_config(mdev->priv.eswitch, vf + 1, ivi); + if (err) + return err; + ivi->linkstate = mlx5_vport_link2ifla(ivi->linkstate); + return 0; +} + +static int mlx5e_get_vf_stats(struct net_device *dev, + int vf, struct ifla_vf_stats *vf_stats) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_get_vport_stats(mdev->priv.eswitch, vf + 1, + vf_stats); +} + static struct net_device_ops mlx5e_netdev_ops = { .ndo_open = mlx5e_open, .ndo_stop = mlx5e_close, @@ -1936,7 +2015,7 @@ static struct net_device_ops mlx5e_netdev_ops = { .ndo_vlan_rx_add_vid = mlx5e_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = mlx5e_vlan_rx_kill_vid, .ndo_set_features = mlx5e_set_features, - .ndo_change_mtu = mlx5e_change_mtu, + .ndo_change_mtu = mlx5e_change_mtu }; static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) @@ -2023,7 +2102,7 @@ static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - mlx5_query_nic_vport_mac_address(priv->mdev, netdev->dev_addr); + mlx5_query_nic_vport_mac_address(priv->mdev, 0, netdev->dev_addr); } static void mlx5e_build_netdev(struct net_device *netdev) @@ -2036,6 +2115,14 @@ static void mlx5e_build_netdev(struct net_device *netdev) if (priv->params.num_tc > 1) mlx5e_netdev_ops.ndo_select_queue = mlx5e_select_queue; + if (MLX5_CAP_GEN(mdev, vport_group_manager)) { + mlx5e_netdev_ops.ndo_set_vf_mac = mlx5e_set_vf_mac; + mlx5e_netdev_ops.ndo_set_vf_vlan = mlx5e_set_vf_vlan; + mlx5e_netdev_ops.ndo_get_vf_config = mlx5e_get_vf_config; + mlx5e_netdev_ops.ndo_set_vf_link_state = mlx5e_set_vf_link_state; + mlx5e_netdev_ops.ndo_get_vf_stats = mlx5e_get_vf_stats; + } + netdev->netdev_ops = &mlx5e_netdev_ops; netdev->watchdog_timeo = 15 * HZ; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index cf0098596e85..7c8c4088d1be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -33,6 +33,7 @@ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/tcp.h> +#include <net/busy_poll.h> #include "en.h" static inline int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, @@ -215,16 +216,16 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, be16_to_cpu(cqe->vlan_info)); } -bool mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) +int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) { struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); - int i; + int work_done; /* avoid accessing cq (dma coherent memory) if not needed */ if (!test_and_clear_bit(MLX5E_CQ_HAS_CQES, &cq->flags)) - return false; + return 0; - for (i = 0; i < budget; i++) { + for (work_done = 0; work_done < budget; work_done++) { struct mlx5e_rx_wqe *wqe; struct mlx5_cqe64 *cqe; struct sk_buff *skb; @@ -269,10 +270,8 @@ wq_ll_pop: /* ensure cq space is freed before enabling more cqes */ wmb(); - if (i == budget) { + if (work_done == budget) set_bit(MLX5E_CQ_HAS_CQES, &cq->flags); - return true; - } - return false; + return work_done; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 2c7cb6755d1d..4ac8d716dbdd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -54,6 +54,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, napi); bool busy = false; + int work_done; int i; clear_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags); @@ -61,26 +62,26 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) for (i = 0; i < c->num_tc; i++) busy |= mlx5e_poll_tx_cq(&c->sq[i].cq); - busy |= mlx5e_poll_rx_cq(&c->rq.cq, budget); - + work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget); + busy |= work_done == budget; busy |= mlx5e_post_rx_wqes(&c->rq); if (busy) return budget; - napi_complete(napi); + napi_complete_done(napi, work_done); /* avoid losing completion event during/after polling cqs */ if (test_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags)) { napi_schedule(napi); - return 0; + return work_done; } for (i = 0; i < c->num_tc; i++) mlx5e_cq_arm(&c->sq[i].cq); mlx5e_cq_arm(&c->rq.cq); - return 0; + return work_done; } void mlx5e_completion_event(struct mlx5_core_cq *mcq) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 713ead583347..23c244a7e5d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -35,6 +35,9 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/cmd.h> #include "mlx5_core.h" +#ifdef CONFIG_MLX5_CORE_EN +#include "eswitch.h" +#endif enum { MLX5_EQE_SIZE = sizeof(struct mlx5_eqe), @@ -287,6 +290,11 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq) break; #endif +#ifdef CONFIG_MLX5_CORE_EN + case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: + mlx5_eswitch_vport_event(dev->priv.eswitch, eqe); + break; +#endif default: mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n", eqe->type, eq->eqn); @@ -459,6 +467,11 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) if (MLX5_CAP_GEN(dev, pg)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_PAGE_FAULT); + if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH && + MLX5_CAP_GEN(dev, vport_group_manager) && + mlx5_core_is_pf(dev)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE); + err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD, MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD, "mlx5_cmd_eq", &dev->priv.uuari.uars[0]); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c new file mode 100644 index 000000000000..d8939e597c54 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -0,0 +1,1282 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/etherdevice.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/vport.h> +#include <linux/mlx5/flow_table.h> +#include "mlx5_core.h" +#include "eswitch.h" + +#define UPLINK_VPORT 0xFFFF + +#define MLX5_DEBUG_ESWITCH_MASK BIT(3) + +#define esw_info(dev, format, ...) \ + pr_info("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) + +#define esw_warn(dev, format, ...) \ + pr_warn("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) + +#define esw_debug(dev, format, ...) \ + mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) + +enum { + MLX5_ACTION_NONE = 0, + MLX5_ACTION_ADD = 1, + MLX5_ACTION_DEL = 2, +}; + +/* E-Switch UC L2 table hash node */ +struct esw_uc_addr { + struct l2addr_node node; + u32 table_index; + u32 vport; +}; + +/* E-Switch MC FDB table hash node */ +struct esw_mc_addr { /* SRIOV only */ + struct l2addr_node node; + struct mlx5_flow_rule *uplink_rule; /* Forward to uplink rule */ + u32 refcnt; +}; + +/* Vport UC/MC hash node */ +struct vport_addr { + struct l2addr_node node; + u8 action; + u32 vport; + struct mlx5_flow_rule *flow_rule; /* SRIOV only */ +}; + +enum { + UC_ADDR_CHANGE = BIT(0), + MC_ADDR_CHANGE = BIT(1), +}; + +/* Vport context events */ +#define SRIOV_VPORT_EVENTS (UC_ADDR_CHANGE | \ + MC_ADDR_CHANGE) + +static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, + u32 events_mask) +{ + int in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)]; + int out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + void *nic_vport_ctx; + int err; + + memset(out, 0, sizeof(out)); + memset(in, 0, sizeof(in)); + + MLX5_SET(modify_nic_vport_context_in, in, + opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, in, field_select.change_event, 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + if (vport) + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, arm_change_event, 1); + + if (events_mask & UC_ADDR_CHANGE) + MLX5_SET(nic_vport_context, nic_vport_ctx, + event_on_uc_address_change, 1); + if (events_mask & MC_ADDR_CHANGE) + MLX5_SET(nic_vport_context, nic_vport_ctx, + event_on_mc_address_change, 1); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + goto ex; + err = mlx5_cmd_status_to_err_v2(out); + if (err) + goto ex; + return 0; +ex: + return err; +} + +/* E-Switch vport context HW commands */ +static int query_esw_vport_context_cmd(struct mlx5_core_dev *mdev, u32 vport, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_nic_vport_context_in, in, opcode, + 
MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT); + + MLX5_SET(query_esw_vport_context_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_esw_vport_context_in, in, other_vport, 1); + + return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); +} + +static int query_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, + u16 *vlan, u8 *qos) +{ + u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)]; + int err; + bool cvlan_strip; + bool cvlan_insert; + + memset(out, 0, sizeof(out)); + + *vlan = 0; + *qos = 0; + + if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) || + !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist)) + return -ENOTSUPP; + + err = query_esw_vport_context_cmd(dev, vport, out, sizeof(out)); + if (err) + goto out; + + cvlan_strip = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.vport_cvlan_strip); + + cvlan_insert = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.vport_cvlan_insert); + + if (cvlan_strip || cvlan_insert) { + *vlan = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.cvlan_id); + *qos = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.cvlan_pcp); + } + + esw_debug(dev, "Query Vport[%d] cvlan: VLAN %d qos=%d\n", + vport, *vlan, *qos); +out: + return err; +} + +static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, + void *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_esw_vport_context_out)]; + + memset(out, 0, sizeof(out)); + + MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport); + if (vport) + MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); + + MLX5_SET(modify_esw_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT); + + return mlx5_cmd_exec_check_status(dev, in, inlen, + out, sizeof(out)); +} + +static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, + u16 vlan, u8 qos, bool set) +{ + u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)]; + + memset(in, 0, sizeof(in)); + + if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) || + !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist)) + return -ENOTSUPP; + + esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%d\n", + vport, vlan, qos, set); + + if (set) { + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.vport_cvlan_strip, 1); + /* insert only if no vlan in packet */ + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.vport_cvlan_insert, 1); + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.cvlan_pcp, qos); + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.cvlan_id, vlan); + } + + MLX5_SET(modify_esw_vport_context_in, in, + field_select.vport_cvlan_strip, 1); + MLX5_SET(modify_esw_vport_context_in, in, + field_select.vport_cvlan_insert, 1); + + return modify_esw_vport_context_cmd(dev, vport, in, sizeof(in)); +} + +/* HW L2 Table (MPFS) management */ +static int set_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index, + u8 *mac, u8 vlan_valid, u16 vlan) +{ + u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)]; + u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)]; + u8 *in_mac_addr; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(set_l2_table_entry_in, in, opcode, + MLX5_CMD_OP_SET_L2_TABLE_ENTRY); + MLX5_SET(set_l2_table_entry_in, in, table_index, index); + MLX5_SET(set_l2_table_entry_in, in, vlan_valid, vlan_valid); + MLX5_SET(set_l2_table_entry_in, in, vlan, vlan); + + in_mac_addr = MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address); + ether_addr_copy(&in_mac_addr[2], mac); + + return mlx5_cmd_exec_check_status(dev, 
in, sizeof(in), + out, sizeof(out)); +} + +static int del_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index) +{ + u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)]; + u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(delete_l2_table_entry_in, in, opcode, + MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY); + MLX5_SET(delete_l2_table_entry_in, in, table_index, index); + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), + out, sizeof(out)); +} + +static int alloc_l2_table_index(struct mlx5_l2_table *l2_table, u32 *ix) +{ + int err = 0; + + *ix = find_first_zero_bit(l2_table->bitmap, l2_table->size); + if (*ix >= l2_table->size) + err = -ENOSPC; + else + __set_bit(*ix, l2_table->bitmap); + + return err; +} + +static void free_l2_table_index(struct mlx5_l2_table *l2_table, u32 ix) +{ + __clear_bit(ix, l2_table->bitmap); +} + +static int set_l2_table_entry(struct mlx5_core_dev *dev, u8 *mac, + u8 vlan_valid, u16 vlan, + u32 *index) +{ + struct mlx5_l2_table *l2_table = &dev->priv.eswitch->l2_table; + int err; + + err = alloc_l2_table_index(l2_table, index); + if (err) + return err; + + err = set_l2_table_entry_cmd(dev, *index, mac, vlan_valid, vlan); + if (err) + free_l2_table_index(l2_table, *index); + + return err; +} + +static void del_l2_table_entry(struct mlx5_core_dev *dev, u32 index) +{ + struct mlx5_l2_table *l2_table = &dev->priv.eswitch->l2_table; + + del_l2_table_entry_cmd(dev, index); + free_l2_table_index(l2_table, index); +} + +/* E-Switch FDB flow steering */ +struct dest_node { + struct list_head list; + struct mlx5_flow_destination dest; +}; + +static int _mlx5_flow_rule_apply(struct mlx5_flow_rule *fr) +{ + bool was_valid = fr->valid; + struct dest_node *dest_n; + u32 dest_list_size = 0; + void *in_match_value; + u32 *flow_context; + u32 flow_index; + int err; + int i; + + if (list_empty(&fr->dest_list)) { + if (fr->valid) + mlx5_del_flow_table_entry(fr->ft, fr->fi); + fr->valid = false; + return 0; + } + + list_for_each_entry(dest_n, &fr->dest_list, list) + dest_list_size++; + + flow_context = mlx5_vzalloc(MLX5_ST_SZ_BYTES(flow_context) + + MLX5_ST_SZ_BYTES(dest_format_struct) * + dest_list_size); + if (!flow_context) + return -ENOMEM; + + MLX5_SET(flow_context, flow_context, flow_tag, fr->flow_tag); + MLX5_SET(flow_context, flow_context, action, fr->action); + MLX5_SET(flow_context, flow_context, destination_list_size, + dest_list_size); + + i = 0; + list_for_each_entry(dest_n, &fr->dest_list, list) { + void *dest_addr = MLX5_ADDR_OF(flow_context, flow_context, + destination[i++]); + + MLX5_SET(dest_format_struct, dest_addr, destination_type, + dest_n->dest.type); + MLX5_SET(dest_format_struct, dest_addr, destination_id, + dest_n->dest.vport_num); + } + + in_match_value = MLX5_ADDR_OF(flow_context, flow_context, match_value); + memcpy(in_match_value, fr->match_value, MLX5_ST_SZ_BYTES(fte_match_param)); + + err = mlx5_add_flow_table_entry(fr->ft, fr->match_criteria_enable, + fr->match_criteria, flow_context, + &flow_index); + if (!err) { + if (was_valid) + mlx5_del_flow_table_entry(fr->ft, fr->fi); + fr->fi = flow_index; + fr->valid = true; + } + kfree(flow_context); + return err; +} + +static int mlx5_flow_rule_add_dest(struct mlx5_flow_rule *fr, + struct mlx5_flow_destination *new_dest) +{ + struct dest_node *dest_n; + int err; + + dest_n = kzalloc(sizeof(*dest_n), GFP_KERNEL); + if (!dest_n) + return -ENOMEM; + + memcpy(&dest_n->dest, new_dest, sizeof(dest_n->dest)); + mutex_lock(&fr->mutex); + 
list_add(&dest_n->list, &fr->dest_list); + err = _mlx5_flow_rule_apply(fr); + if (err) { + list_del(&dest_n->list); + kfree(dest_n); + } + mutex_unlock(&fr->mutex); + return err; +} + +static int mlx5_flow_rule_del_dest(struct mlx5_flow_rule *fr, + struct mlx5_flow_destination *dest) +{ + struct dest_node *dest_n; + struct dest_node *n; + int err; + + mutex_lock(&fr->mutex); + list_for_each_entry_safe(dest_n, n, &fr->dest_list, list) { + if (dest->vport_num == dest_n->dest.vport_num) + goto found; + } + mutex_unlock(&fr->mutex); + return -ENOENT; + +found: + list_del(&dest_n->list); + err = _mlx5_flow_rule_apply(fr); + mutex_unlock(&fr->mutex); + kfree(dest_n); + + return err; +} + +static struct mlx5_flow_rule *find_fr(struct mlx5_eswitch *esw, + u8 match_criteria_enable, + u32 *match_value) +{ + struct hlist_head *hash = esw->mc_table; + struct esw_mc_addr *esw_mc; + u8 *dmac_v; + + dmac_v = MLX5_ADDR_OF(fte_match_param, match_value, + outer_headers.dmac_47_16); + + /* UNICAST FULL MATCH */ + if (!is_multicast_ether_addr(dmac_v)) + return NULL; + + /* MULTICAST FULL MATCH */ + esw_mc = l2addr_hash_find(hash, dmac_v, struct esw_mc_addr); + + return esw_mc ? esw_mc->uplink_rule : NULL; +} + +static struct mlx5_flow_rule *alloc_fr(void *ft, + u8 match_criteria_enable, + u32 *match_criteria, + u32 *match_value, + u32 action, + u32 flow_tag) +{ + struct mlx5_flow_rule *fr = kzalloc(sizeof(*fr), GFP_KERNEL); + + if (!fr) + return NULL; + + fr->match_criteria = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + fr->match_value = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + if (!fr->match_criteria || !fr->match_value) { + kfree(fr->match_criteria); + kfree(fr->match_value); + kfree(fr); + return NULL; + } + + memcpy(fr->match_criteria, match_criteria, MLX5_ST_SZ_BYTES(fte_match_param)); + memcpy(fr->match_value, match_value, MLX5_ST_SZ_BYTES(fte_match_param)); + fr->match_criteria_enable = match_criteria_enable; + fr->flow_tag = flow_tag; + fr->action = action; + + mutex_init(&fr->mutex); + INIT_LIST_HEAD(&fr->dest_list); + atomic_set(&fr->refcount, 0); + fr->ft = ft; + return fr; +} + +static void deref_fr(struct mlx5_flow_rule *fr) +{ + if (!atomic_dec_and_test(&fr->refcount)) + return; + + kfree(fr->match_criteria); + kfree(fr->match_value); + kfree(fr); +} + +static struct mlx5_flow_rule * +mlx5_add_flow_rule(struct mlx5_eswitch *esw, + u8 match_criteria_enable, + u32 *match_criteria, + u32 *match_value, + u32 action, + u32 flow_tag, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_rule *fr; + int err; + + fr = find_fr(esw, match_criteria_enable, match_value); + fr = fr ? 
fr : alloc_fr(esw->fdb_table.fdb, match_criteria_enable, match_criteria, + match_value, action, flow_tag); + if (!fr) + return NULL; + + atomic_inc(&fr->refcount); + + err = mlx5_flow_rule_add_dest(fr, dest); + if (err) { + deref_fr(fr); + return NULL; + } + + return fr; +} + +static void mlx5_del_flow_rule(struct mlx5_flow_rule *fr, u32 vport) +{ + struct mlx5_flow_destination dest; + + dest.vport_num = vport; + mlx5_flow_rule_del_dest(fr, &dest); + deref_fr(fr); +} + +/* E-Switch FDB */ +static struct mlx5_flow_rule * +esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport) +{ + int match_header = MLX5_MATCH_OUTER_HEADERS; + struct mlx5_flow_destination dest; + struct mlx5_flow_rule *flow_rule = NULL; + u32 *match_v; + u32 *match_c; + u8 *dmac_v; + u8 *dmac_c; + + match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + if (!match_v || !match_c) { + pr_warn("FDB: Failed to alloc match parameters\n"); + goto out; + } + dmac_v = MLX5_ADDR_OF(fte_match_param, match_v, + outer_headers.dmac_47_16); + dmac_c = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers.dmac_47_16); + + ether_addr_copy(dmac_v, mac); + /* Match criteria mask */ + memset(dmac_c, 0xff, 6); + + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport_num = vport; + + esw_debug(esw->dev, + "\tFDB add rule dmac_v(%pM) dmac_c(%pM) -> vport(%d)\n", + dmac_v, dmac_c, vport); + flow_rule = + mlx5_add_flow_rule(esw, + match_header, + match_c, + match_v, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + 0, &dest); + if (IS_ERR_OR_NULL(flow_rule)) { + pr_warn( + "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n", + dmac_v, dmac_c, vport, PTR_ERR(flow_rule)); + flow_rule = NULL; + } +out: + kfree(match_v); + kfree(match_c); + return flow_rule; +} + +static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) +{ + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_table_group g; + struct mlx5_flow_table *fdb; + u8 *dmac; + + esw_debug(dev, "Create FDB log_max_size(%d)\n", + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + + memset(&g, 0, sizeof(g)); + /* UC MC Full match rules*/ + g.log_sz = MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size); + g.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + dmac = MLX5_ADDR_OF(fte_match_param, g.match_criteria, + outer_headers.dmac_47_16); + /* Match criteria mask */ + memset(dmac, 0xff, 6); + + fdb = mlx5_create_flow_table(dev, 0, + MLX5_FLOW_TABLE_TYPE_ESWITCH, + 1, &g); + if (fdb) + esw_debug(dev, "ESW: FDB Table created fdb->id %d\n", mlx5_get_flow_table_id(fdb)); + else + esw_warn(dev, "ESW: Failed to create FDB Table\n"); + + esw->fdb_table.fdb = fdb; + return fdb ? 
0 : -ENOMEM; +} + +static void esw_destroy_fdb_table(struct mlx5_eswitch *esw) +{ + if (!esw->fdb_table.fdb) + return; + + esw_debug(esw->dev, "Destroy FDB Table fdb(%d)\n", + mlx5_get_flow_table_id(esw->fdb_table.fdb)); + mlx5_destroy_flow_table(esw->fdb_table.fdb); + esw->fdb_table.fdb = NULL; +} + +/* E-Switch vport UC/MC lists management */ +typedef int (*vport_addr_action)(struct mlx5_eswitch *esw, + struct vport_addr *vaddr); + +static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->l2_table.l2_hash; + struct esw_uc_addr *esw_uc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + int err; + + esw_uc = l2addr_hash_find(hash, mac, struct esw_uc_addr); + if (esw_uc) { + esw_warn(esw->dev, + "Failed to set L2 mac(%pM) for vport(%d), mac is already in use by vport(%d)\n", + mac, vport, esw_uc->vport); + return -EEXIST; + } + + esw_uc = l2addr_hash_add(hash, mac, struct esw_uc_addr, GFP_KERNEL); + if (!esw_uc) + return -ENOMEM; + esw_uc->vport = vport; + + err = set_l2_table_entry(esw->dev, mac, 0, 0, &esw_uc->table_index); + if (err) + goto abort; + + if (esw->fdb_table.fdb) /* SRIOV is enabled: Forward UC MAC to vport */ + vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); + + esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM index:%d fr(%p)\n", + vport, mac, esw_uc->table_index, vaddr->flow_rule); + return err; +abort: + l2addr_hash_del(esw_uc); + return err; +} + +static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->l2_table.l2_hash; + struct esw_uc_addr *esw_uc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + + esw_uc = l2addr_hash_find(hash, mac, struct esw_uc_addr); + if (!esw_uc || esw_uc->vport != vport) { + esw_debug(esw->dev, + "MAC(%pM) doesn't belong to vport (%d)\n", + mac, vport); + return -EINVAL; + } + esw_debug(esw->dev, "\tDELETE UC MAC: vport[%d] %pM index:%d fr(%p)\n", + vport, mac, esw_uc->table_index, vaddr->flow_rule); + + del_l2_table_entry(esw->dev, esw_uc->table_index); + + if (vaddr->flow_rule) + mlx5_del_flow_rule(vaddr->flow_rule, vport); + vaddr->flow_rule = NULL; + + l2addr_hash_del(esw_uc); + return 0; +} + +static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->mc_table; + struct esw_mc_addr *esw_mc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + + if (!esw->fdb_table.fdb) + return 0; + + esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr); + if (esw_mc) + goto add; + + esw_mc = l2addr_hash_add(hash, mac, struct esw_mc_addr, GFP_KERNEL); + if (!esw_mc) + return -ENOMEM; + + esw_mc->uplink_rule = /* Forward MC MAC to Uplink */ + esw_fdb_set_vport_rule(esw, mac, UPLINK_VPORT); +add: + esw_mc->refcnt++; + /* Forward MC MAC to vport */ + vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); + esw_debug(esw->dev, + "\tADDED MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n", + vport, mac, vaddr->flow_rule, + esw_mc->refcnt, esw_mc->uplink_rule); + return 0; +} + +static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->mc_table; + struct esw_mc_addr *esw_mc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + + if (!esw->fdb_table.fdb) + return 0; + + esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr); + if (!esw_mc) { + esw_warn(esw->dev, + "Failed to find eswitch MC addr for MAC(%pM) vport(%d)", + mac, vport); + return -EINVAL; + } + 
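esw_add_mc_addr() above and the esw_del_mc_addr() path continuing below share a single uplink forwarding rule per multicast MAC and refcount it per subscribing vport, while each vport always gets its own forwarding rule. A rough, self-contained sketch of that refcounting scheme; all names here are illustrative stand-ins, not mlx5 API:

#include <stdio.h>

/* Illustrative stand-ins for FDB rules; not mlx5 code. */
struct fdb_rule { int dest; };

#define UPLINK -1

static struct fdb_rule *add_fdb_rule(const unsigned char *mac, int dest)
{
	static struct fdb_rule pool[64];
	static int used;

	pool[used].dest = dest;
	printf("add rule %02x:.. -> %d\n", mac[0], dest);
	return &pool[used++];
}

static void del_fdb_rule(struct fdb_rule *r)
{
	printf("del rule -> %d\n", r->dest);
}

struct mc_entry {
	struct fdb_rule *uplink_rule;	/* shared: multicast MAC -> uplink */
	unsigned int refcnt;		/* vports currently subscribed */
};

static struct fdb_rule *mc_subscribe(struct mc_entry *mc,
				     const unsigned char *mac, int vport)
{
	if (!mc->refcnt)	/* first subscriber creates the shared uplink rule */
		mc->uplink_rule = add_fdb_rule(mac, UPLINK);
	mc->refcnt++;
	return add_fdb_rule(mac, vport);	/* plus one rule toward this vport */
}

static void mc_unsubscribe(struct mc_entry *mc, struct fdb_rule *vport_rule)
{
	del_fdb_rule(vport_rule);	/* the per-vport rule always goes */
	if (--mc->refcnt)
		return;			/* other vports still use this MAC */
	del_fdb_rule(mc->uplink_rule);	/* last one out drops the shared rule */
	mc->uplink_rule = NULL;
}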
esw_debug(esw->dev, + "\tDELETE MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n", + vport, mac, vaddr->flow_rule, esw_mc->refcnt, + esw_mc->uplink_rule); + + if (vaddr->flow_rule) + mlx5_del_flow_rule(vaddr->flow_rule, vport); + vaddr->flow_rule = NULL; + + if (--esw_mc->refcnt) + return 0; + + if (esw_mc->uplink_rule) + mlx5_del_flow_rule(esw_mc->uplink_rule, UPLINK_VPORT); + + l2addr_hash_del(esw_mc); + return 0; +} + +/* Apply vport UC/MC list to HW l2 table and FDB table */ +static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw, + u32 vport_num, int list_type) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; + vport_addr_action vport_addr_add; + vport_addr_action vport_addr_del; + struct vport_addr *addr; + struct l2addr_node *node; + struct hlist_head *hash; + struct hlist_node *tmp; + int hi; + + vport_addr_add = is_uc ? esw_add_uc_addr : + esw_add_mc_addr; + vport_addr_del = is_uc ? esw_del_uc_addr : + esw_del_mc_addr; + + hash = is_uc ? vport->uc_list : vport->mc_list; + for_each_l2hash_node(node, tmp, hash, hi) { + addr = container_of(node, struct vport_addr, node); + switch (addr->action) { + case MLX5_ACTION_ADD: + vport_addr_add(esw, addr); + addr->action = MLX5_ACTION_NONE; + break; + case MLX5_ACTION_DEL: + vport_addr_del(esw, addr); + l2addr_hash_del(addr); + break; + } + } +} + +/* Sync vport UC/MC list from vport context */ +static void esw_update_vport_addr_list(struct mlx5_eswitch *esw, + u32 vport_num, int list_type) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; + u8 (*mac_list)[ETH_ALEN]; + struct l2addr_node *node; + struct vport_addr *addr; + struct hlist_head *hash; + struct hlist_node *tmp; + int size; + int err; + int hi; + int i; + + size = is_uc ? MLX5_MAX_UC_PER_VPORT(esw->dev) : + MLX5_MAX_MC_PER_VPORT(esw->dev); + + mac_list = kcalloc(size, ETH_ALEN, GFP_KERNEL); + if (!mac_list) + return; + + hash = is_uc ? vport->uc_list : vport->mc_list; + + for_each_l2hash_node(node, tmp, hash, hi) { + addr = container_of(node, struct vport_addr, node); + addr->action = MLX5_ACTION_DEL; + } + + err = mlx5_query_nic_vport_mac_list(esw->dev, vport_num, list_type, + mac_list, &size); + if (err) + return; + esw_debug(esw->dev, "vport[%d] context update %s list size (%d)\n", + vport_num, is_uc ? 
"UC" : "MC", size); + + for (i = 0; i < size; i++) { + if (is_uc && !is_valid_ether_addr(mac_list[i])) + continue; + + if (!is_uc && !is_multicast_ether_addr(mac_list[i])) + continue; + + addr = l2addr_hash_find(hash, mac_list[i], struct vport_addr); + if (addr) { + addr->action = MLX5_ACTION_NONE; + continue; + } + + addr = l2addr_hash_add(hash, mac_list[i], struct vport_addr, + GFP_KERNEL); + if (!addr) { + esw_warn(esw->dev, + "Failed to add MAC(%pM) to vport[%d] DB\n", + mac_list[i], vport_num); + continue; + } + addr->vport = vport_num; + addr->action = MLX5_ACTION_ADD; + } + kfree(mac_list); +} + +static void esw_vport_change_handler(struct work_struct *work) +{ + struct mlx5_vport *vport = + container_of(work, struct mlx5_vport, vport_change_handler); + struct mlx5_core_dev *dev = vport->dev; + struct mlx5_eswitch *esw = dev->priv.eswitch; + u8 mac[ETH_ALEN]; + + mlx5_query_nic_vport_mac_address(dev, vport->vport, mac); + esw_debug(dev, "vport[%d] Context Changed: perm mac: %pM\n", + vport->vport, mac); + + if (vport->enabled_events & UC_ADDR_CHANGE) { + esw_update_vport_addr_list(esw, vport->vport, + MLX5_NVPRT_LIST_TYPE_UC); + esw_apply_vport_addr_list(esw, vport->vport, + MLX5_NVPRT_LIST_TYPE_UC); + } + + if (vport->enabled_events & MC_ADDR_CHANGE) { + esw_update_vport_addr_list(esw, vport->vport, + MLX5_NVPRT_LIST_TYPE_MC); + esw_apply_vport_addr_list(esw, vport->vport, + MLX5_NVPRT_LIST_TYPE_MC); + } + + esw_debug(esw->dev, "vport[%d] Context Changed: Done\n", vport->vport); + if (vport->enabled) + arm_vport_context_events_cmd(dev, vport->vport, + vport->enabled_events); +} + +static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, + int enable_events) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + unsigned long flags; + + WARN_ON(vport->enabled); + + esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num); + mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport_num, + MLX5_ESW_VPORT_ADMIN_STATE_AUTO); + + /* Sync with current vport context */ + vport->enabled_events = enable_events; + esw_vport_change_handler(&vport->vport_change_handler); + + spin_lock_irqsave(&vport->lock, flags); + vport->enabled = true; + spin_unlock_irqrestore(&vport->lock, flags); + + arm_vport_context_events_cmd(esw->dev, vport_num, enable_events); + + esw->enabled_vports++; + esw_debug(esw->dev, "Enabled VPORT(%d)\n", vport_num); +} + +static void esw_cleanup_vport(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + struct l2addr_node *node; + struct vport_addr *addr; + struct hlist_node *tmp; + int hi; + + for_each_l2hash_node(node, tmp, vport->uc_list, hi) { + addr = container_of(node, struct vport_addr, node); + addr->action = MLX5_ACTION_DEL; + } + esw_apply_vport_addr_list(esw, vport_num, MLX5_NVPRT_LIST_TYPE_UC); + + for_each_l2hash_node(node, tmp, vport->mc_list, hi) { + addr = container_of(node, struct vport_addr, node); + addr->action = MLX5_ACTION_DEL; + } + esw_apply_vport_addr_list(esw, vport_num, MLX5_NVPRT_LIST_TYPE_MC); +} + +static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + unsigned long flags; + + if (!vport->enabled) + return; + + esw_debug(esw->dev, "Disabling vport(%d)\n", vport_num); + /* Mark this vport as disabled to discard new events */ + spin_lock_irqsave(&vport->lock, flags); + vport->enabled = false; + vport->enabled_events = 0; + spin_unlock_irqrestore(&vport->lock, flags); + + 
mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport_num, + MLX5_ESW_VPORT_ADMIN_STATE_DOWN); + /* Wait for current already scheduled events to complete */ + flush_workqueue(esw->work_queue); + /* Disable events from this vport */ + arm_vport_context_events_cmd(esw->dev, vport->vport, 0); + /* We don't assume VFs will cleanup after themselves */ + esw_cleanup_vport(esw, vport_num); + esw->enabled_vports--; +} + +/* Public E-Switch API */ +int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs) +{ + int err; + int i; + + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || + MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return 0; + + if (!MLX5_CAP_GEN(esw->dev, eswitch_flow_table) || + !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { + esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n"); + return -ENOTSUPP; + } + + esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d)\n", nvfs); + + esw_disable_vport(esw, 0); + + err = esw_create_fdb_table(esw, nvfs + 1); + if (err) + goto abort; + + for (i = 0; i <= nvfs; i++) + esw_enable_vport(esw, i, SRIOV_VPORT_EVENTS); + + esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n", + esw->enabled_vports); + return 0; + +abort: + esw_enable_vport(esw, 0, UC_ADDR_CHANGE); + return err; +} + +void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) +{ + int i; + + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || + MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return; + + esw_info(esw->dev, "disable SRIOV: active vports(%d)\n", + esw->enabled_vports); + + for (i = 0; i < esw->total_vports; i++) + esw_disable_vport(esw, i); + + esw_destroy_fdb_table(esw); + + /* VPORT 0 (PF) must be enabled back with non-sriov configuration */ + esw_enable_vport(esw, 0, UC_ADDR_CHANGE); +} + +int mlx5_eswitch_init(struct mlx5_core_dev *dev) +{ + int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); + int total_vports = 1 + pci_sriov_get_totalvfs(dev->pdev); + struct mlx5_eswitch *esw; + int vport_num; + int err; + + if (!MLX5_CAP_GEN(dev, vport_group_manager) || + MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return 0; + + esw_info(dev, + "Total vports %d, l2 table size(%d), per vport: max uc(%d) max mc(%d)\n", + total_vports, l2_table_size, + MLX5_MAX_UC_PER_VPORT(dev), + MLX5_MAX_MC_PER_VPORT(dev)); + + esw = kzalloc(sizeof(*esw), GFP_KERNEL); + if (!esw) + return -ENOMEM; + + esw->dev = dev; + + esw->l2_table.bitmap = kcalloc(BITS_TO_LONGS(l2_table_size), + sizeof(uintptr_t), GFP_KERNEL); + if (!esw->l2_table.bitmap) { + err = -ENOMEM; + goto abort; + } + esw->l2_table.size = l2_table_size; + + esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq"); + if (!esw->work_queue) { + err = -ENOMEM; + goto abort; + } + + esw->vports = kcalloc(total_vports, sizeof(struct mlx5_vport), + GFP_KERNEL); + if (!esw->vports) { + err = -ENOMEM; + goto abort; + } + + for (vport_num = 0; vport_num < total_vports; vport_num++) { + struct mlx5_vport *vport = &esw->vports[vport_num]; + + vport->vport = vport_num; + vport->dev = dev; + INIT_WORK(&vport->vport_change_handler, + esw_vport_change_handler); + spin_lock_init(&vport->lock); + } + + esw->total_vports = total_vports; + esw->enabled_vports = 0; + + dev->priv.eswitch = esw; + esw_enable_vport(esw, 0, UC_ADDR_CHANGE); + /* VF Vports will be enabled when SRIOV is enabled */ + return 0; +abort: + if (esw->work_queue) + destroy_workqueue(esw->work_queue); + kfree(esw->l2_table.bitmap); + 
kfree(esw->vports); + kfree(esw); + return err; +} + +void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) +{ + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || + MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return; + + esw_info(esw->dev, "cleanup\n"); + esw_disable_vport(esw, 0); + + esw->dev->priv.eswitch = NULL; + destroy_workqueue(esw->work_queue); + kfree(esw->l2_table.bitmap); + kfree(esw->vports); + kfree(esw); +} + +void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) +{ + struct mlx5_eqe_vport_change *vc_eqe = &eqe->data.vport_change; + u16 vport_num = be16_to_cpu(vc_eqe->vport_num); + struct mlx5_vport *vport; + + if (!esw) { + pr_warn("MLX5 E-Switch: vport %d got an event while eswitch is not initialized\n", + vport_num); + return; + } + + vport = &esw->vports[vport_num]; + spin_lock(&vport->lock); + if (vport->enabled) + queue_work(esw->work_queue, &vport->vport_change_handler); + spin_unlock(&vport->lock); +} + +/* Vport Administration */ +#define ESW_ALLOWED(esw) \ + (esw && MLX5_CAP_GEN(esw->dev, vport_group_manager) && mlx5_core_is_pf(esw->dev)) +#define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports) + +int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, + int vport, u8 mac[ETH_ALEN]) +{ + int err = 0; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac); + if (err) { + mlx5_core_warn(esw->dev, + "Failed to mlx5_modify_nic_vport_mac vport(%d) err=(%d)\n", + vport, err); + return err; + } + + return err; +} + +int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, + int vport, int link_state) +{ + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + return mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport, link_state); +} + +int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, + int vport, struct ifla_vf_info *ivi) +{ + u16 vlan; + u8 qos; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + memset(ivi, 0, sizeof(*ivi)); + ivi->vf = vport - 1; + + mlx5_query_nic_vport_mac_address(esw->dev, vport, ivi->mac); + ivi->linkstate = mlx5_query_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport); + query_esw_vport_cvlan(esw->dev, vport, &vlan, &qos); + ivi->vlan = vlan; + ivi->qos = qos; + ivi->spoofchk = 0; + + return 0; +} + +int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + int vport, u16 vlan, u8 qos) +{ + int set = 0; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport) || (vlan > 4095) || (qos > 7)) + return -EINVAL; + + if (vlan || qos) + set = 1; + + return modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set); +} + +int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, + int vport, + struct ifla_vf_stats *vf_stats) +{ + int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); + u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)]; + int err = 0; + u32 *out; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + out = mlx5_vzalloc(outlen); + if (!out) + return -ENOMEM; + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_vport_counter_in, in, opcode, + MLX5_CMD_OP_QUERY_VPORT_COUNTER); + MLX5_SET(query_vport_counter_in, in, op_mod, 0); + MLX5_SET(query_vport_counter_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_vport_counter_in, in, 
other_vport, 1); + + memset(out, 0, outlen); + err = mlx5_cmd_exec(esw->dev, in, sizeof(in), out, outlen); + if (err) + goto free_out; + + #define MLX5_GET_CTR(p, x) \ + MLX5_GET64(query_vport_counter_out, p, x) + + memset(vf_stats, 0, sizeof(*vf_stats)); + vf_stats->rx_packets = + MLX5_GET_CTR(out, received_eth_unicast.packets) + + MLX5_GET_CTR(out, received_eth_multicast.packets) + + MLX5_GET_CTR(out, received_eth_broadcast.packets); + + vf_stats->rx_bytes = + MLX5_GET_CTR(out, received_eth_unicast.octets) + + MLX5_GET_CTR(out, received_eth_multicast.octets) + + MLX5_GET_CTR(out, received_eth_broadcast.octets); + + vf_stats->tx_packets = + MLX5_GET_CTR(out, transmitted_eth_unicast.packets) + + MLX5_GET_CTR(out, transmitted_eth_multicast.packets) + + MLX5_GET_CTR(out, transmitted_eth_broadcast.packets); + + vf_stats->tx_bytes = + MLX5_GET_CTR(out, transmitted_eth_unicast.octets) + + MLX5_GET_CTR(out, transmitted_eth_multicast.octets) + + MLX5_GET_CTR(out, transmitted_eth_broadcast.octets); + + vf_stats->multicast = + MLX5_GET_CTR(out, received_eth_multicast.packets); + + vf_stats->broadcast = + MLX5_GET_CTR(out, received_eth_broadcast.packets); + +free_out: + kvfree(out); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h new file mode 100644 index 000000000000..02ff3eade026 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __MLX5_ESWITCH_H__ +#define __MLX5_ESWITCH_H__ + +#include <linux/if_ether.h> +#include <linux/if_link.h> +#include <linux/mlx5/device.h> + +#define MLX5_MAX_UC_PER_VPORT(dev) \ + (1 << MLX5_CAP_GEN(dev, log_max_current_uc_list)) + +#define MLX5_MAX_MC_PER_VPORT(dev) \ + (1 << MLX5_CAP_GEN(dev, log_max_current_mc_list)) + +#define MLX5_L2_ADDR_HASH_SIZE (BIT(BITS_PER_BYTE)) +#define MLX5_L2_ADDR_HASH(addr) (addr[5]) + +/* L2 -mac address based- hash helpers */ +struct l2addr_node { + struct hlist_node hlist; + u8 addr[ETH_ALEN]; +}; + +#define for_each_l2hash_node(hn, tmp, hash, i) \ + for (i = 0; i < MLX5_L2_ADDR_HASH_SIZE; i++) \ + hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist) + +#define l2addr_hash_find(hash, mac, type) ({ \ + int ix = MLX5_L2_ADDR_HASH(mac); \ + bool found = false; \ + type *ptr = NULL; \ + \ + hlist_for_each_entry(ptr, &hash[ix], node.hlist) \ + if (ether_addr_equal(ptr->node.addr, mac)) {\ + found = true; \ + break; \ + } \ + if (!found) \ + ptr = NULL; \ + ptr; \ +}) + +#define l2addr_hash_add(hash, mac, type, gfp) ({ \ + int ix = MLX5_L2_ADDR_HASH(mac); \ + type *ptr = NULL; \ + \ + ptr = kzalloc(sizeof(type), gfp); \ + if (ptr) { \ + ether_addr_copy(ptr->node.addr, mac); \ + hlist_add_head(&ptr->node.hlist, &hash[ix]);\ + } \ + ptr; \ +}) + +#define l2addr_hash_del(ptr) ({ \ + hlist_del(&ptr->node.hlist); \ + kfree(ptr); \ +}) + +struct mlx5_flow_rule { + void *ft; + u32 fi; + u8 match_criteria_enable; + u32 *match_criteria; + u32 *match_value; + u32 action; + u32 flow_tag; + bool valid; + atomic_t refcount; + struct mutex mutex; /* protect flow rule updates */ + struct list_head dest_list; +}; + +struct mlx5_vport { + struct mlx5_core_dev *dev; + int vport; + struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE]; + struct hlist_head mc_list[MLX5_L2_ADDR_HASH_SIZE]; + struct work_struct vport_change_handler; + + /* This spinlock protects access to vport data, between + * "esw_vport_disable" and ongoing interrupt "mlx5_eswitch_vport_event" + * once vport marked as disabled new interrupts are discarded. 
+ */ + spinlock_t lock; /* vport events sync */ + bool enabled; + u16 enabled_events; +}; + +struct mlx5_l2_table { + struct hlist_head l2_hash[MLX5_L2_ADDR_HASH_SIZE]; + u32 size; + unsigned long *bitmap; +}; + +struct mlx5_eswitch_fdb { + void *fdb; +}; + +struct mlx5_eswitch { + struct mlx5_core_dev *dev; + struct mlx5_l2_table l2_table; + struct mlx5_eswitch_fdb fdb_table; + struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE]; + struct workqueue_struct *work_queue; + struct mlx5_vport *vports; + int total_vports; + int enabled_vports; +}; + +/* E-Switch API */ +int mlx5_eswitch_init(struct mlx5_core_dev *dev); +void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); +void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe); +int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs); +void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw); +int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, + int vport, u8 mac[ETH_ALEN]); +int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, + int vport, int link_state); +int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + int vport, u16 vlan, u8 qos); +int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, + int vport, struct ifla_vf_info *ivi); +int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, + int vport, + struct ifla_vf_stats *vf_stats); + +#endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 9335e5ae18cc..1c9f9a54a873 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -160,6 +160,30 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) if (err) return err; } + + if (MLX5_CAP_GEN(dev, vport_group_manager) && + MLX5_CAP_GEN(dev, eswitch_flow_table)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE, + HCA_CAP_OPMOD_GET_MAX); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, vport_group_manager)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH, + HCA_CAP_OPMOD_GET_MAX); + if (err) + return err; + } + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 4ac8d4cc4973..c6de3240f76f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -49,6 +49,9 @@ #include <linux/delay.h> #include <linux/mlx5/mlx5_ifc.h> #include "mlx5_core.h" +#ifdef CONFIG_MLX5_CORE_EN +#include "eswitch.h" +#endif MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver"); @@ -454,6 +457,9 @@ static int set_hca_ctrl(struct mlx5_core_dev *dev) struct mlx5_reg_host_endianess he_out; int err; + if (!mlx5_core_is_pf(dev)) + return 0; + memset(&he_in, 0, sizeof(he_in)); he_in.he = MLX5_SET_HOST_ENDIANNESS; err = mlx5_core_access_reg(dev, &he_in, sizeof(he_in), @@ -462,42 +468,39 @@ static int set_hca_ctrl(struct mlx5_core_dev *dev) return err; } -static int mlx5_core_enable_hca(struct mlx5_core_dev *dev) +int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id) { + u32 out[MLX5_ST_SZ_DW(enable_hca_out)]; + u32 in[MLX5_ST_SZ_DW(enable_hca_in)]; int err; - struct mlx5_enable_hca_mbox_in in; - struct mlx5_enable_hca_mbox_out out; - memset(&in, 0, 
sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ENABLE_HCA); + memset(in, 0, sizeof(in)); + MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA); + MLX5_SET(enable_hca_in, in, function_id, func_id); + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); if (err) return err; - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - - return 0; + return mlx5_cmd_status_to_err_v2(out); } -static int mlx5_core_disable_hca(struct mlx5_core_dev *dev) +int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id) { + u32 out[MLX5_ST_SZ_DW(disable_hca_out)]; + u32 in[MLX5_ST_SZ_DW(disable_hca_in)]; int err; - struct mlx5_disable_hca_mbox_in in; - struct mlx5_disable_hca_mbox_out out; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DISABLE_HCA); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + memset(in, 0, sizeof(in)); + MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); + MLX5_SET(disable_hca_in, in, function_id, func_id); + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) return err; - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - - return 0; + return mlx5_cmd_status_to_err_v2(out); } static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) @@ -942,7 +945,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_pagealloc_init(dev); - err = mlx5_core_enable_hca(dev); + err = mlx5_core_enable_hca(dev, 0); if (err) { dev_err(&pdev->dev, "enable hca failed\n"); goto err_pagealloc_cleanup; @@ -1052,6 +1055,20 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_init_srq_table(dev); mlx5_init_mr_table(dev); +#ifdef CONFIG_MLX5_CORE_EN + err = mlx5_eswitch_init(dev); + if (err) { + dev_err(&pdev->dev, "eswitch init failed %d\n", err); + goto err_reg_dev; + } +#endif + + err = mlx5_sriov_init(dev); + if (err) { + dev_err(&pdev->dev, "sriov init failed %d\n", err); + goto err_sriov; + } + err = mlx5_register_device(dev); if (err) { dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err); @@ -1068,6 +1085,13 @@ out: return 0; +err_sriov: + if (mlx5_sriov_cleanup(dev)) + dev_err(&dev->pdev->dev, "sriov cleanup failed\n"); + +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_cleanup(dev->priv.eswitch); +#endif err_reg_dev: mlx5_cleanup_mr_table(dev); mlx5_cleanup_srq_table(dev); @@ -1106,7 +1130,7 @@ reclaim_boot_pages: mlx5_reclaim_startup_pages(dev); err_disable_hca: - mlx5_core_disable_hca(dev); + mlx5_core_disable_hca(dev, 0); err_pagealloc_cleanup: mlx5_pagealloc_cleanup(dev); @@ -1123,6 +1147,13 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) { int err = 0; + err = mlx5_sriov_cleanup(dev); + if (err) { + dev_warn(&dev->pdev->dev, "%s: sriov cleanup failed - abort\n", + __func__); + return err; + } + mutex_lock(&dev->intf_state_mutex); if (dev->interface_state == MLX5_INTERFACE_STATE_DOWN) { dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n", @@ -1130,6 +1161,10 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto out; } mlx5_unregister_device(dev); +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_cleanup(dev->priv.eswitch); +#endif + mlx5_cleanup_mr_table(dev); mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); @@ -1149,7 +1184,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) 
} mlx5_pagealloc_stop(dev); mlx5_reclaim_startup_pages(dev); - mlx5_core_disable_hca(dev); + mlx5_core_disable_hca(dev, 0); mlx5_pagealloc_cleanup(dev); mlx5_cmd_cleanup(dev); @@ -1195,6 +1230,7 @@ static int init_one(struct pci_dev *pdev, return -ENOMEM; } priv = &dev->priv; + priv->pci_dev_data = id->driver_data; pci_set_drvdata(pdev, dev); @@ -1365,12 +1401,12 @@ static const struct pci_error_handlers mlx5_err_handler = { }; static const struct pci_device_id mlx5_core_pci_table[] = { - { PCI_VDEVICE(MELLANOX, 0x1011) }, /* Connect-IB */ - { PCI_VDEVICE(MELLANOX, 0x1012) }, /* Connect-IB VF */ - { PCI_VDEVICE(MELLANOX, 0x1013) }, /* ConnectX-4 */ - { PCI_VDEVICE(MELLANOX, 0x1014) }, /* ConnectX-4 VF */ - { PCI_VDEVICE(MELLANOX, 0x1015) }, /* ConnectX-4LX */ - { PCI_VDEVICE(MELLANOX, 0x1016) }, /* ConnectX-4LX VF */ + { PCI_VDEVICE(MELLANOX, 0x1011) }, /* Connect-IB */ + { PCI_VDEVICE(MELLANOX, 0x1012), MLX5_PCI_DEV_IS_VF}, /* Connect-IB VF */ + { PCI_VDEVICE(MELLANOX, 0x1013) }, /* ConnectX-4 */ + { PCI_VDEVICE(MELLANOX, 0x1014), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4 VF */ + { PCI_VDEVICE(MELLANOX, 0x1015) }, /* ConnectX-4LX */ + { PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4LX VF */ { 0, } }; @@ -1381,7 +1417,8 @@ static struct pci_driver mlx5_core_driver = { .id_table = mlx5_core_pci_table, .probe = init_one, .remove = remove_one, - .err_handler = &mlx5_err_handler + .err_handler = &mlx5_err_handler, + .sriov_configure = mlx5_core_sriov_configure, }; static int __init init(void) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index cee5b7a839bc..bee7da822dfe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -36,6 +36,7 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/sched.h> +#include <linux/if_link.h> #define DRIVER_NAME "mlx5_core" #define DRIVER_VERSION "3.0-1" @@ -90,6 +91,10 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); void mlx5_enter_error_state(struct mlx5_core_dev *dev); void mlx5_disable_device(struct mlx5_core_dev *dev); +int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs); +int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id); +int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id); +int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); void mlx5e_init(void); void mlx5e_cleanup(void); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 4d3377b12657..9eeee0545f1c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -33,6 +33,7 @@ #include <linux/highmem.h> #include <linux/kernel.h> #include <linux/module.h> +#include <linux/delay.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/cmd.h> #include "mlx5_core.h" @@ -95,6 +96,7 @@ struct mlx5_manage_pages_outbox { enum { MAX_RECLAIM_TIME_MSECS = 5000, + MAX_RECLAIM_VFS_PAGES_TIME_MSECS = 2 * 1000 * 60, }; enum { @@ -352,6 +354,10 @@ retry: goto out_4k; } + dev->priv.fw_pages += npages; + if (func_id) + dev->priv.vfs_pages += npages; + mlx5_core_dbg(dev, "err %d\n", err); kvfree(in); @@ -405,6 +411,12 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, } num_claimed = be32_to_cpu(out->num_entries); + if (num_claimed > npages) { + mlx5_core_warn(dev, "fw returned %d, driver asked %d 
=> corruption\n", + num_claimed, npages); + err = -EINVAL; + goto out_free; + } if (nclaimed) *nclaimed = num_claimed; @@ -412,6 +424,9 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, addr = be64_to_cpu(out->pas[i]); free_4k(dev, addr); } + dev->priv.fw_pages -= num_claimed; + if (func_id) + dev->priv.vfs_pages -= num_claimed; out_free: kvfree(out); @@ -548,3 +563,26 @@ void mlx5_pagealloc_stop(struct mlx5_core_dev *dev) { destroy_workqueue(dev->priv.pg_wq); } + +int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev) +{ + unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); + int prev_vfs_pages = dev->priv.vfs_pages; + + mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_vfs_pages, + dev->priv.name); + while (dev->priv.vfs_pages) { + if (time_after(jiffies, end)) { + mlx5_core_warn(dev, "aborting while there are %d pending pages\n", dev->priv.vfs_pages); + return -ETIMEDOUT; + } + if (dev->priv.vfs_pages < prev_vfs_pages) { + end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); + prev_vfs_pages = dev->priv.vfs_pages; + } + msleep(50); + } + + mlx5_core_dbg(dev, "All pages received from %s\n", dev->priv.name); + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c new file mode 100644 index 000000000000..7b24386794f9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -0,0 +1,233 @@ +/* + * Copyright (c) 2014, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/pci.h> +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" +#ifdef CONFIG_MLX5_CORE_EN +#include "eswitch.h" +#endif + +static void enable_vfs(struct mlx5_core_dev *dev, int num_vfs) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int err; + int vf; + + for (vf = 1; vf <= num_vfs; vf++) { + err = mlx5_core_enable_hca(dev, vf); + if (err) { + mlx5_core_warn(dev, "failed to enable VF %d\n", vf - 1); + } else { + sriov->vfs_ctx[vf - 1].enabled = 1; + mlx5_core_dbg(dev, "successfully enabled VF %d\n", vf - 1); + } + } +} + +static void disable_vfs(struct mlx5_core_dev *dev, int num_vfs) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int vf; + + for (vf = 1; vf <= num_vfs; vf++) { + if (sriov->vfs_ctx[vf - 1].enabled) { + if (mlx5_core_disable_hca(dev, vf)) + mlx5_core_warn(dev, "failed to disable VF %d\n", vf - 1); + else + sriov->vfs_ctx[vf - 1].enabled = 0; + } + } +} + +static int mlx5_core_create_vfs(struct pci_dev *pdev, int num_vfs) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + int err; + + if (pci_num_vf(pdev)) + pci_disable_sriov(pdev); + + enable_vfs(dev, num_vfs); + + err = pci_enable_sriov(pdev, num_vfs); + if (err) { + dev_warn(&pdev->dev, "enable sriov failed %d\n", err); + goto ex; + } + + return 0; + +ex: + disable_vfs(dev, num_vfs); + return err; +} + +static int mlx5_core_sriov_enable(struct pci_dev *pdev, int num_vfs) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int err; + + kfree(sriov->vfs_ctx); + sriov->vfs_ctx = kcalloc(num_vfs, sizeof(*sriov->vfs_ctx), GFP_ATOMIC); + if (!sriov->vfs_ctx) + return -ENOMEM; + + sriov->enabled_vfs = num_vfs; + err = mlx5_core_create_vfs(pdev, num_vfs); + if (err) { + kfree(sriov->vfs_ctx); + sriov->vfs_ctx = NULL; + return err; + } + + return 0; +} + +static void mlx5_core_init_vfs(struct mlx5_core_dev *dev, int num_vfs) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + + sriov->num_vfs = num_vfs; +} + +static void mlx5_core_cleanup_vfs(struct mlx5_core_dev *dev) +{ + struct mlx5_core_sriov *sriov; + + sriov = &dev->priv.sriov; + disable_vfs(dev, sriov->num_vfs); + + if (mlx5_wait_for_vf_pages(dev)) + mlx5_core_warn(dev, "timeout claiming VFs pages\n"); + + sriov->num_vfs = 0; +} + +int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int err; + + mlx5_core_dbg(dev, "requsted num_vfs %d\n", num_vfs); + if (!mlx5_core_is_pf(dev)) + return -EPERM; + + mlx5_core_cleanup_vfs(dev); + + if (!num_vfs) { +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_disable_sriov(dev->priv.eswitch); +#endif + kfree(sriov->vfs_ctx); + sriov->vfs_ctx = NULL; + if (!pci_vfs_assigned(pdev)) + pci_disable_sriov(pdev); + else + pr_info("unloading PF driver while leaving orphan VFs\n"); + return 0; + } + + err = mlx5_core_sriov_enable(pdev, num_vfs); + if (err) { + dev_warn(&pdev->dev, "mlx5_core_sriov_enable failed %d\n", err); + return err; + } + + mlx5_core_init_vfs(dev, num_vfs); +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs); +#endif + + return num_vfs; +} + +static int sync_required(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int cur_vfs = pci_num_vf(pdev); + + if (cur_vfs != sriov->num_vfs) { + pr_info("current VFs %d, registered %d - sync needed\n", cur_vfs, sriov->num_vfs); + return 1; + } + + 
return 0; +} + +int mlx5_sriov_init(struct mlx5_core_dev *dev) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + struct pci_dev *pdev = dev->pdev; + int cur_vfs; + + if (!mlx5_core_is_pf(dev)) + return 0; + + if (!sync_required(dev->pdev)) + return 0; + + cur_vfs = pci_num_vf(pdev); + sriov->vfs_ctx = kcalloc(cur_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL); + if (!sriov->vfs_ctx) + return -ENOMEM; + + sriov->enabled_vfs = cur_vfs; + + mlx5_core_init_vfs(dev, cur_vfs); +#ifdef CONFIG_MLX5_CORE_EN + if (cur_vfs) + mlx5_eswitch_enable_sriov(dev->priv.eswitch, cur_vfs); +#endif + + enable_vfs(dev, cur_vfs); + + return 0; +} + +int mlx5_sriov_cleanup(struct mlx5_core_dev *dev) +{ + struct pci_dev *pdev = dev->pdev; + int err; + + if (!mlx5_core_is_pf(dev)) + return 0; + + err = mlx5_core_sriov_configure(pdev, 0); + if (err) + return err; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index b94177ebcf3a..076197efea9b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -36,54 +36,399 @@ #include <linux/mlx5/vport.h> #include "mlx5_core.h" -u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod) +static int _mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, + u16 vport, u32 *out, int outlen) { - u32 in[MLX5_ST_SZ_DW(query_vport_state_in)]; - u32 out[MLX5_ST_SZ_DW(query_vport_state_out)]; int err; + u32 in[MLX5_ST_SZ_DW(query_vport_state_in)]; memset(in, 0, sizeof(in)); MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE); MLX5_SET(query_vport_state_in, in, op_mod, opmod); + MLX5_SET(query_vport_state_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_vport_state_in, in, other_vport, 1); - err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, - sizeof(out)); + err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); if (err) mlx5_core_warn(mdev, "MLX5_CMD_OP_QUERY_VPORT_STATE failed\n"); + return err; +} + +u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) +{ + u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {0}; + + _mlx5_query_vport_state(mdev, opmod, vport, out, sizeof(out)); + return MLX5_GET(query_vport_state_out, out, state); } -EXPORT_SYMBOL(mlx5_query_vport_state); +EXPORT_SYMBOL_GPL(mlx5_query_vport_state); + +u8 mlx5_query_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) +{ + u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {0}; + + _mlx5_query_vport_state(mdev, opmod, vport, out, sizeof(out)); + + return MLX5_GET(query_vport_state_out, out, admin_state); +} +EXPORT_SYMBOL(mlx5_query_vport_admin_state); -void mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr) +int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, + u16 vport, u8 state) +{ + u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)]; + u32 out[MLX5_ST_SZ_DW(modify_vport_state_out)]; + int err; + + memset(in, 0, sizeof(in)); + + MLX5_SET(modify_vport_state_in, in, opcode, + MLX5_CMD_OP_MODIFY_VPORT_STATE); + MLX5_SET(modify_vport_state_in, in, op_mod, opmod); + MLX5_SET(modify_vport_state_in, in, vport_number, vport); + + if (vport) + MLX5_SET(modify_vport_state_in, in, other_vport, 1); + + MLX5_SET(modify_vport_state_in, in, admin_state, state); + + err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, + sizeof(out)); + if (err) + mlx5_core_warn(mdev, "MLX5_CMD_OP_MODIFY_VPORT_STATE failed\n"); + + return err; +} 
+EXPORT_SYMBOL(mlx5_modify_vport_admin_state); + +static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); + + return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); +} + +static int mlx5_modify_nic_vport_context(struct mlx5_core_dev *mdev, void *in, + int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + + memset(out, 0, sizeof(out)); + return mlx5_cmd_exec_check_status(mdev, in, inlen, out, sizeof(out)); +} + +int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, + u16 vport, u8 *addr) { - u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; u32 *out; int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); u8 *out_addr; + int err; out = mlx5_vzalloc(outlen); if (!out) - return; + return -ENOMEM; out_addr = MLX5_ADDR_OF(query_nic_vport_context_out, out, nic_vport_context.permanent_address); + err = mlx5_query_nic_vport_context(mdev, vport, out, outlen); + if (err) + goto out; + + ether_addr_copy(addr, &out_addr[2]); + +out: + kvfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_address); + +int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev, + u16 vport, u8 *addr) +{ + void *in; + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + int err; + void *nic_vport_ctx; + u8 *perm_mac; + + in = mlx5_vzalloc(inlen); + if (!in) { + mlx5_core_warn(mdev, "failed to allocate inbox\n"); + return -ENOMEM; + } + + MLX5_SET(modify_nic_vport_context_in, in, + field_select.permanent_address, 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + + if (vport) + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, nic_vport_context); + perm_mac = MLX5_ADDR_OF(nic_vport_context, nic_vport_ctx, + permanent_address); + + ether_addr_copy(&perm_mac[2], addr); + + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + + kvfree(in); + + return err; +} +EXPORT_SYMBOL(mlx5_modify_nic_vport_mac_address); + +int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, + u32 vport, + enum mlx5_list_type list_type, + u8 addr_list[][ETH_ALEN], + int *list_size) +{ + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; + void *nic_vport_ctx; + int max_list_size; + int req_list_size; + int out_sz; + void *out; + int err; + int i; + + req_list_size = *list_size; + + max_list_size = list_type == MLX5_NVPRT_LIST_TYPE_UC ? 
+ 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN(dev, log_max_current_mc_list); + + if (req_list_size > max_list_size) { + mlx5_core_warn(dev, "Requested list size (%d) > (%d) max_list_size\n", + req_list_size, max_list_size); + req_list_size = max_list_size; + } + + out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + req_list_size * MLX5_ST_SZ_BYTES(mac_address_layout); + memset(in, 0, sizeof(in)); + out = kzalloc(out_sz, GFP_KERNEL); + if (!out) + return -ENOMEM; MLX5_SET(query_nic_vport_context_in, in, opcode, MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, list_type); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); - memset(out, 0, outlen); - mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); + if (vport) + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); - ether_addr_copy(addr, &out_addr[2]); + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_sz); + if (err) + goto out; - kvfree(out); + nic_vport_ctx = MLX5_ADDR_OF(query_nic_vport_context_out, out, + nic_vport_context); + req_list_size = MLX5_GET(nic_vport_context, nic_vport_ctx, + allowed_list_size); + + *list_size = req_list_size; + for (i = 0; i < req_list_size; i++) { + u8 *mac_addr = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]) + 2; + ether_addr_copy(addr_list[i], mac_addr); + } +out: + kfree(out); + return err; } -EXPORT_SYMBOL(mlx5_query_nic_vport_mac_address); +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_list); + +int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, + enum mlx5_list_type list_type, + u8 addr_list[][ETH_ALEN], + int list_size) +{ + u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + void *nic_vport_ctx; + int max_list_size; + int in_sz; + void *in; + int err; + int i; + + max_list_size = list_type == MLX5_NVPRT_LIST_TYPE_UC ? 
+ 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN(dev, log_max_current_mc_list); + + if (list_size > max_list_size) + return -ENOSPC; + + in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + list_size * MLX5_ST_SZ_BYTES(mac_address_layout); + + memset(out, 0, sizeof(out)); + in = kzalloc(in_sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, in, + field_select.addresses_list, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, + nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_type, list_type); + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_size, list_size); + + for (i = 0; i < list_size; i++) { + u8 *curr_mac = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]) + 2; + ether_addr_copy(curr_mac, addr_list[i]); + } + + err = mlx5_cmd_exec_check_status(dev, in, in_sz, out, sizeof(out)); + kfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list); + +int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev, + u32 vport, + u16 vlans[], + int *size) +{ + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; + void *nic_vport_ctx; + int req_list_size; + int max_list_size; + int out_sz; + void *out; + int err; + int i; + + req_list_size = *size; + max_list_size = 1 << MLX5_CAP_GEN(dev, log_max_vlan_list); + if (req_list_size > max_list_size) { + mlx5_core_warn(dev, "Requested list size (%d) > (%d) max list size\n", + req_list_size, max_list_size); + req_list_size = max_list_size; + } + + out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + req_list_size * MLX5_ST_SZ_BYTES(vlan_layout); + + memset(in, 0, sizeof(in)); + out = kzalloc(out_sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, + MLX5_NVPRT_LIST_TYPE_VLAN); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + + if (vport) + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); + + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_sz); + if (err) + goto out; + + nic_vport_ctx = MLX5_ADDR_OF(query_nic_vport_context_out, out, + nic_vport_context); + req_list_size = MLX5_GET(nic_vport_context, nic_vport_ctx, + allowed_list_size); + + *size = req_list_size; + for (i = 0; i < req_list_size; i++) { + void *vlan_addr = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]); + vlans[i] = MLX5_GET(vlan_layout, vlan_addr, vlan); + } +out: + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_vlans); + +int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, + u16 vlans[], + int list_size) +{ + u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + void *nic_vport_ctx; + int max_list_size; + int in_sz; + void *in; + int err; + int i; + + max_list_size = 1 << MLX5_CAP_GEN(dev, log_max_vlan_list); + + if (list_size > max_list_size) + return -ENOSPC; + + in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + list_size * MLX5_ST_SZ_BYTES(vlan_layout); + + memset(out, 0, sizeof(out)); + in = kzalloc(in_sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, in, + field_select.addresses_list, 1); + + nic_vport_ctx = 
MLX5_ADDR_OF(modify_nic_vport_context_in, in, + nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_type, MLX5_NVPRT_LIST_TYPE_VLAN); + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_size, list_size); + + for (i = 0; i < list_size; i++) { + void *vlan_addr = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]); + MLX5_SET(vlan_layout, vlan_addr, vlan, vlans[i]); + } + + err = mlx5_cmd_exec_check_status(dev, in, in_sz, out, sizeof(out)); + kfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_vlans); int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, u8 port_num, u16 vf_num, u16 gid_index, @@ -343,3 +688,65 @@ int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, return err; } EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid); + +int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev, + u32 vport, + int *promisc_uc, + int *promisc_mc, + int *promisc_all) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + int err; + + out = kzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_query_nic_vport_context(mdev, vport, out, outlen); + if (err) + goto out; + + *promisc_uc = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.promisc_uc); + *promisc_mc = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.promisc_mc); + *promisc_all = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.promisc_all); + +out: + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_promisc); + +int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev, + int promisc_uc, + int promisc_mc, + int promisc_all) +{ + void *in; + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + int err; + + in = mlx5_vzalloc(inlen); + if (!in) { + mlx5_core_err(mdev, "failed to allocate inbox\n"); + return -ENOMEM; + } + + MLX5_SET(modify_nic_vport_context_in, in, field_select.promisc, 1); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_uc, promisc_uc); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_mc, promisc_mc); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_all, promisc_all); + + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + + kvfree(in); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_promisc); diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig index e36e12219c9b..ec8caf8fedc6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -10,6 +10,14 @@ config MLXSW_CORE To compile this driver as a module, choose M here: the module will be called mlxsw_core. +config MLXSW_CORE_HWMON + bool "HWMON support for Mellanox Technologies Switch ASICs" + depends on MLXSW_CORE && HWMON + depends on !(MLXSW_CORE=y && HWMON=m) + default y + ---help--- + Say Y here if you want to expose HWMON interface on mlxsw devices. 
+ config MLXSW_PCI tristate "PCI bus implementation for Mellanox Technologies Switch ASICs" depends on PCI && HAS_DMA && HAS_IOMEM && MLXSW_CORE diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index af015818fd19..584cac444852 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -1,5 +1,6 @@ obj-$(CONFIG_MLXSW_CORE) += mlxsw_core.o mlxsw_core-objs := core.o +mlxsw_core-$(CONFIG_MLXSW_CORE_HWMON) += core_hwmon.o obj-$(CONFIG_MLXSW_PCI) += mlxsw_pci.o mlxsw_pci-objs := pci.o obj-$(CONFIG_MLXSW_SWITCHX2) += mlxsw_switchx2.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 97f0d93caf99..af8a48b3b3ad 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -105,6 +105,10 @@ struct mlxsw_core { struct debugfs_blob_wrapper vsd_blob; struct debugfs_blob_wrapper psid_blob; } dbg; + struct { + u8 *mapping; /* lag_id+port_index to local_port mapping */ + } lag; + struct mlxsw_hwmon *hwmon; unsigned long driver_priv[0]; /* driver_priv has to be always the last item */ }; @@ -814,6 +818,17 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, goto err_alloc_stats; } + if (mlxsw_driver->profile->used_max_lag && + mlxsw_driver->profile->used_max_port_per_lag) { + alloc_size = sizeof(u8) * mlxsw_driver->profile->max_lag * + mlxsw_driver->profile->max_port_per_lag; + mlxsw_core->lag.mapping = kzalloc(alloc_size, GFP_KERNEL); + if (!mlxsw_core->lag.mapping) { + err = -ENOMEM; + goto err_alloc_lag_mapping; + } + } + err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile); if (err) goto err_bus_init; @@ -822,6 +837,10 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, if (err) goto err_emad_init; + err = mlxsw_hwmon_init(mlxsw_core, mlxsw_bus_info, &mlxsw_core->hwmon); + if (err) + goto err_hwmon_init; + err = mlxsw_driver->init(mlxsw_core->driver_priv, mlxsw_core, mlxsw_bus_info); if (err) @@ -836,10 +855,14 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, err_debugfs_init: mlxsw_core->driver->fini(mlxsw_core->driver_priv); err_driver_init: + mlxsw_hwmon_fini(mlxsw_core->hwmon); +err_hwmon_init: mlxsw_emad_fini(mlxsw_core); err_emad_init: mlxsw_bus->fini(bus_priv); err_bus_init: + kfree(mlxsw_core->lag.mapping); +err_alloc_lag_mapping: free_percpu(mlxsw_core->pcpu_stats); err_alloc_stats: kfree(mlxsw_core); @@ -855,8 +878,10 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core) mlxsw_core_debugfs_fini(mlxsw_core); mlxsw_core->driver->fini(mlxsw_core->driver_priv); + mlxsw_hwmon_fini(mlxsw_core->hwmon); mlxsw_emad_fini(mlxsw_core); mlxsw_core->bus->fini(mlxsw_core->bus_priv); + kfree(mlxsw_core->lag.mapping); free_percpu(mlxsw_core->pcpu_stats); kfree(mlxsw_core); mlxsw_core_driver_put(device_kind); @@ -1188,11 +1213,25 @@ void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, struct mlxsw_rx_listener_item *rxl_item; const struct mlxsw_rx_listener *rxl; struct mlxsw_core_pcpu_stats *pcpu_stats; - u8 local_port = rx_info->sys_port; + u8 local_port; bool found = false; - dev_dbg_ratelimited(mlxsw_core->bus_info->dev, "%s: sys_port = %d, trap_id = 0x%x\n", - __func__, rx_info->sys_port, rx_info->trap_id); + if (rx_info->is_lag) { + dev_dbg_ratelimited(mlxsw_core->bus_info->dev, "%s: lag_id = %d, lag_port_index = 0x%x\n", + __func__, 
rx_info->u.lag_id, + rx_info->trap_id); + /* Upper layer does not care if the skb came from LAG or not, + * so just get the local_port for the lag port and push it up. + */ + local_port = mlxsw_core_lag_mapping_get(mlxsw_core, + rx_info->u.lag_id, + rx_info->lag_port_index); + } else { + local_port = rx_info->u.sys_port; + } + + dev_dbg_ratelimited(mlxsw_core->bus_info->dev, "%s: local_port = %d, trap_id = 0x%x\n", + __func__, local_port, rx_info->trap_id); if ((rx_info->trap_id >= MLXSW_TRAP_ID_MAX) || (local_port >= MLXSW_PORT_MAX_PORTS)) @@ -1236,6 +1275,48 @@ drop: } EXPORT_SYMBOL(mlxsw_core_skb_receive); +static int mlxsw_core_lag_mapping_index(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 port_index) +{ + return mlxsw_core->driver->profile->max_port_per_lag * lag_id + + port_index; +} + +void mlxsw_core_lag_mapping_set(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 port_index, u8 local_port) +{ + int index = mlxsw_core_lag_mapping_index(mlxsw_core, + lag_id, port_index); + + mlxsw_core->lag.mapping[index] = local_port; +} +EXPORT_SYMBOL(mlxsw_core_lag_mapping_set); + +u8 mlxsw_core_lag_mapping_get(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 port_index) +{ + int index = mlxsw_core_lag_mapping_index(mlxsw_core, + lag_id, port_index); + + return mlxsw_core->lag.mapping[index]; +} +EXPORT_SYMBOL(mlxsw_core_lag_mapping_get); + +void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 local_port) +{ + int i; + + for (i = 0; i < mlxsw_core->driver->profile->max_port_per_lag; i++) { + int index = mlxsw_core_lag_mapping_index(mlxsw_core, + lag_id, i); + + if (mlxsw_core->lag.mapping[index] == local_port) + mlxsw_core->lag.mapping[index] = 0; + } +} +EXPORT_SYMBOL(mlxsw_core_lag_mapping_clear); + int mlxsw_cmd_exec(struct mlxsw_core *mlxsw_core, u16 opcode, u8 opcode_mod, u32 in_mod, bool out_mbox_direct, char *in_mbox, size_t in_mbox_size, diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 807827350a89..4833fb33ce07 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -112,13 +112,25 @@ int mlxsw_reg_write(struct mlxsw_core *mlxsw_core, const struct mlxsw_reg_info *reg, char *payload); struct mlxsw_rx_info { - u16 sys_port; + bool is_lag; + union { + u16 sys_port; + u16 lag_id; + } u; + u8 lag_port_index; int trap_id; }; void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, struct mlxsw_rx_info *rx_info); +void mlxsw_core_lag_mapping_set(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 port_index, u8 local_port); +u8 mlxsw_core_lag_mapping_get(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 port_index); +void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core, + u16 lag_id, u8 local_port); + #define MLXSW_CONFIG_PROFILE_SWID_COUNT 8 struct mlxsw_swid_config { @@ -209,4 +221,28 @@ struct mlxsw_bus_info { u8 psid[MLXSW_CMD_BOARDINFO_PSID_LEN]; }; +struct mlxsw_hwmon; + +#ifdef CONFIG_MLXSW_CORE_HWMON + +int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info, + struct mlxsw_hwmon **p_hwmon); +void mlxsw_hwmon_fini(struct mlxsw_hwmon *mlxsw_hwmon); + +#else + +static inline int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info, + struct mlxsw_hwmon **p_hwmon) +{ + return 0; +} + +static inline void mlxsw_hwmon_fini(struct mlxsw_hwmon *mlxsw_hwmon) +{ +} + +#endif + #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c 
b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c new file mode 100644 index 000000000000..4dad146b41ae --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -0,0 +1,342 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c + * Copyright (c) 2015 Mellanox Technologies. All rights reserved. + * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/device.h> +#include <linux/sysfs.h> +#include <linux/hwmon.h> +#include <linux/err.h> + +#include "core.h" + +#define MLXSW_HWMON_TEMP_SENSOR_MAX_COUNT 127 +#define MLXSW_HWMON_ATTR_COUNT (MLXSW_HWMON_TEMP_SENSOR_MAX_COUNT * 4 + \ + MLXSW_MFCR_TACHOS_MAX + MLXSW_MFCR_PWMS_MAX) + +struct mlxsw_hwmon_attr { + struct device_attribute dev_attr; + struct mlxsw_hwmon *hwmon; + unsigned int type_index; + char name[16]; +}; + +struct mlxsw_hwmon { + struct mlxsw_core *core; + const struct mlxsw_bus_info *bus_info; + struct device *hwmon_dev; + struct attribute_group group; + const struct attribute_group *groups[2]; + struct attribute *attrs[MLXSW_HWMON_ATTR_COUNT + 1]; + struct mlxsw_hwmon_attr hwmon_attrs[MLXSW_HWMON_ATTR_COUNT]; + unsigned int attrs_count; +}; + +static ssize_t mlxsw_hwmon_temp_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + unsigned int temp; + int err; + + mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index, + false, false); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n"); + return err; + } + mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); + return sprintf(buf, "%u\n", temp); +} + +static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + unsigned int temp_max; + int err; + + mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index, + false, false); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n"); + return err; + } + mlxsw_reg_mtmp_unpack(mtmp_pl, NULL, &temp_max, NULL); + return sprintf(buf, "%u\n", temp_max); +} + +static ssize_t mlxsw_hwmon_fan_rpm_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + char mfsm_pl[MLXSW_REG_MFSM_LEN]; + int err; + + mlxsw_reg_mfsm_pack(mfsm_pl, mlwsw_hwmon_attr->type_index); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mfsm), mfsm_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query fan\n"); + return err; + } + return sprintf(buf, "%u\n", mlxsw_reg_mfsm_rpm_get(mfsm_pl)); +} + +static ssize_t mlxsw_hwmon_pwm_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + char mfsc_pl[MLXSW_REG_MFSC_LEN]; + int err; + + mlxsw_reg_mfsc_pack(mfsc_pl, mlwsw_hwmon_attr->type_index, 0); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mfsc), mfsc_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query PWM\n"); + return err; + } + return sprintf(buf, "%u\n", + mlxsw_reg_mfsc_pwm_duty_cycle_get(mfsc_pl)); +} + +static ssize_t mlxsw_hwmon_pwm_store(struct device *dev, + struct 
device_attribute *attr, + const char *buf, size_t len) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + char mfsc_pl[MLXSW_REG_MFSC_LEN]; + unsigned long val; + int err; + + err = kstrtoul(buf, 10, &val); + if (err) + return err; + if (val > 255) + return -EINVAL; + + mlxsw_reg_mfsc_pack(mfsc_pl, mlwsw_hwmon_attr->type_index, val); + err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mfsc), mfsc_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to write PWM\n"); + return err; + } + return err ? err : len; +} + +enum mlxsw_hwmon_attr_type { + MLXSW_HWMON_ATTR_TYPE_TEMP, + MLXSW_HWMON_ATTR_TYPE_TEMP_MAX, + MLXSW_HWMON_ATTR_TYPE_FAN_RPM, + MLXSW_HWMON_ATTR_TYPE_PWM, +}; + +static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon, + enum mlxsw_hwmon_attr_type attr_type, + unsigned int type_index, unsigned int num) { + struct mlxsw_hwmon_attr *mlxsw_hwmon_attr; + unsigned int attr_index; + + attr_index = mlxsw_hwmon->attrs_count; + mlxsw_hwmon_attr = &mlxsw_hwmon->hwmon_attrs[attr_index]; + + switch (attr_type) { + case MLXSW_HWMON_ATTR_TYPE_TEMP: + mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_temp_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = S_IRUGO; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_input", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_MAX: + mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_temp_max_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = S_IRUGO; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_highest", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_FAN_RPM: + mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_fan_rpm_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = S_IRUGO; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "fan%u_input", num + 1); + break; + case MLXSW_HWMON_ATTR_TYPE_PWM: + mlxsw_hwmon_attr->dev_attr.show = mlxsw_hwmon_pwm_show; + mlxsw_hwmon_attr->dev_attr.store = mlxsw_hwmon_pwm_store; + mlxsw_hwmon_attr->dev_attr.attr.mode = S_IWUSR | S_IRUGO; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "pwm%u", num + 1); + break; + default: + WARN_ON(1); + } + + mlxsw_hwmon_attr->type_index = type_index; + mlxsw_hwmon_attr->hwmon = mlxsw_hwmon; + mlxsw_hwmon_attr->dev_attr.attr.name = mlxsw_hwmon_attr->name; + sysfs_attr_init(&mlxsw_hwmon_attr->dev_attr.attr); + + mlxsw_hwmon->attrs[attr_index] = &mlxsw_hwmon_attr->dev_attr.attr; + mlxsw_hwmon->attrs_count++; +} + +static int mlxsw_hwmon_temp_init(struct mlxsw_hwmon *mlxsw_hwmon) +{ + char mtcap_pl[MLXSW_REG_MTCAP_LEN]; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + u8 sensor_count; + int i; + int err; + + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtcap), mtcap_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to get number of temp sensors\n"); + return err; + } + sensor_count = mlxsw_reg_mtcap_sensor_count_get(mtcap_pl); + for (i = 0; i < sensor_count; i++) { + mlxsw_reg_mtmp_pack(mtmp_pl, 0, true, true); + err = mlxsw_reg_write(mlxsw_hwmon->core, + MLXSW_REG(mtmp), mtmp_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to setup temp sensor number %d\n", + i); + return err; + } + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP, i, i); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_MAX, i, i); + } + return 0; +} + +static int mlxsw_hwmon_fans_init(struct mlxsw_hwmon *mlxsw_hwmon) +{ + char 
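A small sketch of the duty-cycle encoding used by the pwm attributes above, for orientation only: the MFSC register (and the pwmN sysfs file written by mlxsw_hwmon_pwm_store()) carries a raw 0..255 value, where 0 means 0% and 255 means 100% duty cycle. The percent helper here is purely illustrative, not something the driver provides.

#include <stdio.h>

static unsigned int pwm_raw_to_percent(unsigned int raw)
{
	/* 0..255 register range mapped to 0..100%, rounded to nearest */
	return (raw * 100 + 127) / 255;
}

int main(void)
{
	printf("raw 128 -> %u%%\n", pwm_raw_to_percent(128)); /* ~50% */
	printf("raw 255 -> %u%%\n", pwm_raw_to_percent(255)); /* 100% */
	return 0;
}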
mfcr_pl[MLXSW_REG_MFCR_LEN]; + enum mlxsw_reg_mfcr_pwm_frequency freq; + unsigned int type_index; + unsigned int num; + u16 tacho_active; + u8 pwm_active; + int err; + + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mfcr), mfcr_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to get to probe PWMs and Tachometers\n"); + return err; + } + mlxsw_reg_mfcr_unpack(mfcr_pl, &freq, &tacho_active, &pwm_active); + num = 0; + for (type_index = 0; type_index < MLXSW_MFCR_TACHOS_MAX; type_index++) { + if (tacho_active & BIT(type_index)) + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_FAN_RPM, + type_index, num++); + } + num = 0; + for (type_index = 0; type_index < MLXSW_MFCR_PWMS_MAX; type_index++) { + if (pwm_active & BIT(type_index)) + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_PWM, + type_index, num++); + } + return 0; +} + +int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info, + struct mlxsw_hwmon **p_hwmon) +{ + struct mlxsw_hwmon *mlxsw_hwmon; + struct device *hwmon_dev; + int err; + + mlxsw_hwmon = kzalloc(sizeof(*mlxsw_hwmon), GFP_KERNEL); + if (!mlxsw_hwmon) + return -ENOMEM; + mlxsw_hwmon->core = mlxsw_core; + mlxsw_hwmon->bus_info = mlxsw_bus_info; + + err = mlxsw_hwmon_temp_init(mlxsw_hwmon); + if (err) + goto err_temp_init; + + err = mlxsw_hwmon_fans_init(mlxsw_hwmon); + if (err) + goto err_fans_init; + + mlxsw_hwmon->groups[0] = &mlxsw_hwmon->group; + mlxsw_hwmon->group.attrs = mlxsw_hwmon->attrs; + + hwmon_dev = devm_hwmon_device_register_with_groups(mlxsw_bus_info->dev, + "mlxsw", + mlxsw_hwmon, + mlxsw_hwmon->groups); + if (IS_ERR(hwmon_dev)) { + err = PTR_ERR(hwmon_dev); + goto err_hwmon_register; + } + + mlxsw_hwmon->hwmon_dev = hwmon_dev; + *p_hwmon = mlxsw_hwmon; + return 0; + +err_hwmon_register: +err_fans_init: +err_temp_init: + kfree(mlxsw_hwmon); + return err; +} + +void mlxsw_hwmon_fini(struct mlxsw_hwmon *mlxsw_hwmon) +{ + kfree(mlxsw_hwmon); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index de69e719dc9d..d2102e572b1d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -686,11 +686,15 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, if (q->consumer_counter++ != consumer_counter_limit) dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in RDQ\n"); - /* We do not support lag now */ - if (mlxsw_pci_cqe_lag_get(cqe)) - goto drop; + if (mlxsw_pci_cqe_lag_get(cqe)) { + rx_info.is_lag = true; + rx_info.u.lag_id = mlxsw_pci_cqe_lag_id_get(cqe); + rx_info.lag_port_index = mlxsw_pci_cqe_lag_port_index_get(cqe); + } else { + rx_info.is_lag = false; + rx_info.u.sys_port = mlxsw_pci_cqe_system_port_get(cqe); + } - rx_info.sys_port = mlxsw_pci_cqe_system_port_get(cqe); rx_info.trap_id = mlxsw_pci_cqe_trap_id_get(cqe); byte_count = mlxsw_pci_cqe_byte_count_get(cqe); @@ -699,7 +703,6 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, skb_put(skb, byte_count); mlxsw_core_skb_receive(mlxsw_pci->core, skb, &rx_info); -put_new_skb: memset(wqe, 0, q->elem_size); err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info); if (err) @@ -708,10 +711,6 @@ put_new_skb: q->producer_counter++; mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q); return; - -drop: - dev_kfree_skb_any(skb); - goto put_new_skb; } static char *mlxsw_pci_cq_sw_cqe_get(struct mlxsw_pci_queue *q) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.h 
b/drivers/net/ethernet/mellanox/mlxsw/pci.h index 142f33d978c5..912106054ff2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.h @@ -129,13 +129,15 @@ MLXSW_ITEM64_INDEXED(pci, wqe, address, 0x08, 0, 64, 0x8, 0x0, false); */ MLXSW_ITEM32(pci, cqe, lag, 0x00, 23, 1); -/* pci_cqe_system_port +/* pci_cqe_system_port/lag_id * When lag=0: System port on which the packet was received * When lag=1: * bits [15:4] LAG ID on which the packet was received * bits [3:0] sub_port on which the packet was received */ MLXSW_ITEM32(pci, cqe, system_port, 0x00, 0, 16); +MLXSW_ITEM32(pci, cqe, lag_id, 0x00, 4, 12); +MLXSW_ITEM32(pci, cqe, lag_port_index, 0x00, 0, 4); /* pci_cqe_wqe_counter * WQE count of the WQEs completed on the associated dqn diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 236fb5d2ad69..4e4e4dcf054f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -286,6 +286,7 @@ MLXSW_ITEM32_INDEXED(reg, sfd, rec_swid, MLXSW_REG_SFD_BASE_LEN, 24, 8, enum mlxsw_reg_sfd_rec_type { MLXSW_REG_SFD_REC_TYPE_UNICAST = 0x0, + MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG = 0x1, }; /* reg_sfd_rec_type @@ -376,24 +377,34 @@ MLXSW_ITEM32_INDEXED(reg, sfd, uc_fid_vid, MLXSW_REG_SFD_BASE_LEN, 0, 16, MLXSW_ITEM32_INDEXED(reg, sfd, uc_system_port, MLXSW_REG_SFD_BASE_LEN, 0, 16, MLXSW_REG_SFD_REC_LEN, 0x0C, false); -static inline void mlxsw_reg_sfd_uc_pack(char *payload, int rec_index, - enum mlxsw_reg_sfd_rec_policy policy, - const char *mac, u16 vid, - enum mlxsw_reg_sfd_rec_action action, - u8 local_port) +static inline void mlxsw_reg_sfd_rec_pack(char *payload, int rec_index, + enum mlxsw_reg_sfd_rec_type rec_type, + enum mlxsw_reg_sfd_rec_policy policy, + const char *mac, + enum mlxsw_reg_sfd_rec_action action) { u8 num_rec = mlxsw_reg_sfd_num_rec_get(payload); if (rec_index >= num_rec) mlxsw_reg_sfd_num_rec_set(payload, rec_index + 1); mlxsw_reg_sfd_rec_swid_set(payload, rec_index, 0); - mlxsw_reg_sfd_rec_type_set(payload, rec_index, - MLXSW_REG_SFD_REC_TYPE_UNICAST); + mlxsw_reg_sfd_rec_type_set(payload, rec_index, rec_type); mlxsw_reg_sfd_rec_policy_set(payload, rec_index, policy); mlxsw_reg_sfd_rec_mac_memcpy_to(payload, rec_index, mac); + mlxsw_reg_sfd_rec_action_set(payload, rec_index, action); +} + +static inline void mlxsw_reg_sfd_uc_pack(char *payload, int rec_index, + enum mlxsw_reg_sfd_rec_policy policy, + const char *mac, u16 vid, + enum mlxsw_reg_sfd_rec_action action, + u8 local_port) +{ + mlxsw_reg_sfd_rec_pack(payload, rec_index, + MLXSW_REG_SFD_REC_TYPE_UNICAST, + policy, mac, action); mlxsw_reg_sfd_uc_sub_port_set(payload, rec_index, 0); mlxsw_reg_sfd_uc_fid_vid_set(payload, rec_index, vid); - mlxsw_reg_sfd_rec_action_set(payload, rec_index, action); mlxsw_reg_sfd_uc_system_port_set(payload, rec_index, local_port); } @@ -406,6 +417,58 @@ static inline void mlxsw_reg_sfd_uc_unpack(char *payload, int rec_index, *p_local_port = mlxsw_reg_sfd_uc_system_port_get(payload, rec_index); } +/* reg_sfd_uc_lag_sub_port + * LAG sub port. + * Must be 0 if multichannel VEPA is not enabled. 
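A standalone sketch of how the 16-bit CQE "system_port" field is reinterpreted when the lag bit is set, matching the lag_id and lag_port_index items defined above: bits [15:4] carry the LAG ID and bits [3:0] the member (sub-port) index. The example value is arbitrary.

#include <stdio.h>

int main(void)
{
	unsigned int field = 0x0152;                /* example raw CQE field */
	unsigned int lag_id = field >> 4;           /* bits [15:4] -> 21 */
	unsigned int lag_port_index = field & 0xf;  /* bits [3:0]  -> 2 */

	printf("lag_id=%u lag_port_index=%u\n", lag_id, lag_port_index);
	return 0;
}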
+ * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_sub_port, MLXSW_REG_SFD_BASE_LEN, 16, 8, + MLXSW_REG_SFD_REC_LEN, 0x08, false); + +/* reg_sfd_uc_lag_fid_vid + * Filtering ID or VLAN ID + * For SwitchX and SwitchX-2: + * - Dynamic entries (policy 2,3) use FID + * - Static entries (policy 0) use VID + * - When independent learning is configured, VID=FID + * For Spectrum: use FID for both Dynamic and Static entries. + * VID should not be used. + * Access: Index + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_fid_vid, MLXSW_REG_SFD_BASE_LEN, 0, 16, + MLXSW_REG_SFD_REC_LEN, 0x08, false); + +/* reg_sfd_uc_lag_lag_id + * LAG Identifier - pointer into the LAG descriptor table. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_lag_lag_id, MLXSW_REG_SFD_BASE_LEN, 0, 10, + MLXSW_REG_SFD_REC_LEN, 0x0C, false); + +static inline void +mlxsw_reg_sfd_uc_lag_pack(char *payload, int rec_index, + enum mlxsw_reg_sfd_rec_policy policy, + const char *mac, u16 vid, + enum mlxsw_reg_sfd_rec_action action, + u16 lag_id) +{ + mlxsw_reg_sfd_rec_pack(payload, rec_index, + MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG, + policy, mac, action); + mlxsw_reg_sfd_uc_lag_sub_port_set(payload, rec_index, 0); + mlxsw_reg_sfd_uc_lag_fid_vid_set(payload, rec_index, vid); + mlxsw_reg_sfd_uc_lag_lag_id_set(payload, rec_index, lag_id); +} + +static inline void mlxsw_reg_sfd_uc_lag_unpack(char *payload, int rec_index, + char *mac, u16 *p_vid, + u16 *p_lag_id) +{ + mlxsw_reg_sfd_rec_mac_memcpy_from(payload, rec_index, mac); + *p_vid = mlxsw_reg_sfd_uc_lag_fid_vid_get(payload, rec_index); + *p_lag_id = mlxsw_reg_sfd_uc_lag_lag_id_get(payload, rec_index); +} + /* SFN - Switch FDB Notification Register * ------------------------------------------- * The switch provides notifications on newly learned FDB entries and @@ -456,8 +519,12 @@ MLXSW_ITEM32_INDEXED(reg, sfn, rec_swid, MLXSW_REG_SFN_BASE_LEN, 24, 8, enum mlxsw_reg_sfn_rec_type { /* MAC addresses learned on a regular port. */ MLXSW_REG_SFN_REC_TYPE_LEARNED_MAC = 0x5, - /* Aged-out MAC address on a regular port */ + /* MAC addresses learned on a LAG port. */ + MLXSW_REG_SFN_REC_TYPE_LEARNED_MAC_LAG = 0x6, + /* Aged-out MAC address on a regular port. */ MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC = 0x7, + /* Aged-out MAC address on a LAG port. */ + MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC_LAG = 0x8, }; /* reg_sfn_rec_type @@ -505,6 +572,22 @@ static inline void mlxsw_reg_sfn_mac_unpack(char *payload, int rec_index, *p_local_port = mlxsw_reg_sfn_mac_system_port_get(payload, rec_index); } +/* reg_sfn_mac_lag_lag_id + * LAG ID (pointer into the LAG descriptor table). + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sfn, mac_lag_lag_id, MLXSW_REG_SFN_BASE_LEN, 0, 10, + MLXSW_REG_SFN_REC_LEN, 0x0C, false); + +static inline void mlxsw_reg_sfn_mac_lag_unpack(char *payload, int rec_index, + char *mac, u16 *p_vid, + u16 *p_lag_id) +{ + mlxsw_reg_sfn_rec_mac_memcpy_from(payload, rec_index, mac); + *p_vid = mlxsw_reg_sfn_mac_fid_get(payload, rec_index); + *p_lag_id = mlxsw_reg_sfn_mac_lag_lag_id_get(payload, rec_index); +} + /* SPMS - Switch Port MSTP/RSTP State Register * ------------------------------------------- * Configures the spanning tree state of a physical port. @@ -865,6 +948,293 @@ static inline void mlxsw_reg_sftr_pack(char *payload, mlxsw_reg_sftr_port_mask_set(payload, port, 1); } +/* SLDR - Switch LAG Descriptor Register + * ----------------------------------------- + * The switch LAG descriptor register is populated by LAG descriptors. + * Each LAG descriptor is indexed by lag_id. 
The LAG ID runs from 0 to + * max_lag-1. + */ +#define MLXSW_REG_SLDR_ID 0x2014 +#define MLXSW_REG_SLDR_LEN 0x0C /* counting in only one port in list */ + +static const struct mlxsw_reg_info mlxsw_reg_sldr = { + .id = MLXSW_REG_SLDR_ID, + .len = MLXSW_REG_SLDR_LEN, +}; + +enum mlxsw_reg_sldr_op { + /* Indicates a creation of a new LAG-ID, lag_id must be valid */ + MLXSW_REG_SLDR_OP_LAG_CREATE, + MLXSW_REG_SLDR_OP_LAG_DESTROY, + /* Ports that appear in the list have the Distributor enabled */ + MLXSW_REG_SLDR_OP_LAG_ADD_PORT_LIST, + /* Removes ports from the disributor list */ + MLXSW_REG_SLDR_OP_LAG_REMOVE_PORT_LIST, +}; + +/* reg_sldr_op + * Operation. + * Access: RW + */ +MLXSW_ITEM32(reg, sldr, op, 0x00, 29, 3); + +/* reg_sldr_lag_id + * LAG identifier. The lag_id is the index into the LAG descriptor table. + * Access: Index + */ +MLXSW_ITEM32(reg, sldr, lag_id, 0x00, 0, 10); + +static inline void mlxsw_reg_sldr_lag_create_pack(char *payload, u8 lag_id) +{ + MLXSW_REG_ZERO(sldr, payload); + mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_CREATE); + mlxsw_reg_sldr_lag_id_set(payload, lag_id); +} + +static inline void mlxsw_reg_sldr_lag_destroy_pack(char *payload, u8 lag_id) +{ + MLXSW_REG_ZERO(sldr, payload); + mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_DESTROY); + mlxsw_reg_sldr_lag_id_set(payload, lag_id); +} + +/* reg_sldr_num_ports + * The number of member ports of the LAG. + * Reserved for Create / Destroy operations + * For Add / Remove operations - indicates the number of ports in the list. + * Access: RW + */ +MLXSW_ITEM32(reg, sldr, num_ports, 0x04, 24, 8); + +/* reg_sldr_system_port + * System port. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sldr, system_port, 0x08, 0, 16, 4, 0, false); + +static inline void mlxsw_reg_sldr_lag_add_port_pack(char *payload, u8 lag_id, + u8 local_port) +{ + MLXSW_REG_ZERO(sldr, payload); + mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_ADD_PORT_LIST); + mlxsw_reg_sldr_lag_id_set(payload, lag_id); + mlxsw_reg_sldr_num_ports_set(payload, 1); + mlxsw_reg_sldr_system_port_set(payload, 0, local_port); +} + +static inline void mlxsw_reg_sldr_lag_remove_port_pack(char *payload, u8 lag_id, + u8 local_port) +{ + MLXSW_REG_ZERO(sldr, payload); + mlxsw_reg_sldr_op_set(payload, MLXSW_REG_SLDR_OP_LAG_REMOVE_PORT_LIST); + mlxsw_reg_sldr_lag_id_set(payload, lag_id); + mlxsw_reg_sldr_num_ports_set(payload, 1); + mlxsw_reg_sldr_system_port_set(payload, 0, local_port); +} + +/* SLCR - Switch LAG Configuration 2 Register + * ------------------------------------------- + * The Switch LAG Configuration register is used for configuring the + * LAG properties of the switch. + */ +#define MLXSW_REG_SLCR_ID 0x2015 +#define MLXSW_REG_SLCR_LEN 0x10 + +static const struct mlxsw_reg_info mlxsw_reg_slcr = { + .id = MLXSW_REG_SLCR_ID, + .len = MLXSW_REG_SLCR_LEN, +}; + +enum mlxsw_reg_slcr_pp { + /* Global Configuration (for all ports) */ + MLXSW_REG_SLCR_PP_GLOBAL, + /* Per port configuration, based on local_port field */ + MLXSW_REG_SLCR_PP_PER_PORT, +}; + +/* reg_slcr_pp + * Per Port Configuration + * Note: Reading at Global mode results in reading port 1 configuration. 
+ * Access: Index + */ +MLXSW_ITEM32(reg, slcr, pp, 0x00, 24, 1); + +/* reg_slcr_local_port + * Local port number + * Supported from CPU port + * Not supported from router port + * Reserved when pp = Global Configuration + * Access: Index + */ +MLXSW_ITEM32(reg, slcr, local_port, 0x00, 16, 8); + +enum mlxsw_reg_slcr_type { + MLXSW_REG_SLCR_TYPE_CRC, /* default */ + MLXSW_REG_SLCR_TYPE_XOR, + MLXSW_REG_SLCR_TYPE_RANDOM, +}; + +/* reg_slcr_type + * Hash type + * Access: RW + */ +MLXSW_ITEM32(reg, slcr, type, 0x00, 0, 4); + +/* Ingress port */ +#define MLXSW_REG_SLCR_LAG_HASH_IN_PORT BIT(0) +/* SMAC - for IPv4 and IPv6 packets */ +#define MLXSW_REG_SLCR_LAG_HASH_SMAC_IP BIT(1) +/* SMAC - for non-IP packets */ +#define MLXSW_REG_SLCR_LAG_HASH_SMAC_NONIP BIT(2) +#define MLXSW_REG_SLCR_LAG_HASH_SMAC \ + (MLXSW_REG_SLCR_LAG_HASH_SMAC_IP | \ + MLXSW_REG_SLCR_LAG_HASH_SMAC_NONIP) +/* DMAC - for IPv4 and IPv6 packets */ +#define MLXSW_REG_SLCR_LAG_HASH_DMAC_IP BIT(3) +/* DMAC - for non-IP packets */ +#define MLXSW_REG_SLCR_LAG_HASH_DMAC_NONIP BIT(4) +#define MLXSW_REG_SLCR_LAG_HASH_DMAC \ + (MLXSW_REG_SLCR_LAG_HASH_DMAC_IP | \ + MLXSW_REG_SLCR_LAG_HASH_DMAC_NONIP) +/* Ethertype - for IPv4 and IPv6 packets */ +#define MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_IP BIT(5) +/* Ethertype - for non-IP packets */ +#define MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_NONIP BIT(6) +#define MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE \ + (MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_IP | \ + MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE_NONIP) +/* VLAN ID - for IPv4 and IPv6 packets */ +#define MLXSW_REG_SLCR_LAG_HASH_VLANID_IP BIT(7) +/* VLAN ID - for non-IP packets */ +#define MLXSW_REG_SLCR_LAG_HASH_VLANID_NONIP BIT(8) +#define MLXSW_REG_SLCR_LAG_HASH_VLANID \ + (MLXSW_REG_SLCR_LAG_HASH_VLANID_IP | \ + MLXSW_REG_SLCR_LAG_HASH_VLANID_NONIP) +/* Source IP address (can be IPv4 or IPv6) */ +#define MLXSW_REG_SLCR_LAG_HASH_SIP BIT(9) +/* Destination IP address (can be IPv4 or IPv6) */ +#define MLXSW_REG_SLCR_LAG_HASH_DIP BIT(10) +/* TCP/UDP source port */ +#define MLXSW_REG_SLCR_LAG_HASH_SPORT BIT(11) +/* TCP/UDP destination port*/ +#define MLXSW_REG_SLCR_LAG_HASH_DPORT BIT(12) +/* IPv4 Protocol/IPv6 Next Header */ +#define MLXSW_REG_SLCR_LAG_HASH_IPPROTO BIT(13) +/* IPv6 Flow label */ +#define MLXSW_REG_SLCR_LAG_HASH_FLOWLABEL BIT(14) +/* SID - FCoE source ID */ +#define MLXSW_REG_SLCR_LAG_HASH_FCOE_SID BIT(15) +/* DID - FCoE destination ID */ +#define MLXSW_REG_SLCR_LAG_HASH_FCOE_DID BIT(16) +/* OXID - FCoE originator exchange ID */ +#define MLXSW_REG_SLCR_LAG_HASH_FCOE_OXID BIT(17) +/* Destination QP number - for RoCE packets */ +#define MLXSW_REG_SLCR_LAG_HASH_ROCE_DQP BIT(19) + +/* reg_slcr_lag_hash + * LAG hashing configuration. This is a bitmask, in which each set + * bit includes the corresponding item in the LAG hash calculation. + * The default lag_hash contains SMAC, DMAC, VLANID and + * Ethertype (for all packet types). + * Access: RW + */ +MLXSW_ITEM32(reg, slcr, lag_hash, 0x04, 0, 20); + +static inline void mlxsw_reg_slcr_pack(char *payload, u16 lag_hash) +{ + MLXSW_REG_ZERO(slcr, payload); + mlxsw_reg_slcr_pp_set(payload, MLXSW_REG_SLCR_PP_GLOBAL); + mlxsw_reg_slcr_type_set(payload, MLXSW_REG_SLCR_TYPE_XOR); + mlxsw_reg_slcr_lag_hash_set(payload, lag_hash); +} + +/* SLCOR - Switch LAG Collector Register + * ------------------------------------- + * The Switch LAG Collector register controls the Local Port membership + * in a LAG and enablement of the collector. 
+ */ +#define MLXSW_REG_SLCOR_ID 0x2016 +#define MLXSW_REG_SLCOR_LEN 0x10 + +static const struct mlxsw_reg_info mlxsw_reg_slcor = { + .id = MLXSW_REG_SLCOR_ID, + .len = MLXSW_REG_SLCOR_LEN, +}; + +enum mlxsw_reg_slcor_col { + /* Port is added with collector disabled */ + MLXSW_REG_SLCOR_COL_LAG_ADD_PORT, + MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_ENABLED, + MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_DISABLED, + MLXSW_REG_SLCOR_COL_LAG_REMOVE_PORT, +}; + +/* reg_slcor_col + * Collector configuration + * Access: RW + */ +MLXSW_ITEM32(reg, slcor, col, 0x00, 30, 2); + +/* reg_slcor_local_port + * Local port number + * Not supported for CPU port + * Access: Index + */ +MLXSW_ITEM32(reg, slcor, local_port, 0x00, 16, 8); + +/* reg_slcor_lag_id + * LAG Identifier. Index into the LAG descriptor table. + * Access: Index + */ +MLXSW_ITEM32(reg, slcor, lag_id, 0x00, 0, 10); + +/* reg_slcor_port_index + * Port index in the LAG list. Only valid on Add Port to LAG col. + * Valid range is from 0 to cap_max_lag_members-1 + * Access: RW + */ +MLXSW_ITEM32(reg, slcor, port_index, 0x04, 0, 10); + +static inline void mlxsw_reg_slcor_pack(char *payload, + u8 local_port, u16 lag_id, + enum mlxsw_reg_slcor_col col) +{ + MLXSW_REG_ZERO(slcor, payload); + mlxsw_reg_slcor_col_set(payload, col); + mlxsw_reg_slcor_local_port_set(payload, local_port); + mlxsw_reg_slcor_lag_id_set(payload, lag_id); +} + +static inline void mlxsw_reg_slcor_port_add_pack(char *payload, + u8 local_port, u16 lag_id, + u8 port_index) +{ + mlxsw_reg_slcor_pack(payload, local_port, lag_id, + MLXSW_REG_SLCOR_COL_LAG_ADD_PORT); + mlxsw_reg_slcor_port_index_set(payload, port_index); +} + +static inline void mlxsw_reg_slcor_port_remove_pack(char *payload, + u8 local_port, u16 lag_id) +{ + mlxsw_reg_slcor_pack(payload, local_port, lag_id, + MLXSW_REG_SLCOR_COL_LAG_REMOVE_PORT); +} + +static inline void mlxsw_reg_slcor_col_enable_pack(char *payload, + u8 local_port, u16 lag_id) +{ + mlxsw_reg_slcor_pack(payload, local_port, lag_id, + MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_ENABLED); +} + +static inline void mlxsw_reg_slcor_col_disable_pack(char *payload, + u8 local_port, u16 lag_id) +{ + mlxsw_reg_slcor_pack(payload, local_port, lag_id, + MLXSW_REG_SLCOR_COL_LAG_COLLECTOR_ENABLED); +} + /* SPMLR - Switch Port MAC Learning Register * ----------------------------------------- * Controls the Switch MAC learning policy per port. @@ -2087,6 +2457,284 @@ static inline void mlxsw_reg_hpkt_pack(char *payload, u8 action, u16 trap_id) mlxsw_reg_hpkt_ctrl_set(payload, MLXSW_REG_HPKT_CTRL_PACKET_DEFAULT); } +/* MFCR - Management Fan Control Register + * -------------------------------------- + * This register controls the settings of the Fan Speed PWM mechanism. + */ +#define MLXSW_REG_MFCR_ID 0x9001 +#define MLXSW_REG_MFCR_LEN 0x08 + +static const struct mlxsw_reg_info mlxsw_reg_mfcr = { + .id = MLXSW_REG_MFCR_ID, + .len = MLXSW_REG_MFCR_LEN, +}; + +enum mlxsw_reg_mfcr_pwm_frequency { + MLXSW_REG_MFCR_PWM_FEQ_11HZ = 0x00, + MLXSW_REG_MFCR_PWM_FEQ_14_7HZ = 0x01, + MLXSW_REG_MFCR_PWM_FEQ_22_1HZ = 0x02, + MLXSW_REG_MFCR_PWM_FEQ_1_4KHZ = 0x40, + MLXSW_REG_MFCR_PWM_FEQ_5KHZ = 0x41, + MLXSW_REG_MFCR_PWM_FEQ_20KHZ = 0x42, + MLXSW_REG_MFCR_PWM_FEQ_22_5KHZ = 0x43, + MLXSW_REG_MFCR_PWM_FEQ_25KHZ = 0x44, +}; + +/* reg_mfcr_pwm_frequency + * Controls the frequency of the PWM signal. + * Access: RW + */ +MLXSW_ITEM32(reg, mfcr, pwm_frequency, 0x00, 0, 6); + +#define MLXSW_MFCR_TACHOS_MAX 10 + +/* reg_mfcr_tacho_active + * Indicates which of the tachometer is active (bit per tachometer). 
+ * Access: RO + */ +MLXSW_ITEM32(reg, mfcr, tacho_active, 0x04, 16, MLXSW_MFCR_TACHOS_MAX); + +#define MLXSW_MFCR_PWMS_MAX 5 + +/* reg_mfcr_pwm_active + * Indicates which of the PWM control is active (bit per PWM). + * Access: RO + */ +MLXSW_ITEM32(reg, mfcr, pwm_active, 0x04, 0, MLXSW_MFCR_PWMS_MAX); + +static inline void +mlxsw_reg_mfcr_pack(char *payload, + enum mlxsw_reg_mfcr_pwm_frequency pwm_frequency) +{ + MLXSW_REG_ZERO(mfcr, payload); + mlxsw_reg_mfcr_pwm_frequency_set(payload, pwm_frequency); +} + +static inline void +mlxsw_reg_mfcr_unpack(char *payload, + enum mlxsw_reg_mfcr_pwm_frequency *p_pwm_frequency, + u16 *p_tacho_active, u8 *p_pwm_active) +{ + *p_pwm_frequency = mlxsw_reg_mfcr_pwm_frequency_get(payload); + *p_tacho_active = mlxsw_reg_mfcr_tacho_active_get(payload); + *p_pwm_active = mlxsw_reg_mfcr_pwm_active_get(payload); +} + +/* MFSC - Management Fan Speed Control Register + * -------------------------------------------- + * This register controls the settings of the Fan Speed PWM mechanism. + */ +#define MLXSW_REG_MFSC_ID 0x9002 +#define MLXSW_REG_MFSC_LEN 0x08 + +static const struct mlxsw_reg_info mlxsw_reg_mfsc = { + .id = MLXSW_REG_MFSC_ID, + .len = MLXSW_REG_MFSC_LEN, +}; + +/* reg_mfsc_pwm + * Fan pwm to control / monitor. + * Access: Index + */ +MLXSW_ITEM32(reg, mfsc, pwm, 0x00, 24, 3); + +/* reg_mfsc_pwm_duty_cycle + * Controls the duty cycle of the PWM. Value range from 0..255 to + * represent duty cycle of 0%...100%. + * Access: RW + */ +MLXSW_ITEM32(reg, mfsc, pwm_duty_cycle, 0x04, 0, 8); + +static inline void mlxsw_reg_mfsc_pack(char *payload, u8 pwm, + u8 pwm_duty_cycle) +{ + MLXSW_REG_ZERO(mfsc, payload); + mlxsw_reg_mfsc_pwm_set(payload, pwm); + mlxsw_reg_mfsc_pwm_duty_cycle_set(payload, pwm_duty_cycle); +} + +/* MFSM - Management Fan Speed Measurement + * --------------------------------------- + * This register controls the settings of the Tacho measurements and + * enables reading the Tachometer measurements. + */ +#define MLXSW_REG_MFSM_ID 0x9003 +#define MLXSW_REG_MFSM_LEN 0x08 + +static const struct mlxsw_reg_info mlxsw_reg_mfsm = { + .id = MLXSW_REG_MFSM_ID, + .len = MLXSW_REG_MFSM_LEN, +}; + +/* reg_mfsm_tacho + * Fan tachometer index. + * Access: Index + */ +MLXSW_ITEM32(reg, mfsm, tacho, 0x00, 24, 4); + +/* reg_mfsm_rpm + * Fan speed (round per minute). + * Access: RO + */ +MLXSW_ITEM32(reg, mfsm, rpm, 0x04, 0, 16); + +static inline void mlxsw_reg_mfsm_pack(char *payload, u8 tacho) +{ + MLXSW_REG_ZERO(mfsm, payload); + mlxsw_reg_mfsm_tacho_set(payload, tacho); +} + +/* MTCAP - Management Temperature Capabilities + * ------------------------------------------- + * This register exposes the capabilities of the device and + * system temperature sensing. + */ +#define MLXSW_REG_MTCAP_ID 0x9009 +#define MLXSW_REG_MTCAP_LEN 0x08 + +static const struct mlxsw_reg_info mlxsw_reg_mtcap = { + .id = MLXSW_REG_MTCAP_ID, + .len = MLXSW_REG_MTCAP_LEN, +}; + +/* reg_mtcap_sensor_count + * Number of sensors supported by the device. + * This includes the QSFP module sensors (if exists in the QSFP module). + * Access: RO + */ +MLXSW_ITEM32(reg, mtcap, sensor_count, 0x00, 0, 7); + +/* MTMP - Management Temperature + * ----------------------------- + * This register controls the settings of the temperature measurements + * and enables reading the temperature measurements. Note that temperature + * is in 0.125 degrees Celsius. 
+ */ +#define MLXSW_REG_MTMP_ID 0x900A +#define MLXSW_REG_MTMP_LEN 0x20 + +static const struct mlxsw_reg_info mlxsw_reg_mtmp = { + .id = MLXSW_REG_MTMP_ID, + .len = MLXSW_REG_MTMP_LEN, +}; + +/* reg_mtmp_sensor_index + * Sensors index to access. + * 64-127 of sensor_index are mapped to the SFP+/QSFP modules sequentially + * (module 0 is mapped to sensor_index 64). + * Access: Index + */ +MLXSW_ITEM32(reg, mtmp, sensor_index, 0x00, 0, 7); + +/* Convert to milli degrees Celsius */ +#define MLXSW_REG_MTMP_TEMP_TO_MC(val) (val * 125) + +/* reg_mtmp_temperature + * Temperature reading from the sensor. Reading is in 0.125 Celsius + * degrees units. + * Access: RO + */ +MLXSW_ITEM32(reg, mtmp, temperature, 0x04, 0, 16); + +/* reg_mtmp_mte + * Max Temperature Enable - enables measuring the max temperature on a sensor. + * Access: RW + */ +MLXSW_ITEM32(reg, mtmp, mte, 0x08, 31, 1); + +/* reg_mtmp_mtr + * Max Temperature Reset - clears the value of the max temperature register. + * Access: WO + */ +MLXSW_ITEM32(reg, mtmp, mtr, 0x08, 30, 1); + +/* reg_mtmp_max_temperature + * The highest measured temperature from the sensor. + * When the bit mte is cleared, the field max_temperature is reserved. + * Access: RO + */ +MLXSW_ITEM32(reg, mtmp, max_temperature, 0x08, 0, 16); + +#define MLXSW_REG_MTMP_SENSOR_NAME_SIZE 8 + +/* reg_mtmp_sensor_name + * Sensor Name + * Access: RO + */ +MLXSW_ITEM_BUF(reg, mtmp, sensor_name, 0x18, MLXSW_REG_MTMP_SENSOR_NAME_SIZE); + +static inline void mlxsw_reg_mtmp_pack(char *payload, u8 sensor_index, + bool max_temp_enable, + bool max_temp_reset) +{ + MLXSW_REG_ZERO(mtmp, payload); + mlxsw_reg_mtmp_sensor_index_set(payload, sensor_index); + mlxsw_reg_mtmp_mte_set(payload, max_temp_enable); + mlxsw_reg_mtmp_mtr_set(payload, max_temp_reset); +} + +static inline void mlxsw_reg_mtmp_unpack(char *payload, unsigned int *p_temp, + unsigned int *p_max_temp, + char *sensor_name) +{ + u16 temp; + + if (p_temp) { + temp = mlxsw_reg_mtmp_temperature_get(payload); + *p_temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp); + } + if (p_max_temp) { + temp = mlxsw_reg_mtmp_temperature_get(payload); + *p_max_temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp); + } + if (sensor_name) + mlxsw_reg_mtmp_sensor_name_memcpy_from(payload, sensor_name); +} + +/* MLCR - Management LED Control Register + * -------------------------------------- + * Controls the system LEDs. + */ +#define MLXSW_REG_MLCR_ID 0x902B +#define MLXSW_REG_MLCR_LEN 0x0C + +static const struct mlxsw_reg_info mlxsw_reg_mlcr = { + .id = MLXSW_REG_MLCR_ID, + .len = MLXSW_REG_MLCR_LEN, +}; + +/* reg_mlcr_local_port + * Local port number. + * Access: RW + */ +MLXSW_ITEM32(reg, mlcr, local_port, 0x00, 16, 8); + +#define MLXSW_REG_MLCR_DURATION_MAX 0xFFFF + +/* reg_mlcr_beacon_duration + * Duration of the beacon to be active, in seconds. + * 0x0 - Will turn off the beacon. + * 0xFFFF - Will turn on the beacon until explicitly turned off. + * Access: RW + */ +MLXSW_ITEM32(reg, mlcr, beacon_duration, 0x04, 0, 16); + +/* reg_mlcr_beacon_remain + * Remaining duration of the beacon, in seconds. + * 0xFFFF indicates an infinite amount of time. + * Access: RO + */ +MLXSW_ITEM32(reg, mlcr, beacon_remain, 0x08, 0, 16); + +static inline void mlxsw_reg_mlcr_pack(char *payload, u8 local_port, + bool active) +{ + MLXSW_REG_ZERO(mlcr, payload); + mlxsw_reg_mlcr_local_port_set(payload, local_port); + mlxsw_reg_mlcr_beacon_duration_set(payload, active ? 
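A minimal sketch of the MTMP unit conversion defined above: the register reports temperature in 0.125 degree Celsius steps, and MLXSW_REG_MTMP_TEMP_TO_MC() turns that into the millidegrees that hwmon tempN_input files expect. The sample value is only an illustration.

#include <stdio.h>

#define MTMP_TEMP_TO_MC(val) ((val) * 125)  /* 0.125 C steps -> millidegrees */

int main(void)
{
	unsigned int raw = 200;  /* 200 * 0.125 C = 25.0 C */

	printf("raw %u -> %u millidegrees Celsius\n", raw, MTMP_TEMP_TO_MC(raw));
	return 0;
}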
+ MLXSW_REG_MLCR_DURATION_MAX : 0); +} + /* SBPR - Shared Buffer Pools Register * ----------------------------------- * The SBPR configures and retrieves the shared buffer pools and configuration. @@ -2375,6 +3023,12 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id) return "SFGC"; case MLXSW_REG_SFTR_ID: return "SFTR"; + case MLXSW_REG_SLDR_ID: + return "SLDR"; + case MLXSW_REG_SLCR_ID: + return "SLCR"; + case MLXSW_REG_SLCOR_ID: + return "SLCOR"; case MLXSW_REG_SPMLR_ID: return "SPMLR"; case MLXSW_REG_SVFA_ID: @@ -2405,6 +3059,18 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id) return "HTGT"; case MLXSW_REG_HPKT_ID: return "HPKT"; + case MLXSW_REG_MFCR_ID: + return "MFCR"; + case MLXSW_REG_MFSC_ID: + return "MFSC"; + case MLXSW_REG_MFSM_ID: + return "MFSM"; + case MLXSW_REG_MTCAP_ID: + return "MTCAP"; + case MLXSW_REG_MTMP_ID: + return "MTMP"; + case MLXSW_REG_MLCR_ID: + return "MLCR"; case MLXSW_REG_SBPR_ID: return "SBPR"; case MLXSW_REG_SBCM_ID: diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 3be4a2355ead..3ec07b9a458d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -417,6 +417,10 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } +static void mlxsw_sp_set_rx_mode(struct net_device *dev) +{ +} + static int mlxsw_sp_port_set_mac_address(struct net_device *dev, void *p) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); @@ -725,6 +729,7 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_open = mlxsw_sp_port_open, .ndo_stop = mlxsw_sp_port_stop, .ndo_start_xmit = mlxsw_sp_port_xmit, + .ndo_set_rx_mode = mlxsw_sp_set_rx_mode, .ndo_set_mac_address = mlxsw_sp_port_set_mac_address, .ndo_change_mtu = mlxsw_sp_port_change_mtu, .ndo_get_stats64 = mlxsw_sp_port_get_stats64, @@ -859,6 +864,29 @@ static void mlxsw_sp_port_get_strings(struct net_device *dev, } } +static int mlxsw_sp_port_set_phys_id(struct net_device *dev, + enum ethtool_phys_id_state state) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char mlcr_pl[MLXSW_REG_MLCR_LEN]; + bool active; + + switch (state) { + case ETHTOOL_ID_ACTIVE: + active = true; + break; + case ETHTOOL_ID_INACTIVE: + active = false; + break; + default: + return -EOPNOTSUPP; + } + + mlxsw_reg_mlcr_pack(mlcr_pl, mlxsw_sp_port->local_port, active); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mlcr), mlcr_pl); +} + static void mlxsw_sp_port_get_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { @@ -1205,6 +1233,7 @@ static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { .get_drvinfo = mlxsw_sp_port_get_drvinfo, .get_link = ethtool_op_get_link, .get_strings = mlxsw_sp_port_get_strings, + .set_phys_id = mlxsw_sp_port_set_phys_id, .get_ethtool_stats = mlxsw_sp_port_get_stats, .get_sset_count = mlxsw_sp_port_get_sset_count, .get_settings = mlxsw_sp_port_get_settings, @@ -1683,6 +1712,22 @@ static int mlxsw_sp_flood_init(struct mlxsw_sp *mlxsw_sp) return 0; } +static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp) +{ + char slcr_pl[MLXSW_REG_SLCR_LEN]; + + mlxsw_reg_slcr_pack(slcr_pl, MLXSW_REG_SLCR_LAG_HASH_SMAC | + MLXSW_REG_SLCR_LAG_HASH_DMAC | + MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE | + MLXSW_REG_SLCR_LAG_HASH_VLANID | + MLXSW_REG_SLCR_LAG_HASH_SIP | + MLXSW_REG_SLCR_LAG_HASH_DIP | + MLXSW_REG_SLCR_LAG_HASH_SPORT | + MLXSW_REG_SLCR_LAG_HASH_DPORT | + 
MLXSW_REG_SLCR_LAG_HASH_IPPROTO); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcr), slcr_pl); +} + static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core, const struct mlxsw_bus_info *mlxsw_bus_info) { @@ -1728,6 +1773,12 @@ static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core, goto err_buffers_init; } + err = mlxsw_sp_lag_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize LAG\n"); + goto err_lag_init; + } + err = mlxsw_sp_switchdev_init(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize switchdev\n"); @@ -1737,6 +1788,7 @@ static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core, return 0; err_switchdev_init: +err_lag_init: err_buffers_init: err_flood_init: mlxsw_sp_traps_fini(mlxsw_sp); @@ -1764,9 +1816,9 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = { .used_max_vepa_channels = 1, .max_vepa_channels = 0, .used_max_lag = 1, - .max_lag = 64, + .max_lag = MLXSW_SP_LAG_MAX, .used_max_port_per_lag = 1, - .max_port_per_lag = 16, + .max_port_per_lag = MLXSW_SP_PORT_PER_LAG_MAX, .used_max_mid = 1, .max_mid = 7000, .used_max_pgt = 1, @@ -1865,19 +1917,245 @@ static void mlxsw_sp_master_bridge_dec(struct mlxsw_sp *mlxsw_sp, mlxsw_sp->master_bridge.dev = NULL; } -static int mlxsw_sp_netdevice_event(struct notifier_block *unused, - unsigned long event, void *ptr) +static int mlxsw_sp_lag_create(struct mlxsw_sp *mlxsw_sp, u16 lag_id) +{ + char sldr_pl[MLXSW_REG_SLDR_LEN]; + + mlxsw_reg_sldr_lag_create_pack(sldr_pl, lag_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl); +} + +static int mlxsw_sp_lag_destroy(struct mlxsw_sp *mlxsw_sp, u16 lag_id) +{ + char sldr_pl[MLXSW_REG_SLDR_LEN]; + + mlxsw_reg_sldr_lag_destroy_pack(sldr_pl, lag_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl); +} + +static int mlxsw_sp_lag_col_port_add(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id, u8 port_index) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char slcor_pl[MLXSW_REG_SLCOR_LEN]; + + mlxsw_reg_slcor_port_add_pack(slcor_pl, mlxsw_sp_port->local_port, + lag_id, port_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl); +} + +static int mlxsw_sp_lag_col_port_remove(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char slcor_pl[MLXSW_REG_SLCOR_LEN]; + + mlxsw_reg_slcor_port_remove_pack(slcor_pl, mlxsw_sp_port->local_port, + lag_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl); +} + +static int mlxsw_sp_lag_col_port_enable(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char slcor_pl[MLXSW_REG_SLCOR_LEN]; + + mlxsw_reg_slcor_col_enable_pack(slcor_pl, mlxsw_sp_port->local_port, + lag_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl); +} + +static int mlxsw_sp_lag_col_port_disable(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char slcor_pl[MLXSW_REG_SLCOR_LEN]; + + mlxsw_reg_slcor_col_disable_pack(slcor_pl, mlxsw_sp_port->local_port, + lag_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcor), slcor_pl); +} + +static int mlxsw_sp_lag_index_get(struct mlxsw_sp *mlxsw_sp, + struct net_device *lag_dev, + u16 *p_lag_id) +{ + struct mlxsw_sp_upper *lag; + int free_lag_id = -1; + int i; + + for (i = 0; i < MLXSW_SP_LAG_MAX; i++) { + lag = mlxsw_sp_lag_get(mlxsw_sp, i); + if 
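As a standalone sketch of the LAG hash bitmask that mlxsw_sp_lag_init() above programs into SLCR: each BIT() selects one packet field for the XOR hash, and the composite SMAC/DMAC/ETHERTYPE/VLANID macros cover both the IP and non-IP variants. The macro names are shortened here for brevity; the bit positions mirror the reg.h definitions earlier in this patch.

#include <stdio.h>

#define BIT(n) (1u << (n))

#define HASH_SMAC       (BIT(1) | BIT(2))
#define HASH_DMAC       (BIT(3) | BIT(4))
#define HASH_ETHERTYPE  (BIT(5) | BIT(6))
#define HASH_VLANID     (BIT(7) | BIT(8))
#define HASH_SIP        BIT(9)
#define HASH_DIP        BIT(10)
#define HASH_SPORT      BIT(11)
#define HASH_DPORT      BIT(12)
#define HASH_IPPROTO    BIT(13)

int main(void)
{
	unsigned int lag_hash = HASH_SMAC | HASH_DMAC | HASH_ETHERTYPE |
				HASH_VLANID | HASH_SIP | HASH_DIP |
				HASH_SPORT | HASH_DPORT | HASH_IPPROTO;

	printf("slcr lag_hash = 0x%x\n", lag_hash); /* bits 1..13 -> 0x3ffe */
	return 0;
}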
(lag->ref_count) { + if (lag->dev == lag_dev) { + *p_lag_id = i; + return 0; + } + } else if (free_lag_id < 0) { + free_lag_id = i; + } + } + if (free_lag_id < 0) + return -EBUSY; + *p_lag_id = free_lag_id; + return 0; +} + +static bool +mlxsw_sp_master_lag_check(struct mlxsw_sp *mlxsw_sp, + struct net_device *lag_dev, + struct netdev_lag_upper_info *lag_upper_info) +{ + u16 lag_id; + + if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0) + return false; + if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) + return false; + return true; +} + +static int mlxsw_sp_port_lag_index_get(struct mlxsw_sp *mlxsw_sp, + u16 lag_id, u8 *p_port_index) +{ + int i; + + for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) { + if (!mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i)) { + *p_port_index = i; + return 0; + } + } + return -EBUSY; +} + +static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *lag_dev) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_upper *lag; + u16 lag_id; + u8 port_index; + int err; + + err = mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id); + if (err) + return err; + lag = mlxsw_sp_lag_get(mlxsw_sp, lag_id); + if (!lag->ref_count) { + err = mlxsw_sp_lag_create(mlxsw_sp, lag_id); + if (err) + return err; + lag->dev = lag_dev; + } + + err = mlxsw_sp_port_lag_index_get(mlxsw_sp, lag_id, &port_index); + if (err) + return err; + err = mlxsw_sp_lag_col_port_add(mlxsw_sp_port, lag_id, port_index); + if (err) + goto err_col_port_add; + err = mlxsw_sp_lag_col_port_enable(mlxsw_sp_port, lag_id); + if (err) + goto err_col_port_enable; + + mlxsw_core_lag_mapping_set(mlxsw_sp->core, lag_id, port_index, + mlxsw_sp_port->local_port); + mlxsw_sp_port->lag_id = lag_id; + mlxsw_sp_port->lagged = 1; + lag->ref_count++; + return 0; + +err_col_port_add: + if (!lag->ref_count) + mlxsw_sp_lag_destroy(mlxsw_sp, lag_id); +err_col_port_enable: + mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id); + return err; +} + +static int mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *lag_dev) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_upper *lag; + u16 lag_id = mlxsw_sp_port->lag_id; + int err; + + if (!mlxsw_sp_port->lagged) + return 0; + lag = mlxsw_sp_lag_get(mlxsw_sp, lag_id); + WARN_ON(lag->ref_count == 0); + + err = mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, lag_id); + if (err) + return err; + mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id); + if (err) + return err; + + if (lag->ref_count == 1) { + err = mlxsw_sp_lag_destroy(mlxsw_sp, lag_id); + if (err) + return err; + } + + mlxsw_core_lag_mapping_clear(mlxsw_sp->core, lag_id, + mlxsw_sp_port->local_port); + mlxsw_sp_port->lagged = 0; + lag->ref_count--; + return 0; +} + +static int mlxsw_sp_lag_dist_port_add(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char sldr_pl[MLXSW_REG_SLDR_LEN]; + + mlxsw_reg_sldr_lag_add_port_pack(sldr_pl, lag_id, + mlxsw_sp_port->local_port); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl); +} + +static int mlxsw_sp_lag_dist_port_remove(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char sldr_pl[MLXSW_REG_SLDR_LEN]; + + mlxsw_reg_sldr_lag_remove_port_pack(sldr_pl, lag_id, + mlxsw_sp_port->local_port); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl); +} + +static int mlxsw_sp_port_lag_tx_en_set(struct mlxsw_sp_port 
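A simplified, standalone model (an assumption-laden sketch, not driver code) of the LAG slot allocation that mlxsw_sp_lag_index_get() above performs when a port joins a bond: reuse the slot already bound to that upper device, otherwise hand out the first slot with a zero reference count, and fail when all slots are referenced.

#include <stdio.h>

#define LAG_MAX 64

struct upper {
	void *dev;              /* upper (bonding) device bound to this slot */
	unsigned int ref_count; /* member ports currently joined */
};

static struct upper lags[LAG_MAX];

static int lag_index_get(void *lag_dev, unsigned int *p_lag_id)
{
	int free_id = -1;
	int i;

	for (i = 0; i < LAG_MAX; i++) {
		if (lags[i].ref_count) {
			if (lags[i].dev == lag_dev) {
				*p_lag_id = i;  /* already mapped to this bond */
				return 0;
			}
		} else if (free_id < 0) {
			free_id = i;            /* remember first free slot */
		}
	}
	if (free_id < 0)
		return -1;                      /* stands in for -EBUSY */
	*p_lag_id = free_id;
	return 0;
}

int main(void)
{
	static int bond0, bond1;
	unsigned int id;

	lags[0].dev = &bond0;
	lags[0].ref_count = 2;                  /* bond0 already owns LAG 0 */
	if (!lag_index_get(&bond0, &id))
		printf("bond0 -> lag %u\n", id); /* 0, reused */
	if (!lag_index_get(&bond1, &id))
		printf("bond1 -> lag %u\n", id); /* 1, first free slot */
	return 0;
}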
*mlxsw_sp_port, + bool lag_tx_enabled) +{ + if (lag_tx_enabled) + return mlxsw_sp_lag_dist_port_add(mlxsw_sp_port, + mlxsw_sp_port->lag_id); + else + return mlxsw_sp_lag_dist_port_remove(mlxsw_sp_port, + mlxsw_sp_port->lag_id); +} + +static int mlxsw_sp_port_lag_changed(struct mlxsw_sp_port *mlxsw_sp_port, + struct netdev_lag_lower_state_info *info) +{ + return mlxsw_sp_port_lag_tx_en_set(mlxsw_sp_port, info->tx_enabled); +} + +static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev, + unsigned long event, void *ptr) { - struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netdev_notifier_changeupper_info *info; struct mlxsw_sp_port *mlxsw_sp_port; struct net_device *upper_dev; struct mlxsw_sp *mlxsw_sp; int err; - if (!mlxsw_sp_port_dev_check(dev)) - return NOTIFY_DONE; - mlxsw_sp_port = netdev_priv(dev); mlxsw_sp = mlxsw_sp_port->mlxsw_sp; info = ptr; @@ -1885,16 +2163,22 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused, switch (event) { case NETDEV_PRECHANGEUPPER: upper_dev = info->upper_dev; + if (!info->master || !info->linking) + break; /* HW limitation forbids to put ports to multiple bridges. */ - if (info->master && info->linking && - netif_is_bridge_master(upper_dev) && + if (netif_is_bridge_master(upper_dev) && !mlxsw_sp_master_bridge_check(mlxsw_sp, upper_dev)) return NOTIFY_BAD; + if (netif_is_lag_master(upper_dev) && + !mlxsw_sp_master_lag_check(mlxsw_sp, upper_dev, + info->upper_info)) + return NOTIFY_BAD; break; case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; - if (info->master && - netif_is_bridge_master(upper_dev)) { + if (!info->master) + break; + if (netif_is_bridge_master(upper_dev)) { if (info->linking) { err = mlxsw_sp_port_bridge_join(mlxsw_sp_port); if (err) @@ -1908,6 +2192,46 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused, mlxsw_sp_port->bridged = 0; mlxsw_sp_master_bridge_dec(mlxsw_sp, upper_dev); } + } else if (netif_is_lag_master(upper_dev)) { + if (info->linking) { + err = mlxsw_sp_port_lag_join(mlxsw_sp_port, + upper_dev); + if (err) { + netdev_err(dev, "Failed to join link aggregation\n"); + return NOTIFY_BAD; + } + } else { + err = mlxsw_sp_port_lag_leave(mlxsw_sp_port, + upper_dev); + if (err) { + netdev_err(dev, "Failed to leave link aggregation\n"); + return NOTIFY_BAD; + } + } + } + break; + } + + return NOTIFY_DONE; +} + +static int mlxsw_sp_netdevice_port_lower_event(struct net_device *dev, + unsigned long event, void *ptr) +{ + struct netdev_notifier_changelowerstate_info *info; + struct mlxsw_sp_port *mlxsw_sp_port; + int err; + + mlxsw_sp_port = netdev_priv(dev); + info = ptr; + + switch (event) { + case NETDEV_CHANGELOWERSTATE: + if (netif_is_lag_port(dev) && mlxsw_sp_port->lagged) { + err = mlxsw_sp_port_lag_changed(mlxsw_sp_port, + info->lower_state_info); + if (err) + netdev_err(dev, "Failed to reflect link aggregation lower state change\n"); } break; } @@ -1915,6 +2239,52 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused, return NOTIFY_DONE; } +static int mlxsw_sp_netdevice_port_event(struct net_device *dev, + unsigned long event, void *ptr) +{ + switch (event) { + case NETDEV_PRECHANGEUPPER: + case NETDEV_CHANGEUPPER: + return mlxsw_sp_netdevice_port_upper_event(dev, event, ptr); + case NETDEV_CHANGELOWERSTATE: + return mlxsw_sp_netdevice_port_lower_event(dev, event, ptr); + } + + return NOTIFY_DONE; +} + +static int mlxsw_sp_netdevice_lag_event(struct net_device *lag_dev, + unsigned long event, void *ptr) +{ + struct net_device *dev; + struct 
list_head *iter; + int ret; + + netdev_for_each_lower_dev(lag_dev, dev, iter) { + if (mlxsw_sp_port_dev_check(dev)) { + ret = mlxsw_sp_netdevice_port_event(dev, event, ptr); + if (ret == NOTIFY_BAD) + return ret; + } + } + + return NOTIFY_DONE; +} + +static int mlxsw_sp_netdevice_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + if (mlxsw_sp_port_dev_check(dev)) + return mlxsw_sp_netdevice_port_event(dev, event, ptr); + + if (netif_is_lag_master(dev)) + return mlxsw_sp_netdevice_lag_event(dev, event, ptr); + + return NOTIFY_DONE; +} + static struct notifier_block mlxsw_sp_netdevice_nb __read_mostly = { .notifier_call = mlxsw_sp_netdevice_event, }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 4365c8bccc6d..48be5a63b9b5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -46,9 +46,16 @@ #include "core.h" #define MLXSW_SP_VFID_BASE VLAN_N_VID +#define MLXSW_SP_LAG_MAX 64 +#define MLXSW_SP_PORT_PER_LAG_MAX 16 struct mlxsw_sp_port; +struct mlxsw_sp_upper { + struct net_device *dev; + unsigned int ref_count; +}; + struct mlxsw_sp { unsigned long active_vfids[BITS_TO_LONGS(VLAN_N_VID)]; unsigned long active_fids[BITS_TO_LONGS(VLAN_N_VID)]; @@ -63,12 +70,16 @@ struct mlxsw_sp { } fdb_notify; #define MLXSW_SP_DEFAULT_AGEING_TIME 300 u32 ageing_time; - struct { - struct net_device *dev; - unsigned int ref_count; - } master_bridge; + struct mlxsw_sp_upper master_bridge; + struct mlxsw_sp_upper lags[MLXSW_SP_LAG_MAX]; }; +static inline struct mlxsw_sp_upper * +mlxsw_sp_lag_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id) +{ + return &mlxsw_sp->lags[lag_id]; +} + struct mlxsw_sp_port_pcpu_stats { u64 rx_packets; u64 rx_bytes; @@ -87,8 +98,10 @@ struct mlxsw_sp_port { u8 learning:1, learning_sync:1, uc_flood:1, - bridged:1; + bridged:1, + lagged:1; u16 pvid; + u16 lag_id; /* 802.1Q bridge VLANs */ unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; /* VLAN interfaces */ @@ -96,6 +109,18 @@ struct mlxsw_sp_port { u16 nr_vfids; }; +static inline struct mlxsw_sp_port * +mlxsw_sp_port_lagged_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id, u8 port_index) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + u8 local_port; + + local_port = mlxsw_core_lag_mapping_get(mlxsw_sp->core, + lag_id, port_index); + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + return mlxsw_sp_port && mlxsw_sp_port->lagged ? 
mlxsw_sp_port : NULL; +} + enum mlxsw_sp_flood_table { MLXSW_SP_FLOOD_TABLE_UC, MLXSW_SP_FLOOD_TABLE_BM, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 617fb22b5d81..406dab2f6b17 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -342,14 +342,35 @@ err_port_add_vid: return err; } +static int __mlxsw_sp_port_vlans_set(struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid_begin, u16 vid_end, bool is_member, + bool untagged) +{ + u16 vid, vid_e; + int err; + + for (vid = vid_begin; vid <= vid_end; + vid += MLXSW_REG_SPVM_REC_MAX_COUNT) { + vid_e = min((u16) (vid + MLXSW_REG_SPVM_REC_MAX_COUNT - 1), + vid_end); + + err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid_e, + is_member, untagged); + if (err) + return err; + } + + return 0; +} + static int __mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, u16 vid_end, bool flag_untagged, bool flag_pvid) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct net_device *dev = mlxsw_sp_port->dev; + u16 vid, last_visited_vid, old_pvid; enum mlxsw_reg_svfa_mt mt; - u16 vid, vid_e; int err; /* In case this is invoked with BRIDGE_FLAGS_SELF and port is @@ -377,15 +398,18 @@ static int __mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, if (err) { netdev_err(dev, "Failed to create FID=VID=%d mapping\n", vid); - return err; + goto err_port_vid_to_fid_set; } } + } - /* Set FID mapping according to port's mode */ + /* Set FID mapping according to port's mode */ + for (vid = vid_begin; vid <= vid_end; vid++) { err = mlxsw_sp_port_fid_map(mlxsw_sp_port, vid); if (err) { netdev_err(dev, "Failed to map FID=%d", vid); - return err; + last_visited_vid = --vid; + goto err_port_fid_map; } } @@ -393,40 +417,62 @@ static int __mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, true, false); if (err) { netdev_err(dev, "Failed to configure flooding\n"); - return err; + goto err_port_flood_set; } - for (vid = vid_begin; vid <= vid_end; - vid += MLXSW_REG_SPVM_REC_MAX_COUNT) { - vid_e = min((u16) (vid + MLXSW_REG_SPVM_REC_MAX_COUNT - 1), - vid_end); - - err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid_e, true, - flag_untagged); - if (err) { - netdev_err(mlxsw_sp_port->dev, "Unable to add VIDs %d-%d\n", - vid, vid_e); - return err; - } + err = __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, + true, flag_untagged); + if (err) { + netdev_err(dev, "Unable to add VIDs %d-%d\n", vid_begin, + vid_end); + goto err_port_vlans_set; } - vid = vid_begin; - if (flag_pvid && mlxsw_sp_port->pvid != vid) { - err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, vid); + old_pvid = mlxsw_sp_port->pvid; + if (flag_pvid && old_pvid != vid_begin) { + err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, vid_begin); if (err) { - netdev_err(mlxsw_sp_port->dev, "Unable to add PVID %d\n", - vid); - return err; + netdev_err(dev, "Unable to add PVID %d\n", vid_begin); + goto err_port_pvid_set; } - mlxsw_sp_port->pvid = vid; + mlxsw_sp_port->pvid = vid_begin; } /* Changing activity bits only if HW operation succeded */ for (vid = vid_begin; vid <= vid_end; vid++) set_bit(vid, mlxsw_sp_port->active_vlans); - return mlxsw_sp_port_stp_state_set(mlxsw_sp_port, - mlxsw_sp_port->stp_state); + /* STP state change must be done after we set active VLANs */ + err = mlxsw_sp_port_stp_state_set(mlxsw_sp_port, + mlxsw_sp_port->stp_state); + if (err) { + netdev_err(dev, "Failed to set STP 
state\n"); + goto err_port_stp_state_set; + } + + return 0; + +err_port_vid_to_fid_set: + mlxsw_sp_fid_destroy(mlxsw_sp, vid); + return err; + +err_port_stp_state_set: + for (vid = vid_begin; vid <= vid_end; vid++) + clear_bit(vid, mlxsw_sp_port->active_vlans); + if (old_pvid != mlxsw_sp_port->pvid) + mlxsw_sp_port_pvid_set(mlxsw_sp_port, old_pvid); +err_port_pvid_set: + __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, false, + false); +err_port_vlans_set: + __mlxsw_sp_port_flood_set(mlxsw_sp_port, vid_begin, vid_end, false, + false); +err_port_flood_set: + last_visited_vid = vid_end; +err_port_fid_map: + for (vid = last_visited_vid; vid >= vid_begin; vid--) + mlxsw_sp_port_fid_unmap(mlxsw_sp_port, vid); + return err; } static int mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, @@ -444,32 +490,56 @@ static int mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, untagged_flag, pvid_flag); } -static int mlxsw_sp_port_fdb_op(struct mlxsw_sp_port *mlxsw_sp_port, - const char *mac, u16 vid, bool adding, - bool dynamic) +static enum mlxsw_reg_sfd_rec_policy mlxsw_sp_sfd_rec_policy(bool dynamic) +{ + return dynamic ? MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_INGRESS : + MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY; +} + +static enum mlxsw_reg_sfd_op mlxsw_sp_sfd_op(bool adding) +{ + return adding ? MLXSW_REG_SFD_OP_WRITE_EDIT : + MLXSW_REG_SFD_OP_WRITE_REMOVE; +} + +static int mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp_port *mlxsw_sp_port, + const char *mac, u16 vid, bool adding, + bool dynamic) { - enum mlxsw_reg_sfd_rec_policy policy; - enum mlxsw_reg_sfd_op op; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char *sfd_pl; int err; - if (!vid) - vid = mlxsw_sp_port->pvid; - sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL); if (!sfd_pl) return -ENOMEM; - policy = dynamic ? MLXSW_REG_SFD_REC_POLICY_DYNAMIC_ENTRY_INGRESS : - MLXSW_REG_SFD_REC_POLICY_STATIC_ENTRY; - op = adding ? 
MLXSW_REG_SFD_OP_WRITE_EDIT : - MLXSW_REG_SFD_OP_WRITE_REMOVE; - mlxsw_reg_sfd_pack(sfd_pl, op, 0); - mlxsw_reg_sfd_uc_pack(sfd_pl, 0, policy, + mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); + mlxsw_reg_sfd_uc_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic), mac, vid, MLXSW_REG_SFD_REC_ACTION_NOP, mlxsw_sp_port->local_port); - err = mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(sfd), - sfd_pl); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); + kfree(sfd_pl); + + return err; +} + +static int mlxsw_sp_port_fdb_uc_lag_op(struct mlxsw_sp *mlxsw_sp, u16 lag_id, + const char *mac, u16 vid, bool adding, + bool dynamic) +{ + char *sfd_pl; + int err; + + sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL); + if (!sfd_pl) + return -ENOMEM; + + mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); + mlxsw_reg_sfd_uc_lag_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic), + mac, vid, MLXSW_REG_SFD_REC_ACTION_NOP, + lag_id); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); kfree(sfd_pl); return err; @@ -480,11 +550,21 @@ mlxsw_sp_port_fdb_static_add(struct mlxsw_sp_port *mlxsw_sp_port, const struct switchdev_obj_port_fdb *fdb, struct switchdev_trans *trans) { + u16 vid = fdb->vid; + if (switchdev_trans_ph_prepare(trans)) return 0; - return mlxsw_sp_port_fdb_op(mlxsw_sp_port, fdb->addr, fdb->vid, - true, false); + if (!vid) + vid = mlxsw_sp_port->pvid; + + if (!mlxsw_sp_port->lagged) + return mlxsw_sp_port_fdb_uc_op(mlxsw_sp_port, + fdb->addr, vid, true, false); + else + return mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_port->lag_id, + fdb->addr, vid, true, false); } static int mlxsw_sp_port_obj_add(struct net_device *dev, @@ -532,7 +612,7 @@ static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, u16 vid_end, bool init) { struct net_device *dev = mlxsw_sp_port->dev; - u16 vid, vid_e; + u16 vid, pvid; int err; /* In case this is invoked with BRIDGE_FLAGS_SELF and port is @@ -542,30 +622,23 @@ static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, if (!init && !mlxsw_sp_port->bridged) return mlxsw_sp_port_kill_vids(dev, vid_begin, vid_end); - for (vid = vid_begin; vid <= vid_end; - vid += MLXSW_REG_SPVM_REC_MAX_COUNT) { - vid_e = min((u16) (vid + MLXSW_REG_SPVM_REC_MAX_COUNT - 1), - vid_end); - err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid_e, false, - false); - if (err) { - netdev_err(mlxsw_sp_port->dev, "Unable to del VIDs %d-%d\n", - vid, vid_e); - return err; - } + err = __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, + false, false); + if (err) { + netdev_err(dev, "Unable to del VIDs %d-%d\n", vid_begin, + vid_end); + return err; } - if ((mlxsw_sp_port->pvid >= vid_begin) && - (mlxsw_sp_port->pvid <= vid_end)) { + pvid = mlxsw_sp_port->pvid; + if (pvid >= vid_begin && pvid <= vid_end && pvid != 1) { /* Default VLAN is always 1 */ - mlxsw_sp_port->pvid = 1; - err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, - mlxsw_sp_port->pvid); + err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, 1); if (err) { - netdev_err(mlxsw_sp_port->dev, "Unable to del PVID %d\n", - vid); + netdev_err(dev, "Unable to del PVID %d\n", pvid); return err; } + mlxsw_sp_port->pvid = 1; } if (init) @@ -606,8 +679,15 @@ static int mlxsw_sp_port_fdb_static_del(struct mlxsw_sp_port *mlxsw_sp_port, const struct switchdev_obj_port_fdb *fdb) { - return mlxsw_sp_port_fdb_op(mlxsw_sp_port, fdb->addr, fdb->vid, - false, false); + if (!mlxsw_sp_port->lagged) + return mlxsw_sp_port_fdb_uc_op(mlxsw_sp_port, + 
fdb->addr, fdb->vid, + false, false); + else + return mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_port->lag_id, + fdb->addr, fdb->vid, + false, false); } static int mlxsw_sp_port_obj_del(struct net_device *dev, @@ -633,14 +713,30 @@ static int mlxsw_sp_port_obj_del(struct net_device *dev, return err; } +static struct mlxsw_sp_port *mlxsw_sp_lag_rep_port(struct mlxsw_sp *mlxsw_sp, + u16 lag_id) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + int i; + + for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) { + mlxsw_sp_port = mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i); + if (mlxsw_sp_port) + return mlxsw_sp_port; + } + return NULL; +} + static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port, struct switchdev_obj_port_fdb *fdb, switchdev_obj_dump_cb_t *cb) { + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char *sfd_pl; char mac[ETH_ALEN]; u16 vid; u8 local_port; + u16 lag_id; u8 num_rec; int stored_err = 0; int i; @@ -653,8 +749,7 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_reg_sfd_pack(sfd_pl, MLXSW_REG_SFD_OP_QUERY_DUMP, 0); do { mlxsw_reg_sfd_num_rec_set(sfd_pl, MLXSW_REG_SFD_REC_MAX_COUNT); - err = mlxsw_reg_query(mlxsw_sp_port->mlxsw_sp->core, - MLXSW_REG(sfd), sfd_pl); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); if (err) goto out; @@ -679,6 +774,20 @@ static int mlxsw_sp_port_fdb_dump(struct mlxsw_sp_port *mlxsw_sp_port, if (err) stored_err = err; } + break; + case MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG: + mlxsw_reg_sfd_uc_lag_unpack(sfd_pl, i, + mac, &vid, &lag_id); + if (mlxsw_sp_port == + mlxsw_sp_lag_rep_port(mlxsw_sp, lag_id)) { + ether_addr_copy(fdb->addr, mac); + fdb->ndm_state = NUD_REACHABLE; + fdb->vid = vid; + err = cb(&fdb->obj); + if (err) + stored_err = err; + } + break; } } } while (num_rec == MLXSW_REG_SFD_REC_MAX_COUNT); @@ -740,6 +849,21 @@ static const struct switchdev_ops mlxsw_sp_port_switchdev_ops = { .switchdev_port_obj_dump = mlxsw_sp_port_obj_dump, }; +static void mlxsw_sp_fdb_call_notifiers(bool learning, bool learning_sync, + bool adding, char *mac, u16 vid, + struct net_device *dev) +{ + struct switchdev_notifier_fdb_info info; + unsigned long notifier_type; + + if (learning && learning_sync) { + info.addr = mac; + info.vid = vid; + notifier_type = adding ? SWITCHDEV_FDB_ADD : SWITCHDEV_FDB_DEL; + call_switchdev_notifiers(notifier_type, dev, &info.info); + } +} + static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp, char *sfn_pl, int rec_index, bool adding) @@ -757,24 +881,49 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp, return; } - err = mlxsw_sp_port_fdb_op(mlxsw_sp_port, mac, vid, - adding && mlxsw_sp_port->learning, true); + err = mlxsw_sp_port_fdb_uc_op(mlxsw_sp_port, mac, vid, + adding && mlxsw_sp_port->learning, true); if (err) { if (net_ratelimit()) netdev_err(mlxsw_sp_port->dev, "Failed to set FDB entry\n"); return; } - if (mlxsw_sp_port->learning && mlxsw_sp_port->learning_sync) { - struct switchdev_notifier_fdb_info info; - unsigned long notifier_type; + mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning, + mlxsw_sp_port->learning_sync, + adding, mac, vid, mlxsw_sp_port->dev); +} - info.addr = mac; - info.vid = vid; - notifier_type = adding ? 
SWITCHDEV_FDB_ADD : SWITCHDEV_FDB_DEL; - call_switchdev_notifiers(notifier_type, mlxsw_sp_port->dev, - &info.info); +static void mlxsw_sp_fdb_notify_mac_lag_process(struct mlxsw_sp *mlxsw_sp, + char *sfn_pl, int rec_index, + bool adding) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + char mac[ETH_ALEN]; + u16 lag_id; + u16 vid; + int err; + + mlxsw_reg_sfn_mac_lag_unpack(sfn_pl, rec_index, mac, &vid, &lag_id); + mlxsw_sp_port = mlxsw_sp_lag_rep_port(mlxsw_sp, lag_id); + if (!mlxsw_sp_port) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Cannot find port representor for LAG\n"); + return; + } + + err = mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp, lag_id, mac, vid, + adding && mlxsw_sp_port->learning, + true); + if (err) { + if (net_ratelimit()) + netdev_err(mlxsw_sp_port->dev, "Failed to set FDB entry\n"); + return; } + + mlxsw_sp_fdb_call_notifiers(mlxsw_sp_port->learning, + mlxsw_sp_port->learning_sync, + adding, mac, vid, + mlxsw_sp_lag_get(mlxsw_sp, lag_id)->dev); } static void mlxsw_sp_fdb_notify_rec_process(struct mlxsw_sp *mlxsw_sp, @@ -789,6 +938,14 @@ static void mlxsw_sp_fdb_notify_rec_process(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_fdb_notify_mac_process(mlxsw_sp, sfn_pl, rec_index, false); break; + case MLXSW_REG_SFN_REC_TYPE_LEARNED_MAC_LAG: + mlxsw_sp_fdb_notify_mac_lag_process(mlxsw_sp, sfn_pl, + rec_index, true); + break; + case MLXSW_REG_SFN_REC_TYPE_AGED_OUT_MAC_LAG: + mlxsw_sp_fdb_notify_mac_lag_process(mlxsw_sp, sfn_pl, + rec_index, false); + break; } } diff --git a/drivers/net/ethernet/microchip/encx24j600.c b/drivers/net/ethernet/microchip/encx24j600.c index 2056b719c262..7df318346b05 100644 --- a/drivers/net/ethernet/microchip/encx24j600.c +++ b/drivers/net/ethernet/microchip/encx24j600.c @@ -600,22 +600,11 @@ static void encx24j600_set_rxfilter_mode(struct encx24j600_priv *priv) static int encx24j600_hw_init(struct encx24j600_priv *priv) { - struct net_device *dev = priv->ndev; int ret = 0; - u16 eidled; u16 macon2; priv->hw_enabled = false; - eidled = encx24j600_read_reg(priv, EIDLED); - if (((eidled & DEVID_MASK) >> DEVID_SHIFT) != ENCX24J600_DEV_ID) { - ret = -EINVAL; - goto err_out; - } - - netif_info(priv, drv, dev, "Silicon rev ID: 0x%02x\n", - (eidled & REVID_MASK) >> REVID_SHIFT); - /* PHY Leds: link status, * LEDA: Link State + collision events * LEDB: Link State + transmit/receive events @@ -655,7 +644,6 @@ static int encx24j600_hw_init(struct encx24j600_priv *priv) if (netif_msg_hw(priv)) encx24j600_dump_config(priv, "Hw is initialized"); -err_out: return ret; } @@ -1004,6 +992,7 @@ static int encx24j600_spi_probe(struct spi_device *spi) struct net_device *ndev; struct encx24j600_priv *priv; + u16 eidled; ndev = alloc_etherdev(sizeof(struct encx24j600_priv)); @@ -1072,10 +1061,21 @@ static int encx24j600_spi_probe(struct spi_device *spi) goto out_free; } + eidled = encx24j600_read_reg(priv, EIDLED); + if (((eidled & DEVID_MASK) >> DEVID_SHIFT) != ENCX24J600_DEV_ID) { + ret = -EINVAL; + goto out_unregister; + } + + netif_info(priv, probe, ndev, "Silicon rev ID: 0x%02x\n", + (eidled & REVID_MASK) >> REVID_SHIFT); + netif_info(priv, drv, priv->ndev, "MAC address %pM\n", ndev->dev_addr); return ret; +out_unregister: + unregister_netdev(priv->ndev); out_free: free_netdev(ndev); diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 83651ac8ddb9..270c9eeb7ab6 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -1488,7 +1488,6 @@ 
myri10ge_rx_done(struct myri10ge_slice_state *ss, int len, __wsum csum) } myri10ge_vlan_rx(mgp->dev, va, skb); skb_record_rx_queue(skb, ss - &mgp->ss[0]); - skb_mark_napi_id(skb, &ss->napi); if (polling) { int hlen; @@ -1506,6 +1505,7 @@ myri10ge_rx_done(struct myri10ge_slice_state *ss, int len, __wsum csum) skb->data_len -= hlen; skb->tail += hlen; skb->protocol = eth_type_trans(skb, dev); + skb_mark_napi_id(skb, &ss->napi); netif_receive_skb(skb); } else @@ -3814,7 +3814,6 @@ static int myri10ge_alloc_slices(struct myri10ge_priv *mgp) ss->dev = mgp->dev; netif_napi_add(ss->dev, &ss->napi, myri10ge_poll, myri10ge_napi_weight); - napi_hash_add(&ss->napi); } return 0; abort: diff --git a/drivers/net/ethernet/netronome/Kconfig b/drivers/net/ethernet/netronome/Kconfig new file mode 100644 index 000000000000..9508ad782c30 --- /dev/null +++ b/drivers/net/ethernet/netronome/Kconfig @@ -0,0 +1,36 @@ +# +# Netronome device configuration +# + +config NET_VENDOR_NETRONOME + bool "Netronome(R) devices" + default y + ---help--- + If you have a Netronome(R) network (Ethernet) card or device, say Y. + + Note that the answer to this question doesn't directly affect the + kernel: saying N will just cause the configurator to skip all + the questions about Netronome(R) cards. If you say Y, you will be + asked for your specific card in the following questions. + +if NET_VENDOR_NETRONOME + +config NFP_NETVF + tristate "Netronome(R) NFP4000/NFP6000 VF NIC driver" + depends on PCI && PCI_MSI + depends on VXLAN || VXLAN=n + ---help--- + This driver supports SR-IOV virtual functions of + the Netronome(R) NFP4000/NFP6000 cards working as + a advanced Ethernet NIC. + +config NFP_NET_DEBUG + bool "Debug support for Netronome(R) NFP3200/NFP6000 NIC drivers" + depends on NFP_NET || NFP_NETVF + ---help--- + Enable extra sanity checks and debugfs support in + Netronome(R) NFP3200/NFP6000 NIC PF and VF drivers. + Note: selecting this option may adversely impact + performance. + +endif diff --git a/drivers/net/ethernet/netronome/Makefile b/drivers/net/ethernet/netronome/Makefile new file mode 100644 index 000000000000..dcb7b383f634 --- /dev/null +++ b/drivers/net/ethernet/netronome/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the Netronome network device drivers +# + +obj-$(CONFIG_NFP_NETVF) += nfp/ diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile new file mode 100644 index 000000000000..68178819ff12 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/Makefile @@ -0,0 +1,8 @@ +obj-$(CONFIG_NFP_NETVF) += nfp_netvf.o + +nfp_netvf-objs := \ + nfp_net_common.o \ + nfp_net_ethtool.o \ + nfp_netvf_main.o + +nfp_netvf-$(CONFIG_NFP_NET_DEBUG) += nfp_net_debugfs.o diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h new file mode 100644 index 000000000000..ab264e1bccd0 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -0,0 +1,748 @@ +/* + * Copyright (C) 2015 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. 
Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * nfp_net.h + * Declarations for Netronome network device driver. + * Authors: Jakub Kicinski <jakub.kicinski@netronome.com> + * Jason McMullan <jason.mcmullan@netronome.com> + * Rolf Neugebauer <rolf.neugebauer@netronome.com> + */ + +#ifndef _NFP_NET_H_ +#define _NFP_NET_H_ + +#include <linux/interrupt.h> +#include <linux/netdevice.h> +#include <linux/pci.h> +#include <asm-generic/io-64-nonatomic-hi-lo.h> + +#include "nfp_net_ctrl.h" + +#define nn_err(nn, fmt, args...) netdev_err((nn)->netdev, fmt, ## args) +#define nn_warn(nn, fmt, args...) netdev_warn((nn)->netdev, fmt, ## args) +#define nn_info(nn, fmt, args...) netdev_info((nn)->netdev, fmt, ## args) +#define nn_dbg(nn, fmt, args...) netdev_dbg((nn)->netdev, fmt, ## args) +#define nn_warn_ratelimit(nn, fmt, args...) \ + do { \ + if (unlikely(net_ratelimit())) \ + netdev_warn((nn)->netdev, fmt, ## args); \ + } while (0) + +/* Max time to wait for NFP to respond on updates (in ms) */ +#define NFP_NET_POLL_TIMEOUT 5000 + +/* Bar allocation */ +#define NFP_NET_CRTL_BAR 0 +#define NFP_NET_Q0_BAR 2 +#define NFP_NET_Q1_BAR 4 /* OBSOLETE */ + +/* Max bits in DMA address */ +#define NFP_NET_MAX_DMA_BITS 40 + +/* Default size for MTU and freelist buffer sizes */ +#define NFP_NET_DEFAULT_MTU 1500 +#define NFP_NET_DEFAULT_RX_BUFSZ 2048 + +/* Maximum number of bytes prepended to a packet */ +#define NFP_NET_MAX_PREPEND 64 + +/* Interrupt definitions */ +#define NFP_NET_NON_Q_VECTORS 2 +#define NFP_NET_IRQ_LSC_IDX 0 +#define NFP_NET_IRQ_EXN_IDX 1 + +/* Queue/Ring definitions */ +#define NFP_NET_MAX_TX_RINGS 64 /* Max. # of Tx rings per device */ +#define NFP_NET_MAX_RX_RINGS 64 /* Max. # of Rx rings per device */ + +#define NFP_NET_MIN_TX_DESCS 256 /* Min. # of Tx descs per ring */ +#define NFP_NET_MIN_RX_DESCS 256 /* Min. # of Rx descs per ring */ +#define NFP_NET_MAX_TX_DESCS (256 * 1024) /* Max. # of Tx descs per ring */ +#define NFP_NET_MAX_RX_DESCS (256 * 1024) /* Max. 
# of Rx descs per ring */ + +#define NFP_NET_TX_DESCS_DEFAULT 4096 /* Default # of Tx descs per ring */ +#define NFP_NET_RX_DESCS_DEFAULT 4096 /* Default # of Rx descs per ring */ + +#define NFP_NET_FL_BATCH 16 /* Add freelist in this Batch size */ + +/* Offload definitions */ +#define NFP_NET_N_VXLAN_PORTS (NFP_NET_CFG_VXLAN_SZ / sizeof(__be16)) + +/* Forward declarations */ +struct nfp_net; +struct nfp_net_r_vector; + +/* Convenience macro for writing dma address into RX/TX descriptors */ +#define nfp_desc_set_dma_addr(desc, dma_addr) \ + do { \ + __typeof(desc) __d = (desc); \ + dma_addr_t __addr = (dma_addr); \ + \ + __d->dma_addr_lo = cpu_to_le32(lower_32_bits(__addr)); \ + __d->dma_addr_hi = upper_32_bits(__addr) & 0xff; \ + } while (0) + +/* TX descriptor format */ + +#define PCIE_DESC_TX_EOP BIT(7) +#define PCIE_DESC_TX_OFFSET_MASK GENMASK(6, 0) +#define PCIE_DESC_TX_MSS_MASK GENMASK(13, 0) + +/* Flags in the host TX descriptor */ +#define PCIE_DESC_TX_CSUM BIT(7) +#define PCIE_DESC_TX_IP4_CSUM BIT(6) +#define PCIE_DESC_TX_TCP_CSUM BIT(5) +#define PCIE_DESC_TX_UDP_CSUM BIT(4) +#define PCIE_DESC_TX_VLAN BIT(3) +#define PCIE_DESC_TX_LSO BIT(2) +#define PCIE_DESC_TX_ENCAP BIT(1) +#define PCIE_DESC_TX_O_IP4_CSUM BIT(0) + +struct nfp_net_tx_desc { + union { + struct { + u8 dma_addr_hi; /* High bits of host buf address */ + __le16 dma_len; /* Length to DMA for this desc */ + u8 offset_eop; /* Offset in buf where pkt starts + + * highest bit is eop flag. + */ + __le32 dma_addr_lo; /* Low 32bit of host buf addr */ + + __le16 mss; /* MSS to be used for LSO */ + u8 l4_offset; /* LSO, where the L4 data starts */ + u8 flags; /* TX Flags, see @PCIE_DESC_TX_* */ + + __le16 vlan; /* VLAN tag to add if indicated */ + __le16 data_len; /* Length of frame + meta data */ + } __packed; + __le32 vals[4]; + }; +}; + +/** + * struct nfp_net_tx_buf - software TX buffer descriptor + * @skb: sk_buff associated with this buffer + * @dma_addr: DMA mapping address of the buffer + * @fidx: Fragment index (-1 for the head and [0..nr_frags-1] for frags) + * @pkt_cnt: Number of packets to be produced out of the skb associated + * with this buffer (valid only on the head's buffer). + * Will be 1 for all non-TSO packets. + * @real_len: Number of bytes which to be produced out of the skb (valid only + * on the head's buffer). Equal to skb->len for non-TSO packets. 
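/* Illustrative sketch, not part of the patch: how @pkt_cnt and @real_len
 * above are meant to be filled. For a TSO skb with gso_segs = 7 and a
 * 66-byte header, real_len becomes skb->len + 6 * 66, because the header
 * is replicated for every additional segment on the wire; non-TSO skbs
 * keep pkt_cnt = 1 and real_len = skb->len. The helper name here is an
 * assumption; the real update is performed in nfp_net_tx_tso().
 */
static void example_tx_buf_account(struct nfp_net_tx_buf *txbuf,
				   struct sk_buff *skb, u32 hdrlen)
{
	txbuf->skb = skb;
	txbuf->pkt_cnt = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
	txbuf->real_len = skb->len + hdrlen * (txbuf->pkt_cnt - 1);
}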
+ */ +struct nfp_net_tx_buf { + struct sk_buff *skb; + dma_addr_t dma_addr; + short int fidx; + u16 pkt_cnt; + u32 real_len; +}; + +/** + * struct nfp_net_tx_ring - TX ring structure + * @r_vec: Back pointer to ring vector structure + * @idx: Ring index from Linux's perspective + * @qcidx: Queue Controller Peripheral (QCP) queue index for the TX queue + * @qcp_q: Pointer to base of the QCP TX queue + * @cnt: Size of the queue in number of descriptors + * @wr_p: TX ring write pointer (free running) + * @rd_p: TX ring read pointer (free running) + * @qcp_rd_p: Local copy of QCP TX queue read pointer + * @wr_ptr_add: Accumulated number of buffers to add to QCP write pointer + * (used for .xmit_more delayed kick) + * @txbufs: Array of transmitted TX buffers, to free on transmit + * @txds: Virtual address of TX ring in host memory + * @dma: DMA address of the TX ring + * @size: Size, in bytes, of the TX ring (needed to free) + */ +struct nfp_net_tx_ring { + struct nfp_net_r_vector *r_vec; + + u32 idx; + int qcidx; + u8 __iomem *qcp_q; + + u32 cnt; + u32 wr_p; + u32 rd_p; + u32 qcp_rd_p; + + u32 wr_ptr_add; + + struct nfp_net_tx_buf *txbufs; + struct nfp_net_tx_desc *txds; + + dma_addr_t dma; + unsigned int size; +} ____cacheline_aligned; + +/* RX and freelist descriptor format */ + +#define PCIE_DESC_RX_DD BIT(7) +#define PCIE_DESC_RX_META_LEN_MASK GENMASK(6, 0) + +/* Flags in the RX descriptor */ +#define PCIE_DESC_RX_RSS cpu_to_le16(BIT(15)) +#define PCIE_DESC_RX_I_IP4_CSUM cpu_to_le16(BIT(14)) +#define PCIE_DESC_RX_I_IP4_CSUM_OK cpu_to_le16(BIT(13)) +#define PCIE_DESC_RX_I_TCP_CSUM cpu_to_le16(BIT(12)) +#define PCIE_DESC_RX_I_TCP_CSUM_OK cpu_to_le16(BIT(11)) +#define PCIE_DESC_RX_I_UDP_CSUM cpu_to_le16(BIT(10)) +#define PCIE_DESC_RX_I_UDP_CSUM_OK cpu_to_le16(BIT(9)) +#define PCIE_DESC_RX_SPARE cpu_to_le16(BIT(8)) +#define PCIE_DESC_RX_EOP cpu_to_le16(BIT(7)) +#define PCIE_DESC_RX_IP4_CSUM cpu_to_le16(BIT(6)) +#define PCIE_DESC_RX_IP4_CSUM_OK cpu_to_le16(BIT(5)) +#define PCIE_DESC_RX_TCP_CSUM cpu_to_le16(BIT(4)) +#define PCIE_DESC_RX_TCP_CSUM_OK cpu_to_le16(BIT(3)) +#define PCIE_DESC_RX_UDP_CSUM cpu_to_le16(BIT(2)) +#define PCIE_DESC_RX_UDP_CSUM_OK cpu_to_le16(BIT(1)) +#define PCIE_DESC_RX_VLAN cpu_to_le16(BIT(0)) + +#define PCIE_DESC_RX_CSUM_ALL (PCIE_DESC_RX_IP4_CSUM | \ + PCIE_DESC_RX_TCP_CSUM | \ + PCIE_DESC_RX_UDP_CSUM | \ + PCIE_DESC_RX_I_IP4_CSUM | \ + PCIE_DESC_RX_I_TCP_CSUM | \ + PCIE_DESC_RX_I_UDP_CSUM) +#define PCIE_DESC_RX_CSUM_OK_SHIFT 1 +#define __PCIE_DESC_RX_CSUM_ALL le16_to_cpu(PCIE_DESC_RX_CSUM_ALL) +#define __PCIE_DESC_RX_CSUM_ALL_OK (__PCIE_DESC_RX_CSUM_ALL >> \ + PCIE_DESC_RX_CSUM_OK_SHIFT) + +struct nfp_net_rx_desc { + union { + struct { + u8 dma_addr_hi; /* High bits of the buf address */ + __le16 reserved; /* Must be zero */ + u8 meta_len_dd; /* Must be zero */ + + __le32 dma_addr_lo; /* Low bits of the buffer address */ + } __packed fld; + + struct { + __le16 data_len; /* Length of the frame + meta data */ + u8 reserved; + u8 meta_len_dd; /* Length of meta data prepended + + * descriptor done flag. + */ + + __le16 flags; /* RX flags. 
See @PCIE_DESC_RX_* */ + __le16 vlan; /* VLAN if stripped */ + } __packed rxd; + + __le32 vals[2]; + }; +}; + +struct nfp_net_rx_hash { + __be32 hash_type; + __be32 hash; +}; + +/** + * struct nfp_net_rx_buf - software RX buffer descriptor + * @skb: sk_buff associated with this buffer + * @dma_addr: DMA mapping address of the buffer + */ +struct nfp_net_rx_buf { + struct sk_buff *skb; + dma_addr_t dma_addr; +}; + +/** + * struct nfp_net_rx_ring - RX ring structure + * @r_vec: Back pointer to ring vector structure + * @cnt: Size of the queue in number of descriptors + * @wr_p: FL/RX ring write pointer (free running) + * @rd_p: FL/RX ring read pointer (free running) + * @idx: Ring index from Linux's perspective + * @fl_qcidx: Queue Controller Peripheral (QCP) queue index for the freelist + * @rx_qcidx: Queue Controller Peripheral (QCP) queue index for the RX queue + * @qcp_fl: Pointer to base of the QCP freelist queue + * @qcp_rx: Pointer to base of the QCP RX queue + * @wr_ptr_add: Accumulated number of buffers to add to QCP write pointer + * (used for free list batching) + * @rxbufs: Array of transmitted FL/RX buffers + * @rxds: Virtual address of FL/RX ring in host memory + * @dma: DMA address of the FL/RX ring + * @size: Size, in bytes, of the FL/RX ring (needed to free) + */ +struct nfp_net_rx_ring { + struct nfp_net_r_vector *r_vec; + + u32 cnt; + u32 wr_p; + u32 rd_p; + + u16 idx; + u16 wr_ptr_add; + + int fl_qcidx; + int rx_qcidx; + u8 __iomem *qcp_fl; + u8 __iomem *qcp_rx; + + struct nfp_net_rx_buf *rxbufs; + struct nfp_net_rx_desc *rxds; + + dma_addr_t dma; + unsigned int size; +} ____cacheline_aligned; + +/** + * struct nfp_net_r_vector - Per ring interrupt vector configuration + * @nfp_net: Backpointer to nfp_net structure + * @napi: NAPI structure for this ring vec + * @tx_ring: Pointer to TX ring + * @rx_ring: Pointer to RX ring + * @irq_idx: Index into MSI-X table + * @rx_sync: Seqlock for atomic updates of RX stats + * @rx_pkts: Number of received packets + * @rx_bytes: Number of received bytes + * @rx_drops: Number of packets dropped on RX due to lack of resources + * @hw_csum_rx_ok: Counter of packets where the HW checksum was OK + * @hw_csum_rx_inner_ok: Counter of packets where the inner HW checksum was OK + * @hw_csum_rx_error: Counter of packets with bad checksums + * @tx_sync: Seqlock for atomic updates of TX stats + * @tx_pkts: Number of Transmitted packets + * @tx_bytes: Number of Transmitted bytes + * @hw_csum_tx: Counter of packets with TX checksum offload requested + * @hw_csum_tx_inner: Counter of inner TX checksum offload requests + * @tx_gather: Counter of packets with Gather DMA + * @tx_lso: Counter of LSO packets sent + * @tx_errors: How many TX errors were encountered + * @tx_busy: How often was TX busy (no space)? + * @handler: Interrupt handler for this ring vector + * @name: Name of the interrupt vector + * @affinity_mask: SMP affinity mask for this vector + * + * This structure ties RX and TX rings to interrupt vectors and a NAPI + * context. This currently only supports one RX and TX ring per + * interrupt vector but might be extended in the future to allow + * association of multiple rings per vector. 
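/* Illustrative sketch, not part of the patch: the counters in the
 * structure below are updated from NAPI context under @rx_sync/@tx_sync,
 * so a reader such as ndo_get_stats64 samples them with the standard
 * u64_stats helpers. The function name is an assumption; the real
 * aggregation loop lives elsewhere in nfp_net_common.c.
 */
static void example_read_rx_stats(struct nfp_net_r_vector *r_vec,
				  u64 *pkts, u64 *bytes)
{
	unsigned int start;

	do {
		start = u64_stats_fetch_begin(&r_vec->rx_sync);
		*pkts = r_vec->rx_pkts;
		*bytes = r_vec->rx_bytes;
	} while (u64_stats_fetch_retry(&r_vec->rx_sync, start));
}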
+ */ +struct nfp_net_r_vector { + struct nfp_net *nfp_net; + struct napi_struct napi; + + struct nfp_net_tx_ring *tx_ring; + struct nfp_net_rx_ring *rx_ring; + + int irq_idx; + + struct u64_stats_sync rx_sync; + u64 rx_pkts; + u64 rx_bytes; + u64 rx_drops; + u64 hw_csum_rx_ok; + u64 hw_csum_rx_inner_ok; + u64 hw_csum_rx_error; + + struct u64_stats_sync tx_sync; + u64 tx_pkts; + u64 tx_bytes; + u64 hw_csum_tx; + u64 hw_csum_tx_inner; + u64 tx_gather; + u64 tx_lso; + u64 tx_errors; + u64 tx_busy; + + irq_handler_t handler; + char name[IFNAMSIZ + 8]; + cpumask_t affinity_mask; +} ____cacheline_aligned; + +/* Firmware version as it is written in the 32bit value in the BAR */ +struct nfp_net_fw_version { + u8 minor; + u8 major; + u8 class; + u8 resv; +} __packed; + +static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver, + u8 resv, u8 class, u8 major, u8 minor) +{ + return fw_ver->resv == resv && + fw_ver->class == class && + fw_ver->major == major && + fw_ver->minor == minor; +} + +/** + * struct nfp_net - NFP network device structure + * @pdev: Backpointer to PCI device + * @netdev: Backpointer to net_device structure + * @nfp_fallback: Is the driver used in fallback mode? + * @is_vf: Is the driver attached to a VF? + * @is_nfp3200: Is the driver for a NFP-3200 card? + * @fw_loaded: Is the firmware loaded? + * @ctrl: Local copy of the control register/word. + * @fl_bufsz: Currently configured size of the freelist buffers + * @rx_offset: Offset in the RX buffers where packet data starts + * @cpp: Pointer to the CPP handle + * @nfp_dev_cpp: Pointer to the NFP Device handle + * @ctrl_area: Pointer to the CPP area for the control BAR + * @tx_area: Pointer to the CPP area for the TX queues + * @rx_area: Pointer to the CPP area for the FL/RX queues + * @fw_ver: Firmware version + * @cap: Capabilities advertised by the Firmware + * @max_mtu: Maximum support MTU advertised by the Firmware + * @rss_cfg: RSS configuration + * @rss_key: RSS secret key + * @rss_itbl: RSS indirection table + * @max_tx_rings: Maximum number of TX rings supported by the Firmware + * @max_rx_rings: Maximum number of RX rings supported by the Firmware + * @num_tx_rings: Currently configured number of TX rings + * @num_rx_rings: Currently configured number of RX rings + * @txd_cnt: Size of the TX ring in number of descriptors + * @rxd_cnt: Size of the RX ring in number of descriptors + * @tx_rings: Array of pre-allocated TX ring structures + * @rx_rings: Array of pre-allocated RX ring structures + * @num_irqs: Number of allocated interrupt vectors + * @num_r_vecs: Number of used ring vectors + * @r_vecs: Pre-allocated array of ring vectors + * @irq_entries: Pre-allocated array of MSI-X entries + * @lsc_handler: Handler for Link State Change interrupt + * @lsc_name: Name for Link State Change interrupt + * @exn_handler: Handler for Exception interrupt + * @exn_name: Name for Exception interrupt + * @shared_handler: Handler for shared interrupts + * @shared_name: Name for shared interrupt + * @me_freq_mhz: ME clock_freq (MHz) + * @reconfig_lock: Protects HW reconfiguration request regs/machinery + * @link_up: Is the link up? 
+ * @link_status_lock: Protects @link_up and ensures atomicity with BAR reading + * @rx_coalesce_usecs: RX interrupt moderation usecs delay parameter + * @rx_coalesce_max_frames: RX interrupt moderation frame count parameter + * @tx_coalesce_usecs: TX interrupt moderation usecs delay parameter + * @tx_coalesce_max_frames: TX interrupt moderation frame count parameter + * @vxlan_ports: VXLAN ports for RX inner csum offload communicated to HW + * @vxlan_usecnt: IPv4/IPv6 VXLAN port use counts + * @qcp_cfg: Pointer to QCP queue used for configuration notification + * @ctrl_bar: Pointer to mapped control BAR + * @tx_bar: Pointer to mapped TX queues + * @rx_bar: Pointer to mapped FL/RX queues + * @debugfs_dir: Device directory in debugfs + */ +struct nfp_net { + struct pci_dev *pdev; + struct net_device *netdev; + + unsigned nfp_fallback:1; + unsigned is_vf:1; + unsigned is_nfp3200:1; + unsigned fw_loaded:1; + + u32 ctrl; + u32 fl_bufsz; + + u32 rx_offset; + +#ifdef CONFIG_PCI_IOV + unsigned int num_vfs; + struct vf_data_storage *vfinfo; + int vf_rate_link_speed; +#endif + + struct nfp_cpp *cpp; + struct platform_device *nfp_dev_cpp; + struct nfp_cpp_area *ctrl_area; + struct nfp_cpp_area *tx_area; + struct nfp_cpp_area *rx_area; + + struct nfp_net_fw_version fw_ver; + u32 cap; + u32 max_mtu; + + u32 rss_cfg; + u8 rss_key[NFP_NET_CFG_RSS_KEY_SZ]; + u8 rss_itbl[NFP_NET_CFG_RSS_ITBL_SZ]; + + int max_tx_rings; + int max_rx_rings; + + int num_tx_rings; + int num_rx_rings; + + int stride_tx; + int stride_rx; + + int txd_cnt; + int rxd_cnt; + + struct nfp_net_tx_ring tx_rings[NFP_NET_MAX_TX_RINGS]; + struct nfp_net_rx_ring rx_rings[NFP_NET_MAX_RX_RINGS]; + + u8 num_irqs; + u8 num_r_vecs; + struct nfp_net_r_vector r_vecs[NFP_NET_MAX_TX_RINGS]; + struct msix_entry irq_entries[NFP_NET_NON_Q_VECTORS + + NFP_NET_MAX_TX_RINGS]; + + irq_handler_t lsc_handler; + char lsc_name[IFNAMSIZ + 8]; + + irq_handler_t exn_handler; + char exn_name[IFNAMSIZ + 8]; + + irq_handler_t shared_handler; + char shared_name[IFNAMSIZ + 8]; + + u32 me_freq_mhz; + + bool link_up; + spinlock_t link_status_lock; + + spinlock_t reconfig_lock; + + u32 rx_coalesce_usecs; + u32 rx_coalesce_max_frames; + u32 tx_coalesce_usecs; + u32 tx_coalesce_max_frames; + + __be16 vxlan_ports[NFP_NET_N_VXLAN_PORTS]; + u8 vxlan_usecnt[NFP_NET_N_VXLAN_PORTS]; + + u8 __iomem *qcp_cfg; + + u8 __iomem *ctrl_bar; + u8 __iomem *q_bar; + u8 __iomem *tx_bar; + u8 __iomem *rx_bar; + + struct dentry *debugfs_dir; +}; + +/* Functions to read/write from/to a BAR + * Performs any endian conversion necessary. + */ +static inline void nn_writeb(struct nfp_net *nn, int off, u8 val) +{ + writeb(val, nn->ctrl_bar + off); +} + +/* NFP-3200 can't handle 16-bit accesses too well - hence no readw/writew */ + +static inline u32 nn_readl(struct nfp_net *nn, int off) +{ + return readl(nn->ctrl_bar + off); +} + +static inline void nn_writel(struct nfp_net *nn, int off, u32 val) +{ + writel(val, nn->ctrl_bar + off); +} + +static inline u64 nn_readq(struct nfp_net *nn, int off) +{ + return readq(nn->ctrl_bar + off); +} + +static inline void nn_writeq(struct nfp_net *nn, int off, u64 val) +{ + writeq(val, nn->ctrl_bar + off); +} + +/* Flush posted PCI writes by reading something without side effects */ +static inline void nn_pci_flush(struct nfp_net *nn) +{ + nn_readl(nn, NFP_NET_CFG_VERSION); +} + +/* Queue Controller Peripheral access functions and definitions. 
+ * + * Some of the BARs of the NFP are mapped to portions of the Queue + * Controller Peripheral (QCP) address space on the NFP. A QCP queue + * has a read and a write pointer (as well as a size and flags, + * indicating overflow etc). The QCP offers a number of different + * operation on queue pointers, but here we only offer function to + * either add to a pointer or to read the pointer value. + */ +#define NFP_QCP_QUEUE_ADDR_SZ 0x800 +#define NFP_QCP_QUEUE_OFF(_x) ((_x) * NFP_QCP_QUEUE_ADDR_SZ) +#define NFP_QCP_QUEUE_ADD_RPTR 0x0000 +#define NFP_QCP_QUEUE_ADD_WPTR 0x0004 +#define NFP_QCP_QUEUE_STS_LO 0x0008 +#define NFP_QCP_QUEUE_STS_LO_READPTR_mask 0x3ffff +#define NFP_QCP_QUEUE_STS_HI 0x000c +#define NFP_QCP_QUEUE_STS_HI_WRITEPTR_mask 0x3ffff + +/* The offset of a QCP queues in the PCIe Target (same on NFP3200 and NFP6000 */ +#define NFP_PCIE_QUEUE(_q) (0x80000 + (NFP_QCP_QUEUE_ADDR_SZ * ((_q) & 0xff))) + +/* nfp_qcp_ptr - Read or Write Pointer of a queue */ +enum nfp_qcp_ptr { + NFP_QCP_READ_PTR = 0, + NFP_QCP_WRITE_PTR +}; + +/* There appear to be an *undocumented* upper limit on the value which + * one can add to a queue and that value is either 0x3f or 0x7f. We + * go with 0x3f as a conservative measure. + */ +#define NFP_QCP_MAX_ADD 0x3f + +static inline void _nfp_qcp_ptr_add(u8 __iomem *q, + enum nfp_qcp_ptr ptr, u32 val) +{ + u32 off; + + if (ptr == NFP_QCP_READ_PTR) + off = NFP_QCP_QUEUE_ADD_RPTR; + else + off = NFP_QCP_QUEUE_ADD_WPTR; + + while (val > NFP_QCP_MAX_ADD) { + writel(NFP_QCP_MAX_ADD, q + off); + val -= NFP_QCP_MAX_ADD; + } + + writel(val, q + off); +} + +/** + * nfp_qcp_rd_ptr_add() - Add the value to the read pointer of a queue + * + * @q: Base address for queue structure + * @val: Value to add to the queue pointer + * + * If @val is greater than @NFP_QCP_MAX_ADD multiple writes are performed. + */ +static inline void nfp_qcp_rd_ptr_add(u8 __iomem *q, u32 val) +{ + _nfp_qcp_ptr_add(q, NFP_QCP_READ_PTR, val); +} + +/** + * nfp_qcp_wr_ptr_add() - Add the value to the write pointer of a queue + * + * @q: Base address for queue structure + * @val: Value to add to the queue pointer + * + * If @val is greater than @NFP_QCP_MAX_ADD multiple writes are performed. + */ +static inline void nfp_qcp_wr_ptr_add(u8 __iomem *q, u32 val) +{ + _nfp_qcp_ptr_add(q, NFP_QCP_WRITE_PTR, val); +} + +static inline u32 _nfp_qcp_read(u8 __iomem *q, enum nfp_qcp_ptr ptr) +{ + u32 off; + u32 val; + + if (ptr == NFP_QCP_READ_PTR) + off = NFP_QCP_QUEUE_STS_LO; + else + off = NFP_QCP_QUEUE_STS_HI; + + val = readl(q + off); + + if (ptr == NFP_QCP_READ_PTR) + return val & NFP_QCP_QUEUE_STS_LO_READPTR_mask; + else + return val & NFP_QCP_QUEUE_STS_HI_WRITEPTR_mask; +} + +/** + * nfp_qcp_rd_ptr_read() - Read the current read pointer value for a queue + * @q: Base address for queue structure + * + * Return: Value read. + */ +static inline u32 nfp_qcp_rd_ptr_read(u8 __iomem *q) +{ + return _nfp_qcp_read(q, NFP_QCP_READ_PTR); +} + +/** + * nfp_qcp_wr_ptr_read() - Read the current write pointer value for a queue + * @q: Base address for queue structure + * + * Return: Value read. 
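/* Illustrative note, not part of the patch: because single writes are
 * capped at NFP_QCP_MAX_ADD (0x3f), a larger advance is split
 * transparently by _nfp_qcp_ptr_add(). For example
 *
 *	nfp_qcp_wr_ptr_add(tx_ring->qcp_q, 100);
 *
 * issues writel(63) followed by writel(37), so the queue pointer still
 * advances by exactly 100.
 */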
+ */ +static inline u32 nfp_qcp_wr_ptr_read(u8 __iomem *q) +{ + return _nfp_qcp_read(q, NFP_QCP_WRITE_PTR); +} + +/* Globals */ +extern const char nfp_net_driver_name[]; +extern const char nfp_net_driver_version[]; + +/* Prototypes */ +void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver, + void __iomem *ctrl_bar); + +struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev, + int max_tx_rings, int max_rx_rings); +void nfp_net_netdev_free(struct nfp_net *nn); +int nfp_net_netdev_init(struct net_device *netdev); +void nfp_net_netdev_clean(struct net_device *netdev); +void nfp_net_set_ethtool_ops(struct net_device *netdev); +void nfp_net_info(struct nfp_net *nn); +int nfp_net_reconfig(struct nfp_net *nn, u32 update); +void nfp_net_rss_write_itbl(struct nfp_net *nn); +void nfp_net_rss_write_key(struct nfp_net *nn); +void nfp_net_coalesce_write_cfg(struct nfp_net *nn); +int nfp_net_irqs_alloc(struct nfp_net *nn); +void nfp_net_irqs_disable(struct nfp_net *nn); + +#ifdef CONFIG_NFP_NET_DEBUG +void nfp_net_debugfs_create(void); +void nfp_net_debugfs_destroy(void); +void nfp_net_debugfs_adapter_add(struct nfp_net *nn); +void nfp_net_debugfs_adapter_del(struct nfp_net *nn); +#else +static inline void nfp_net_debugfs_create(void) +{ +} + +static inline void nfp_net_debugfs_destroy(void) +{ +} + +static inline void nfp_net_debugfs_adapter_add(struct nfp_net *nn) +{ +} + +static inline void nfp_net_debugfs_adapter_del(struct nfp_net *nn) +{ +} +#endif /* CONFIG_NFP_NET_DEBUG */ + +#endif /* _NFP_NET_H_ */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c new file mode 100644 index 000000000000..038ac6b14a60 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -0,0 +1,2432 @@ +/* + * Copyright (C) 2015 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +/* + * nfp_net_common.c + * Netronome network device driver: Common functions between PF and VF + * Authors: Jakub Kicinski <jakub.kicinski@netronome.com> + * Jason McMullan <jason.mcmullan@netronome.com> + * Rolf Neugebauer <rolf.neugebauer@netronome.com> + * Brad Petrus <brad.petrus@netronome.com> + * Chris Telfer <chris.telfer@netronome.com> + */ + +#include <linux/version.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/interrupt.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/pci.h> +#include <linux/pci_regs.h> +#include <linux/msi.h> +#include <linux/ethtool.h> +#include <linux/log2.h> +#include <linux/if_vlan.h> +#include <linux/random.h> + +#include <linux/ktime.h> + +#include <net/vxlan.h> + +#include "nfp_net_ctrl.h" +#include "nfp_net.h" + +/** + * nfp_net_get_fw_version() - Read and parse the FW version + * @fw_ver: Output fw_version structure to read to + * @ctrl_bar: Mapped address of the control BAR + */ +void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver, + void __iomem *ctrl_bar) +{ + u32 reg; + + reg = readl(ctrl_bar + NFP_NET_CFG_VERSION); + put_unaligned_le32(reg, fw_ver); +} + +/** + * nfp_net_reconfig() - Reconfigure the firmware + * @nn: NFP Net device to reconfigure + * @update: The value for the update field in the BAR config + * + * Write the update word to the BAR and ping the reconfig queue. The + * poll until the firmware has acknowledged the update by zeroing the + * update word. + * + * Return: Negative errno on error, 0 on success + */ +int nfp_net_reconfig(struct nfp_net *nn, u32 update) +{ + int cnt, ret = 0; + u32 new; + + spin_lock_bh(&nn->reconfig_lock); + + nn_writel(nn, NFP_NET_CFG_UPDATE, update); + /* ensure update is written before pinging HW */ + nn_pci_flush(nn); + nfp_qcp_wr_ptr_add(nn->qcp_cfg, 1); + + /* Poll update field, waiting for NFP to ack the config */ + for (cnt = 0; ; cnt++) { + new = nn_readl(nn, NFP_NET_CFG_UPDATE); + if (new == 0) + break; + if (new & NFP_NET_CFG_UPDATE_ERR) { + nn_err(nn, "Reconfig error: 0x%08x\n", new); + ret = -EIO; + break; + } else if (cnt >= NFP_NET_POLL_TIMEOUT) { + nn_err(nn, "Reconfig timeout for 0x%08x after %dms\n", + update, cnt); + ret = -EIO; + break; + } + mdelay(1); + } + + spin_unlock_bh(&nn->reconfig_lock); + return ret; +} + +/* Interrupt configuration and handling + */ + +/** + * nfp_net_irq_unmask_msix() - Unmask MSI-X after automasking + * @nn: NFP Network structure + * @entry_nr: MSI-X table entry + * + * Clear the MSI-X table mask bit for the given entry bypassing Linux irq + * handling subsystem. Use *only* to reenable automasked vectors. + */ +static void nfp_net_irq_unmask_msix(struct nfp_net *nn, unsigned int entry_nr) +{ + struct list_head *msi_head = &nn->pdev->dev.msi_list; + struct msi_desc *entry; + u32 off; + + /* All MSI-Xs have the same mask_base */ + entry = list_first_entry(msi_head, struct msi_desc, list); + + off = (PCI_MSIX_ENTRY_SIZE * entry_nr) + + PCI_MSIX_ENTRY_VECTOR_CTRL; + writel(0, entry->mask_base + off); + readl(entry->mask_base); +} + +/** + * nfp_net_irq_unmask() - Unmask automasked interrupt + * @nn: NFP Network structure + * @entry_nr: MSI-X table entry + * + * If MSI-X auto-masking is enabled clear the mask bit, otherwise + * clear the ICR for the entry. 
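/* Illustrative sketch, not part of the patch: the typical caller of the
 * unmask helper below is the NAPI poll routine, which re-arms the
 * auto-masked vector once it has consumed less than its budget. The
 * helper name is an assumption; the driver's real poll function appears
 * later in this file.
 */
static int example_poll_done(struct nfp_net_r_vector *r_vec, int budget,
			     int work_done)
{
	if (work_done < budget) {
		napi_complete(&r_vec->napi);
		nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_idx);
	}
	return work_done;
}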
+ */ +static void nfp_net_irq_unmask(struct nfp_net *nn, unsigned int entry_nr) +{ + if (nn->ctrl & NFP_NET_CFG_CTRL_MSIXAUTO) { + nfp_net_irq_unmask_msix(nn, entry_nr); + return; + } + + nn_writeb(nn, NFP_NET_CFG_ICR(entry_nr), NFP_NET_CFG_ICR_UNMASKED); + nn_pci_flush(nn); +} + +/** + * nfp_net_msix_alloc() - Try to allocate MSI-X irqs + * @nn: NFP Network structure + * @nr_vecs: Number of MSI-X vectors to allocate + * + * For MSI-X we want at least NFP_NET_NON_Q_VECTORS + 1 vectors. + * + * Return: Number of MSI-X vectors obtained or 0 on error. + */ +static int nfp_net_msix_alloc(struct nfp_net *nn, int nr_vecs) +{ + struct pci_dev *pdev = nn->pdev; + int nvecs; + int i; + + for (i = 0; i < nr_vecs; i++) + nn->irq_entries[i].entry = i; + + nvecs = pci_enable_msix_range(pdev, nn->irq_entries, + NFP_NET_NON_Q_VECTORS + 1, nr_vecs); + if (nvecs < 0) { + nn_warn(nn, "Failed to enable MSI-X. Wanted %d-%d (err=%d)\n", + NFP_NET_NON_Q_VECTORS + 1, nr_vecs, nvecs); + return 0; + } + + return nvecs; +} + +/** + * nfp_net_irqs_wanted() - Work out how many interrupt vectors we want + * @nn: NFP Network structure + * + * We want a vector per CPU (or ring), whatever is smaller plus + * NFP_NET_NON_Q_VECTORS for LSC etc. + * + * Return: Number of interrupts wanted + */ +static int nfp_net_irqs_wanted(struct nfp_net *nn) +{ + int ncpus; + int vecs; + + ncpus = num_online_cpus(); + + vecs = max_t(int, nn->num_tx_rings, nn->num_rx_rings); + vecs = min_t(int, vecs, ncpus); + + return vecs + NFP_NET_NON_Q_VECTORS; +} + +/** + * nfp_net_irqs_alloc() - allocates MSI-X irqs + * @nn: NFP Network structure + * + * Return: Number of irqs obtained or 0 on error. + */ +int nfp_net_irqs_alloc(struct nfp_net *nn) +{ + int wanted_irqs; + + wanted_irqs = nfp_net_irqs_wanted(nn); + + nn->num_irqs = nfp_net_msix_alloc(nn, wanted_irqs); + if (nn->num_irqs == 0) { + nn_err(nn, "Failed to allocate MSI-X IRQs\n"); + return 0; + } + + nn->num_r_vecs = nn->num_irqs - NFP_NET_NON_Q_VECTORS; + + if (nn->num_irqs < wanted_irqs) + nn_warn(nn, "Unable to allocate %d vectors. Got %d instead\n", + wanted_irqs, nn->num_irqs); + + return nn->num_irqs; +} + +/** + * nfp_net_irqs_disable() - Disable interrupts + * @nn: NFP Network structure + * + * Undoes what @nfp_net_irqs_alloc() does. + */ +void nfp_net_irqs_disable(struct nfp_net *nn) +{ + pci_disable_msix(nn->pdev); +} + +/** + * nfp_net_irq_rxtx() - Interrupt service routine for RX/TX rings. + * @irq: Interrupt + * @data: Opaque data structure + * + * Return: Indicate if the interrupt has been handled. + */ +static irqreturn_t nfp_net_irq_rxtx(int irq, void *data) +{ + struct nfp_net_r_vector *r_vec = data; + + napi_schedule_irqoff(&r_vec->napi); + + /* The FW auto-masks any interrupt, either via the MASK bit in + * the MSI-X table or via the per entry ICR field. So there + * is no need to disable interrupts here. 
+ */ + return IRQ_HANDLED; +} + +/** + * nfp_net_read_link_status() - Reread link status from control BAR + * @nn: NFP Network structure + */ +static void nfp_net_read_link_status(struct nfp_net *nn) +{ + unsigned long flags; + bool link_up; + u32 sts; + + spin_lock_irqsave(&nn->link_status_lock, flags); + + sts = nn_readl(nn, NFP_NET_CFG_STS); + link_up = !!(sts & NFP_NET_CFG_STS_LINK); + + if (nn->link_up == link_up) + goto out; + + nn->link_up = link_up; + + if (nn->link_up) { + netif_carrier_on(nn->netdev); + netdev_info(nn->netdev, "NIC Link is Up\n"); + } else { + netif_carrier_off(nn->netdev); + netdev_info(nn->netdev, "NIC Link is Down\n"); + } +out: + spin_unlock_irqrestore(&nn->link_status_lock, flags); +} + +/** + * nfp_net_irq_lsc() - Interrupt service routine for link state changes + * @irq: Interrupt + * @data: Opaque data structure + * + * Return: Indicate if the interrupt has been handled. + */ +static irqreturn_t nfp_net_irq_lsc(int irq, void *data) +{ + struct nfp_net *nn = data; + + nfp_net_read_link_status(nn); + + nfp_net_irq_unmask(nn, NFP_NET_IRQ_LSC_IDX); + + return IRQ_HANDLED; +} + +/** + * nfp_net_irq_exn() - Interrupt service routine for exceptions + * @irq: Interrupt + * @data: Opaque data structure + * + * Return: Indicate if the interrupt has been handled. + */ +static irqreturn_t nfp_net_irq_exn(int irq, void *data) +{ + struct nfp_net *nn = data; + + nn_err(nn, "%s: UNIMPLEMENTED.\n", __func__); + /* XXX TO BE IMPLEMENTED */ + return IRQ_HANDLED; +} + +/** + * nfp_net_tx_ring_init() - Fill in the boilerplate for a TX ring + * @tx_ring: TX ring structure + */ +static void nfp_net_tx_ring_init(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct nfp_net *nn = r_vec->nfp_net; + + tx_ring->qcidx = tx_ring->idx * nn->stride_tx; + tx_ring->qcp_q = nn->tx_bar + NFP_QCP_QUEUE_OFF(tx_ring->qcidx); +} + +/** + * nfp_net_rx_ring_init() - Fill in the boilerplate for a RX ring + * @rx_ring: RX ring structure + */ +static void nfp_net_rx_ring_init(struct nfp_net_rx_ring *rx_ring) +{ + struct nfp_net_r_vector *r_vec = rx_ring->r_vec; + struct nfp_net *nn = r_vec->nfp_net; + + rx_ring->fl_qcidx = rx_ring->idx * nn->stride_rx; + rx_ring->rx_qcidx = rx_ring->fl_qcidx + (nn->stride_rx - 1); + + rx_ring->qcp_fl = nn->rx_bar + NFP_QCP_QUEUE_OFF(rx_ring->fl_qcidx); + rx_ring->qcp_rx = nn->rx_bar + NFP_QCP_QUEUE_OFF(rx_ring->rx_qcidx); +} + +/** + * nfp_net_irqs_assign() - Assign IRQs and setup rvecs. 
+ * @netdev: netdev structure + */ +static void nfp_net_irqs_assign(struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + struct nfp_net_r_vector *r_vec; + int r; + + /* Assumes nn->num_tx_rings == nn->num_rx_rings */ + if (nn->num_tx_rings > nn->num_r_vecs) { + nn_warn(nn, "More rings (%d) than vectors (%d).\n", + nn->num_tx_rings, nn->num_r_vecs); + nn->num_tx_rings = nn->num_r_vecs; + nn->num_rx_rings = nn->num_r_vecs; + } + + nn->lsc_handler = nfp_net_irq_lsc; + nn->exn_handler = nfp_net_irq_exn; + + for (r = 0; r < nn->num_r_vecs; r++) { + r_vec = &nn->r_vecs[r]; + r_vec->nfp_net = nn; + r_vec->handler = nfp_net_irq_rxtx; + r_vec->irq_idx = NFP_NET_NON_Q_VECTORS + r; + + cpumask_set_cpu(r, &r_vec->affinity_mask); + + r_vec->tx_ring = &nn->tx_rings[r]; + nn->tx_rings[r].idx = r; + nn->tx_rings[r].r_vec = r_vec; + nfp_net_tx_ring_init(r_vec->tx_ring); + + r_vec->rx_ring = &nn->rx_rings[r]; + nn->rx_rings[r].idx = r; + nn->rx_rings[r].r_vec = r_vec; + nfp_net_rx_ring_init(r_vec->rx_ring); + } +} + +/** + * nfp_net_aux_irq_request() - Request an auxiliary interrupt (LSC or EXN) + * @nn: NFP Network structure + * @ctrl_offset: Control BAR offset where IRQ configuration should be written + * @format: printf-style format to construct the interrupt name + * @name: Pointer to allocated space for interrupt name + * @name_sz: Size of space for interrupt name + * @vector_idx: Index of MSI-X vector used for this interrupt + * @handler: IRQ handler to register for this interrupt + */ +static int +nfp_net_aux_irq_request(struct nfp_net *nn, u32 ctrl_offset, + const char *format, char *name, size_t name_sz, + unsigned int vector_idx, irq_handler_t handler) +{ + struct msix_entry *entry; + int err; + + entry = &nn->irq_entries[vector_idx]; + + snprintf(name, name_sz, format, netdev_name(nn->netdev)); + err = request_irq(entry->vector, handler, 0, name, nn); + if (err) { + nn_err(nn, "Failed to request IRQ %d (err=%d).\n", + entry->vector, err); + return err; + } + nn_writeb(nn, ctrl_offset, vector_idx); + + return 0; +} + +/** + * nfp_net_aux_irq_free() - Free an auxiliary interrupt (LSC or EXN) + * @nn: NFP Network structure + * @ctrl_offset: Control BAR offset where IRQ configuration should be written + * @vector_idx: Index of MSI-X vector used for this interrupt + */ +static void nfp_net_aux_irq_free(struct nfp_net *nn, u32 ctrl_offset, + unsigned int vector_idx) +{ + nn_writeb(nn, ctrl_offset, 0xff); + free_irq(nn->irq_entries[vector_idx].vector, nn); +} + +/* Transmit + * + * One queue controller peripheral queue is used for transmit. The + * driver en-queues packets for transmit by advancing the write + * pointer. The device indicates that packets have transmitted by + * advancing the read pointer. The driver maintains a local copy of + * the read and write pointer in @struct nfp_net_tx_ring. The driver + * keeps @wr_p in sync with the queue controller write pointer and can + * determine how many packets have been transmitted by comparing its + * copy of the read pointer @rd_p with the read pointer maintained by + * the queue controller peripheral. + */ + +/** + * nfp_net_tx_full() - Check if the TX ring is full + * @tx_ring: TX ring to check + * @dcnt: Number of descriptors that need to be enqueued (must be >= 1) + * + * This function checks, based on the *host copy* of read/write + * pointer if a given TX ring is full. The real TX queue may have + * some newly made available slots. + * + * Return: True if the ring is full. 
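/* Illustrative note, not part of the patch: a worked example of the
 * free-running pointer check below. With cnt = 4096, wr_p = 5000 and
 * rd_p = 1000 there are 4000 descriptors in flight, so the ring reports
 * full as soon as a request needs 96 or more descriptors. Because wr_p
 * and rd_p are free-running u32 counters, the subtraction stays correct
 * across wraparound.
 */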
+ */ +static inline int nfp_net_tx_full(struct nfp_net_tx_ring *tx_ring, int dcnt) +{ + return (tx_ring->wr_p - tx_ring->rd_p) >= (tx_ring->cnt - dcnt); +} + +/* Wrappers for deciding when to stop and restart TX queues */ +static int nfp_net_tx_ring_should_wake(struct nfp_net_tx_ring *tx_ring) +{ + return !nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS * 4); +} + +static int nfp_net_tx_ring_should_stop(struct nfp_net_tx_ring *tx_ring) +{ + return nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS + 1); +} + +/** + * nfp_net_tx_ring_stop() - stop tx ring + * @nd_q: netdev queue + * @tx_ring: driver tx queue structure + * + * Safely stop TX ring. Remember that while we are running .start_xmit() + * someone else may be cleaning the TX ring completions so we need to be + * extra careful here. + */ +static void nfp_net_tx_ring_stop(struct netdev_queue *nd_q, + struct nfp_net_tx_ring *tx_ring) +{ + netif_tx_stop_queue(nd_q); + + /* We can race with the TX completion out of NAPI so recheck */ + smp_mb(); + if (unlikely(nfp_net_tx_ring_should_wake(tx_ring))) + netif_tx_start_queue(nd_q); +} + +/** + * nfp_net_tx_tso() - Set up Tx descriptor for LSO + * @nn: NFP Net device + * @r_vec: per-ring structure + * @txbuf: Pointer to driver soft TX descriptor + * @txd: Pointer to HW TX descriptor + * @skb: Pointer to SKB + * + * Set up Tx descriptor for LSO, do nothing for non-LSO skbs. + * Return error on packet header greater than maximum supported LSO header size. + */ +static void nfp_net_tx_tso(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, + struct nfp_net_tx_buf *txbuf, + struct nfp_net_tx_desc *txd, struct sk_buff *skb) +{ + u32 hdrlen; + u16 mss; + + if (!skb_is_gso(skb)) + return; + + if (!skb->encapsulation) + hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb); + else + hdrlen = skb_inner_transport_header(skb) - skb->data + + inner_tcp_hdrlen(skb); + + txbuf->pkt_cnt = skb_shinfo(skb)->gso_segs; + txbuf->real_len += hdrlen * (txbuf->pkt_cnt - 1); + + mss = skb_shinfo(skb)->gso_size & PCIE_DESC_TX_MSS_MASK; + txd->l4_offset = hdrlen; + txd->mss = cpu_to_le16(mss); + txd->flags |= PCIE_DESC_TX_LSO; + + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_lso++; + u64_stats_update_end(&r_vec->tx_sync); +} + +/** + * nfp_net_tx_csum() - Set TX CSUM offload flags in TX descriptor + * @nn: NFP Net device + * @r_vec: per-ring structure + * @txbuf: Pointer to driver soft TX descriptor + * @txd: Pointer to TX descriptor + * @skb: Pointer to SKB + * + * This function sets the TX checksum flags in the TX descriptor based + * on the configuration and the protocol of the packet to be transmitted. + */ +static void nfp_net_tx_csum(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, + struct nfp_net_tx_buf *txbuf, + struct nfp_net_tx_desc *txd, struct sk_buff *skb) +{ + struct ipv6hdr *ipv6h; + struct iphdr *iph; + u8 l4_hdr; + + if (!(nn->ctrl & NFP_NET_CFG_CTRL_TXCSUM)) + return; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return; + + txd->flags |= PCIE_DESC_TX_CSUM; + if (skb->encapsulation) + txd->flags |= PCIE_DESC_TX_ENCAP; + + iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb); + ipv6h = skb->encapsulation ? 
inner_ipv6_hdr(skb) : ipv6_hdr(skb); + + if (iph->version == 4) { + txd->flags |= PCIE_DESC_TX_IP4_CSUM; + l4_hdr = iph->protocol; + } else if (ipv6h->version == 6) { + l4_hdr = ipv6h->nexthdr; + } else { + nn_warn_ratelimit(nn, "partial checksum but ipv=%x!\n", + iph->version); + return; + } + + switch (l4_hdr) { + case IPPROTO_TCP: + txd->flags |= PCIE_DESC_TX_TCP_CSUM; + break; + case IPPROTO_UDP: + txd->flags |= PCIE_DESC_TX_UDP_CSUM; + break; + default: + nn_warn_ratelimit(nn, "partial checksum but l4 proto=%x!\n", + l4_hdr); + return; + } + + u64_stats_update_begin(&r_vec->tx_sync); + if (skb->encapsulation) + r_vec->hw_csum_tx_inner += txbuf->pkt_cnt; + else + r_vec->hw_csum_tx += txbuf->pkt_cnt; + u64_stats_update_end(&r_vec->tx_sync); +} + +/** + * nfp_net_tx() - Main transmit entry point + * @skb: SKB to transmit + * @netdev: netdev structure + * + * Return: NETDEV_TX_OK on success. + */ +static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + const struct skb_frag_struct *frag; + struct nfp_net_r_vector *r_vec; + struct nfp_net_tx_desc *txd, txdg; + struct nfp_net_tx_buf *txbuf; + struct nfp_net_tx_ring *tx_ring; + struct netdev_queue *nd_q; + dma_addr_t dma_addr; + unsigned int fsize; + int f, nr_frags; + int wr_idx; + u16 qidx; + + qidx = skb_get_queue_mapping(skb); + tx_ring = &nn->tx_rings[qidx]; + r_vec = tx_ring->r_vec; + nd_q = netdev_get_tx_queue(nn->netdev, qidx); + + nr_frags = skb_shinfo(skb)->nr_frags; + + if (unlikely(nfp_net_tx_full(tx_ring, nr_frags + 1))) { + nn_warn_ratelimit(nn, "TX ring %d busy. wrp=%u rdp=%u\n", + qidx, tx_ring->wr_p, tx_ring->rd_p); + netif_tx_stop_queue(nd_q); + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_busy++; + u64_stats_update_end(&r_vec->tx_sync); + return NETDEV_TX_BUSY; + } + + /* Start with the head skbuf */ + dma_addr = dma_map_single(&nn->pdev->dev, skb->data, skb_headlen(skb), + DMA_TO_DEVICE); + if (dma_mapping_error(&nn->pdev->dev, dma_addr)) + goto err_free; + + wr_idx = tx_ring->wr_p % tx_ring->cnt; + + /* Stash the soft descriptor of the head then initialize it */ + txbuf = &tx_ring->txbufs[wr_idx]; + txbuf->skb = skb; + txbuf->dma_addr = dma_addr; + txbuf->fidx = -1; + txbuf->pkt_cnt = 1; + txbuf->real_len = skb->len; + + /* Build TX descriptor */ + txd = &tx_ring->txds[wr_idx]; + txd->offset_eop = (nr_frags == 0) ? 
PCIE_DESC_TX_EOP : 0; + txd->dma_len = cpu_to_le16(skb_headlen(skb)); + nfp_desc_set_dma_addr(txd, dma_addr); + txd->data_len = cpu_to_le16(skb->len); + + txd->flags = 0; + txd->mss = 0; + txd->l4_offset = 0; + + nfp_net_tx_tso(nn, r_vec, txbuf, txd, skb); + + nfp_net_tx_csum(nn, r_vec, txbuf, txd, skb); + + if (skb_vlan_tag_present(skb) && nn->ctrl & NFP_NET_CFG_CTRL_TXVLAN) { + txd->flags |= PCIE_DESC_TX_VLAN; + txd->vlan = cpu_to_le16(skb_vlan_tag_get(skb)); + } + + /* Gather DMA */ + if (nr_frags > 0) { + /* all descs must match except for in addr, length and eop */ + txdg = *txd; + + for (f = 0; f < nr_frags; f++) { + frag = &skb_shinfo(skb)->frags[f]; + fsize = skb_frag_size(frag); + + dma_addr = skb_frag_dma_map(&nn->pdev->dev, frag, 0, + fsize, DMA_TO_DEVICE); + if (dma_mapping_error(&nn->pdev->dev, dma_addr)) + goto err_unmap; + + wr_idx = (wr_idx + 1) % tx_ring->cnt; + tx_ring->txbufs[wr_idx].skb = skb; + tx_ring->txbufs[wr_idx].dma_addr = dma_addr; + tx_ring->txbufs[wr_idx].fidx = f; + + txd = &tx_ring->txds[wr_idx]; + *txd = txdg; + txd->dma_len = cpu_to_le16(fsize); + nfp_desc_set_dma_addr(txd, dma_addr); + txd->offset_eop = + (f == nr_frags - 1) ? PCIE_DESC_TX_EOP : 0; + } + + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_gather++; + u64_stats_update_end(&r_vec->tx_sync); + } + + netdev_tx_sent_queue(nd_q, txbuf->real_len); + + tx_ring->wr_p += nr_frags + 1; + if (nfp_net_tx_ring_should_stop(tx_ring)) + nfp_net_tx_ring_stop(nd_q, tx_ring); + + tx_ring->wr_ptr_add += nr_frags + 1; + if (!skb->xmit_more || netif_xmit_stopped(nd_q)) { + /* force memory write before we let HW know */ + wmb(); + nfp_qcp_wr_ptr_add(tx_ring->qcp_q, tx_ring->wr_ptr_add); + tx_ring->wr_ptr_add = 0; + } + + skb_tx_timestamp(skb); + + return NETDEV_TX_OK; + +err_unmap: + --f; + while (f >= 0) { + frag = &skb_shinfo(skb)->frags[f]; + dma_unmap_page(&nn->pdev->dev, + tx_ring->txbufs[wr_idx].dma_addr, + skb_frag_size(frag), DMA_TO_DEVICE); + tx_ring->txbufs[wr_idx].skb = NULL; + tx_ring->txbufs[wr_idx].dma_addr = 0; + tx_ring->txbufs[wr_idx].fidx = -2; + wr_idx = wr_idx - 1; + if (wr_idx < 0) + wr_idx += tx_ring->cnt; + } + dma_unmap_single(&nn->pdev->dev, tx_ring->txbufs[wr_idx].dma_addr, + skb_headlen(skb), DMA_TO_DEVICE); + tx_ring->txbufs[wr_idx].skb = NULL; + tx_ring->txbufs[wr_idx].dma_addr = 0; + tx_ring->txbufs[wr_idx].fidx = -2; +err_free: + nn_warn_ratelimit(nn, "Failed to map DMA TX buffer\n"); + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_errors++; + u64_stats_update_end(&r_vec->tx_sync); + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +} + +/** + * nfp_net_tx_complete() - Handled completed TX packets + * @tx_ring: TX ring structure + * + * Return: Number of completed TX descriptors + */ +static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct nfp_net *nn = r_vec->nfp_net; + const struct skb_frag_struct *frag; + struct netdev_queue *nd_q; + u32 done_pkts = 0, done_bytes = 0; + struct sk_buff *skb; + int todo, nr_frags; + u32 qcp_rd_p; + int fidx; + int idx; + + /* Work out how many descriptors have been transmitted */ + qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q); + + if (qcp_rd_p == tx_ring->qcp_rd_p) + return; + + if (qcp_rd_p > tx_ring->qcp_rd_p) + todo = qcp_rd_p - tx_ring->qcp_rd_p; + else + todo = qcp_rd_p + tx_ring->cnt - tx_ring->qcp_rd_p; + + while (todo--) { + idx = tx_ring->rd_p % tx_ring->cnt; + tx_ring->rd_p++; + + skb = tx_ring->txbufs[idx].skb; + if (!skb) + continue; + + nr_frags = 
skb_shinfo(skb)->nr_frags; + fidx = tx_ring->txbufs[idx].fidx; + + if (fidx == -1) { + /* unmap head */ + dma_unmap_single(&nn->pdev->dev, + tx_ring->txbufs[idx].dma_addr, + skb_headlen(skb), DMA_TO_DEVICE); + + done_pkts += tx_ring->txbufs[idx].pkt_cnt; + done_bytes += tx_ring->txbufs[idx].real_len; + } else { + /* unmap fragment */ + frag = &skb_shinfo(skb)->frags[fidx]; + dma_unmap_page(&nn->pdev->dev, + tx_ring->txbufs[idx].dma_addr, + skb_frag_size(frag), DMA_TO_DEVICE); + } + + /* check for last gather fragment */ + if (fidx == nr_frags - 1) + dev_kfree_skb_any(skb); + + tx_ring->txbufs[idx].dma_addr = 0; + tx_ring->txbufs[idx].skb = NULL; + tx_ring->txbufs[idx].fidx = -2; + } + + tx_ring->qcp_rd_p = qcp_rd_p; + + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_bytes += done_bytes; + r_vec->tx_pkts += done_pkts; + u64_stats_update_end(&r_vec->tx_sync); + + nd_q = netdev_get_tx_queue(nn->netdev, tx_ring->idx); + netdev_tx_completed_queue(nd_q, done_pkts, done_bytes); + if (nfp_net_tx_ring_should_wake(tx_ring)) { + /* Make sure TX thread will see updated tx_ring->rd_p */ + smp_mb(); + + if (unlikely(netif_tx_queue_stopped(nd_q))) + netif_tx_wake_queue(nd_q); + } + + WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt, + "TX ring corruption rd_p=%u wr_p=%u cnt=%u\n", + tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt); +} + +/** + * nfp_net_tx_flush() - Free any untransmitted buffers currently on the TX ring + * @tx_ring: TX ring structure + * + * Assumes that the device is stopped + */ +static void nfp_net_tx_flush(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct nfp_net *nn = r_vec->nfp_net; + struct pci_dev *pdev = nn->pdev; + const struct skb_frag_struct *frag; + struct netdev_queue *nd_q; + struct sk_buff *skb; + int nr_frags; + int fidx; + int idx; + + while (tx_ring->rd_p != tx_ring->wr_p) { + idx = tx_ring->rd_p % tx_ring->cnt; + + skb = tx_ring->txbufs[idx].skb; + if (skb) { + nr_frags = skb_shinfo(skb)->nr_frags; + fidx = tx_ring->txbufs[idx].fidx; + + if (fidx == -1) { + /* unmap head */ + dma_unmap_single(&pdev->dev, + tx_ring->txbufs[idx].dma_addr, + skb_headlen(skb), + DMA_TO_DEVICE); + } else { + /* unmap fragment */ + frag = &skb_shinfo(skb)->frags[fidx]; + dma_unmap_page(&pdev->dev, + tx_ring->txbufs[idx].dma_addr, + skb_frag_size(frag), + DMA_TO_DEVICE); + } + + /* check for last gather fragment */ + if (fidx == nr_frags - 1) + dev_kfree_skb_any(skb); + + tx_ring->txbufs[idx].dma_addr = 0; + tx_ring->txbufs[idx].skb = NULL; + tx_ring->txbufs[idx].fidx = -2; + } + + memset(&tx_ring->txds[idx], 0, sizeof(tx_ring->txds[idx])); + + tx_ring->qcp_rd_p++; + tx_ring->rd_p++; + } + + nd_q = netdev_get_tx_queue(nn->netdev, tx_ring->idx); + netdev_tx_reset_queue(nd_q); +} + +static void nfp_net_tx_timeout(struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + int i; + + for (i = 0; i < nn->num_tx_rings; i++) { + if (!netif_tx_queue_stopped(netdev_get_tx_queue(netdev, i))) + continue; + nn_warn(nn, "TX timeout on ring: %d\n", i); + } + nn_warn(nn, "TX watchdog timeout\n"); +} + +/* Receive processing + */ + +/** + * nfp_net_rx_space() - return the number of free slots on the RX ring + * @rx_ring: RX ring structure + * + * Make sure we leave at least one slot free. 
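+ *
+ * For example, with hypothetical values cnt = 512, wr_p = 700 and
+ * rd_p = 200 there are 500 buffers outstanding, so (512 - 1) - 500 = 11
+ * slots can still be filled.  Keeping the one slot in reserve means
+ * wr_p == rd_p always denotes an empty ring, never a full one.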
+ * + * Return: True if there is space on the RX ring + */ +static inline int nfp_net_rx_space(struct nfp_net_rx_ring *rx_ring) +{ + return (rx_ring->cnt - 1) - (rx_ring->wr_p - rx_ring->rd_p); +} + +/** + * nfp_net_rx_alloc_one() - Allocate and map skb for RX + * @rx_ring: RX ring structure of the skb + * @dma_addr: Pointer to storage for DMA address (output param) + * + * This function will allcate a new skb, map it for DMA. + * + * Return: allocated skb or NULL on failure. + */ +static struct sk_buff * +nfp_net_rx_alloc_one(struct nfp_net_rx_ring *rx_ring, dma_addr_t *dma_addr) +{ + struct nfp_net *nn = rx_ring->r_vec->nfp_net; + struct sk_buff *skb; + + skb = netdev_alloc_skb(nn->netdev, nn->fl_bufsz); + if (!skb) { + nn_warn_ratelimit(nn, "Failed to alloc receive SKB\n"); + return NULL; + } + + *dma_addr = dma_map_single(&nn->pdev->dev, skb->data, + nn->fl_bufsz, DMA_FROM_DEVICE); + if (dma_mapping_error(&nn->pdev->dev, *dma_addr)) { + dev_kfree_skb_any(skb); + nn_warn_ratelimit(nn, "Failed to map DMA RX buffer\n"); + return NULL; + } + + return skb; +} + +/** + * nfp_net_rx_give_one() - Put mapped skb on the software and hardware rings + * @rx_ring: RX ring structure + * @skb: Skb to put on rings + * @dma_addr: DMA address of skb mapping + */ +static void nfp_net_rx_give_one(struct nfp_net_rx_ring *rx_ring, + struct sk_buff *skb, dma_addr_t dma_addr) +{ + unsigned int wr_idx; + + wr_idx = rx_ring->wr_p % rx_ring->cnt; + + /* Stash SKB and DMA address away */ + rx_ring->rxbufs[wr_idx].skb = skb; + rx_ring->rxbufs[wr_idx].dma_addr = dma_addr; + + /* Fill freelist descriptor */ + rx_ring->rxds[wr_idx].fld.reserved = 0; + rx_ring->rxds[wr_idx].fld.meta_len_dd = 0; + nfp_desc_set_dma_addr(&rx_ring->rxds[wr_idx].fld, dma_addr); + + rx_ring->wr_p++; + rx_ring->wr_ptr_add++; + if (rx_ring->wr_ptr_add >= NFP_NET_FL_BATCH) { + /* Update write pointer of the freelist queue. Make + * sure all writes are flushed before telling the hardware. + */ + wmb(); + nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, rx_ring->wr_ptr_add); + rx_ring->wr_ptr_add = 0; + } +} + +/** + * nfp_net_rx_flush() - Free any buffers currently on the RX ring + * @rx_ring: RX ring to remove buffers from + * + * Assumes that the device is stopped + */ +static void nfp_net_rx_flush(struct nfp_net_rx_ring *rx_ring) +{ + struct nfp_net *nn = rx_ring->r_vec->nfp_net; + struct pci_dev *pdev = nn->pdev; + int idx; + + while (rx_ring->rd_p != rx_ring->wr_p) { + idx = rx_ring->rd_p % rx_ring->cnt; + + if (rx_ring->rxbufs[idx].skb) { + dma_unmap_single(&pdev->dev, + rx_ring->rxbufs[idx].dma_addr, + nn->fl_bufsz, DMA_FROM_DEVICE); + dev_kfree_skb_any(rx_ring->rxbufs[idx].skb); + rx_ring->rxbufs[idx].dma_addr = 0; + rx_ring->rxbufs[idx].skb = NULL; + } + + memset(&rx_ring->rxds[idx], 0, sizeof(rx_ring->rxds[idx])); + + rx_ring->rd_p++; + } +} + +/** + * nfp_net_rx_fill_freelist() - Attempt filling freelist with RX buffers + * @rx_ring: RX ring to fill + * + * Try to fill as many buffers as possible into freelist. Return + * number of buffers added. + * + * Return: Number of freelist buffers added. 
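+ *
+ * Note that the implementation below actually returns 0 on success or
+ * -ENOMEM if an skb allocation fails (in which case the ring is flushed
+ * again); callers such as nfp_net_start_vec() only check for an error.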
+ */ +static int nfp_net_rx_fill_freelist(struct nfp_net_rx_ring *rx_ring) +{ + struct sk_buff *skb; + dma_addr_t dma_addr; + + while (nfp_net_rx_space(rx_ring)) { + skb = nfp_net_rx_alloc_one(rx_ring, &dma_addr); + if (!skb) { + nfp_net_rx_flush(rx_ring); + return -ENOMEM; + } + nfp_net_rx_give_one(rx_ring, skb, dma_addr); + } + + return 0; +} + +/** + * nfp_net_rx_csum_has_errors() - group check if rxd has any csum errors + * @flags: RX descriptor flags field in CPU byte order + */ +static int nfp_net_rx_csum_has_errors(u16 flags) +{ + u16 csum_all_checked, csum_all_ok; + + csum_all_checked = flags & __PCIE_DESC_RX_CSUM_ALL; + csum_all_ok = flags & __PCIE_DESC_RX_CSUM_ALL_OK; + + return csum_all_checked != (csum_all_ok << PCIE_DESC_RX_CSUM_OK_SHIFT); +} + +/** + * nfp_net_rx_csum() - set SKB checksum field based on RX descriptor flags + * @nn: NFP Net device + * @r_vec: per-ring structure + * @rxd: Pointer to RX descriptor + * @skb: Pointer to SKB + */ +static void nfp_net_rx_csum(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, + struct nfp_net_rx_desc *rxd, struct sk_buff *skb) +{ + skb_checksum_none_assert(skb); + + if (!(nn->netdev->features & NETIF_F_RXCSUM)) + return; + + if (nfp_net_rx_csum_has_errors(le16_to_cpu(rxd->rxd.flags))) { + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->hw_csum_rx_error++; + u64_stats_update_end(&r_vec->rx_sync); + return; + } + + /* Assume that the firmware will never report inner CSUM_OK unless outer + * L4 headers were successfully parsed. FW will always report zero UDP + * checksum as CSUM_OK. + */ + if (rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM_OK || + rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM_OK) { + __skb_incr_checksum_unnecessary(skb); + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->hw_csum_rx_ok++; + u64_stats_update_end(&r_vec->rx_sync); + } + + if (rxd->rxd.flags & PCIE_DESC_RX_I_TCP_CSUM_OK || + rxd->rxd.flags & PCIE_DESC_RX_I_UDP_CSUM_OK) { + __skb_incr_checksum_unnecessary(skb); + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->hw_csum_rx_inner_ok++; + u64_stats_update_end(&r_vec->rx_sync); + } +} + +/** + * nfp_net_set_hash() - Set SKB hash data + * @netdev: adapter's net_device structure + * @skb: SKB to set the hash data on + * @rxd: RX descriptor + * + * The RSS hash and hash-type are pre-pended to the packet data. + * Extract and decode it and set the skb fields. + */ +static void nfp_net_set_hash(struct net_device *netdev, struct sk_buff *skb, + struct nfp_net_rx_desc *rxd) +{ + struct nfp_net_rx_hash *rx_hash; + + if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS) || + !(netdev->features & NETIF_F_RXHASH)) + return; + + rx_hash = (struct nfp_net_rx_hash *)(skb->data - sizeof(*rx_hash)); + + switch (be32_to_cpu(rx_hash->hash_type)) { + case NFP_NET_RSS_IPV4: + case NFP_NET_RSS_IPV6: + case NFP_NET_RSS_IPV6_EX: + skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L3); + break; + default: + skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L4); + break; + } +} + +/** + * nfp_net_rx() - receive up to @budget packets on @rx_ring + * @rx_ring: RX ring to receive from + * @budget: NAPI budget + * + * Note, this function is separated out from the napi poll function to + * more cleanly separate packet receive code from other bookkeeping + * functions performed in the napi poll function. + * + * There are differences between the NFP-3200 firmware and the + * NFP-6000 firmware. The NFP-3200 firmware uses a dedicated RX queue + * to indicate that new packets have arrived. 
The NFP-6000 does not + * have this queue and uses the DD bit in the RX descriptor. This + * method cannot be used on the NFP-3200 as it causes a race + * condition: The RX ring write pointer on the NFP-3200 is updated + * after packets (and descriptors) have been DMAed. If the DD bit is + * used and subsequently the read pointer is updated this may lead to + * the RX queue to underflow (if the firmware has not yet update the + * write pointer). Therefore we use slightly ugly conditional code + * below to handle the differences. We may, in the future update the + * NFP-3200 firmware to behave the same as the firmware on the + * NFP-6000. + * + * Return: Number of packets received. + */ +static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) +{ + struct nfp_net_r_vector *r_vec = rx_ring->r_vec; + struct nfp_net *nn = r_vec->nfp_net; + unsigned int data_len, meta_len; + int avail = 0, pkts_polled = 0; + struct sk_buff *skb, *new_skb; + struct nfp_net_rx_desc *rxd; + dma_addr_t new_dma_addr; + u32 qcp_wr_p; + int idx; + + if (nn->is_nfp3200) { + /* Work out how many packets arrived */ + qcp_wr_p = nfp_qcp_wr_ptr_read(rx_ring->qcp_rx); + idx = rx_ring->rd_p % rx_ring->cnt; + + if (qcp_wr_p == idx) + /* No new packets */ + return 0; + + if (qcp_wr_p > idx) + avail = qcp_wr_p - idx; + else + avail = qcp_wr_p + rx_ring->cnt - idx; + } else { + avail = budget + 1; + } + + while (avail > 0 && pkts_polled < budget) { + idx = rx_ring->rd_p % rx_ring->cnt; + + rxd = &rx_ring->rxds[idx]; + if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD)) { + if (nn->is_nfp3200) + nn_dbg(nn, "RX descriptor not valid (DD)%d:%u rxd[0]=%#x rxd[1]=%#x\n", + rx_ring->idx, idx, + rxd->vals[0], rxd->vals[1]); + break; + } + /* Memory barrier to ensure that we won't do other reads + * before the DD bit. 
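+ * Without this barrier the CPU could load the descriptor's length and
+ * flag fields before the DD bit was seen set, and so observe stale
+ * values from before the device's DMA write completed.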
+ */ + dma_rmb(); + + rx_ring->rd_p++; + pkts_polled++; + avail--; + + skb = rx_ring->rxbufs[idx].skb; + + new_skb = nfp_net_rx_alloc_one(rx_ring, &new_dma_addr); + if (!new_skb) { + nfp_net_rx_give_one(rx_ring, rx_ring->rxbufs[idx].skb, + rx_ring->rxbufs[idx].dma_addr); + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_drops++; + u64_stats_update_end(&r_vec->rx_sync); + continue; + } + + dma_unmap_single(&nn->pdev->dev, + rx_ring->rxbufs[idx].dma_addr, + nn->fl_bufsz, DMA_FROM_DEVICE); + + nfp_net_rx_give_one(rx_ring, new_skb, new_dma_addr); + + meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK; + data_len = le16_to_cpu(rxd->rxd.data_len); + + if (WARN_ON_ONCE(data_len > nn->fl_bufsz)) { + dev_kfree_skb_any(skb); + continue; + } + + if (nn->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) { + /* The packet data starts after the metadata */ + skb_reserve(skb, meta_len); + } else { + /* The packet data starts at a fixed offset */ + skb_reserve(skb, nn->rx_offset); + } + + /* Adjust the SKB for the dynamic meta data pre-pended */ + skb_put(skb, data_len - meta_len); + + nfp_net_set_hash(nn->netdev, skb, rxd); + + /* Pad small frames to minimum */ + if (skb_put_padto(skb, 60)) + break; + + /* Stats update */ + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_pkts++; + r_vec->rx_bytes += skb->len; + u64_stats_update_end(&r_vec->rx_sync); + + skb_record_rx_queue(skb, rx_ring->idx); + skb->protocol = eth_type_trans(skb, nn->netdev); + + nfp_net_rx_csum(nn, r_vec, rxd, skb); + + if (rxd->rxd.flags & PCIE_DESC_RX_VLAN) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + le16_to_cpu(rxd->rxd.vlan)); + + napi_gro_receive(&rx_ring->r_vec->napi, skb); + } + + if (nn->is_nfp3200) + nfp_qcp_rd_ptr_add(rx_ring->qcp_rx, pkts_polled); + + return pkts_polled; +} + +/** + * nfp_net_poll() - napi poll function + * @napi: NAPI structure + * @budget: NAPI budget + * + * Return: number of packets polled. 
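+ *
+ * Returning fewer packets than the budget lets NAPI complete and
+ * re-enable the ring's interrupt; returning the full budget keeps the
+ * ring in polling mode for the next softirq pass.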
+ */ +static int nfp_net_poll(struct napi_struct *napi, int budget) +{ + struct nfp_net_r_vector *r_vec = + container_of(napi, struct nfp_net_r_vector, napi); + struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring; + struct nfp_net_tx_ring *tx_ring = r_vec->tx_ring; + struct nfp_net *nn = r_vec->nfp_net; + struct netdev_queue *txq; + unsigned int pkts_polled; + + tx_ring = &nn->tx_rings[rx_ring->idx]; + txq = netdev_get_tx_queue(nn->netdev, tx_ring->idx); + nfp_net_tx_complete(tx_ring); + + pkts_polled = nfp_net_rx(rx_ring, budget); + + if (pkts_polled < budget) { + napi_complete_done(napi, pkts_polled); + nfp_net_irq_unmask(nn, r_vec->irq_idx); + } + + return pkts_polled; +} + +/* Setup and Configuration + */ + +/** + * nfp_net_tx_ring_free() - Free resources allocated to a TX ring + * @tx_ring: TX ring to free + */ +static void nfp_net_tx_ring_free(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct nfp_net *nn = r_vec->nfp_net; + struct pci_dev *pdev = nn->pdev; + + nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(tx_ring->idx), 0); + nn_writeb(nn, NFP_NET_CFG_TXR_SZ(tx_ring->idx), 0); + nn_writeb(nn, NFP_NET_CFG_TXR_VEC(tx_ring->idx), 0); + + kfree(tx_ring->txbufs); + + if (tx_ring->txds) + dma_free_coherent(&pdev->dev, tx_ring->size, + tx_ring->txds, tx_ring->dma); + + tx_ring->cnt = 0; + tx_ring->wr_p = 0; + tx_ring->rd_p = 0; + tx_ring->qcp_rd_p = 0; + + tx_ring->txbufs = NULL; + tx_ring->txds = NULL; + tx_ring->dma = 0; + tx_ring->size = 0; +} + +/** + * nfp_net_tx_ring_alloc() - Allocate resource for a TX ring + * @tx_ring: TX Ring structure to allocate + * + * Return: 0 on success, negative errno otherwise. + */ +static int nfp_net_tx_ring_alloc(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct nfp_net *nn = r_vec->nfp_net; + struct pci_dev *pdev = nn->pdev; + int sz; + + tx_ring->cnt = nn->txd_cnt; + + tx_ring->size = sizeof(*tx_ring->txds) * tx_ring->cnt; + tx_ring->txds = dma_zalloc_coherent(&pdev->dev, tx_ring->size, + &tx_ring->dma, GFP_KERNEL); + if (!tx_ring->txds) + goto err_alloc; + + sz = sizeof(*tx_ring->txbufs) * tx_ring->cnt; + tx_ring->txbufs = kzalloc(sz, GFP_KERNEL); + if (!tx_ring->txbufs) + goto err_alloc; + + /* Write the DMA address, size and MSI-X info to the device */ + nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(tx_ring->idx), tx_ring->dma); + nn_writeb(nn, NFP_NET_CFG_TXR_SZ(tx_ring->idx), ilog2(tx_ring->cnt)); + nn_writeb(nn, NFP_NET_CFG_TXR_VEC(tx_ring->idx), r_vec->irq_idx); + + netif_set_xps_queue(nn->netdev, &r_vec->affinity_mask, tx_ring->idx); + + nn_dbg(nn, "TxQ%02d: QCidx=%02d cnt=%d dma=%#llx host=%p\n", + tx_ring->idx, tx_ring->qcidx, + tx_ring->cnt, (unsigned long long)tx_ring->dma, tx_ring->txds); + + return 0; + +err_alloc: + nfp_net_tx_ring_free(tx_ring); + return -ENOMEM; +} + +/** + * nfp_net_rx_ring_free() - Free resources allocated to a RX ring + * @rx_ring: RX ring to free + */ +static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring) +{ + struct nfp_net_r_vector *r_vec = rx_ring->r_vec; + struct nfp_net *nn = r_vec->nfp_net; + struct pci_dev *pdev = nn->pdev; + + nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(rx_ring->idx), 0); + nn_writeb(nn, NFP_NET_CFG_RXR_SZ(rx_ring->idx), 0); + nn_writeb(nn, NFP_NET_CFG_RXR_VEC(rx_ring->idx), 0); + + kfree(rx_ring->rxbufs); + + if (rx_ring->rxds) + dma_free_coherent(&pdev->dev, rx_ring->size, + rx_ring->rxds, rx_ring->dma); + + rx_ring->cnt = 0; + rx_ring->wr_p = 0; + rx_ring->rd_p = 0; + + rx_ring->rxbufs = NULL; + rx_ring->rxds = NULL; 
+ rx_ring->dma = 0; + rx_ring->size = 0; +} + +/** + * nfp_net_rx_ring_alloc() - Allocate resource for a RX ring + * @rx_ring: RX ring to allocate + * + * Return: 0 on success, negative errno otherwise. + */ +static int nfp_net_rx_ring_alloc(struct nfp_net_rx_ring *rx_ring) +{ + struct nfp_net_r_vector *r_vec = rx_ring->r_vec; + struct nfp_net *nn = r_vec->nfp_net; + struct pci_dev *pdev = nn->pdev; + int sz; + + rx_ring->cnt = nn->rxd_cnt; + + rx_ring->size = sizeof(*rx_ring->rxds) * rx_ring->cnt; + rx_ring->rxds = dma_zalloc_coherent(&pdev->dev, rx_ring->size, + &rx_ring->dma, GFP_KERNEL); + if (!rx_ring->rxds) + goto err_alloc; + + sz = sizeof(*rx_ring->rxbufs) * rx_ring->cnt; + rx_ring->rxbufs = kzalloc(sz, GFP_KERNEL); + if (!rx_ring->rxbufs) + goto err_alloc; + + /* Write the DMA address, size and MSI-X info to the device */ + nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(rx_ring->idx), rx_ring->dma); + nn_writeb(nn, NFP_NET_CFG_RXR_SZ(rx_ring->idx), ilog2(rx_ring->cnt)); + nn_writeb(nn, NFP_NET_CFG_RXR_VEC(rx_ring->idx), r_vec->irq_idx); + + nn_dbg(nn, "RxQ%02d: FlQCidx=%02d RxQCidx=%02d cnt=%d dma=%#llx host=%p\n", + rx_ring->idx, rx_ring->fl_qcidx, rx_ring->rx_qcidx, + rx_ring->cnt, (unsigned long long)rx_ring->dma, rx_ring->rxds); + + return 0; + +err_alloc: + nfp_net_rx_ring_free(rx_ring); + return -ENOMEM; +} + +static void __nfp_net_free_rings(struct nfp_net *nn, unsigned int n_free) +{ + struct nfp_net_r_vector *r_vec; + struct msix_entry *entry; + + while (n_free--) { + r_vec = &nn->r_vecs[n_free]; + entry = &nn->irq_entries[r_vec->irq_idx]; + + nfp_net_rx_ring_free(r_vec->rx_ring); + nfp_net_tx_ring_free(r_vec->tx_ring); + + irq_set_affinity_hint(entry->vector, NULL); + free_irq(entry->vector, r_vec); + + netif_napi_del(&r_vec->napi); + } +} + +/** + * nfp_net_free_rings() - Free all ring resources + * @nn: NFP Net device to reconfigure + */ +static void nfp_net_free_rings(struct nfp_net *nn) +{ + __nfp_net_free_rings(nn, nn->num_r_vecs); +} + +/** + * nfp_net_alloc_rings() - Allocate resources for RX and TX rings + * @nn: NFP Net device to reconfigure + * + * Return: 0 on success or negative errno on error. 
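+ *
+ * Per ring vector the resources are acquired in the order NAPI context,
+ * IRQ, TX ring, RX ring; if any step fails, everything acquired so far,
+ * including earlier vectors, is released again in reverse order.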
+ */ +static int nfp_net_alloc_rings(struct nfp_net *nn) +{ + struct nfp_net_r_vector *r_vec; + struct msix_entry *entry; + int err; + int r; + + for (r = 0; r < nn->num_r_vecs; r++) { + r_vec = &nn->r_vecs[r]; + entry = &nn->irq_entries[r_vec->irq_idx]; + + /* Setup NAPI */ + netif_napi_add(nn->netdev, &r_vec->napi, + nfp_net_poll, NAPI_POLL_WEIGHT); + + snprintf(r_vec->name, sizeof(r_vec->name), + "%s-rxtx-%d", nn->netdev->name, r); + err = request_irq(entry->vector, r_vec->handler, 0, + r_vec->name, r_vec); + if (err) { + nn_dbg(nn, "Error requesting IRQ %d\n", entry->vector); + goto err_napi_del; + } + + irq_set_affinity_hint(entry->vector, &r_vec->affinity_mask); + + nn_dbg(nn, "RV%02d: irq=%03d/%03d\n", + r, entry->vector, entry->entry); + + /* Allocate TX ring resources */ + err = nfp_net_tx_ring_alloc(r_vec->tx_ring); + if (err) + goto err_free_irq; + + /* Allocate RX ring resources */ + err = nfp_net_rx_ring_alloc(r_vec->rx_ring); + if (err) + goto err_free_tx; + } + + return 0; + +err_free_tx: + nfp_net_tx_ring_free(r_vec->tx_ring); +err_free_irq: + irq_set_affinity_hint(entry->vector, NULL); + free_irq(entry->vector, r_vec); +err_napi_del: + netif_napi_del(&r_vec->napi); + __nfp_net_free_rings(nn, r); + return err; +} + +/** + * nfp_net_rss_write_itbl() - Write RSS indirection table to device + * @nn: NFP Net device to reconfigure + */ +void nfp_net_rss_write_itbl(struct nfp_net *nn) +{ + int i; + + for (i = 0; i < NFP_NET_CFG_RSS_ITBL_SZ; i += 4) + nn_writel(nn, NFP_NET_CFG_RSS_ITBL + i, + get_unaligned_le32(nn->rss_itbl + i)); +} + +/** + * nfp_net_rss_write_key() - Write RSS hash key to device + * @nn: NFP Net device to reconfigure + */ +void nfp_net_rss_write_key(struct nfp_net *nn) +{ + int i; + + for (i = 0; i < NFP_NET_CFG_RSS_KEY_SZ; i += 4) + nn_writel(nn, NFP_NET_CFG_RSS_KEY + i, + get_unaligned_le32(nn->rss_key + i)); +} + +/** + * nfp_net_coalesce_write_cfg() - Write irq coalescence configuration to HW + * @nn: NFP Net device to reconfigure + */ +void nfp_net_coalesce_write_cfg(struct nfp_net *nn) +{ + u8 i; + u32 factor; + u32 value; + + /* Compute factor used to convert coalesce '_usecs' parameters to + * ME timestamp ticks. There are 16 ME clock cycles for each timestamp + * count. + */ + factor = nn->me_freq_mhz / 16; + + /* copy RX interrupt coalesce parameters */ + value = (nn->rx_coalesce_max_frames << 16) | + (factor * nn->rx_coalesce_usecs); + for (i = 0; i < nn->num_r_vecs; i++) + nn_writel(nn, NFP_NET_CFG_RXR_IRQ_MOD(i), value); + + /* copy TX interrupt coalesce parameters */ + value = (nn->tx_coalesce_max_frames << 16) | + (factor * nn->tx_coalesce_usecs); + for (i = 0; i < nn->num_r_vecs; i++) + nn_writel(nn, NFP_NET_CFG_TXR_IRQ_MOD(i), value); +} + +/** + * nfp_net_write_mac_addr() - Write mac address to device registers + * @nn: NFP Net device to reconfigure + * @mac: Six-byte MAC address to be written + * + * We do a bit of byte swapping dance because firmware is LE. 
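+ *
+ * For a hypothetical address 00:15:4d:ae:01:02 the first write below
+ * places 0x00154dae in NFP_NET_CFG_MACADDR and the second places
+ * 0x01020000 in the following word (the last two bytes shifted up 16).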
+ */ +static void nfp_net_write_mac_addr(struct nfp_net *nn, const u8 *mac) +{ + nn_writel(nn, NFP_NET_CFG_MACADDR + 0, + get_unaligned_be32(nn->netdev->dev_addr)); + /* We can't do writew for NFP-3200 compatibility */ + nn_writel(nn, NFP_NET_CFG_MACADDR + 4, + get_unaligned_be16(nn->netdev->dev_addr + 4) << 16); +} + +/** + * nfp_net_clear_config_and_disable() - Clear control BAR and disable NFP + * @nn: NFP Net device to reconfigure + */ +static void nfp_net_clear_config_and_disable(struct nfp_net *nn) +{ + u32 new_ctrl, update; + int err; + + new_ctrl = nn->ctrl; + new_ctrl &= ~NFP_NET_CFG_CTRL_ENABLE; + update = NFP_NET_CFG_UPDATE_GEN; + update |= NFP_NET_CFG_UPDATE_MSIX; + update |= NFP_NET_CFG_UPDATE_RING; + + if (nn->cap & NFP_NET_CFG_CTRL_RINGCFG) + new_ctrl &= ~NFP_NET_CFG_CTRL_RINGCFG; + + nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, 0); + nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, 0); + + nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl); + err = nfp_net_reconfig(nn, update); + if (err) { + nn_err(nn, "Could not disable device: %d\n", err); + return; + } + + nn->ctrl = new_ctrl; +} + +/** + * nfp_net_start_vec() - Start ring vector + * @nn: NFP Net device structure + * @r_vec: Ring vector to be started + */ +static int nfp_net_start_vec(struct nfp_net *nn, struct nfp_net_r_vector *r_vec) +{ + unsigned int irq_vec; + int err = 0; + + irq_vec = nn->irq_entries[r_vec->irq_idx].vector; + + disable_irq(irq_vec); + + err = nfp_net_rx_fill_freelist(r_vec->rx_ring); + if (err) { + nn_err(nn, "RV%02d: couldn't allocate enough buffers\n", + r_vec->irq_idx); + goto out; + } + + napi_enable(&r_vec->napi); +out: + enable_irq(irq_vec); + + return err; +} + +static int nfp_net_netdev_open(struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + int err, r; + u32 update = 0; + u32 new_ctrl; + + if (nn->ctrl & NFP_NET_CFG_CTRL_ENABLE) { + nn_err(nn, "Dev is already enabled: 0x%08x\n", nn->ctrl); + return -EBUSY; + } + + new_ctrl = nn->ctrl; + + /* Step 1: Allocate resources for rings and the like + * - Request interrupts + * - Allocate RX and TX ring resources + * - Setup initial RSS table + */ + err = nfp_net_aux_irq_request(nn, NFP_NET_CFG_EXN, "%s-exn", + nn->exn_name, sizeof(nn->exn_name), + NFP_NET_IRQ_EXN_IDX, nn->exn_handler); + if (err) + return err; + + err = nfp_net_alloc_rings(nn); + if (err) + goto err_free_exn; + + err = netif_set_real_num_tx_queues(netdev, nn->num_tx_rings); + if (err) + goto err_free_rings; + + err = netif_set_real_num_rx_queues(netdev, nn->num_rx_rings); + if (err) + goto err_free_rings; + + if (nn->cap & NFP_NET_CFG_CTRL_RSS) { + nfp_net_rss_write_key(nn); + nfp_net_rss_write_itbl(nn); + nn_writel(nn, NFP_NET_CFG_RSS_CTRL, nn->rss_cfg); + update |= NFP_NET_CFG_UPDATE_RSS; + } + + if (nn->cap & NFP_NET_CFG_CTRL_IRQMOD) { + nfp_net_coalesce_write_cfg(nn); + + new_ctrl |= NFP_NET_CFG_CTRL_IRQMOD; + update |= NFP_NET_CFG_UPDATE_IRQMOD; + } + + /* Step 2: Configure the NFP + * - Enable rings from 0 to tx_rings/rx_rings - 1. + * - Write MAC address (in case it changed) + * - Set the MTU + * - Set the Freelist buffer size + * - Enable the FW + */ + nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, nn->num_tx_rings == 64 ? + 0xffffffffffffffffULL : ((u64)1 << nn->num_tx_rings) - 1); + + nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, nn->num_rx_rings == 64 ? 
+ 0xffffffffffffffffULL : ((u64)1 << nn->num_rx_rings) - 1); + + nfp_net_write_mac_addr(nn, netdev->dev_addr); + + nn_writel(nn, NFP_NET_CFG_MTU, netdev->mtu); + nn_writel(nn, NFP_NET_CFG_FLBUFSZ, nn->fl_bufsz); + + /* Enable device */ + new_ctrl |= NFP_NET_CFG_CTRL_ENABLE; + update |= NFP_NET_CFG_UPDATE_GEN; + update |= NFP_NET_CFG_UPDATE_MSIX; + update |= NFP_NET_CFG_UPDATE_RING; + if (nn->cap & NFP_NET_CFG_CTRL_RINGCFG) + new_ctrl |= NFP_NET_CFG_CTRL_RINGCFG; + + nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl); + err = nfp_net_reconfig(nn, update); + if (err) + goto err_clear_config; + + nn->ctrl = new_ctrl; + + /* Since reconfiguration requests while NFP is down are ignored we + * have to wipe the entire VXLAN configuration and reinitialize it. + */ + if (nn->ctrl & NFP_NET_CFG_CTRL_VXLAN) { + memset(&nn->vxlan_ports, 0, sizeof(nn->vxlan_ports)); + memset(&nn->vxlan_usecnt, 0, sizeof(nn->vxlan_usecnt)); + vxlan_get_rx_port(netdev); + } + + /* Step 3: Enable for kernel + * - put some freelist descriptors on each RX ring + * - enable NAPI on each ring + * - enable all TX queues + * - set link state + */ + for (r = 0; r < nn->num_r_vecs; r++) { + err = nfp_net_start_vec(nn, &nn->r_vecs[r]); + if (err) + goto err_disable_napi; + } + + netif_tx_wake_all_queues(netdev); + + err = nfp_net_aux_irq_request(nn, NFP_NET_CFG_LSC, "%s-lsc", + nn->lsc_name, sizeof(nn->lsc_name), + NFP_NET_IRQ_LSC_IDX, nn->lsc_handler); + if (err) + goto err_stop_tx; + nfp_net_read_link_status(nn); + + return 0; + +err_stop_tx: + netif_tx_disable(netdev); + for (r = 0; r < nn->num_r_vecs; r++) + nfp_net_tx_flush(nn->r_vecs[r].tx_ring); +err_disable_napi: + while (r--) { + napi_disable(&nn->r_vecs[r].napi); + nfp_net_rx_flush(nn->r_vecs[r].rx_ring); + } +err_clear_config: + nfp_net_clear_config_and_disable(nn); +err_free_rings: + nfp_net_free_rings(nn); +err_free_exn: + nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX); + return err; +} + +/** + * nfp_net_netdev_close() - Called when the device is downed + * @netdev: netdev structure + */ +static int nfp_net_netdev_close(struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + int r; + + if (!(nn->ctrl & NFP_NET_CFG_CTRL_ENABLE)) { + nn_err(nn, "Dev is not up: 0x%08x\n", nn->ctrl); + return 0; + } + + /* Step 1: Disable RX and TX rings from the Linux kernel perspective + */ + nfp_net_aux_irq_free(nn, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX); + netif_carrier_off(netdev); + nn->link_up = false; + + for (r = 0; r < nn->num_r_vecs; r++) + napi_disable(&nn->r_vecs[r].napi); + + netif_tx_disable(netdev); + + /* Step 2: Tell NFP + */ + nfp_net_clear_config_and_disable(nn); + + /* Step 3: Free resources + */ + for (r = 0; r < nn->num_r_vecs; r++) { + nfp_net_rx_flush(nn->r_vecs[r].rx_ring); + nfp_net_tx_flush(nn->r_vecs[r].tx_ring); + } + + nfp_net_free_rings(nn); + nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX); + + nn_dbg(nn, "%s down", netdev->name); + return 0; +} + +static void nfp_net_set_rx_mode(struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + u32 new_ctrl; + + new_ctrl = nn->ctrl; + + if (netdev->flags & IFF_PROMISC) { + if (nn->cap & NFP_NET_CFG_CTRL_PROMISC) + new_ctrl |= NFP_NET_CFG_CTRL_PROMISC; + else + nn_warn(nn, "FW does not support promiscuous mode\n"); + } else { + new_ctrl &= ~NFP_NET_CFG_CTRL_PROMISC; + } + + if (new_ctrl == nn->ctrl) + return; + + nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl); + if (nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN)) + return; + + nn->ctrl = new_ctrl; +} + +static 
int nfp_net_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct nfp_net *nn = netdev_priv(netdev); + u32 tmp; + + nn_dbg(nn, "New MTU = %d\n", new_mtu); + + if (new_mtu < 68 || new_mtu > nn->max_mtu) { + nn_err(nn, "New MTU (%d) is not valid\n", new_mtu); + return -EINVAL; + } + + netdev->mtu = new_mtu; + + /* Freelist buffer size rounded up to the nearest 1K */ + tmp = new_mtu + ETH_HLEN + VLAN_HLEN + NFP_NET_MAX_PREPEND; + nn->fl_bufsz = roundup(tmp, 1024); + + /* restart if running */ + if (netif_running(netdev)) { + nfp_net_netdev_close(netdev); + nfp_net_netdev_open(netdev); + } + + return 0; +} + +static struct rtnl_link_stats64 *nfp_net_stat64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) +{ + struct nfp_net *nn = netdev_priv(netdev); + int r; + + for (r = 0; r < nn->num_r_vecs; r++) { + struct nfp_net_r_vector *r_vec = &nn->r_vecs[r]; + u64 data[3]; + unsigned int start; + + do { + start = u64_stats_fetch_begin(&r_vec->rx_sync); + data[0] = r_vec->rx_pkts; + data[1] = r_vec->rx_bytes; + data[2] = r_vec->rx_drops; + } while (u64_stats_fetch_retry(&r_vec->rx_sync, start)); + stats->rx_packets += data[0]; + stats->rx_bytes += data[1]; + stats->rx_dropped += data[2]; + + do { + start = u64_stats_fetch_begin(&r_vec->tx_sync); + data[0] = r_vec->tx_pkts; + data[1] = r_vec->tx_bytes; + data[2] = r_vec->tx_errors; + } while (u64_stats_fetch_retry(&r_vec->tx_sync, start)); + stats->tx_packets += data[0]; + stats->tx_bytes += data[1]; + stats->tx_errors += data[2]; + } + + return stats; +} + +static int nfp_net_set_features(struct net_device *netdev, + netdev_features_t features) +{ + netdev_features_t changed = netdev->features ^ features; + struct nfp_net *nn = netdev_priv(netdev); + u32 new_ctrl; + int err; + + /* Assume this is not called with features we have not advertised */ + + new_ctrl = nn->ctrl; + + if (changed & NETIF_F_RXCSUM) { + if (features & NETIF_F_RXCSUM) + new_ctrl |= NFP_NET_CFG_CTRL_RXCSUM; + else + new_ctrl &= ~NFP_NET_CFG_CTRL_RXCSUM; + } + + if (changed & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) { + if (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) + new_ctrl |= NFP_NET_CFG_CTRL_TXCSUM; + else + new_ctrl &= ~NFP_NET_CFG_CTRL_TXCSUM; + } + + if (changed & (NETIF_F_TSO | NETIF_F_TSO6)) { + if (features & (NETIF_F_TSO | NETIF_F_TSO6)) + new_ctrl |= NFP_NET_CFG_CTRL_LSO; + else + new_ctrl &= ~NFP_NET_CFG_CTRL_LSO; + } + + if (changed & NETIF_F_HW_VLAN_CTAG_RX) { + if (features & NETIF_F_HW_VLAN_CTAG_RX) + new_ctrl |= NFP_NET_CFG_CTRL_RXVLAN; + else + new_ctrl &= ~NFP_NET_CFG_CTRL_RXVLAN; + } + + if (changed & NETIF_F_HW_VLAN_CTAG_TX) { + if (features & NETIF_F_HW_VLAN_CTAG_TX) + new_ctrl |= NFP_NET_CFG_CTRL_TXVLAN; + else + new_ctrl &= ~NFP_NET_CFG_CTRL_TXVLAN; + } + + if (changed & NETIF_F_SG) { + if (features & NETIF_F_SG) + new_ctrl |= NFP_NET_CFG_CTRL_GATHER; + else + new_ctrl &= ~NFP_NET_CFG_CTRL_GATHER; + } + + nn_dbg(nn, "Feature change 0x%llx -> 0x%llx (changed=0x%llx)\n", + netdev->features, features, changed); + + if (new_ctrl == nn->ctrl) + return 0; + + nn_dbg(nn, "NIC ctrl: 0x%x -> 0x%x\n", nn->ctrl, new_ctrl); + nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl); + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); + if (err) + return err; + + nn->ctrl = new_ctrl; + + return 0; +} + +static netdev_features_t +nfp_net_features_check(struct sk_buff *skb, struct net_device *dev, + netdev_features_t features) +{ + u8 l4_hdr; + + /* We can't do TSO over double tagged packets (802.1AD) */ + features &= vlan_features_check(skb, features); + + 
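+	/* Non-tunnelled packets need no further feature restrictions */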
if (!skb->encapsulation) + return features; + + /* Ensure that inner L4 header offset fits into TX descriptor field */ + if (skb_is_gso(skb)) { + u32 hdrlen; + + hdrlen = skb_inner_transport_header(skb) - skb->data + + inner_tcp_hdrlen(skb); + + if (unlikely(hdrlen > NFP_NET_LSO_MAX_HDR_SZ)) + features &= ~NETIF_F_GSO_MASK; + } + + /* VXLAN/GRE check */ + switch (vlan_get_protocol(skb)) { + case htons(ETH_P_IP): + l4_hdr = ip_hdr(skb)->protocol; + break; + case htons(ETH_P_IPV6): + l4_hdr = ipv6_hdr(skb)->nexthdr; + break; + default: + return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + } + + if (skb->inner_protocol_type != ENCAP_TYPE_ETHER || + skb->inner_protocol != htons(ETH_P_TEB) || + (l4_hdr != IPPROTO_UDP && l4_hdr != IPPROTO_GRE) || + (l4_hdr == IPPROTO_UDP && + (skb_inner_mac_header(skb) - skb_transport_header(skb) != + sizeof(struct udphdr) + sizeof(struct vxlanhdr)))) + return features & ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + + return features; +} + +/** + * nfp_net_set_vxlan_port() - set vxlan port in SW and reconfigure HW + * @nn: NFP Net device to reconfigure + * @idx: Index into the port table where new port should be written + * @port: UDP port to configure (pass zero to remove VXLAN port) + */ +static void nfp_net_set_vxlan_port(struct nfp_net *nn, int idx, __be16 port) +{ + int i; + + nn->vxlan_ports[idx] = port; + + if (!(nn->ctrl & NFP_NET_CFG_CTRL_VXLAN)) + return; + + BUILD_BUG_ON(NFP_NET_N_VXLAN_PORTS & 1); + for (i = 0; i < NFP_NET_N_VXLAN_PORTS; i += 2) + nn_writel(nn, NFP_NET_CFG_VXLAN_PORT + i * sizeof(port), + be16_to_cpu(nn->vxlan_ports[i + 1]) << 16 | + be16_to_cpu(nn->vxlan_ports[i])); + + nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_VXLAN); +} + +/** + * nfp_net_find_vxlan_idx() - find table entry of the port or a free one + * @nn: NFP Network structure + * @port: UDP port to look for + * + * Return: if the port is already in the table -- it's position; + * if the port is not in the table -- free position to use; + * if the table is full -- -ENOSPC. 
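+ *
+ * E.g. with hypothetical table contents {4789, 0, 8472, 0} and use
+ * counts {1, 0, 2, 0}, looking up 8472 returns 2, while looking up a
+ * port not yet in the table returns 3 (the last unused slot scanned).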
+ */ +static int nfp_net_find_vxlan_idx(struct nfp_net *nn, __be16 port) +{ + int i, free_idx = -ENOSPC; + + for (i = 0; i < NFP_NET_N_VXLAN_PORTS; i++) { + if (nn->vxlan_ports[i] == port) + return i; + if (!nn->vxlan_usecnt[i]) + free_idx = i; + } + + return free_idx; +} + +static void nfp_net_add_vxlan_port(struct net_device *netdev, + sa_family_t sa_family, __be16 port) +{ + struct nfp_net *nn = netdev_priv(netdev); + int idx; + + idx = nfp_net_find_vxlan_idx(nn, port); + if (idx == -ENOSPC) + return; + + if (!nn->vxlan_usecnt[idx]++) + nfp_net_set_vxlan_port(nn, idx, port); +} + +static void nfp_net_del_vxlan_port(struct net_device *netdev, + sa_family_t sa_family, __be16 port) +{ + struct nfp_net *nn = netdev_priv(netdev); + int idx; + + idx = nfp_net_find_vxlan_idx(nn, port); + if (!nn->vxlan_usecnt[idx] || idx == -ENOSPC) + return; + + if (!--nn->vxlan_usecnt[idx]) + nfp_net_set_vxlan_port(nn, idx, 0); +} + +static const struct net_device_ops nfp_net_netdev_ops = { + .ndo_open = nfp_net_netdev_open, + .ndo_stop = nfp_net_netdev_close, + .ndo_start_xmit = nfp_net_tx, + .ndo_get_stats64 = nfp_net_stat64, + .ndo_tx_timeout = nfp_net_tx_timeout, + .ndo_set_rx_mode = nfp_net_set_rx_mode, + .ndo_change_mtu = nfp_net_change_mtu, + .ndo_set_mac_address = eth_mac_addr, + .ndo_set_features = nfp_net_set_features, + .ndo_features_check = nfp_net_features_check, + .ndo_add_vxlan_port = nfp_net_add_vxlan_port, + .ndo_del_vxlan_port = nfp_net_del_vxlan_port, +}; + +/** + * nfp_net_info() - Print general info about the NIC + * @nn: NFP Net device to reconfigure + */ +void nfp_net_info(struct nfp_net *nn) +{ + nn_info(nn, "Netronome %s %sNetdev: TxQs=%d/%d RxQs=%d/%d\n", + nn->is_nfp3200 ? "NFP-32xx" : "NFP-6xxx", + nn->is_vf ? "VF " : "", + nn->num_tx_rings, nn->max_tx_rings, + nn->num_rx_rings, nn->max_rx_rings); + nn_info(nn, "VER: %d.%d.%d.%d, Maximum supported MTU: %d\n", + nn->fw_ver.resv, nn->fw_ver.class, + nn->fw_ver.major, nn->fw_ver.minor, + nn->max_mtu); + nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + nn->cap, + nn->cap & NFP_NET_CFG_CTRL_PROMISC ? "PROMISC " : "", + nn->cap & NFP_NET_CFG_CTRL_L2BC ? "L2BCFILT " : "", + nn->cap & NFP_NET_CFG_CTRL_L2MC ? "L2MCFILT " : "", + nn->cap & NFP_NET_CFG_CTRL_RXCSUM ? "RXCSUM " : "", + nn->cap & NFP_NET_CFG_CTRL_TXCSUM ? "TXCSUM " : "", + nn->cap & NFP_NET_CFG_CTRL_RXVLAN ? "RXVLAN " : "", + nn->cap & NFP_NET_CFG_CTRL_TXVLAN ? "TXVLAN " : "", + nn->cap & NFP_NET_CFG_CTRL_SCATTER ? "SCATTER " : "", + nn->cap & NFP_NET_CFG_CTRL_GATHER ? "GATHER " : "", + nn->cap & NFP_NET_CFG_CTRL_LSO ? "TSO " : "", + nn->cap & NFP_NET_CFG_CTRL_RSS ? "RSS " : "", + nn->cap & NFP_NET_CFG_CTRL_L2SWITCH ? "L2SWITCH " : "", + nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "", + nn->cap & NFP_NET_CFG_CTRL_IRQMOD ? "IRQMOD " : "", + nn->cap & NFP_NET_CFG_CTRL_VXLAN ? "VXLAN " : "", + nn->cap & NFP_NET_CFG_CTRL_NVGRE ? "NVGRE " : ""); +} + +/** + * nfp_net_netdev_alloc() - Allocate netdev and related structure + * @pdev: PCI device + * @max_tx_rings: Maximum number of TX rings supported by device + * @max_rx_rings: Maximum number of RX rings supported by device + * + * This function allocates a netdev device and fills in the initial + * part of the @struct nfp_net structure. + * + * Return: NFP Net device structure, or ERR_PTR on error. 
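+ *
+ * A probe path would typically use it along these lines (sketch only):
+ *
+ *	nn = nfp_net_netdev_alloc(pdev, max_tx_rings, max_rx_rings);
+ *	if (IS_ERR(nn))
+ *		return PTR_ERR(nn);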
+ */ +struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev, + int max_tx_rings, int max_rx_rings) +{ + struct net_device *netdev; + struct nfp_net *nn; + int nqs; + + netdev = alloc_etherdev_mqs(sizeof(struct nfp_net), + max_tx_rings, max_rx_rings); + if (!netdev) + return ERR_PTR(-ENOMEM); + + SET_NETDEV_DEV(netdev, &pdev->dev); + nn = netdev_priv(netdev); + + nn->netdev = netdev; + nn->pdev = pdev; + + nn->max_tx_rings = max_tx_rings; + nn->max_rx_rings = max_rx_rings; + + nqs = netif_get_num_default_rss_queues(); + nn->num_tx_rings = min_t(int, nqs, max_tx_rings); + nn->num_rx_rings = min_t(int, nqs, max_rx_rings); + + nn->txd_cnt = NFP_NET_TX_DESCS_DEFAULT; + nn->rxd_cnt = NFP_NET_RX_DESCS_DEFAULT; + + spin_lock_init(&nn->reconfig_lock); + spin_lock_init(&nn->link_status_lock); + + return nn; +} + +/** + * nfp_net_netdev_free() - Undo what @nfp_net_netdev_alloc() did + * @nn: NFP Net device to reconfigure + */ +void nfp_net_netdev_free(struct nfp_net *nn) +{ + free_netdev(nn->netdev); +} + +/** + * nfp_net_rss_init() - Set the initial RSS parameters + * @nn: NFP Net device to reconfigure + */ +static void nfp_net_rss_init(struct nfp_net *nn) +{ + int i; + + netdev_rss_key_fill(nn->rss_key, NFP_NET_CFG_RSS_KEY_SZ); + + for (i = 0; i < sizeof(nn->rss_itbl); i++) + nn->rss_itbl[i] = + ethtool_rxfh_indir_default(i, nn->num_rx_rings); + + /* Enable IPv4/IPv6 TCP by default */ + nn->rss_cfg = NFP_NET_CFG_RSS_IPV4_TCP | + NFP_NET_CFG_RSS_IPV6_TCP | + NFP_NET_CFG_RSS_TOEPLITZ | + NFP_NET_CFG_RSS_MASK; +} + +/** + * nfp_net_irqmod_init() - Set the initial IRQ moderation parameters + * @nn: NFP Net device to reconfigure + */ +static void nfp_net_irqmod_init(struct nfp_net *nn) +{ + nn->rx_coalesce_usecs = 50; + nn->rx_coalesce_max_frames = 64; + nn->tx_coalesce_usecs = 50; + nn->tx_coalesce_max_frames = 64; +} + +/** + * nfp_net_netdev_init() - Initialise/finalise the netdev structure + * @netdev: netdev structure + * + * Return: 0 on success or negative errno on error. + */ +int nfp_net_netdev_init(struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + int err; + + /* Get some of the read-only fields from the BAR */ + nn->cap = nn_readl(nn, NFP_NET_CFG_CAP); + nn->max_mtu = nn_readl(nn, NFP_NET_CFG_MAX_MTU); + + nfp_net_write_mac_addr(nn, nn->netdev->dev_addr); + + /* Set default MTU and Freelist buffer size */ + if (nn->max_mtu < NFP_NET_DEFAULT_MTU) + netdev->mtu = nn->max_mtu; + else + netdev->mtu = NFP_NET_DEFAULT_MTU; + nn->fl_bufsz = NFP_NET_DEFAULT_RX_BUFSZ; + + /* Advertise/enable offloads based on capabilities + * + * Note: netdev->features show the currently enabled features + * and netdev->hw_features advertises which features are + * supported. By default we enable most features. 
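+ *
+ * TSO, for instance, stays advertised in hw_features but is cleared
+ * from features further down, so it can still be enabled at runtime,
+ * e.g. with "ethtool -K <iface> tso on".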
+ */ + netdev->hw_features = NETIF_F_HIGHDMA; + if (nn->cap & NFP_NET_CFG_CTRL_RXCSUM) { + netdev->hw_features |= NETIF_F_RXCSUM; + nn->ctrl |= NFP_NET_CFG_CTRL_RXCSUM; + } + if (nn->cap & NFP_NET_CFG_CTRL_TXCSUM) { + netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + nn->ctrl |= NFP_NET_CFG_CTRL_TXCSUM; + } + if (nn->cap & NFP_NET_CFG_CTRL_GATHER) { + netdev->hw_features |= NETIF_F_SG; + nn->ctrl |= NFP_NET_CFG_CTRL_GATHER; + } + if ((nn->cap & NFP_NET_CFG_CTRL_LSO) && nn->fw_ver.major > 2) { + netdev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6; + nn->ctrl |= NFP_NET_CFG_CTRL_LSO; + } + if (nn->cap & NFP_NET_CFG_CTRL_RSS) { + netdev->hw_features |= NETIF_F_RXHASH; + nfp_net_rss_init(nn); + nn->ctrl |= NFP_NET_CFG_CTRL_RSS; + } + if (nn->cap & NFP_NET_CFG_CTRL_VXLAN && + nn->cap & NFP_NET_CFG_CTRL_NVGRE) { + if (nn->cap & NFP_NET_CFG_CTRL_LSO) + netdev->hw_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_UDP_TUNNEL; + nn->ctrl |= NFP_NET_CFG_CTRL_VXLAN | NFP_NET_CFG_CTRL_NVGRE; + + netdev->hw_enc_features = netdev->hw_features; + } + + netdev->vlan_features = netdev->hw_features; + + if (nn->cap & NFP_NET_CFG_CTRL_RXVLAN) { + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; + nn->ctrl |= NFP_NET_CFG_CTRL_RXVLAN; + } + if (nn->cap & NFP_NET_CFG_CTRL_TXVLAN) { + netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; + nn->ctrl |= NFP_NET_CFG_CTRL_TXVLAN; + } + + netdev->features = netdev->hw_features; + + /* Advertise but disable TSO by default. */ + netdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6); + + /* Allow L2 Broadcast and Multicast through by default, if supported */ + if (nn->cap & NFP_NET_CFG_CTRL_L2BC) + nn->ctrl |= NFP_NET_CFG_CTRL_L2BC; + if (nn->cap & NFP_NET_CFG_CTRL_L2MC) + nn->ctrl |= NFP_NET_CFG_CTRL_L2MC; + + /* Allow IRQ moderation, if supported */ + if (nn->cap & NFP_NET_CFG_CTRL_IRQMOD) { + nfp_net_irqmod_init(nn); + nn->ctrl |= NFP_NET_CFG_CTRL_IRQMOD; + } + + /* On NFP-3200 enable MSI-X auto-masking, if supported and the + * interrupts are not shared. + */ + if (nn->is_nfp3200 && nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO) + nn->ctrl |= NFP_NET_CFG_CTRL_MSIXAUTO; + + /* On NFP4000/NFP6000, determine RX packet/metadata boundary offset */ + if (nn->fw_ver.major >= 2) + nn->rx_offset = nn_readl(nn, NFP_NET_CFG_RX_OFFSET); + else + nn->rx_offset = NFP_NET_RX_OFFSET; + + /* Stash the re-configuration queue away. First odd queue in TX Bar */ + nn->qcp_cfg = nn->tx_bar + NFP_QCP_QUEUE_ADDR_SZ; + + /* Make sure the FW knows the netdev is supposed to be disabled here */ + nn_writel(nn, NFP_NET_CFG_CTRL, 0); + nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, 0); + nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, 0); + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_RING | + NFP_NET_CFG_UPDATE_GEN); + if (err) + return err; + + /* Finalise the netdev setup */ + ether_setup(netdev); + netdev->netdev_ops = &nfp_net_netdev_ops; + netdev->watchdog_timeo = msecs_to_jiffies(5 * 1000); + + nfp_net_set_ethtool_ops(netdev); + nfp_net_irqs_assign(netdev); + + return register_netdev(netdev); +} + +/** + * nfp_net_netdev_clean() - Undo what nfp_net_netdev_init() did. + * @netdev: netdev structure + */ +void nfp_net_netdev_clean(struct net_device *netdev) +{ + unregister_netdev(netdev); +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h new file mode 100644 index 000000000000..8692003aeed8 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -0,0 +1,323 @@ +/* + * Copyright (C) 2015 Netronome Systems, Inc. 
+ * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * nfp_net_ctrl.h + * Netronome network device driver: Control BAR layout + * Authors: Jakub Kicinski <jakub.kicinski@netronome.com> + * Jason McMullan <jason.mcmullan@netronome.com> + * Rolf Neugebauer <rolf.neugebauer@netronome.com> + * Brad Petrus <brad.petrus@netronome.com> + */ + +#ifndef _NFP_NET_CTRL_H_ +#define _NFP_NET_CTRL_H_ + +/* IMPORTANT: This header file is shared with the FW, + * no OS specific constructs, please! + */ + +/** + * Configuration BAR size. + * + * The configuration BAR is 8K in size, but on the NFP6000, due to + * THB-350, 32k needs to be reserved. 
+ */ +#define NFP_NET_CFG_BAR_SZ (32 * 1024) + +/** + * Offset in Freelist buffer where packet starts on RX + */ +#define NFP_NET_RX_OFFSET 32 + +/** + * Maximum header size supported for LSO frames + */ +#define NFP_NET_LSO_MAX_HDR_SZ 255 + +/** + * Hash type pre-pended when a RSS hash was computed + */ +#define NFP_NET_RSS_NONE 0 +#define NFP_NET_RSS_IPV4 1 +#define NFP_NET_RSS_IPV6 2 +#define NFP_NET_RSS_IPV6_EX 3 +#define NFP_NET_RSS_IPV4_TCP 4 +#define NFP_NET_RSS_IPV6_TCP 5 +#define NFP_NET_RSS_IPV6_EX_TCP 6 +#define NFP_NET_RSS_IPV4_UDP 7 +#define NFP_NET_RSS_IPV6_UDP 8 +#define NFP_NET_RSS_IPV6_EX_UDP 9 + +/** + * @NFP_NET_TXR_MAX: Maximum number of TX rings + * @NFP_NET_TXR_MASK: Mask for TX rings + * @NFP_NET_RXR_MAX: Maximum number of RX rings + * @NFP_NET_RXR_MASK: Mask for RX rings + */ +#define NFP_NET_TXR_MAX 64 +#define NFP_NET_TXR_MASK (NFP_NET_TXR_MAX - 1) +#define NFP_NET_RXR_MAX 64 +#define NFP_NET_RXR_MASK (NFP_NET_RXR_MAX - 1) + +/** + * Read/Write config words (0x0000 - 0x002c) + * @NFP_NET_CFG_CTRL: Global control + * @NFP_NET_CFG_UPDATE: Indicate which fields are updated + * @NFP_NET_CFG_TXRS_ENABLE: Bitmask of enabled TX rings + * @NFP_NET_CFG_RXRS_ENABLE: Bitmask of enabled RX rings + * @NFP_NET_CFG_MTU: Set MTU size + * @NFP_NET_CFG_FLBUFSZ: Set freelist buffer size (must be larger than MTU) + * @NFP_NET_CFG_EXN: MSI-X table entry for exceptions + * @NFP_NET_CFG_LSC: MSI-X table entry for link state changes + * @NFP_NET_CFG_MACADDR: MAC address + * + * TODO: + * - define Error details in UPDATE + */ +#define NFP_NET_CFG_CTRL 0x0000 +#define NFP_NET_CFG_CTRL_ENABLE (0x1 << 0) /* Global enable */ +#define NFP_NET_CFG_CTRL_PROMISC (0x1 << 1) /* Enable Promisc mode */ +#define NFP_NET_CFG_CTRL_L2BC (0x1 << 2) /* Allow L2 Broadcast */ +#define NFP_NET_CFG_CTRL_L2MC (0x1 << 3) /* Allow L2 Multicast */ +#define NFP_NET_CFG_CTRL_RXCSUM (0x1 << 4) /* Enable RX Checksum */ +#define NFP_NET_CFG_CTRL_TXCSUM (0x1 << 5) /* Enable TX Checksum */ +#define NFP_NET_CFG_CTRL_RXVLAN (0x1 << 6) /* Enable VLAN strip */ +#define NFP_NET_CFG_CTRL_TXVLAN (0x1 << 7) /* Enable VLAN insert */ +#define NFP_NET_CFG_CTRL_SCATTER (0x1 << 8) /* Scatter DMA */ +#define NFP_NET_CFG_CTRL_GATHER (0x1 << 9) /* Gather DMA */ +#define NFP_NET_CFG_CTRL_LSO (0x1 << 10) /* LSO/TSO */ +#define NFP_NET_CFG_CTRL_RINGCFG (0x1 << 16) /* Ring runtime changes */ +#define NFP_NET_CFG_CTRL_RSS (0x1 << 17) /* RSS */ +#define NFP_NET_CFG_CTRL_IRQMOD (0x1 << 18) /* Interrupt moderation */ +#define NFP_NET_CFG_CTRL_RINGPRIO (0x1 << 19) /* Ring priorities */ +#define NFP_NET_CFG_CTRL_MSIXAUTO (0x1 << 20) /* MSI-X auto-masking */ +#define NFP_NET_CFG_CTRL_TXRWB (0x1 << 21) /* Write-back of TX ring*/ +#define NFP_NET_CFG_CTRL_L2SWITCH (0x1 << 22) /* L2 Switch */ +#define NFP_NET_CFG_CTRL_L2SWITCH_LOCAL (0x1 << 23) /* Switch to local */ +#define NFP_NET_CFG_CTRL_VXLAN (0x1 << 24) /* VXLAN tunnel support */ +#define NFP_NET_CFG_CTRL_NVGRE (0x1 << 25) /* NVGRE tunnel support */ +#define NFP_NET_CFG_UPDATE 0x0004 +#define NFP_NET_CFG_UPDATE_GEN (0x1 << 0) /* General update */ +#define NFP_NET_CFG_UPDATE_RING (0x1 << 1) /* Ring config change */ +#define NFP_NET_CFG_UPDATE_RSS (0x1 << 2) /* RSS config change */ +#define NFP_NET_CFG_UPDATE_TXRPRIO (0x1 << 3) /* TX Ring prio change */ +#define NFP_NET_CFG_UPDATE_RXRPRIO (0x1 << 4) /* RX Ring prio change */ +#define NFP_NET_CFG_UPDATE_MSIX (0x1 << 5) /* MSI-X change */ +#define NFP_NET_CFG_UPDATE_L2SWITCH (0x1 << 6) /* Switch changes */ +#define NFP_NET_CFG_UPDATE_RESET (0x1 << 
7) /* Update due to FLR */ +#define NFP_NET_CFG_UPDATE_IRQMOD (0x1 << 8) /* IRQ mod change */ +#define NFP_NET_CFG_UPDATE_VXLAN (0x1 << 9) /* VXLAN port change */ +#define NFP_NET_CFG_UPDATE_ERR (0x1 << 31) /* A error occurred */ +#define NFP_NET_CFG_TXRS_ENABLE 0x0008 +#define NFP_NET_CFG_RXRS_ENABLE 0x0010 +#define NFP_NET_CFG_MTU 0x0018 +#define NFP_NET_CFG_FLBUFSZ 0x001c +#define NFP_NET_CFG_EXN 0x001f +#define NFP_NET_CFG_LSC 0x0020 +#define NFP_NET_CFG_MACADDR 0x0024 + +/** + * Read-only words (0x0030 - 0x0050): + * @NFP_NET_CFG_VERSION: Firmware version number + * @NFP_NET_CFG_STS: Status + * @NFP_NET_CFG_CAP: Capabilities (same bits as @NFP_NET_CFG_CTRL) + * @NFP_NET_MAX_TXRINGS: Maximum number of TX rings + * @NFP_NET_MAX_RXRINGS: Maximum number of RX rings + * @NFP_NET_MAX_MTU: Maximum support MTU + * @NFP_NET_CFG_START_TXQ: Start Queue Control Queue to use for TX (PF only) + * @NFP_NET_CFG_START_RXQ: Start Queue Control Queue to use for RX (PF only) + * + * TODO: + * - define more STS bits + */ +#define NFP_NET_CFG_VERSION 0x0030 +#define NFP_NET_CFG_VERSION_RESERVED_MASK (0xff << 24) +#define NFP_NET_CFG_VERSION_CLASS_MASK (0xff << 16) +#define NFP_NET_CFG_VERSION_CLASS(x) (((x) & 0xff) << 16) +#define NFP_NET_CFG_VERSION_CLASS_GENERIC 0 +#define NFP_NET_CFG_VERSION_MAJOR_MASK (0xff << 8) +#define NFP_NET_CFG_VERSION_MAJOR(x) (((x) & 0xff) << 8) +#define NFP_NET_CFG_VERSION_MINOR_MASK (0xff << 0) +#define NFP_NET_CFG_VERSION_MINOR(x) (((x) & 0xff) << 0) +#define NFP_NET_CFG_STS 0x0034 +#define NFP_NET_CFG_STS_LINK (0x1 << 0) /* Link up or down */ +#define NFP_NET_CFG_CAP 0x0038 +#define NFP_NET_CFG_MAX_TXRINGS 0x003c +#define NFP_NET_CFG_MAX_RXRINGS 0x0040 +#define NFP_NET_CFG_MAX_MTU 0x0044 +/* Next two words are being used by VFs for solving THB350 issue */ +#define NFP_NET_CFG_START_TXQ 0x0048 +#define NFP_NET_CFG_START_RXQ 0x004c + +/** + * NFP-3200 workaround (0x0050 - 0x0058) + * @NFP_NET_CFG_SPARE_ADDR: DMA address for ME code to use (e.g. 
YDS-155 fix) + */ +#define NFP_NET_CFG_SPARE_ADDR 0x0050 +/** + * NFP6000/NFP4000 - Prepend configuration + */ +#define NFP_NET_CFG_RX_OFFSET 0x0050 +#define NFP_NET_CFG_RX_OFFSET_DYNAMIC 0 /* Prepend mode */ + +/** + * NFP6000/NFP4000 - VXLAN/UDP encap configuration + * @NFP_NET_CFG_VXLAN_PORT: Base address of table of tunnels' UDP dst ports + * @NFP_NET_CFG_VXLAN_SZ: Size of the UDP port table in bytes + */ +#define NFP_NET_CFG_VXLAN_PORT 0x0060 +#define NFP_NET_CFG_VXLAN_SZ 0x0008 + +/** + * 64B reserved for future use (0x0080 - 0x00c0) + */ +#define NFP_NET_CFG_RESERVED 0x0080 +#define NFP_NET_CFG_RESERVED_SZ 0x0040 + +/** + * RSS configuration (0x0100 - 0x01ac): + * Used only when NFP_NET_CFG_CTRL_RSS is enabled + * @NFP_NET_CFG_RSS_CFG: RSS configuration word + * @NFP_NET_CFG_RSS_KEY: RSS "secret" key + * @NFP_NET_CFG_RSS_ITBL: RSS indirection table + */ +#define NFP_NET_CFG_RSS_BASE 0x0100 +#define NFP_NET_CFG_RSS_CTRL NFP_NET_CFG_RSS_BASE +#define NFP_NET_CFG_RSS_MASK (0x7f) +#define NFP_NET_CFG_RSS_MASK_of(_x) ((_x) & 0x7f) +#define NFP_NET_CFG_RSS_IPV4 (1 << 8) /* RSS for IPv4 */ +#define NFP_NET_CFG_RSS_IPV6 (1 << 9) /* RSS for IPv6 */ +#define NFP_NET_CFG_RSS_IPV4_TCP (1 << 10) /* RSS for IPv4/TCP */ +#define NFP_NET_CFG_RSS_IPV4_UDP (1 << 11) /* RSS for IPv4/UDP */ +#define NFP_NET_CFG_RSS_IPV6_TCP (1 << 12) /* RSS for IPv6/TCP */ +#define NFP_NET_CFG_RSS_IPV6_UDP (1 << 13) /* RSS for IPv6/UDP */ +#define NFP_NET_CFG_RSS_TOEPLITZ (1 << 24) /* Use Toeplitz hash */ +#define NFP_NET_CFG_RSS_KEY (NFP_NET_CFG_RSS_BASE + 0x4) +#define NFP_NET_CFG_RSS_KEY_SZ 0x28 +#define NFP_NET_CFG_RSS_ITBL (NFP_NET_CFG_RSS_BASE + 0x4 + \ + NFP_NET_CFG_RSS_KEY_SZ) +#define NFP_NET_CFG_RSS_ITBL_SZ 0x80 + +/** + * TX ring configuration (0x200 - 0x800) + * @NFP_NET_CFG_TXR_BASE: Base offset for TX ring configuration + * @NFP_NET_CFG_TXR_ADDR: Per TX ring DMA address (8B entries) + * @NFP_NET_CFG_TXR_WB_ADDR: Per TX ring write back DMA address (8B entries) + * @NFP_NET_CFG_TXR_SZ: Per TX ring ring size (1B entries) + * @NFP_NET_CFG_TXR_VEC: Per TX ring MSI-X table entry (1B entries) + * @NFP_NET_CFG_TXR_PRIO: Per TX ring priority (1B entries) + * @NFP_NET_CFG_TXR_IRQ_MOD: Per TX ring interrupt moderation packet + */ +#define NFP_NET_CFG_TXR_BASE 0x0200 +#define NFP_NET_CFG_TXR_ADDR(_x) (NFP_NET_CFG_TXR_BASE + ((_x) * 0x8)) +#define NFP_NET_CFG_TXR_WB_ADDR(_x) (NFP_NET_CFG_TXR_BASE + 0x200 + \ + ((_x) * 0x8)) +#define NFP_NET_CFG_TXR_SZ(_x) (NFP_NET_CFG_TXR_BASE + 0x400 + (_x)) +#define NFP_NET_CFG_TXR_VEC(_x) (NFP_NET_CFG_TXR_BASE + 0x440 + (_x)) +#define NFP_NET_CFG_TXR_PRIO(_x) (NFP_NET_CFG_TXR_BASE + 0x480 + (_x)) +#define NFP_NET_CFG_TXR_IRQ_MOD(_x) (NFP_NET_CFG_TXR_BASE + 0x500 + \ + ((_x) * 0x4)) + +/** + * RX ring configuration (0x0800 - 0x0c00) + * @NFP_NET_CFG_RXR_BASE: Base offset for RX ring configuration + * @NFP_NET_CFG_RXR_ADDR: Per RX ring DMA address (8B entries) + * @NFP_NET_CFG_RXR_SZ: Per RX ring ring size (1B entries) + * @NFP_NET_CFG_RXR_VEC: Per RX ring MSI-X table entry (1B entries) + * @NFP_NET_CFG_RXR_PRIO: Per RX ring priority (1B entries) + * @NFP_NET_CFG_RXR_IRQ_MOD: Per RX ring interrupt moderation (4B entries) + */ +#define NFP_NET_CFG_RXR_BASE 0x0800 +#define NFP_NET_CFG_RXR_ADDR(_x) (NFP_NET_CFG_RXR_BASE + ((_x) * 0x8)) +#define NFP_NET_CFG_RXR_SZ(_x) (NFP_NET_CFG_RXR_BASE + 0x200 + (_x)) +#define NFP_NET_CFG_RXR_VEC(_x) (NFP_NET_CFG_RXR_BASE + 0x240 + (_x)) +#define NFP_NET_CFG_RXR_PRIO(_x) (NFP_NET_CFG_RXR_BASE + 0x280 + (_x)) +#define NFP_NET_CFG_RXR_IRQ_MOD(_x) 
(NFP_NET_CFG_RXR_BASE + 0x300 + \ + ((_x) * 0x4)) + +/** + * Interrupt Control/Cause registers (0x0c00 - 0x0d00) + * These registers are only used when MSI-X auto-masking is not + * enabled (@NFP_NET_CFG_CTRL_MSIXAUTO not set). The array is index + * by MSI-X entry and are 1B in size. If an entry is zero, the + * corresponding entry is enabled. If the FW generates an interrupt, + * it writes a cause into the corresponding field. This also masks + * the MSI-X entry and the host driver must clear the register to + * re-enable the interrupt. + */ +#define NFP_NET_CFG_ICR_BASE 0x0c00 +#define NFP_NET_CFG_ICR(_x) (NFP_NET_CFG_ICR_BASE + (_x)) +#define NFP_NET_CFG_ICR_UNMASKED 0x0 +#define NFP_NET_CFG_ICR_RXTX 0x1 +#define NFP_NET_CFG_ICR_LSC 0x2 + +/** + * General device stats (0x0d00 - 0x0d90) + * all counters are 64bit. + */ +#define NFP_NET_CFG_STATS_BASE 0x0d00 +#define NFP_NET_CFG_STATS_RX_DISCARDS (NFP_NET_CFG_STATS_BASE + 0x00) +#define NFP_NET_CFG_STATS_RX_ERRORS (NFP_NET_CFG_STATS_BASE + 0x08) +#define NFP_NET_CFG_STATS_RX_OCTETS (NFP_NET_CFG_STATS_BASE + 0x10) +#define NFP_NET_CFG_STATS_RX_UC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x18) +#define NFP_NET_CFG_STATS_RX_MC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x20) +#define NFP_NET_CFG_STATS_RX_BC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x28) +#define NFP_NET_CFG_STATS_RX_FRAMES (NFP_NET_CFG_STATS_BASE + 0x30) +#define NFP_NET_CFG_STATS_RX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x38) +#define NFP_NET_CFG_STATS_RX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x40) + +#define NFP_NET_CFG_STATS_TX_DISCARDS (NFP_NET_CFG_STATS_BASE + 0x48) +#define NFP_NET_CFG_STATS_TX_ERRORS (NFP_NET_CFG_STATS_BASE + 0x50) +#define NFP_NET_CFG_STATS_TX_OCTETS (NFP_NET_CFG_STATS_BASE + 0x58) +#define NFP_NET_CFG_STATS_TX_UC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x60) +#define NFP_NET_CFG_STATS_TX_MC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x68) +#define NFP_NET_CFG_STATS_TX_BC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x70) +#define NFP_NET_CFG_STATS_TX_FRAMES (NFP_NET_CFG_STATS_BASE + 0x78) +#define NFP_NET_CFG_STATS_TX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x80) +#define NFP_NET_CFG_STATS_TX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x88) + +/** + * Per ring stats (0x1000 - 0x1800) + * options, 64bit per entry + * @NFP_NET_CFG_TXR_STATS: TX ring statistics (Packet and Byte count) + * @NFP_NET_CFG_RXR_STATS: RX ring statistics (Packet and Byte count) + */ +#define NFP_NET_CFG_TXR_STATS_BASE 0x1000 +#define NFP_NET_CFG_TXR_STATS(_x) (NFP_NET_CFG_TXR_STATS_BASE + \ + ((_x) * 0x10)) +#define NFP_NET_CFG_RXR_STATS_BASE 0x1400 +#define NFP_NET_CFG_RXR_STATS(_x) (NFP_NET_CFG_RXR_STATS_BASE + \ + ((_x) * 0x10)) + +#endif /* _NFP_NET_CTRL_H_ */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c new file mode 100644 index 000000000000..4c97c713121c --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c @@ -0,0 +1,235 @@ +/* + * Copyright (C) 2015 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. 
Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/debugfs.h> +#include <linux/module.h> +#include <linux/rtnetlink.h> + +#include "nfp_net.h" + +static struct dentry *nfp_dir; + +static int nfp_net_debugfs_rx_q_read(struct seq_file *file, void *data) +{ + struct nfp_net_rx_ring *rx_ring = file->private; + int fl_rd_p, fl_wr_p, rx_rd_p, rx_wr_p, rxd_cnt; + struct nfp_net_rx_desc *rxd; + struct sk_buff *skb; + struct nfp_net *nn; + int i; + + rtnl_lock(); + + if (!rx_ring->r_vec || !rx_ring->r_vec->nfp_net) + goto out; + nn = rx_ring->r_vec->nfp_net; + if (!netif_running(nn->netdev)) + goto out; + + rxd_cnt = rx_ring->cnt; + + fl_rd_p = nfp_qcp_rd_ptr_read(rx_ring->qcp_fl); + fl_wr_p = nfp_qcp_wr_ptr_read(rx_ring->qcp_fl); + rx_rd_p = nfp_qcp_rd_ptr_read(rx_ring->qcp_rx); + rx_wr_p = nfp_qcp_wr_ptr_read(rx_ring->qcp_rx); + + seq_printf(file, "RX[%02d]: H_RD=%d H_WR=%d FL_RD=%d FL_WR=%d RX_RD=%d RX_WR=%d\n", + rx_ring->idx, rx_ring->rd_p, rx_ring->wr_p, + fl_rd_p, fl_wr_p, rx_rd_p, rx_wr_p); + + for (i = 0; i < rxd_cnt; i++) { + rxd = &rx_ring->rxds[i]; + seq_printf(file, "%04d: 0x%08x 0x%08x", i, + rxd->vals[0], rxd->vals[1]); + + skb = READ_ONCE(rx_ring->rxbufs[i].skb); + if (skb) + seq_printf(file, " skb->head=%p skb->data=%p", + skb->head, skb->data); + + if (rx_ring->rxbufs[i].dma_addr) + seq_printf(file, " dma_addr=%pad", + &rx_ring->rxbufs[i].dma_addr); + + if (i == rx_ring->rd_p % rxd_cnt) + seq_puts(file, " H_RD "); + if (i == rx_ring->wr_p % rxd_cnt) + seq_puts(file, " H_WR "); + if (i == fl_rd_p % rxd_cnt) + seq_puts(file, " FL_RD"); + if (i == fl_wr_p % rxd_cnt) + seq_puts(file, " FL_WR"); + if (i == rx_rd_p % rxd_cnt) + seq_puts(file, " RX_RD"); + if (i == rx_wr_p % rxd_cnt) + seq_puts(file, " RX_WR"); + + seq_putc(file, '\n'); + } +out: + rtnl_unlock(); + return 0; +} + +static int nfp_net_debugfs_rx_q_open(struct inode *inode, struct file *f) +{ + return single_open(f, nfp_net_debugfs_rx_q_read, inode->i_private); +} + +static const struct file_operations nfp_rx_q_fops = { + .owner = THIS_MODULE, + .open = nfp_net_debugfs_rx_q_open, + .release = single_release, + .read = seq_read, + .llseek = seq_lseek +}; + +static int nfp_net_debugfs_tx_q_read(struct seq_file *file, void *data) +{ + struct nfp_net_tx_ring *tx_ring = file->private; + struct nfp_net_tx_desc *txd; + int d_rd_p, d_wr_p, txd_cnt; + struct sk_buff *skb; + struct nfp_net *nn; + int i; + + rtnl_lock(); + + if (!tx_ring->r_vec || !tx_ring->r_vec->nfp_net) + goto out; + nn = tx_ring->r_vec->nfp_net; + if (!netif_running(nn->netdev)) + goto out; + + txd_cnt = tx_ring->cnt; + + d_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q); + d_wr_p = nfp_qcp_wr_ptr_read(tx_ring->qcp_q); + + seq_printf(file, "TX[%02d]: H_RD=%d 
H_WR=%d D_RD=%d D_WR=%d\n", + tx_ring->idx, tx_ring->rd_p, tx_ring->wr_p, d_rd_p, d_wr_p); + + for (i = 0; i < txd_cnt; i++) { + txd = &tx_ring->txds[i]; + seq_printf(file, "%04d: 0x%08x 0x%08x 0x%08x 0x%08x", i, + txd->vals[0], txd->vals[1], + txd->vals[2], txd->vals[3]); + + skb = READ_ONCE(tx_ring->txbufs[i].skb); + if (skb) + seq_printf(file, " skb->head=%p skb->data=%p", + skb->head, skb->data); + if (tx_ring->txbufs[i].dma_addr) + seq_printf(file, " dma_addr=%pad", + &tx_ring->txbufs[i].dma_addr); + + if (i == tx_ring->rd_p % txd_cnt) + seq_puts(file, " H_RD"); + if (i == tx_ring->wr_p % txd_cnt) + seq_puts(file, " H_WR"); + if (i == d_rd_p % txd_cnt) + seq_puts(file, " D_RD"); + if (i == d_wr_p % txd_cnt) + seq_puts(file, " D_WR"); + + seq_putc(file, '\n'); + } +out: + rtnl_unlock(); + return 0; +} + +static int nfp_net_debugfs_tx_q_open(struct inode *inode, struct file *f) +{ + return single_open(f, nfp_net_debugfs_tx_q_read, inode->i_private); +} + +static const struct file_operations nfp_tx_q_fops = { + .owner = THIS_MODULE, + .open = nfp_net_debugfs_tx_q_open, + .release = single_release, + .read = seq_read, + .llseek = seq_lseek +}; + +void nfp_net_debugfs_adapter_add(struct nfp_net *nn) +{ + static struct dentry *queues, *tx, *rx; + char int_name[16]; + int i; + + if (IS_ERR_OR_NULL(nfp_dir)) + return; + + nn->debugfs_dir = debugfs_create_dir(pci_name(nn->pdev), nfp_dir); + if (IS_ERR_OR_NULL(nn->debugfs_dir)) + return; + + /* Create queue debugging sub-tree */ + queues = debugfs_create_dir("queue", nn->debugfs_dir); + if (IS_ERR_OR_NULL(queues)) + return; + + rx = debugfs_create_dir("rx", queues); + tx = debugfs_create_dir("tx", queues); + if (IS_ERR_OR_NULL(rx) || IS_ERR_OR_NULL(tx)) + return; + + for (i = 0; i < nn->num_rx_rings; i++) { + sprintf(int_name, "%d", i); + debugfs_create_file(int_name, S_IRUSR, rx, + &nn->rx_rings[i], &nfp_rx_q_fops); + } + + for (i = 0; i < nn->num_tx_rings; i++) { + sprintf(int_name, "%d", i); + debugfs_create_file(int_name, S_IRUSR, tx, + &nn->tx_rings[i], &nfp_tx_q_fops); + } +} + +void nfp_net_debugfs_adapter_del(struct nfp_net *nn) +{ + debugfs_remove_recursive(nn->debugfs_dir); + nn->debugfs_dir = NULL; +} + +void nfp_net_debugfs_create(void) +{ + nfp_dir = debugfs_create_dir("nfp_net", NULL); +} + +void nfp_net_debugfs_destroy(void) +{ + debugfs_remove_recursive(nfp_dir); + nfp_dir = NULL; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c new file mode 100644 index 000000000000..9a4084a68db5 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -0,0 +1,640 @@ +/* + * Copyright (C) 2015 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * nfp_net_ethtool.c + * Netronome network device driver: ethtool support + * Authors: Jakub Kicinski <jakub.kicinski@netronome.com> + * Jason McMullan <jason.mcmullan@netronome.com> + * Rolf Neugebauer <rolf.neugebauer@netronome.com> + * Brad Petrus <brad.petrus@netronome.com> + */ + +#include <linux/version.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/ethtool.h> + +#include "nfp_net_ctrl.h" +#include "nfp_net.h" + +/* Support for stats. Returns netdev, driver, and device stats */ +enum { NETDEV_ET_STATS, NFP_NET_DRV_ET_STATS, NFP_NET_DEV_ET_STATS }; +struct _nfp_net_et_stats { + char name[ETH_GSTRING_LEN]; + int type; + int sz; + int off; +}; + +#define NN_ET_NETDEV_STAT(m) NETDEV_ET_STATS, \ + FIELD_SIZEOF(struct net_device_stats, m), \ + offsetof(struct net_device_stats, m) +/* For stats in the control BAR (other than Q stats) */ +#define NN_ET_DEV_STAT(m) NFP_NET_DEV_ET_STATS, \ + sizeof(u64), \ + (m) +static const struct _nfp_net_et_stats nfp_net_et_stats[] = { + /* netdev stats */ + {"rx_packets", NN_ET_NETDEV_STAT(rx_packets)}, + {"tx_packets", NN_ET_NETDEV_STAT(tx_packets)}, + {"rx_bytes", NN_ET_NETDEV_STAT(rx_bytes)}, + {"tx_bytes", NN_ET_NETDEV_STAT(tx_bytes)}, + {"rx_errors", NN_ET_NETDEV_STAT(rx_errors)}, + {"tx_errors", NN_ET_NETDEV_STAT(tx_errors)}, + {"rx_dropped", NN_ET_NETDEV_STAT(rx_dropped)}, + {"tx_dropped", NN_ET_NETDEV_STAT(tx_dropped)}, + {"multicast", NN_ET_NETDEV_STAT(multicast)}, + {"collisions", NN_ET_NETDEV_STAT(collisions)}, + {"rx_over_errors", NN_ET_NETDEV_STAT(rx_over_errors)}, + {"rx_crc_errors", NN_ET_NETDEV_STAT(rx_crc_errors)}, + {"rx_frame_errors", NN_ET_NETDEV_STAT(rx_frame_errors)}, + {"rx_fifo_errors", NN_ET_NETDEV_STAT(rx_fifo_errors)}, + {"rx_missed_errors", NN_ET_NETDEV_STAT(rx_missed_errors)}, + {"tx_aborted_errors", NN_ET_NETDEV_STAT(tx_aborted_errors)}, + {"tx_carrier_errors", NN_ET_NETDEV_STAT(tx_carrier_errors)}, + {"tx_fifo_errors", NN_ET_NETDEV_STAT(tx_fifo_errors)}, + /* Stats from the device */ + {"dev_rx_discards", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_DISCARDS)}, + {"dev_rx_errors", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_ERRORS)}, + {"dev_rx_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_OCTETS)}, + {"dev_rx_uc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_UC_OCTETS)}, + {"dev_rx_mc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_MC_OCTETS)}, + {"dev_rx_bc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_BC_OCTETS)}, + {"dev_rx_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_FRAMES)}, + {"dev_rx_mc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_MC_FRAMES)}, + {"dev_rx_bc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_RX_BC_FRAMES)}, + + {"dev_tx_discards", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_DISCARDS)}, + {"dev_tx_errors", 
NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_ERRORS)}, + {"dev_tx_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_OCTETS)}, + {"dev_tx_uc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_UC_OCTETS)}, + {"dev_tx_mc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_MC_OCTETS)}, + {"dev_tx_bc_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_BC_OCTETS)}, + {"dev_tx_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_FRAMES)}, + {"dev_tx_mc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_MC_FRAMES)}, + {"dev_tx_bc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_BC_FRAMES)}, +}; + +#define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats) +#define NN_ET_RVEC_STATS_LEN (nn->num_r_vecs * 3) +#define NN_ET_RVEC_GATHER_STATS 7 +#define NN_ET_QUEUE_STATS_LEN ((nn->num_tx_rings + nn->num_rx_rings) * 2) +#define NN_ET_STATS_LEN (NN_ET_GLOBAL_STATS_LEN + NN_ET_RVEC_GATHER_STATS + \ + NN_ET_RVEC_STATS_LEN + NN_ET_QUEUE_STATS_LEN) + +static void nfp_net_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *drvinfo) +{ + struct nfp_net *nn = netdev_priv(netdev); + + strlcpy(drvinfo->driver, nfp_net_driver_name, sizeof(drvinfo->driver)); + strlcpy(drvinfo->version, nfp_net_driver_version, + sizeof(drvinfo->version)); + + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%d.%d", + nn->fw_ver.resv, nn->fw_ver.class, + nn->fw_ver.major, nn->fw_ver.minor); + strlcpy(drvinfo->bus_info, pci_name(nn->pdev), + sizeof(drvinfo->bus_info)); + + drvinfo->n_stats = NN_ET_STATS_LEN; + drvinfo->regdump_len = NFP_NET_CFG_BAR_SZ; +} + +static void nfp_net_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct nfp_net *nn = netdev_priv(netdev); + + ring->rx_max_pending = NFP_NET_MAX_RX_DESCS; + ring->tx_max_pending = NFP_NET_MAX_TX_DESCS; + ring->rx_pending = nn->rxd_cnt; + ring->tx_pending = nn->txd_cnt; +} + +static int nfp_net_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct nfp_net *nn = netdev_priv(netdev); + u32 rxd_cnt, txd_cnt; + + if (netif_running(netdev)) { + /* Some NIC drivers allow reconfiguration on the fly, + * some down the interface, change and then up it + * again. For now we don't allow changes when the + * device is up. + */ + nn_warn(nn, "Can't change rings while device is up\n"); + return -EBUSY; + } + + /* We don't have separate queues/rings for small/large frames. 
*/ + if (ring->rx_mini_pending || ring->rx_jumbo_pending) + return -EINVAL; + + /* Round up to supported values */ + rxd_cnt = roundup_pow_of_two(ring->rx_pending); + rxd_cnt = max_t(u32, rxd_cnt, NFP_NET_MIN_RX_DESCS); + rxd_cnt = min_t(u32, rxd_cnt, NFP_NET_MAX_RX_DESCS); + + txd_cnt = roundup_pow_of_two(ring->tx_pending); + txd_cnt = max_t(u32, txd_cnt, NFP_NET_MIN_TX_DESCS); + txd_cnt = min_t(u32, txd_cnt, NFP_NET_MAX_TX_DESCS); + + if (nn->rxd_cnt != rxd_cnt || nn->txd_cnt != txd_cnt) + nn_dbg(nn, "Change ring size: RxQ %u->%u, TxQ %u->%u\n", + nn->rxd_cnt, rxd_cnt, nn->txd_cnt, txd_cnt); + + nn->rxd_cnt = rxd_cnt; + nn->txd_cnt = txd_cnt; + + return 0; +} + +static void nfp_net_get_strings(struct net_device *netdev, + u32 stringset, u8 *data) +{ + struct nfp_net *nn = netdev_priv(netdev); + u8 *p = data; + int i; + + switch (stringset) { + case ETH_SS_STATS: + for (i = 0; i < NN_ET_GLOBAL_STATS_LEN; i++) { + memcpy(p, nfp_net_et_stats[i].name, ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + for (i = 0; i < nn->num_r_vecs; i++) { + sprintf(p, "rvec_%u_rx_pkts", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rvec_%u_tx_pkts", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rvec_%u_tx_busy", i); + p += ETH_GSTRING_LEN; + } + strncpy(p, "hw_rx_csum_ok", ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + strncpy(p, "hw_rx_csum_inner_ok", ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + strncpy(p, "hw_rx_csum_err", ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + strncpy(p, "hw_tx_csum", ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + strncpy(p, "hw_tx_inner_csum", ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + strncpy(p, "tx_gather", ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + strncpy(p, "tx_lso", ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + for (i = 0; i < nn->num_tx_rings; i++) { + sprintf(p, "txq_%u_pkts", i); + p += ETH_GSTRING_LEN; + sprintf(p, "txq_%u_bytes", i); + p += ETH_GSTRING_LEN; + } + for (i = 0; i < nn->num_rx_rings; i++) { + sprintf(p, "rxq_%u_pkts", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rxq_%u_bytes", i); + p += ETH_GSTRING_LEN; + } + break; + } +} + +static void nfp_net_get_stats(struct net_device *netdev, + struct ethtool_stats *stats, u64 *data) +{ + u64 gathered_stats[NN_ET_RVEC_GATHER_STATS] = {}; + struct nfp_net *nn = netdev_priv(netdev); + struct rtnl_link_stats64 *netdev_stats; + struct rtnl_link_stats64 temp = {}; + u64 tmp[NN_ET_RVEC_GATHER_STATS]; + u8 __iomem *io_p; + int i, j, k; + u8 *p; + + netdev_stats = dev_get_stats(netdev, &temp); + + for (i = 0; i < NN_ET_GLOBAL_STATS_LEN; i++) { + switch (nfp_net_et_stats[i].type) { + case NETDEV_ET_STATS: + p = (char *)netdev_stats + nfp_net_et_stats[i].off; + data[i] = nfp_net_et_stats[i].sz == sizeof(u64) ? 
+ *(u64 *)p : *(u32 *)p; + break; + + case NFP_NET_DEV_ET_STATS: + io_p = nn->ctrl_bar + nfp_net_et_stats[i].off; + data[i] = readq(io_p); + break; + } + } + for (j = 0; j < nn->num_r_vecs; j++) { + unsigned int start; + + do { + start = u64_stats_fetch_begin(&nn->r_vecs[j].rx_sync); + data[i++] = nn->r_vecs[j].rx_pkts; + tmp[0] = nn->r_vecs[j].hw_csum_rx_ok; + tmp[1] = nn->r_vecs[j].hw_csum_rx_inner_ok; + tmp[2] = nn->r_vecs[j].hw_csum_rx_error; + } while (u64_stats_fetch_retry(&nn->r_vecs[j].rx_sync, start)); + + do { + start = u64_stats_fetch_begin(&nn->r_vecs[j].tx_sync); + data[i++] = nn->r_vecs[j].tx_pkts; + data[i++] = nn->r_vecs[j].tx_busy; + tmp[3] = nn->r_vecs[j].hw_csum_tx; + tmp[4] = nn->r_vecs[j].hw_csum_tx_inner; + tmp[5] = nn->r_vecs[j].tx_gather; + tmp[6] = nn->r_vecs[j].tx_lso; + } while (u64_stats_fetch_retry(&nn->r_vecs[j].tx_sync, start)); + + for (k = 0; k < NN_ET_RVEC_GATHER_STATS; k++) + gathered_stats[k] += tmp[k]; + } + for (j = 0; j < NN_ET_RVEC_GATHER_STATS; j++) + data[i++] = gathered_stats[j]; + for (j = 0; j < nn->num_tx_rings; j++) { + io_p = nn->ctrl_bar + NFP_NET_CFG_TXR_STATS(j); + data[i++] = readq(io_p); + io_p = nn->ctrl_bar + NFP_NET_CFG_TXR_STATS(j) + 8; + data[i++] = readq(io_p); + } + for (j = 0; j < nn->num_rx_rings; j++) { + io_p = nn->ctrl_bar + NFP_NET_CFG_RXR_STATS(j); + data[i++] = readq(io_p); + io_p = nn->ctrl_bar + NFP_NET_CFG_RXR_STATS(j) + 8; + data[i++] = readq(io_p); + } +} + +static int nfp_net_get_sset_count(struct net_device *netdev, int sset) +{ + struct nfp_net *nn = netdev_priv(netdev); + + switch (sset) { + case ETH_SS_STATS: + return NN_ET_STATS_LEN; + default: + return -EOPNOTSUPP; + } +} + +/* RX network flow classification (RSS, filters, etc) + */ +static u32 ethtool_flow_to_nfp_flag(u32 flow_type) +{ + static const u32 xlate_ethtool_to_nfp[IPV6_FLOW + 1] = { + [TCP_V4_FLOW] = NFP_NET_CFG_RSS_IPV4_TCP, + [TCP_V6_FLOW] = NFP_NET_CFG_RSS_IPV6_TCP, + [UDP_V4_FLOW] = NFP_NET_CFG_RSS_IPV4_UDP, + [UDP_V6_FLOW] = NFP_NET_CFG_RSS_IPV6_UDP, + [IPV4_FLOW] = NFP_NET_CFG_RSS_IPV4, + [IPV6_FLOW] = NFP_NET_CFG_RSS_IPV6, + }; + + if (flow_type >= ARRAY_SIZE(xlate_ethtool_to_nfp)) + return 0; + + return xlate_ethtool_to_nfp[flow_type]; +} + +static int nfp_net_get_rss_hash_opts(struct nfp_net *nn, + struct ethtool_rxnfc *cmd) +{ + u32 nfp_rss_flag; + + cmd->data = 0; + + if (!(nn->cap & NFP_NET_CFG_CTRL_RSS)) + return -EOPNOTSUPP; + + nfp_rss_flag = ethtool_flow_to_nfp_flag(cmd->flow_type); + if (!nfp_rss_flag) + return -EINVAL; + + cmd->data |= RXH_IP_SRC | RXH_IP_DST; + if (nn->rss_cfg & nfp_rss_flag) + cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + + return 0; +} + +static int nfp_net_get_rxnfc(struct net_device *netdev, + struct ethtool_rxnfc *cmd, u32 *rule_locs) +{ + struct nfp_net *nn = netdev_priv(netdev); + + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + cmd->data = nn->num_rx_rings; + return 0; + case ETHTOOL_GRXFH: + return nfp_net_get_rss_hash_opts(nn, cmd); + default: + return -EOPNOTSUPP; + } +} + +static int nfp_net_set_rss_hash_opt(struct nfp_net *nn, + struct ethtool_rxnfc *nfc) +{ + u32 new_rss_cfg = nn->rss_cfg; + u32 nfp_rss_flag; + int err; + + if (!(nn->cap & NFP_NET_CFG_CTRL_RSS)) + return -EOPNOTSUPP; + + /* RSS only supports IP SA/DA and L4 src/dst ports */ + if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST | + RXH_L4_B_0_1 | RXH_L4_B_2_3)) + return -EINVAL; + + /* We need at least the IP SA/DA fields for hashing */ + if (!(nfc->data & RXH_IP_SRC) || + !(nfc->data & RXH_IP_DST)) + return -EINVAL; + + nfp_rss_flag = 
ethtool_flow_to_nfp_flag(nfc->flow_type); + if (!nfp_rss_flag) + return -EINVAL; + + switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + case 0: + new_rss_cfg &= ~nfp_rss_flag; + break; + case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + new_rss_cfg |= nfp_rss_flag; + break; + default: + return -EINVAL; + } + + new_rss_cfg |= NFP_NET_CFG_RSS_TOEPLITZ; + new_rss_cfg |= NFP_NET_CFG_RSS_MASK; + + if (new_rss_cfg == nn->rss_cfg) + return 0; + + writel(new_rss_cfg, nn->ctrl_bar + NFP_NET_CFG_RSS_CTRL); + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_RSS); + if (err) + return err; + + nn->rss_cfg = new_rss_cfg; + + nn_dbg(nn, "Changed RSS config to 0x%x\n", nn->rss_cfg); + return 0; +} + +static int nfp_net_set_rxnfc(struct net_device *netdev, + struct ethtool_rxnfc *cmd) +{ + struct nfp_net *nn = netdev_priv(netdev); + + switch (cmd->cmd) { + case ETHTOOL_SRXFH: + return nfp_net_set_rss_hash_opt(nn, cmd); + default: + return -EOPNOTSUPP; + } +} + +static u32 nfp_net_get_rxfh_indir_size(struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + + if (!(nn->cap & NFP_NET_CFG_CTRL_RSS)) + return 0; + + return ARRAY_SIZE(nn->rss_itbl); +} + +static u32 nfp_net_get_rxfh_key_size(struct net_device *netdev) +{ + return NFP_NET_CFG_RSS_KEY_SZ; +} + +static int nfp_net_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, + u8 *hfunc) +{ + struct nfp_net *nn = netdev_priv(netdev); + int i; + + if (!(nn->cap & NFP_NET_CFG_CTRL_RSS)) + return -EOPNOTSUPP; + + if (indir) + for (i = 0; i < ARRAY_SIZE(nn->rss_itbl); i++) + indir[i] = nn->rss_itbl[i]; + if (key) + memcpy(key, nn->rss_key, NFP_NET_CFG_RSS_KEY_SZ); + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + + return 0; +} + +static int nfp_net_set_rxfh(struct net_device *netdev, + const u32 *indir, const u8 *key, + const u8 hfunc) +{ + struct nfp_net *nn = netdev_priv(netdev); + int i; + + if (!(nn->cap & NFP_NET_CFG_CTRL_RSS) || + !(hfunc == ETH_RSS_HASH_NO_CHANGE || hfunc == ETH_RSS_HASH_TOP)) + return -EOPNOTSUPP; + + if (!key && !indir) + return 0; + + if (key) { + memcpy(nn->rss_key, key, NFP_NET_CFG_RSS_KEY_SZ); + nfp_net_rss_write_key(nn); + } + if (indir) { + for (i = 0; i < ARRAY_SIZE(nn->rss_itbl); i++) + nn->rss_itbl[i] = indir[i]; + + nfp_net_rss_write_itbl(nn); + } + + return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_RSS); +} + +/* Dump BAR registers + */ +static int nfp_net_get_regs_len(struct net_device *netdev) +{ + return NFP_NET_CFG_BAR_SZ; +} + +static void nfp_net_get_regs(struct net_device *netdev, + struct ethtool_regs *regs, void *p) +{ + struct nfp_net *nn = netdev_priv(netdev); + u32 *regs_buf = p; + int i; + + regs->version = nn_readl(nn, NFP_NET_CFG_VERSION); + + for (i = 0; i < NFP_NET_CFG_BAR_SZ / sizeof(u32); i++) + regs_buf[i] = readl(nn->ctrl_bar + (i * sizeof(u32))); +} + +static int nfp_net_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec) +{ + struct nfp_net *nn = netdev_priv(netdev); + + if (!(nn->cap & NFP_NET_CFG_CTRL_IRQMOD)) + return -EINVAL; + + ec->rx_coalesce_usecs = nn->rx_coalesce_usecs; + ec->rx_max_coalesced_frames = nn->rx_coalesce_max_frames; + ec->tx_coalesce_usecs = nn->tx_coalesce_usecs; + ec->tx_max_coalesced_frames = nn->tx_coalesce_max_frames; + + return 0; +} + +static int nfp_net_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec) +{ + struct nfp_net *nn = netdev_priv(netdev); + unsigned int factor; + + if (ec->rx_coalesce_usecs_irq || + ec->rx_max_coalesced_frames_irq || + ec->tx_coalesce_usecs_irq || + ec->tx_max_coalesced_frames_irq || + 
ec->stats_block_coalesce_usecs || + ec->use_adaptive_rx_coalesce || + ec->use_adaptive_tx_coalesce || + ec->pkt_rate_low || + ec->rx_coalesce_usecs_low || + ec->rx_max_coalesced_frames_low || + ec->tx_coalesce_usecs_low || + ec->tx_max_coalesced_frames_low || + ec->pkt_rate_high || + ec->rx_coalesce_usecs_high || + ec->rx_max_coalesced_frames_high || + ec->tx_coalesce_usecs_high || + ec->tx_max_coalesced_frames_high || + ec->rate_sample_interval) + return -ENOTSUPP; + + /* Compute factor used to convert coalesce '_usecs' parameters to + * ME timestamp ticks. There are 16 ME clock cycles for each timestamp + * count. + */ + factor = nn->me_freq_mhz / 16; + + /* Each pair of (usecs, max_frames) fields specifies that interrupts + * should be coalesced until + * (usecs > 0 && time_since_first_completion >= usecs) || + * (max_frames > 0 && completed_frames >= max_frames) + * + * It is illegal to set both usecs and max_frames to zero as this would + * cause interrupts to never be generated. To disable coalescing, set + * usecs = 0 and max_frames = 1. + * + * Some implementations ignore the value of max_frames and use the + * condition time_since_first_completion >= usecs + */ + + if (!(nn->cap & NFP_NET_CFG_CTRL_IRQMOD)) + return -EINVAL; + + /* ensure valid configuration */ + if (!ec->rx_coalesce_usecs && !ec->rx_max_coalesced_frames) + return -EINVAL; + + if (!ec->tx_coalesce_usecs && !ec->tx_max_coalesced_frames) + return -EINVAL; + + if (ec->rx_coalesce_usecs * factor >= ((1 << 16) - 1)) + return -EINVAL; + + if (ec->tx_coalesce_usecs * factor >= ((1 << 16) - 1)) + return -EINVAL; + + if (ec->rx_max_coalesced_frames >= ((1 << 16) - 1)) + return -EINVAL; + + if (ec->tx_max_coalesced_frames >= ((1 << 16) - 1)) + return -EINVAL; + + /* configuration is valid */ + nn->rx_coalesce_usecs = ec->rx_coalesce_usecs; + nn->rx_coalesce_max_frames = ec->rx_max_coalesced_frames; + nn->tx_coalesce_usecs = ec->tx_coalesce_usecs; + nn->tx_coalesce_max_frames = ec->tx_max_coalesced_frames; + + /* write configuration to device */ + nfp_net_coalesce_write_cfg(nn); + return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_IRQMOD); +} + +static const struct ethtool_ops nfp_net_ethtool_ops = { + .get_drvinfo = nfp_net_get_drvinfo, + .get_ringparam = nfp_net_get_ringparam, + .set_ringparam = nfp_net_set_ringparam, + .get_strings = nfp_net_get_strings, + .get_ethtool_stats = nfp_net_get_stats, + .get_sset_count = nfp_net_get_sset_count, + .get_rxnfc = nfp_net_get_rxnfc, + .set_rxnfc = nfp_net_set_rxnfc, + .get_rxfh_indir_size = nfp_net_get_rxfh_indir_size, + .get_rxfh_key_size = nfp_net_get_rxfh_key_size, + .get_rxfh = nfp_net_get_rxfh, + .set_rxfh = nfp_net_set_rxfh, + .get_regs_len = nfp_net_get_regs_len, + .get_regs = nfp_net_get_regs, + .get_coalesce = nfp_net_get_coalesce, + .set_coalesce = nfp_net_set_coalesce, +}; + +void nfp_net_set_ethtool_ops(struct net_device *netdev) +{ + netdev->ethtool_ops = &nfp_net_ethtool_ops; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c new file mode 100644 index 000000000000..e2b22b8a20f1 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c @@ -0,0 +1,385 @@ +/* + * Copyright (C) 2015 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. 
You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * nfp_netvf_main.c + * Netronome virtual function network device driver: Main entry point + * Author: Jason McMullan <jason.mcmullan@netronome.com> + * Rolf Neugebauer <rolf.neugebauer@netronome.com> + */ + +#include <linux/version.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/etherdevice.h> + +#include "nfp_net_ctrl.h" +#include "nfp_net.h" + +const char nfp_net_driver_name[] = "nfp_netvf"; +const char nfp_net_driver_version[] = "0.1"; +#define PCI_DEVICE_NFP6000VF 0x6003 +static const struct pci_device_id nfp_netvf_pci_device_ids[] = { + { PCI_VENDOR_ID_NETRONOME, PCI_DEVICE_NFP6000VF, + PCI_VENDOR_ID_NETRONOME, PCI_ANY_ID, + PCI_ANY_ID, 0, + }, + { 0, } /* Required last entry. */ +}; +MODULE_DEVICE_TABLE(pci, nfp_netvf_pci_device_ids); + +static void nfp_netvf_get_mac_addr(struct nfp_net *nn) +{ + u8 mac_addr[ETH_ALEN]; + + put_unaligned_be32(nn_readl(nn, NFP_NET_CFG_MACADDR + 0), &mac_addr[0]); + /* We can't do readw for NFP-3200 compatibility */ + put_unaligned_be16(nn_readl(nn, NFP_NET_CFG_MACADDR + 4) >> 16, + &mac_addr[4]); + + if (!is_valid_ether_addr(mac_addr)) { + eth_hw_addr_random(nn->netdev); + return; + } + + ether_addr_copy(nn->netdev->dev_addr, mac_addr); + ether_addr_copy(nn->netdev->perm_addr, mac_addr); +} + +static int nfp_netvf_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *pci_id) +{ + struct nfp_net_fw_version fw_ver; + int max_tx_rings, max_rx_rings; + u32 tx_bar_off, rx_bar_off; + u32 tx_bar_sz, rx_bar_sz; + int tx_bar_no, rx_bar_no; + u8 __iomem *ctrl_bar; + struct nfp_net *nn; + int is_nfp3200; + u32 startq; + int stride; + int err; + + err = pci_enable_device_mem(pdev); + if (err) + return err; + + err = pci_request_regions(pdev, nfp_net_driver_name); + if (err) { + dev_err(&pdev->dev, "Unable to allocate device memory.\n"); + goto err_pci_disable; + } + + switch (pdev->device) { + case PCI_DEVICE_NFP6000VF: + is_nfp3200 = 0; + break; + default: + err = -ENODEV; + goto err_pci_regions; + } + + pci_set_master(pdev); + + err = dma_set_mask_and_coherent(&pdev->dev, + DMA_BIT_MASK(NFP_NET_MAX_DMA_BITS)); + if (err) + goto err_pci_regions; + + /* Map the Control BAR. + * + * Irrespective of the advertised BAR size we only map the + * first NFP_NET_CFG_BAR_SZ of the BAR. This keeps the code + * the identical for PF and VF drivers. 
+ */ + ctrl_bar = ioremap_nocache(pci_resource_start(pdev, NFP_NET_CRTL_BAR), + NFP_NET_CFG_BAR_SZ); + if (!ctrl_bar) { + dev_err(&pdev->dev, + "Failed to map resource %d\n", NFP_NET_CRTL_BAR); + err = -EIO; + goto err_pci_regions; + } + + nfp_net_get_fw_version(&fw_ver, ctrl_bar); + if (fw_ver.class != NFP_NET_CFG_VERSION_CLASS_GENERIC) { + dev_err(&pdev->dev, "Unknown Firmware ABI %d.%d.%d.%d\n", + fw_ver.resv, fw_ver.class, fw_ver.major, fw_ver.minor); + err = -EINVAL; + goto err_ctrl_unmap; + } + + /* Determine stride */ + if (nfp_net_fw_ver_eq(&fw_ver, 0, 0, 0, 0) || + nfp_net_fw_ver_eq(&fw_ver, 0, 0, 0, 1) || + nfp_net_fw_ver_eq(&fw_ver, 0, 0, 0x12, 0x48)) { + stride = 2; + tx_bar_no = NFP_NET_Q0_BAR; + rx_bar_no = NFP_NET_Q1_BAR; + dev_warn(&pdev->dev, "OBSOLETE Firmware detected - VF isolation not available\n"); + } else { + switch (fw_ver.major) { + case 1 ... 3: + if (is_nfp3200) { + stride = 2; + tx_bar_no = NFP_NET_Q0_BAR; + rx_bar_no = NFP_NET_Q1_BAR; + } else { + stride = 4; + tx_bar_no = NFP_NET_Q0_BAR; + rx_bar_no = tx_bar_no; + } + break; + default: + dev_err(&pdev->dev, "Unsupported Firmware ABI %d.%d.%d.%d\n", + fw_ver.resv, fw_ver.class, + fw_ver.major, fw_ver.minor); + err = -EINVAL; + goto err_ctrl_unmap; + } + } + + /* Find out how many rings are supported */ + max_tx_rings = readl(ctrl_bar + NFP_NET_CFG_MAX_TXRINGS); + max_rx_rings = readl(ctrl_bar + NFP_NET_CFG_MAX_RXRINGS); + + tx_bar_sz = NFP_QCP_QUEUE_ADDR_SZ * max_tx_rings * stride; + rx_bar_sz = NFP_QCP_QUEUE_ADDR_SZ * max_rx_rings * stride; + + /* Sanity checks */ + if (tx_bar_sz > pci_resource_len(pdev, tx_bar_no)) { + dev_err(&pdev->dev, + "TX BAR too small for number of TX rings. Adjusting\n"); + tx_bar_sz = pci_resource_len(pdev, tx_bar_no); + max_tx_rings = (tx_bar_sz / NFP_QCP_QUEUE_ADDR_SZ) / 2; + } + if (rx_bar_sz > pci_resource_len(pdev, rx_bar_no)) { + dev_err(&pdev->dev, + "RX BAR too small for number of RX rings. Adjusting\n"); + rx_bar_sz = pci_resource_len(pdev, rx_bar_no); + max_rx_rings = (rx_bar_sz / NFP_QCP_QUEUE_ADDR_SZ) / 2; + } + + /* XXX Implement a workaround for THB-350 here. Ideally, we + * have a different PCI ID for A rev VFs. 
+ */ + switch (pdev->device) { + case PCI_DEVICE_NFP6000VF: + startq = readl(ctrl_bar + NFP_NET_CFG_START_TXQ); + tx_bar_off = NFP_PCIE_QUEUE(startq); + startq = readl(ctrl_bar + NFP_NET_CFG_START_RXQ); + rx_bar_off = NFP_PCIE_QUEUE(startq); + break; + default: + err = -ENODEV; + goto err_ctrl_unmap; + } + + /* Allocate and initialise the netdev */ + nn = nfp_net_netdev_alloc(pdev, max_tx_rings, max_rx_rings); + if (IS_ERR(nn)) { + err = PTR_ERR(nn); + goto err_ctrl_unmap; + } + + nn->fw_ver = fw_ver; + nn->ctrl_bar = ctrl_bar; + nn->is_vf = 1; + nn->is_nfp3200 = is_nfp3200; + nn->stride_tx = stride; + nn->stride_rx = stride; + + if (rx_bar_no == tx_bar_no) { + u32 bar_off, bar_sz; + resource_size_t map_addr; + + /* Make a single overlapping BAR mapping */ + if (tx_bar_off < rx_bar_off) + bar_off = tx_bar_off; + else + bar_off = rx_bar_off; + + if ((tx_bar_off + tx_bar_sz) > (rx_bar_off + rx_bar_sz)) + bar_sz = (tx_bar_off + tx_bar_sz) - bar_off; + else + bar_sz = (rx_bar_off + rx_bar_sz) - bar_off; + + map_addr = pci_resource_start(pdev, tx_bar_no) + bar_off; + nn->q_bar = ioremap_nocache(map_addr, bar_sz); + if (!nn->q_bar) { + nn_err(nn, "Failed to map resource %d\n", tx_bar_no); + err = -EIO; + goto err_netdev_free; + } + + /* TX queues */ + nn->tx_bar = nn->q_bar + (tx_bar_off - bar_off); + /* RX queues */ + nn->rx_bar = nn->q_bar + (rx_bar_off - bar_off); + } else { + resource_size_t map_addr; + + /* TX queues */ + map_addr = pci_resource_start(pdev, tx_bar_no) + tx_bar_off; + nn->tx_bar = ioremap_nocache(map_addr, tx_bar_sz); + if (!nn->tx_bar) { + nn_err(nn, "Failed to map resource %d\n", tx_bar_no); + err = -EIO; + goto err_netdev_free; + } + + /* RX queues */ + map_addr = pci_resource_start(pdev, rx_bar_no) + rx_bar_off; + nn->rx_bar = ioremap_nocache(map_addr, rx_bar_sz); + if (!nn->rx_bar) { + nn_err(nn, "Failed to map resource %d\n", rx_bar_no); + err = -EIO; + goto err_unmap_tx; + } + } + + nfp_netvf_get_mac_addr(nn); + + err = nfp_net_irqs_alloc(nn); + if (!err) { + nn_warn(nn, "Unable to allocate MSI-X Vectors. Exiting\n"); + err = -EIO; + goto err_unmap_rx; + } + + /* Get ME clock frequency from ctrl BAR + * XXX for now frequency is hardcoded until we figure out how + * to get the value from nfp-hwinfo into ctrl bar + */ + nn->me_freq_mhz = 1200; + + err = nfp_net_netdev_init(nn->netdev); + if (err) + goto err_irqs_disable; + + pci_set_drvdata(pdev, nn); + + nfp_net_info(nn); + nfp_net_debugfs_adapter_add(nn); + + return 0; + +err_irqs_disable: + nfp_net_irqs_disable(nn); +err_unmap_rx: + if (!nn->q_bar) + iounmap(nn->rx_bar); +err_unmap_tx: + if (!nn->q_bar) + iounmap(nn->tx_bar); + else + iounmap(nn->q_bar); +err_netdev_free: + pci_set_drvdata(pdev, NULL); + nfp_net_netdev_free(nn); +err_ctrl_unmap: + iounmap(ctrl_bar); +err_pci_regions: + pci_release_regions(pdev); +err_pci_disable: + pci_disable_device(pdev); + return err; +} + +static void nfp_netvf_pci_remove(struct pci_dev *pdev) +{ + struct nfp_net *nn = pci_get_drvdata(pdev); + + /* Note, the order is slightly different from above as we need + * to keep the nn pointer around till we have freed everything. 
+ */ + nfp_net_debugfs_adapter_del(nn); + + nfp_net_netdev_clean(nn->netdev); + + nfp_net_irqs_disable(nn); + + if (!nn->q_bar) { + iounmap(nn->rx_bar); + iounmap(nn->tx_bar); + } else { + iounmap(nn->q_bar); + } + iounmap(nn->ctrl_bar); + + pci_set_drvdata(pdev, NULL); + + nfp_net_netdev_free(nn); + + pci_release_regions(pdev); + pci_disable_device(pdev); +} + +static struct pci_driver nfp_netvf_pci_driver = { + .name = nfp_net_driver_name, + .id_table = nfp_netvf_pci_device_ids, + .probe = nfp_netvf_pci_probe, + .remove = nfp_netvf_pci_remove, +}; + +static int __init nfp_netvf_init(void) +{ + int err; + + pr_info("%s: NFP VF Network driver, Copyright (C) 2014-2015 Netronome Systems\n", + nfp_net_driver_name); + + nfp_net_debugfs_create(); + err = pci_register_driver(&nfp_netvf_pci_driver); + if (err) { + nfp_net_debugfs_destroy(); + return err; + } + + return 0; +} + +static void __exit nfp_netvf_exit(void) +{ + pci_unregister_driver(&nfp_netvf_pci_driver); + nfp_net_debugfs_destroy(); +} + +module_init(nfp_netvf_init); +module_exit(nfp_netvf_exit); + +MODULE_AUTHOR("Netronome Systems <oss-drivers@netronome.com>"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("NFP VF network device driver"); diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index b2f8e854dfd1..264e954675d1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -3993,6 +3993,8 @@ struct public_drv_mb { #define DRV_MSG_CODE_PHY_CORE_WRITE 0x000e0000 #define DRV_MSG_CODE_SET_VERSION 0x000f0000 +#define DRV_MSG_CODE_SET_LED_MODE 0x00200000 + #define DRV_MSG_SEQ_NUMBER_MASK 0x0000ffff u32 drv_mb_param; @@ -4044,6 +4046,10 @@ struct public_drv_mb { #define DRV_MB_PARAM_CFG_VF_MSIX_SB_NUM_SHIFT 8 #define DRV_MB_PARAM_CFG_VF_MSIX_SB_NUM_MASK 0x0000FF00 +#define DRV_MB_PARAM_SET_LED_MODE_OPER 0x0 +#define DRV_MB_PARAM_SET_LED_MODE_ON 0x1 +#define DRV_MB_PARAM_SET_LED_MODE_OFF 0x2 + u32 fw_mb_header; #define FW_MSG_CODE_MASK 0xffff0000 #define FW_MSG_CODE_DRV_LOAD_ENGINE 0x10100000 diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 947c7af72b25..6b02e1134360 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1135,6 +1135,23 @@ static int qed_drain(struct qed_dev *cdev) return 0; } +static int qed_set_led(struct qed_dev *cdev, enum qed_led_mode mode) +{ + struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); + struct qed_ptt *ptt; + int status = 0; + + ptt = qed_ptt_acquire(hwfn); + if (!ptt) + return -EAGAIN; + + status = qed_mcp_set_led(hwfn, ptt, mode); + + qed_ptt_release(hwfn, ptt); + + return status; +} + const struct qed_common_ops qed_common_ops_pass = { .probe = &qed_probe, .remove = &qed_remove, @@ -1155,6 +1172,7 @@ const struct qed_common_ops qed_common_ops_pass = { .update_msglvl = &qed_init_dp, .chain_alloc = &qed_chain_alloc, .chain_free = &qed_chain_free, + .set_led = &qed_set_led, }; u32 qed_get_protocol_version(enum qed_protocol protocol) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 20d048cdcb88..ba1b1f1ef789 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -858,3 +858,30 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn, return 0; } + +int qed_mcp_set_led(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + enum qed_led_mode mode) +{ + u32 resp = 0, param = 0, drv_mb_param; + int rc; + + switch 
(mode) { + case QED_LED_MODE_ON: + drv_mb_param = DRV_MB_PARAM_SET_LED_MODE_ON; + break; + case QED_LED_MODE_OFF: + drv_mb_param = DRV_MB_PARAM_SET_LED_MODE_OFF; + break; + case QED_LED_MODE_RESTORE: + drv_mb_param = DRV_MB_PARAM_SET_LED_MODE_OPER; + break; + default: + DP_NOTICE(p_hwfn, "Invalid LED mode %d\n", mode); + return -EINVAL; + } + + rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_SET_LED_MODE, + drv_mb_param, &resp, ¶m); + + return rc; +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index dbaae586b4a7..506197d5c3dd 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -224,6 +224,19 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, struct qed_mcp_drv_version *p_ver); +/** + * @brief Set LED status + * + * @param p_hwfn + * @param p_ptt + * @param mode - LED mode + * + * @return int - 0 - operation was successful. + */ +int qed_mcp_set_led(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum qed_led_mode mode); + /* Using hwfn number (and not pf_num) is required since in CMT mode, * same pf_num may be used by two different hwfn * TODO - this shouldn't really be in .h file, but until all fields diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index ea00d5f3bab4..7c6caf7f6612 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -116,6 +116,7 @@ struct qede_dev { (edev)->dev_info.num_tc) struct qede_fastpath *fp_array; + u16 req_rss; u16 num_rss; u8 num_tc; #define QEDE_RSS_CNT(edev) ((edev)->num_rss) @@ -269,13 +270,13 @@ int qede_change_mtu(struct net_device *dev, int new_mtu); void qede_fill_by_demand_stats(struct qede_dev *edev); #define RX_RING_SIZE_POW 13 -#define RX_RING_SIZE BIT(RX_RING_SIZE_POW) +#define RX_RING_SIZE ((u16)BIT(RX_RING_SIZE_POW)) #define NUM_RX_BDS_MAX (RX_RING_SIZE - 1) #define NUM_RX_BDS_MIN 128 #define NUM_RX_BDS_DEF NUM_RX_BDS_MAX #define TX_RING_SIZE_POW 13 -#define TX_RING_SIZE BIT(TX_RING_SIZE_POW) +#define TX_RING_SIZE ((u16)BIT(TX_RING_SIZE_POW)) #define NUM_TX_BDS_MAX (TX_RING_SIZE - 1) #define NUM_TX_BDS_MIN 128 #define NUM_TX_BDS_DEF NUM_TX_BDS_MAX diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 3a362476a22c..e442b85c9a5e 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -322,6 +322,30 @@ static void qede_set_msglevel(struct net_device *ndev, u32 level) dp_module, dp_level); } +static int qede_nway_reset(struct net_device *dev) +{ + struct qede_dev *edev = netdev_priv(dev); + struct qed_link_output current_link; + struct qed_link_params link_params; + + if (!netif_running(dev)) + return 0; + + memset(¤t_link, 0, sizeof(current_link)); + edev->ops->common->get_link(edev->cdev, ¤t_link); + if (!current_link.link_up) + return 0; + + /* Toggle the link */ + memset(&link_params, 0, sizeof(link_params)); + link_params.link_up = false; + edev->ops->common->set_link(edev->cdev, &link_params); + link_params.link_up = true; + edev->ops->common->set_link(edev->cdev, &link_params); + + return 0; +} + static u32 qede_get_link(struct net_device *dev) { struct qede_dev *edev = netdev_priv(dev); @@ -333,6 +357,106 @@ static u32 qede_get_link(struct net_device *dev) return current_link.link_up; } +static void qede_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *ering) +{ + struct 
qede_dev *edev = netdev_priv(dev); + + ering->rx_max_pending = NUM_RX_BDS_MAX; + ering->rx_pending = edev->q_num_rx_buffers; + ering->tx_max_pending = NUM_TX_BDS_MAX; + ering->tx_pending = edev->q_num_tx_buffers; +} + +static int qede_set_ringparam(struct net_device *dev, + struct ethtool_ringparam *ering) +{ + struct qede_dev *edev = netdev_priv(dev); + + DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), + "Set ring params command parameters: rx_pending = %d, tx_pending = %d\n", + ering->rx_pending, ering->tx_pending); + + /* Validate legality of configuration */ + if (ering->rx_pending > NUM_RX_BDS_MAX || + ering->rx_pending < NUM_RX_BDS_MIN || + ering->tx_pending > NUM_TX_BDS_MAX || + ering->tx_pending < NUM_TX_BDS_MIN) { + DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), + "Can only support Rx Buffer size [0%08x,...,0x%08x] and Tx Buffer size [0x%08x,...,0x%08x]\n", + NUM_RX_BDS_MIN, NUM_RX_BDS_MAX, + NUM_TX_BDS_MIN, NUM_TX_BDS_MAX); + return -EINVAL; + } + + /* Change ring size and re-load */ + edev->q_num_rx_buffers = ering->rx_pending; + edev->q_num_tx_buffers = ering->tx_pending; + + if (netif_running(edev->ndev)) + qede_reload(edev, NULL, NULL); + + return 0; +} + +static void qede_get_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *epause) +{ + struct qede_dev *edev = netdev_priv(dev); + struct qed_link_output current_link; + + memset(¤t_link, 0, sizeof(current_link)); + edev->ops->common->get_link(edev->cdev, ¤t_link); + + if (current_link.pause_config & QED_LINK_PAUSE_AUTONEG_ENABLE) + epause->autoneg = true; + if (current_link.pause_config & QED_LINK_PAUSE_RX_ENABLE) + epause->rx_pause = true; + if (current_link.pause_config & QED_LINK_PAUSE_TX_ENABLE) + epause->tx_pause = true; + + DP_VERBOSE(edev, QED_MSG_DEBUG, + "ethtool_pauseparam: cmd %d autoneg %d rx_pause %d tx_pause %d\n", + epause->cmd, epause->autoneg, epause->rx_pause, + epause->tx_pause); +} + +static int qede_set_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *epause) +{ + struct qede_dev *edev = netdev_priv(dev); + struct qed_link_params params; + struct qed_link_output current_link; + + if (!edev->dev_info.common.is_mf) { + DP_INFO(edev, + "Pause parameters can not be updated in non-default mode\n"); + return -EOPNOTSUPP; + } + + memset(¤t_link, 0, sizeof(current_link)); + edev->ops->common->get_link(edev->cdev, ¤t_link); + + memset(¶ms, 0, sizeof(params)); + params.override_flags |= QED_LINK_OVERRIDE_PAUSE_CONFIG; + if (epause->autoneg) { + if (!(current_link.supported_caps & SUPPORTED_Autoneg)) { + DP_INFO(edev, "autoneg not supported\n"); + return -EINVAL; + } + params.pause_config |= QED_LINK_PAUSE_AUTONEG_ENABLE; + } + if (epause->rx_pause) + params.pause_config |= QED_LINK_PAUSE_RX_ENABLE; + if (epause->tx_pause) + params.pause_config |= QED_LINK_PAUSE_TX_ENABLE; + + params.link_up = true; + edev->ops->common->set_link(edev->cdev, ¶ms); + + return 0; +} + static void qede_update_mtu(struct qede_dev *edev, union qede_reload_args *args) { edev->ndev->mtu = args->mtu; @@ -366,17 +490,104 @@ int qede_change_mtu(struct net_device *ndev, int new_mtu) return 0; } +static void qede_get_channels(struct net_device *dev, + struct ethtool_channels *channels) +{ + struct qede_dev *edev = netdev_priv(dev); + + channels->max_combined = QEDE_MAX_RSS_CNT(edev); + channels->combined_count = QEDE_RSS_CNT(edev); +} + +static int qede_set_channels(struct net_device *dev, + struct ethtool_channels *channels) +{ + struct qede_dev *edev = netdev_priv(dev); + + DP_VERBOSE(edev, 
(NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), + "set-channels command parameters: rx = %d, tx = %d, other = %d, combined = %d\n", + channels->rx_count, channels->tx_count, + channels->other_count, channels->combined_count); + + /* We don't support separate rx / tx, nor `other' channels. */ + if (channels->rx_count || channels->tx_count || + channels->other_count || (channels->combined_count == 0) || + (channels->combined_count > QEDE_MAX_RSS_CNT(edev))) { + DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), + "command parameters not supported\n"); + return -EINVAL; + } + + /* Check if there was a change in the active parameters */ + if (channels->combined_count == QEDE_RSS_CNT(edev)) { + DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), + "No change in active parameters\n"); + return 0; + } + + /* We need the number of queues to be divisible between the hwfns */ + if (channels->combined_count % edev->dev_info.common.num_hwfns) { + DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), + "Number of channels must be divisable by %04x\n", + edev->dev_info.common.num_hwfns); + return -EINVAL; + } + + /* Set number of queues and reload if necessary */ + edev->req_rss = channels->combined_count; + if (netif_running(dev)) + qede_reload(edev, NULL, NULL); + + return 0; +} + +static int qede_set_phys_id(struct net_device *dev, + enum ethtool_phys_id_state state) +{ + struct qede_dev *edev = netdev_priv(dev); + u8 led_state = 0; + + switch (state) { + case ETHTOOL_ID_ACTIVE: + return 1; /* cycle on/off once per second */ + + case ETHTOOL_ID_ON: + led_state = QED_LED_MODE_ON; + break; + + case ETHTOOL_ID_OFF: + led_state = QED_LED_MODE_OFF; + break; + + case ETHTOOL_ID_INACTIVE: + led_state = QED_LED_MODE_RESTORE; + break; + } + + edev->ops->common->set_led(edev->cdev, led_state); + + return 0; +} + static const struct ethtool_ops qede_ethtool_ops = { .get_settings = qede_get_settings, .set_settings = qede_set_settings, .get_drvinfo = qede_get_drvinfo, .get_msglevel = qede_get_msglevel, .set_msglevel = qede_set_msglevel, + .nway_reset = qede_nway_reset, .get_link = qede_get_link, + .get_ringparam = qede_get_ringparam, + .set_ringparam = qede_set_ringparam, + .get_pauseparam = qede_get_pauseparam, + .set_pauseparam = qede_set_pauseparam, .get_strings = qede_get_strings, + .set_phys_id = qede_set_phys_id, .get_ethtool_stats = qede_get_ethtool_stats, .get_sset_count = qede_get_sset_count, + .get_channels = qede_get_channels, + .set_channels = qede_set_channels, }; void qede_set_ethtool_ops(struct net_device *dev) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index f4657a2e730a..6237f10b5119 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -1502,8 +1502,11 @@ static int qede_set_num_queues(struct qede_dev *edev) u16 rss_num; /* Setup queues according to possible resources*/ - rss_num = netif_get_num_default_rss_queues() * - edev->dev_info.common.num_hwfns; + if (edev->req_rss) + rss_num = edev->req_rss; + else + rss_num = netif_get_num_default_rss_queues() * + edev->dev_info.common.num_hwfns; rss_num = min_t(u16, QEDE_MAX_RSS_CNT(edev), rss_num); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index d4b5085a21fa..7bd6f25b4625 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -1604,7 +1604,7 @@ int qlcnic_82xx_napi_add(struct qlcnic_adapter *adapter, if 
(qlcnic_check_multi_tx(adapter) && !adapter->ahw->diag_test) { for (ring = 0; ring < adapter->drv_tx_rings; ring++) { tx_ring = &adapter->tx_ring[ring]; - netif_napi_add(netdev, &tx_ring->napi, qlcnic_tx_poll, + netif_tx_napi_add(netdev, &tx_ring->napi, qlcnic_tx_poll, NAPI_POLL_WEIGHT); } } @@ -2135,7 +2135,7 @@ int qlcnic_83xx_napi_add(struct qlcnic_adapter *adapter, !(adapter->flags & QLCNIC_TX_INTR_SHARED)) { for (ring = 0; ring < adapter->drv_tx_rings; ring++) { tx_ring = &adapter->tx_ring[ring]; - netif_napi_add(netdev, &tx_ring->napi, + netif_tx_napi_add(netdev, &tx_ring->napi, qlcnic_83xx_msix_tx_poll, NAPI_POLL_WEIGHT); } diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index 0623fff932e4..9fbe92ac225b 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -206,6 +206,7 @@ enum CCC_BIT { CCC_OPC_RESET = 0x00000000, CCC_OPC_CONFIG = 0x00000001, CCC_OPC_OPERATION = 0x00000002, + CCC_GAC = 0x00000080, CCC_DTSR = 0x00000100, CCC_CSEL = 0x00030000, CCC_CSEL_HPB = 0x00010000, @@ -576,6 +577,9 @@ enum GTI_BIT { GTI_TIV = 0x0FFFFFFF, }; +#define GTI_TIV_MAX GTI_TIV +#define GTI_TIV_MIN 0x20 + /* GIC */ enum GIC_BIT { GIC_PTCE = 0x00000001, /* Undocumented? */ diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index ed5da4d47668..be019e769c83 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -32,6 +32,8 @@ #include <linux/slab.h> #include <linux/spinlock.h> +#include <asm/div64.h> + #include "ravb.h" #define RAVB_DEF_MSG_ENABLE \ @@ -1229,7 +1231,8 @@ static int ravb_open(struct net_device *ndev) ravb_emac_init(ndev); /* Initialise PTP Clock driver */ - ravb_ptp_init(ndev, priv->pdev); + if (priv->chip_id == RCAR_GEN2) + ravb_ptp_init(ndev, priv->pdev); netif_tx_start_all_queues(ndev); @@ -1242,7 +1245,8 @@ static int ravb_open(struct net_device *ndev) out_ptp_stop: /* Stop PTP Clock driver */ - ravb_ptp_stop(ndev); + if (priv->chip_id == RCAR_GEN2) + ravb_ptp_stop(ndev); out_free_irq2: if (priv->chip_id == RCAR_GEN3) free_irq(priv->emac_irq, ndev); @@ -1476,7 +1480,8 @@ static int ravb_close(struct net_device *ndev) ravb_write(ndev, 0, TIC); /* Stop PTP Clock driver */ - ravb_ptp_stop(ndev); + if (priv->chip_id == RCAR_GEN2) + ravb_ptp_stop(ndev); /* Set the config mode to stop the AVB-DMAC's processes */ if (ravb_stop_dma(ndev) < 0) @@ -1656,11 +1661,45 @@ static int ravb_mdio_release(struct ravb_private *priv) static const struct of_device_id ravb_match_table[] = { { .compatible = "renesas,etheravb-r8a7790", .data = (void *)RCAR_GEN2 }, { .compatible = "renesas,etheravb-r8a7794", .data = (void *)RCAR_GEN2 }, + { .compatible = "renesas,etheravb-rcar-gen2", .data = (void *)RCAR_GEN2 }, { .compatible = "renesas,etheravb-r8a7795", .data = (void *)RCAR_GEN3 }, + { .compatible = "renesas,etheravb-rcar-gen3", .data = (void *)RCAR_GEN3 }, { } }; MODULE_DEVICE_TABLE(of, ravb_match_table); +static int ravb_set_gti(struct net_device *ndev) +{ + + struct device *dev = ndev->dev.parent; + struct device_node *np = dev->of_node; + unsigned long rate; + struct clk *clk; + uint64_t inc; + + clk = of_clk_get(np, 0); + if (IS_ERR(clk)) { + dev_err(dev, "could not get clock\n"); + return PTR_ERR(clk); + } + + rate = clk_get_rate(clk); + clk_put(clk); + + inc = 1000000000ULL << 20; + do_div(inc, rate); + + if (inc < GTI_TIV_MIN || inc > GTI_TIV_MAX) { + dev_err(dev, "gti.tiv increment 0x%llx is outside the range 0x%x - 0x%x\n", + 
inc, GTI_TIV_MIN, GTI_TIV_MAX); + return -EINVAL; + } + + ravb_write(ndev, inc, GTI); + + return 0; +} + static int ravb_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; @@ -1749,15 +1788,25 @@ static int ravb_probe(struct platform_device *pdev) ndev->ethtool_ops = &ravb_ethtool_ops; /* Set AVB config mode */ - ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_OPC) | CCC_OPC_CONFIG, - CCC); + if (chip_id == RCAR_GEN2) { + ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_OPC) | + CCC_OPC_CONFIG, CCC); + /* Set CSEL value */ + ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_CSEL) | + CCC_CSEL_HPB, CCC); + } else { + ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_OPC) | + CCC_OPC_CONFIG | CCC_GAC | CCC_CSEL_HPB, CCC); + } /* Set CSEL value */ ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_CSEL) | CCC_CSEL_HPB, CCC); /* Set GTI value */ - ravb_write(ndev, ((1000 << 20) / 130) & GTI_TIV, GTI); + error = ravb_set_gti(ndev); + if (error) + goto out_release; /* Request GTI loading */ ravb_write(ndev, ravb_read(ndev, GCCR) | GCCR_LTI, GCCR); @@ -1780,6 +1829,10 @@ static int ravb_probe(struct platform_device *pdev) /* Initialise HW timestamp list */ INIT_LIST_HEAD(&priv->ts_skb_list); + /* Initialise PTP Clock driver */ + if (chip_id != RCAR_GEN2) + ravb_ptp_init(ndev, pdev); + /* Debug message level */ priv->msg_enable = RAVB_DEF_MSG_ENABLE; @@ -1821,6 +1874,10 @@ out_napi_del: out_dma_free: dma_free_coherent(ndev->dev.parent, priv->desc_bat_size, priv->desc_bat, priv->desc_bat_dma); + + /* Stop PTP Clock driver */ + if (chip_id != RCAR_GEN2) + ravb_ptp_stop(ndev); out_release: if (ndev) free_netdev(ndev); @@ -1835,6 +1892,10 @@ static int ravb_remove(struct platform_device *pdev) struct net_device *ndev = platform_get_drvdata(pdev); struct ravb_private *priv = netdev_priv(ndev); + /* Stop PTP Clock driver */ + if (priv->chip_id != RCAR_GEN2) + ravb_ptp_stop(ndev); + dma_free_coherent(ndev->dev.parent, priv->desc_bat_size, priv->desc_bat, priv->desc_bat_dma); /* Set reset mode */ diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index e7bab7909ed9..7f3c6109d45e 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -449,6 +449,109 @@ static void sh_eth_set_duplex(struct net_device *ndev) sh_eth_write(ndev, sh_eth_read(ndev, ECMR) & ~ECMR_DM, ECMR); } +static void sh_eth_chip_reset(struct net_device *ndev) +{ + struct sh_eth_private *mdp = netdev_priv(ndev); + + /* reset device */ + sh_eth_tsu_write(mdp, ARSTR_ARSTR, ARSTR); + mdelay(1); +} + +static void sh_eth_set_rate_gether(struct net_device *ndev) +{ + struct sh_eth_private *mdp = netdev_priv(ndev); + + switch (mdp->speed) { + case 10: /* 10BASE */ + sh_eth_write(ndev, GECMR_10, GECMR); + break; + case 100:/* 100BASE */ + sh_eth_write(ndev, GECMR_100, GECMR); + break; + case 1000: /* 1000BASE */ + sh_eth_write(ndev, GECMR_1000, GECMR); + break; + default: + break; + } +} + +#ifdef CONFIG_OF +/* R7S72100 */ +static struct sh_eth_cpu_data r7s72100_data = { + .chip_reset = sh_eth_chip_reset, + .set_duplex = sh_eth_set_duplex, + + .register_type = SH_ETH_REG_FAST_RZ, + + .ecsr_value = ECSR_ICD, + .ecsipr_value = ECSIPR_ICDIP, + .eesipr_value = 0xff7f009f, + + .tx_check = EESR_TC1 | EESR_FTC, + .eesr_err_check = EESR_TWB1 | EESR_TWB | EESR_TABT | EESR_RABT | + EESR_RFE | EESR_RDE | EESR_RFRMER | EESR_TFE | + EESR_TDE | EESR_ECI, + .fdr_value = 0x0000070f, + + .no_psr = 1, + .apr = 1, + .mpr = 1, + .tpauser = 1, + .hw_swap = 1, + .rpadir = 1, + 
.rpadir_value = 2 << 16, + .no_trimd = 1, + .no_ade = 1, + .hw_crc = 1, + .tsu = 1, + .shift_rd0 = 1, +}; + +static void sh_eth_chip_reset_r8a7740(struct net_device *ndev) +{ + struct sh_eth_private *mdp = netdev_priv(ndev); + + /* reset device */ + sh_eth_tsu_write(mdp, ARSTR_ARSTR, ARSTR); + mdelay(1); + + sh_eth_select_mii(ndev); +} + +/* R8A7740 */ +static struct sh_eth_cpu_data r8a7740_data = { + .chip_reset = sh_eth_chip_reset_r8a7740, + .set_duplex = sh_eth_set_duplex, + .set_rate = sh_eth_set_rate_gether, + + .register_type = SH_ETH_REG_GIGABIT, + + .ecsr_value = ECSR_ICD | ECSR_MPD, + .ecsipr_value = ECSIPR_LCHNGIP | ECSIPR_ICDIP | ECSIPR_MPDIP, + .eesipr_value = DMAC_M_RFRMER | DMAC_M_ECI | 0x003fffff, + + .tx_check = EESR_TC1 | EESR_FTC, + .eesr_err_check = EESR_TWB1 | EESR_TWB | EESR_TABT | EESR_RABT | + EESR_RFE | EESR_RDE | EESR_RFRMER | EESR_TFE | + EESR_TDE | EESR_ECI, + .fdr_value = 0x0000070f, + + .apr = 1, + .mpr = 1, + .tpauser = 1, + .bculr = 1, + .hw_swap = 1, + .rpadir = 1, + .rpadir_value = 2 << 16, + .no_trimd = 1, + .no_ade = 1, + .tsu = 1, + .select_mii = 1, + .shift_rd0 = 1, +}; + /* There is CPU dependent code */ static void sh_eth_set_rate_r8a777x(struct net_device *ndev) { @@ -514,6 +617,7 @@ static struct sh_eth_cpu_data r8a779x_data = { .hw_swap = 1, .rmiimode = 1, }; +#endif /* CONFIG_OF */ static void sh_eth_set_rate_sh7724(struct net_device *ndev) { @@ -671,34 +775,6 @@ static struct sh_eth_cpu_data sh7757_data_giga = { .tsu = 1, }; -static void sh_eth_chip_reset(struct net_device *ndev) -{ - struct sh_eth_private *mdp = netdev_priv(ndev); - - /* reset device */ - sh_eth_tsu_write(mdp, ARSTR_ARSTR, ARSTR); - mdelay(1); -} - -static void sh_eth_set_rate_gether(struct net_device *ndev) -{ - struct sh_eth_private *mdp = netdev_priv(ndev); - - switch (mdp->speed) { - case 10: /* 10BASE */ - sh_eth_write(ndev, GECMR_10, GECMR); - break; - case 100:/* 100BASE */ - sh_eth_write(ndev, GECMR_100, GECMR); - break; - case 1000: /* 1000BASE */ - sh_eth_write(ndev, GECMR_1000, GECMR); - break; - default: - break; - } -} - /* SH7734 */ static struct sh_eth_cpu_data sh7734_data = { .chip_reset = sh_eth_chip_reset, @@ -756,80 +832,6 @@ static struct sh_eth_cpu_data sh7763_data = { .irq_flags = IRQF_SHARED, }; -static void sh_eth_chip_reset_r8a7740(struct net_device *ndev) -{ - struct sh_eth_private *mdp = netdev_priv(ndev); - - /* reset device */ - sh_eth_tsu_write(mdp, ARSTR_ARSTR, ARSTR); - mdelay(1); - - sh_eth_select_mii(ndev); -} - -/* R8A7740 */ -static struct sh_eth_cpu_data r8a7740_data = { - .chip_reset = sh_eth_chip_reset_r8a7740, - .set_duplex = sh_eth_set_duplex, - .set_rate = sh_eth_set_rate_gether, - - .register_type = SH_ETH_REG_GIGABIT, - - .ecsr_value = ECSR_ICD | ECSR_MPD, - .ecsipr_value = ECSIPR_LCHNGIP | ECSIPR_ICDIP | ECSIPR_MPDIP, - .eesipr_value = DMAC_M_RFRMER | DMAC_M_ECI | 0x003fffff, - - .tx_check = EESR_TC1 | EESR_FTC, - .eesr_err_check = EESR_TWB1 | EESR_TWB | EESR_TABT | EESR_RABT | - EESR_RFE | EESR_RDE | EESR_RFRMER | EESR_TFE | - EESR_TDE | EESR_ECI, - .fdr_value = 0x0000070f, - - .apr = 1, - .mpr = 1, - .tpauser = 1, - .bculr = 1, - .hw_swap = 1, - .rpadir = 1, - .rpadir_value = 2 << 16, - .no_trimd = 1, - .no_ade = 1, - .tsu = 1, - .select_mii = 1, - .shift_rd0 = 1, -}; - -/* R7S72100 */ -static struct sh_eth_cpu_data r7s72100_data = { - .chip_reset = sh_eth_chip_reset, - .set_duplex = sh_eth_set_duplex, - - .register_type = SH_ETH_REG_FAST_RZ, - - .ecsr_value = ECSR_ICD, - .ecsipr_value = ECSIPR_ICDIP, - .eesipr_value = 0xff7f009f, - 
-	.tx_check = EESR_TC1 | EESR_FTC,
-	.eesr_err_check = EESR_TWB1 | EESR_TWB | EESR_TABT | EESR_RABT |
-			  EESR_RFE | EESR_RDE | EESR_RFRMER | EESR_TFE |
-			  EESR_TDE | EESR_ECI,
-	.fdr_value = 0x0000070f,
-
-	.no_psr = 1,
-	.apr = 1,
-	.mpr = 1,
-	.tpauser = 1,
-	.hw_swap = 1,
-	.rpadir = 1,
-	.rpadir_value = 2 << 16,
-	.no_trimd = 1,
-	.no_ade = 1,
-	.hw_crc = 1,
-	.tsu = 1,
-	.shift_rd0 = 1,
-};
-
 static struct sh_eth_cpu_data sh7619_data = {
 	.register_type = SH_ETH_REG_FAST_SH3_SH2,
@@ -3277,13 +3279,6 @@ static struct platform_device_id sh_eth_id_table[] = {
 	{ "sh7757-ether", (kernel_ulong_t)&sh7757_data },
 	{ "sh7757-gether", (kernel_ulong_t)&sh7757_data_giga },
 	{ "sh7763-gether", (kernel_ulong_t)&sh7763_data },
-	{ "r7s72100-ether", (kernel_ulong_t)&r7s72100_data },
-	{ "r8a7740-gether", (kernel_ulong_t)&r8a7740_data },
-	{ "r8a777x-ether", (kernel_ulong_t)&r8a777x_data },
-	{ "r8a7790-ether", (kernel_ulong_t)&r8a779x_data },
-	{ "r8a7791-ether", (kernel_ulong_t)&r8a779x_data },
-	{ "r8a7793-ether", (kernel_ulong_t)&r8a779x_data },
-	{ "r8a7794-ether", (kernel_ulong_t)&r8a779x_data },
 	{ }
 };
 MODULE_DEVICE_TABLE(platform, sh_eth_id_table);
diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c
index e9f2349e98bc..a4ab71d43e4e 100644
--- a/drivers/net/ethernet/rocker/rocker.c
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -4998,7 +4998,7 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number)
 	dev->netdev_ops = &rocker_port_netdev_ops;
 	dev->ethtool_ops = &rocker_port_ethtool_ops;
 	dev->switchdev_ops = &rocker_port_switchdev_ops;
-	netif_napi_add(dev, &rocker_port->napi_tx, rocker_port_poll_tx,
+	netif_tx_napi_add(dev, &rocker_port->napi_tx, rocker_port_poll_tx,
 		       NAPI_POLL_WEIGHT);
 	netif_napi_add(dev, &rocker_port->napi_rx, rocker_port_poll_rx,
 		       NAPI_POLL_WEIGHT);
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index bc6d21b471be..425df3dbc77d 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -181,13 +181,6 @@ static int efx_ef10_init_datapath_caps(struct efx_nic *efx)
 		MCDI_WORD(outbuf, GET_CAPABILITIES_OUT_TX_DPCPU_FW_ID);
 
 	if (!(nic_data->datapath_caps &
-	      (1 << MC_CMD_GET_CAPABILITIES_OUT_TX_TSO_LBN))) {
-		netif_err(efx, drv, efx->net_dev,
-			  "current firmware does not support TSO\n");
-		return -ENODEV;
-	}
-
-	if (!(nic_data->datapath_caps &
 	      (1 << MC_CMD_GET_CAPABILITIES_OUT_RX_PREFIX_LEN_14_LBN))) {
 		netif_err(efx, probe, efx->net_dev,
 			  "current firmware does not support an RX prefix\n");
@@ -1797,6 +1790,12 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
 			     ESF_DZ_TX_OPTION_UDP_TCP_CSUM, csum_offload,
 			     ESF_DZ_TX_OPTION_IP_CSUM, csum_offload);
 	tx_queue->write_count = 1;
+
+	if (nic_data->datapath_caps &
+	    (1 << MC_CMD_GET_CAPABILITIES_OUT_TX_TSO_LBN)) {
+		tx_queue->tso_version = 1;
+	}
+
 	wmb();
 	efx_ef10_push_tx_desc(tx_queue, txd);
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index a3c42a376741..b405349a570c 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -2059,7 +2059,6 @@ static void efx_init_napi_channel(struct efx_channel *channel)
 	channel->napi_dev = efx->net_dev;
 	netif_napi_add(channel->napi_dev, &channel->napi_str, efx_poll,
 		       napi_weight);
-	napi_hash_add(&channel->napi_str);
 	efx_channel_busy_poll_init(channel);
 }
 
@@ -2785,6 +2784,12 @@ static const struct pci_device_id efx_pci_table[] = {
 	 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
 	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0923), /* SFC9140 PF */
 	 .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
+	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1923), /* SFC9140 VF */
+	 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
+	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0a03), /* SFC9220 PF */
+	 .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
+	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1a03), /* SFC9220 VF */
+	 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
 	{0}			/* end of list */
 };
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index a8ddd122f685..38c422321cda 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -182,6 +182,7 @@ struct efx_tx_buffer {
 *
 * @efx: The associated Efx NIC
 * @queue: DMA queue number
+ * @tso_version: Version of TSO in use for this queue.
 * @channel: The associated channel
 * @core_txq: The networking core TX queue structure
 * @buffer: The software buffer ring
@@ -228,6 +229,7 @@ struct efx_tx_queue {
 	/* Members which don't change on the fast path */
 	struct efx_nic *efx ____cacheline_aligned_in_smp;
 	unsigned queue;
+	unsigned int tso_version;
 	struct efx_channel *channel;
 	struct netdev_queue *core_txq;
 	struct efx_tx_buffer *buffer;
@@ -1502,8 +1504,9 @@ static inline struct efx_rx_buffer *efx_rx_buffer(struct efx_rx_queue *rx_queue,
 * same cycle, the XMAC can miss the IPG altogether. We work around
 * this by adding a further 16 bytes.
 */
+#define EFX_FRAME_PAD 16
 #define EFX_MAX_FRAME_LEN(mtu) \
-	((((mtu) + ETH_HLEN + VLAN_HLEN + 4/* FCS */ + 7) & ~7) + 16)
+	(ALIGN(((mtu) + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN + EFX_FRAME_PAD), 8))
 
 static inline bool efx_xmit_with_hwtstamp(struct sk_buff *skb)
 {
diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index 809ea4610a77..8956995b2fe7 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -463,7 +463,6 @@ efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf,
 
 	skb_record_rx_queue(skb, channel->rx_queue.core_index);
 
-	skb_mark_napi_id(skb, &channel->napi_str);
 	gro_result = napi_gro_frags(napi);
 	if (gro_result != GRO_DROP)
 		channel->irq_mod_score += 2;
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index 67f6afaa022f..f7a0ec1bca97 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@ -1010,13 +1010,17 @@ static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue,
 /* Parse the SKB header and initialise state. */
 static int tso_start(struct tso_state *st, struct efx_nic *efx,
+		     struct efx_tx_queue *tx_queue,
 		     const struct sk_buff *skb)
 {
-	bool use_opt_desc = efx_nic_rev(efx) >= EFX_REV_HUNT_A0;
 	struct device *dma_dev = &efx->pci_dev->dev;
 	unsigned int header_len, in_len;
+	bool use_opt_desc = false;
 	dma_addr_t dma_addr;
 
+	if (tx_queue->tso_version == 1)
+		use_opt_desc = true;
+
 	st->ip_off = skb_network_header(skb) - skb->data;
 	st->tcp_off = skb_transport_header(skb) - skb->data;
 	header_len = st->tcp_off + (tcp_hdr(skb)->doff << 2u);
@@ -1271,7 +1275,7 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
 	/* Find the packet protocol and sanity-check it */
 	state.protocol = efx_tso_check_protocol(skb);
 
-	rc = tso_start(&state, efx, skb);
+	rc = tso_start(&state, efx, tx_queue, skb);
 	if (rc)
 		goto mem_err;
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 623c6ed8764a..f4518bc2cd28 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -137,6 +137,31 @@ struct stmmac_extra_stats {
 	unsigned long pcs_link;
 	unsigned long pcs_duplex;
 	unsigned long pcs_speed;
+	/* debug register */
+	unsigned long mtl_tx_status_fifo_full;
+	unsigned long mtl_tx_fifo_not_empty;
+	unsigned long mmtl_fifo_ctrl;
+	unsigned long mtl_tx_fifo_read_ctrl_write;
+	unsigned long mtl_tx_fifo_read_ctrl_wait;
+	unsigned long mtl_tx_fifo_read_ctrl_read;
+	unsigned long mtl_tx_fifo_read_ctrl_idle;
+	unsigned long mac_tx_in_pause;
+	unsigned long mac_tx_frame_ctrl_xfer;
+	unsigned long mac_tx_frame_ctrl_idle;
+	unsigned long mac_tx_frame_ctrl_wait;
+	unsigned long mac_tx_frame_ctrl_pause;
+	unsigned long mac_gmii_tx_proto_engine;
+	unsigned long mtl_rx_fifo_fill_level_full;
+	unsigned long mtl_rx_fifo_fill_above_thresh;
+	unsigned long mtl_rx_fifo_fill_below_thresh;
+	unsigned long mtl_rx_fifo_fill_level_empty;
+	unsigned long mtl_rx_fifo_read_ctrl_flush;
+	unsigned long mtl_rx_fifo_read_ctrl_read_data;
+	unsigned long mtl_rx_fifo_read_ctrl_status;
+	unsigned long mtl_rx_fifo_read_ctrl_idle;
+	unsigned long mtl_rx_fifo_ctrl_active;
+	unsigned long mac_rx_frame_ctrl_fifo;
+	unsigned long mac_gmii_rx_proto_engine;
 };
 
 /* CSR Frequency Access Defines*/
@@ -408,6 +433,7 @@ struct stmmac_ops {
 	void (*set_eee_pls)(struct mac_device_info *hw, int link);
 	void (*ctrl_ane)(struct mac_device_info *hw, bool restart);
 	void (*get_adv)(struct mac_device_info *hw, struct rgmii_adv *adv);
+	void (*debug)(void __iomem *ioaddr, struct stmmac_extra_stats *x);
 };
 
 /* PTP and HW Timer helpers */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
index 82de68b1a452..36d3355f2fb0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
@@ -198,19 +198,19 @@ static int ipq806x_gmac_set_speed(struct ipq806x_gmac *gmac, unsigned int speed)
 	return 0;
 }
 
-static void *ipq806x_gmac_of_parse(struct ipq806x_gmac *gmac)
+static int ipq806x_gmac_of_parse(struct ipq806x_gmac *gmac)
 {
 	struct device *dev = &gmac->pdev->dev;
 
 	gmac->phy_mode = of_get_phy_mode(dev->of_node);
 	if (gmac->phy_mode < 0) {
 		dev_err(dev, "missing phy mode property\n");
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 
 	if (of_property_read_u32(dev->of_node, "qcom,id", &gmac->id) < 0) {
 		dev_err(dev, "missing qcom id property\n");
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 
 	/* The GMACs are called 1 to 4 in the documentation, but to simplify the
@@ -219,13 +219,13 @@ static void *ipq806x_gmac_of_parse(struct ipq806x_gmac *gmac)
 	 */
 	if (gmac->id < 0 || gmac->id > 3) {
 		dev_err(dev, "invalid gmac id\n");
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 
 	gmac->core_clk = devm_clk_get(dev, "stmmaceth");
 	if (IS_ERR(gmac->core_clk)) {
 		dev_err(dev, "missing stmmaceth clk property\n");
-		return gmac->core_clk;
+		return PTR_ERR(gmac->core_clk);
 	}
 	clk_set_rate(gmac->core_clk, 266000000);
 
@@ -234,18 +234,16 @@ static void *ipq806x_gmac_of_parse(struct ipq806x_gmac *gmac)
 					       "qcom,nss-common");
 	if (IS_ERR(gmac->nss_common)) {
 		dev_err(dev, "missing nss-common node\n");
-		return gmac->nss_common;
+		return PTR_ERR(gmac->nss_common);
 	}
 
 	/* Setup the register map for the qsgmii csr registers */
 	gmac->qsgmii_csr = syscon_regmap_lookup_by_phandle(dev->of_node,
 							   "qcom,qsgmii-csr");
-	if (IS_ERR(gmac->qsgmii_csr)) {
+	if (IS_ERR(gmac->qsgmii_csr))
 		dev_err(dev, "missing qsgmii-csr node\n");
-		return gmac->qsgmii_csr;
-	}
 
-	return NULL;
+	return PTR_ERR_OR_ZERO(gmac->qsgmii_csr);
 }
 
 static void ipq806x_gmac_fix_mac_speed(void *priv, unsigned int speed)
@@ -262,7 +260,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct ipq806x_gmac *gmac;
 	int val;
-	void *err;
+	int err;
 
 	val = stmmac_get_platform_resources(pdev, &stmmac_res);
 	if (val)
@@ -279,9 +277,9 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
 	gmac->pdev = pdev;
 
 	err = ipq806x_gmac_of_parse(gmac);
-	if (IS_ERR(err)) {
+	if (err) {
 		dev_err(dev, "device tree parsing error\n");
-		return PTR_ERR(err);
+		return err;
 	}
 
 	regmap_write(gmac->qsgmii_csr, QSGMII_PCS_CAL_LCKDT_CTL,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
index b3fe0575ff6b..8831a053ac13 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
@@ -34,6 +34,7 @@
 #define GMAC_FLOW_CTRL 0x00000018 /* Flow Control */
 #define GMAC_VLAN_TAG 0x0000001c /* VLAN Tag */
 #define GMAC_VERSION 0x00000020 /* GMAC CORE Version */
+#define GMAC_DEBUG 0x00000024 /* GMAC debug register */
 #define GMAC_WAKEUP_FILTER 0x00000028 /* Wake-up Frame Filter */
 
 #define GMAC_INT_STATUS 0x00000038 /* interrupt status register */
@@ -177,6 +178,47 @@ enum inter_frame_gap {
 #define GMAC_FLOW_CTRL_TFE 0x00000002 /* Tx Flow Control Enable */
 #define GMAC_FLOW_CTRL_FCB_BPA 0x00000001 /* Flow Control Busy ... */
+/* DEBUG Register defines */
+/* MTL TxStatus FIFO */
+#define GMAC_DEBUG_TXSTSFSTS BIT(25) /* MTL TxStatus FIFO Full Status */
+#define GMAC_DEBUG_TXFSTS BIT(24) /* MTL Tx FIFO Not Empty Status */
+#define GMAC_DEBUG_TWCSTS BIT(22) /* MTL Tx FIFO Write Controller */
+/* MTL Tx FIFO Read Controller Status */
+#define GMAC_DEBUG_TRCSTS_MASK GENMASK(21, 20)
+#define GMAC_DEBUG_TRCSTS_SHIFT 20
+#define GMAC_DEBUG_TRCSTS_IDLE 0
+#define GMAC_DEBUG_TRCSTS_READ 1
+#define GMAC_DEBUG_TRCSTS_TXW 2
+#define GMAC_DEBUG_TRCSTS_WRITE 3
+#define GMAC_DEBUG_TXPAUSED BIT(19) /* MAC Transmitter in PAUSE */
+/* MAC Transmit Frame Controller Status */
+#define GMAC_DEBUG_TFCSTS_MASK GENMASK(18, 17)
+#define GMAC_DEBUG_TFCSTS_SHIFT 17
+#define GMAC_DEBUG_TFCSTS_IDLE 0
+#define GMAC_DEBUG_TFCSTS_WAIT 1
+#define GMAC_DEBUG_TFCSTS_GEN_PAUSE 2
+#define GMAC_DEBUG_TFCSTS_XFER 3
+/* MAC GMII or MII Transmit Protocol Engine Status */
+#define GMAC_DEBUG_TPESTS BIT(16)
+#define GMAC_DEBUG_RXFSTS_MASK GENMASK(9, 8) /* MTL Rx FIFO Fill-level */
+#define GMAC_DEBUG_RXFSTS_SHIFT 8
+#define GMAC_DEBUG_RXFSTS_EMPTY 0
+#define GMAC_DEBUG_RXFSTS_BT 1
+#define GMAC_DEBUG_RXFSTS_AT 2
+#define GMAC_DEBUG_RXFSTS_FULL 3
+#define GMAC_DEBUG_RRCSTS_MASK GENMASK(6, 5) /* MTL Rx FIFO Read Controller */
+#define GMAC_DEBUG_RRCSTS_SHIFT 5
+#define GMAC_DEBUG_RRCSTS_IDLE 0
+#define GMAC_DEBUG_RRCSTS_RDATA 1
+#define GMAC_DEBUG_RRCSTS_RSTAT 2
+#define GMAC_DEBUG_RRCSTS_FLUSH 3
+#define GMAC_DEBUG_RWCSTS BIT(4) /* MTL Rx FIFO Write Controller Active */
+/* MAC Receive Frame Controller FIFO Status */
+#define GMAC_DEBUG_RFCFCSTS_MASK GENMASK(2, 1)
+#define GMAC_DEBUG_RFCFCSTS_SHIFT 1
+/* MAC GMII or MII Receive Protocol Engine Status */
+#define GMAC_DEBUG_RPESTS BIT(0)
+
 /*--- DMA BLOCK defines ---*/
 /* DMA Bus Mode register defines */
 #define DMA_BUS_MODE_SFT_RESET 0x00000001 /* Software Reset */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
index 371a669d69fd..c2941172f6d1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
@@ -397,6 +397,80 @@ static void dwmac1000_get_adv(struct mac_device_info *hw, struct rgmii_adv *adv)
 	adv->lp_pause = (value & GMAC_ANE_PSE) >> GMAC_ANE_PSE_SHIFT;
 }
 
+static void dwmac1000_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x)
+{
+	u32 value = readl(ioaddr + GMAC_DEBUG);
+
+	if (value & GMAC_DEBUG_TXSTSFSTS)
+		x->mtl_tx_status_fifo_full++;
+	if (value & GMAC_DEBUG_TXFSTS)
+		x->mtl_tx_fifo_not_empty++;
+	if (value & GMAC_DEBUG_TWCSTS)
+		x->mmtl_fifo_ctrl++;
+	if (value & GMAC_DEBUG_TRCSTS_MASK) {
+		u32 trcsts = (value & GMAC_DEBUG_TRCSTS_MASK)
+			     >> GMAC_DEBUG_TRCSTS_SHIFT;
+		if (trcsts == GMAC_DEBUG_TRCSTS_WRITE)
+			x->mtl_tx_fifo_read_ctrl_write++;
+		else if (trcsts == GMAC_DEBUG_TRCSTS_TXW)
+			x->mtl_tx_fifo_read_ctrl_wait++;
+		else if (trcsts == GMAC_DEBUG_TRCSTS_READ)
+			x->mtl_tx_fifo_read_ctrl_read++;
+		else
+			x->mtl_tx_fifo_read_ctrl_idle++;
+	}
+	if (value & GMAC_DEBUG_TXPAUSED)
+		x->mac_tx_in_pause++;
+	if (value & GMAC_DEBUG_TFCSTS_MASK) {
+		u32 tfcsts = (value & GMAC_DEBUG_TFCSTS_MASK)
+			     >> GMAC_DEBUG_TFCSTS_SHIFT;
+
+		if (tfcsts == GMAC_DEBUG_TFCSTS_XFER)
+			x->mac_tx_frame_ctrl_xfer++;
+		else if (tfcsts == GMAC_DEBUG_TFCSTS_GEN_PAUSE)
+			x->mac_tx_frame_ctrl_pause++;
+		else if (tfcsts == GMAC_DEBUG_TFCSTS_WAIT)
+			x->mac_tx_frame_ctrl_wait++;
+		else
+			x->mac_tx_frame_ctrl_idle++;
+	}
+	if (value & GMAC_DEBUG_TPESTS)
+		x->mac_gmii_tx_proto_engine++;
+	if (value & GMAC_DEBUG_RXFSTS_MASK) {
+		u32 rxfsts = (value & GMAC_DEBUG_RXFSTS_MASK)
+			     >> GMAC_DEBUG_RRCSTS_SHIFT;
+
+		if (rxfsts == GMAC_DEBUG_RXFSTS_FULL)
+			x->mtl_rx_fifo_fill_level_full++;
+		else if (rxfsts == GMAC_DEBUG_RXFSTS_AT)
+			x->mtl_rx_fifo_fill_above_thresh++;
+		else if (rxfsts == GMAC_DEBUG_RXFSTS_BT)
+			x->mtl_rx_fifo_fill_below_thresh++;
+		else
+			x->mtl_rx_fifo_fill_level_empty++;
+	}
+	if (value & GMAC_DEBUG_RRCSTS_MASK) {
+		u32 rrcsts = (value & GMAC_DEBUG_RRCSTS_MASK) >>
+			     GMAC_DEBUG_RRCSTS_SHIFT;
+
+		if (rrcsts == GMAC_DEBUG_RRCSTS_FLUSH)
+			x->mtl_rx_fifo_read_ctrl_flush++;
+		else if (rrcsts == GMAC_DEBUG_RRCSTS_RSTAT)
+			x->mtl_rx_fifo_read_ctrl_read_data++;
+		else if (rrcsts == GMAC_DEBUG_RRCSTS_RDATA)
+			x->mtl_rx_fifo_read_ctrl_status++;
+		else
+			x->mtl_rx_fifo_read_ctrl_idle++;
+	}
+	if (value & GMAC_DEBUG_RWCSTS)
+		x->mtl_rx_fifo_ctrl_active++;
+	if (value & GMAC_DEBUG_RFCFCSTS_MASK)
+		x->mac_rx_frame_ctrl_fifo = (value & GMAC_DEBUG_RFCFCSTS_MASK)
+					    >> GMAC_DEBUG_RFCFCSTS_SHIFT;
+	if (value & GMAC_DEBUG_RPESTS)
+		x->mac_gmii_rx_proto_engine++;
+}
+
 static const struct stmmac_ops dwmac1000_ops = {
 	.core_init = dwmac1000_core_init,
 	.rx_ipc = dwmac1000_rx_ipc_enable,
@@ -413,6 +487,7 @@ static const struct stmmac_ops dwmac1000_ops = {
 	.set_eee_pls = dwmac1000_set_eee_pls,
 	.ctrl_ane = dwmac1000_ctrl_ane,
 	.get_adv = dwmac1000_get_adv,
+	.debug = dwmac1000_debug,
 };
 
 struct mac_device_info *dwmac1000_setup(void __iomem *ioaddr, int mcbins,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 2e51b816a7e8..4c6486cc80fb 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -136,6 +136,31 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = {
 	STMMAC_STAT(irq_pcs_ane_n),
 	STMMAC_STAT(irq_pcs_link_n),
 	STMMAC_STAT(irq_rgmii_n),
+	/* DEBUG */
+	STMMAC_STAT(mtl_tx_status_fifo_full),
+	STMMAC_STAT(mtl_tx_fifo_not_empty),
+	STMMAC_STAT(mmtl_fifo_ctrl),
+	STMMAC_STAT(mtl_tx_fifo_read_ctrl_write),
+	STMMAC_STAT(mtl_tx_fifo_read_ctrl_wait),
+	STMMAC_STAT(mtl_tx_fifo_read_ctrl_read),
+	STMMAC_STAT(mtl_tx_fifo_read_ctrl_idle),
+	STMMAC_STAT(mac_tx_in_pause),
+	STMMAC_STAT(mac_tx_frame_ctrl_xfer),
+	STMMAC_STAT(mac_tx_frame_ctrl_idle),
+	STMMAC_STAT(mac_tx_frame_ctrl_wait),
+	STMMAC_STAT(mac_tx_frame_ctrl_pause),
+	STMMAC_STAT(mac_gmii_tx_proto_engine),
+	STMMAC_STAT(mtl_rx_fifo_fill_level_full),
+	STMMAC_STAT(mtl_rx_fifo_fill_above_thresh),
+	STMMAC_STAT(mtl_rx_fifo_fill_below_thresh),
+	STMMAC_STAT(mtl_rx_fifo_fill_level_empty),
+	STMMAC_STAT(mtl_rx_fifo_read_ctrl_flush),
+	STMMAC_STAT(mtl_rx_fifo_read_ctrl_read_data),
+	STMMAC_STAT(mtl_rx_fifo_read_ctrl_status),
+	STMMAC_STAT(mtl_rx_fifo_read_ctrl_idle),
+	STMMAC_STAT(mtl_rx_fifo_ctrl_active),
+	STMMAC_STAT(mac_rx_frame_ctrl_fifo),
+	STMMAC_STAT(mac_gmii_rx_proto_engine),
 };
 #define STMMAC_STATS_LEN ARRAY_SIZE(stmmac_gstrings_stats)
 
@@ -497,6 +522,11 @@ static void stmmac_get_ethtool_stats(struct net_device *dev,
 			if (val)
 				priv->xstats.phy_eee_wakeup_error_n = val;
 		}
+
+		if ((priv->hw->mac->debug) &&
+		    (priv->synopsys_id >= DWMAC_CORE_3_50))
+			priv->hw->mac->debug(priv->ioaddr,
+					     (void *)&priv->xstats);
 	}
 	for (i = 0; i < STMMAC_STATS_LEN; i++) {
 		char *p = (char *)priv + stmmac_gstrings_stats[i].stat_offset;
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 48b92c9de12a..15322c08de80 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -2469,7 +2469,7 @@ static int cpsw_probe(struct platform_device *pdev)
 	ndev->netdev_ops = &cpsw_netdev_ops;
 	ndev->ethtool_ops = &cpsw_ethtool_ops;
 	netif_napi_add(ndev, &priv->napi_rx, cpsw_rx_poll, CPSW_POLL_WEIGHT);
-	netif_napi_add(ndev, &priv->napi_tx, cpsw_tx_poll, CPSW_POLL_WEIGHT);
+	netif_tx_napi_add(ndev, &priv->napi_tx, cpsw_tx_poll, CPSW_POLL_WEIGHT);
 
 	/* register the network device */
 	SET_NETDEV_DEV(ndev, &pdev->dev);
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index 37b9b39192ec..e5e20e734f21 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -1990,7 +1990,7 @@ static int netcp_create_interface(struct netcp_device *netcp_device,
 	/* NAPI register */
 	netif_napi_add(ndev, &netcp->rx_napi, netcp_rx_poll, NETCP_NAPI_WEIGHT);
-	netif_napi_add(ndev, &netcp->tx_napi, netcp_tx_poll, NETCP_NAPI_WEIGHT);
+	netif_tx_napi_add(ndev, &netcp->tx_napi, netcp_tx_poll, NETCP_NAPI_WEIGHT);
 
 	/* Register the network device */
 	ndev->dev_id = 0;
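
[Illustration, not part of the patches above: the rocker, cpsw and netcp hunks all switch the TX completion context from netif_napi_add() to netif_tx_napi_add(), which marks that NAPI instance as transmit-only so the busy-poll path leaves it alone. A minimal, hedged sketch of the same registration pattern is shown below; my_priv, my_rx_poll and my_tx_poll are hypothetical names and belong to no driver touched here.]

#include <linux/netdevice.h>

/* Hypothetical per-device state with split RX/TX NAPI contexts. */
struct my_priv {
	struct napi_struct napi_rx;
	struct napi_struct napi_tx;
};

static int my_rx_poll(struct napi_struct *napi, int budget);
static int my_tx_poll(struct napi_struct *napi, int budget);

static void my_register_napi(struct net_device *ndev, struct my_priv *priv)
{
	/* RX context: ordinary NAPI registration. */
	netif_napi_add(ndev, &priv->napi_rx, my_rx_poll, NAPI_POLL_WEIGHT);
	/* TX completion context: registered as TX-only NAPI. */
	netif_tx_napi_add(ndev, &priv->napi_tx, my_tx_poll, NAPI_POLL_WEIGHT);
}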