diff options
Diffstat (limited to 'drivers/net/ethernet')
450 files changed, 37108 insertions, 8516 deletions
diff --git a/drivers/net/ethernet/3com/3c515.c b/drivers/net/ethernet/3com/3c515.c index c5987f518cb2..b648e3f95c01 100644 --- a/drivers/net/ethernet/3com/3c515.c +++ b/drivers/net/ethernet/3com/3c515.c @@ -367,7 +367,7 @@ static struct net_device *corkscrew_scan(int unit); static int corkscrew_setup(struct net_device *dev, int ioaddr, struct pnp_dev *idev, int card_number); static int corkscrew_open(struct net_device *dev); -static void corkscrew_timer(unsigned long arg); +static void corkscrew_timer(struct timer_list *t); static netdev_tx_t corkscrew_start_xmit(struct sk_buff *skb, struct net_device *dev); static int corkscrew_rx(struct net_device *dev); @@ -627,7 +627,7 @@ static int corkscrew_setup(struct net_device *dev, int ioaddr, spin_lock_init(&vp->lock); - setup_timer(&vp->timer, corkscrew_timer, (unsigned long) dev); + timer_setup(&vp->timer, corkscrew_timer, 0); /* Read the station address from the EEPROM. */ EL3WINDOW(0); @@ -869,11 +869,11 @@ static int corkscrew_open(struct net_device *dev) return 0; } -static void corkscrew_timer(unsigned long data) +static void corkscrew_timer(struct timer_list *t) { #ifdef AUTOMEDIA - struct net_device *dev = (struct net_device *) data; - struct corkscrew_private *vp = netdev_priv(dev); + struct corkscrew_private *vp = from_timer(vp, t, timer); + struct net_device *dev = vp->our_dev; int ioaddr = dev->base_addr; unsigned long flags; int ok = 0; diff --git a/drivers/net/ethernet/3com/3c574_cs.c b/drivers/net/ethernet/3com/3c574_cs.c index 47c844cc9d27..3044a6f35f04 100644 --- a/drivers/net/ethernet/3com/3c574_cs.c +++ b/drivers/net/ethernet/3com/3c574_cs.c @@ -225,7 +225,7 @@ static unsigned short read_eeprom(unsigned int ioaddr, int index); static void tc574_wait_for_completion(struct net_device *dev, int cmd); static void tc574_reset(struct net_device *dev); -static void media_check(unsigned long arg); +static void media_check(struct timer_list *t); static int el3_open(struct net_device *dev); static netdev_tx_t el3_start_xmit(struct sk_buff *skb, struct net_device *dev); @@ -377,7 +377,7 @@ static int tc574_config(struct pcmcia_device *link) lp->autoselect = config & Autoselect ? 1 : 0; } - init_timer(&lp->media); + timer_setup(&lp->media, media_check, 0); { int phy; @@ -681,8 +681,6 @@ static int el3_open(struct net_device *dev) netif_start_queue(dev); tc574_reset(dev); - lp->media.function = media_check; - lp->media.data = (unsigned long) dev; lp->media.expires = jiffies + HZ; add_timer(&lp->media); @@ -859,10 +857,10 @@ static irqreturn_t el3_interrupt(int irq, void *dev_id) (and as a last resort, poll the NIC for events), and to monitor the MII, reporting changes in cable status. */ -static void media_check(unsigned long arg) +static void media_check(struct timer_list *t) { - struct net_device *dev = (struct net_device *) arg; - struct el3_private *lp = netdev_priv(dev); + struct el3_private *lp = from_timer(lp, t, media); + struct net_device *dev = lp->p_dev->priv; unsigned int ioaddr = dev->base_addr; unsigned long flags; unsigned short /* cable, */ media, partner; @@ -1048,6 +1046,7 @@ static int el3_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) switch(cmd) { case SIOCGMIIPHY: /* Get the address of the PHY in use. */ data->phy_id = phy; + /* fall through */ case SIOCGMIIREG: /* Read the specified MII register. */ { int saved_window; diff --git a/drivers/net/ethernet/3com/3c589_cs.c b/drivers/net/ethernet/3com/3c589_cs.c index e28254a00599..2b2695311bda 100644 --- a/drivers/net/ethernet/3com/3c589_cs.c +++ b/drivers/net/ethernet/3com/3c589_cs.c @@ -163,7 +163,7 @@ static void tc589_release(struct pcmcia_device *link); static u16 read_eeprom(unsigned int ioaddr, int index); static void tc589_reset(struct net_device *dev); -static void media_check(unsigned long arg); +static void media_check(struct timer_list *t); static int el3_config(struct net_device *dev, struct ifmap *map); static int el3_open(struct net_device *dev); static netdev_tx_t el3_start_xmit(struct sk_buff *skb, @@ -517,7 +517,7 @@ static int el3_open(struct net_device *dev) netif_start_queue(dev); tc589_reset(dev); - setup_timer(&lp->media, media_check, (unsigned long)dev); + timer_setup(&lp->media, media_check, 0); mod_timer(&lp->media, jiffies + HZ); dev_dbg(&link->dev, "%s: opened, status %4.4x.\n", @@ -676,10 +676,10 @@ static irqreturn_t el3_interrupt(int irq, void *dev_id) return IRQ_RETVAL(handled); } -static void media_check(unsigned long arg) +static void media_check(struct timer_list *t) { - struct net_device *dev = (struct net_device *)(arg); - struct el3_private *lp = netdev_priv(dev); + struct el3_private *lp = from_timer(lp, t, media); + struct net_device *dev = lp->p_dev->priv; unsigned int ioaddr = dev->base_addr; u16 media, errs; unsigned long flags; diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c index 402d9090ad29..f4e13a7014bd 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -759,8 +759,8 @@ static int vortex_open(struct net_device *dev); static void mdio_sync(struct vortex_private *vp, int bits); static int mdio_read(struct net_device *dev, int phy_id, int location); static void mdio_write(struct net_device *vp, int phy_id, int location, int value); -static void vortex_timer(unsigned long arg); -static void rx_oom_timer(unsigned long arg); +static void vortex_timer(struct timer_list *t); +static void rx_oom_timer(struct timer_list *t); static netdev_tx_t vortex_start_xmit(struct sk_buff *skb, struct net_device *dev); static netdev_tx_t boomerang_start_xmit(struct sk_buff *skb, @@ -1599,9 +1599,9 @@ vortex_up(struct net_device *dev) dev->name, media_tbl[dev->if_port].name); } - setup_timer(&vp->timer, vortex_timer, (unsigned long)dev); + timer_setup(&vp->timer, vortex_timer, 0); mod_timer(&vp->timer, RUN_AT(media_tbl[dev->if_port].wait)); - setup_timer(&vp->rx_oom_timer, rx_oom_timer, (unsigned long)dev); + timer_setup(&vp->rx_oom_timer, rx_oom_timer, 0); if (vortex_debug > 1) pr_debug("%s: Initial media type %s.\n", @@ -1784,10 +1784,10 @@ out: } static void -vortex_timer(unsigned long data) +vortex_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct vortex_private *vp = netdev_priv(dev); + struct vortex_private *vp = from_timer(vp, t, timer); + struct net_device *dev = vp->mii.dev; void __iomem *ioaddr = vp->ioaddr; int next_tick = 60*HZ; int ok = 0; @@ -2687,10 +2687,10 @@ boomerang_rx(struct net_device *dev) * for some memory. Otherwise there is no way to restart the rx process. */ static void -rx_oom_timer(unsigned long arg) +rx_oom_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)arg; - struct vortex_private *vp = netdev_priv(dev); + struct vortex_private *vp = from_timer(vp, t, rx_oom_timer); + struct net_device *dev = vp->mii.dev; spin_lock_irq(&vp->lock); if ((vp->cur_rx - vp->dirty_rx) == RX_RING_SIZE) /* This test is redundant, but makes me feel good */ diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c index 3da1fc539ef9..7bddb8efb6d5 100644 --- a/drivers/net/ethernet/8390/axnet_cs.c +++ b/drivers/net/ethernet/8390/axnet_cs.c @@ -85,7 +85,7 @@ static struct net_device_stats *get_stats(struct net_device *dev); static void set_multicast_list(struct net_device *dev); static void axnet_tx_timeout(struct net_device *dev); static irqreturn_t ei_irq_wrapper(int irq, void *dev_id); -static void ei_watchdog(u_long arg); +static void ei_watchdog(struct timer_list *t); static void axnet_reset_8390(struct net_device *dev); static int mdio_read(unsigned int addr, int phy_id, int loc); @@ -483,7 +483,7 @@ static int axnet_open(struct net_device *dev) link->open++; info->link_status = 0x00; - setup_timer(&info->watchdog, ei_watchdog, (u_long)dev); + timer_setup(&info->watchdog, ei_watchdog, 0); mod_timer(&info->watchdog, jiffies + HZ); return ax_open(dev); @@ -547,10 +547,10 @@ static irqreturn_t ei_irq_wrapper(int irq, void *dev_id) return ax_interrupt(irq, dev_id); } -static void ei_watchdog(u_long arg) +static void ei_watchdog(struct timer_list *t) { - struct net_device *dev = (struct net_device *)(arg); - struct axnet_dev *info = PRIV(dev); + struct axnet_dev *info = from_timer(info, t, watchdog); + struct net_device *dev = info->p_dev->priv; unsigned int nic_base = dev->base_addr; unsigned int mii_addr = nic_base + AXNET_MII_EEP; u_short link; diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c index bd0a2a14b649..bcad4a7fac9f 100644 --- a/drivers/net/ethernet/8390/pcnet_cs.c +++ b/drivers/net/ethernet/8390/pcnet_cs.c @@ -99,7 +99,7 @@ static int pcnet_open(struct net_device *dev); static int pcnet_close(struct net_device *dev); static int ei_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static irqreturn_t ei_irq_wrapper(int irq, void *dev_id); -static void ei_watchdog(u_long arg); +static void ei_watchdog(struct timer_list *t); static void pcnet_reset_8390(struct net_device *dev); static int set_config(struct net_device *dev, struct ifmap *map); static int setup_shmem_window(struct pcmcia_device *link, int start_pg, @@ -917,7 +917,7 @@ static int pcnet_open(struct net_device *dev) info->phy_id = info->eth_phy; info->link_status = 0x00; - setup_timer(&info->watchdog, ei_watchdog, (u_long)dev); + timer_setup(&info->watchdog, ei_watchdog, 0); mod_timer(&info->watchdog, jiffies + HZ); return ei_open(dev); @@ -1006,10 +1006,10 @@ static irqreturn_t ei_irq_wrapper(int irq, void *dev_id) return ret; } -static void ei_watchdog(u_long arg) +static void ei_watchdog(struct timer_list *t) { - struct net_device *dev = (struct net_device *)arg; - struct pcnet_dev *info = PRIV(dev); + struct pcnet_dev *info = from_timer(info, t, watchdog); + struct net_device *dev = info->p_dev->priv; unsigned int nic_base = dev->base_addr; unsigned int mii_addr = nic_base + DLINK_GPIO; u_short link; @@ -1107,6 +1107,7 @@ static int ei_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) switch (cmd) { case SIOCGMIIPHY: data->phy_id = info->phy_id; + /* fall through */ case SIOCGMIIREG: /* Read MII PHY register. */ data->val_out = mdio_read(mii_addr, data->phy_id, data->reg_num & 0x1f); return 0; diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c index a251de8d9a91..0658cde1586a 100644 --- a/drivers/net/ethernet/adi/bfin_mac.c +++ b/drivers/net/ethernet/adi/bfin_mac.c @@ -1650,9 +1650,8 @@ static int bfin_mac_probe(struct platform_device *pdev) ndev->netdev_ops = &bfin_mac_netdev_ops; ndev->ethtool_ops = &bfin_mac_ethtool_ops; - init_timer(&lp->tx_reclaim_timer); - lp->tx_reclaim_timer.data = (unsigned long)lp; - lp->tx_reclaim_timer.function = tx_reclaim_skb_timeout; + setup_timer(&lp->tx_reclaim_timer, tx_reclaim_skb_timeout, + (unsigned long)lp); lp->flags = 0; netif_napi_add(ndev, &lp->napi, bfin_mac_poll, CONFIG_BFIN_RX_DESC_NUM); diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c index 54eff90e2f02..658e92f79d36 100644 --- a/drivers/net/ethernet/agere/et131x.c +++ b/drivers/net/ethernet/agere/et131x.c @@ -3624,11 +3624,10 @@ static int et131x_open(struct net_device *netdev) int result; /* Start the timer to track NIC errors */ - init_timer(&adapter->error_timer); + setup_timer(&adapter->error_timer, et131x_error_timer_handler, + (unsigned long)adapter); adapter->error_timer.expires = jiffies + msecs_to_jiffies(TX_ERROR_PERIOD); - adapter->error_timer.function = et131x_error_timer_handler; - adapter->error_timer.data = (unsigned long)adapter; add_timer(&adapter->error_timer); result = request_irq(irq, et131x_isr, diff --git a/drivers/net/ethernet/alacritech/slicoss.c b/drivers/net/ethernet/alacritech/slicoss.c index 15a8096c60df..0b60921c392f 100644 --- a/drivers/net/ethernet/alacritech/slicoss.c +++ b/drivers/net/ethernet/alacritech/slicoss.c @@ -355,10 +355,10 @@ static void slic_xmit_complete(struct slic_device *sdev) { struct slic_tx_queue *txq = &sdev->txq; struct net_device *dev = sdev->netdev; - unsigned int idx = txq->done_idx; struct slic_tx_buffer *buff; unsigned int frames = 0; unsigned int bytes = 0; + unsigned int idx; /* Limit processing to SLIC_MAX_TX_COMPLETIONS frames to avoid that new * completions during processing keeps the loop running endlessly. diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h index 305dc1996b4e..4532e574ebcd 100644 --- a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h +++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h @@ -627,6 +627,12 @@ enum ena_admin_flow_hash_proto { ENA_ADMIN_RSS_NOT_IP = 7, + /* TCPv6 with extension header */ + ENA_ADMIN_RSS_TCP6_EX = 8, + + /* IPv6 with extension header */ + ENA_ADMIN_RSS_IP6_EX = 9, + ENA_ADMIN_RSS_PROTO_NUM = 16, }; diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 52beba8c7a39..bf2de5298005 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -63,6 +63,8 @@ #define ENA_REGS_ADMIN_INTR_MASK 1 +#define ENA_POLL_MS 5 + /*****************************************************************************/ /*****************************************************************************/ /*****************************************************************************/ @@ -315,7 +317,7 @@ static struct ena_comp_ctx *ena_com_submit_admin_cmd(struct ena_com_admin_queue cmd_size_in_bytes, comp, comp_size_in_bytes); - if (unlikely(IS_ERR(comp_ctx))) + if (IS_ERR(comp_ctx)) admin_queue->running_state = false; spin_unlock_irqrestore(&admin_queue->q_lock, flags); @@ -533,7 +535,7 @@ static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_c goto err; } - msleep(100); + msleep(ENA_POLL_MS); } if (unlikely(comp_ctx->status == ENA_CMD_ABORTED)) { @@ -746,6 +748,9 @@ static int wait_for_reset_state(struct ena_com_dev *ena_dev, u32 timeout, { u32 val, i; + /* Convert timeout from resolution of 100ms to ENA_POLL_MS */ + timeout = (timeout * 100) / ENA_POLL_MS; + for (i = 0; i < timeout; i++) { val = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF); @@ -758,8 +763,7 @@ static int wait_for_reset_state(struct ena_com_dev *ena_dev, u32 timeout, exp_state) return 0; - /* The resolution of the timeout is 100ms */ - msleep(100); + msleep(ENA_POLL_MS); } return -ETIME; @@ -1130,7 +1134,7 @@ int ena_com_execute_admin_command(struct ena_com_admin_queue *admin_queue, comp_ctx = ena_com_submit_admin_cmd(admin_queue, cmd, cmd_size, comp, comp_size); - if (unlikely(IS_ERR(comp_ctx))) { + if (IS_ERR(comp_ctx)) { if (comp_ctx == ERR_PTR(-ENODEV)) pr_debug("Failed to submit command [%ld]\n", PTR_ERR(comp_ctx)); @@ -1253,7 +1257,7 @@ void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev) spin_lock_irqsave(&admin_queue->q_lock, flags); while (atomic_read(&admin_queue->outstanding_cmds) != 0) { spin_unlock_irqrestore(&admin_queue->q_lock, flags); - msleep(20); + msleep(ENA_POLL_MS); spin_lock_irqsave(&admin_queue->q_lock, flags); } spin_unlock_irqrestore(&admin_queue->q_lock, flags); diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 967020fb26ee..060cb18fa659 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -60,8 +60,8 @@ struct ena_stats { static const struct ena_stats ena_stats_global_strings[] = { ENA_STAT_GLOBAL_ENTRY(tx_timeout), - ENA_STAT_GLOBAL_ENTRY(io_suspend), - ENA_STAT_GLOBAL_ENTRY(io_resume), + ENA_STAT_GLOBAL_ENTRY(suspend), + ENA_STAT_GLOBAL_ENTRY(resume), ENA_STAT_GLOBAL_ENTRY(wd_expired), ENA_STAT_GLOBAL_ENTRY(interface_up), ENA_STAT_GLOBAL_ENTRY(interface_down), @@ -81,6 +81,7 @@ static const struct ena_stats ena_stats_tx_strings[] = { ENA_STAT_TX_ENTRY(doorbells), ENA_STAT_TX_ENTRY(prepare_ctx_err), ENA_STAT_TX_ENTRY(bad_req_id), + ENA_STAT_TX_ENTRY(missed_tx), }; static const struct ena_stats ena_stats_rx_strings[] = { diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index c6bd5e24005d..7451922c209d 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -517,7 +517,7 @@ static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num) rc = ena_alloc_rx_page(rx_ring, rx_info, - __GFP_COLD | GFP_ATOMIC | __GFP_COMP); + GFP_ATOMIC | __GFP_COMP); if (unlikely(rc < 0)) { netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev, "failed to alloc buffer for rx queue %d\n", @@ -2361,38 +2361,6 @@ static const struct net_device_ops ena_netdev_ops = { #endif /* CONFIG_NET_POLL_CONTROLLER */ }; -static void ena_device_io_suspend(struct work_struct *work) -{ - struct ena_adapter *adapter = - container_of(work, struct ena_adapter, suspend_io_task); - struct net_device *netdev = adapter->netdev; - - /* ena_napi_disable_all disables only the IO handling. - * We are still subject to AENQ keep alive watchdog. - */ - u64_stats_update_begin(&adapter->syncp); - adapter->dev_stats.io_suspend++; - u64_stats_update_begin(&adapter->syncp); - ena_napi_disable_all(adapter); - netif_tx_lock(netdev); - netif_device_detach(netdev); - netif_tx_unlock(netdev); -} - -static void ena_device_io_resume(struct work_struct *work) -{ - struct ena_adapter *adapter = - container_of(work, struct ena_adapter, resume_io_task); - struct net_device *netdev = adapter->netdev; - - u64_stats_update_begin(&adapter->syncp); - adapter->dev_stats.io_resume++; - u64_stats_update_end(&adapter->syncp); - - netif_device_attach(netdev); - ena_napi_enable_all(adapter); -} - static int ena_device_validate_params(struct ena_adapter *adapter, struct ena_com_dev_get_features_ctx *get_feat_ctx) { @@ -2561,38 +2529,31 @@ err_disable_msix: return rc; } -static void ena_fw_reset_device(struct work_struct *work) +static void ena_destroy_device(struct ena_adapter *adapter) { - struct ena_com_dev_get_features_ctx get_feat_ctx; - struct ena_adapter *adapter = - container_of(work, struct ena_adapter, reset_task); struct net_device *netdev = adapter->netdev; struct ena_com_dev *ena_dev = adapter->ena_dev; - struct pci_dev *pdev = adapter->pdev; - bool dev_up, wd_state; - int rc; - - if (unlikely(!test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) { - dev_err(&pdev->dev, - "device reset schedule while reset bit is off\n"); - return; - } + bool dev_up; netif_carrier_off(netdev); del_timer_sync(&adapter->timer_service); - rtnl_lock(); - dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); + adapter->dev_up_before_reset = dev_up; + ena_com_set_admin_running_state(ena_dev, false); - /* After calling ena_close the tx queues and the napi - * are disabled so no one can interfere or touch the - * data structures - */ ena_close(netdev); + /* Before releasing the ENA resources, a device reset is required. + * (to prevent the device from accessing them). + * In case the reset flag is set and the device is up, ena_close + * already perform the reset, so it can be skipped. + */ + if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up)) + ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason); + ena_free_mgmnt_irq(adapter); ena_disable_msix(adapter); @@ -2606,9 +2567,17 @@ static void ena_fw_reset_device(struct work_struct *work) ena_com_mmio_reg_read_request_destroy(ena_dev); adapter->reset_reason = ENA_REGS_RESET_NORMAL; + clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); +} - /* Finish with the destroy part. Start the init part */ +static int ena_restore_device(struct ena_adapter *adapter) +{ + struct ena_com_dev_get_features_ctx get_feat_ctx; + struct ena_com_dev *ena_dev = adapter->ena_dev; + struct pci_dev *pdev = adapter->pdev; + bool wd_state; + int rc; rc = ena_device_init(ena_dev, adapter->pdev, &get_feat_ctx, &wd_state); if (rc) { @@ -2630,7 +2599,7 @@ static void ena_fw_reset_device(struct work_struct *work) goto err_device_destroy; } /* If the interface was up before the reset bring it up */ - if (dev_up) { + if (adapter->dev_up_before_reset) { rc = ena_up(adapter); if (rc) { dev_err(&pdev->dev, "Failed to create I/O queues\n"); @@ -2639,24 +2608,38 @@ static void ena_fw_reset_device(struct work_struct *work) } mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ)); - - rtnl_unlock(); - dev_err(&pdev->dev, "Device reset completed successfully\n"); - return; + return rc; err_disable_msix: ena_free_mgmnt_irq(adapter); ena_disable_msix(adapter); err_device_destroy: ena_com_admin_destroy(ena_dev); err: - rtnl_unlock(); - clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); dev_err(&pdev->dev, "Reset attempt failed. Can not reset the device\n"); + + return rc; +} + +static void ena_fw_reset_device(struct work_struct *work) +{ + struct ena_adapter *adapter = + container_of(work, struct ena_adapter, reset_task); + struct pci_dev *pdev = adapter->pdev; + + if (unlikely(!test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) { + dev_err(&pdev->dev, + "device reset schedule while reset bit is off\n"); + return; + } + rtnl_lock(); + ena_destroy_device(adapter); + ena_restore_device(adapter); + rtnl_unlock(); } static int check_missing_comp_in_queue(struct ena_adapter *adapter, @@ -2665,7 +2648,7 @@ static int check_missing_comp_in_queue(struct ena_adapter *adapter, struct ena_tx_buffer *tx_buf; unsigned long last_jiffies; u32 missed_tx = 0; - int i; + int i, rc = 0; for (i = 0; i < tx_ring->ring_size; i++) { tx_buf = &tx_ring->tx_buffer_info[i]; @@ -2679,21 +2662,25 @@ static int check_missing_comp_in_queue(struct ena_adapter *adapter, tx_buf->print_once = 1; missed_tx++; - - if (unlikely(missed_tx > adapter->missing_tx_completion_threshold)) { - netif_err(adapter, tx_err, adapter->netdev, - "The number of lost tx completions is above the threshold (%d > %d). Reset the device\n", - missed_tx, - adapter->missing_tx_completion_threshold); - adapter->reset_reason = - ENA_REGS_RESET_MISS_TX_CMPL; - set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); - return -EIO; - } } } - return 0; + if (unlikely(missed_tx > adapter->missing_tx_completion_threshold)) { + netif_err(adapter, tx_err, adapter->netdev, + "The number of lost tx completions is above the threshold (%d > %d). Reset the device\n", + missed_tx, + adapter->missing_tx_completion_threshold); + adapter->reset_reason = + ENA_REGS_RESET_MISS_TX_CMPL; + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + rc = -EIO; + } + + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.missed_tx = missed_tx; + u64_stats_update_end(&tx_ring->syncp); + + return rc; } static void check_for_missing_tx_completions(struct ena_adapter *adapter) @@ -3276,8 +3263,6 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_rss; } - INIT_WORK(&adapter->suspend_io_task, ena_device_io_suspend); - INIT_WORK(&adapter->resume_io_task, ena_device_io_resume); INIT_WORK(&adapter->reset_task, ena_fw_reset_device); adapter->last_keep_alive_jiffies = jiffies; @@ -3311,8 +3296,6 @@ err_free_msix: err_worker_destroy: ena_com_destroy_interrupt_moderation(ena_dev); del_timer(&adapter->timer_service); - cancel_work_sync(&adapter->suspend_io_task); - cancel_work_sync(&adapter->resume_io_task); err_netdev_destroy: free_netdev(netdev); err_device_destroy: @@ -3382,10 +3365,6 @@ static void ena_remove(struct pci_dev *pdev) cancel_work_sync(&adapter->reset_task); - cancel_work_sync(&adapter->suspend_io_task); - - cancel_work_sync(&adapter->resume_io_task); - /* Reset the device only if the device is running. */ if (test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags)) ena_com_dev_reset(ena_dev, adapter->reset_reason); @@ -3419,11 +3398,59 @@ static void ena_remove(struct pci_dev *pdev) vfree(ena_dev); } +#ifdef CONFIG_PM +/* ena_suspend - PM suspend callback + * @pdev: PCI device information struct + * @state:power state + */ +static int ena_suspend(struct pci_dev *pdev, pm_message_t state) +{ + struct ena_adapter *adapter = pci_get_drvdata(pdev); + + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.suspend++; + u64_stats_update_end(&adapter->syncp); + + rtnl_lock(); + if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) { + dev_err(&pdev->dev, + "ignoring device reset request as the device is being suspended\n"); + clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + } + ena_destroy_device(adapter); + rtnl_unlock(); + return 0; +} + +/* ena_resume - PM resume callback + * @pdev: PCI device information struct + * + */ +static int ena_resume(struct pci_dev *pdev) +{ + struct ena_adapter *adapter = pci_get_drvdata(pdev); + int rc; + + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.resume++; + u64_stats_update_end(&adapter->syncp); + + rtnl_lock(); + rc = ena_restore_device(adapter); + rtnl_unlock(); + return rc; +} +#endif + static struct pci_driver ena_pci_driver = { .name = DRV_MODULE_NAME, .id_table = ena_pci_tbl, .probe = ena_probe, .remove = ena_remove, +#ifdef CONFIG_PM + .suspend = ena_suspend, + .resume = ena_resume, +#endif .sriov_configure = ena_sriov_configure, }; @@ -3504,16 +3531,6 @@ static void ena_notification(void *adapter_data, ENA_ADMIN_NOTIFICATION); switch (aenq_e->aenq_common_desc.syndrom) { - case ENA_ADMIN_SUSPEND: - /* Suspend just the IO queues. - * We deliberately don't suspend admin so the timer and - * the keep_alive events should remain. - */ - queue_work(ena_wq, &adapter->suspend_io_task); - break; - case ENA_ADMIN_RESUME: - queue_work(ena_wq, &adapter->resume_io_task); - break; case ENA_ADMIN_UPDATE_HINTS: hints = (struct ena_admin_ena_hw_hints *) (&aenq_e->inline_data_w4); diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index 29bb5704260b..ed8bd0a579c4 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -44,7 +44,7 @@ #include "ena_eth_com.h" #define DRV_MODULE_VER_MAJOR 1 -#define DRV_MODULE_VER_MINOR 2 +#define DRV_MODULE_VER_MINOR 3 #define DRV_MODULE_VER_SUBMINOR 0 #define DRV_MODULE_NAME "ena" @@ -52,7 +52,7 @@ #define DRV_MODULE_VERSION \ __stringify(DRV_MODULE_VER_MAJOR) "." \ __stringify(DRV_MODULE_VER_MINOR) "." \ - __stringify(DRV_MODULE_VER_SUBMINOR) "k" + __stringify(DRV_MODULE_VER_SUBMINOR) "K" #endif #define DEVICE_NAME "Elastic Network Adapter (ENA)" @@ -185,6 +185,7 @@ struct ena_stats_tx { u64 tx_poll; u64 doorbells; u64 bad_req_id; + u64 missed_tx; }; struct ena_stats_rx { @@ -257,8 +258,8 @@ struct ena_ring { struct ena_stats_dev { u64 tx_timeout; - u64 io_suspend; - u64 io_resume; + u64 suspend; + u64 resume; u64 wd_expired; u64 interface_up; u64 interface_down; @@ -326,11 +327,10 @@ struct ena_adapter { /* timer service */ struct work_struct reset_task; - struct work_struct suspend_io_task; - struct work_struct resume_io_task; struct timer_list timer_service; bool wd_state; + bool dev_up_before_reset; unsigned long last_keep_alive_jiffies; struct u64_stats_sync syncp; diff --git a/drivers/net/ethernet/amd/a2065.c b/drivers/net/ethernet/amd/a2065.c index e22f976a0d18..212fe72a190b 100644 --- a/drivers/net/ethernet/amd/a2065.c +++ b/drivers/net/ethernet/amd/a2065.c @@ -123,6 +123,7 @@ struct lance_private { int burst_sizes; /* ledma SBus burst sizes */ #endif struct timer_list multicast_timer; + struct net_device *dev; }; #define LANCE_ADDR(x) ((int)(x) & ~0xff000000) @@ -638,6 +639,13 @@ static void lance_set_multicast(struct net_device *dev) netif_wake_queue(dev); } +static void lance_set_multicast_retry(struct timer_list *t) +{ + struct lance_private *lp = from_timer(lp, t, multicast_timer); + + lance_set_multicast(lp->dev); +} + static int a2065_init_one(struct zorro_dev *z, const struct zorro_device_id *ent); static void a2065_remove_one(struct zorro_dev *z); @@ -728,15 +736,13 @@ static int a2065_init_one(struct zorro_dev *z, priv->lance_log_tx_bufs = LANCE_LOG_TX_BUFFERS; priv->rx_ring_mod_mask = RX_RING_MOD_MASK; priv->tx_ring_mod_mask = TX_RING_MOD_MASK; + priv->dev = dev; dev->netdev_ops = &lance_netdev_ops; dev->watchdog_timeo = 5*HZ; dev->dma = 0; - init_timer(&priv->multicast_timer); - priv->multicast_timer.data = (unsigned long) dev; - priv->multicast_timer.function = - (void (*)(unsigned long))lance_set_multicast; + timer_setup(&priv->multicast_timer, lance_set_multicast_retry, 0); err = register_netdev(dev); if (err) { diff --git a/drivers/net/ethernet/amd/am79c961a.c b/drivers/net/ethernet/amd/am79c961a.c index b11e910850f7..01d132c02ff9 100644 --- a/drivers/net/ethernet/amd/am79c961a.c +++ b/drivers/net/ethernet/amd/am79c961a.c @@ -302,10 +302,10 @@ am79c961_init_for_open(struct net_device *dev) write_rreg (dev->base_addr, CSR0, CSR0_IENA|CSR0_STRT); } -static void am79c961_timer(unsigned long data) +static void am79c961_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct dev_priv *priv = netdev_priv(dev); + struct dev_priv *priv = from_timer(priv, t, timer); + struct net_device *dev = priv->dev; unsigned int lnkstat, carrier; unsigned long flags; @@ -728,9 +728,8 @@ static int am79c961_probe(struct platform_device *pdev) am79c961_banner(); spin_lock_init(&priv->chip_lock); - init_timer(&priv->timer); - priv->timer.data = (unsigned long)dev; - priv->timer.function = am79c961_timer; + priv->dev = dev; + timer_setup(&priv->timer, am79c961_timer, 0); if (am79c961_hw_init(dev)) goto release; diff --git a/drivers/net/ethernet/amd/am79c961a.h b/drivers/net/ethernet/amd/am79c961a.h index 9f384b79507b..fc5088c70731 100644 --- a/drivers/net/ethernet/amd/am79c961a.h +++ b/drivers/net/ethernet/amd/am79c961a.h @@ -140,6 +140,7 @@ struct dev_priv { unsigned long txhdr; spinlock_t chip_lock; struct timer_list timer; + struct net_device *dev; }; #endif diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index 7b5df562f30f..358f7ab77c70 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -1669,9 +1669,9 @@ static int amd8111e_resume(struct pci_dev *pci_dev) return 0; } -static void amd8111e_config_ipg(struct net_device *dev) +static void amd8111e_config_ipg(struct timer_list *t) { - struct amd8111e_priv *lp = netdev_priv(dev); + struct amd8111e_priv *lp = from_timer(lp, t, ipg_data.ipg_timer); struct ipg_info *ipg_data = &lp->ipg_data; void __iomem *mmio = lp->mmio; unsigned int prev_col_cnt = ipg_data->col_cnt; @@ -1883,9 +1883,7 @@ static int amd8111e_probe_one(struct pci_dev *pdev, /* Initialize software ipg timer */ if(lp->options & OPTION_DYN_IPG_ENABLE){ - init_timer(&lp->ipg_data.ipg_timer); - lp->ipg_data.ipg_timer.data = (unsigned long) dev; - lp->ipg_data.ipg_timer.function = (void *)&amd8111e_config_ipg; + timer_setup(&lp->ipg_data.ipg_timer, amd8111e_config_ipg, 0); lp->ipg_data.ipg_timer.expires = jiffies + IPG_CONVERGE_JIFFIES; lp->ipg_data.ipg = DEFAULT_IPG; diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c index 82cc81385033..116997a8b593 100644 --- a/drivers/net/ethernet/amd/declance.c +++ b/drivers/net/ethernet/amd/declance.c @@ -260,6 +260,7 @@ struct lance_private { unsigned short busmaster_regval; struct timer_list multicast_timer; + struct net_device *dev; /* Pointers to the ring buffers as seen from the CPU */ char *rx_buf_ptr_cpu[RX_RING_SIZE]; @@ -1000,9 +1001,10 @@ static void lance_set_multicast(struct net_device *dev) netif_wake_queue(dev); } -static void lance_set_multicast_retry(unsigned long _opaque) +static void lance_set_multicast_retry(struct timer_list *t) { - struct net_device *dev = (struct net_device *) _opaque; + struct lance_private *lp = from_timer(lp, t, multicast_timer); + struct net_device *dev = lp->dev; lance_set_multicast(dev); } @@ -1246,9 +1248,9 @@ static int dec_lance_probe(struct device *bdev, const int type) * can occur from interrupts (ex. IPv6). So we * use a timer to try again later when necessary. -DaveM */ - init_timer(&lp->multicast_timer); - lp->multicast_timer.data = (unsigned long) dev; - lp->multicast_timer.function = lance_set_multicast_retry; + lp->dev = dev; + timer_setup(&lp->multicast_timer, lance_set_multicast_retry, 0); + ret = register_netdev(dev); if (ret) { diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c index 7f60d17819ce..a561705f232c 100644 --- a/drivers/net/ethernet/amd/pcnet32.c +++ b/drivers/net/ethernet/amd/pcnet32.c @@ -321,7 +321,7 @@ static struct net_device_stats *pcnet32_get_stats(struct net_device *); static void pcnet32_load_multicast(struct net_device *dev); static void pcnet32_set_multicast_list(struct net_device *); static int pcnet32_ioctl(struct net_device *, struct ifreq *, int); -static void pcnet32_watchdog(struct net_device *); +static void pcnet32_watchdog(struct timer_list *); static int mdio_read(struct net_device *dev, int phy_id, int reg_num); static void mdio_write(struct net_device *dev, int phy_id, int reg_num, int val); @@ -1970,9 +1970,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) lp->options |= PCNET32_PORT_MII; } - init_timer(&lp->watchdog_timer); - lp->watchdog_timer.data = (unsigned long)dev; - lp->watchdog_timer.function = (void *)&pcnet32_watchdog; + timer_setup(&lp->watchdog_timer, pcnet32_watchdog, 0); /* The PCNET32-specific entries in the device structure. */ dev->netdev_ops = &pcnet32_netdev_ops; @@ -2902,9 +2900,10 @@ static void pcnet32_check_media(struct net_device *dev, int verbose) * Could possibly be changed to use mii_check_media instead. */ -static void pcnet32_watchdog(struct net_device *dev) +static void pcnet32_watchdog(struct timer_list *t) { - struct pcnet32_private *lp = netdev_priv(dev); + struct pcnet32_private *lp = from_timer(lp, t, watchdog_timer); + struct net_device *dev = lp->dev; unsigned long flags; /* Print the link status if it has changed */ diff --git a/drivers/net/ethernet/amd/sunlance.c b/drivers/net/ethernet/amd/sunlance.c index 291ca5187f12..cdd7a611479b 100644 --- a/drivers/net/ethernet/amd/sunlance.c +++ b/drivers/net/ethernet/amd/sunlance.c @@ -1248,9 +1248,10 @@ static void lance_set_multicast(struct net_device *dev) netif_wake_queue(dev); } -static void lance_set_multicast_retry(unsigned long _opaque) +static void lance_set_multicast_retry(struct timer_list *t) { - struct net_device *dev = (struct net_device *) _opaque; + struct lance_private *lp = from_timer(lp, t, multicast_timer); + struct net_device *dev = lp->dev; lance_set_multicast(dev); } @@ -1459,9 +1460,7 @@ no_link_test: * can occur from interrupts (ex. IPv6). So we * use a timer to try again later when necessary. -DaveM */ - init_timer(&lp->multicast_timer); - lp->multicast_timer.data = (unsigned long) dev; - lp->multicast_timer.function = lance_set_multicast_retry; + timer_setup(&lp->multicast_timer, lance_set_multicast_retry, 0); if (register_netdev(dev)) { printk(KERN_ERR "SunLance: Cannot register device.\n"); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c index 45d92304068e..cc1e4f820e64 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c @@ -295,7 +295,7 @@ again: order = alloc_order; /* Try to obtain pages, decreasing order if necessary */ - gfp = GFP_ATOMIC | __GFP_COLD | __GFP_COMP | __GFP_NOWARN; + gfp = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN; while (order >= 0) { pages = alloc_pages_node(node, gfp, order); if (pages) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 608693d11bd7..a74a8fbad53a 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -642,9 +642,9 @@ static irqreturn_t xgbe_dma_isr(int irq, void *data) return IRQ_HANDLED; } -static void xgbe_tx_timer(unsigned long data) +static void xgbe_tx_timer(struct timer_list *t) { - struct xgbe_channel *channel = (struct xgbe_channel *)data; + struct xgbe_channel *channel = from_timer(channel, t, tx_timer); struct xgbe_prv_data *pdata = channel->pdata; struct napi_struct *napi; @@ -680,9 +680,9 @@ static void xgbe_service(struct work_struct *work) pdata->phy_if.phy_status(pdata); } -static void xgbe_service_timer(unsigned long data) +static void xgbe_service_timer(struct timer_list *t) { - struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data; + struct xgbe_prv_data *pdata = from_timer(pdata, t, service_timer); queue_work(pdata->dev_workqueue, &pdata->service_work); @@ -694,16 +694,14 @@ static void xgbe_init_timers(struct xgbe_prv_data *pdata) struct xgbe_channel *channel; unsigned int i; - setup_timer(&pdata->service_timer, xgbe_service_timer, - (unsigned long)pdata); + timer_setup(&pdata->service_timer, xgbe_service_timer, 0); for (i = 0; i < pdata->channel_count; i++) { channel = pdata->channel[i]; if (!channel->tx_ring) break; - setup_timer(&channel->tx_timer, xgbe_tx_timer, - (unsigned long)channel); + timer_setup(&channel->tx_timer, xgbe_tx_timer, 0); } } @@ -2208,7 +2206,7 @@ static int xgbe_setup_tc(struct net_device *netdev, enum tc_setup_type type, struct tc_mqprio_qopt *mqprio = type_data; u8 tc; - if (type != TC_SETUP_MQPRIO) + if (type != TC_SETUP_QDISC_MQPRIO) return -EOPNOTSUPP; mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; diff --git a/drivers/net/ethernet/apple/bmac.c b/drivers/net/ethernet/apple/bmac.c index eac740c476ce..5a655d289dd5 100644 --- a/drivers/net/ethernet/apple/bmac.c +++ b/drivers/net/ethernet/apple/bmac.c @@ -157,7 +157,7 @@ static irqreturn_t bmac_misc_intr(int irq, void *dev_id); static irqreturn_t bmac_txdma_intr(int irq, void *dev_id); static irqreturn_t bmac_rxdma_intr(int irq, void *dev_id); static void bmac_set_timeout(struct net_device *dev); -static void bmac_tx_timeout(unsigned long data); +static void bmac_tx_timeout(struct timer_list *t); static int bmac_output(struct sk_buff *skb, struct net_device *dev); static void bmac_start(struct net_device *dev); @@ -555,8 +555,6 @@ static inline void bmac_set_timeout(struct net_device *dev) if (bp->timeout_active) del_timer(&bp->tx_timeout); bp->tx_timeout.expires = jiffies + TX_TIMEOUT; - bp->tx_timeout.function = bmac_tx_timeout; - bp->tx_timeout.data = (unsigned long) dev; add_timer(&bp->tx_timeout); bp->timeout_active = 1; spin_unlock_irqrestore(&bp->lock, flags); @@ -1321,7 +1319,7 @@ static int bmac_probe(struct macio_dev *mdev, const struct of_device_id *match) bp->queue = (struct sk_buff_head *)(bp->rx_cmds + N_RX_RING + 1); skb_queue_head_init(bp->queue); - init_timer(&bp->tx_timeout); + timer_setup(&bp->tx_timeout, bmac_tx_timeout, 0); ret = request_irq(dev->irq, bmac_misc_intr, 0, "BMAC-misc", dev); if (ret) { @@ -1471,10 +1469,10 @@ bmac_output(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -static void bmac_tx_timeout(unsigned long data) +static void bmac_tx_timeout(struct timer_list *t) { - struct net_device *dev = (struct net_device *) data; - struct bmac_data *bp = netdev_priv(dev); + struct bmac_data *bp = from_timer(bp, t, tx_timeout); + struct net_device *dev = macio_get_drvdata(bp->mdev); volatile struct dbdma_regs __iomem *td = bp->tx_dma; volatile struct dbdma_regs __iomem *rd = bp->rx_dma; volatile struct dbdma_cmd *cp; diff --git a/drivers/net/ethernet/apple/mace.c b/drivers/net/ethernet/apple/mace.c index e58b157b7d7c..0b5429d76bcf 100644 --- a/drivers/net/ethernet/apple/mace.c +++ b/drivers/net/ethernet/apple/mace.c @@ -86,7 +86,7 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id); static irqreturn_t mace_txdma_intr(int irq, void *dev_id); static irqreturn_t mace_rxdma_intr(int irq, void *dev_id); static void mace_set_timeout(struct net_device *dev); -static void mace_tx_timeout(unsigned long data); +static void mace_tx_timeout(struct timer_list *t); static inline void dbdma_reset(volatile struct dbdma_regs __iomem *dma); static inline void mace_clean_rings(struct mace_data *mp); static void __mace_set_address(struct net_device *dev, void *addr); @@ -196,7 +196,7 @@ static int mace_probe(struct macio_dev *mdev, const struct of_device_id *match) memset((char *) mp->tx_cmds, 0, (NCMDS_TX*N_TX_RING + N_RX_RING + 2) * sizeof(struct dbdma_cmd)); - init_timer(&mp->tx_timeout); + timer_setup(&mp->tx_timeout, mace_tx_timeout, 0); spin_lock_init(&mp->lock); mp->timeout_active = 0; @@ -521,8 +521,6 @@ static inline void mace_set_timeout(struct net_device *dev) if (mp->timeout_active) del_timer(&mp->tx_timeout); mp->tx_timeout.expires = jiffies + TX_TIMEOUT; - mp->tx_timeout.function = mace_tx_timeout; - mp->tx_timeout.data = (unsigned long) dev; add_timer(&mp->tx_timeout); mp->timeout_active = 1; } @@ -801,10 +799,10 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static void mace_tx_timeout(unsigned long data) +static void mace_tx_timeout(struct timer_list *t) { - struct net_device *dev = (struct net_device *) data; - struct mace_data *mp = netdev_priv(dev); + struct mace_data *mp = from_timer(mp, t, tx_timeout); + struct net_device *dev = macio_get_drvdata(mp->mdev); volatile struct mace __iomem *mb = mp->mace; volatile struct dbdma_regs __iomem *td = mp->tx_dma; volatile struct dbdma_regs __iomem *rd = mp->rx_dma; diff --git a/drivers/net/ethernet/aquantia/Kconfig b/drivers/net/ethernet/aquantia/Kconfig index cdf78e069a39..7d623e90dc19 100644 --- a/drivers/net/ethernet/aquantia/Kconfig +++ b/drivers/net/ethernet/aquantia/Kconfig @@ -9,7 +9,7 @@ config NET_VENDOR_AQUANTIA Set this to y if you have an Ethernet network cards that uses the aQuantia AQC107/AQC108 chipset. - This option does not build any drivers; it casues the aQuantia + This option does not build any drivers; it causes the aQuantia drivers that can be built to appear in the list of Ethernet drivers. diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c index d5e99b468870..70efb7467bf3 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c @@ -221,8 +221,8 @@ static int aq_ethtool_get_rxnfc(struct net_device *ndev, return err; } -int aq_ethtool_get_coalesce(struct net_device *ndev, - struct ethtool_coalesce *coal) +static int aq_ethtool_get_coalesce(struct net_device *ndev, + struct ethtool_coalesce *coal) { struct aq_nic_s *aq_nic = netdev_priv(ndev); struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(aq_nic); @@ -242,8 +242,8 @@ int aq_ethtool_get_coalesce(struct net_device *ndev, return 0; } -int aq_ethtool_set_coalesce(struct net_device *ndev, - struct ethtool_coalesce *coal) +static int aq_ethtool_set_coalesce(struct net_device *ndev, + struct ethtool_coalesce *coal) { struct aq_nic_s *aq_nic = netdev_priv(ndev); struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(aq_nic); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 0654e0c76bc2..519ca6534b85 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -304,8 +304,7 @@ int aq_ring_rx_fill(struct aq_ring_s *self) buff->flags = 0U; buff->len = AQ_CFG_RX_FRAME_MAX; - buff->page = alloc_pages(GFP_ATOMIC | __GFP_COLD | - __GFP_COMP, pages_order); + buff->page = alloc_pages(GFP_ATOMIC | __GFP_COMP, pages_order); if (!buff->page) { err = -ENOMEM; goto err_exit; diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index 67134ece1107..af75156919ed 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -184,6 +184,7 @@ config BGMAC_PLATFORM config SYSTEMPORT tristate "Broadcom SYSTEMPORT internal MAC support" depends on OF + depends on NET_DSA || !NET_DSA select MII select PHYLIB select FIXED_PHY diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index a1125d10c825..42e44fc03a18 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -1474,10 +1474,8 @@ static int b44_open(struct net_device *dev) goto out; } - init_timer(&bp->timer); + setup_timer(&bp->timer, b44_timer, (unsigned long)bp); bp->timer.expires = jiffies + HZ; - bp->timer.data = (unsigned long) bp; - bp->timer.function = b44_timer; add_timer(&bp->timer); b44_enable_ints(bp); diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 4f3845a58126..d9346e2ac720 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -295,16 +295,13 @@ static int bcm_enet_refill_rx(struct net_device *dev) /* * timer callback to defer refill rx queue in case we're OOM */ -static void bcm_enet_refill_rx_timer(unsigned long data) +static void bcm_enet_refill_rx_timer(struct timer_list *t) { - struct net_device *dev; - struct bcm_enet_priv *priv; - - dev = (struct net_device *)data; - priv = netdev_priv(dev); + struct bcm_enet_priv *priv = from_timer(priv, t, rx_timeout); + struct net_device *dev = priv->net_dev; spin_lock(&priv->rx_lock); - bcm_enet_refill_rx((struct net_device *)data); + bcm_enet_refill_rx(dev); spin_unlock(&priv->rx_lock); } @@ -1062,7 +1059,8 @@ static int bcm_enet_open(struct net_device *dev) val = enet_readl(priv, ENET_CTL_REG); val |= ENET_CTL_ENABLE_MASK; enet_writel(priv, val, ENET_CTL_REG); - enet_dma_writel(priv, ENETDMA_CFG_EN_MASK, ENETDMA_CFG_REG); + if (priv->dma_has_sram) + enet_dma_writel(priv, ENETDMA_CFG_EN_MASK, ENETDMA_CFG_REG); enet_dmac_writel(priv, priv->dma_chan_en_mask, ENETDMAC_CHANCFG, priv->rx_chan); @@ -1721,10 +1719,8 @@ static int bcm_enet_probe(struct platform_device *pdev) const char *clk_name; int i, ret; - /* stop if shared driver failed, assume driver->probe will be - * called in the same order we register devices (correct ?) */ if (!bcm_enet_shared_base[0]) - return -ENODEV; + return -EPROBE_DEFER; res_irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); res_irq_rx = platform_get_resource(pdev, IORESOURCE_IRQ, 1); @@ -1768,12 +1764,14 @@ static int bcm_enet_probe(struct platform_device *pdev) clk_name = "enet1"; } - priv->mac_clk = clk_get(&pdev->dev, clk_name); + priv->mac_clk = devm_clk_get(&pdev->dev, clk_name); if (IS_ERR(priv->mac_clk)) { ret = PTR_ERR(priv->mac_clk); goto out; } - clk_prepare_enable(priv->mac_clk); + ret = clk_prepare_enable(priv->mac_clk); + if (ret) + goto out; /* initialize default and fetch platform data */ priv->rx_ring_size = BCMENET_DEF_RX_DESC; @@ -1801,13 +1799,15 @@ static int bcm_enet_probe(struct platform_device *pdev) if (priv->mac_id == 0 && priv->has_phy && !priv->use_external_mii) { /* using internal PHY, enable clock */ - priv->phy_clk = clk_get(&pdev->dev, "ephy"); + priv->phy_clk = devm_clk_get(&pdev->dev, "ephy"); if (IS_ERR(priv->phy_clk)) { ret = PTR_ERR(priv->phy_clk); priv->phy_clk = NULL; - goto out_put_clk_mac; + goto out_disable_clk_mac; } - clk_prepare_enable(priv->phy_clk); + ret = clk_prepare_enable(priv->phy_clk); + if (ret) + goto out_disable_clk_mac; } /* do minimal hardware init to be able to probe mii bus */ @@ -1857,9 +1857,7 @@ static int bcm_enet_probe(struct platform_device *pdev) spin_lock_init(&priv->rx_lock); /* init rx timeout (used for oom) */ - init_timer(&priv->rx_timeout); - priv->rx_timeout.function = bcm_enet_refill_rx_timer; - priv->rx_timeout.data = (unsigned long)dev; + timer_setup(&priv->rx_timeout, bcm_enet_refill_rx_timer, 0); /* init the mib update lock&work */ mutex_init(&priv->mib_update_lock); @@ -1901,14 +1899,10 @@ out_free_mdio: out_uninit_hw: /* turn off mdc clock */ enet_writel(priv, 0, ENET_MIISC_REG); - if (priv->phy_clk) { - clk_disable_unprepare(priv->phy_clk); - clk_put(priv->phy_clk); - } + clk_disable_unprepare(priv->phy_clk); -out_put_clk_mac: +out_disable_clk_mac: clk_disable_unprepare(priv->mac_clk); - clk_put(priv->mac_clk); out: free_netdev(dev); return ret; @@ -1944,12 +1938,8 @@ static int bcm_enet_remove(struct platform_device *pdev) } /* disable hw block clocks */ - if (priv->phy_clk) { - clk_disable_unprepare(priv->phy_clk); - clk_put(priv->phy_clk); - } + clk_disable_unprepare(priv->phy_clk); clk_disable_unprepare(priv->mac_clk); - clk_put(priv->mac_clk); free_netdev(dev); return 0; @@ -2021,9 +2011,9 @@ static inline int bcm_enet_port_is_rgmii(int portid) /* * enet sw PHY polling */ -static void swphy_poll_timer(unsigned long data) +static void swphy_poll_timer(struct timer_list *t) { - struct bcm_enet_priv *priv = (struct bcm_enet_priv *)data; + struct bcm_enet_priv *priv = from_timer(priv, t, swphy_poll); unsigned int i; for (i = 0; i < priv->num_ports; i++) { @@ -2332,11 +2322,8 @@ static int bcm_enetsw_open(struct net_device *dev) } /* start phy polling timer */ - init_timer(&priv->swphy_poll); - priv->swphy_poll.function = swphy_poll_timer; - priv->swphy_poll.data = (unsigned long)priv; - priv->swphy_poll.expires = jiffies; - add_timer(&priv->swphy_poll); + timer_setup(&priv->swphy_poll, swphy_poll_timer, 0); + mod_timer(&priv->swphy_poll, jiffies); return 0; out: @@ -2692,11 +2679,8 @@ static int bcm_enetsw_probe(struct platform_device *pdev) struct resource *res_mem; int ret, irq_rx, irq_tx; - /* stop if shared driver failed, assume driver->probe will be - * called in the same order we register devices (correct ?) - */ if (!bcm_enet_shared_base[0]) - return -ENODEV; + return -EPROBE_DEFER; res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); irq_rx = platform_get_irq(pdev, 0); @@ -2735,33 +2719,27 @@ static int bcm_enetsw_probe(struct platform_device *pdev) if (ret) goto out; - if (!request_mem_region(res_mem->start, resource_size(res_mem), - "bcm63xx_enetsw")) { - ret = -EBUSY; + priv->base = devm_ioremap_resource(&pdev->dev, res_mem); + if (IS_ERR(priv->base)) { + ret = PTR_ERR(priv->base); goto out; } - priv->base = ioremap(res_mem->start, resource_size(res_mem)); - if (priv->base == NULL) { - ret = -ENOMEM; - goto out_release_mem; - } - - priv->mac_clk = clk_get(&pdev->dev, "enetsw"); + priv->mac_clk = devm_clk_get(&pdev->dev, "enetsw"); if (IS_ERR(priv->mac_clk)) { ret = PTR_ERR(priv->mac_clk); - goto out_unmap; + goto out; } - clk_enable(priv->mac_clk); + ret = clk_prepare_enable(priv->mac_clk); + if (ret) + goto out; priv->rx_chan = 0; priv->tx_chan = 1; spin_lock_init(&priv->rx_lock); /* init rx timeout (used for oom) */ - init_timer(&priv->rx_timeout); - priv->rx_timeout.function = bcm_enet_refill_rx_timer; - priv->rx_timeout.data = (unsigned long)dev; + timer_setup(&priv->rx_timeout, bcm_enet_refill_rx_timer, 0); /* register netdevice */ dev->netdev_ops = &bcm_enetsw_ops; @@ -2773,7 +2751,7 @@ static int bcm_enetsw_probe(struct platform_device *pdev) ret = register_netdev(dev); if (ret) - goto out_put_clk; + goto out_disable_clk; netif_carrier_off(dev); platform_set_drvdata(pdev, dev); @@ -2782,14 +2760,8 @@ static int bcm_enetsw_probe(struct platform_device *pdev) return 0; -out_put_clk: - clk_put(priv->mac_clk); - -out_unmap: - iounmap(priv->base); - -out_release_mem: - release_mem_region(res_mem->start, resource_size(res_mem)); +out_disable_clk: + clk_disable_unprepare(priv->mac_clk); out: free_netdev(dev); return ret; @@ -2801,17 +2773,13 @@ static int bcm_enetsw_remove(struct platform_device *pdev) { struct bcm_enet_priv *priv; struct net_device *dev; - struct resource *res; /* stop netdevice */ dev = platform_get_drvdata(pdev); priv = netdev_priv(dev); unregister_netdev(dev); - /* release device resources */ - iounmap(priv->base); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - release_mem_region(res->start, resource_size(res)); + clk_disable_unprepare(priv->mac_clk); free_netdev(dev); return 0; diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.h b/drivers/net/ethernet/broadcom/bcm63xx_enet.h index c6f6f14e87ca..5a66728d4776 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.h +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.h @@ -9,7 +9,6 @@ #include <linux/platform_device.h> #include <bcm63xx_regs.h> -#include <bcm63xx_irq.h> #include <bcm63xx_io.h> #include <bcm63xx_iudma.h> diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index eb441e5e2cd8..087f01b4dc3a 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1416,9 +1416,24 @@ static int bcm_sysport_init_tx_ring(struct bcm_sysport_priv *priv, tdma_writel(priv, 0, TDMA_DESC_RING_COUNT(index)); tdma_writel(priv, 1, TDMA_DESC_RING_INTR_CONTROL(index)); tdma_writel(priv, 0, TDMA_DESC_RING_PROD_CONS_INDEX(index)); - tdma_writel(priv, RING_IGNORE_STATUS, TDMA_DESC_RING_MAPPING(index)); + + /* Configure QID and port mapping */ + reg = tdma_readl(priv, TDMA_DESC_RING_MAPPING(index)); + reg &= ~(RING_QID_MASK | RING_PORT_ID_MASK << RING_PORT_ID_SHIFT); + if (ring->inspect) { + reg |= ring->switch_queue & RING_QID_MASK; + reg |= ring->switch_port << RING_PORT_ID_SHIFT; + } else { + reg |= RING_IGNORE_STATUS; + } + tdma_writel(priv, reg, TDMA_DESC_RING_MAPPING(index)); tdma_writel(priv, 0, TDMA_DESC_RING_PCP_DEI_VID(index)); + /* Enable ACB algorithm 2 */ + reg = tdma_readl(priv, TDMA_CONTROL); + reg |= tdma_control_bit(priv, ACB_ALGO); + tdma_writel(priv, reg, TDMA_CONTROL); + /* Do not use tdma_control_bit() here because TSB_SWAP1 collides * with the original definition of ACB_ALGO */ @@ -1447,8 +1462,9 @@ static int bcm_sysport_init_tx_ring(struct bcm_sysport_priv *priv, napi_enable(&ring->napi); netif_dbg(priv, hw, priv->netdev, - "TDMA cfg, size=%d, desc_cpu=%p\n", - ring->size, ring->desc_cpu); + "TDMA cfg, size=%d, desc_cpu=%p switch q=%d,port=%d\n", + ring->size, ring->desc_cpu, ring->switch_queue, + ring->switch_port); return 0; } @@ -2013,6 +2029,29 @@ static const struct ethtool_ops bcm_sysport_ethtool_ops = { .set_link_ksettings = phy_ethtool_set_link_ksettings, }; +static u16 bcm_sysport_select_queue(struct net_device *dev, struct sk_buff *skb, + void *accel_priv, + select_queue_fallback_t fallback) +{ + struct bcm_sysport_priv *priv = netdev_priv(dev); + u16 queue = skb_get_queue_mapping(skb); + struct bcm_sysport_tx_ring *tx_ring; + unsigned int q, port; + + if (!netdev_uses_dsa(dev)) + return fallback(dev, skb); + + /* DSA tagging layer will have configured the correct queue */ + q = BRCM_TAG_GET_QUEUE(queue); + port = BRCM_TAG_GET_PORT(queue); + tx_ring = priv->ring_map[q + port * priv->per_port_num_tx_queues]; + + if (unlikely(!tx_ring)) + return fallback(dev, skb); + + return tx_ring->index; +} + static const struct net_device_ops bcm_sysport_netdev_ops = { .ndo_start_xmit = bcm_sysport_xmit, .ndo_tx_timeout = bcm_sysport_tx_timeout, @@ -2025,8 +2064,79 @@ static const struct net_device_ops bcm_sysport_netdev_ops = { .ndo_poll_controller = bcm_sysport_poll_controller, #endif .ndo_get_stats64 = bcm_sysport_get_stats64, + .ndo_select_queue = bcm_sysport_select_queue, }; +static int bcm_sysport_map_queues(struct net_device *dev, + struct dsa_notifier_register_info *info) +{ + struct bcm_sysport_priv *priv = netdev_priv(dev); + struct bcm_sysport_tx_ring *ring; + struct net_device *slave_dev; + unsigned int num_tx_queues; + unsigned int q, start, port; + + /* We can't be setting up queue inspection for non directly attached + * switches + */ + if (info->switch_number) + return 0; + + if (dev->netdev_ops != &bcm_sysport_netdev_ops) + return 0; + + port = info->port_number; + slave_dev = info->info.dev; + + /* On SYSTEMPORT Lite we have twice as less queues, so we cannot do a + * 1:1 mapping, we can only do a 2:1 mapping. By reducing the number of + * per-port (slave_dev) network devices queue, we achieve just that. + * This need to happen now before any slave network device is used such + * it accurately reflects the number of real TX queues. + */ + if (priv->is_lite) + netif_set_real_num_tx_queues(slave_dev, + slave_dev->num_tx_queues / 2); + num_tx_queues = slave_dev->real_num_tx_queues; + + if (priv->per_port_num_tx_queues && + priv->per_port_num_tx_queues != num_tx_queues) + netdev_warn(slave_dev, "asymetric number of per-port queues\n"); + + priv->per_port_num_tx_queues = num_tx_queues; + + start = find_first_zero_bit(&priv->queue_bitmap, dev->num_tx_queues); + for (q = 0; q < num_tx_queues; q++) { + ring = &priv->tx_rings[q + start]; + + /* Just remember the mapping actual programming done + * during bcm_sysport_init_tx_ring + */ + ring->switch_queue = q; + ring->switch_port = port; + ring->inspect = true; + priv->ring_map[q + port * num_tx_queues] = ring; + + /* Set all queues as being used now */ + set_bit(q + start, &priv->queue_bitmap); + } + + return 0; +} + +static int bcm_sysport_dsa_notifier(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct dsa_notifier_register_info *info; + + if (event != DSA_PORT_REGISTER) + return NOTIFY_DONE; + + info = ptr; + + return notifier_from_errno(bcm_sysport_map_queues(info->master, info)); +} + #define REV_FMT "v%2x.%02x" static const struct bcm_sysport_hw_params bcm_sysport_params[] = { @@ -2174,10 +2284,18 @@ static int bcm_sysport_probe(struct platform_device *pdev) u64_stats_init(&priv->syncp); + priv->dsa_notifier.notifier_call = bcm_sysport_dsa_notifier; + + ret = register_dsa_notifier(&priv->dsa_notifier); + if (ret) { + dev_err(&pdev->dev, "failed to register DSA notifier\n"); + goto err_deregister_fixed_link; + } + ret = register_netdev(dev); if (ret) { dev_err(&pdev->dev, "failed to register net_device\n"); - goto err_deregister_fixed_link; + goto err_deregister_notifier; } priv->rev = topctrl_readl(priv, REV_CNTL) & REV_MASK; @@ -2190,6 +2308,8 @@ static int bcm_sysport_probe(struct platform_device *pdev) return 0; +err_deregister_notifier: + unregister_dsa_notifier(&priv->dsa_notifier); err_deregister_fixed_link: if (of_phy_is_fixed_link(dn)) of_phy_deregister_fixed_link(dn); @@ -2201,11 +2321,13 @@ err_free_netdev: static int bcm_sysport_remove(struct platform_device *pdev) { struct net_device *dev = dev_get_drvdata(&pdev->dev); + struct bcm_sysport_priv *priv = netdev_priv(dev); struct device_node *dn = pdev->dev.of_node; /* Not much to do, ndo_close has been called * and we use managed allocations */ + unregister_dsa_notifier(&priv->dsa_notifier); unregister_netdev(dev); if (of_phy_is_fixed_link(dn)) of_phy_deregister_fixed_link(dn); diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h index 82e401df199e..f5a984c1c986 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.h +++ b/drivers/net/ethernet/broadcom/bcmsysport.h @@ -404,7 +404,7 @@ struct bcm_rsb { #define RING_CONS_INDEX_MASK 0xffff #define RING_MAPPING 0x14 -#define RING_QID_MASK 0x3 +#define RING_QID_MASK 0x7 #define RING_PORT_ID_SHIFT 3 #define RING_PORT_ID_MASK 0x7 #define RING_IGNORE_STATUS (1 << 6) @@ -712,6 +712,9 @@ struct bcm_sysport_tx_ring { struct bcm_sysport_priv *priv; /* private context backpointer */ unsigned long packets; /* packets statistics */ unsigned long bytes; /* bytes statistics */ + unsigned int switch_queue; /* switch port queue number */ + unsigned int switch_port; /* switch port queue number */ + bool inspect; /* inspect switch port and queue */ }; /* Driver private structure */ @@ -765,5 +768,12 @@ struct bcm_sysport_priv { /* For atomic update generic 64bit value on 32bit Machine */ struct u64_stats_sync syncp; + + /* map information between switch port queues and local queues */ + struct notifier_block dsa_notifier; + unsigned int per_port_num_tx_queues; + unsigned long queue_bitmap; + struct bcm_sysport_tx_ring *ring_map[DSA_MAX_PORTS * 8]; + }; #endif /* __BCM_SYSPORT_H */ diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c index 6322594ab260..6fe074c1588b 100644 --- a/drivers/net/ethernet/broadcom/bgmac-bcma.c +++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c @@ -184,13 +184,19 @@ static int bgmac_probe(struct bcma_device *core) if (!bgmac_is_bcm4707_family(core) && !(ci->id == BCMA_CHIP_ID_BCM53573 && core->core_unit == 1)) { + struct phy_device *phydev; + mii_bus = bcma_mdio_mii_register(bgmac); if (IS_ERR(mii_bus)) { err = PTR_ERR(mii_bus); goto err; } - bgmac->mii_bus = mii_bus; + + phydev = mdiobus_get_phy(bgmac->mii_bus, bgmac->phyaddr); + if (ci->id == BCMA_CHIP_ID_BCM53573 && phydev && + (phydev->drv->phy_id & phydev->drv->phy_id_mask) == PHY_ID_BCM54210E) + phydev->dev_flags |= PHY_BRCM_EN_MASTER_MODE; } if (core->bus->hosttype == BCMA_HOSTTYPE_PCI) { diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c b/drivers/net/ethernet/broadcom/bgmac-platform.c index d937083db9a4..894eda5b13cf 100644 --- a/drivers/net/ethernet/broadcom/bgmac-platform.c +++ b/drivers/net/ethernet/broadcom/bgmac-platform.c @@ -131,6 +131,7 @@ static void bgmac_nicpm_speed_set(struct net_device *net_dev) switch (bgmac->net_dev->phydev->speed) { default: netdev_err(net_dev, "Unsupported speed. Defaulting to 1000Mb\n"); + /* fall through */ case SPEED_1000: val |= NICPM_IOMUX_CTRL_SPD_1000M << NICPM_IOMUX_CTRL_SPD_SHIFT; break; diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 48d672b204a4..1d96cd594ade 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -15,6 +15,7 @@ #include <linux/bcm47xx_nvram.h> #include <linux/phy.h> #include <linux/phy_fixed.h> +#include <net/dsa.h> #include "bgmac.h" static bool bgmac_wait_value(struct bgmac *bgmac, u16 reg, u32 mask, @@ -127,6 +128,8 @@ bgmac_dma_tx_add_buf(struct bgmac *bgmac, struct bgmac_dma_ring *ring, dma_desc->ctl1 = cpu_to_le32(ctl1); } +#define ENET_BRCM_TAG_LEN 4 + static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac, struct bgmac_dma_ring *ring, struct sk_buff *skb) @@ -139,6 +142,18 @@ static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac, u32 flags; int i; + /* The Ethernet switch we are interfaced with needs packets to be at + * least 64 bytes (including FCS) otherwise they will be discarded when + * they enter the switch port logic. When Broadcom tags are enabled, we + * need to make sure that packets are at least 68 bytes + * (including FCS and tag) because the length verification is done after + * the Broadcom tag is stripped off the ingress packet. + */ + if (netdev_uses_dsa(net_dev)) { + if (skb_put_padto(skb, ETH_ZLEN + ENET_BRCM_TAG_LEN)) + goto err_stats; + } + if (skb->len > BGMAC_DESC_CTL1_LEN) { netdev_err(bgmac->net_dev, "Too long skb (%d)\n", skb->len); goto err_drop; @@ -225,6 +240,7 @@ err_dma_head: err_drop: dev_kfree_skb(skb); +err_stats: net_dev->stats.tx_dropped++; net_dev->stats.tx_errors++; return NETDEV_TX_OK; diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index e3af1f3cb61f..b3055a76dfbf 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -8462,10 +8462,8 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev) bnx2_set_default_link(bp); bp->req_flow_ctrl = FLOW_CTRL_RX | FLOW_CTRL_TX; - init_timer(&bp->timer); + setup_timer(&bp->timer, bnx2_timer, (unsigned long)bp); bp->timer.expires = RUN_AT(BNX2_TIMER_INTERVAL); - bp->timer.data = (unsigned long) bp; - bp->timer.function = bnx2_timer; #ifdef BCM_CNIC if (bnx2_shmem_rd(bp, BNX2_ISCSI_INITIATOR) & BNX2_ISCSI_INITIATOR_EN) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 1216c1f1e052..4c739d5355d2 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -4289,7 +4289,7 @@ int __bnx2x_setup_tc(struct net_device *dev, enum tc_setup_type type, { struct tc_mqprio_qopt *mqprio = type_data; - if (type != TC_SETUP_MQPRIO) + if (type != TC_SETUP_QDISC_MQPRIO) return -EOPNOTSUPP; mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index c12b4d3e946e..be9fd7d184d0 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -9332,7 +9332,7 @@ void bnx2x_chip_cleanup(struct bnx2x *bp, int unload_mode, bool keep_link) /* Schedule the rx_mode command */ if (test_bit(BNX2X_FILTER_RX_MODE_PENDING, &bp->sp_state)) set_bit(BNX2X_FILTER_RX_MODE_SCHED, &bp->sp_state); - else + else if (bp->slowpath) bnx2x_set_storm_rx_mode(bp); /* Cleanup multicast configuration */ @@ -10271,8 +10271,15 @@ static void bnx2x_sp_rtnl_task(struct work_struct *work) smp_mb(); bnx2x_nic_unload(bp, UNLOAD_NORMAL, true); - bnx2x_nic_load(bp, LOAD_NORMAL); - + /* When ret value shows failure of allocation failure, + * the nic is rebooted again. If open still fails, a error + * message to notify the user. + */ + if (bnx2x_nic_load(bp, LOAD_NORMAL) == -ENOMEM) { + bnx2x_nic_unload(bp, UNLOAD_NORMAL, true); + if (bnx2x_nic_load(bp, LOAD_NORMAL)) + BNX2X_ERR("Open the NIC fails again!\n"); + } rtnl_unlock(); return; } @@ -12414,10 +12421,8 @@ static int bnx2x_init_bp(struct bnx2x *bp) bp->current_interval = CHIP_REV_IS_SLOW(bp) ? 5*HZ : HZ; - init_timer(&bp->timer); + setup_timer(&bp->timer, bnx2x_timer, (unsigned long)bp); bp->timer.expires = jiffies + bp->current_interval; - bp->timer.data = (unsigned long) bp; - bp->timer.function = bnx2x_timer; if (SHMEM2_HAS(bp, dcbx_lldp_params_offset) && SHMEM2_HAS(bp, dcbx_lldp_dcbx_stat_offset) && diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 9ca994d0bab6..3591077a5f6b 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -1074,11 +1074,6 @@ static void bnx2x_vf_set_bars(struct bnx2x *bp, struct bnx2x_virtf *vf) } } -static int bnx2x_ari_enabled(struct pci_dev *dev) -{ - return dev->bus->self && dev->bus->self->ari_enabled; -} - static int bnx2x_get_vf_igu_cam_info(struct bnx2x *bp) { @@ -1212,7 +1207,7 @@ int bnx2x_iov_init_one(struct bnx2x *bp, int int_mode_param, err = -EIO; /* verify ari is enabled */ - if (!bnx2x_ari_enabled(bp->pdev)) { + if (!pci_ari_enabled(bp->pdev->bus)) { BNX2X_ERR("ARI not supported (check pci bridge ARI forwarding), SRIOV can not be enabled\n"); return 0; } diff --git a/drivers/net/ethernet/broadcom/bnxt/Makefile b/drivers/net/ethernet/broadcom/bnxt/Makefile index 4f0cb8e1ffc0..59c8ec9c1cad 100644 --- a/drivers/net/ethernet/broadcom/bnxt/Makefile +++ b/drivers/net/ethernet/broadcom/bnxt/Makefile @@ -1,3 +1,4 @@ obj-$(CONFIG_BNXT) += bnxt_en.o -bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_vfr.o bnxt_tc.o +bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_vfr.o bnxt_devlink.o +bnxt_en-$(CONFIG_BNXT_FLOWER_OFFLOAD) += bnxt_tc.o diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index dc5de275352a..33c49ad697e4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -61,6 +61,7 @@ #include "bnxt_xdp.h" #include "bnxt_vfr.h" #include "bnxt_tc.h" +#include "bnxt_devlink.h" #define BNXT_TX_TIMEOUT (5 * HZ) @@ -107,9 +108,11 @@ enum board_idx { BCM57452, BCM57454, BCM58802, + BCM58804, BCM58808, NETXTREME_E_VF, NETXTREME_C_VF, + NETXTREME_S_VF, }; /* indexed by enum above */ @@ -145,9 +148,11 @@ static const struct { [BCM57452] = { "Broadcom BCM57452 NetXtreme-E 10Gb/25Gb/40Gb/50Gb Ethernet" }, [BCM57454] = { "Broadcom BCM57454 NetXtreme-E 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet" }, [BCM58802] = { "Broadcom BCM58802 NetXtreme-S 10Gb/25Gb/40Gb/50Gb Ethernet" }, + [BCM58804] = { "Broadcom BCM58804 NetXtreme-S 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet" }, [BCM58808] = { "Broadcom BCM58808 NetXtreme-S 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet" }, [NETXTREME_E_VF] = { "Broadcom NetXtreme-E Ethernet Virtual Function" }, [NETXTREME_C_VF] = { "Broadcom NetXtreme-C Ethernet Virtual Function" }, + [NETXTREME_S_VF] = { "Broadcom NetXtreme-S Ethernet Virtual Function" }, }; static const struct pci_device_id bnxt_pci_tbl[] = { @@ -185,6 +190,7 @@ static const struct pci_device_id bnxt_pci_tbl[] = { { PCI_VDEVICE(BROADCOM, 0x16f0), .driver_data = BCM58808 }, { PCI_VDEVICE(BROADCOM, 0x16f1), .driver_data = BCM57452 }, { PCI_VDEVICE(BROADCOM, 0xd802), .driver_data = BCM58802 }, + { PCI_VDEVICE(BROADCOM, 0xd804), .driver_data = BCM58804 }, #ifdef CONFIG_BNXT_SRIOV { PCI_VDEVICE(BROADCOM, 0x1606), .driver_data = NETXTREME_E_VF }, { PCI_VDEVICE(BROADCOM, 0x1609), .driver_data = NETXTREME_E_VF }, @@ -194,6 +200,7 @@ static const struct pci_device_id bnxt_pci_tbl[] = { { PCI_VDEVICE(BROADCOM, 0x16dc), .driver_data = NETXTREME_E_VF }, { PCI_VDEVICE(BROADCOM, 0x16e1), .driver_data = NETXTREME_C_VF }, { PCI_VDEVICE(BROADCOM, 0x16e5), .driver_data = NETXTREME_C_VF }, + { PCI_VDEVICE(BROADCOM, 0xd800), .driver_data = NETXTREME_S_VF }, #endif { 0 } }; @@ -218,7 +225,8 @@ static struct workqueue_struct *bnxt_pf_wq; static bool bnxt_vf_pciid(enum board_idx idx) { - return (idx == NETXTREME_C_VF || idx == NETXTREME_E_VF); + return (idx == NETXTREME_C_VF || idx == NETXTREME_E_VF || + idx == NETXTREME_S_VF); } #define DB_CP_REARM_FLAGS (DB_KEY_CP | DB_IDX_VALID) @@ -1509,7 +1517,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons, (struct rx_tpa_end_cmp *)rxcmp, (struct rx_tpa_end_cmp_ext *)rxcmp1, event); - if (unlikely(IS_ERR(skb))) + if (IS_ERR(skb)) return -EBUSY; rc = -ENOMEM; @@ -2827,7 +2835,8 @@ int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode) if (page_mode) { if (bp->dev->mtu > BNXT_MAX_PAGE_MODE_MTU) return -EOPNOTSUPP; - bp->dev->max_mtu = BNXT_MAX_PAGE_MODE_MTU; + bp->dev->max_mtu = + min_t(u16, bp->max_mtu, BNXT_MAX_PAGE_MODE_MTU); bp->flags &= ~BNXT_FLAG_AGG_RINGS; bp->flags |= BNXT_FLAG_NO_AGG_RINGS | BNXT_FLAG_RX_PAGE_MODE; bp->dev->hw_features &= ~NETIF_F_LRO; @@ -2835,7 +2844,7 @@ int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode) bp->rx_dir = DMA_BIDIRECTIONAL; bp->rx_skb_func = bnxt_rx_page_skb; } else { - bp->dev->max_mtu = BNXT_MAX_MTU; + bp->dev->max_mtu = bp->max_mtu; bp->flags &= ~BNXT_FLAG_RX_PAGE_MODE; bp->rx_dir = DMA_FROM_DEVICE; bp->rx_skb_func = bnxt_rx_skb; @@ -4528,19 +4537,46 @@ static int bnxt_hwrm_check_tx_rings(struct bnxt *bp, int tx_rings) return 0; } -static void bnxt_hwrm_set_coal_params(struct bnxt *bp, u32 max_bufs, - u32 buf_tmrs, u16 flags, +static void bnxt_hwrm_set_coal_params(struct bnxt_coal *hw_coal, struct hwrm_ring_cmpl_ring_cfg_aggint_params_input *req) { + u16 val, tmr, max, flags; + + max = hw_coal->bufs_per_record * 128; + if (hw_coal->budget) + max = hw_coal->bufs_per_record * hw_coal->budget; + + val = clamp_t(u16, hw_coal->coal_bufs, 1, max); + req->num_cmpl_aggr_int = cpu_to_le16(val); + + /* This is a 6-bit value and must not be 0, or we'll get non stop IRQ */ + val = min_t(u16, val, 63); + req->num_cmpl_dma_aggr = cpu_to_le16(val); + + /* This is a 6-bit value and must not be 0, or we'll get non stop IRQ */ + val = clamp_t(u16, hw_coal->coal_bufs_irq, 1, 63); + req->num_cmpl_dma_aggr_during_int = cpu_to_le16(val); + + tmr = BNXT_USEC_TO_COAL_TIMER(hw_coal->coal_ticks); + tmr = max_t(u16, tmr, 1); + req->int_lat_tmr_max = cpu_to_le16(tmr); + + /* min timer set to 1/2 of interrupt timer */ + val = tmr / 2; + req->int_lat_tmr_min = cpu_to_le16(val); + + /* buf timer set to 1/4 of interrupt timer */ + val = max_t(u16, tmr / 4, 1); + req->cmpl_aggr_dma_tmr = cpu_to_le16(val); + + tmr = BNXT_USEC_TO_COAL_TIMER(hw_coal->coal_ticks_irq); + tmr = max_t(u16, tmr, 1); + req->cmpl_aggr_dma_tmr_during_int = cpu_to_le16(tmr); + + flags = RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_FLAGS_TIMER_RESET; + if (hw_coal->idle_thresh && hw_coal->coal_ticks < hw_coal->idle_thresh) + flags |= RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_FLAGS_RING_IDLE; req->flags = cpu_to_le16(flags); - req->num_cmpl_dma_aggr = cpu_to_le16((u16)max_bufs); - req->num_cmpl_dma_aggr_during_int = cpu_to_le16(max_bufs >> 16); - req->cmpl_aggr_dma_tmr = cpu_to_le16((u16)buf_tmrs); - req->cmpl_aggr_dma_tmr_during_int = cpu_to_le16(buf_tmrs >> 16); - /* Minimum time between 2 interrupts set to buf_tmr x 2 */ - req->int_lat_tmr_min = cpu_to_le16((u16)buf_tmrs * 2); - req->int_lat_tmr_max = cpu_to_le16((u16)buf_tmrs * 4); - req->num_cmpl_aggr_int = cpu_to_le16((u16)max_bufs * 4); } int bnxt_hwrm_set_coal(struct bnxt *bp) @@ -4548,51 +4584,14 @@ int bnxt_hwrm_set_coal(struct bnxt *bp) int i, rc = 0; struct hwrm_ring_cmpl_ring_cfg_aggint_params_input req_rx = {0}, req_tx = {0}, *req; - u16 max_buf, max_buf_irq; - u16 buf_tmr, buf_tmr_irq; - u32 flags; bnxt_hwrm_cmd_hdr_init(bp, &req_rx, HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS, -1, -1); bnxt_hwrm_cmd_hdr_init(bp, &req_tx, HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS, -1, -1); - /* Each rx completion (2 records) should be DMAed immediately. - * DMA 1/4 of the completion buffers at a time. - */ - max_buf = min_t(u16, bp->rx_coal_bufs / 4, 2); - /* max_buf must not be zero */ - max_buf = clamp_t(u16, max_buf, 1, 63); - max_buf_irq = clamp_t(u16, bp->rx_coal_bufs_irq, 1, 63); - buf_tmr = BNXT_USEC_TO_COAL_TIMER(bp->rx_coal_ticks); - /* buf timer set to 1/4 of interrupt timer */ - buf_tmr = max_t(u16, buf_tmr / 4, 1); - buf_tmr_irq = BNXT_USEC_TO_COAL_TIMER(bp->rx_coal_ticks_irq); - buf_tmr_irq = max_t(u16, buf_tmr_irq, 1); - - flags = RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_FLAGS_TIMER_RESET; - - /* RING_IDLE generates more IRQs for lower latency. Enable it only - * if coal_ticks is less than 25 us. - */ - if (bp->rx_coal_ticks < 25) - flags |= RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_FLAGS_RING_IDLE; - - bnxt_hwrm_set_coal_params(bp, max_buf_irq << 16 | max_buf, - buf_tmr_irq << 16 | buf_tmr, flags, &req_rx); - - /* max_buf must not be zero */ - max_buf = clamp_t(u16, bp->tx_coal_bufs, 1, 63); - max_buf_irq = clamp_t(u16, bp->tx_coal_bufs_irq, 1, 63); - buf_tmr = BNXT_USEC_TO_COAL_TIMER(bp->tx_coal_ticks); - /* buf timer set to 1/4 of interrupt timer */ - buf_tmr = max_t(u16, buf_tmr / 4, 1); - buf_tmr_irq = BNXT_USEC_TO_COAL_TIMER(bp->tx_coal_ticks_irq); - buf_tmr_irq = max_t(u16, buf_tmr_irq, 1); - - flags = RING_CMPL_RING_CFG_AGGINT_PARAMS_REQ_FLAGS_TIMER_RESET; - bnxt_hwrm_set_coal_params(bp, max_buf_irq << 16 | max_buf, - buf_tmr_irq << 16 | buf_tmr, flags, &req_tx); + bnxt_hwrm_set_coal_params(&bp->rx_coal, &req_rx); + bnxt_hwrm_set_coal_params(&bp->tx_coal, &req_tx); mutex_lock(&bp->hwrm_cmd_lock); for (i = 0; i < bp->cp_nr_rings; i++) { @@ -4724,6 +4723,10 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp) else bp->br_mode = BRIDGE_MODE_UNDEF; + bp->max_mtu = le16_to_cpu(resp->max_mtu_configured); + if (!bp->max_mtu) + bp->max_mtu = BNXT_MAX_MTU; + func_qcfg_exit: mutex_unlock(&bp->hwrm_cmd_lock); return rc; @@ -4884,9 +4887,9 @@ static int bnxt_hwrm_ver_get(struct bnxt *bp) resp->hwrm_intf_upd); netdev_warn(bp->dev, "Please update firmware with HWRM interface 1.0.0 or newer.\n"); } - snprintf(bp->fw_ver_str, BC_HWRM_STR_LEN, "%d.%d.%d/%d.%d.%d", + snprintf(bp->fw_ver_str, BC_HWRM_STR_LEN, "%d.%d.%d.%d", resp->hwrm_fw_maj, resp->hwrm_fw_min, resp->hwrm_fw_bld, - resp->hwrm_intf_maj, resp->hwrm_intf_min, resp->hwrm_intf_upd); + resp->hwrm_fw_rsvd); bp->hwrm_cmd_timeout = le16_to_cpu(resp->def_req_timeout); if (!bp->hwrm_cmd_timeout) @@ -4912,16 +4915,14 @@ hwrm_ver_get_exit: int bnxt_hwrm_fw_set_time(struct bnxt *bp) { -#if IS_ENABLED(CONFIG_RTC_LIB) struct hwrm_fw_set_time_input req = {0}; - struct rtc_time tm; - struct timeval tv; + struct tm tm; + time64_t now = ktime_get_real_seconds(); if (bp->hwrm_spec_code < 0x10400) return -EOPNOTSUPP; - do_gettimeofday(&tv); - rtc_time_to_tm(tv.tv_sec, &tm); + time64_to_tm(now, 0, &tm); bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FW_SET_TIME, -1, -1); req.year = cpu_to_le16(1900 + tm.tm_year); req.month = 1 + tm.tm_mon; @@ -4930,9 +4931,6 @@ int bnxt_hwrm_fw_set_time(struct bnxt *bp) req.minute = tm.tm_min; req.second = tm.tm_sec; return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); -#else - return -EOPNOTSUPP; -#endif } static int bnxt_hwrm_port_qstats(struct bnxt *bp) @@ -6980,6 +6978,11 @@ static void bnxt_timer(unsigned long data) set_bit(BNXT_PERIODIC_STATS_SP_EVENT, &bp->sp_event); bnxt_queue_sp_work(bp); } + + if (bnxt_tc_flower_enabled(bp)) { + set_bit(BNXT_FLOW_STATS_SP_EVENT, &bp->sp_event); + bnxt_queue_sp_work(bp); + } bnxt_restart_timer: mod_timer(&bp->timer, jiffies + bp->current_interval); } @@ -7070,6 +7073,10 @@ static void bnxt_sp_task(struct work_struct *work) bnxt_get_port_module_status(bp); mutex_unlock(&bp->link_lock); } + + if (test_and_clear_bit(BNXT_FLOW_STATS_SP_EVENT, &bp->sp_event)) + bnxt_tc_flow_stats_work(bp); + /* These functions below will clear BNXT_STATE_IN_SP_TASK. They * must be the last functions to be called before exiting. */ @@ -7133,6 +7140,32 @@ static void bnxt_cleanup_pci(struct bnxt *bp) pci_disable_device(bp->pdev); } +static void bnxt_init_dflt_coal(struct bnxt *bp) +{ + struct bnxt_coal *coal; + + /* Tick values in micro seconds. + * 1 coal_buf x bufs_per_record = 1 completion record. + */ + coal = &bp->rx_coal; + coal->coal_ticks = 14; + coal->coal_bufs = 30; + coal->coal_ticks_irq = 1; + coal->coal_bufs_irq = 2; + coal->idle_thresh = 25; + coal->bufs_per_record = 2; + coal->budget = 64; /* NAPI budget */ + + coal = &bp->tx_coal; + coal->coal_ticks = 28; + coal->coal_bufs = 30; + coal->coal_ticks_irq = 2; + coal->coal_bufs_irq = 2; + coal->bufs_per_record = 1; + + bp->stats_coal_ticks = BNXT_DEF_STATS_COAL_TICKS; +} + static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev) { int rc; @@ -7201,22 +7234,9 @@ static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev) bp->rx_ring_size = BNXT_DEFAULT_RX_RING_SIZE; bp->tx_ring_size = BNXT_DEFAULT_TX_RING_SIZE; - /* tick values in micro seconds */ - bp->rx_coal_ticks = 12; - bp->rx_coal_bufs = 30; - bp->rx_coal_ticks_irq = 1; - bp->rx_coal_bufs_irq = 2; + bnxt_init_dflt_coal(bp); - bp->tx_coal_ticks = 25; - bp->tx_coal_bufs = 30; - bp->tx_coal_ticks_irq = 2; - bp->tx_coal_bufs_irq = 2; - - bp->stats_coal_ticks = BNXT_DEF_STATS_COAL_TICKS; - - init_timer(&bp->timer); - bp->timer.data = (unsigned long)bp; - bp->timer.function = bnxt_timer; + setup_timer(&bp->timer, bnxt_timer, (unsigned long)bp); bp->current_interval = BNXT_TIMER_INTERVAL; clear_bit(BNXT_STATE_OPEN, &bp->state); @@ -7243,13 +7263,13 @@ static int bnxt_change_mac_addr(struct net_device *dev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; + if (ether_addr_equal(addr->sa_data, dev->dev_addr)) + return 0; + rc = bnxt_approve_mac(bp, addr->sa_data); if (rc) return rc; - if (ether_addr_equal(addr->sa_data, dev->dev_addr)) - return 0; - memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); if (netif_running(dev)) { bnxt_close_nic(bp, false, false); @@ -7321,24 +7341,49 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc) return 0; } -static int bnxt_setup_flower(struct net_device *dev, - struct tc_cls_flower_offload *cls_flower) +static int bnxt_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct bnxt *bp = cb_priv; + + if (!bnxt_tc_flower_enabled(bp) || !tc_can_offload(bp->dev)) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return bnxt_tc_setup_flower(bp, bp->pf.fw_fid, type_data); + default: + return -EOPNOTSUPP; + } +} + +static int bnxt_setup_tc_block(struct net_device *dev, + struct tc_block_offload *f) { struct bnxt *bp = netdev_priv(dev); - if (BNXT_VF(bp)) + if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) return -EOPNOTSUPP; - return bnxt_tc_setup_flower(bp, bp->pf.fw_fid, cls_flower); + switch (f->command) { + case TC_BLOCK_BIND: + return tcf_block_cb_register(f->block, bnxt_setup_tc_block_cb, + bp, bp); + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, bnxt_setup_tc_block_cb, bp); + return 0; + default: + return -EOPNOTSUPP; + } } static int bnxt_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { switch (type) { - case TC_SETUP_CLSFLOWER: - return bnxt_setup_flower(dev, type_data); - case TC_SETUP_MQPRIO: { + case TC_SETUP_BLOCK: + return bnxt_setup_tc_block(dev, type_data); + case TC_SETUP_QDISC_MQPRIO: { struct tc_mqprio_qopt *mqprio = type_data; mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; @@ -7725,7 +7770,7 @@ static const struct net_device_ops bnxt_netdev_ops = { #endif .ndo_udp_tunnel_add = bnxt_udp_tunnel_add, .ndo_udp_tunnel_del = bnxt_udp_tunnel_del, - .ndo_xdp = bnxt_xdp, + .ndo_bpf = bnxt_xdp, .ndo_bridge_getlink = bnxt_bridge_getlink, .ndo_bridge_setlink = bnxt_bridge_setlink, .ndo_get_phys_port_name = bnxt_get_phys_port_name @@ -8064,10 +8109,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev->features |= dev->hw_features | NETIF_F_HIGHDMA; dev->priv_flags |= IFF_UNICAST_FLT; - /* MTU range: 60 - 9500 */ - dev->min_mtu = ETH_ZLEN; - dev->max_mtu = BNXT_MAX_MTU; - #ifdef CONFIG_BNXT_SRIOV init_waitqueue_head(&bp->sriov_cfg_wait); mutex_init(&bp->sriov_lock); @@ -8115,6 +8156,10 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) bnxt_ethtool_init(bp); bnxt_dcb_init(bp); + /* MTU range: 60 - FW defined max */ + dev->min_mtu = ETH_ZLEN; + dev->max_mtu = bp->max_mtu; + rc = bnxt_probe_phy(bp); if (rc) goto init_err_pci_clean; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index c911e69ff25f..5359a1f0045f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -944,6 +944,22 @@ struct bnxt_test_info { #define BNXT_CAG_REG_LEGACY_INT_STATUS 0x4014 #define BNXT_CAG_REG_BASE 0x300000 +struct bnxt_coal { + u16 coal_ticks; + u16 coal_ticks_irq; + u16 coal_bufs; + u16 coal_bufs_irq; + /* RING_IDLE enabled when coal ticks < idle_thresh */ + u16 idle_thresh; + u8 bufs_per_record; + u8 budget; +}; + +struct bnxt_tc_flow_stats { + u64 packets; + u64 bytes; +}; + struct bnxt_tc_info { bool enabled; @@ -954,12 +970,29 @@ struct bnxt_tc_info { /* hash table to store L2 keys of TC flows */ struct rhashtable l2_table; struct rhashtable_params l2_ht_params; + /* hash table to store L2 keys for TC tunnel decap */ + struct rhashtable decap_l2_table; + struct rhashtable_params decap_l2_ht_params; + /* hash table to store tunnel decap entries */ + struct rhashtable decap_table; + struct rhashtable_params decap_ht_params; + /* hash table to store tunnel encap entries */ + struct rhashtable encap_table; + struct rhashtable_params encap_ht_params; /* lock to atomically add/del an l2 node when a flow is * added or deleted. */ struct mutex lock; + /* Fields used for batching stats query */ + struct rhashtable_iter iter; +#define BNXT_FLOW_STATS_BATCH_MAX 10 + struct bnxt_tc_stats_batch { + void *flow_node; + struct bnxt_tc_flow_stats hw_stats; + } stats_batch[BNXT_FLOW_STATS_BATCH_MAX]; + /* Stat counter mask (width) */ u64 bytes_mask; u64 packets_mask; @@ -1013,6 +1046,7 @@ struct bnxt { #define CHIP_NUM_5745X 0xd730 #define CHIP_NUM_58802 0xd802 +#define CHIP_NUM_58804 0xd804 #define CHIP_NUM_58808 0xd808 #define BNXT_CHIP_NUM_5730X(chip_num) \ @@ -1048,6 +1082,7 @@ struct bnxt { #define BNXT_CHIP_NUM_588XX(chip_num) \ ((chip_num) == CHIP_NUM_58802 || \ + (chip_num) == CHIP_NUM_58804 || \ (chip_num) == CHIP_NUM_58808) struct net_device *dev; @@ -1170,6 +1205,7 @@ struct bnxt { int nr_vnics; u32 rss_hash_cfg; + u16 max_mtu; u8 max_tc; u8 max_lltc; /* lossless TCs */ struct bnxt_queue_info q_info[BNXT_MAX_QUEUE]; @@ -1232,14 +1268,8 @@ struct bnxt { u8 port_count; u16 br_mode; - u16 rx_coal_ticks; - u16 rx_coal_ticks_irq; - u16 rx_coal_bufs; - u16 rx_coal_bufs_irq; - u16 tx_coal_ticks; - u16 tx_coal_ticks_irq; - u16 tx_coal_bufs; - u16 tx_coal_bufs_irq; + struct bnxt_coal rx_coal; + struct bnxt_coal tx_coal; #define BNXT_USEC_TO_COAL_TIMER(x) ((x) * 25 / 2) @@ -1265,6 +1295,7 @@ struct bnxt { #define BNXT_GENEVE_ADD_PORT_SP_EVENT 12 #define BNXT_GENEVE_DEL_PORT_SP_EVENT 13 #define BNXT_LINK_SPEED_CHNG_SP_EVENT 14 +#define BNXT_FLOW_STATS_SP_EVENT 15 struct bnxt_pf_info pf; #ifdef CONFIG_BNXT_SRIOV @@ -1315,7 +1346,7 @@ struct bnxt { enum devlink_eswitch_mode eswitch_mode; struct bnxt_vf_rep **vf_reps; /* array of vf-rep ptrs */ u16 *cfa_code_map; /* cfa_code -> vf_idx map */ - struct bnxt_tc_info tc_info; + struct bnxt_tc_info *tc_info; }; #define BNXT_RX_STATS_OFFSET(counter) \ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c new file mode 100644 index 000000000000..402fa32f7a88 --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -0,0 +1,65 @@ +/* Broadcom NetXtreme-C/E network driver. + * + * Copyright (c) 2017 Broadcom Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation. + */ + +#include <linux/pci.h> +#include <linux/netdevice.h> +#include "bnxt_hsi.h" +#include "bnxt.h" +#include "bnxt_vfr.h" +#include "bnxt_devlink.h" + +static const struct devlink_ops bnxt_dl_ops = { +#ifdef CONFIG_BNXT_SRIOV + .eswitch_mode_set = bnxt_dl_eswitch_mode_set, + .eswitch_mode_get = bnxt_dl_eswitch_mode_get, +#endif /* CONFIG_BNXT_SRIOV */ +}; + +int bnxt_dl_register(struct bnxt *bp) +{ + struct devlink *dl; + int rc; + + if (!pci_find_ext_capability(bp->pdev, PCI_EXT_CAP_ID_SRIOV)) + return 0; + + if (bp->hwrm_spec_code < 0x10803) { + netdev_warn(bp->dev, "Firmware does not support SR-IOV E-Switch SWITCHDEV mode.\n"); + return -ENOTSUPP; + } + + dl = devlink_alloc(&bnxt_dl_ops, sizeof(struct bnxt_dl)); + if (!dl) { + netdev_warn(bp->dev, "devlink_alloc failed"); + return -ENOMEM; + } + + bnxt_link_bp_to_dl(bp, dl); + bp->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY; + rc = devlink_register(dl, &bp->pdev->dev); + if (rc) { + bnxt_link_bp_to_dl(bp, NULL); + devlink_free(dl); + netdev_warn(bp->dev, "devlink_register failed. rc=%d", rc); + return rc; + } + + return 0; +} + +void bnxt_dl_unregister(struct bnxt *bp) +{ + struct devlink *dl = bp->dl; + + if (!dl) + return; + + devlink_unregister(dl); + devlink_free(dl); +} diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h new file mode 100644 index 000000000000..e92a35d8b642 --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h @@ -0,0 +1,39 @@ +/* Broadcom NetXtreme-C/E network driver. + * + * Copyright (c) 2017 Broadcom Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation. + */ + +#ifndef BNXT_DEVLINK_H +#define BNXT_DEVLINK_H + +/* Struct to hold housekeeping info needed by devlink interface */ +struct bnxt_dl { + struct bnxt *bp; /* back ptr to the controlling dev */ +}; + +static inline struct bnxt *bnxt_get_bp_from_dl(struct devlink *dl) +{ + return ((struct bnxt_dl *)devlink_priv(dl))->bp; +} + +/* To clear devlink pointer from bp, pass NULL dl */ +static inline void bnxt_link_bp_to_dl(struct bnxt *bp, struct devlink *dl) +{ + bp->dl = dl; + + /* add a back pointer in dl to bp */ + if (dl) { + struct bnxt_dl *bp_dl = devlink_priv(dl); + + bp_dl->bp = bp; + } +} + +int bnxt_dl_register(struct bnxt *bp); +void bnxt_dl_unregister(struct bnxt *bp); + +#endif /* BNXT_DEVLINK_H */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 3cbe771b3352..7ce1d4b7e67d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -26,8 +26,6 @@ #define FLASH_PACKAGE_TIMEOUT ((HWRM_CMD_TIMEOUT) * 200) #define INSTALL_PACKAGE_TIMEOUT ((HWRM_CMD_TIMEOUT) * 200) -static char *bnxt_get_pkgver(struct net_device *dev, char *buf, size_t buflen); - static u32 bnxt_get_msglevel(struct net_device *dev) { struct bnxt *bp = netdev_priv(dev); @@ -46,19 +44,24 @@ static int bnxt_get_coalesce(struct net_device *dev, struct ethtool_coalesce *coal) { struct bnxt *bp = netdev_priv(dev); + struct bnxt_coal *hw_coal; + u16 mult; memset(coal, 0, sizeof(*coal)); - coal->rx_coalesce_usecs = bp->rx_coal_ticks; - /* 2 completion records per rx packet */ - coal->rx_max_coalesced_frames = bp->rx_coal_bufs / 2; - coal->rx_coalesce_usecs_irq = bp->rx_coal_ticks_irq; - coal->rx_max_coalesced_frames_irq = bp->rx_coal_bufs_irq / 2; + hw_coal = &bp->rx_coal; + mult = hw_coal->bufs_per_record; + coal->rx_coalesce_usecs = hw_coal->coal_ticks; + coal->rx_max_coalesced_frames = hw_coal->coal_bufs / mult; + coal->rx_coalesce_usecs_irq = hw_coal->coal_ticks_irq; + coal->rx_max_coalesced_frames_irq = hw_coal->coal_bufs_irq / mult; - coal->tx_coalesce_usecs = bp->tx_coal_ticks; - coal->tx_max_coalesced_frames = bp->tx_coal_bufs; - coal->tx_coalesce_usecs_irq = bp->tx_coal_ticks_irq; - coal->tx_max_coalesced_frames_irq = bp->tx_coal_bufs_irq; + hw_coal = &bp->tx_coal; + mult = hw_coal->bufs_per_record; + coal->tx_coalesce_usecs = hw_coal->coal_ticks; + coal->tx_max_coalesced_frames = hw_coal->coal_bufs / mult; + coal->tx_coalesce_usecs_irq = hw_coal->coal_ticks_irq; + coal->tx_max_coalesced_frames_irq = hw_coal->coal_bufs_irq / mult; coal->stats_block_coalesce_usecs = bp->stats_coal_ticks; @@ -70,18 +73,23 @@ static int bnxt_set_coalesce(struct net_device *dev, { struct bnxt *bp = netdev_priv(dev); bool update_stats = false; + struct bnxt_coal *hw_coal; int rc = 0; - - bp->rx_coal_ticks = coal->rx_coalesce_usecs; - /* 2 completion records per rx packet */ - bp->rx_coal_bufs = coal->rx_max_coalesced_frames * 2; - bp->rx_coal_ticks_irq = coal->rx_coalesce_usecs_irq; - bp->rx_coal_bufs_irq = coal->rx_max_coalesced_frames_irq * 2; - - bp->tx_coal_ticks = coal->tx_coalesce_usecs; - bp->tx_coal_bufs = coal->tx_max_coalesced_frames; - bp->tx_coal_ticks_irq = coal->tx_coalesce_usecs_irq; - bp->tx_coal_bufs_irq = coal->tx_max_coalesced_frames_irq; + u16 mult; + + hw_coal = &bp->rx_coal; + mult = hw_coal->bufs_per_record; + hw_coal->coal_ticks = coal->rx_coalesce_usecs; + hw_coal->coal_bufs = coal->rx_max_coalesced_frames * mult; + hw_coal->coal_ticks_irq = coal->rx_coalesce_usecs_irq; + hw_coal->coal_bufs_irq = coal->rx_max_coalesced_frames_irq * mult; + + hw_coal = &bp->tx_coal; + mult = hw_coal->bufs_per_record; + hw_coal->coal_ticks = coal->tx_coalesce_usecs; + hw_coal->coal_bufs = coal->tx_max_coalesced_frames * mult; + hw_coal->coal_ticks_irq = coal->tx_coalesce_usecs_irq; + hw_coal->coal_bufs_irq = coal->tx_max_coalesced_frames_irq * mult; if (bp->stats_coal_ticks != coal->stats_block_coalesce_usecs) { u32 stats_ticks = coal->stats_block_coalesce_usecs; @@ -822,20 +830,10 @@ static void bnxt_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { struct bnxt *bp = netdev_priv(dev); - char *pkglog; - char *pkgver = NULL; - pkglog = kmalloc(BNX_PKG_LOG_MAX_LENGTH, GFP_KERNEL); - if (pkglog) - pkgver = bnxt_get_pkgver(dev, pkglog, BNX_PKG_LOG_MAX_LENGTH); strlcpy(info->driver, DRV_MODULE_NAME, sizeof(info->driver)); strlcpy(info->version, DRV_MODULE_VERSION, sizeof(info->version)); - if (pkgver && *pkgver != 0 && isdigit(*pkgver)) - snprintf(info->fw_version, sizeof(info->fw_version) - 1, - "%s pkg %s", bp->fw_ver_str, pkgver); - else - strlcpy(info->fw_version, bp->fw_ver_str, - sizeof(info->fw_version)); + strlcpy(info->fw_version, bp->fw_ver_str, sizeof(info->fw_version)); strlcpy(info->bus_info, pci_name(bp->pdev), sizeof(info->bus_info)); info->n_stats = bnxt_get_num_stats(bp); info->testinfo_len = bp->num_tests; @@ -843,7 +841,6 @@ static void bnxt_get_drvinfo(struct net_device *dev, info->eedump_len = 0; /* TODO CHIMP FW: reg dump details */ info->regdump_len = 0; - kfree(pkglog); } static void bnxt_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) @@ -1350,7 +1347,6 @@ static int bnxt_firmware_reset(struct net_device *dev, bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FW_RESET, -1, -1); - /* TODO: Support ASAP ChiMP self-reset (e.g. upon PF driver unload) */ /* TODO: Address self-reset of APE/KONG/BONO/TANG or ungraceful reset */ /* (e.g. when firmware isn't already running) */ switch (dir_type) { @@ -1376,6 +1372,10 @@ static int bnxt_firmware_reset(struct net_device *dev, case BNX_DIR_TYPE_BONO_PATCH: req.embedded_proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_ROCE; break; + case BNXT_FW_RESET_CHIP: + req.embedded_proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_CHIP; + req.selfrst_status = FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP; + break; default: return -EINVAL; } @@ -1773,6 +1773,9 @@ static int bnxt_get_nvram_item(struct net_device *dev, u32 index, u32 offset, dma_addr_t dma_handle; struct hwrm_nvm_read_input req = {0}; + if (!length) + return -EINVAL; + buf = dma_alloc_coherent(&bp->pdev->dev, length, &dma_handle, GFP_KERNEL); if (!buf) { @@ -2495,13 +2498,59 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, } } +static int bnxt_reset(struct net_device *dev, u32 *flags) +{ + struct bnxt *bp = netdev_priv(dev); + int rc = 0; + + if (!BNXT_PF(bp)) { + netdev_err(dev, "Reset is not supported from a VF\n"); + return -EOPNOTSUPP; + } + + if (pci_vfs_assigned(bp->pdev)) { + netdev_err(dev, + "Reset not allowed when VFs are assigned to VMs\n"); + return -EBUSY; + } + + if (*flags == ETH_RESET_ALL) { + /* This feature is not supported in older firmware versions */ + if (bp->hwrm_spec_code < 0x10803) + return -EOPNOTSUPP; + + rc = bnxt_firmware_reset(dev, BNXT_FW_RESET_CHIP); + if (!rc) + netdev_info(dev, "Reset request successful. Reload driver to complete reset\n"); + } else { + rc = -EINVAL; + } + + return rc; +} + void bnxt_ethtool_init(struct bnxt *bp) { struct hwrm_selftest_qlist_output *resp = bp->hwrm_cmd_resp_addr; struct hwrm_selftest_qlist_input req = {0}; struct bnxt_test_info *test_info; + struct net_device *dev = bp->dev; + char *pkglog; int i, rc; + pkglog = kzalloc(BNX_PKG_LOG_MAX_LENGTH, GFP_KERNEL); + if (pkglog) { + char *pkgver; + int len; + + pkgver = bnxt_get_pkgver(dev, pkglog, BNX_PKG_LOG_MAX_LENGTH); + if (pkgver && *pkgver != 0 && isdigit(*pkgver)) { + len = strlen(bp->fw_ver_str); + snprintf(bp->fw_ver_str + len, FW_VER_STR_LEN - len - 1, + "/pkg %s", pkgver); + } + kfree(pkglog); + } if (bp->hwrm_spec_code < 0x10704 || !BNXT_SINGLE_PF(bp)) return; @@ -2592,4 +2641,5 @@ const struct ethtool_ops bnxt_ethtool_ops = { .nway_reset = bnxt_nway_reset, .set_phys_id = bnxt_set_phys_id, .self_test = bnxt_self_test, + .reset = bnxt_reset, }; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h index f1bc90b6fb5b..ff601b42fcc8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h @@ -34,6 +34,8 @@ struct bnxt_led_cfg { #define BNXT_LED_DFLT_ENABLES(x) \ cpu_to_le32(BNXT_LED_DFLT_ENA << (BNXT_LED_DFLT_ENA_SHIFT * (x))) +#define BNXT_FW_RESET_CHIP 0xffff + extern const struct ethtool_ops bnxt_ethtool_ops; u32 _bnxt_fw_to_ethtool_adv_spds(u16, u8); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h index cb04cc76e8ad..c99f4d0880e4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h @@ -11,21 +11,21 @@ #ifndef BNXT_HSI_H #define BNXT_HSI_H -/* HSI and HWRM Specification 1.8.1 */ +/* HSI and HWRM Specification 1.8.3 */ #define HWRM_VERSION_MAJOR 1 #define HWRM_VERSION_MINOR 8 -#define HWRM_VERSION_UPDATE 1 +#define HWRM_VERSION_UPDATE 3 -#define HWRM_VERSION_RSVD 4 /* non-zero means beta version */ +#define HWRM_VERSION_RSVD 1 /* non-zero means beta version */ -#define HWRM_VERSION_STR "1.8.1.4" +#define HWRM_VERSION_STR "1.8.3.1" /* * Following is the signature for HWRM message field that indicates not * applicable (All F's). Need to cast it the size of the field if needed. */ #define HWRM_NA_SIGNATURE ((__le32)(-1)) #define HWRM_MAX_REQ_LEN (128) /* hwrm_func_buf_rgtr */ -#define HWRM_MAX_RESP_LEN (248) /* hwrm_selftest_qlist */ +#define HWRM_MAX_RESP_LEN (280) /* hwrm_selftest_qlist */ #define HW_HASH_INDEX_SIZE 0x80 /* 7 bit indirection table index. */ #define HW_HASH_KEY_SIZE 40 #define HWRM_RESP_VALID_KEY 1 /* valid key for HWRM response */ @@ -111,6 +111,7 @@ struct hwrm_async_event_cmpl { #define ASYNC_EVENT_CMPL_EVENT_ID_VF_MAC_ADDR_CHANGE 0x31UL #define ASYNC_EVENT_CMPL_EVENT_ID_PF_VF_COMM_STATUS_CHANGE 0x32UL #define ASYNC_EVENT_CMPL_EVENT_ID_VF_CFG_CHANGE 0x33UL + #define ASYNC_EVENT_CMPL_EVENT_ID_LLFC_PFC_CHANGE 0x34UL #define ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR 0xffUL __le32 event_data2; u8 opaque_v; @@ -835,8 +836,7 @@ struct hwrm_func_qcfg_output { u8 port_pf_cnt; #define FUNC_QCFG_RESP_PORT_PF_CNT_UNAVAIL 0x0UL __le16 dflt_vnic_id; - u8 unused_0; - u8 unused_1; + __le16 max_mtu_configured; __le32 min_bw; #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_MASK 0xfffffffUL #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_SFT 0 @@ -873,12 +873,12 @@ struct hwrm_func_qcfg_output { #define FUNC_QCFG_RESP_EVB_MODE_NO_EVB 0x0UL #define FUNC_QCFG_RESP_EVB_MODE_VEB 0x1UL #define FUNC_QCFG_RESP_EVB_MODE_VEPA 0x2UL - u8 unused_2; + u8 unused_0; __le16 alloc_vfs; __le32 alloc_mcast_filters; __le32 alloc_hw_ring_grps; __le16 alloc_sp_tx_rings; - u8 unused_3; + u8 unused_1; u8 valid; }; @@ -3407,6 +3407,7 @@ struct hwrm_vnic_cfg_input { #define VNIC_CFG_REQ_FLAGS_ROCE_DUAL_VNIC_MODE 0x8UL #define VNIC_CFG_REQ_FLAGS_ROCE_ONLY_VNIC_MODE 0x10UL #define VNIC_CFG_REQ_FLAGS_RSS_DFLT_CR_MODE 0x20UL + #define VNIC_CFG_REQ_FLAGS_ROCE_MIRRORING_CAPABLE_VNIC_MODE 0x40UL __le32 enables; #define VNIC_CFG_REQ_ENABLES_DFLT_RING_GRP 0x1UL #define VNIC_CFG_REQ_ENABLES_RSS_RULE 0x2UL @@ -3463,6 +3464,7 @@ struct hwrm_vnic_qcaps_output { #define VNIC_QCAPS_RESP_FLAGS_ROCE_DUAL_VNIC_CAP 0x8UL #define VNIC_QCAPS_RESP_FLAGS_ROCE_ONLY_VNIC_CAP 0x10UL #define VNIC_QCAPS_RESP_FLAGS_RSS_DFLT_CR_CAP 0x20UL + #define VNIC_QCAPS_RESP_FLAGS_ROCE_MIRROING_CAPABLE_VNIC_CAP 0x40UL __le32 unused_2; u8 unused_3; u8 unused_4; @@ -3994,6 +3996,7 @@ struct hwrm_cfa_l2_filter_alloc_input { #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS 0x6UL #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT 0x7UL #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE 0x8UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL u8 unused_7; __le16 dst_id; @@ -4122,6 +4125,14 @@ struct hwrm_cfa_l2_set_rx_mask_output { u8 valid; }; +/* Command specific Error Codes (8 bytes) */ +struct hwrm_cfa_l2_set_rx_mask_cmd_err { + u8 code; + #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_UNKNOWN 0x0UL + #define CFA_L2_SET_RX_MASK_CMD_ERR_CODE_NTUPLE_FILTER_CONFLICT_ERR 0x1UL + u8 unused_0[7]; +}; + /* hwrm_cfa_tunnel_filter_alloc */ /* Input (88 bytes) */ struct hwrm_cfa_tunnel_filter_alloc_input { @@ -4161,6 +4172,7 @@ struct hwrm_cfa_tunnel_filter_alloc_input { #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS 0x6UL #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT 0x7UL #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE 0x8UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL u8 unused_0; __le32 vni; @@ -4323,6 +4335,7 @@ struct hwrm_cfa_ntuple_filter_alloc_input { #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS 0x6UL #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT 0x7UL #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE 0x8UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL u8 pri_hint; #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_NO_PREFER 0x0UL @@ -4355,6 +4368,14 @@ struct hwrm_cfa_ntuple_filter_alloc_output { u8 valid; }; +/* Command specific Error Codes (8 bytes) */ +struct hwrm_cfa_ntuple_filter_alloc_cmd_err { + u8 code; + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_UNKNOWN 0x0UL + #define CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_RX_MASK_VLAN_CONFLICT_ERR 0x1UL + u8 unused_0[7]; +}; + /* hwrm_cfa_ntuple_filter_free */ /* Input (24 bytes) */ struct hwrm_cfa_ntuple_filter_free_input { @@ -4413,6 +4434,116 @@ struct hwrm_cfa_ntuple_filter_cfg_output { u8 valid; }; +/* hwrm_cfa_decap_filter_alloc */ +/* Input (104 bytes) */ +struct hwrm_cfa_decap_filter_alloc_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 flags; + #define CFA_DECAP_FILTER_ALLOC_REQ_FLAGS_OVS_TUNNEL 0x1UL + __le32 enables; + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_TYPE 0x1UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_ID 0x2UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_SRC_MACADDR 0x4UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_MACADDR 0x8UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_OVLAN_VID 0x10UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IVLAN_VID 0x20UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_T_OVLAN_VID 0x40UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_T_IVLAN_VID 0x80UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_ETHERTYPE 0x100UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_SRC_IPADDR 0x200UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_IPADDR 0x400UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IPADDR_TYPE 0x800UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IP_PROTOCOL 0x1000UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_SRC_PORT 0x2000UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_PORT 0x4000UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_ID 0x8000UL + #define CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_MIRROR_VNIC_ID 0x10000UL + __be32 tunnel_id; + u8 tunnel_type; + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL 0x0UL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE 0x2UL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE 0x3UL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP 0x4UL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE 0x5UL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS 0x6UL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT 0x7UL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE 0x8UL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL + #define CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL + u8 unused_0; + __le16 unused_1; + u8 src_macaddr[6]; + u8 unused_2; + u8 unused_3; + u8 dst_macaddr[6]; + __be16 ovlan_vid; + __be16 ivlan_vid; + __be16 t_ovlan_vid; + __be16 t_ivlan_vid; + __be16 ethertype; + u8 ip_addr_type; + #define CFA_DECAP_FILTER_ALLOC_REQ_IP_ADDR_TYPE_UNKNOWN 0x0UL + #define CFA_DECAP_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4 0x4UL + #define CFA_DECAP_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV6 0x6UL + u8 ip_protocol; + #define CFA_DECAP_FILTER_ALLOC_REQ_IP_PROTOCOL_UNKNOWN 0x0UL + #define CFA_DECAP_FILTER_ALLOC_REQ_IP_PROTOCOL_TCP 0x6UL + #define CFA_DECAP_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP 0x11UL + u8 unused_4; + u8 unused_5; + u8 unused_6[3]; + u8 unused_7; + __be32 src_ipaddr[4]; + __be32 dst_ipaddr[4]; + __be16 src_port; + __be16 dst_port; + __le16 dst_id; + __le16 l2_ctxt_ref_id; +}; + +/* Output (16 bytes) */ +struct hwrm_cfa_decap_filter_alloc_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 decap_filter_id; + u8 unused_0; + u8 unused_1; + u8 unused_2; + u8 valid; +}; + +/* hwrm_cfa_decap_filter_free */ +/* Input (24 bytes) */ +struct hwrm_cfa_decap_filter_free_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 decap_filter_id; + __le32 unused_0; +}; + +/* Output (16 bytes) */ +struct hwrm_cfa_decap_filter_free_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 unused_0; + u8 unused_1; + u8 unused_2; + u8 unused_3; + u8 valid; +}; + /* hwrm_cfa_flow_alloc */ /* Input (128 bytes) */ struct hwrm_cfa_flow_alloc_input { @@ -4634,6 +4765,7 @@ struct hwrm_tunnel_dst_port_query_input { u8 tunnel_type; #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN 0x1UL #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_GENEVE 0x5UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL u8 unused_0[7]; }; @@ -4662,9 +4794,10 @@ struct hwrm_tunnel_dst_port_alloc_input { u8 tunnel_type; #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN 0x1UL #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GENEVE 0x5UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL u8 unused_0; __be16 tunnel_dst_port_val; - __le32 unused_1; + __be32 unused_1; }; /* Output (16 bytes) */ @@ -4693,6 +4826,7 @@ struct hwrm_tunnel_dst_port_free_input { u8 tunnel_type; #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN 0x1UL #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE 0x5UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN_V4 0x9UL u8 unused_0; __le16 tunnel_dst_port_id; __le32 unused_1; @@ -4848,6 +4982,8 @@ struct hwrm_fw_reset_input { #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_NETCTRL 0x2UL #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_ROCE 0x3UL #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_HOST 0x4UL + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_AP 0x5UL + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_CHIP 0x6UL u8 selfrst_status; #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTNONE 0x0UL #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP 0x1UL @@ -4888,6 +5024,8 @@ struct hwrm_fw_qstatus_input { #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_NETCTRL 0x2UL #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_ROCE 0x3UL #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_HOST 0x4UL + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_AP 0x5UL + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_CHIP 0x6UL u8 unused_0[7]; }; @@ -5324,6 +5462,32 @@ struct hwrm_wol_reason_qcfg_output { u8 valid; }; +/* hwrm_dbg_read_direct */ +/* Input (32 bytes) */ +struct hwrm_dbg_read_direct_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le64 host_dest_addr; + __le32 read_addr; + __le32 read_len32; +}; + +/* Output (16 bytes) */ +struct hwrm_dbg_read_direct_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 unused_0; + u8 unused_1; + u8 unused_2; + u8 unused_3; + u8 valid; +}; + /* hwrm_nvm_read */ /* Input (40 bytes) */ struct hwrm_nvm_read_input { @@ -5676,6 +5840,105 @@ struct hwrm_nvm_install_update_cmd_err { u8 unused_0[7]; }; +/* hwrm_nvm_get_variable */ +/* Input (40 bytes) */ +struct hwrm_nvm_get_variable_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le64 dest_data_addr; + __le16 data_len; + __le16 option_num; + #define NVM_GET_VARIABLE_REQ_OPTION_NUM_RSVD_0 0x0UL + #define NVM_GET_VARIABLE_REQ_OPTION_NUM_RSVD_FFFF 0xffffUL + __le16 dimensions; + __le16 index_0; + __le16 index_1; + __le16 index_2; + __le16 index_3; + u8 flags; + #define NVM_GET_VARIABLE_REQ_FLAGS_FACTORY_DFLT 0x1UL + u8 unused_0; +}; + +/* Output (16 bytes) */ +struct hwrm_nvm_get_variable_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 data_len; + __le16 option_num; + #define NVM_GET_VARIABLE_RESP_OPTION_NUM_RSVD_0 0x0UL + #define NVM_GET_VARIABLE_RESP_OPTION_NUM_RSVD_FFFF 0xffffUL + u8 unused_0; + u8 unused_1; + u8 unused_2; + u8 valid; +}; + +/* Command specific Error Codes (8 bytes) */ +struct hwrm_nvm_get_variable_cmd_err { + u8 code; + #define NVM_GET_VARIABLE_CMD_ERR_CODE_UNKNOWN 0x0UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_VAR_NOT_EXIST 0x1UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR 0x2UL + #define NVM_GET_VARIABLE_CMD_ERR_CODE_LEN_TOO_SHORT 0x3UL + u8 unused_0[7]; +}; + +/* hwrm_nvm_set_variable */ +/* Input (40 bytes) */ +struct hwrm_nvm_set_variable_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le64 src_data_addr; + __le16 data_len; + __le16 option_num; + #define NVM_SET_VARIABLE_REQ_OPTION_NUM_RSVD_0 0x0UL + #define NVM_SET_VARIABLE_REQ_OPTION_NUM_RSVD_FFFF 0xffffUL + __le16 dimensions; + __le16 index_0; + __le16 index_1; + __le16 index_2; + __le16 index_3; + u8 flags; + #define NVM_SET_VARIABLE_REQ_FLAGS_FORCE_FLUSH 0x1UL + #define NVM_SET_VARIABLE_REQ_FLAGS_ENCRYPT_MODE_MASK 0xeUL + #define NVM_SET_VARIABLE_REQ_FLAGS_ENCRYPT_MODE_SFT 1 + #define NVM_SET_VARIABLE_REQ_FLAGS_ENCRYPT_MODE_NONE (0x0UL << 1) + #define NVM_SET_VARIABLE_REQ_FLAGS_ENCRYPT_MODE_HMAC_SHA1 (0x1UL << 1) + #define NVM_SET_VARIABLE_REQ_FLAGS_ENCRYPT_MODE_LAST NVM_SET_VARIABLE_REQ_FLAGS_ENCRYPT_MODE_HMAC_SHA1 + u8 unused_0; +}; + +/* Output (16 bytes) */ +struct hwrm_nvm_set_variable_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 unused_0; + u8 unused_1; + u8 unused_2; + u8 unused_3; + u8 valid; +}; + +/* Command specific Error Codes (8 bytes) */ +struct hwrm_nvm_set_variable_cmd_err { + u8 code; + #define NVM_SET_VARIABLE_CMD_ERR_CODE_UNKNOWN 0x0UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_VAR_NOT_EXIST 0x1UL + #define NVM_SET_VARIABLE_CMD_ERR_CODE_CORRUPT_VAR 0x2UL + u8 unused_0[7]; +}; + /* hwrm_selftest_qlist */ /* Input (16 bytes) */ struct hwrm_selftest_qlist_input { @@ -5686,7 +5949,7 @@ struct hwrm_selftest_qlist_input { __le64 resp_addr; }; -/* Output (248 bytes) */ +/* Output (280 bytes) */ struct hwrm_selftest_qlist_output { __le16 error_code; __le16 req_type; @@ -5698,15 +5961,15 @@ struct hwrm_selftest_qlist_output { #define SELFTEST_QLIST_RESP_AVAILABLE_TESTS_LINK_TEST 0x2UL #define SELFTEST_QLIST_RESP_AVAILABLE_TESTS_REGISTER_TEST 0x4UL #define SELFTEST_QLIST_RESP_AVAILABLE_TESTS_MEMORY_TEST 0x8UL - #define SELFTEST_QLIST_RESP_AVAILABLE_TESTS_PCIE_EYE_TEST 0x10UL - #define SELFTEST_QLIST_RESP_AVAILABLE_TESTS_ETHERNET_EYE_TEST 0x20UL + #define SELFTEST_QLIST_RESP_AVAILABLE_TESTS_PCIE_SERDES_TEST 0x10UL + #define SELFTEST_QLIST_RESP_AVAILABLE_TESTS_ETHERNET_SERDES_TEST 0x20UL u8 offline_tests; #define SELFTEST_QLIST_RESP_OFFLINE_TESTS_NVM_TEST 0x1UL #define SELFTEST_QLIST_RESP_OFFLINE_TESTS_LINK_TEST 0x2UL #define SELFTEST_QLIST_RESP_OFFLINE_TESTS_REGISTER_TEST 0x4UL #define SELFTEST_QLIST_RESP_OFFLINE_TESTS_MEMORY_TEST 0x8UL - #define SELFTEST_QLIST_RESP_OFFLINE_TESTS_PCIE_EYE_TEST 0x10UL - #define SELFTEST_QLIST_RESP_OFFLINE_TESTS_ETHERNET_EYE_TEST 0x20UL + #define SELFTEST_QLIST_RESP_OFFLINE_TESTS_PCIE_SERDES_TEST 0x10UL + #define SELFTEST_QLIST_RESP_OFFLINE_TESTS_ETHERNET_SERDES_TEST 0x20UL u8 unused_0; __le16 test_timeout; u8 unused_1; @@ -5719,6 +5982,11 @@ struct hwrm_selftest_qlist_output { char test5_name[32]; char test6_name[32]; char test7_name[32]; + __le32 unused_3; + u8 unused_4; + u8 unused_5; + u8 unused_6; + u8 valid; }; /* hwrm_selftest_exec */ @@ -5734,8 +6002,8 @@ struct hwrm_selftest_exec_input { #define SELFTEST_EXEC_REQ_FLAGS_LINK_TEST 0x2UL #define SELFTEST_EXEC_REQ_FLAGS_REGISTER_TEST 0x4UL #define SELFTEST_EXEC_REQ_FLAGS_MEMORY_TEST 0x8UL - #define SELFTEST_EXEC_REQ_FLAGS_PCIE_EYE_TEST 0x10UL - #define SELFTEST_EXEC_REQ_FLAGS_ETHERNET_EYE_TEST 0x20UL + #define SELFTEST_EXEC_REQ_FLAGS_PCIE_SERDES_TEST 0x10UL + #define SELFTEST_EXEC_REQ_FLAGS_ETHERNET_SERDES_TEST 0x20UL u8 unused_0[7]; }; @@ -5750,16 +6018,21 @@ struct hwrm_selftest_exec_output { #define SELFTEST_EXEC_RESP_REQUESTED_TESTS_LINK_TEST 0x2UL #define SELFTEST_EXEC_RESP_REQUESTED_TESTS_REGISTER_TEST 0x4UL #define SELFTEST_EXEC_RESP_REQUESTED_TESTS_MEMORY_TEST 0x8UL - #define SELFTEST_EXEC_RESP_REQUESTED_TESTS_PCIE_EYE_TEST 0x10UL - #define SELFTEST_EXEC_RESP_REQUESTED_TESTS_ETHERNET_EYE_TEST 0x20UL + #define SELFTEST_EXEC_RESP_REQUESTED_TESTS_PCIE_SERDES_TEST 0x10UL + #define SELFTEST_EXEC_RESP_REQUESTED_TESTS_ETHERNET_SERDES_TEST 0x20UL u8 test_success; #define SELFTEST_EXEC_RESP_TEST_SUCCESS_NVM_TEST 0x1UL #define SELFTEST_EXEC_RESP_TEST_SUCCESS_LINK_TEST 0x2UL #define SELFTEST_EXEC_RESP_TEST_SUCCESS_REGISTER_TEST 0x4UL #define SELFTEST_EXEC_RESP_TEST_SUCCESS_MEMORY_TEST 0x8UL - #define SELFTEST_EXEC_RESP_TEST_SUCCESS_PCIE_EYE_TEST 0x10UL - #define SELFTEST_EXEC_RESP_TEST_SUCCESS_ETHERNET_EYE_TEST 0x20UL - __le16 unused_0[3]; + #define SELFTEST_EXEC_RESP_TEST_SUCCESS_PCIE_SERDES_TEST 0x10UL + #define SELFTEST_EXEC_RESP_TEST_SUCCESS_ETHERNET_SERDES_TEST 0x20UL + u8 unused_0; + u8 unused_1; + u8 unused_2; + u8 unused_3; + u8 unused_4; + u8 valid; }; /* hwrm_selftest_irq */ @@ -5772,12 +6045,50 @@ struct hwrm_selftest_irq_input { __le64 resp_addr; }; -/* Output (8 bytes) */ +/* Output (16 bytes) */ struct hwrm_selftest_irq_output { __le16 error_code; __le16 req_type; __le16 seq_id; __le16 resp_len; + __le32 unused_0; + u8 unused_1; + u8 unused_2; + u8 unused_3; + u8 valid; +}; + +/* hwrm_selftest_retrieve_serdes_data */ +/* Input (32 bytes) */ +struct hwrm_selftest_retrieve_serdes_data_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le64 resp_data_addr; + __le32 resp_data_offset; + __le16 data_len; + u8 flags; + #define SELFTEST_RETRIEVE_SERDES_DATA_REQ_FLAGS_UNUSED_TEST_MASK 0xfUL + #define SELFTEST_RETRIEVE_SERDES_DATA_REQ_FLAGS_UNUSED_TEST_SFT 0 + #define SELFTEST_RETRIEVE_SERDES_DATA_REQ_FLAGS_PCIE_SERDES_TEST 0x10UL + #define SELFTEST_RETRIEVE_SERDES_DATA_REQ_FLAGS_ETHERNET_SERDES_TEST 0x20UL + u8 unused_0; +}; + +/* Output (16 bytes) */ +struct hwrm_selftest_retrieve_serdes_data_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le16 total_data_len; + __le16 copied_data_len; + u8 unused_0; + u8 unused_1; + u8 unused_2; + u8 valid; }; /* Hardware Resource Manager Specification */ @@ -5938,10 +6249,16 @@ struct cmd_nums { #define HWRM_CFA_DECAP_FILTER_ALLOC (0x108UL) #define HWRM_CFA_DECAP_FILTER_FREE (0x109UL) #define HWRM_CFA_VLAN_ANTISPOOF_QCFG (0x10aUL) + #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC (0x10bUL) + #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_FREE (0x10cUL) + #define HWRM_CFA_PAIR_ALLOC (0x10dUL) + #define HWRM_CFA_PAIR_FREE (0x10eUL) + #define HWRM_CFA_PAIR_INFO (0x10fUL) + #define HWRM_FW_IPC_MSG (0x110UL) #define HWRM_SELFTEST_QLIST (0x200UL) #define HWRM_SELFTEST_EXEC (0x201UL) #define HWRM_SELFTEST_IRQ (0x202UL) - #define HWRM_SELFTEST_RETREIVE_EYE_DATA (0x203UL) + #define HWRM_SELFTEST_RETRIEVE_SERDES_DATA (0x203UL) #define HWRM_DBG_READ_DIRECT (0xff10UL) #define HWRM_DBG_READ_INDIRECT (0xff11UL) #define HWRM_DBG_WRITE_DIRECT (0xff12UL) @@ -5949,6 +6266,9 @@ struct cmd_nums { #define HWRM_DBG_DUMP (0xff14UL) #define HWRM_DBG_ERASE_NVM (0xff15UL) #define HWRM_DBG_CFG (0xff16UL) + #define HWRM_DBG_COREDUMP_LIST (0xff17UL) + #define HWRM_DBG_COREDUMP_INITIATE (0xff18UL) + #define HWRM_DBG_COREDUMP_RETRIEVE (0xff19UL) #define HWRM_NVM_FACTORY_DEFAULTS (0xffeeUL) #define HWRM_NVM_VALIDATE_OPTION (0xffefUL) #define HWRM_NVM_FLUSH (0xfff0UL) @@ -6123,6 +6443,58 @@ struct rx_port_stats { __le64 rx_stat_err; }; +/* VXLAN IPv4 encapsulation structure (16 bytes) */ +struct hwrm_vxlan_ipv4_hdr { + u8 ver_hlen; + #define VXLAN_IPV4_HDR_VER_HLEN_HEADER_LENGTH_MASK 0xfUL + #define VXLAN_IPV4_HDR_VER_HLEN_HEADER_LENGTH_SFT 0 + #define VXLAN_IPV4_HDR_VER_HLEN_VERSION_MASK 0xf0UL + #define VXLAN_IPV4_HDR_VER_HLEN_VERSION_SFT 4 + u8 tos; + __be16 ip_id; + __be16 flags_frag_offset; + u8 ttl; + u8 protocol; + __be32 src_ip_addr; + __be32 dest_ip_addr; +}; + +/* VXLAN IPv6 encapsulation structure (32 bytes) */ +struct hwrm_vxlan_ipv6_hdr { + __be32 ver_tc_flow_label; + #define VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_VER_SFT 0x1cUL + #define VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_VER_MASK 0xf0000000UL + #define VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_TC_SFT 0x14UL + #define VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_TC_MASK 0xff00000UL + #define VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_FLOW_LABEL_SFT 0x0UL + #define VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_FLOW_LABEL_MASK 0xfffffUL + __be16 payload_len; + u8 next_hdr; + u8 ttl; + __be32 src_ip_addr[4]; + __be32 dest_ip_addr[4]; +}; + +/* VXLAN encapsulation structure (72 bytes) */ +struct hwrm_cfa_encap_data_vxlan { + u8 src_mac_addr[6]; + __le16 unused_0; + u8 dst_mac_addr[6]; + u8 num_vlan_tags; + u8 unused_1; + __be16 ovlan_tpid; + __be16 ovlan_tci; + __be16 ivlan_tpid; + __be16 ivlan_tci; + __le32 l3[10]; + #define CFA_ENCAP_DATA_VXLAN_L3_VER_MASK 0xfUL + #define CFA_ENCAP_DATA_VXLAN_L3_VER_IPV4 0x4UL + #define CFA_ENCAP_DATA_VXLAN_L3_VER_IPV6 0x6UL + __be16 src_port; + __be16 dst_port; + __be32 vni; +}; + /* Periodic Statistics Context DMA to host (160 bytes) */ struct ctx_hw_stats { __le64 rx_ucast_pkts; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 7dd3d131043a..d5031f436f83 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -16,6 +16,7 @@ #include <net/tc_act/tc_skbedit.h> #include <net/tc_act/tc_mirred.h> #include <net/tc_act/tc_vlan.h> +#include <net/tc_act/tc_tunnel_key.h> #include "bnxt_hsi.h" #include "bnxt.h" @@ -23,8 +24,6 @@ #include "bnxt_tc.h" #include "bnxt_vfr.h" -#ifdef CONFIG_BNXT_FLOWER_OFFLOAD - #define BNXT_FID_INVALID 0xffff #define VLAN_TCI(vid, prio) ((vid) | ((prio) << VLAN_PRIO_SHIFT)) @@ -91,6 +90,23 @@ static void bnxt_tc_parse_vlan(struct bnxt *bp, } } +static int bnxt_tc_parse_tunnel_set(struct bnxt *bp, + struct bnxt_tc_actions *actions, + const struct tc_action *tc_act) +{ + struct ip_tunnel_info *tun_info = tcf_tunnel_info(tc_act); + struct ip_tunnel_key *tun_key = &tun_info->key; + + if (ip_tunnel_info_af(tun_info) != AF_INET) { + netdev_info(bp->dev, "only IPv4 tunnel-encap is supported"); + return -EOPNOTSUPP; + } + + actions->tun_encap_key = *tun_key; + actions->flags |= BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP; + return 0; +} + static int bnxt_tc_parse_actions(struct bnxt *bp, struct bnxt_tc_actions *actions, struct tcf_exts *tc_exts) @@ -125,9 +141,35 @@ static int bnxt_tc_parse_actions(struct bnxt *bp, bnxt_tc_parse_vlan(bp, actions, tc_act); continue; } + + /* Tunnel encap */ + if (is_tcf_tunnel_set(tc_act)) { + rc = bnxt_tc_parse_tunnel_set(bp, actions, tc_act); + if (rc) + return rc; + continue; + } + + /* Tunnel decap */ + if (is_tcf_tunnel_release(tc_act)) { + actions->flags |= BNXT_TC_ACTION_FLAG_TUNNEL_DECAP; + continue; + } } - return 0; + if (rc) + return rc; + + /* Tunnel encap/decap action must be accompanied by a redirect action */ + if ((actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP || + actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP) && + !(actions->flags & BNXT_TC_ACTION_FLAG_FWD)) { + netdev_info(bp->dev, + "error: no redir action along with encap/decap"); + return -EINVAL; + } + + return rc; } #define GET_KEY(flow_cmd, key_type) \ @@ -254,6 +296,54 @@ static int bnxt_tc_parse_flow(struct bnxt *bp, flow->l4_mask.icmp.code = mask->code; } + if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { + struct flow_dissector_key_control *key = + GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_CONTROL); + + addr_type = key->addr_type; + } + + if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { + struct flow_dissector_key_ipv4_addrs *key = + GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS); + struct flow_dissector_key_ipv4_addrs *mask = + GET_MASK(tc_flow_cmd, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS); + + flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS; + flow->tun_key.u.ipv4.dst = key->dst; + flow->tun_mask.u.ipv4.dst = mask->dst; + flow->tun_key.u.ipv4.src = key->src; + flow->tun_mask.u.ipv4.src = mask->src; + } else if (dissector_uses_key(dissector, + FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) { + return -EOPNOTSUPP; + } + + if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_dissector_key_keyid *key = + GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_KEYID); + struct flow_dissector_key_keyid *mask = + GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_KEYID); + + flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_ID; + flow->tun_key.tun_id = key32_to_tunnel_id(key->keyid); + flow->tun_mask.tun_id = key32_to_tunnel_id(mask->keyid); + } + + if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) { + struct flow_dissector_key_ports *key = + GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_PORTS); + struct flow_dissector_key_ports *mask = + GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_PORTS); + + flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_PORTS; + flow->tun_key.tp_dst = key->dst; + flow->tun_mask.tp_dst = mask->dst; + flow->tun_key.tp_src = key->src; + flow->tun_mask.tp_src = mask->src; + } + return bnxt_tc_parse_actions(bp, &flow->actions, tc_flow_cmd->exts); } @@ -295,7 +385,8 @@ static bool is_wildcard(void *mask, int len) } static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow, - __le16 ref_flow_handle, __le16 *flow_handle) + __le16 ref_flow_handle, + __le32 tunnel_handle, __le16 *flow_handle) { struct hwrm_cfa_flow_alloc_output *resp = bp->hwrm_cmd_resp_addr; struct bnxt_tc_actions *actions = &flow->actions; @@ -309,6 +400,14 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow, req.src_fid = cpu_to_le16(flow->src_fid); req.ref_flow_handle = ref_flow_handle; + + if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP || + actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) { + req.tunnel_handle = tunnel_handle; + flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_TUNNEL; + action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_TUNNEL; + } + req.ethertype = flow->l2_key.ether_type; req.ip_proto = flow->l4_key.ip_proto; @@ -405,78 +504,153 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow, return rc; } -/* Add val to accum while handling a possible wraparound - * of val. Eventhough val is of type u64, its actual width - * is denoted by mask and will wrap-around beyond that width. - */ -static void accumulate_val(u64 *accum, u64 val, u64 mask) +static int hwrm_cfa_decap_filter_alloc(struct bnxt *bp, + struct bnxt_tc_flow *flow, + struct bnxt_tc_l2_key *l2_info, + __le32 ref_decap_handle, + __le32 *decap_filter_handle) { -#define low_bits(x, mask) ((x) & (mask)) -#define high_bits(x, mask) ((x) & ~(mask)) - bool wrapped = val < low_bits(*accum, mask); + struct hwrm_cfa_decap_filter_alloc_output *resp = + bp->hwrm_cmd_resp_addr; + struct hwrm_cfa_decap_filter_alloc_input req = { 0 }; + struct ip_tunnel_key *tun_key = &flow->tun_key; + u32 enables = 0; + int rc; - *accum = high_bits(*accum, mask) + val; - if (wrapped) - *accum += (mask + 1); -} + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_DECAP_FILTER_ALLOC, -1, -1); -/* The HW counters' width is much less than 64bits. - * Handle possible wrap-around while updating the stat counters - */ -static void bnxt_flow_stats_fix_wraparound(struct bnxt_tc_info *tc_info, - struct bnxt_tc_flow_stats *stats, - struct bnxt_tc_flow_stats *hw_stats) -{ - accumulate_val(&stats->bytes, hw_stats->bytes, tc_info->bytes_mask); - accumulate_val(&stats->packets, hw_stats->packets, - tc_info->packets_mask); + req.flags = cpu_to_le32(CFA_DECAP_FILTER_ALLOC_REQ_FLAGS_OVS_TUNNEL); + enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_TYPE | + CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IP_PROTOCOL; + req.tunnel_type = CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN; + req.ip_protocol = CFA_DECAP_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP; + + if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_ID) { + enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_ID; + /* tunnel_id is wrongly defined in hsi defn. as __le32 */ + req.tunnel_id = tunnel_id_to_key32(tun_key->tun_id); + } + + if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS) { + enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_MACADDR | + CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_SRC_MACADDR; + ether_addr_copy(req.dst_macaddr, l2_info->dmac); + ether_addr_copy(req.src_macaddr, l2_info->smac); + } + if (l2_info->num_vlans) { + enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_T_IVLAN_VID; + req.t_ivlan_vid = l2_info->inner_vlan_tci; + } + + enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_ETHERTYPE; + req.ethertype = htons(ETH_P_IP); + + if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS) { + enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_SRC_IPADDR | + CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_IPADDR | + CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IPADDR_TYPE; + req.ip_addr_type = CFA_DECAP_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4; + req.dst_ipaddr[0] = tun_key->u.ipv4.dst; + req.src_ipaddr[0] = tun_key->u.ipv4.src; + } + + if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_PORTS) { + enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_PORT; + req.dst_port = tun_key->tp_dst; + } + + /* Eventhough the decap_handle returned by hwrm_cfa_decap_filter_alloc + * is defined as __le32, l2_ctxt_ref_id is defined in HSI as __le16. + */ + req.l2_ctxt_ref_id = (__force __le16)ref_decap_handle; + req.enables = cpu_to_le32(enables); + + mutex_lock(&bp->hwrm_cmd_lock); + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (!rc) + *decap_filter_handle = resp->decap_filter_id; + else + netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc); + mutex_unlock(&bp->hwrm_cmd_lock); + + return rc; } -/* Fix possible wraparound of the stats queried from HW, calculate - * the delta from prev_stats, and also update the prev_stats. - * The HW flow stats are fetched under the hwrm_cmd_lock mutex. - * This routine is best called while under the mutex so that the - * stats processing happens atomically. - */ -static void bnxt_flow_stats_calc(struct bnxt_tc_info *tc_info, - struct bnxt_tc_flow *flow, - struct bnxt_tc_flow_stats *stats) +static int hwrm_cfa_decap_filter_free(struct bnxt *bp, + __le32 decap_filter_handle) { - struct bnxt_tc_flow_stats *acc_stats, *prev_stats; + struct hwrm_cfa_decap_filter_free_input req = { 0 }; + int rc; - acc_stats = &flow->stats; - bnxt_flow_stats_fix_wraparound(tc_info, acc_stats, stats); + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_DECAP_FILTER_FREE, -1, -1); + req.decap_filter_id = decap_filter_handle; - prev_stats = &flow->prev_stats; - stats->bytes = acc_stats->bytes - prev_stats->bytes; - stats->packets = acc_stats->packets - prev_stats->packets; - *prev_stats = *acc_stats; + rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (rc) + netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc); + return rc; } -static int bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp, - __le16 flow_handle, - struct bnxt_tc_flow *flow, - struct bnxt_tc_flow_stats *stats) +static int hwrm_cfa_encap_record_alloc(struct bnxt *bp, + struct ip_tunnel_key *encap_key, + struct bnxt_tc_l2_key *l2_info, + __le32 *encap_record_handle) { - struct hwrm_cfa_flow_stats_output *resp = bp->hwrm_cmd_resp_addr; - struct hwrm_cfa_flow_stats_input req = { 0 }; + struct hwrm_cfa_encap_record_alloc_output *resp = + bp->hwrm_cmd_resp_addr; + struct hwrm_cfa_encap_record_alloc_input req = { 0 }; + struct hwrm_cfa_encap_data_vxlan *encap = + (struct hwrm_cfa_encap_data_vxlan *)&req.encap_data; + struct hwrm_vxlan_ipv4_hdr *encap_ipv4 = + (struct hwrm_vxlan_ipv4_hdr *)encap->l3; int rc; - bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_STATS, -1, -1); - req.num_flows = cpu_to_le16(1); - req.flow_handle_0 = flow_handle; + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_ENCAP_RECORD_ALLOC, -1, -1); - mutex_lock(&bp->hwrm_cmd_lock); - rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); - if (!rc) { - stats->packets = le64_to_cpu(resp->packet_0); - stats->bytes = le64_to_cpu(resp->byte_0); - bnxt_flow_stats_calc(&bp->tc_info, flow, stats); - } else { - netdev_info(bp->dev, "error rc=%d", rc); + req.encap_type = CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN; + + ether_addr_copy(encap->dst_mac_addr, l2_info->dmac); + ether_addr_copy(encap->src_mac_addr, l2_info->smac); + if (l2_info->num_vlans) { + encap->num_vlan_tags = l2_info->num_vlans; + encap->ovlan_tci = l2_info->inner_vlan_tci; + encap->ovlan_tpid = l2_info->inner_vlan_tpid; } + encap_ipv4->ver_hlen = 4 << VXLAN_IPV4_HDR_VER_HLEN_VERSION_SFT; + encap_ipv4->ver_hlen |= 5 << VXLAN_IPV4_HDR_VER_HLEN_HEADER_LENGTH_SFT; + encap_ipv4->ttl = encap_key->ttl; + + encap_ipv4->dest_ip_addr = encap_key->u.ipv4.dst; + encap_ipv4->src_ip_addr = encap_key->u.ipv4.src; + encap_ipv4->protocol = IPPROTO_UDP; + + encap->dst_port = encap_key->tp_dst; + encap->vni = tunnel_id_to_key32(encap_key->tun_id); + + mutex_lock(&bp->hwrm_cmd_lock); + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (!rc) + *encap_record_handle = resp->encap_record_id; + else + netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc); mutex_unlock(&bp->hwrm_cmd_lock); + + return rc; +} + +static int hwrm_cfa_encap_record_free(struct bnxt *bp, + __le32 encap_record_handle) +{ + struct hwrm_cfa_encap_record_free_input req = { 0 }; + int rc; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_ENCAP_RECORD_FREE, -1, -1); + req.encap_record_id = encap_record_handle; + + rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (rc) + netdev_info(bp->dev, "%s: Error rc=%d", __func__, rc); return rc; } @@ -484,7 +658,7 @@ static int bnxt_tc_put_l2_node(struct bnxt *bp, struct bnxt_tc_flow_node *flow_node) { struct bnxt_tc_l2_node *l2_node = flow_node->l2_node; - struct bnxt_tc_info *tc_info = &bp->tc_info; + struct bnxt_tc_info *tc_info = bp->tc_info; int rc; /* remove flow_node from the L2 shared flow list */ @@ -521,7 +695,7 @@ bnxt_tc_get_l2_node(struct bnxt *bp, struct rhashtable *l2_table, rc = rhashtable_insert_fast(l2_table, &l2_node->node, ht_params); if (rc) { - kfree(l2_node); + kfree_rcu(l2_node, rcu); netdev_err(bp->dev, "Error: %s: rhashtable_insert_fast: %d", __func__, rc); @@ -540,7 +714,7 @@ bnxt_tc_get_ref_flow_handle(struct bnxt *bp, struct bnxt_tc_flow *flow, struct bnxt_tc_flow_node *flow_node, __le16 *ref_flow_handle) { - struct bnxt_tc_info *tc_info = &bp->tc_info; + struct bnxt_tc_info *tc_info = bp->tc_info; struct bnxt_tc_flow_node *ref_flow_node; struct bnxt_tc_l2_node *l2_node; @@ -590,10 +764,386 @@ static bool bnxt_tc_can_offload(struct bnxt *bp, struct bnxt_tc_flow *flow) return true; } +/* Returns the final refcount of the node on success + * or a -ve error code on failure + */ +static int bnxt_tc_put_tunnel_node(struct bnxt *bp, + struct rhashtable *tunnel_table, + struct rhashtable_params *ht_params, + struct bnxt_tc_tunnel_node *tunnel_node) +{ + int rc; + + if (--tunnel_node->refcount == 0) { + rc = rhashtable_remove_fast(tunnel_table, &tunnel_node->node, + *ht_params); + if (rc) { + netdev_err(bp->dev, "rhashtable_remove_fast rc=%d", rc); + rc = -1; + } + kfree_rcu(tunnel_node, rcu); + return rc; + } else { + return tunnel_node->refcount; + } +} + +/* Get (or add) either encap or decap tunnel node from/to the supplied + * hash table. + */ +static struct bnxt_tc_tunnel_node * +bnxt_tc_get_tunnel_node(struct bnxt *bp, struct rhashtable *tunnel_table, + struct rhashtable_params *ht_params, + struct ip_tunnel_key *tun_key) +{ + struct bnxt_tc_tunnel_node *tunnel_node; + int rc; + + tunnel_node = rhashtable_lookup_fast(tunnel_table, tun_key, *ht_params); + if (!tunnel_node) { + tunnel_node = kzalloc(sizeof(*tunnel_node), GFP_KERNEL); + if (!tunnel_node) { + rc = -ENOMEM; + goto err; + } + + tunnel_node->key = *tun_key; + tunnel_node->tunnel_handle = INVALID_TUNNEL_HANDLE; + rc = rhashtable_insert_fast(tunnel_table, &tunnel_node->node, + *ht_params); + if (rc) { + kfree_rcu(tunnel_node, rcu); + goto err; + } + } + tunnel_node->refcount++; + return tunnel_node; +err: + netdev_info(bp->dev, "error rc=%d", rc); + return NULL; +} + +static int bnxt_tc_get_ref_decap_handle(struct bnxt *bp, + struct bnxt_tc_flow *flow, + struct bnxt_tc_l2_key *l2_key, + struct bnxt_tc_flow_node *flow_node, + __le32 *ref_decap_handle) +{ + struct bnxt_tc_info *tc_info = bp->tc_info; + struct bnxt_tc_flow_node *ref_flow_node; + struct bnxt_tc_l2_node *decap_l2_node; + + decap_l2_node = bnxt_tc_get_l2_node(bp, &tc_info->decap_l2_table, + tc_info->decap_l2_ht_params, + l2_key); + if (!decap_l2_node) + return -1; + + /* If any other flow is using this decap_l2_node, use it's decap_handle + * as the ref_decap_handle + */ + if (decap_l2_node->refcount > 0) { + ref_flow_node = + list_first_entry(&decap_l2_node->common_l2_flows, + struct bnxt_tc_flow_node, + decap_l2_list_node); + *ref_decap_handle = ref_flow_node->decap_node->tunnel_handle; + } else { + *ref_decap_handle = INVALID_TUNNEL_HANDLE; + } + + /* Insert the l2_node into the flow_node so that subsequent flows + * with a matching decap l2 key can use the decap_filter_handle of + * this flow as their ref_decap_handle + */ + flow_node->decap_l2_node = decap_l2_node; + list_add(&flow_node->decap_l2_list_node, + &decap_l2_node->common_l2_flows); + decap_l2_node->refcount++; + return 0; +} + +static void bnxt_tc_put_decap_l2_node(struct bnxt *bp, + struct bnxt_tc_flow_node *flow_node) +{ + struct bnxt_tc_l2_node *decap_l2_node = flow_node->decap_l2_node; + struct bnxt_tc_info *tc_info = bp->tc_info; + int rc; + + /* remove flow_node from the decap L2 sharing flow list */ + list_del(&flow_node->decap_l2_list_node); + if (--decap_l2_node->refcount == 0) { + rc = rhashtable_remove_fast(&tc_info->decap_l2_table, + &decap_l2_node->node, + tc_info->decap_l2_ht_params); + if (rc) + netdev_err(bp->dev, "rhashtable_remove_fast rc=%d", rc); + kfree_rcu(decap_l2_node, rcu); + } +} + +static void bnxt_tc_put_decap_handle(struct bnxt *bp, + struct bnxt_tc_flow_node *flow_node) +{ + __le32 decap_handle = flow_node->decap_node->tunnel_handle; + struct bnxt_tc_info *tc_info = bp->tc_info; + int rc; + + if (flow_node->decap_l2_node) + bnxt_tc_put_decap_l2_node(bp, flow_node); + + rc = bnxt_tc_put_tunnel_node(bp, &tc_info->decap_table, + &tc_info->decap_ht_params, + flow_node->decap_node); + if (!rc && decap_handle != INVALID_TUNNEL_HANDLE) + hwrm_cfa_decap_filter_free(bp, decap_handle); +} + +static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp, + struct ip_tunnel_key *tun_key, + struct bnxt_tc_l2_key *l2_info, + struct net_device *real_dst_dev) +{ +#ifdef CONFIG_INET + struct flowi4 flow = { {0} }; + struct net_device *dst_dev; + struct neighbour *nbr; + struct rtable *rt; + int rc; + + flow.flowi4_proto = IPPROTO_UDP; + flow.fl4_dport = tun_key->tp_dst; + flow.daddr = tun_key->u.ipv4.dst; + + rt = ip_route_output_key(dev_net(real_dst_dev), &flow); + if (IS_ERR(rt)) { + netdev_info(bp->dev, "no route to %pI4b", &flow.daddr); + return -EOPNOTSUPP; + } + + /* The route must either point to the real_dst_dev or a dst_dev that + * uses the real_dst_dev. + */ + dst_dev = rt->dst.dev; + if (is_vlan_dev(dst_dev)) { +#if IS_ENABLED(CONFIG_VLAN_8021Q) + struct vlan_dev_priv *vlan = vlan_dev_priv(dst_dev); + + if (vlan->real_dev != real_dst_dev) { + netdev_info(bp->dev, + "dst_dev(%s) doesn't use PF-if(%s)", + netdev_name(dst_dev), + netdev_name(real_dst_dev)); + rc = -EOPNOTSUPP; + goto put_rt; + } + l2_info->inner_vlan_tci = htons(vlan->vlan_id); + l2_info->inner_vlan_tpid = vlan->vlan_proto; + l2_info->num_vlans = 1; +#endif + } else if (dst_dev != real_dst_dev) { + netdev_info(bp->dev, + "dst_dev(%s) for %pI4b is not PF-if(%s)", + netdev_name(dst_dev), &flow.daddr, + netdev_name(real_dst_dev)); + rc = -EOPNOTSUPP; + goto put_rt; + } + + nbr = dst_neigh_lookup(&rt->dst, &flow.daddr); + if (!nbr) { + netdev_info(bp->dev, "can't lookup neighbor for %pI4b", + &flow.daddr); + rc = -EOPNOTSUPP; + goto put_rt; + } + + tun_key->u.ipv4.src = flow.saddr; + tun_key->ttl = ip4_dst_hoplimit(&rt->dst); + neigh_ha_snapshot(l2_info->dmac, nbr, dst_dev); + ether_addr_copy(l2_info->smac, dst_dev->dev_addr); + neigh_release(nbr); + ip_rt_put(rt); + + return 0; +put_rt: + ip_rt_put(rt); + return rc; +#else + return -EOPNOTSUPP; +#endif +} + +static int bnxt_tc_get_decap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow, + struct bnxt_tc_flow_node *flow_node, + __le32 *decap_filter_handle) +{ + struct ip_tunnel_key *decap_key = &flow->tun_key; + struct bnxt_tc_info *tc_info = bp->tc_info; + struct bnxt_tc_l2_key l2_info = { {0} }; + struct bnxt_tc_tunnel_node *decap_node; + struct ip_tunnel_key tun_key = { 0 }; + struct bnxt_tc_l2_key *decap_l2_info; + __le32 ref_decap_handle; + int rc; + + /* Check if there's another flow using the same tunnel decap. + * If not, add this tunnel to the table and resolve the other + * tunnel header fileds + */ + decap_node = bnxt_tc_get_tunnel_node(bp, &tc_info->decap_table, + &tc_info->decap_ht_params, + decap_key); + if (!decap_node) + return -ENOMEM; + + flow_node->decap_node = decap_node; + + if (decap_node->tunnel_handle != INVALID_TUNNEL_HANDLE) + goto done; + + /* Resolve the L2 fields for tunnel decap + * Resolve the route for remote vtep (saddr) of the decap key + * Find it's next-hop mac addrs + */ + tun_key.u.ipv4.dst = flow->tun_key.u.ipv4.src; + tun_key.tp_dst = flow->tun_key.tp_dst; + rc = bnxt_tc_resolve_tunnel_hdrs(bp, &tun_key, &l2_info, bp->dev); + if (rc) + goto put_decap; + + decap_key->ttl = tun_key.ttl; + decap_l2_info = &decap_node->l2_info; + ether_addr_copy(decap_l2_info->dmac, l2_info.smac); + ether_addr_copy(decap_l2_info->smac, l2_info.dmac); + if (l2_info.num_vlans) { + decap_l2_info->num_vlans = l2_info.num_vlans; + decap_l2_info->inner_vlan_tpid = l2_info.inner_vlan_tpid; + decap_l2_info->inner_vlan_tci = l2_info.inner_vlan_tci; + } + flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS; + + /* For getting a decap_filter_handle we first need to check if + * there are any other decap flows that share the same tunnel L2 + * key and if so, pass that flow's decap_filter_handle as the + * ref_decap_handle for this flow. + */ + rc = bnxt_tc_get_ref_decap_handle(bp, flow, decap_l2_info, flow_node, + &ref_decap_handle); + if (rc) + goto put_decap; + + /* Issue the hwrm cmd to allocate a decap filter handle */ + rc = hwrm_cfa_decap_filter_alloc(bp, flow, decap_l2_info, + ref_decap_handle, + &decap_node->tunnel_handle); + if (rc) + goto put_decap_l2; + +done: + *decap_filter_handle = decap_node->tunnel_handle; + return 0; + +put_decap_l2: + bnxt_tc_put_decap_l2_node(bp, flow_node); +put_decap: + bnxt_tc_put_tunnel_node(bp, &tc_info->decap_table, + &tc_info->decap_ht_params, + flow_node->decap_node); + return rc; +} + +static void bnxt_tc_put_encap_handle(struct bnxt *bp, + struct bnxt_tc_tunnel_node *encap_node) +{ + __le32 encap_handle = encap_node->tunnel_handle; + struct bnxt_tc_info *tc_info = bp->tc_info; + int rc; + + rc = bnxt_tc_put_tunnel_node(bp, &tc_info->encap_table, + &tc_info->encap_ht_params, encap_node); + if (!rc && encap_handle != INVALID_TUNNEL_HANDLE) + hwrm_cfa_encap_record_free(bp, encap_handle); +} + +/* Lookup the tunnel encap table and check if there's an encap_handle + * alloc'd already. + * If not, query L2 info via a route lookup and issue an encap_record_alloc + * cmd to FW. + */ +static int bnxt_tc_get_encap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow, + struct bnxt_tc_flow_node *flow_node, + __le32 *encap_handle) +{ + struct ip_tunnel_key *encap_key = &flow->actions.tun_encap_key; + struct bnxt_tc_info *tc_info = bp->tc_info; + struct bnxt_tc_tunnel_node *encap_node; + int rc; + + /* Check if there's another flow using the same tunnel encap. + * If not, add this tunnel to the table and resolve the other + * tunnel header fileds + */ + encap_node = bnxt_tc_get_tunnel_node(bp, &tc_info->encap_table, + &tc_info->encap_ht_params, + encap_key); + if (!encap_node) + return -ENOMEM; + + flow_node->encap_node = encap_node; + + if (encap_node->tunnel_handle != INVALID_TUNNEL_HANDLE) + goto done; + + rc = bnxt_tc_resolve_tunnel_hdrs(bp, encap_key, &encap_node->l2_info, + flow->actions.dst_dev); + if (rc) + goto put_encap; + + /* Allocate a new tunnel encap record */ + rc = hwrm_cfa_encap_record_alloc(bp, encap_key, &encap_node->l2_info, + &encap_node->tunnel_handle); + if (rc) + goto put_encap; + +done: + *encap_handle = encap_node->tunnel_handle; + return 0; + +put_encap: + bnxt_tc_put_tunnel_node(bp, &tc_info->encap_table, + &tc_info->encap_ht_params, encap_node); + return rc; +} + +static void bnxt_tc_put_tunnel_handle(struct bnxt *bp, + struct bnxt_tc_flow *flow, + struct bnxt_tc_flow_node *flow_node) +{ + if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP) + bnxt_tc_put_decap_handle(bp, flow_node); + else if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) + bnxt_tc_put_encap_handle(bp, flow_node->encap_node); +} + +static int bnxt_tc_get_tunnel_handle(struct bnxt *bp, + struct bnxt_tc_flow *flow, + struct bnxt_tc_flow_node *flow_node, + __le32 *tunnel_handle) +{ + if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP) + return bnxt_tc_get_decap_handle(bp, flow, flow_node, + tunnel_handle); + else if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) + return bnxt_tc_get_encap_handle(bp, flow, flow_node, + tunnel_handle); + else + return 0; +} static int __bnxt_tc_del_flow(struct bnxt *bp, struct bnxt_tc_flow_node *flow_node) { - struct bnxt_tc_info *tc_info = &bp->tc_info; + struct bnxt_tc_info *tc_info = bp->tc_info; int rc; /* send HWRM cmd to free the flow-id */ @@ -601,6 +1151,9 @@ static int __bnxt_tc_del_flow(struct bnxt *bp, mutex_lock(&tc_info->lock); + /* release references to any tunnel encap/decap nodes */ + bnxt_tc_put_tunnel_handle(bp, &flow_node->flow, flow_node); + /* release reference to l2 node */ bnxt_tc_put_l2_node(bp, flow_node); @@ -633,8 +1186,9 @@ static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid, struct tc_cls_flower_offload *tc_flow_cmd) { struct bnxt_tc_flow_node *new_node, *old_node; - struct bnxt_tc_info *tc_info = &bp->tc_info; + struct bnxt_tc_info *tc_info = bp->tc_info; struct bnxt_tc_flow *flow; + __le32 tunnel_handle = 0; __le16 ref_flow_handle; int rc; @@ -672,12 +1226,19 @@ static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid, if (rc) goto unlock; + /* If the flow involves tunnel encap/decap, get tunnel_handle */ + rc = bnxt_tc_get_tunnel_handle(bp, flow, new_node, &tunnel_handle); + if (rc) + goto put_l2; + /* send HWRM cmd to alloc the flow */ rc = bnxt_hwrm_cfa_flow_alloc(bp, flow, ref_flow_handle, - &new_node->flow_handle); + tunnel_handle, &new_node->flow_handle); if (rc) - goto put_l2; + goto put_tunnel; + flow->lastused = jiffies; + spin_lock_init(&flow->stats_lock); /* add new flow to flow-table */ rc = rhashtable_insert_fast(&tc_info->flow_table, &new_node->node, tc_info->flow_ht_params); @@ -689,12 +1250,14 @@ static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid, hwrm_flow_free: bnxt_hwrm_cfa_flow_free(bp, new_node->flow_handle); +put_tunnel: + bnxt_tc_put_tunnel_handle(bp, flow, new_node); put_l2: bnxt_tc_put_l2_node(bp, new_node); unlock: mutex_unlock(&tc_info->lock); free_node: - kfree(new_node); + kfree_rcu(new_node, rcu); done: netdev_err(bp->dev, "Error: %s: cookie=0x%lx error=%d", __func__, tc_flow_cmd->cookie, rc); @@ -704,7 +1267,7 @@ done: static int bnxt_tc_del_flow(struct bnxt *bp, struct tc_cls_flower_offload *tc_flow_cmd) { - struct bnxt_tc_info *tc_info = &bp->tc_info; + struct bnxt_tc_info *tc_info = bp->tc_info; struct bnxt_tc_flow_node *flow_node; flow_node = rhashtable_lookup_fast(&tc_info->flow_table, @@ -722,10 +1285,11 @@ static int bnxt_tc_del_flow(struct bnxt *bp, static int bnxt_tc_get_flow_stats(struct bnxt *bp, struct tc_cls_flower_offload *tc_flow_cmd) { - struct bnxt_tc_info *tc_info = &bp->tc_info; + struct bnxt_tc_flow_stats stats, *curr_stats, *prev_stats; + struct bnxt_tc_info *tc_info = bp->tc_info; struct bnxt_tc_flow_node *flow_node; - struct bnxt_tc_flow_stats stats; - int rc; + struct bnxt_tc_flow *flow; + unsigned long lastused; flow_node = rhashtable_lookup_fast(&tc_info->flow_table, &tc_flow_cmd->cookie, @@ -736,22 +1300,189 @@ static int bnxt_tc_get_flow_stats(struct bnxt *bp, return -1; } - rc = bnxt_hwrm_cfa_flow_stats_get(bp, flow_node->flow_handle, - &flow_node->flow, &stats); + flow = &flow_node->flow; + curr_stats = &flow->stats; + prev_stats = &flow->prev_stats; + + spin_lock(&flow->stats_lock); + stats.packets = curr_stats->packets - prev_stats->packets; + stats.bytes = curr_stats->bytes - prev_stats->bytes; + *prev_stats = *curr_stats; + lastused = flow->lastused; + spin_unlock(&flow->stats_lock); + + tcf_exts_stats_update(tc_flow_cmd->exts, stats.bytes, stats.packets, + lastused); + return 0; +} + +static int +bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp, int num_flows, + struct bnxt_tc_stats_batch stats_batch[]) +{ + struct hwrm_cfa_flow_stats_output *resp = bp->hwrm_cmd_resp_addr; + struct hwrm_cfa_flow_stats_input req = { 0 }; + __le16 *req_flow_handles = &req.flow_handle_0; + int rc, i; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_STATS, -1, -1); + req.num_flows = cpu_to_le16(num_flows); + for (i = 0; i < num_flows; i++) { + struct bnxt_tc_flow_node *flow_node = stats_batch[i].flow_node; + + req_flow_handles[i] = flow_node->flow_handle; + } + + mutex_lock(&bp->hwrm_cmd_lock); + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (!rc) { + __le64 *resp_packets = &resp->packet_0; + __le64 *resp_bytes = &resp->byte_0; + + for (i = 0; i < num_flows; i++) { + stats_batch[i].hw_stats.packets = + le64_to_cpu(resp_packets[i]); + stats_batch[i].hw_stats.bytes = + le64_to_cpu(resp_bytes[i]); + } + } else { + netdev_info(bp->dev, "error rc=%d", rc); + } + + mutex_unlock(&bp->hwrm_cmd_lock); + return rc; +} + +/* Add val to accum while handling a possible wraparound + * of val. Eventhough val is of type u64, its actual width + * is denoted by mask and will wrap-around beyond that width. + */ +static void accumulate_val(u64 *accum, u64 val, u64 mask) +{ +#define low_bits(x, mask) ((x) & (mask)) +#define high_bits(x, mask) ((x) & ~(mask)) + bool wrapped = val < low_bits(*accum, mask); + + *accum = high_bits(*accum, mask) + val; + if (wrapped) + *accum += (mask + 1); +} + +/* The HW counters' width is much less than 64bits. + * Handle possible wrap-around while updating the stat counters + */ +static void bnxt_flow_stats_accum(struct bnxt_tc_info *tc_info, + struct bnxt_tc_flow_stats *acc_stats, + struct bnxt_tc_flow_stats *hw_stats) +{ + accumulate_val(&acc_stats->bytes, hw_stats->bytes, tc_info->bytes_mask); + accumulate_val(&acc_stats->packets, hw_stats->packets, + tc_info->packets_mask); +} + +static int +bnxt_tc_flow_stats_batch_update(struct bnxt *bp, int num_flows, + struct bnxt_tc_stats_batch stats_batch[]) +{ + struct bnxt_tc_info *tc_info = bp->tc_info; + int rc, i; + + rc = bnxt_hwrm_cfa_flow_stats_get(bp, num_flows, stats_batch); if (rc) return rc; - tcf_exts_stats_update(tc_flow_cmd->exts, stats.bytes, stats.packets, 0); + for (i = 0; i < num_flows; i++) { + struct bnxt_tc_flow_node *flow_node = stats_batch[i].flow_node; + struct bnxt_tc_flow *flow = &flow_node->flow; + + spin_lock(&flow->stats_lock); + bnxt_flow_stats_accum(tc_info, &flow->stats, + &stats_batch[i].hw_stats); + if (flow->stats.packets != flow->prev_stats.packets) + flow->lastused = jiffies; + spin_unlock(&flow->stats_lock); + } + return 0; } +static int +bnxt_tc_flow_stats_batch_prep(struct bnxt *bp, + struct bnxt_tc_stats_batch stats_batch[], + int *num_flows) +{ + struct bnxt_tc_info *tc_info = bp->tc_info; + struct rhashtable_iter *iter = &tc_info->iter; + void *flow_node; + int rc, i; + + rc = rhashtable_walk_start(iter); + if (rc && rc != -EAGAIN) { + i = 0; + goto done; + } + + rc = 0; + for (i = 0; i < BNXT_FLOW_STATS_BATCH_MAX; i++) { + flow_node = rhashtable_walk_next(iter); + if (IS_ERR(flow_node)) { + i = 0; + if (PTR_ERR(flow_node) == -EAGAIN) { + continue; + } else { + rc = PTR_ERR(flow_node); + goto done; + } + } + + /* No more flows */ + if (!flow_node) + goto done; + + stats_batch[i].flow_node = flow_node; + } +done: + rhashtable_walk_stop(iter); + *num_flows = i; + return rc; +} + +void bnxt_tc_flow_stats_work(struct bnxt *bp) +{ + struct bnxt_tc_info *tc_info = bp->tc_info; + int num_flows, rc; + + num_flows = atomic_read(&tc_info->flow_table.nelems); + if (!num_flows) + return; + + rhashtable_walk_enter(&tc_info->flow_table, &tc_info->iter); + + for (;;) { + rc = bnxt_tc_flow_stats_batch_prep(bp, tc_info->stats_batch, + &num_flows); + if (rc) { + if (rc == -EAGAIN) + continue; + break; + } + + if (!num_flows) + break; + + bnxt_tc_flow_stats_batch_update(bp, num_flows, + tc_info->stats_batch); + } + + rhashtable_walk_exit(&tc_info->iter); +} + int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid, struct tc_cls_flower_offload *cls_flower) { int rc = 0; - if (!is_classid_clsact_ingress(cls_flower->common.classid) || - cls_flower->common.chain_index) + if (cls_flower->common.chain_index) return -EOPNOTSUPP; switch (cls_flower->command) { @@ -784,19 +1515,37 @@ static const struct rhashtable_params bnxt_tc_l2_ht_params = { .automatic_shrinking = true }; +static const struct rhashtable_params bnxt_tc_decap_l2_ht_params = { + .head_offset = offsetof(struct bnxt_tc_l2_node, node), + .key_offset = offsetof(struct bnxt_tc_l2_node, key), + .key_len = BNXT_TC_L2_KEY_LEN, + .automatic_shrinking = true +}; + +static const struct rhashtable_params bnxt_tc_tunnel_ht_params = { + .head_offset = offsetof(struct bnxt_tc_tunnel_node, node), + .key_offset = offsetof(struct bnxt_tc_tunnel_node, key), + .key_len = sizeof(struct ip_tunnel_key), + .automatic_shrinking = true +}; + /* convert counter width in bits to a mask */ #define mask(width) ((u64)~0 >> (64 - (width))) int bnxt_init_tc(struct bnxt *bp) { - struct bnxt_tc_info *tc_info = &bp->tc_info; + struct bnxt_tc_info *tc_info; int rc; - if (bp->hwrm_spec_code < 0x10800) { + if (bp->hwrm_spec_code < 0x10803) { netdev_warn(bp->dev, "Firmware does not support TC flower offload.\n"); return -ENOTSUPP; } + + tc_info = kzalloc(sizeof(*tc_info), GFP_KERNEL); + if (!tc_info) + return -ENOMEM; mutex_init(&tc_info->lock); /* Counter widths are programmed by FW */ @@ -806,33 +1555,62 @@ int bnxt_init_tc(struct bnxt *bp) tc_info->flow_ht_params = bnxt_tc_flow_ht_params; rc = rhashtable_init(&tc_info->flow_table, &tc_info->flow_ht_params); if (rc) - return rc; + goto free_tc_info; tc_info->l2_ht_params = bnxt_tc_l2_ht_params; rc = rhashtable_init(&tc_info->l2_table, &tc_info->l2_ht_params); if (rc) goto destroy_flow_table; + tc_info->decap_l2_ht_params = bnxt_tc_decap_l2_ht_params; + rc = rhashtable_init(&tc_info->decap_l2_table, + &tc_info->decap_l2_ht_params); + if (rc) + goto destroy_l2_table; + + tc_info->decap_ht_params = bnxt_tc_tunnel_ht_params; + rc = rhashtable_init(&tc_info->decap_table, + &tc_info->decap_ht_params); + if (rc) + goto destroy_decap_l2_table; + + tc_info->encap_ht_params = bnxt_tc_tunnel_ht_params; + rc = rhashtable_init(&tc_info->encap_table, + &tc_info->encap_ht_params); + if (rc) + goto destroy_decap_table; + tc_info->enabled = true; bp->dev->hw_features |= NETIF_F_HW_TC; bp->dev->features |= NETIF_F_HW_TC; + bp->tc_info = tc_info; return 0; +destroy_decap_table: + rhashtable_destroy(&tc_info->decap_table); +destroy_decap_l2_table: + rhashtable_destroy(&tc_info->decap_l2_table); +destroy_l2_table: + rhashtable_destroy(&tc_info->l2_table); destroy_flow_table: rhashtable_destroy(&tc_info->flow_table); +free_tc_info: + kfree(tc_info); return rc; } void bnxt_shutdown_tc(struct bnxt *bp) { - struct bnxt_tc_info *tc_info = &bp->tc_info; + struct bnxt_tc_info *tc_info = bp->tc_info; - if (!tc_info->enabled) + if (!bnxt_tc_flower_enabled(bp)) return; rhashtable_destroy(&tc_info->flow_table); rhashtable_destroy(&tc_info->l2_table); + rhashtable_destroy(&tc_info->decap_l2_table); + rhashtable_destroy(&tc_info->decap_table); + rhashtable_destroy(&tc_info->encap_table); + kfree(tc_info); + bp->tc_info = NULL; } - -#else -#endif diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h index 6c4c1ed279ef..97e09a880693 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h @@ -12,6 +12,8 @@ #ifdef CONFIG_BNXT_FLOWER_OFFLOAD +#include <net/ip_tunnels.h> + /* Structs used for storing the filter/actions of the TC cmd. */ struct bnxt_tc_l2_key { @@ -50,6 +52,13 @@ struct bnxt_tc_l4_key { }; }; +struct bnxt_tc_tunnel_key { + struct bnxt_tc_l2_key l2; + struct bnxt_tc_l3_key l3; + struct bnxt_tc_l4_key l4; + __be32 id; +}; + struct bnxt_tc_actions { u32 flags; #define BNXT_TC_ACTION_FLAG_FWD BIT(0) @@ -57,16 +66,16 @@ struct bnxt_tc_actions { #define BNXT_TC_ACTION_FLAG_PUSH_VLAN BIT(3) #define BNXT_TC_ACTION_FLAG_POP_VLAN BIT(4) #define BNXT_TC_ACTION_FLAG_DROP BIT(5) +#define BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP BIT(6) +#define BNXT_TC_ACTION_FLAG_TUNNEL_DECAP BIT(7) u16 dst_fid; struct net_device *dst_dev; __be16 push_vlan_tpid; __be16 push_vlan_tci; -}; -struct bnxt_tc_flow_stats { - u64 packets; - u64 bytes; + /* tunnel encap */ + struct ip_tunnel_key tun_encap_key; }; struct bnxt_tc_flow { @@ -76,6 +85,16 @@ struct bnxt_tc_flow { #define BNXT_TC_FLOW_FLAGS_IPV6_ADDRS BIT(3) #define BNXT_TC_FLOW_FLAGS_PORTS BIT(4) #define BNXT_TC_FLOW_FLAGS_ICMP BIT(5) +#define BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS BIT(6) +#define BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS BIT(7) +#define BNXT_TC_FLOW_FLAGS_TUNL_IPV6_ADDRS BIT(8) +#define BNXT_TC_FLOW_FLAGS_TUNL_PORTS BIT(9) +#define BNXT_TC_FLOW_FLAGS_TUNL_ID BIT(10) +#define BNXT_TC_FLOW_FLAGS_TUNNEL (BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS | \ + BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS | \ + BNXT_TC_FLOW_FLAGS_TUNL_IPV6_ADDRS |\ + BNXT_TC_FLOW_FLAGS_TUNL_PORTS |\ + BNXT_TC_FLOW_FLAGS_TUNL_ID) /* flow applicable to pkts ingressing on this fid */ u16 src_fid; @@ -85,6 +104,8 @@ struct bnxt_tc_flow { struct bnxt_tc_l3_key l3_mask; struct bnxt_tc_l4_key l4_key; struct bnxt_tc_l4_key l4_mask; + struct ip_tunnel_key tun_key; + struct ip_tunnel_key tun_mask; struct bnxt_tc_actions actions; @@ -93,13 +114,39 @@ struct bnxt_tc_flow { /* previous snap-shot of stats */ struct bnxt_tc_flow_stats prev_stats; unsigned long lastused; /* jiffies */ + /* for calculating delta from prev_stats and + * updating prev_stats atomically. + */ + spinlock_t stats_lock; +}; + +/* Tunnel encap/decap hash table + * This table is used to maintain a list of flows that use + * the same tunnel encap/decap params (ip_daddrs, vni, udp_dport) + * and the FW returned handle. + * A separate table is maintained for encap and decap + */ +struct bnxt_tc_tunnel_node { + struct ip_tunnel_key key; + struct rhash_head node; + + /* tunnel l2 info */ + struct bnxt_tc_l2_key l2_info; + +#define INVALID_TUNNEL_HANDLE cpu_to_le32(0xffffffff) + /* tunnel handle returned by FW */ + __le32 tunnel_handle; + + u32 refcount; + struct rcu_head rcu; }; /* L2 hash table - * This data-struct is used for L2-flow table. - * The L2 part of a flow is stored in a hash table. + * The same data-struct is used for L2-flow table and L2-tunnel table. + * The L2 part of a flow or tunnel is stored in a hash table. * A flow that shares the same L2 key/mask with an - * already existing flow must refer to it's flow handle. + * already existing flow/tunnel must refer to it's flow handle or + * decap_filter_id respectively. */ struct bnxt_tc_l2_node { /* hash key: first 16b of key */ @@ -110,7 +157,7 @@ struct bnxt_tc_l2_node { /* a linked list of flows that share the same l2 key */ struct list_head common_l2_flows; - /* number of flows sharing the l2 key */ + /* number of flows/tunnels sharing the l2 key */ u16 refcount; struct rcu_head rcu; @@ -130,6 +177,16 @@ struct bnxt_tc_flow_node { /* for the shared_flows list maintained in l2_node */ struct list_head l2_list_node; + /* tunnel encap related */ + struct bnxt_tc_tunnel_node *encap_node; + + /* tunnel decap related */ + struct bnxt_tc_tunnel_node *decap_node; + /* L2 node in tunnel-l2 hashtable that shares flow's tunnel l2 key */ + struct bnxt_tc_l2_node *decap_l2_node; + /* for the shared_flows list maintained in tunnel decap l2_node */ + struct list_head decap_l2_list_node; + struct rcu_head rcu; }; @@ -137,6 +194,12 @@ int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid, struct tc_cls_flower_offload *cls_flower); int bnxt_init_tc(struct bnxt *bp); void bnxt_shutdown_tc(struct bnxt *bp); +void bnxt_tc_flow_stats_work(struct bnxt *bp); + +static inline bool bnxt_tc_flower_enabled(struct bnxt *bp) +{ + return bp->tc_info && bp->tc_info->enabled; +} #else /* CONFIG_BNXT_FLOWER_OFFLOAD */ @@ -154,5 +217,14 @@ static inline int bnxt_init_tc(struct bnxt *bp) static inline void bnxt_shutdown_tc(struct bnxt *bp) { } + +static inline void bnxt_tc_flow_stats_work(struct bnxt *bp) +{ +} + +static inline bool bnxt_tc_flower_enabled(struct bnxt *bp) +{ + return false; +} #endif /* CONFIG_BNXT_FLOWER_OFFLOAD */ #endif /* BNXT_TC_H */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c index e75db04c6cdc..69186d188c43 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c @@ -16,6 +16,7 @@ #include "bnxt_hsi.h" #include "bnxt.h" #include "bnxt_vfr.h" +#include "bnxt_devlink.h" #include "bnxt_tc.h" #ifdef CONFIG_BNXT_SRIOV @@ -115,13 +116,17 @@ bnxt_vf_rep_get_stats64(struct net_device *dev, stats->tx_bytes = vf_rep->tx_stats.bytes; } -static int bnxt_vf_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, - void *type_data) +static int bnxt_vf_rep_setup_tc_block_cb(enum tc_setup_type type, + void *type_data, + void *cb_priv) { - struct bnxt_vf_rep *vf_rep = netdev_priv(dev); + struct bnxt_vf_rep *vf_rep = cb_priv; struct bnxt *bp = vf_rep->bp; int vf_fid = bp->pf.vf[vf_rep->vf_idx].fw_fid; + if (!bnxt_tc_flower_enabled(vf_rep->bp) || !tc_can_offload(bp->dev)) + return -EOPNOTSUPP; + switch (type) { case TC_SETUP_CLSFLOWER: return bnxt_tc_setup_flower(bp, vf_fid, type_data); @@ -130,6 +135,39 @@ static int bnxt_vf_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, } } +static int bnxt_vf_rep_setup_tc_block(struct net_device *dev, + struct tc_block_offload *f) +{ + struct bnxt_vf_rep *vf_rep = netdev_priv(dev); + + if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + return tcf_block_cb_register(f->block, + bnxt_vf_rep_setup_tc_block_cb, + vf_rep, vf_rep); + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, + bnxt_vf_rep_setup_tc_block_cb, vf_rep); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int bnxt_vf_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + switch (type) { + case TC_SETUP_BLOCK: + return bnxt_vf_rep_setup_tc_block(dev, type_data); + default: + return -EOPNOTSUPP; + } +} + struct net_device *bnxt_get_vf_rep(struct bnxt *bp, u16 cfa_code) { u16 vf_idx; @@ -416,7 +454,7 @@ err: } /* Devlink related routines */ -static int bnxt_dl_eswitch_mode_get(struct devlink *devlink, u16 *mode) +int bnxt_dl_eswitch_mode_get(struct devlink *devlink, u16 *mode) { struct bnxt *bp = bnxt_get_bp_from_dl(devlink); @@ -424,7 +462,7 @@ static int bnxt_dl_eswitch_mode_get(struct devlink *devlink, u16 *mode) return 0; } -static int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode) +int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode) { struct bnxt *bp = bnxt_get_bp_from_dl(devlink); int rc = 0; @@ -462,52 +500,4 @@ done: return rc; } -static const struct devlink_ops bnxt_dl_ops = { - .eswitch_mode_set = bnxt_dl_eswitch_mode_set, - .eswitch_mode_get = bnxt_dl_eswitch_mode_get -}; - -int bnxt_dl_register(struct bnxt *bp) -{ - struct devlink *dl; - int rc; - - if (!pci_find_ext_capability(bp->pdev, PCI_EXT_CAP_ID_SRIOV)) - return 0; - - if (bp->hwrm_spec_code < 0x10800) { - netdev_warn(bp->dev, "Firmware does not support SR-IOV E-Switch SWITCHDEV mode.\n"); - return -ENOTSUPP; - } - - dl = devlink_alloc(&bnxt_dl_ops, sizeof(struct bnxt_dl)); - if (!dl) { - netdev_warn(bp->dev, "devlink_alloc failed"); - return -ENOMEM; - } - - bnxt_link_bp_to_dl(bp, dl); - bp->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY; - rc = devlink_register(dl, &bp->pdev->dev); - if (rc) { - bnxt_link_bp_to_dl(bp, NULL); - devlink_free(dl); - netdev_warn(bp->dev, "devlink_register failed. rc=%d", rc); - return rc; - } - - return 0; -} - -void bnxt_dl_unregister(struct bnxt *bp) -{ - struct devlink *dl = bp->dl; - - if (!dl) - return; - - devlink_unregister(dl); - devlink_free(dl); -} - #endif diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h index 7787cd24606a..fb06bbe70e42 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h @@ -14,31 +14,6 @@ #define MAX_CFA_CODE 65536 -/* Struct to hold housekeeping info needed by devlink interface */ -struct bnxt_dl { - struct bnxt *bp; /* back ptr to the controlling dev */ -}; - -static inline struct bnxt *bnxt_get_bp_from_dl(struct devlink *dl) -{ - return ((struct bnxt_dl *)devlink_priv(dl))->bp; -} - -/* To clear devlink pointer from bp, pass NULL dl */ -static inline void bnxt_link_bp_to_dl(struct bnxt *bp, struct devlink *dl) -{ - bp->dl = dl; - - /* add a back pointer in dl to bp */ - if (dl) { - struct bnxt_dl *bp_dl = devlink_priv(dl); - - bp_dl->bp = bp; - } -} - -int bnxt_dl_register(struct bnxt *bp); -void bnxt_dl_unregister(struct bnxt *bp); void bnxt_vf_reps_destroy(struct bnxt *bp); void bnxt_vf_reps_close(struct bnxt *bp); void bnxt_vf_reps_open(struct bnxt *bp); @@ -53,16 +28,10 @@ static inline u16 bnxt_vf_rep_get_fid(struct net_device *dev) return bp->pf.vf[vf_rep->vf_idx].fw_fid; } -#else - -static inline int bnxt_dl_register(struct bnxt *bp) -{ - return 0; -} +int bnxt_dl_eswitch_mode_get(struct devlink *devlink, u16 *mode); +int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode); -static inline void bnxt_dl_unregister(struct bnxt *bp) -{ -} +#else static inline void bnxt_vf_reps_close(struct bnxt *bp) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index d8f0c837b72c..261e5847557a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -94,6 +94,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, xdp.data_hard_start = *data_ptr - offset; xdp.data = *data_ptr; + xdp_set_data_meta_invalid(&xdp); xdp.data_end = *data_ptr + *len; orig_data = xdp.data; mapping = rx_buf->mapping - bp->rx_dma_offset; @@ -207,7 +208,7 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog) return 0; } -int bnxt_xdp(struct net_device *dev, struct netdev_xdp *xdp) +int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp) { struct bnxt *bp = netdev_priv(dev); int rc; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h index 12a5ad66b564..414b748038ca 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h @@ -16,6 +16,6 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts); bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, struct page *page, u8 **data_ptr, unsigned int *len, u8 *event); -int bnxt_xdp(struct net_device *dev, struct netdev_xdp *xdp); +int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp); #endif diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 9cebca896913..24b4f4ceceef 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -488,15 +488,13 @@ static void bcmgenet_complete(struct net_device *dev) static int bcmgenet_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *cmd) { - struct bcmgenet_priv *priv = netdev_priv(dev); - if (!netif_running(dev)) return -EINVAL; - if (!priv->phydev) + if (!dev->phydev) return -ENODEV; - phy_ethtool_ksettings_get(priv->phydev, cmd); + phy_ethtool_ksettings_get(dev->phydev, cmd); return 0; } @@ -504,15 +502,13 @@ static int bcmgenet_get_link_ksettings(struct net_device *dev, static int bcmgenet_set_link_ksettings(struct net_device *dev, const struct ethtool_link_ksettings *cmd) { - struct bcmgenet_priv *priv = netdev_priv(dev); - if (!netif_running(dev)) return -EINVAL; - if (!priv->phydev) + if (!dev->phydev) return -ENODEV; - return phy_ethtool_ksettings_set(priv->phydev, cmd); + return phy_ethtool_ksettings_set(dev->phydev, cmd); } static int bcmgenet_set_rx_csum(struct net_device *dev, @@ -1042,11 +1038,14 @@ static int bcmgenet_get_eee(struct net_device *dev, struct ethtool_eee *e) if (GENET_IS_V1(priv)) return -EOPNOTSUPP; + if (!dev->phydev) + return -ENODEV; + e->eee_enabled = p->eee_enabled; e->eee_active = p->eee_active; e->tx_lpi_timer = bcmgenet_umac_readl(priv, UMAC_EEE_LPI_TIMER); - return phy_ethtool_get_eee(priv->phydev, e); + return phy_ethtool_get_eee(dev->phydev, e); } static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) @@ -1058,12 +1057,15 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) if (GENET_IS_V1(priv)) return -EOPNOTSUPP; + if (!dev->phydev) + return -ENODEV; + p->eee_enabled = e->eee_enabled; if (!p->eee_enabled) { bcmgenet_eee_enable_set(dev, false); } else { - ret = phy_init_eee(priv->phydev, 0); + ret = phy_init_eee(dev->phydev, 0); if (ret) { netif_err(priv, hw, dev, "EEE initialization failed\n"); return ret; @@ -1073,7 +1075,7 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) bcmgenet_eee_enable_set(dev, true); } - return phy_ethtool_set_eee(priv->phydev, e); + return phy_ethtool_set_eee(dev->phydev, e); } /* standard ethtool support functions. */ @@ -1107,7 +1109,7 @@ static int bcmgenet_power_down(struct bcmgenet_priv *priv, switch (mode) { case GENET_POWER_CABLE_SENSE: - phy_detach(priv->phydev); + phy_detach(priv->dev->phydev); break; case GENET_POWER_WOL_MAGIC: @@ -1172,7 +1174,6 @@ static void bcmgenet_power_up(struct bcmgenet_priv *priv, } bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT); bcmgenet_phy_power_set(priv->dev, true); - bcmgenet_mii_reset(priv->dev); break; case GENET_POWER_CABLE_SENSE: @@ -1193,15 +1194,13 @@ static void bcmgenet_power_up(struct bcmgenet_priv *priv, /* ioctl handle special commands that are not present in ethtool. */ static int bcmgenet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { - struct bcmgenet_priv *priv = netdev_priv(dev); - if (!netif_running(dev)) return -EINVAL; - if (!priv->phydev) + if (!dev->phydev) return -ENODEV; - return phy_mii_ioctl(priv->phydev, rq, cmd); + return phy_mii_ioctl(dev->phydev, rq, cmd); } static struct enet_cb *bcmgenet_get_txcb(struct bcmgenet_priv *priv, @@ -1405,11 +1404,10 @@ static unsigned int bcmgenet_tx_reclaim(struct net_device *dev, struct bcmgenet_tx_ring *ring) { unsigned int released; - unsigned long flags; - spin_lock_irqsave(&ring->lock, flags); + spin_lock_bh(&ring->lock); released = __bcmgenet_tx_reclaim(dev, ring); - spin_unlock_irqrestore(&ring->lock, flags); + spin_unlock_bh(&ring->lock); return released; } @@ -1420,15 +1418,14 @@ static int bcmgenet_tx_poll(struct napi_struct *napi, int budget) container_of(napi, struct bcmgenet_tx_ring, napi); unsigned int work_done = 0; struct netdev_queue *txq; - unsigned long flags; - spin_lock_irqsave(&ring->lock, flags); + spin_lock(&ring->lock); work_done = __bcmgenet_tx_reclaim(ring->priv->dev, ring); if (ring->free_bds > (MAX_SKB_FRAGS + 1)) { txq = netdev_get_tx_queue(ring->priv->dev, ring->queue); netif_tx_wake_queue(txq); } - spin_unlock_irqrestore(&ring->lock, flags); + spin_unlock(&ring->lock); if (work_done == 0) { napi_complete(napi); @@ -1523,7 +1520,6 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev) struct bcmgenet_tx_ring *ring = NULL; struct enet_cb *tx_cb_ptr; struct netdev_queue *txq; - unsigned long flags = 0; int nr_frags, index; dma_addr_t mapping; unsigned int size; @@ -1550,7 +1546,7 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev) nr_frags = skb_shinfo(skb)->nr_frags; - spin_lock_irqsave(&ring->lock, flags); + spin_lock(&ring->lock); if (ring->free_bds <= (nr_frags + 1)) { if (!netif_tx_queue_stopped(txq)) { netif_tx_stop_queue(txq); @@ -1584,8 +1580,7 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev) for (i = 0; i <= nr_frags; i++) { tx_cb_ptr = bcmgenet_get_txcb(priv, ring); - if (unlikely(!tx_cb_ptr)) - BUG(); + BUG_ON(!tx_cb_ptr); if (!i) { /* Transmit single SKB or head of fragment list */ @@ -1645,7 +1640,7 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev) bcmgenet_tdma_ring_writel(priv, ring->index, ring->prod_index, TDMA_PROD_INDEX); out: - spin_unlock_irqrestore(&ring->lock, flags); + spin_unlock(&ring->lock); return ret; @@ -1935,12 +1930,8 @@ static void umac_enable_set(struct bcmgenet_priv *priv, u32 mask, bool enable) usleep_range(1000, 2000); } -static int reset_umac(struct bcmgenet_priv *priv) +static void reset_umac(struct bcmgenet_priv *priv) { - struct device *kdev = &priv->pdev->dev; - unsigned int timeout = 0; - u32 reg; - /* 7358a0/7552a0: bad default in RBUF_FLUSH_CTRL.umac_sw_rst */ bcmgenet_rbuf_ctrl_set(priv, 0); udelay(10); @@ -1948,23 +1939,10 @@ static int reset_umac(struct bcmgenet_priv *priv) /* disable MAC while updating its registers */ bcmgenet_umac_writel(priv, 0, UMAC_CMD); - /* issue soft reset, wait for it to complete */ - bcmgenet_umac_writel(priv, CMD_SW_RESET, UMAC_CMD); - while (timeout++ < 1000) { - reg = bcmgenet_umac_readl(priv, UMAC_CMD); - if (!(reg & CMD_SW_RESET)) - return 0; - - udelay(1); - } - - if (timeout == 1000) { - dev_err(kdev, - "timeout waiting for MAC to come out of reset\n"); - return -ETIMEDOUT; - } - - return 0; + /* issue soft reset with (rg)mii loopback to ensure a stable rxclk */ + bcmgenet_umac_writel(priv, CMD_SW_RESET | CMD_LCL_LOOP_EN, UMAC_CMD); + udelay(2); + bcmgenet_umac_writel(priv, 0, UMAC_CMD); } static void bcmgenet_intr_disable(struct bcmgenet_priv *priv) @@ -1994,20 +1972,16 @@ static void bcmgenet_link_intr_enable(struct bcmgenet_priv *priv) bcmgenet_intrl2_0_writel(priv, int0_enable, INTRL2_CPU_MASK_CLEAR); } -static int init_umac(struct bcmgenet_priv *priv) +static void init_umac(struct bcmgenet_priv *priv) { struct device *kdev = &priv->pdev->dev; - int ret; u32 reg; u32 int0_enable = 0; dev_dbg(&priv->pdev->dev, "bcmgenet: init_umac\n"); - ret = reset_umac(priv); - if (ret) - return ret; + reset_umac(priv); - bcmgenet_umac_writel(priv, 0, UMAC_CMD); /* clear tx/rx counter */ bcmgenet_umac_writel(priv, MIB_RESET_RX | MIB_RESET_TX | MIB_RESET_RUNT, @@ -2046,8 +2020,6 @@ static int init_umac(struct bcmgenet_priv *priv) bcmgenet_intrl2_0_writel(priv, int0_enable, INTRL2_CPU_MASK_CLEAR); dev_dbg(kdev, "done init umac\n"); - - return 0; } /* Initialize a Tx ring along with corresponding hardware registers */ @@ -2104,6 +2076,10 @@ static void bcmgenet_init_tx_ring(struct bcmgenet_priv *priv, TDMA_WRITE_PTR); bcmgenet_tdma_ring_writel(priv, index, end_ptr * words_per_bd - 1, DMA_END_ADDR); + + /* Initialize Tx NAPI */ + netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, + NAPI_POLL_WEIGHT); } /* Initialize a RDMA ring */ @@ -2135,6 +2111,10 @@ static int bcmgenet_init_rx_ring(struct bcmgenet_priv *priv, if (ret) return ret; + /* Initialize Rx NAPI */ + netif_napi_add(priv->dev, &ring->napi, bcmgenet_rx_poll, + NAPI_POLL_WEIGHT); + bcmgenet_rdma_ring_writel(priv, index, 0, RDMA_PROD_INDEX); bcmgenet_rdma_ring_writel(priv, index, 0, RDMA_CONS_INDEX); bcmgenet_rdma_ring_writel(priv, index, 1, DMA_MBUF_DONE_THRESH); @@ -2159,50 +2139,27 @@ static int bcmgenet_init_rx_ring(struct bcmgenet_priv *priv, return ret; } -static void bcmgenet_init_tx_napi(struct bcmgenet_priv *priv) -{ - unsigned int i; - struct bcmgenet_tx_ring *ring; - - for (i = 0; i < priv->hw_params->tx_queues; ++i) { - ring = &priv->tx_rings[i]; - netif_tx_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64); - } - - ring = &priv->tx_rings[DESC_INDEX]; - netif_tx_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64); -} - static void bcmgenet_enable_tx_napi(struct bcmgenet_priv *priv) { unsigned int i; - u32 int0_enable = UMAC_IRQ_TXDMA_DONE; - u32 int1_enable = 0; struct bcmgenet_tx_ring *ring; for (i = 0; i < priv->hw_params->tx_queues; ++i) { ring = &priv->tx_rings[i]; napi_enable(&ring->napi); - int1_enable |= (1 << i); + ring->int_enable(ring); } ring = &priv->tx_rings[DESC_INDEX]; napi_enable(&ring->napi); - - bcmgenet_intrl2_0_writel(priv, int0_enable, INTRL2_CPU_MASK_CLEAR); - bcmgenet_intrl2_1_writel(priv, int1_enable, INTRL2_CPU_MASK_CLEAR); + ring->int_enable(ring); } static void bcmgenet_disable_tx_napi(struct bcmgenet_priv *priv) { unsigned int i; - u32 int0_disable = UMAC_IRQ_TXDMA_DONE; - u32 int1_disable = 0xffff; struct bcmgenet_tx_ring *ring; - bcmgenet_intrl2_0_writel(priv, int0_disable, INTRL2_CPU_MASK_SET); - bcmgenet_intrl2_1_writel(priv, int1_disable, INTRL2_CPU_MASK_SET); - for (i = 0; i < priv->hw_params->tx_queues; ++i) { ring = &priv->tx_rings[i]; napi_disable(&ring->napi); @@ -2286,9 +2243,6 @@ static void bcmgenet_init_tx_queues(struct net_device *dev) bcmgenet_tdma_writel(priv, dma_priority[1], DMA_PRIORITY_1); bcmgenet_tdma_writel(priv, dma_priority[2], DMA_PRIORITY_2); - /* Initialize Tx NAPI */ - bcmgenet_init_tx_napi(priv); - /* Enable Tx queues */ bcmgenet_tdma_writel(priv, ring_cfg, DMA_RING_CFG); @@ -2298,50 +2252,27 @@ static void bcmgenet_init_tx_queues(struct net_device *dev) bcmgenet_tdma_writel(priv, dma_ctrl, DMA_CTRL); } -static void bcmgenet_init_rx_napi(struct bcmgenet_priv *priv) -{ - unsigned int i; - struct bcmgenet_rx_ring *ring; - - for (i = 0; i < priv->hw_params->rx_queues; ++i) { - ring = &priv->rx_rings[i]; - netif_napi_add(priv->dev, &ring->napi, bcmgenet_rx_poll, 64); - } - - ring = &priv->rx_rings[DESC_INDEX]; - netif_napi_add(priv->dev, &ring->napi, bcmgenet_rx_poll, 64); -} - static void bcmgenet_enable_rx_napi(struct bcmgenet_priv *priv) { unsigned int i; - u32 int0_enable = UMAC_IRQ_RXDMA_DONE; - u32 int1_enable = 0; struct bcmgenet_rx_ring *ring; for (i = 0; i < priv->hw_params->rx_queues; ++i) { ring = &priv->rx_rings[i]; napi_enable(&ring->napi); - int1_enable |= (1 << (UMAC_IRQ1_RX_INTR_SHIFT + i)); + ring->int_enable(ring); } ring = &priv->rx_rings[DESC_INDEX]; napi_enable(&ring->napi); - - bcmgenet_intrl2_0_writel(priv, int0_enable, INTRL2_CPU_MASK_CLEAR); - bcmgenet_intrl2_1_writel(priv, int1_enable, INTRL2_CPU_MASK_CLEAR); + ring->int_enable(ring); } static void bcmgenet_disable_rx_napi(struct bcmgenet_priv *priv) { unsigned int i; - u32 int0_disable = UMAC_IRQ_RXDMA_DONE; - u32 int1_disable = 0xffff << UMAC_IRQ1_RX_INTR_SHIFT; struct bcmgenet_rx_ring *ring; - bcmgenet_intrl2_0_writel(priv, int0_disable, INTRL2_CPU_MASK_SET); - bcmgenet_intrl2_1_writel(priv, int1_disable, INTRL2_CPU_MASK_SET); - for (i = 0; i < priv->hw_params->rx_queues; ++i) { ring = &priv->rx_rings[i]; napi_disable(&ring->napi); @@ -2414,9 +2345,6 @@ static int bcmgenet_init_rx_queues(struct net_device *dev) ring_cfg |= (1 << DESC_INDEX); dma_ctrl |= (1 << (DESC_INDEX + DMA_RING_BUF_EN_SHIFT)); - /* Initialize Rx NAPI */ - bcmgenet_init_rx_napi(priv); - /* Enable rings */ bcmgenet_rdma_writel(priv, ring_cfg, DMA_RING_CFG); @@ -2505,9 +2433,6 @@ static void bcmgenet_fini_dma(struct bcmgenet_priv *priv) bcmgenet_fini_rx_napi(priv); bcmgenet_fini_tx_napi(priv); - /* disable DMA */ - bcmgenet_dma_teardown(priv); - for (i = 0; i < priv->num_tx_bds; i++) { cb = priv->tx_cbs + i; skb = bcmgenet_free_tx_cb(&priv->pdev->dev, cb); @@ -2590,27 +2515,20 @@ static int bcmgenet_init_dma(struct bcmgenet_priv *priv) /* Interrupt bottom half */ static void bcmgenet_irq_task(struct work_struct *work) { - unsigned long flags; unsigned int status; struct bcmgenet_priv *priv = container_of( work, struct bcmgenet_priv, bcmgenet_irq_work); netif_dbg(priv, intr, priv->dev, "%s\n", __func__); - spin_lock_irqsave(&priv->lock, flags); + spin_lock_irq(&priv->lock); status = priv->irq0_stat; priv->irq0_stat = 0; - spin_unlock_irqrestore(&priv->lock, flags); - - if (status & UMAC_IRQ_MPD_R) { - netif_dbg(priv, wol, priv->dev, - "magic packet detected, waking up\n"); - bcmgenet_power_up(priv, GENET_POWER_WOL_MAGIC); - } + spin_unlock_irq(&priv->lock); /* Link UP/DOWN event */ if (status & UMAC_IRQ_LINK_EVENT) - phy_mac_interrupt(priv->phydev, + phy_mac_interrupt(priv->dev->phydev, !!(status & UMAC_IRQ_LINK_UP)); } @@ -2698,23 +2616,13 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id) } } - if (priv->irq0_stat & (UMAC_IRQ_PHY_DET_R | - UMAC_IRQ_PHY_DET_F | - UMAC_IRQ_LINK_EVENT | - UMAC_IRQ_HFB_SM | - UMAC_IRQ_HFB_MM)) { - /* all other interested interrupts handled in bottom half */ - schedule_work(&priv->bcmgenet_irq_work); - } - if ((priv->hw_params->flags & GENET_HAS_MDIO_INTR) && status & (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR)) { wake_up(&priv->wq); } /* all other interested interrupts handled in bottom half */ - status &= (UMAC_IRQ_LINK_EVENT | - UMAC_IRQ_MPD_R); + status &= UMAC_IRQ_LINK_EVENT; if (status) { /* Save irq status for bottom-half processing. */ spin_lock_irqsave(&priv->lock, flags); @@ -2849,16 +2757,16 @@ static void bcmgenet_netif_start(struct net_device *dev) /* Start the network engine */ bcmgenet_enable_rx_napi(priv); - bcmgenet_enable_tx_napi(priv); umac_enable_set(priv, CMD_TX_EN | CMD_RX_EN, true); netif_tx_start_all_queues(dev); + bcmgenet_enable_tx_napi(priv); /* Monitor link interrupts now */ bcmgenet_link_intr_enable(priv); - phy_start(priv->phydev); + phy_start(dev->phydev); } static int bcmgenet_open(struct net_device *dev) @@ -2882,12 +2790,7 @@ static int bcmgenet_open(struct net_device *dev) /* take MAC out of reset */ bcmgenet_umac_reset(priv); - ret = init_umac(priv); - if (ret) - goto err_clk_disable; - - /* disable ethernet MAC while updating its registers */ - umac_enable_set(priv, CMD_TX_EN | CMD_RX_EN, false); + init_umac(priv); /* Make sure we reflect the value of CRC_CMD_FWD */ reg = bcmgenet_umac_readl(priv, UMAC_CMD); @@ -2946,6 +2849,7 @@ err_irq1: err_irq0: free_irq(priv->irq0, priv); err_fini_dma: + bcmgenet_dma_teardown(priv); bcmgenet_fini_dma(priv); err_clk_disable: if (priv->internal_phy) @@ -2958,11 +2862,20 @@ static void bcmgenet_netif_stop(struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); + bcmgenet_disable_tx_napi(priv); netif_tx_stop_all_queues(dev); - phy_stop(priv->phydev); - bcmgenet_intr_disable(priv); + + /* Disable MAC receive */ + umac_enable_set(priv, CMD_RX_EN, false); + + bcmgenet_dma_teardown(priv); + + /* Disable MAC transmit. TX DMA disabled must be done before this */ + umac_enable_set(priv, CMD_TX_EN, false); + + phy_stop(dev->phydev); bcmgenet_disable_rx_napi(priv); - bcmgenet_disable_tx_napi(priv); + bcmgenet_intr_disable(priv); /* Wait for pending work items to complete. Since interrupts are * disabled no new work will be scheduled. @@ -2973,33 +2886,23 @@ static void bcmgenet_netif_stop(struct net_device *dev) priv->old_speed = -1; priv->old_duplex = -1; priv->old_pause = -1; + + /* tx reclaim */ + bcmgenet_tx_reclaim_all(dev); + bcmgenet_fini_dma(priv); } static int bcmgenet_close(struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); - int ret; + int ret = 0; netif_dbg(priv, ifdown, dev, "bcmgenet_close\n"); bcmgenet_netif_stop(dev); /* Really kill the PHY state machine and disconnect from it */ - phy_disconnect(priv->phydev); - - /* Disable MAC receive */ - umac_enable_set(priv, CMD_RX_EN, false); - - ret = bcmgenet_dma_teardown(priv); - if (ret) - return ret; - - /* Disable MAC transmit. TX DMA disabled must be done before this */ - umac_enable_set(priv, CMD_TX_EN, false); - - /* tx reclaim */ - bcmgenet_tx_reclaim_all(dev); - bcmgenet_fini_dma(priv); + phy_disconnect(dev->phydev); free_irq(priv->irq0, priv); free_irq(priv->irq1, priv); @@ -3018,7 +2921,6 @@ static void bcmgenet_dump_tx_queue(struct bcmgenet_tx_ring *ring) u32 p_index, c_index, intsts, intmsk; struct netdev_queue *txq; unsigned int free_bds; - unsigned long flags; bool txq_stopped; if (!netif_msg_tx_err(priv)) @@ -3026,7 +2928,7 @@ static void bcmgenet_dump_tx_queue(struct bcmgenet_tx_ring *ring) txq = netdev_get_tx_queue(priv->dev, ring->queue); - spin_lock_irqsave(&ring->lock, flags); + spin_lock(&ring->lock); if (ring->index == DESC_INDEX) { intsts = ~bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_MASK_STATUS); intmsk = UMAC_IRQ_TXDMA_DONE | UMAC_IRQ_TXDMA_MBDONE; @@ -3038,7 +2940,7 @@ static void bcmgenet_dump_tx_queue(struct bcmgenet_tx_ring *ring) p_index = bcmgenet_tdma_ring_readl(priv, ring->index, TDMA_PROD_INDEX); txq_stopped = netif_tx_queue_stopped(txq); free_bds = ring->free_bds; - spin_unlock_irqrestore(&ring->lock, flags); + spin_unlock(&ring->lock); netif_err(priv, tx_err, priv->dev, "Ring %d queue %d status summary\n" "TX queue status: %s, interrupts: %s\n" @@ -3564,9 +3466,7 @@ static int bcmgenet_probe(struct platform_device *pdev) !strcasecmp(phy_mode_str, "internal")) bcmgenet_power_up(priv, GENET_POWER_PASSIVE); - err = reset_umac(priv); - if (err) - goto err_clk_disable; + reset_umac(priv); err = bcmgenet_mii_init(dev); if (err) @@ -3614,7 +3514,7 @@ static int bcmgenet_suspend(struct device *d) { struct net_device *dev = dev_get_drvdata(d); struct bcmgenet_priv *priv = netdev_priv(dev); - int ret; + int ret = 0; if (!netif_running(dev)) return 0; @@ -3622,24 +3522,10 @@ static int bcmgenet_suspend(struct device *d) bcmgenet_netif_stop(dev); if (!device_may_wakeup(d)) - phy_suspend(priv->phydev); + phy_suspend(dev->phydev); netif_device_detach(dev); - /* Disable MAC receive */ - umac_enable_set(priv, CMD_RX_EN, false); - - ret = bcmgenet_dma_teardown(priv); - if (ret) - return ret; - - /* Disable MAC transmit. TX DMA disabled must be done before this */ - umac_enable_set(priv, CMD_TX_EN, false); - - /* tx reclaim */ - bcmgenet_tx_reclaim_all(dev); - bcmgenet_fini_dma(priv); - /* Prepare the device for Wake-on-LAN and switch to the slow clock */ if (device_may_wakeup(d) && priv->wolopts) { ret = bcmgenet_power_down(priv, GENET_POWER_WOL_MAGIC); @@ -3678,21 +3564,17 @@ static int bcmgenet_resume(struct device *d) bcmgenet_umac_reset(priv); - ret = init_umac(priv); - if (ret) - goto out_clk_disable; + init_umac(priv); /* From WOL-enabled suspend, switch to regular clock */ if (priv->wolopts) clk_disable_unprepare(priv->clk_wol); - phy_init_hw(priv->phydev); + phy_init_hw(dev->phydev); + /* Speed settings must be restored */ bcmgenet_mii_config(priv->dev, false); - /* disable ethernet MAC while updating its registers */ - umac_enable_set(priv, CMD_TX_EN | CMD_RX_EN, false); - bcmgenet_set_hw_addr(priv, dev->dev_addr); if (priv->internal_phy) { @@ -3720,7 +3602,7 @@ static int bcmgenet_resume(struct device *d) netif_device_attach(dev); if (!device_may_wakeup(d)) - phy_resume(priv->phydev); + phy_resume(dev->phydev); if (priv->eee.eee_enabled) bcmgenet_eee_enable_set(dev, true); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 4c49d0b97748..3c50431ccd2a 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -617,7 +617,6 @@ struct bcmgenet_priv { /* MDIO bus variables */ wait_queue_head_t wq; - struct phy_device *phydev; bool internal_phy; struct device_node *phy_dn; struct device_node *mdio_dn; @@ -711,7 +710,6 @@ int bcmgenet_mii_init(struct net_device *dev); int bcmgenet_mii_config(struct net_device *dev, bool init); int bcmgenet_mii_probe(struct net_device *dev); void bcmgenet_mii_exit(struct net_device *dev); -void bcmgenet_mii_reset(struct net_device *dev); void bcmgenet_phy_power_set(struct net_device *dev, bool enable); void bcmgenet_mii_setup(struct net_device *dev); diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 18f5723be2c9..5333274a283c 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -34,7 +34,7 @@ void bcmgenet_mii_setup(struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); - struct phy_device *phydev = priv->phydev; + struct phy_device *phydev = dev->phydev; u32 reg, cmd_bits = 0; bool status_changed = false; @@ -121,22 +121,6 @@ static int bcmgenet_fixed_phy_link_update(struct net_device *dev, return 0; } -/* Perform a voluntary PHY software reset, since the EPHY is very finicky about - * not doing it and will start corrupting packets - */ -void bcmgenet_mii_reset(struct net_device *dev) -{ - struct bcmgenet_priv *priv = netdev_priv(dev); - - if (GENET_IS_V4(priv)) - return; - - if (priv->phydev) { - phy_init_hw(priv->phydev); - phy_start_aneg(priv->phydev); - } -} - void bcmgenet_phy_power_set(struct net_device *dev, bool enable) { struct bcmgenet_priv *priv = netdev_priv(dev); @@ -182,14 +166,14 @@ static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv) } if (priv->hw_params->flags & GENET_HAS_MOCA_LINK_DET) - fixed_phy_set_link_update(priv->phydev, + fixed_phy_set_link_update(priv->dev->phydev, bcmgenet_fixed_phy_link_update); } int bcmgenet_mii_config(struct net_device *dev, bool init) { struct bcmgenet_priv *priv = netdev_priv(dev); - struct phy_device *phydev = priv->phydev; + struct phy_device *phydev = dev->phydev; struct device *kdev = &priv->pdev->dev; const char *phy_name = NULL; u32 id_mode_dis = 0; @@ -236,7 +220,7 @@ int bcmgenet_mii_config(struct net_device *dev, bool init) * capabilities, use that knowledge to also configure the * Reverse MII interface correctly. */ - if ((priv->phydev->supported & PHY_BASIC_FEATURES) == + if ((dev->phydev->supported & PHY_BASIC_FEATURES) == PHY_BASIC_FEATURES) port_ctrl = PORT_MODE_EXT_RVMII_25; else @@ -306,7 +290,7 @@ int bcmgenet_mii_probe(struct net_device *dev) return -ENODEV; } } else { - phydev = priv->phydev; + phydev = dev->phydev; phydev->dev_flags = phy_flags; ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup, @@ -317,8 +301,6 @@ int bcmgenet_mii_probe(struct net_device *dev) } } - priv->phydev = phydev; - /* Configure port multiplexer based on what the probed PHY device since * reading the 'max-speed' property determines the maximum supported * PHY speed which is needed for bcmgenet_mii_config() to configure @@ -326,7 +308,7 @@ int bcmgenet_mii_probe(struct net_device *dev) */ ret = bcmgenet_mii_config(dev, true); if (ret) { - phy_disconnect(priv->phydev); + phy_disconnect(dev->phydev); return ret; } @@ -336,7 +318,7 @@ int bcmgenet_mii_probe(struct net_device *dev) * Ethernet MAC ISRs */ if (priv->internal_phy) - priv->phydev->irq = PHY_IGNORE_INTERRUPT; + dev->phydev->irq = PHY_IGNORE_INTERRUPT; return 0; } @@ -545,7 +527,6 @@ static int bcmgenet_mii_pd_init(struct bcmgenet_priv *priv) } - priv->phydev = phydev; priv->phy_interface = pd->phy_interface; return 0; @@ -590,5 +571,4 @@ void bcmgenet_mii_exit(struct net_device *dev) of_phy_deregister_fixed_link(dn); of_node_put(priv->phy_dn); platform_device_unregister(priv->mii_pdev); - platform_device_put(priv->mii_pdev); } diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 656e6af70f0a..d8d5f207c759 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -11087,9 +11087,7 @@ static void tg3_timer_init(struct tg3 *tp) tp->asf_multiplier = (HZ / tp->timer_offset) * TG3_FW_UPDATE_FREQ_SEC; - init_timer(&tp->timer); - tp->timer.data = (unsigned long) tp; - tp->timer.function = tg3_timer; + setup_timer(&tp->timer, tg3_timer, (unsigned long)tp); } static void tg3_timer_start(struct tg3 *tp) diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index 6e13c937d715..a843076597ec 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -1693,9 +1693,9 @@ err_return: /* Timer callbacks */ /* a) IOC timer */ static void -bnad_ioc_timeout(unsigned long data) +bnad_ioc_timeout(struct timer_list *t) { - struct bnad *bnad = (struct bnad *)data; + struct bnad *bnad = from_timer(bnad, t, bna.ioceth.ioc.ioc_timer); unsigned long flags; spin_lock_irqsave(&bnad->bna_lock, flags); @@ -1704,9 +1704,9 @@ bnad_ioc_timeout(unsigned long data) } static void -bnad_ioc_hb_check(unsigned long data) +bnad_ioc_hb_check(struct timer_list *t) { - struct bnad *bnad = (struct bnad *)data; + struct bnad *bnad = from_timer(bnad, t, bna.ioceth.ioc.hb_timer); unsigned long flags; spin_lock_irqsave(&bnad->bna_lock, flags); @@ -1715,9 +1715,9 @@ bnad_ioc_hb_check(unsigned long data) } static void -bnad_iocpf_timeout(unsigned long data) +bnad_iocpf_timeout(struct timer_list *t) { - struct bnad *bnad = (struct bnad *)data; + struct bnad *bnad = from_timer(bnad, t, bna.ioceth.ioc.iocpf_timer); unsigned long flags; spin_lock_irqsave(&bnad->bna_lock, flags); @@ -1726,9 +1726,9 @@ bnad_iocpf_timeout(unsigned long data) } static void -bnad_iocpf_sem_timeout(unsigned long data) +bnad_iocpf_sem_timeout(struct timer_list *t) { - struct bnad *bnad = (struct bnad *)data; + struct bnad *bnad = from_timer(bnad, t, bna.ioceth.ioc.sem_timer); unsigned long flags; spin_lock_irqsave(&bnad->bna_lock, flags); @@ -1748,9 +1748,9 @@ bnad_iocpf_sem_timeout(unsigned long data) /* b) Dynamic Interrupt Moderation Timer */ static void -bnad_dim_timeout(unsigned long data) +bnad_dim_timeout(struct timer_list *t) { - struct bnad *bnad = (struct bnad *)data; + struct bnad *bnad = from_timer(bnad, t, dim_timer); struct bnad_rx_info *rx_info; struct bnad_rx_ctrl *rx_ctrl; int i, j; @@ -1781,9 +1781,9 @@ bnad_dim_timeout(unsigned long data) /* c) Statistics Timer */ static void -bnad_stats_timeout(unsigned long data) +bnad_stats_timeout(struct timer_list *t) { - struct bnad *bnad = (struct bnad *)data; + struct bnad *bnad = from_timer(bnad, t, stats_timer); unsigned long flags; if (!netif_running(bnad->netdev) || @@ -1804,8 +1804,7 @@ bnad_dim_timer_start(struct bnad *bnad) { if (bnad->cfg_flags & BNAD_CF_DIM_ENABLED && !test_bit(BNAD_RF_DIM_TIMER_RUNNING, &bnad->run_flags)) { - setup_timer(&bnad->dim_timer, bnad_dim_timeout, - (unsigned long)bnad); + timer_setup(&bnad->dim_timer, bnad_dim_timeout, 0); set_bit(BNAD_RF_DIM_TIMER_RUNNING, &bnad->run_flags); mod_timer(&bnad->dim_timer, jiffies + msecs_to_jiffies(BNAD_DIM_TIMER_FREQ)); @@ -1823,8 +1822,7 @@ bnad_stats_timer_start(struct bnad *bnad) spin_lock_irqsave(&bnad->bna_lock, flags); if (!test_and_set_bit(BNAD_RF_STATS_TIMER_RUNNING, &bnad->run_flags)) { - setup_timer(&bnad->stats_timer, bnad_stats_timeout, - (unsigned long)bnad); + timer_setup(&bnad->stats_timer, bnad_stats_timeout, 0); mod_timer(&bnad->stats_timer, jiffies + msecs_to_jiffies(BNAD_STATS_TIMER_FREQ)); } @@ -3692,14 +3690,11 @@ bnad_pci_probe(struct pci_dev *pdev, goto res_free; /* Set up timers */ - setup_timer(&bnad->bna.ioceth.ioc.ioc_timer, bnad_ioc_timeout, - (unsigned long)bnad); - setup_timer(&bnad->bna.ioceth.ioc.hb_timer, bnad_ioc_hb_check, - (unsigned long)bnad); - setup_timer(&bnad->bna.ioceth.ioc.iocpf_timer, bnad_iocpf_timeout, - (unsigned long)bnad); - setup_timer(&bnad->bna.ioceth.ioc.sem_timer, bnad_iocpf_sem_timeout, - (unsigned long)bnad); + timer_setup(&bnad->bna.ioceth.ioc.ioc_timer, bnad_ioc_timeout, 0); + timer_setup(&bnad->bna.ioceth.ioc.hb_timer, bnad_ioc_hb_check, 0); + timer_setup(&bnad->bna.ioceth.ioc.iocpf_timer, bnad_iocpf_timeout, 0); + timer_setup(&bnad->bna.ioceth.ioc.sem_timer, bnad_iocpf_sem_timeout, + 0); /* * Start the chip diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 6df2cad61647..72a67f74b97b 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -611,6 +611,9 @@ static int macb_mii_init(struct macb *bp) err_out_unregister_bus: mdiobus_unregister(bp->mii_bus); err_out_free_mdiobus: + of_node_put(bp->phy_node); + if (np && of_phy_is_fixed_link(np)) + of_phy_deregister_fixed_link(np); mdiobus_free(bp->mii_bus); err_out: return err; @@ -1218,8 +1221,6 @@ static int macb_poll(struct napi_struct *napi, int budget) status = macb_readl(bp, RSR); macb_writel(bp, RSR, status); - work_done = 0; - netdev_vdbg(bp->dev, "poll: status = %08lx, budget = %d\n", (unsigned long)status, budget); @@ -3552,6 +3553,9 @@ static int macb_probe(struct platform_device *pdev) err_out_unregister_mdio: phy_disconnect(dev->phydev); mdiobus_unregister(bp->mii_bus); + of_node_put(bp->phy_node); + if (np && of_phy_is_fixed_link(np)) + of_phy_deregister_fixed_link(np); mdiobus_free(bp->mii_bus); /* Shutdown the PHY if there is a GPIO reset */ @@ -3574,6 +3578,7 @@ static int macb_remove(struct platform_device *pdev) { struct net_device *dev; struct macb *bp; + struct device_node *np = pdev->dev.of_node; dev = platform_get_drvdata(pdev); @@ -3582,6 +3587,8 @@ static int macb_remove(struct platform_device *pdev) if (dev->phydev) phy_disconnect(dev->phydev); mdiobus_unregister(bp->mii_bus); + if (np && of_phy_is_fixed_link(np)) + of_phy_deregister_fixed_link(np); dev->phydev = NULL; mdiobus_free(bp->mii_bus); diff --git a/drivers/net/ethernet/cavium/Kconfig b/drivers/net/ethernet/cavium/Kconfig index dcbce6cac63e..63be75eb34d2 100644 --- a/drivers/net/ethernet/cavium/Kconfig +++ b/drivers/net/ethernet/cavium/Kconfig @@ -53,6 +53,7 @@ config THUNDER_NIC_RGX config LIQUIDIO tristate "Cavium LiquidIO support" depends on 64BIT + depends on MAY_USE_DEVLINK imply PTP_1588_CLOCK select FW_LOADER select LIBCRC32C diff --git a/drivers/net/ethernet/cavium/liquidio/Makefile b/drivers/net/ethernet/cavium/liquidio/Makefile index b802896bb2e0..e3fc4645cd8a 100644 --- a/drivers/net/ethernet/cavium/liquidio/Makefile +++ b/drivers/net/ethernet/cavium/liquidio/Makefile @@ -18,7 +18,7 @@ liquidio-$(CONFIG_LIQUIDIO) += lio_ethtool.o \ octeon_droq.o \ octeon_nic.o -liquidio-objs := lio_main.o octeon_console.o $(liquidio-y) +liquidio-objs := lio_main.o octeon_console.o lio_vf_rep.o $(liquidio-y) obj-$(CONFIG_LIQUIDIO_VF) += liquidio_vf.o diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c index 23f6b60030c5..32ae63b6f20e 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_core.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c @@ -91,7 +91,7 @@ void octeon_update_tx_completion_counters(void *buf, int reqtype, *bytes_compl += skb->len; } -void octeon_report_sent_bytes_to_bql(void *buf, int reqtype) +int octeon_report_sent_bytes_to_bql(void *buf, int reqtype) { struct octnet_buf_free_info *finfo; struct sk_buff *skb; @@ -112,11 +112,13 @@ void octeon_report_sent_bytes_to_bql(void *buf, int reqtype) break; default: - return; + return 0; } txq = netdev_get_tx_queue(skb->dev, skb_get_queue_mapping(skb)); netdev_tx_sent_queue(txq, skb->len); + + return netif_xmit_stopped(txq); } void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr) @@ -141,6 +143,7 @@ void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr) switch (nctrl->ncmd.s.cmd) { case OCTNET_CMD_CHANGE_DEVFLAGS: case OCTNET_CMD_SET_MULTI_LIST: + case OCTNET_CMD_SET_UC_LIST: break; case OCTNET_CMD_CHANGE_MACADDR: @@ -464,7 +467,6 @@ liquidio_push_packet(u32 octeon_id __attribute__((unused)), if (netdev) { struct lio *lio = GET_LIO(netdev); struct octeon_device *oct = lio->oct_dev; - int packet_was_received; /* Do not proceed if the interface is not in RUNNING state. */ if (!ifstate_check(lio, LIO_IFSTATE_RUNNING)) { @@ -567,18 +569,10 @@ liquidio_push_packet(u32 octeon_id __attribute__((unused)), __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vtag); } - packet_was_received = (napi_gro_receive(napi, skb) != GRO_DROP); - - if (packet_was_received) { - droq->stats.rx_bytes_received += len; - droq->stats.rx_pkts_received++; - } else { - droq->stats.rx_dropped++; - netif_info(lio, rx_err, lio->netdev, - "droq:%d error rx_dropped:%llu\n", - droq->q_no, droq->stats.rx_dropped); - } + napi_gro_receive(napi, skb); + droq->stats.rx_bytes_received += len; + droq->stats.rx_pkts_received++; } else { recv_buffer_free(skb); } diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 5b19826a7e16..6aa0eee88ea5 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -21,6 +21,7 @@ #include <linux/firmware.h> #include <net/vxlan.h> #include <linux/kthread.h> +#include <net/switchdev.h> #include "liquidio_common.h" #include "octeon_droq.h" #include "octeon_iq.h" @@ -34,6 +35,7 @@ #include "cn68xx_device.h" #include "cn23xx_pf_device.h" #include "liquidio_image.h" +#include "lio_vf_rep.h" MODULE_AUTHOR("Cavium Networks, <support@cavium.com>"); MODULE_DESCRIPTION("Cavium LiquidIO Intelligent Server Adapter Driver"); @@ -59,9 +61,9 @@ static int debug = -1; module_param(debug, int, 0644); MODULE_PARM_DESC(debug, "NETIF_MSG debug bits"); -static char fw_type[LIO_MAX_FW_TYPE_LEN] = LIO_FW_NAME_TYPE_NIC; +static char fw_type[LIO_MAX_FW_TYPE_LEN] = LIO_FW_NAME_TYPE_AUTO; module_param_string(fw_type, fw_type, sizeof(fw_type), 0444); -MODULE_PARM_DESC(fw_type, "Type of firmware to be loaded. Default \"nic\". Use \"none\" to load firmware from flash."); +MODULE_PARM_DESC(fw_type, "Type of firmware to be loaded (default is \"auto\"), which uses firmware in flash, if present, else loads \"nic\"."); static u32 console_bitmask; module_param(console_bitmask, int, 0644); @@ -83,6 +85,11 @@ static int octeon_console_debug_enabled(u32 console) /* runtime link query interval */ #define LIQUIDIO_LINK_QUERY_INTERVAL_MS 1000 +/* update localtime to octeon firmware every 60 seconds. + * make firmware to use same time reference, so that it will be easy to + * correlate firmware logged events/errors with host events, for debugging. + */ +#define LIO_SYNC_OCTEON_TIME_INTERVAL_MS 60000 struct liquidio_if_cfg_context { int octeon_id; @@ -901,6 +908,121 @@ static inline void update_link_status(struct net_device *netdev, } } +/** + * lio_sync_octeon_time_cb - callback that is invoked when soft command + * sent by lio_sync_octeon_time() has completed successfully or failed + * + * @oct - octeon device structure + * @status - indicates success or failure + * @buf - pointer to the command that was sent to firmware + **/ +static void lio_sync_octeon_time_cb(struct octeon_device *oct, + u32 status, void *buf) +{ + struct octeon_soft_command *sc = (struct octeon_soft_command *)buf; + + if (status) + dev_err(&oct->pci_dev->dev, + "Failed to sync time to octeon; error=%d\n", status); + + octeon_free_soft_command(oct, sc); +} + +/** + * lio_sync_octeon_time - send latest localtime to octeon firmware so that + * firmware will correct it's time, in case there is a time skew + * + * @work: work scheduled to send time update to octeon firmware + **/ +static void lio_sync_octeon_time(struct work_struct *work) +{ + struct cavium_wk *wk = (struct cavium_wk *)work; + struct lio *lio = (struct lio *)wk->ctxptr; + struct octeon_device *oct = lio->oct_dev; + struct octeon_soft_command *sc; + struct timespec64 ts; + struct lio_time *lt; + int ret; + + sc = octeon_alloc_soft_command(oct, sizeof(struct lio_time), 0, 0); + if (!sc) { + dev_err(&oct->pci_dev->dev, + "Failed to sync time to octeon: soft command allocation failed\n"); + return; + } + + lt = (struct lio_time *)sc->virtdptr; + + /* Get time of the day */ + getnstimeofday64(&ts); + lt->sec = ts.tv_sec; + lt->nsec = ts.tv_nsec; + octeon_swap_8B_data((u64 *)lt, (sizeof(struct lio_time)) / 8); + + sc->iq_no = lio->linfo.txpciq[0].s.q_no; + octeon_prepare_soft_command(oct, sc, OPCODE_NIC, + OPCODE_NIC_SYNC_OCTEON_TIME, 0, 0, 0); + + sc->callback = lio_sync_octeon_time_cb; + sc->callback_arg = sc; + sc->wait_time = 1000; + + ret = octeon_send_soft_command(oct, sc); + if (ret == IQ_SEND_FAILED) { + dev_err(&oct->pci_dev->dev, + "Failed to sync time to octeon: failed to send soft command\n"); + octeon_free_soft_command(oct, sc); + } + + queue_delayed_work(lio->sync_octeon_time_wq.wq, + &lio->sync_octeon_time_wq.wk.work, + msecs_to_jiffies(LIO_SYNC_OCTEON_TIME_INTERVAL_MS)); +} + +/** + * setup_sync_octeon_time_wq - Sets up the work to periodically update + * local time to octeon firmware + * + * @netdev - network device which should send time update to firmware + **/ +static inline int setup_sync_octeon_time_wq(struct net_device *netdev) +{ + struct lio *lio = GET_LIO(netdev); + struct octeon_device *oct = lio->oct_dev; + + lio->sync_octeon_time_wq.wq = + alloc_workqueue("update-octeon-time", WQ_MEM_RECLAIM, 0); + if (!lio->sync_octeon_time_wq.wq) { + dev_err(&oct->pci_dev->dev, "Unable to create wq to update octeon time\n"); + return -1; + } + INIT_DELAYED_WORK(&lio->sync_octeon_time_wq.wk.work, + lio_sync_octeon_time); + lio->sync_octeon_time_wq.wk.ctxptr = lio; + queue_delayed_work(lio->sync_octeon_time_wq.wq, + &lio->sync_octeon_time_wq.wk.work, + msecs_to_jiffies(LIO_SYNC_OCTEON_TIME_INTERVAL_MS)); + + return 0; +} + +/** + * cleanup_sync_octeon_time_wq - stop scheduling and destroy the work created + * to periodically update local time to octeon firmware + * + * @netdev - network device which should send time update to firmware + **/ +static inline void cleanup_sync_octeon_time_wq(struct net_device *netdev) +{ + struct lio *lio = GET_LIO(netdev); + struct cavium_wq *time_wq = &lio->sync_octeon_time_wq; + + if (time_wq->wq) { + cancel_delayed_work_sync(&time_wq->wk.work); + destroy_workqueue(time_wq->wq); + } +} + static struct octeon_device *get_other_octeon_device(struct octeon_device *oct) { struct octeon_device *other_oct; @@ -1076,19 +1198,13 @@ liquidio_probe(struct pci_dev *pdev, } if (OCTEON_CN23XX_PF(oct_dev)) { - u64 scratch1; u8 bus, device, function; - scratch1 = octeon_read_csr64(oct_dev, CN23XX_SLI_SCRATCH1); - if (!(scratch1 & 4ULL)) { - /* Bit 2 of SLI_SCRATCH_1 is a flag that indicates that - * the lio watchdog kernel thread is running for this - * NIC. Each NIC gets one watchdog kernel thread. + if (atomic_read(oct_dev->adapter_refcount) == 1) { + /* Each NIC gets one watchdog kernel thread. The first + * PF (of each NIC) that gets pci_driver->probe()'d + * creates that thread. */ - scratch1 |= 4ULL; - octeon_write_csr64(oct_dev, CN23XX_SLI_SCRATCH1, - scratch1); - bus = pdev->bus->number; device = PCI_SLOT(pdev->devfn); function = PCI_FUNC(pdev->devfn); @@ -1115,10 +1231,10 @@ liquidio_probe(struct pci_dev *pdev, return 0; } -static bool fw_type_is_none(void) +static bool fw_type_is_auto(void) { - return strncmp(fw_type, LIO_FW_NAME_TYPE_NONE, - sizeof(LIO_FW_NAME_TYPE_NONE)) == 0; + return strncmp(fw_type, LIO_FW_NAME_TYPE_AUTO, + sizeof(LIO_FW_NAME_TYPE_AUTO)) == 0; } /** @@ -1302,7 +1418,7 @@ static void octeon_destroy_resources(struct octeon_device *oct) * Implementation note: only soft-reset the device * if it is a CN6XXX OR the LAST CN23XX device. */ - if (fw_type_is_none()) + if (atomic_read(oct->adapter_fw_state) == FW_IS_PRELOADED) octeon_pci_flr(oct); else if (OCTEON_CN6XXX(oct) || !refcount) oct->fn_list.soft_reset(oct); @@ -1455,6 +1571,7 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx) if (atomic_read(&lio->ifstate) & LIO_IFSTATE_REGISTERED) unregister_netdev(netdev); + cleanup_sync_octeon_time_wq(netdev); cleanup_link_status_change_wq(netdev); cleanup_rx_oom_poll_fn(netdev); @@ -1487,6 +1604,8 @@ static int liquidio_stop_nic_module(struct octeon_device *oct) oct->cmd_resp_state = OCT_DRV_OFFLINE; spin_unlock_bh(&oct->cmd_resp_wqlock); + lio_vf_rep_destroy(oct); + for (i = 0; i < oct->ifcount; i++) { lio = GET_LIO(oct->props[i].netdev); for (j = 0; j < oct->num_oqs; j++) @@ -1497,6 +1616,12 @@ static int liquidio_stop_nic_module(struct octeon_device *oct) for (i = 0; i < oct->ifcount; i++) liquidio_destroy_nic_device(oct, i); + if (oct->devlink) { + devlink_unregister(oct->devlink); + devlink_free(oct->devlink); + oct->devlink = NULL; + } + dev_dbg(&oct->pci_dev->dev, "Network interfaces stopped\n"); return 0; } @@ -1514,6 +1639,10 @@ static void liquidio_remove(struct pci_dev *pdev) if (oct_dev->watchdog_task) kthread_stop(oct_dev->watchdog_task); + if (!oct_dev->octeon_id && + oct_dev->fw_info.app_cap_flags & LIQUIDIO_SWITCHDEV_CAP) + lio_vf_rep_modexit(); + if (oct_dev->app_mode && (oct_dev->app_mode == CVM_DRV_NIC_APP)) liquidio_stop_nic_module(oct_dev); @@ -1934,10 +2063,12 @@ static int load_firmware(struct octeon_device *oct) char fw_name[LIO_MAX_FW_FILENAME_LEN]; char *tmp_fw_type; - if (fw_type[0] == '\0') + if (fw_type_is_auto()) { tmp_fw_type = LIO_FW_NAME_TYPE_NIC; - else + strncpy(fw_type, tmp_fw_type, sizeof(fw_type)); + } else { tmp_fw_type = fw_type; + } sprintf(fw_name, "%s%s%s_%s%s", LIO_FW_DIR, LIO_FW_BASE_NAME, octeon_get_conf(oct)->card_name, tmp_fw_type, @@ -2477,7 +2608,8 @@ static void handle_timestamp(struct octeon_device *oct, */ static inline int send_nic_timestamp_pkt(struct octeon_device *oct, struct octnic_data_pkt *ndata, - struct octnet_buf_free_info *finfo) + struct octnet_buf_free_info *finfo, + int xmit_more) { int retval; struct octeon_soft_command *sc; @@ -2512,7 +2644,7 @@ static inline int send_nic_timestamp_pkt(struct octeon_device *oct, len = (u32)((struct octeon_instr_ih2 *) (&sc->cmd.cmd2.ih2))->dlengsz; - ring_doorbell = 1; + ring_doorbell = !xmit_more; retval = octeon_send_command(oct, sc->iq_no, ring_doorbell, &sc->cmd, sc, len, ndata->reqtype); @@ -2546,7 +2678,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) union tx_info *tx_info; int status = 0; int q_idx = 0, iq_no = 0; - int j; + int j, xmit_more = 0; u64 dptr = 0; u32 tag = 0; @@ -2751,17 +2883,19 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) irh->vlan = skb_vlan_tag_get(skb) & 0xfff; } + xmit_more = skb->xmit_more; + if (unlikely(cmdsetup.s.timestamp)) - status = send_nic_timestamp_pkt(oct, &ndata, finfo); + status = send_nic_timestamp_pkt(oct, &ndata, finfo, xmit_more); else - status = octnet_send_nic_data_pkt(oct, &ndata); + status = octnet_send_nic_data_pkt(oct, &ndata, xmit_more); if (status == IQ_SEND_FAILED) goto lio_xmit_failed; netif_info(lio, tx_queued, lio->netdev, "Transmit queued successfully\n"); if (status == IQ_SEND_STOP) - stop_q(lio->netdev, q_idx); + stop_q(netdev, q_idx); netif_trans_update(netdev); @@ -2780,6 +2914,9 @@ lio_xmit_failed: if (dptr) dma_unmap_single(&oct->pci_dev->dev, dptr, ndata.datasize, DMA_TO_DEVICE); + + octeon_ring_doorbell_locked(oct, iq_no); + tx_buffer_free(skb); return NETDEV_TX_OK; } @@ -3186,6 +3323,86 @@ static int liquidio_set_vf_link_state(struct net_device *netdev, int vfidx, return 0; } +static int +liquidio_eswitch_mode_get(struct devlink *devlink, u16 *mode) +{ + struct lio_devlink_priv *priv; + struct octeon_device *oct; + + priv = devlink_priv(devlink); + oct = priv->oct; + + *mode = oct->eswitch_mode; + + return 0; +} + +static int +liquidio_eswitch_mode_set(struct devlink *devlink, u16 mode) +{ + struct lio_devlink_priv *priv; + struct octeon_device *oct; + int ret = 0; + + priv = devlink_priv(devlink); + oct = priv->oct; + + if (!(oct->fw_info.app_cap_flags & LIQUIDIO_SWITCHDEV_CAP)) + return -EINVAL; + + if (oct->eswitch_mode == mode) + return 0; + + switch (mode) { + case DEVLINK_ESWITCH_MODE_SWITCHDEV: + oct->eswitch_mode = mode; + ret = lio_vf_rep_create(oct); + break; + + case DEVLINK_ESWITCH_MODE_LEGACY: + lio_vf_rep_destroy(oct); + oct->eswitch_mode = mode; + break; + + default: + ret = -EINVAL; + } + + return ret; +} + +static const struct devlink_ops liquidio_devlink_ops = { + .eswitch_mode_get = liquidio_eswitch_mode_get, + .eswitch_mode_set = liquidio_eswitch_mode_set, +}; + +static int +lio_pf_switchdev_attr_get(struct net_device *dev, struct switchdev_attr *attr) +{ + struct lio *lio = GET_LIO(dev); + struct octeon_device *oct = lio->oct_dev; + + if (oct->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV) + return -EOPNOTSUPP; + + switch (attr->id) { + case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: + attr->u.ppid.id_len = ETH_ALEN; + ether_addr_copy(attr->u.ppid.id, + (void *)&lio->linfo.hw_addr + 2); + break; + + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static const struct switchdev_ops lio_pf_switchdev_ops = { + .switchdev_port_attr_get = lio_pf_switchdev_attr_get, +}; + static const struct net_device_ops lionetdevops = { .ndo_open = liquidio_open, .ndo_stop = liquidio_stop, @@ -3303,7 +3520,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) { struct lio *lio = NULL; struct net_device *netdev; - u8 mac[6], i, j; + u8 mac[6], i, j, *fw_ver; struct octeon_soft_command *sc; struct liquidio_if_cfg_context *ctx; struct liquidio_if_cfg_resp *resp; @@ -3315,6 +3532,8 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) u32 resp_size, ctx_size, data_size; u32 ifidx_or_pfnum; struct lio_version *vdata; + struct devlink *devlink; + struct lio_devlink_priv *lio_devlink; /* This is to handle link status changes */ octeon_register_dispatch_fn(octeon_dev, OPCODE_NIC, @@ -3414,6 +3633,22 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) goto setup_nic_dev_fail; } + /* Verify f/w version (in case of 'auto' loading from flash) */ + fw_ver = octeon_dev->fw_info.liquidio_firmware_version; + if (memcmp(LIQUIDIO_BASE_VERSION, + fw_ver, + strlen(LIQUIDIO_BASE_VERSION))) { + dev_err(&octeon_dev->pci_dev->dev, + "Unmatched firmware version. Expected %s.x, got %s.\n", + LIQUIDIO_BASE_VERSION, fw_ver); + goto setup_nic_dev_fail; + } else if (atomic_read(octeon_dev->adapter_fw_state) == + FW_IS_PRELOADED) { + dev_info(&octeon_dev->pci_dev->dev, + "Using auto-loaded firmware version %s.\n", + fw_ver); + } + octeon_swap_8B_data((u64 *)(&resp->cfg_info), (sizeof(struct liquidio_if_cfg_info)) >> 3); @@ -3444,6 +3679,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) * netdev tasks. */ netdev->netdev_ops = &lionetdevops; + SWITCHDEV_SET_OPS(netdev, &lio_pf_switchdev_ops); lio = GET_LIO(netdev); @@ -3593,6 +3829,11 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) if (setup_link_status_change_wq(netdev)) goto setup_nic_dev_fail; + if ((octeon_dev->fw_info.app_cap_flags & + LIQUIDIO_TIME_SYNC_CAP) && + setup_sync_octeon_time_wq(netdev)) + goto setup_nic_dev_fail; + if (setup_rx_oom_poll_fn(netdev)) goto setup_nic_dev_fail; @@ -3625,6 +3866,26 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) octeon_free_soft_command(octeon_dev, sc); } + devlink = devlink_alloc(&liquidio_devlink_ops, + sizeof(struct lio_devlink_priv)); + if (!devlink) { + dev_err(&octeon_dev->pci_dev->dev, "devlink alloc failed\n"); + goto setup_nic_wait_intr; + } + + lio_devlink = devlink_priv(devlink); + lio_devlink->oct = octeon_dev; + + if (devlink_register(devlink, &octeon_dev->pci_dev->dev)) { + devlink_free(devlink); + dev_err(&octeon_dev->pci_dev->dev, + "devlink registration failed\n"); + goto setup_nic_wait_intr; + } + + octeon_dev->devlink = devlink; + octeon_dev->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY; + return 0; setup_nic_dev_fail: @@ -3719,6 +3980,7 @@ static int liquidio_enable_sriov(struct pci_dev *dev, int num_vfs) } if (!num_vfs) { + lio_vf_rep_destroy(oct); ret = lio_pci_sriov_disable(oct); } else if (num_vfs > oct->sriov_info.max_vfs) { dev_err(&oct->pci_dev->dev, @@ -3730,6 +3992,10 @@ static int liquidio_enable_sriov(struct pci_dev *dev, int num_vfs) ret = octeon_enable_sriov(oct); dev_info(&oct->pci_dev->dev, "oct->pf_num:%d num_vfs:%d\n", oct->pf_num, num_vfs); + ret = lio_vf_rep_create(oct); + if (ret) + dev_info(&oct->pci_dev->dev, + "vf representor create failed"); } return ret; @@ -3767,6 +4033,18 @@ static int liquidio_init_nic_module(struct octeon_device *oct) goto octnet_init_failure; } + /* Call vf_rep_modinit if the firmware is switchdev capable + * and do it from the first liquidio function probed. + */ + if (!oct->octeon_id && + oct->fw_info.app_cap_flags & LIQUIDIO_SWITCHDEV_CAP) { + retval = lio_vf_rep_modinit(); + if (retval) { + liquidio_stop_nic_module(oct); + goto octnet_init_failure; + } + } + liquidio_ptp_init(oct); dev_dbg(&oct->pci_dev->dev, "Network interfaces ready\n"); @@ -3882,9 +4160,9 @@ octeon_recv_vf_drv_notice(struct octeon_recv_info *recv_info, void *buf) static int octeon_device_init(struct octeon_device *octeon_dev) { int j, ret; - int fw_loaded = 0; char bootcmd[] = "\n"; char *dbg_enb = NULL; + enum lio_fw_state fw_state; struct octeon_device_priv *oct_priv = (struct octeon_device_priv *)octeon_dev->priv; atomic_set(&octeon_dev->status, OCT_DEV_BEGIN_STATE); @@ -3916,24 +4194,40 @@ static int octeon_device_init(struct octeon_device *octeon_dev) octeon_dev->app_mode = CVM_DRV_INVALID_APP; - if (OCTEON_CN23XX_PF(octeon_dev)) { - if (!cn23xx_fw_loaded(octeon_dev) && !fw_type_is_none()) { - fw_loaded = 0; - /* Do a soft reset of the Octeon device. */ - if (octeon_dev->fn_list.soft_reset(octeon_dev)) - return 1; - /* things might have changed */ - if (!cn23xx_fw_loaded(octeon_dev)) - fw_loaded = 0; - else - fw_loaded = 1; - } else { - fw_loaded = 1; - } - } else if (octeon_dev->fn_list.soft_reset(octeon_dev)) { - return 1; + /* CN23XX supports preloaded firmware if the following is true: + * + * The adapter indicates that firmware is currently running AND + * 'fw_type' is 'auto'. + * + * (default state is NEEDS_TO_BE_LOADED, override it if appropriate). + */ + if (OCTEON_CN23XX_PF(octeon_dev) && + cn23xx_fw_loaded(octeon_dev) && fw_type_is_auto()) { + atomic_cmpxchg(octeon_dev->adapter_fw_state, + FW_NEEDS_TO_BE_LOADED, FW_IS_PRELOADED); } + /* If loading firmware, only first device of adapter needs to do so. */ + fw_state = atomic_cmpxchg(octeon_dev->adapter_fw_state, + FW_NEEDS_TO_BE_LOADED, + FW_IS_BEING_LOADED); + + /* Here, [local variable] 'fw_state' is set to one of: + * + * FW_IS_PRELOADED: No firmware is to be loaded (see above) + * FW_NEEDS_TO_BE_LOADED: The driver's first instance will load + * firmware to the adapter. + * FW_IS_BEING_LOADED: The driver's second instance will not load + * firmware to the adapter. + */ + + /* Prior to f/w load, perform a soft reset of the Octeon device; + * if error resetting, return w/error. + */ + if (fw_state == FW_NEEDS_TO_BE_LOADED) + if (octeon_dev->fn_list.soft_reset(octeon_dev)) + return 1; + /* Initialize the dispatch mechanism used to push packets arriving on * Octeon Output queues. */ @@ -4063,7 +4357,7 @@ static int octeon_device_init(struct octeon_device *octeon_dev) atomic_set(&octeon_dev->status, OCT_DEV_IO_QUEUES_DONE); - if ((!OCTEON_CN23XX_PF(octeon_dev)) || !fw_loaded) { + if (fw_state == FW_NEEDS_TO_BE_LOADED) { dev_dbg(&octeon_dev->pci_dev->dev, "Waiting for DDR initialization...\n"); if (!ddr_timeout) { dev_info(&octeon_dev->pci_dev->dev, @@ -4125,6 +4419,8 @@ static int octeon_device_init(struct octeon_device *octeon_dev) dev_err(&octeon_dev->pci_dev->dev, "Could not load firmware to board\n"); return 1; } + + atomic_set(octeon_dev->adapter_fw_state, FW_HAS_BEEN_LOADED); } handshake[octeon_dev->octeon_id].init_ok = 1; diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c index 2e993ce43b66..fd70a4844e2d 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c @@ -435,8 +435,7 @@ static void delete_glists(struct lio *lio) do { g = (struct octnic_gather *) list_delete_head(&lio->glist[i]); - if (g) - kfree(g); + kfree(g); } while (g); if (lio->glists_virt_base && lio->glists_virt_base[i] && @@ -748,7 +747,7 @@ static void octeon_destroy_resources(struct octeon_device *oct) if (lio_wait_for_oq_pkts(oct)) dev_err(&oct->pci_dev->dev, "OQ had pending packets\n"); - + /* fall through */ case OCT_DEV_INTR_SET_DONE: /* Disable interrupts */ oct->fn_list.disable_interrupt(oct, OCTEON_ALL_INTR); @@ -1289,6 +1288,9 @@ static int liquidio_stop(struct net_device *netdev) struct octeon_device *oct = lio->oct_dev; struct napi_struct *napi, *n; + /* tell Octeon to stop forwarding packets to host */ + send_rx_ctrl_cmd(lio, 0); + if (oct->props[lio->ifidx].napi_enabled) { list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list) napi_disable(napi); @@ -1306,9 +1308,6 @@ static int liquidio_stop(struct net_device *netdev) netif_carrier_off(netdev); lio->link_changes++; - /* tell Octeon to stop forwarding packets to host */ - send_rx_ctrl_cmd(lio, 0); - ifstate_reset(lio, LIO_IFSTATE_RUNNING); txqs_stop(netdev); @@ -1691,7 +1690,8 @@ static void handle_timestamp(struct octeon_device *oct, u32 status, void *buf) */ static int send_nic_timestamp_pkt(struct octeon_device *oct, struct octnic_data_pkt *ndata, - struct octnet_buf_free_info *finfo) + struct octnet_buf_free_info *finfo, + int xmit_more) { struct octeon_soft_command *sc; int ring_doorbell; @@ -1721,7 +1721,7 @@ static int send_nic_timestamp_pkt(struct octeon_device *oct, len = (u32)((struct octeon_instr_ih3 *)(&sc->cmd.cmd3.ih3))->dlengsz; - ring_doorbell = 1; + ring_doorbell = !xmit_more; retval = octeon_send_command(oct, sc->iq_no, ring_doorbell, &sc->cmd, sc, len, ndata->reqtype); @@ -1753,6 +1753,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) struct octeon_device *oct; int q_idx = 0, iq_no = 0; union tx_info *tx_info; + int xmit_more = 0; struct lio *lio; int status = 0; u64 dptr = 0; @@ -1941,10 +1942,12 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) irh->vlan = skb_vlan_tag_get(skb) & VLAN_VID_MASK; } + xmit_more = skb->xmit_more; + if (unlikely(cmdsetup.s.timestamp)) - status = send_nic_timestamp_pkt(oct, &ndata, finfo); + status = send_nic_timestamp_pkt(oct, &ndata, finfo, xmit_more); else - status = octnet_send_nic_data_pkt(oct, &ndata); + status = octnet_send_nic_data_pkt(oct, &ndata, xmit_more); if (status == IQ_SEND_FAILED) goto lio_xmit_failed; @@ -1953,7 +1956,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) if (status == IQ_SEND_STOP) { dev_err(&oct->pci_dev->dev, "Rcvd IQ_SEND_STOP signal; stopping IQ-%d\n", iq_no); - stop_q(lio->netdev, q_idx); + stop_q(netdev, q_idx); } netif_trans_update(netdev); @@ -1973,6 +1976,9 @@ lio_xmit_failed: if (dptr) dma_unmap_single(&oct->pci_dev->dev, dptr, ndata.datasize, DMA_TO_DEVICE); + + octeon_ring_doorbell_locked(oct, iq_no); + tx_buffer_free(skb); return NETDEV_TX_OK; } diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c new file mode 100644 index 000000000000..2adafa366d3f --- /dev/null +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c @@ -0,0 +1,695 @@ +/********************************************************************** + * Author: Cavium, Inc. + * + * Contact: support@cavium.com + * Please include "LiquidIO" in the subject. + * + * Copyright (c) 2003-2017 Cavium, Inc. + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, Version 2, as + * published by the Free Software Foundation. + * + * This file is distributed in the hope that it will be useful, but + * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or + * NONINFRINGEMENT. See the GNU General Public License for more details. + ***********************************************************************/ +#include <linux/pci.h> +#include <linux/if_vlan.h> +#include "liquidio_common.h" +#include "octeon_droq.h" +#include "octeon_iq.h" +#include "response_manager.h" +#include "octeon_device.h" +#include "octeon_nic.h" +#include "octeon_main.h" +#include "octeon_network.h" +#include <net/switchdev.h> +#include "lio_vf_rep.h" +#include "octeon_network.h" + +static int lio_vf_rep_open(struct net_device *ndev); +static int lio_vf_rep_stop(struct net_device *ndev); +static int lio_vf_rep_pkt_xmit(struct sk_buff *skb, struct net_device *ndev); +static void lio_vf_rep_tx_timeout(struct net_device *netdev); +static int lio_vf_rep_phys_port_name(struct net_device *dev, + char *buf, size_t len); +static void lio_vf_rep_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats64); +static int lio_vf_rep_change_mtu(struct net_device *ndev, int new_mtu); + +static const struct net_device_ops lio_vf_rep_ndev_ops = { + .ndo_open = lio_vf_rep_open, + .ndo_stop = lio_vf_rep_stop, + .ndo_start_xmit = lio_vf_rep_pkt_xmit, + .ndo_tx_timeout = lio_vf_rep_tx_timeout, + .ndo_get_phys_port_name = lio_vf_rep_phys_port_name, + .ndo_get_stats64 = lio_vf_rep_get_stats64, + .ndo_change_mtu = lio_vf_rep_change_mtu, +}; + +static void +lio_vf_rep_send_sc_complete(struct octeon_device *oct, + u32 status, void *ptr) +{ + struct octeon_soft_command *sc = (struct octeon_soft_command *)ptr; + struct lio_vf_rep_sc_ctx *ctx = + (struct lio_vf_rep_sc_ctx *)sc->ctxptr; + struct lio_vf_rep_resp *resp = + (struct lio_vf_rep_resp *)sc->virtrptr; + + if (status != OCTEON_REQUEST_TIMEOUT && READ_ONCE(resp->status)) + WRITE_ONCE(resp->status, 0); + + complete(&ctx->complete); +} + +static int +lio_vf_rep_send_soft_command(struct octeon_device *oct, + void *req, int req_size, + void *resp, int resp_size) +{ + int tot_resp_size = sizeof(struct lio_vf_rep_resp) + resp_size; + int ctx_size = sizeof(struct lio_vf_rep_sc_ctx); + struct octeon_soft_command *sc = NULL; + struct lio_vf_rep_resp *rep_resp; + struct lio_vf_rep_sc_ctx *ctx; + void *sc_req; + int err; + + sc = (struct octeon_soft_command *) + octeon_alloc_soft_command(oct, req_size, + tot_resp_size, ctx_size); + if (!sc) + return -ENOMEM; + + ctx = (struct lio_vf_rep_sc_ctx *)sc->ctxptr; + memset(ctx, 0, ctx_size); + init_completion(&ctx->complete); + + sc_req = (struct lio_vf_rep_req *)sc->virtdptr; + memcpy(sc_req, req, req_size); + + rep_resp = (struct lio_vf_rep_resp *)sc->virtrptr; + memset(rep_resp, 0, tot_resp_size); + WRITE_ONCE(rep_resp->status, 1); + + sc->iq_no = 0; + octeon_prepare_soft_command(oct, sc, OPCODE_NIC, + OPCODE_NIC_VF_REP_CMD, 0, 0, 0); + sc->callback = lio_vf_rep_send_sc_complete; + sc->callback_arg = sc; + sc->wait_time = LIO_VF_REP_REQ_TMO_MS; + + err = octeon_send_soft_command(oct, sc); + if (err == IQ_SEND_FAILED) + goto free_buff; + + wait_for_completion_timeout(&ctx->complete, + msecs_to_jiffies + (2 * LIO_VF_REP_REQ_TMO_MS)); + err = READ_ONCE(rep_resp->status) ? -EBUSY : 0; + if (err) + dev_err(&oct->pci_dev->dev, "VF rep send config failed\n"); + + if (resp) + memcpy(resp, (rep_resp + 1), resp_size); +free_buff: + octeon_free_soft_command(oct, sc); + + return err; +} + +static int +lio_vf_rep_open(struct net_device *ndev) +{ + struct lio_vf_rep_desc *vf_rep = netdev_priv(ndev); + struct lio_vf_rep_req rep_cfg; + struct octeon_device *oct; + int ret; + + oct = vf_rep->oct; + + memset(&rep_cfg, 0, sizeof(rep_cfg)); + rep_cfg.req_type = LIO_VF_REP_REQ_STATE; + rep_cfg.ifidx = vf_rep->ifidx; + rep_cfg.rep_state.state = LIO_VF_REP_STATE_UP; + + ret = lio_vf_rep_send_soft_command(oct, &rep_cfg, + sizeof(rep_cfg), NULL, 0); + + if (ret) { + dev_err(&oct->pci_dev->dev, + "VF_REP open failed with err %d\n", ret); + return -EIO; + } + + atomic_set(&vf_rep->ifstate, (atomic_read(&vf_rep->ifstate) | + LIO_IFSTATE_RUNNING)); + + netif_carrier_on(ndev); + netif_start_queue(ndev); + + return 0; +} + +static int +lio_vf_rep_stop(struct net_device *ndev) +{ + struct lio_vf_rep_desc *vf_rep = netdev_priv(ndev); + struct lio_vf_rep_req rep_cfg; + struct octeon_device *oct; + int ret; + + oct = vf_rep->oct; + + memset(&rep_cfg, 0, sizeof(rep_cfg)); + rep_cfg.req_type = LIO_VF_REP_REQ_STATE; + rep_cfg.ifidx = vf_rep->ifidx; + rep_cfg.rep_state.state = LIO_VF_REP_STATE_DOWN; + + ret = lio_vf_rep_send_soft_command(oct, &rep_cfg, + sizeof(rep_cfg), NULL, 0); + + if (ret) { + dev_err(&oct->pci_dev->dev, + "VF_REP dev stop failed with err %d\n", ret); + return -EIO; + } + + atomic_set(&vf_rep->ifstate, (atomic_read(&vf_rep->ifstate) & + ~LIO_IFSTATE_RUNNING)); + + netif_tx_disable(ndev); + netif_carrier_off(ndev); + + return 0; +} + +static void +lio_vf_rep_tx_timeout(struct net_device *ndev) +{ + netif_trans_update(ndev); + + netif_wake_queue(ndev); +} + +static void +lio_vf_rep_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats64) +{ + struct lio_vf_rep_desc *vf_rep = netdev_priv(dev); + + stats64->tx_packets = vf_rep->stats.tx_packets; + stats64->tx_bytes = vf_rep->stats.tx_bytes; + stats64->tx_dropped = vf_rep->stats.tx_dropped; + + stats64->rx_packets = vf_rep->stats.rx_packets; + stats64->rx_bytes = vf_rep->stats.rx_bytes; + stats64->rx_dropped = vf_rep->stats.rx_dropped; +} + +static int +lio_vf_rep_change_mtu(struct net_device *ndev, int new_mtu) +{ + struct lio_vf_rep_desc *vf_rep = netdev_priv(ndev); + struct lio_vf_rep_req rep_cfg; + struct octeon_device *oct; + int ret; + + oct = vf_rep->oct; + + memset(&rep_cfg, 0, sizeof(rep_cfg)); + rep_cfg.req_type = LIO_VF_REP_REQ_MTU; + rep_cfg.ifidx = vf_rep->ifidx; + rep_cfg.rep_mtu.mtu = cpu_to_be32(new_mtu); + + ret = lio_vf_rep_send_soft_command(oct, &rep_cfg, + sizeof(rep_cfg), NULL, 0); + if (ret) { + dev_err(&oct->pci_dev->dev, + "Change MTU failed with err %d\n", ret); + return -EIO; + } + + ndev->mtu = new_mtu; + + return 0; +} + +static int +lio_vf_rep_phys_port_name(struct net_device *dev, + char *buf, size_t len) +{ + struct lio_vf_rep_desc *vf_rep = netdev_priv(dev); + struct octeon_device *oct = vf_rep->oct; + int ret; + + ret = snprintf(buf, len, "pf%dvf%d", oct->pf_num, + vf_rep->ifidx - oct->pf_num * 64 - 1); + if (ret >= len) + return -EOPNOTSUPP; + + return 0; +} + +static struct net_device * +lio_vf_rep_get_ndev(struct octeon_device *oct, int ifidx) +{ + int vf_id, max_vfs = CN23XX_MAX_VFS_PER_PF + 1; + int vfid_mask = max_vfs - 1; + + if (ifidx <= oct->pf_num * max_vfs || + ifidx >= oct->pf_num * max_vfs + max_vfs) + return NULL; + + /* ifidx 1-63 for PF0 VFs + * ifidx 65-127 for PF1 VFs + */ + vf_id = (ifidx & vfid_mask) - 1; + + return oct->vf_rep_list.ndev[vf_id]; +} + +static void +lio_vf_rep_copy_packet(struct octeon_device *oct, + struct sk_buff *skb, + int len) +{ + if (likely(len > MIN_SKB_SIZE)) { + struct octeon_skb_page_info *pg_info; + unsigned char *va; + + pg_info = ((struct octeon_skb_page_info *)(skb->cb)); + if (pg_info->page) { + va = page_address(pg_info->page) + + pg_info->page_offset; + memcpy(skb->data, va, MIN_SKB_SIZE); + skb_put(skb, MIN_SKB_SIZE); + } + + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + pg_info->page, + pg_info->page_offset + MIN_SKB_SIZE, + len - MIN_SKB_SIZE, + LIO_RXBUFFER_SZ); + } else { + struct octeon_skb_page_info *pg_info = + ((struct octeon_skb_page_info *)(skb->cb)); + + skb_copy_to_linear_data(skb, page_address(pg_info->page) + + pg_info->page_offset, len); + skb_put(skb, len); + put_page(pg_info->page); + } +} + +static int +lio_vf_rep_pkt_recv(struct octeon_recv_info *recv_info, void *buf) +{ + struct octeon_recv_pkt *recv_pkt = recv_info->recv_pkt; + struct lio_vf_rep_desc *vf_rep; + struct net_device *vf_ndev; + struct octeon_device *oct; + union octeon_rh *rh; + struct sk_buff *skb; + int i, ifidx; + + oct = lio_get_device(recv_pkt->octeon_id); + if (!oct) + goto free_buffers; + + skb = recv_pkt->buffer_ptr[0]; + rh = &recv_pkt->rh; + ifidx = rh->r.ossp; + + vf_ndev = lio_vf_rep_get_ndev(oct, ifidx); + if (!vf_ndev) + goto free_buffers; + + vf_rep = netdev_priv(vf_ndev); + if (!(atomic_read(&vf_rep->ifstate) & LIO_IFSTATE_RUNNING) || + recv_pkt->buffer_count > 1) + goto free_buffers; + + skb->dev = vf_ndev; + + /* Multiple buffers are not used for vf_rep packets. + * So just buffer_size[0] is valid. + */ + lio_vf_rep_copy_packet(oct, skb, recv_pkt->buffer_size[0]); + + skb_pull(skb, rh->r_dh.len * BYTES_PER_DHLEN_UNIT); + skb->protocol = eth_type_trans(skb, skb->dev); + skb->ip_summed = CHECKSUM_NONE; + + netif_rx(skb); + + octeon_free_recv_info(recv_info); + + return 0; + +free_buffers: + for (i = 0; i < recv_pkt->buffer_count; i++) + recv_buffer_free(recv_pkt->buffer_ptr[i]); + + octeon_free_recv_info(recv_info); + + return 0; +} + +static void +lio_vf_rep_packet_sent_callback(struct octeon_device *oct, + u32 status, void *buf) +{ + struct octeon_soft_command *sc = (struct octeon_soft_command *)buf; + struct sk_buff *skb = sc->ctxptr; + struct net_device *ndev = skb->dev; + + dma_unmap_single(&oct->pci_dev->dev, sc->dmadptr, + sc->datasize, DMA_TO_DEVICE); + dev_kfree_skb_any(skb); + octeon_free_soft_command(oct, sc); + + if (octnet_iq_is_full(oct, sc->iq_no)) + return; + + if (netif_queue_stopped(ndev)) + netif_wake_queue(ndev); +} + +static int +lio_vf_rep_pkt_xmit(struct sk_buff *skb, struct net_device *ndev) +{ + struct lio_vf_rep_desc *vf_rep = netdev_priv(ndev); + struct net_device *parent_ndev = vf_rep->parent_ndev; + struct octeon_device *oct = vf_rep->oct; + struct octeon_instr_pki_ih3 *pki_ih3; + struct octeon_soft_command *sc; + struct lio *parent_lio; + int status; + + parent_lio = GET_LIO(parent_ndev); + + if (!(atomic_read(&vf_rep->ifstate) & LIO_IFSTATE_RUNNING) || + skb->len <= 0) + goto xmit_failed; + + if (octnet_iq_is_full(vf_rep->oct, parent_lio->txq)) { + dev_err(&oct->pci_dev->dev, "VF rep: Device IQ full\n"); + netif_stop_queue(ndev); + return NETDEV_TX_BUSY; + } + + sc = (struct octeon_soft_command *) + octeon_alloc_soft_command(oct, 0, 0, 0); + if (!sc) { + dev_err(&oct->pci_dev->dev, "VF rep: Soft command alloc failed\n"); + goto xmit_failed; + } + + /* Multiple buffers are not used for vf_rep packets. */ + if (skb_shinfo(skb)->nr_frags != 0) { + dev_err(&oct->pci_dev->dev, "VF rep: nr_frags != 0. Dropping packet\n"); + goto xmit_failed; + } + + sc->dmadptr = dma_map_single(&oct->pci_dev->dev, + skb->data, skb->len, DMA_TO_DEVICE); + if (dma_mapping_error(&oct->pci_dev->dev, sc->dmadptr)) { + dev_err(&oct->pci_dev->dev, "VF rep: DMA mapping failed\n"); + goto xmit_failed; + } + + sc->virtdptr = skb->data; + sc->datasize = skb->len; + sc->ctxptr = skb; + sc->iq_no = parent_lio->txq; + + octeon_prepare_soft_command(oct, sc, OPCODE_NIC, OPCODE_NIC_VF_REP_PKT, + vf_rep->ifidx, 0, 0); + pki_ih3 = (struct octeon_instr_pki_ih3 *)&sc->cmd.cmd3.pki_ih3; + pki_ih3->tagtype = ORDERED_TAG; + + sc->callback = lio_vf_rep_packet_sent_callback; + sc->callback_arg = sc; + + status = octeon_send_soft_command(oct, sc); + if (status == IQ_SEND_FAILED) { + dma_unmap_single(&oct->pci_dev->dev, sc->dmadptr, + sc->datasize, DMA_TO_DEVICE); + goto xmit_failed; + } + + if (status == IQ_SEND_STOP) + netif_stop_queue(ndev); + + netif_trans_update(ndev); + + return NETDEV_TX_OK; + +xmit_failed: + dev_kfree_skb_any(skb); + + return NETDEV_TX_OK; +} + +static int +lio_vf_rep_attr_get(struct net_device *dev, struct switchdev_attr *attr) +{ + struct lio_vf_rep_desc *vf_rep = netdev_priv(dev); + struct net_device *parent_ndev = vf_rep->parent_ndev; + struct lio *lio = GET_LIO(parent_ndev); + + switch (attr->id) { + case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: + attr->u.ppid.id_len = ETH_ALEN; + ether_addr_copy(attr->u.ppid.id, + (void *)&lio->linfo.hw_addr + 2); + break; + + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static const struct switchdev_ops lio_vf_rep_switchdev_ops = { + .switchdev_port_attr_get = lio_vf_rep_attr_get, +}; + +static void +lio_vf_rep_fetch_stats(struct work_struct *work) +{ + struct cavium_wk *wk = (struct cavium_wk *)work; + struct lio_vf_rep_desc *vf_rep = wk->ctxptr; + struct lio_vf_rep_stats stats; + struct lio_vf_rep_req rep_cfg; + struct octeon_device *oct; + int ret; + + oct = vf_rep->oct; + + memset(&rep_cfg, 0, sizeof(rep_cfg)); + rep_cfg.req_type = LIO_VF_REP_REQ_STATS; + rep_cfg.ifidx = vf_rep->ifidx; + + ret = lio_vf_rep_send_soft_command(oct, &rep_cfg, sizeof(rep_cfg), + &stats, sizeof(stats)); + + if (!ret) { + octeon_swap_8B_data((u64 *)&stats, (sizeof(stats) >> 3)); + memcpy(&vf_rep->stats, &stats, sizeof(stats)); + } + + schedule_delayed_work(&vf_rep->stats_wk.work, + msecs_to_jiffies(LIO_VF_REP_STATS_POLL_TIME_MS)); +} + +int +lio_vf_rep_create(struct octeon_device *oct) +{ + struct lio_vf_rep_desc *vf_rep; + struct net_device *ndev; + int i, num_vfs; + + if (oct->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV) + return 0; + + if (!oct->sriov_info.sriov_enabled) + return 0; + + num_vfs = oct->sriov_info.num_vfs_alloced; + + oct->vf_rep_list.num_vfs = 0; + for (i = 0; i < num_vfs; i++) { + ndev = alloc_etherdev(sizeof(struct lio_vf_rep_desc)); + + if (!ndev) { + dev_err(&oct->pci_dev->dev, + "VF rep device %d creation failed\n", i); + goto cleanup; + } + + ndev->min_mtu = LIO_MIN_MTU_SIZE; + ndev->max_mtu = LIO_MAX_MTU_SIZE; + ndev->netdev_ops = &lio_vf_rep_ndev_ops; + SWITCHDEV_SET_OPS(ndev, &lio_vf_rep_switchdev_ops); + + vf_rep = netdev_priv(ndev); + memset(vf_rep, 0, sizeof(*vf_rep)); + + vf_rep->ndev = ndev; + vf_rep->oct = oct; + vf_rep->parent_ndev = oct->props[0].netdev; + vf_rep->ifidx = (oct->pf_num * 64) + i + 1; + + eth_hw_addr_random(ndev); + + if (register_netdev(ndev)) { + dev_err(&oct->pci_dev->dev, "VF rep nerdev registration failed\n"); + + free_netdev(ndev); + goto cleanup; + } + + netif_carrier_off(ndev); + + INIT_DELAYED_WORK(&vf_rep->stats_wk.work, + lio_vf_rep_fetch_stats); + vf_rep->stats_wk.ctxptr = (void *)vf_rep; + schedule_delayed_work(&vf_rep->stats_wk.work, + msecs_to_jiffies + (LIO_VF_REP_STATS_POLL_TIME_MS)); + oct->vf_rep_list.num_vfs++; + oct->vf_rep_list.ndev[i] = ndev; + } + + if (octeon_register_dispatch_fn(oct, OPCODE_NIC, + OPCODE_NIC_VF_REP_PKT, + lio_vf_rep_pkt_recv, oct)) { + dev_err(&oct->pci_dev->dev, "VF rep Dispatch func registration failed\n"); + + goto cleanup; + } + + return 0; + +cleanup: + for (i = 0; i < oct->vf_rep_list.num_vfs; i++) { + ndev = oct->vf_rep_list.ndev[i]; + oct->vf_rep_list.ndev[i] = NULL; + if (ndev) { + vf_rep = netdev_priv(ndev); + cancel_delayed_work_sync + (&vf_rep->stats_wk.work); + unregister_netdev(ndev); + free_netdev(ndev); + } + } + + oct->vf_rep_list.num_vfs = 0; + + return -1; +} + +void +lio_vf_rep_destroy(struct octeon_device *oct) +{ + struct lio_vf_rep_desc *vf_rep; + struct net_device *ndev; + int i; + + if (oct->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV) + return; + + if (!oct->sriov_info.sriov_enabled) + return; + + for (i = 0; i < oct->vf_rep_list.num_vfs; i++) { + ndev = oct->vf_rep_list.ndev[i]; + oct->vf_rep_list.ndev[i] = NULL; + if (ndev) { + vf_rep = netdev_priv(ndev); + cancel_delayed_work_sync + (&vf_rep->stats_wk.work); + netif_tx_disable(ndev); + netif_carrier_off(ndev); + + unregister_netdev(ndev); + free_netdev(ndev); + } + } + + oct->vf_rep_list.num_vfs = 0; +} + +static int +lio_vf_rep_netdev_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *ndev = netdev_notifier_info_to_dev(ptr); + struct lio_vf_rep_desc *vf_rep; + struct lio_vf_rep_req rep_cfg; + struct octeon_device *oct; + int ret; + + switch (event) { + case NETDEV_REGISTER: + case NETDEV_CHANGENAME: + break; + + default: + return NOTIFY_DONE; + } + + if (ndev->netdev_ops != &lio_vf_rep_ndev_ops) + return NOTIFY_DONE; + + vf_rep = netdev_priv(ndev); + oct = vf_rep->oct; + + if (strlen(ndev->name) > LIO_IF_NAME_SIZE) { + dev_err(&oct->pci_dev->dev, + "Device name change sync failed as the size is > %d\n", + LIO_IF_NAME_SIZE); + return NOTIFY_DONE; + } + + memset(&rep_cfg, 0, sizeof(rep_cfg)); + rep_cfg.req_type = LIO_VF_REP_REQ_DEVNAME; + rep_cfg.ifidx = vf_rep->ifidx; + strncpy(rep_cfg.rep_name.name, ndev->name, LIO_IF_NAME_SIZE); + + ret = lio_vf_rep_send_soft_command(oct, &rep_cfg, + sizeof(rep_cfg), NULL, 0); + if (ret) + dev_err(&oct->pci_dev->dev, + "vf_rep netdev name change failed with err %d\n", ret); + + return NOTIFY_DONE; +} + +static struct notifier_block lio_vf_rep_netdev_notifier = { + .notifier_call = lio_vf_rep_netdev_event, +}; + +int +lio_vf_rep_modinit(void) +{ + if (register_netdevice_notifier(&lio_vf_rep_netdev_notifier)) { + pr_err("netdev notifier registration failed\n"); + return -EFAULT; + } + + return 0; +} + +void +lio_vf_rep_modexit(void) +{ + if (unregister_netdevice_notifier(&lio_vf_rep_netdev_notifier)) + pr_err("netdev notifier unregister failed\n"); +} diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.h b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.h new file mode 100644 index 000000000000..bb3cedc63c63 --- /dev/null +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.h @@ -0,0 +1,49 @@ +/********************************************************************** + * Author: Cavium, Inc. + * + * Contact: support@cavium.com + * Please include "LiquidIO" in the subject. + * + * Copyright (c) 2003-2017 Cavium, Inc. + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, Version 2, as + * published by the Free Software Foundation. + * + * This file is distributed in the hope that it will be useful, but + * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or + * NONINFRINGEMENT. See the GNU General Public License for more + * details. + * + * This file may also be available under a different license from Cavium. + * Contact Cavium, Inc. for more information + **********************************************************************/ + +/*! \file octeon_vf_main.h + * \brief Host Driver: This file defines vf_rep related macros and structures + */ +#ifndef __LIO_VF_REP_H__ +#define __LIO_VF_REP_H__ +#define LIO_VF_REP_REQ_TMO_MS 5000 +#define LIO_VF_REP_STATS_POLL_TIME_MS 200 + +struct lio_vf_rep_desc { + struct net_device *parent_ndev; + struct net_device *ndev; + struct octeon_device *oct; + struct lio_vf_rep_stats stats; + struct cavium_wk stats_wk; + atomic_t ifstate; + int ifidx; +}; + +struct lio_vf_rep_sc_ctx { + struct completion complete; +}; + +int lio_vf_rep_create(struct octeon_device *oct); +void lio_vf_rep_destroy(struct octeon_device *oct); +int lio_vf_rep_modinit(void); +void lio_vf_rep_modexit(void); +#endif diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h index 3788c8cd082a..522dcc4dcff7 100644 --- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h +++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h @@ -27,8 +27,8 @@ #define LIQUIDIO_PACKAGE "" #define LIQUIDIO_BASE_MAJOR_VERSION 1 -#define LIQUIDIO_BASE_MINOR_VERSION 6 -#define LIQUIDIO_BASE_MICRO_VERSION 1 +#define LIQUIDIO_BASE_MINOR_VERSION 7 +#define LIQUIDIO_BASE_MICRO_VERSION 0 #define LIQUIDIO_BASE_VERSION __stringify(LIQUIDIO_BASE_MAJOR_VERSION) "." \ __stringify(LIQUIDIO_BASE_MINOR_VERSION) #define LIQUIDIO_MICRO_VERSION "." __stringify(LIQUIDIO_BASE_MICRO_VERSION) @@ -84,10 +84,14 @@ enum octeon_tag_type { #define OPCODE_NIC_IF_CFG 0x09 #define OPCODE_NIC_VF_DRV_NOTICE 0x0A #define OPCODE_NIC_INTRMOD_PARAMS 0x0B +#define OPCODE_NIC_SYNC_OCTEON_TIME 0x14 #define VF_DRV_LOADED 1 #define VF_DRV_REMOVED -1 #define VF_DRV_MACADDR_CHANGED 2 +#define OPCODE_NIC_VF_REP_PKT 0x15 +#define OPCODE_NIC_VF_REP_CMD 0x16 + #define CORE_DRV_TEST_SCATTER_OP 0xFFF5 /* Application codes advertised by the core driver initialization packet. */ @@ -108,6 +112,10 @@ enum octeon_tag_type { #define SCR2_BIT_FW_LOADED 63 +/* App specific capabilities from firmware to pf driver */ +#define LIQUIDIO_TIME_SYNC_CAP 0x1 +#define LIQUIDIO_SWITCHDEV_CAP 0x2 + static inline u32 incr_index(u32 index, u32 count, u32 max) { if ((index + count) >= max) @@ -901,4 +909,60 @@ union oct_nic_if_cfg { } s; }; +struct lio_time { + s64 sec; /* seconds */ + s64 nsec; /* nanoseconds */ +}; + +struct lio_vf_rep_stats { + u64 tx_packets; + u64 tx_bytes; + u64 tx_dropped; + + u64 rx_packets; + u64 rx_bytes; + u64 rx_dropped; +}; + +enum lio_vf_rep_req_type { + LIO_VF_REP_REQ_NONE, + LIO_VF_REP_REQ_STATE, + LIO_VF_REP_REQ_MTU, + LIO_VF_REP_REQ_STATS, + LIO_VF_REP_REQ_DEVNAME +}; + +enum { + LIO_VF_REP_STATE_DOWN, + LIO_VF_REP_STATE_UP +}; + +#define LIO_IF_NAME_SIZE 16 +struct lio_vf_rep_req { + u8 req_type; + u8 ifidx; + u8 rsvd[6]; + + union { + struct lio_vf_rep_name { + char name[LIO_IF_NAME_SIZE]; + } rep_name; + + struct lio_vf_rep_mtu { + u32 mtu; + u32 rsvd; + } rep_mtu; + + struct lio_vf_rep_state { + u8 state; + u8 rsvd[7]; + } rep_state; + }; +}; + +struct lio_vf_rep_resp { + u64 rh; + u8 status; + u8 rsvd[7]; +}; #endif diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_image.h b/drivers/net/ethernet/cavium/liquidio/liquidio_image.h index 78a3685f6fe0..5bf5e8791dfb 100644 --- a/drivers/net/ethernet/cavium/liquidio/liquidio_image.h +++ b/drivers/net/ethernet/cavium/liquidio/liquidio_image.h @@ -24,6 +24,7 @@ #define LIO_FW_BASE_NAME "lio_" #define LIO_FW_NAME_SUFFIX ".bin" #define LIO_FW_NAME_TYPE_NIC "nic" +#define LIO_FW_NAME_TYPE_AUTO "auto" #define LIO_FW_NAME_TYPE_NONE "none" #define LIO_MAX_FIRMWARE_VERSION_LEN 16 diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_config.h b/drivers/net/ethernet/cavium/liquidio/octeon_config.h index 63bd9c94e547..ceac74388e09 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_config.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_config.h @@ -37,6 +37,8 @@ #define MAX_OCTEON_LINKS MAX_OCTEON_NICIF #define MAX_OCTEON_MULTICAST_ADDR 32 +#define MAX_OCTEON_FILL_COUNT 8 + /* CN6xxx IQ configuration macros */ #define CN6XXX_MAX_INPUT_QUEUES 32 #define CN6XXX_MAX_IQ_DESCRIPTORS 2048 diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_console.c b/drivers/net/ethernet/cavium/liquidio/octeon_console.c index ec3dd69cd6b2..7f97ae48efed 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_console.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_console.c @@ -803,15 +803,18 @@ static int octeon_console_read(struct octeon_device *oct, u32 console_num, } #define FBUF_SIZE (4 * 1024 * 1024) +#define MAX_BOOTTIME_SIZE 80 int octeon_download_firmware(struct octeon_device *oct, const u8 *data, size_t size) { - int ret = 0; + struct octeon_firmware_file_header *h; + char boottime[MAX_BOOTTIME_SIZE]; + struct timespec64 ts; u32 crc32_result; u64 load_addr; u32 image_len; - struct octeon_firmware_file_header *h; + int ret = 0; u32 i, rem; if (size < sizeof(struct octeon_firmware_file_header)) { @@ -890,11 +893,34 @@ int octeon_download_firmware(struct octeon_device *oct, const u8 *data, load_addr += size; } } + + /* Pass date and time information to NIC at the time of loading + * firmware and periodically update the host time to NIC firmware. + * This is to make NIC firmware use the same time reference as Host, + * so that it is easy to correlate logs from firmware and host for + * debugging. + * + * Octeon always uses UTC time. so timezone information is not sent. + */ + getnstimeofday64(&ts); + ret = snprintf(boottime, MAX_BOOTTIME_SIZE, + " time_sec=%lld time_nsec=%ld", + (s64)ts.tv_sec, ts.tv_nsec); + if ((sizeof(h->bootcmd) - strnlen(h->bootcmd, sizeof(h->bootcmd))) < + ret) { + dev_err(&oct->pci_dev->dev, "Boot command buffer too small\n"); + return -EINVAL; + } + strncat(h->bootcmd, boottime, + sizeof(h->bootcmd) - strnlen(h->bootcmd, sizeof(h->bootcmd))); + dev_info(&oct->pci_dev->dev, "Writing boot command: %s\n", h->bootcmd); /* Invoke the bootcmd */ ret = octeon_console_send_cmd(oct, h->bootcmd, 50); + if (ret) + dev_info(&oct->pci_dev->dev, "Boot command send failed\n"); - return 0; + return ret; } diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c index 29d53b1763a7..2c615ab09e64 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c @@ -541,6 +541,7 @@ static char oct_dev_app_str[CVM_DRV_APP_COUNT + 1][32] = { static struct octeon_device *octeon_device[MAX_OCTEON_DEVICES]; static atomic_t adapter_refcounts[MAX_OCTEON_DEVICES]; +static atomic_t adapter_fw_states[MAX_OCTEON_DEVICES]; static u32 octeon_device_count; /* locks device array (i.e. octeon_device[]) */ @@ -770,6 +771,10 @@ int octeon_register_device(struct octeon_device *oct, oct->adapter_refcount = &adapter_refcounts[oct->octeon_id]; atomic_set(oct->adapter_refcount, 0); + /* Like the reference count, the f/w state is shared 'per-adapter' */ + oct->adapter_fw_state = &adapter_fw_states[oct->octeon_id]; + atomic_set(oct->adapter_fw_state, FW_NEEDS_TO_BE_LOADED); + spin_lock(&octeon_devices_lock); for (idx = (int)oct->octeon_id - 1; idx >= 0; idx--) { if (!octeon_device[idx]) { @@ -780,11 +785,15 @@ int octeon_register_device(struct octeon_device *oct, atomic_inc(oct->adapter_refcount); return 1; /* here, refcount is guaranteed to be 1 */ } - /* if another device is at same bus/dev, use its refcounter */ + /* If another device is at same bus/dev, use its refcounter + * (and f/w state variable). + */ if ((octeon_device[idx]->loc.bus == bus) && (octeon_device[idx]->loc.dev == dev)) { oct->adapter_refcount = octeon_device[idx]->adapter_refcount; + oct->adapter_fw_state = + octeon_device[idx]->adapter_fw_state; break; } } @@ -1171,6 +1180,10 @@ octeon_register_dispatch_fn(struct octeon_device *oct, spin_unlock_bh(&oct->dispatch.lock); } else { + if (pfn == fn && + octeon_get_dispatch_arg(oct, opcode, subcode) == fn_arg) + return 0; + dev_err(&oct->pci_dev->dev, "Found previously registered dispatch fn for opcode/subcode: %x/%x\n", opcode, subcode); diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h index 894af199ddef..63b0c758a0a6 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h @@ -23,6 +23,7 @@ #define _OCTEON_DEVICE_H_ #include <linux/interrupt.h> +#include <net/devlink.h> /** PCI VendorId Device Id */ #define OCTEON_CN68XX_PCIID 0x91177d @@ -50,6 +51,13 @@ enum octeon_pci_swap_mode { OCTEON_PCI_32BIT_LW_SWAP = 3 }; +enum lio_fw_state { + FW_IS_PRELOADED = 0, + FW_NEEDS_TO_BE_LOADED = 1, + FW_IS_BEING_LOADED = 2, + FW_HAS_BEEN_LOADED = 3, +}; + enum { OCTEON_CONFIG_TYPE_DEFAULT = 0, NUM_OCTEON_CONFS, @@ -384,6 +392,15 @@ struct octeon_ioq_vector { u32 ioq_num; }; +struct lio_vf_rep_list { + int num_vfs; + struct net_device *ndev[CN23XX_MAX_VFS_PER_PF]; +}; + +struct lio_devlink_priv { + struct octeon_device *oct; +}; + /** The Octeon device. * Each Octeon device has this structure to represent all its * components. @@ -557,7 +574,14 @@ struct octeon_device { } loc; atomic_t *adapter_refcount; /* reference count of adapter */ + + atomic_t *adapter_fw_state; /* per-adapter, lio_fw_state */ + bool ptp_enable; + + struct lio_vf_rep_list vf_rep_list; + struct devlink *devlink; + enum devlink_eswitch_mode eswitch_mode; }; #define OCT_DRV_ONLINE 1 diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c index 9372d4ce9954..3461d65ff4eb 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c @@ -52,8 +52,8 @@ struct __dispatch { * @return Failure: NULL * */ -static inline void *octeon_get_dispatch_arg(struct octeon_device *octeon_dev, - u16 opcode, u16 subcode) +void *octeon_get_dispatch_arg(struct octeon_device *octeon_dev, + u16 opcode, u16 subcode) { int idx; struct list_head *dispatch; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h index f91bc84d1719..815a9f56fd59 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h @@ -400,6 +400,9 @@ int octeon_register_dispatch_fn(struct octeon_device *oct, u16 subcode, octeon_dispatch_fn_t fn, void *fn_arg); +void *octeon_get_dispatch_arg(struct octeon_device *oct, + u16 opcode, u16 subcode); + void octeon_droq_print_stats(void); u32 octeon_droq_check_hw_for_pkts(struct octeon_droq *droq); diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h index 5c3c8da976f7..81c987682941 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h @@ -343,6 +343,9 @@ int octeon_delete_instr_queue(struct octeon_device *octeon_dev, u32 iq_no); int lio_wait_for_instr_fetch(struct octeon_device *oct); +void +octeon_ring_doorbell_locked(struct octeon_device *oct, u32 iq_no); + int octeon_register_reqtype_free_fn(struct octeon_device *oct, int reqtype, void (*fn)(void *)); diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_main.h b/drivers/net/ethernet/cavium/liquidio/octeon_main.h index 32ef3a7d88d8..c846eec11a45 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_main.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_main.h @@ -63,7 +63,7 @@ struct octnet_buf_free_info { }; /* BQL-related functions */ -void octeon_report_sent_bytes_to_bql(void *buf, int reqtype); +int octeon_report_sent_bytes_to_bql(void *buf, int reqtype); void octeon_update_tx_completion_counters(void *buf, int reqtype, unsigned int *pkts_compl, unsigned int *bytes_compl); diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h index 9e36319cead6..f2d1a076a038 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h @@ -136,6 +136,9 @@ struct lio { /* work queue for link status */ struct cavium_wq link_status_wq; + /* work queue to regularly send local time to octeon firmware */ + struct cavium_wq sync_octeon_time_wq; + int netdev_uc_count; }; @@ -195,7 +198,7 @@ static inline void struct sk_buff *skb; struct octeon_skb_page_info *skb_pg_info; - page = alloc_page(GFP_ATOMIC | __GFP_COLD); + page = alloc_page(GFP_ATOMIC); if (unlikely(!page)) return NULL; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c index b457cf23fce6..150609bd8849 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c @@ -82,9 +82,10 @@ octeon_alloc_soft_command_resp(struct octeon_device *oct, } int octnet_send_nic_data_pkt(struct octeon_device *oct, - struct octnic_data_pkt *ndata) + struct octnic_data_pkt *ndata, + int xmit_more) { - int ring_doorbell = 1; + int ring_doorbell = !xmit_more; return octeon_send_command(oct, ndata->q_no, ring_doorbell, &ndata->cmd, ndata->buf, ndata->datasize, diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_nic.h b/drivers/net/ethernet/cavium/liquidio/octeon_nic.h index 6480ef863441..de4130d26a98 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_nic.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_nic.h @@ -279,7 +279,8 @@ octeon_alloc_soft_command_resp(struct octeon_device *oct, * queue should be stopped, and IQ_SEND_OK if it sent okay. */ int octnet_send_nic_data_pkt(struct octeon_device *oct, - struct octnic_data_pkt *ndata); + struct octnic_data_pkt *ndata, + int xmit_more); /** Send a NIC control packet to the device * @param oct - octeon device pointer diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c index 1e0fbce86d60..e07d2093b971 100644 --- a/drivers/net/ethernet/cavium/liquidio/request_manager.c +++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c @@ -278,6 +278,18 @@ ring_doorbell(struct octeon_device *oct, struct octeon_instr_queue *iq) } } +void +octeon_ring_doorbell_locked(struct octeon_device *oct, u32 iq_no) +{ + struct octeon_instr_queue *iq; + + iq = oct->instr_queue[iq_no]; + spin_lock(&iq->post_lock); + if (iq->fill_cnt) + ring_doorbell(oct, iq); + spin_unlock(&iq->post_lock); +} + static inline void __copy_cmd_into_iq(struct octeon_instr_queue *iq, u8 *cmd) { @@ -477,8 +489,6 @@ octeon_flush_iq(struct octeon_device *oct, struct octeon_instr_queue *iq, } tot_inst_processed += inst_processed; - inst_processed = 0; - } while (tot_inst_processed < napi_budget); if (napi_budget && (tot_inst_processed >= napi_budget)) @@ -543,6 +553,7 @@ octeon_send_command(struct octeon_device *oct, u32 iq_no, u32 force_db, void *cmd, void *buf, u32 datasize, u32 reqtype) { + int xmit_stopped; struct iq_post_status st; struct octeon_instr_queue *iq = oct->instr_queue[iq_no]; @@ -554,12 +565,13 @@ octeon_send_command(struct octeon_device *oct, u32 iq_no, st = __post_command2(iq, cmd); if (st.status != IQ_SEND_FAILED) { - octeon_report_sent_bytes_to_bql(buf, reqtype); + xmit_stopped = octeon_report_sent_bytes_to_bql(buf, reqtype); __add_to_request_list(iq, st.index, buf, reqtype); INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, bytes_sent, datasize); INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, instr_posted, 1); - if (force_db) + if (iq->fill_cnt >= MAX_OCTEON_FILL_COUNT || force_db || + xmit_stopped || st.status == IQ_SEND_STOP) ring_doorbell(oct, iq); } else { INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, instr_dropped, 1); diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c index 2887bcaf6af5..3f6afb54a5eb 100644 --- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c +++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c @@ -705,14 +705,15 @@ static int octeon_mgmt_ioctl_hwtstamp(struct net_device *netdev, u64 clock_comp = (NSEC_PER_SEC << 32) / octeon_get_io_clock_rate(); if (!ptp.s.ptp_en) cvmx_write_csr(CVMX_MIO_PTP_CLOCK_COMP, clock_comp); - pr_info("PTP Clock: Using sclk reference at %lld Hz\n", - (NSEC_PER_SEC << 32) / clock_comp); + netdev_info(netdev, + "PTP Clock using sclk reference @ %lldHz\n", + (NSEC_PER_SEC << 32) / clock_comp); } else { /* The clock is already programmed to use a GPIO */ u64 clock_comp = cvmx_read_csr(CVMX_MIO_PTP_CLOCK_COMP); - pr_info("PTP Clock: Using GPIO %d at %lld Hz\n", - ptp.s.ext_clk_in, - (NSEC_PER_SEC << 32) / clock_comp); + netdev_info(netdev, + "PTP Clock using GPIO%d @ %lld Hz\n", + ptp.s.ext_clk_in, (NSEC_PER_SEC << 32) / clock_comp); } /* Enable the clock if it wasn't done already */ @@ -926,14 +927,11 @@ static void octeon_mgmt_adjust_link(struct net_device *netdev) spin_unlock_irqrestore(&p->lock, flags); if (link_changed != 0) { - if (link_changed > 0) { - pr_info("%s: Link is up - %d/%s\n", netdev->name, - phydev->speed, - phydev->duplex == DUPLEX_FULL ? - "Full" : "Half"); - } else { - pr_info("%s: Link is down\n", netdev->name); - } + if (link_changed > 0) + netdev_info(netdev, "Link is up - %d/%s\n", + phydev->speed, phydev->duplex == DUPLEX_FULL ? "Full" : "Half"); + else + netdev_info(netdev, "Link is down\n"); } } diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index fb770b0182d3..8f1dd55b3e08 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -361,17 +361,8 @@ static void nic_set_lmac_vf_mapping(struct nicpf *nic) } } -static void nic_free_lmacmem(struct nicpf *nic) +static void nic_get_hw_info(struct nicpf *nic) { - kfree(nic->vf_lmac_map); - kfree(nic->link); - kfree(nic->duplex); - kfree(nic->speed); -} - -static int nic_get_hw_info(struct nicpf *nic) -{ - u8 max_lmac; u16 sdevid; struct hw_info *hw = nic->hw; @@ -419,41 +410,16 @@ static int nic_get_hw_info(struct nicpf *nic) break; } hw->tl4_cnt = MAX_QUEUES_PER_QSET * pci_sriov_get_totalvfs(nic->pdev); - - /* Allocate memory for LMAC tracking elements */ - max_lmac = hw->bgx_cnt * MAX_LMAC_PER_BGX; - nic->vf_lmac_map = kmalloc_array(max_lmac, sizeof(u8), GFP_KERNEL); - if (!nic->vf_lmac_map) - goto error; - nic->link = kmalloc_array(max_lmac, sizeof(u8), GFP_KERNEL); - if (!nic->link) - goto error; - nic->duplex = kmalloc_array(max_lmac, sizeof(u8), GFP_KERNEL); - if (!nic->duplex) - goto error; - nic->speed = kmalloc_array(max_lmac, sizeof(u32), GFP_KERNEL); - if (!nic->speed) - goto error; - return 0; - -error: - nic_free_lmacmem(nic); - return -ENOMEM; } #define BGX0_BLOCK 8 #define BGX1_BLOCK 9 -static int nic_init_hw(struct nicpf *nic) +static void nic_init_hw(struct nicpf *nic) { - int i, err; + int i; u64 cqm_cfg; - /* Get HW capability info */ - err = nic_get_hw_info(nic); - if (err) - return err; - /* Enable NIC HW block */ nic_reg_write(nic, NIC_PF_CFG, 0x3); @@ -498,8 +464,6 @@ static int nic_init_hw(struct nicpf *nic) cqm_cfg = nic_reg_read(nic, NIC_PF_CQM_CFG); if (cqm_cfg < NICPF_CQM_MIN_DROP_LEVEL) nic_reg_write(nic, NIC_PF_CQM_CFG, NICPF_CQM_MIN_DROP_LEVEL); - - return 0; } /* Channel parse index configuration */ @@ -584,9 +548,6 @@ static void nic_config_cpi(struct nicpf *nic, struct cpi_cfg_msg *cfg) static void nic_send_rss_size(struct nicpf *nic, int vf) { union nic_mbx mbx = {}; - u64 *msg; - - msg = (u64 *)&mbx; mbx.rss_size.msg = NIC_MBOX_MSG_RSS_SIZE; mbx.rss_size.ind_tbl_size = nic->hw->rss_ind_tbl_size; @@ -608,7 +569,6 @@ static void nic_config_rss(struct nicpf *nic, struct rss_cfg_msg *cfg) rssi_base = nic->rssi_base[cfg->vf_id] + cfg->tbl_offset; rssi = rssi_base; - qset = cfg->vf_id; for (; rssi < (rssi_base + cfg->tbl_len); rssi++) { u8 svf = cfg->ind_tbl[idx] >> 3; @@ -1273,6 +1233,7 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct device *dev = &pdev->dev; struct nicpf *nic; + u8 max_lmac; int err; BUILD_BUG_ON(sizeof(union nic_mbx) > 16); @@ -1282,10 +1243,8 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return -ENOMEM; nic->hw = devm_kzalloc(dev, sizeof(struct hw_info), GFP_KERNEL); - if (!nic->hw) { - devm_kfree(dev, nic); + if (!nic->hw) return -ENOMEM; - } pci_set_drvdata(pdev, nic); @@ -1326,11 +1285,33 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) nic->node = nic_get_node_id(pdev); - /* Initialize hardware */ - err = nic_init_hw(nic); - if (err) + /* Get HW capability info */ + nic_get_hw_info(nic); + + /* Allocate memory for LMAC tracking elements */ + err = -ENOMEM; + max_lmac = nic->hw->bgx_cnt * MAX_LMAC_PER_BGX; + + nic->vf_lmac_map = devm_kmalloc_array(dev, max_lmac, sizeof(u8), + GFP_KERNEL); + if (!nic->vf_lmac_map) + goto err_release_regions; + + nic->link = devm_kmalloc_array(dev, max_lmac, sizeof(u8), GFP_KERNEL); + if (!nic->link) + goto err_release_regions; + + nic->duplex = devm_kmalloc_array(dev, max_lmac, sizeof(u8), GFP_KERNEL); + if (!nic->duplex) + goto err_release_regions; + + nic->speed = devm_kmalloc_array(dev, max_lmac, sizeof(u32), GFP_KERNEL); + if (!nic->speed) goto err_release_regions; + /* Initialize hardware */ + nic_init_hw(nic); + nic_set_lmac_vf_mapping(nic); /* Register interrupts */ @@ -1364,9 +1345,6 @@ err_unregister_interrupts: err_release_regions: pci_release_regions(pdev); err_disable_device: - nic_free_lmacmem(nic); - devm_kfree(dev, nic->hw); - devm_kfree(dev, nic); pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); return err; @@ -1388,10 +1366,6 @@ static void nic_remove(struct pci_dev *pdev) nic_unregister_interrupts(nic); pci_release_regions(pdev); - nic_free_lmacmem(nic); - devm_kfree(&pdev->dev, nic->hw); - devm_kfree(&pdev->dev, nic); - pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); } diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 805ab45e9b5a..a063c36c4c58 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -523,6 +523,7 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog, xdp.data_hard_start = page_address(page); xdp.data = (void *)cpu_addr; + xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + len; orig_data = xdp.data; @@ -1740,7 +1741,7 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog) return 0; } -static int nicvf_xdp(struct net_device *netdev, struct netdev_xdp *xdp) +static int nicvf_xdp(struct net_device *netdev, struct netdev_bpf *xdp) { struct nicvf *nic = netdev_priv(netdev); @@ -1773,7 +1774,7 @@ static const struct net_device_ops nicvf_netdev_ops = { .ndo_tx_timeout = nicvf_tx_timeout, .ndo_fix_features = nicvf_fix_features, .ndo_set_features = nicvf_set_features, - .ndo_xdp = nicvf_xdp, + .ndo_bpf = nicvf_xdp, }; static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c index 0f13a7f7c1d3..30de26ef3da4 100644 --- a/drivers/net/ethernet/chelsio/cxgb/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb/sge.c @@ -1882,10 +1882,10 @@ send: /* * Callback for the Tx buffer reclaim timer. Runs with softirqs disabled. */ -static void sge_tx_reclaim_cb(unsigned long data) +static void sge_tx_reclaim_cb(struct timer_list *t) { int i; - struct sge *sge = (struct sge *)data; + struct sge *sge = from_timer(sge, t, tx_reclaim_timer); for (i = 0; i < SGE_CMDQ_N; ++i) { struct cmdQ *q = &sge->cmdQ[i]; @@ -1978,10 +1978,10 @@ void t1_sge_start(struct sge *sge) /* * Callback for the T2 ESPI 'stuck packet feature' workaorund */ -static void espibug_workaround_t204(unsigned long data) +static void espibug_workaround_t204(struct timer_list *t) { - struct adapter *adapter = (struct adapter *)data; - struct sge *sge = adapter->sge; + struct sge *sge = from_timer(sge, t, espibug_timer); + struct adapter *adapter = sge->adapter; unsigned int nports = adapter->params.nports; u32 seop[MAX_NPORTS]; @@ -2021,10 +2021,10 @@ static void espibug_workaround_t204(unsigned long data) mod_timer(&sge->espibug_timer, jiffies + sge->espibug_timeout); } -static void espibug_workaround(unsigned long data) +static void espibug_workaround(struct timer_list *t) { - struct adapter *adapter = (struct adapter *)data; - struct sge *sge = adapter->sge; + struct sge *sge = from_timer(sge, t, espibug_timer); + struct adapter *adapter = sge->adapter; if (netif_running(adapter->port[0].dev)) { struct sk_buff *skb = sge->espibug_skb[0]; @@ -2075,19 +2075,15 @@ struct sge *t1_sge_create(struct adapter *adapter, struct sge_params *p) goto nomem_port; } - init_timer(&sge->tx_reclaim_timer); - sge->tx_reclaim_timer.data = (unsigned long)sge; - sge->tx_reclaim_timer.function = sge_tx_reclaim_cb; + timer_setup(&sge->tx_reclaim_timer, sge_tx_reclaim_cb, 0); if (is_T2(sge->adapter)) { - init_timer(&sge->espibug_timer); + timer_setup(&sge->espibug_timer, + adapter->params.nports > 1 ? espibug_workaround_t204 : espibug_workaround, + 0); - if (adapter->params.nports > 1) { + if (adapter->params.nports > 1) tx_sched_init(sge); - sge->espibug_timer.function = espibug_workaround_t204; - } else - sge->espibug_timer.function = espibug_workaround; - sge->espibug_timer.data = (unsigned long)sge->adapter; sge->espibug_timeout = 1; /* for T204, every 10ms */ diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c index e2d342647b19..e988caa797cb 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c @@ -455,6 +455,11 @@ static int alloc_pg_chunk(struct adapter *adapter, struct sge_fl *q, q->pg_chunk.offset = 0; mapping = pci_map_page(adapter->pdev, q->pg_chunk.page, 0, q->alloc_size, PCI_DMA_FROMDEVICE); + if (unlikely(pci_dma_mapping_error(adapter->pdev, mapping))) { + __free_pages(q->pg_chunk.page, order); + q->pg_chunk.page = NULL; + return -EIO; + } q->pg_chunk.mapping = mapping; } sd->pg_chunk = q->pg_chunk; @@ -949,40 +954,78 @@ static inline unsigned int calc_tx_descs(const struct sk_buff *skb) return flits_to_desc(flits); } +/* map_skb - map a packet main body and its page fragments + * @pdev: the PCI device + * @skb: the packet + * @addr: placeholder to save the mapped addresses + * + * map the main body of an sk_buff and its page fragments, if any. + */ +static int map_skb(struct pci_dev *pdev, const struct sk_buff *skb, + dma_addr_t *addr) +{ + const skb_frag_t *fp, *end; + const struct skb_shared_info *si; + + if (skb_headlen(skb)) { + *addr = pci_map_single(pdev, skb->data, skb_headlen(skb), + PCI_DMA_TODEVICE); + if (pci_dma_mapping_error(pdev, *addr)) + goto out_err; + addr++; + } + + si = skb_shinfo(skb); + end = &si->frags[si->nr_frags]; + + for (fp = si->frags; fp < end; fp++) { + *addr = skb_frag_dma_map(&pdev->dev, fp, 0, skb_frag_size(fp), + DMA_TO_DEVICE); + if (pci_dma_mapping_error(pdev, *addr)) + goto unwind; + addr++; + } + return 0; + +unwind: + while (fp-- > si->frags) + dma_unmap_page(&pdev->dev, *--addr, skb_frag_size(fp), + DMA_TO_DEVICE); + + pci_unmap_single(pdev, addr[-1], skb_headlen(skb), PCI_DMA_TODEVICE); +out_err: + return -ENOMEM; +} + /** - * make_sgl - populate a scatter/gather list for a packet + * write_sgl - populate a scatter/gather list for a packet * @skb: the packet * @sgp: the SGL to populate * @start: start address of skb main body data to include in the SGL * @len: length of skb main body data to include in the SGL - * @pdev: the PCI device + * @addr: the list of the mapped addresses * - * Generates a scatter/gather list for the buffers that make up a packet + * Copies the scatter/gather list for the buffers that make up a packet * and returns the SGL size in 8-byte words. The caller must size the SGL * appropriately. */ -static inline unsigned int make_sgl(const struct sk_buff *skb, - struct sg_ent *sgp, unsigned char *start, - unsigned int len, struct pci_dev *pdev) +static inline unsigned int write_sgl(const struct sk_buff *skb, + struct sg_ent *sgp, unsigned char *start, + unsigned int len, const dma_addr_t *addr) { - dma_addr_t mapping; - unsigned int i, j = 0, nfrags; + unsigned int i, j = 0, k = 0, nfrags; if (len) { - mapping = pci_map_single(pdev, start, len, PCI_DMA_TODEVICE); sgp->len[0] = cpu_to_be32(len); - sgp->addr[0] = cpu_to_be64(mapping); - j = 1; + sgp->addr[j++] = cpu_to_be64(addr[k++]); } nfrags = skb_shinfo(skb)->nr_frags; for (i = 0; i < nfrags; i++) { const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - mapping = skb_frag_dma_map(&pdev->dev, frag, 0, skb_frag_size(frag), - DMA_TO_DEVICE); sgp->len[j] = cpu_to_be32(skb_frag_size(frag)); - sgp->addr[j] = cpu_to_be64(mapping); + sgp->addr[j] = cpu_to_be64(addr[k++]); j ^= 1; if (j == 0) ++sgp; @@ -1138,7 +1181,7 @@ static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb, const struct port_info *pi, unsigned int pidx, unsigned int gen, struct sge_txq *q, unsigned int ndesc, - unsigned int compl) + unsigned int compl, const dma_addr_t *addr) { unsigned int flits, sgl_flits, cntrl, tso_info; struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1]; @@ -1196,7 +1239,7 @@ static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb, } sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl; - sgl_flits = make_sgl(skb, sgp, skb->data, skb_headlen(skb), adap->pdev); + sgl_flits = write_sgl(skb, sgp, skb->data, skb_headlen(skb), addr); write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, gen, htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | compl), @@ -1227,6 +1270,7 @@ netdev_tx_t t3_eth_xmit(struct sk_buff *skb, struct net_device *dev) struct netdev_queue *txq; struct sge_qset *qs; struct sge_txq *q; + dma_addr_t addr[MAX_SKB_FRAGS + 1]; /* * The chip min packet length is 9 octets but play safe and reject @@ -1255,6 +1299,14 @@ netdev_tx_t t3_eth_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_BUSY; } + /* Check if ethernet packet can't be sent as immediate data */ + if (skb->len > (WR_LEN - sizeof(struct cpl_tx_pkt))) { + if (unlikely(map_skb(adap->pdev, skb, addr) < 0)) { + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + } + q->in_use += ndesc; if (unlikely(credits - ndesc < q->stop_thres)) { t3_stop_tx_queue(txq, qs, q); @@ -1312,7 +1364,7 @@ netdev_tx_t t3_eth_xmit(struct sk_buff *skb, struct net_device *dev) if (likely(!skb_shared(skb))) skb_orphan(skb); - write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl); + write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl, addr); check_ring_tx_db(adap, q); return NETDEV_TX_OK; } @@ -1577,7 +1629,8 @@ static void setup_deferred_unmapping(struct sk_buff *skb, struct pci_dev *pdev, */ static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb, struct sge_txq *q, unsigned int pidx, - unsigned int gen, unsigned int ndesc) + unsigned int gen, unsigned int ndesc, + const dma_addr_t *addr) { unsigned int sgl_flits, flits; struct work_request_hdr *from; @@ -1598,10 +1651,9 @@ static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb, flits = skb_transport_offset(skb) / 8; sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl; - sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb), - skb_tail_pointer(skb) - - skb_transport_header(skb), - adap->pdev); + sgl_flits = write_sgl(skb, sgp, skb_transport_header(skb), + skb_tail_pointer(skb) - skb_transport_header(skb), + addr); if (need_skb_unmap()) { setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits); skb->destructor = deferred_unmap_destructor; @@ -1659,6 +1711,12 @@ again: reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK); goto again; } + if (!immediate(skb) && + map_skb(adap->pdev, skb, (dma_addr_t *)skb->head)) { + spin_unlock(&q->lock); + return NET_XMIT_SUCCESS; + } + gen = q->gen; q->in_use += ndesc; pidx = q->pidx; @@ -1669,7 +1727,7 @@ again: reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK); } spin_unlock(&q->lock); - write_ofld_wr(adap, skb, q, pidx, gen, ndesc); + write_ofld_wr(adap, skb, q, pidx, gen, ndesc, (dma_addr_t *)skb->head); check_ring_tx_db(adap, q); return NET_XMIT_SUCCESS; } @@ -1687,6 +1745,7 @@ static void restart_offloadq(unsigned long data) struct sge_txq *q = &qs->txq[TXQ_OFLD]; const struct port_info *pi = netdev_priv(qs->netdev); struct adapter *adap = pi->adapter; + unsigned int written = 0; spin_lock(&q->lock); again: reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK); @@ -1706,10 +1765,15 @@ again: reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK); break; } + if (!immediate(skb) && + map_skb(adap->pdev, skb, (dma_addr_t *)skb->head)) + break; + gen = q->gen; q->in_use += ndesc; pidx = q->pidx; q->pidx += ndesc; + written += ndesc; if (q->pidx >= q->size) { q->pidx -= q->size; q->gen ^= 1; @@ -1717,7 +1781,8 @@ again: reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK); __skb_unlink(skb, &q->sendq); spin_unlock(&q->lock); - write_ofld_wr(adap, skb, q, pidx, gen, ndesc); + write_ofld_wr(adap, skb, q, pidx, gen, ndesc, + (dma_addr_t *)skb->head); spin_lock(&q->lock); } spin_unlock(&q->lock); @@ -1727,8 +1792,9 @@ again: reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK); set_bit(TXQ_LAST_PKT_DB, &q->flags); #endif wmb(); - t3_write_reg(adap, A_SG_KDOORBELL, - F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); + if (likely(written)) + t3_write_reg(adap, A_SG_KDOORBELL, + F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id)); } /** @@ -2853,9 +2919,9 @@ void t3_sge_err_intr_handler(struct adapter *adapter) * bother cleaning them up here. * */ -static void sge_timer_tx(unsigned long data) +static void sge_timer_tx(struct timer_list *t) { - struct sge_qset *qs = (struct sge_qset *)data; + struct sge_qset *qs = from_timer(qs, t, tx_reclaim_timer); struct port_info *pi = netdev_priv(qs->netdev); struct adapter *adap = pi->adapter; unsigned int tbd[SGE_TXQ_PER_SET] = {0, 0}; @@ -2893,10 +2959,10 @@ static void sge_timer_tx(unsigned long data) * starved. * */ -static void sge_timer_rx(unsigned long data) +static void sge_timer_rx(struct timer_list *t) { spinlock_t *lock; - struct sge_qset *qs = (struct sge_qset *)data; + struct sge_qset *qs = from_timer(qs, t, rx_reclaim_timer); struct port_info *pi = netdev_priv(qs->netdev); struct adapter *adap = pi->adapter; u32 status; @@ -2976,8 +3042,8 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports, struct sge_qset *q = &adapter->sge.qs[id]; init_qset_cntxt(q, id); - setup_timer(&q->tx_reclaim_timer, sge_timer_tx, (unsigned long)q); - setup_timer(&q->rx_reclaim_timer, sge_timer_rx, (unsigned long)q); + timer_setup(&q->tx_reclaim_timer, sge_timer_tx, 0); + timer_setup(&q->rx_reclaim_timer, sge_timer_rx, 0); q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size, sizeof(struct rx_desc), diff --git a/drivers/net/ethernet/chelsio/cxgb3/t3cdev.h b/drivers/net/ethernet/chelsio/cxgb3/t3cdev.h index 705713b56636..3c3e6cf6aca6 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/t3cdev.h +++ b/drivers/net/ethernet/chelsio/cxgb3/t3cdev.h @@ -60,7 +60,7 @@ struct t3cdev { int (*ctl)(struct t3cdev *dev, unsigned int req, void *data); void (*neigh_update)(struct t3cdev *dev, struct neighbour *neigh); void *priv; /* driver private data */ - void *l2opt; /* optional layer 2 data */ + void __rcu *l2opt; /* optional layer 2 data */ void *l3opt; /* optional layer 3 data */ void *l4opt; /* optional layer 4 data */ void *ulp; /* ulp stuff */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile index 24143c8abfdb..8c9c6b0d2e5d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/Makefile +++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile @@ -5,7 +5,10 @@ obj-$(CONFIG_CHELSIO_T4) += cxgb4.o -cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o cxgb4_uld.o sched.o cxgb4_filter.o cxgb4_tc_u32.o cxgb4_ptp.o +cxgb4-objs := cxgb4_main.o l2t.o smt.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o \ + cxgb4_uld.o sched.o cxgb4_filter.o cxgb4_tc_u32.o \ + cxgb4_ptp.o cxgb4_tc_flower.o cxgb4_cudbg.o \ + cudbg_common.o cudbg_lib.o cxgb4-$(CONFIG_CHELSIO_T4_DCB) += cxgb4_dcb.o cxgb4-$(CONFIG_CHELSIO_T4_FCOE) += cxgb4_fcoe.o cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o diff --git a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c index 3103ef9b561d..290039026ece 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c +++ b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c @@ -96,7 +96,8 @@ int cxgb4_clip_get(const struct net_device *dev, const u32 *lip, u8 v6) if (!ret) { ce = cte; read_unlock_bh(&ctbl->lock); - goto found; + refcount_inc(&ce->refcnt); + return 0; } } read_unlock_bh(&ctbl->lock); @@ -108,7 +109,7 @@ int cxgb4_clip_get(const struct net_device *dev, const u32 *lip, u8 v6) list_del(&ce->list); INIT_LIST_HEAD(&ce->list); spin_lock_init(&ce->lock); - atomic_set(&ce->refcnt, 0); + refcount_set(&ce->refcnt, 0); atomic_dec(&ctbl->nfree); list_add_tail(&ce->list, &ctbl->hash_list[hash]); if (v6) { @@ -138,9 +139,7 @@ int cxgb4_clip_get(const struct net_device *dev, const u32 *lip, u8 v6) return -ENOMEM; } write_unlock_bh(&ctbl->lock); -found: - atomic_inc(&ce->refcnt); - + refcount_set(&ce->refcnt, 1); return 0; } EXPORT_SYMBOL(cxgb4_clip_get); @@ -179,7 +178,7 @@ void cxgb4_clip_release(const struct net_device *dev, const u32 *lip, u8 v6) found: write_lock_bh(&ctbl->lock); spin_lock_bh(&ce->lock); - if (atomic_dec_and_test(&ce->refcnt)) { + if (refcount_dec_and_test(&ce->refcnt)) { list_del(&ce->list); INIT_LIST_HEAD(&ce->list); list_add_tail(&ce->list, &ctbl->ce_free_head); @@ -266,7 +265,7 @@ int clip_tbl_show(struct seq_file *seq, void *v) ip[0] = '\0'; sprintf(ip, "%pISc", &ce->addr); seq_printf(seq, "%-25s %u\n", ip, - atomic_read(&ce->refcnt)); + refcount_read(&ce->refcnt)); } } seq_printf(seq, "Free clip entries : %d\n", atomic_read(&ctbl->nfree)); diff --git a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h index 35eb43c6bcbb..a0e0ae19649f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h +++ b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h @@ -10,9 +10,11 @@ * release for licensing terms and conditions. */ +#include <linux/refcount.h> + struct clip_entry { spinlock_t lock; /* Hold while modifying clip reference */ - atomic_t refcnt; + refcount_t refcnt; struct list_head list; union { struct sockaddr_in addr; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_common.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_common.c new file mode 100644 index 000000000000..f78ba1743b5a --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_common.c @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2017 Chelsio Communications. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +#include "cxgb4.h" +#include "cudbg_if.h" +#include "cudbg_lib_common.h" + +int cudbg_get_buff(struct cudbg_buffer *pdbg_buff, u32 size, + struct cudbg_buffer *pin_buff) +{ + u32 offset; + + offset = pdbg_buff->offset; + if (offset + size > pdbg_buff->size) + return CUDBG_STATUS_NO_MEM; + + pin_buff->data = (char *)pdbg_buff->data + offset; + pin_buff->offset = offset; + pin_buff->size = size; + pdbg_buff->size -= size; + return 0; +} + +void cudbg_put_buff(struct cudbg_buffer *pin_buff, + struct cudbg_buffer *pdbg_buff) +{ + pdbg_buff->size += pin_buff->size; + pin_buff->data = NULL; + pin_buff->offset = 0; + pin_buff->size = 0; +} + +void cudbg_update_buff(struct cudbg_buffer *pin_buff, + struct cudbg_buffer *pout_buff) +{ + /* We already write to buffer provided by ethool, so just + * increment offset to next free space. + */ + pout_buff->offset += pin_buff->size; +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h new file mode 100644 index 000000000000..605689957496 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h @@ -0,0 +1,384 @@ +/* + * Copyright (C) 2017 Chelsio Communications. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +#ifndef __CUDBG_ENTITY_H__ +#define __CUDBG_ENTITY_H__ + +#define EDC0_FLAG 3 +#define EDC1_FLAG 4 + +#define CUDBG_ENTITY_SIGNATURE 0xCCEDB001 + +struct card_mem { + u16 size_edc0; + u16 size_edc1; + u16 mem_flag; +}; + +struct cudbg_mbox_log { + struct mbox_cmd entry; + u32 hi[MBOX_LEN / 8]; + u32 lo[MBOX_LEN / 8]; +}; + +struct cudbg_cim_qcfg { + u8 chip; + u16 base[CIM_NUM_IBQ + CIM_NUM_OBQ_T5]; + u16 size[CIM_NUM_IBQ + CIM_NUM_OBQ_T5]; + u16 thres[CIM_NUM_IBQ]; + u32 obq_wr[2 * CIM_NUM_OBQ_T5]; + u32 stat[4 * (CIM_NUM_IBQ + CIM_NUM_OBQ_T5)]; +}; + +struct cudbg_rss_vf_conf { + u32 rss_vf_vfl; + u32 rss_vf_vfh; +}; + +struct cudbg_pm_stats { + u32 tx_cnt[T6_PM_NSTATS]; + u32 rx_cnt[T6_PM_NSTATS]; + u64 tx_cyc[T6_PM_NSTATS]; + u64 rx_cyc[T6_PM_NSTATS]; +}; + +struct cudbg_hw_sched { + u32 kbps[NTX_SCHED]; + u32 ipg[NTX_SCHED]; + u32 pace_tab[NTX_SCHED]; + u32 mode; + u32 map; +}; + +struct ireg_field { + u32 ireg_addr; + u32 ireg_data; + u32 ireg_local_offset; + u32 ireg_offset_range; +}; + +struct ireg_buf { + struct ireg_field tp_pio; + u32 outbuf[32]; +}; + +struct cudbg_ulprx_la { + u32 data[ULPRX_LA_SIZE * 8]; + u32 size; +}; + +struct cudbg_tp_la { + u32 size; + u32 mode; + u8 data[0]; +}; + +struct cudbg_cim_pif_la { + int size; + u8 data[0]; +}; + +struct cudbg_clk_info { + u64 retransmit_min; + u64 retransmit_max; + u64 persist_timer_min; + u64 persist_timer_max; + u64 keepalive_idle_timer; + u64 keepalive_interval; + u64 initial_srtt; + u64 finwait2_timer; + u32 dack_timer; + u32 res; + u32 cclk_ps; + u32 tre; + u32 dack_re; +}; + +struct cudbg_tid_info_region { + u32 ntids; + u32 nstids; + u32 stid_base; + u32 hash_base; + + u32 natids; + u32 nftids; + u32 ftid_base; + u32 aftid_base; + u32 aftid_end; + + u32 sftid_base; + u32 nsftids; + + u32 uotid_base; + u32 nuotids; + + u32 sb; + u32 flags; + u32 le_db_conf; + u32 ip_users; + u32 ipv6_users; + + u32 hpftid_base; + u32 nhpftids; +}; + +#define CUDBG_TID_INFO_REV 1 + +struct cudbg_tid_info_region_rev1 { + struct cudbg_ver_hdr ver_hdr; + struct cudbg_tid_info_region tid; + u32 tid_start; + u32 reserved[16]; +}; + +#define CUDBG_MAX_FL_QIDS 1024 + +struct cudbg_ch_cntxt { + u32 cntxt_type; + u32 cntxt_id; + u32 data[SGE_CTXT_SIZE / 4]; +}; + +#define CUDBG_MAX_RPLC_SIZE 128 + +struct cudbg_mps_tcam { + u64 mask; + u32 rplc[8]; + u32 idx; + u32 cls_lo; + u32 cls_hi; + u32 rplc_size; + u32 vniy; + u32 vnix; + u32 dip_hit; + u32 vlan_vld; + u32 repli; + u16 ivlan; + u8 addr[ETH_ALEN]; + u8 lookup_type; + u8 port_num; + u8 reserved[2]; +}; + +#define CUDBG_VPD_PF_SIZE 0x800 +#define CUDBG_SCFG_VER_ADDR 0x06 +#define CUDBG_SCFG_VER_LEN 4 +#define CUDBG_VPD_VER_ADDR 0x18c7 +#define CUDBG_VPD_VER_LEN 2 + +struct cudbg_vpd_data { + u8 sn[SERNUM_LEN + 1]; + u8 bn[PN_LEN + 1]; + u8 na[MACADDR_LEN + 1]; + u8 mn[ID_LEN + 1]; + u16 fw_major; + u16 fw_minor; + u16 fw_micro; + u16 fw_build; + u32 scfg_vers; + u32 vpd_vers; +}; + +#define CUDBG_MAX_TCAM_TID 0x800 + +enum cudbg_le_entry_types { + LE_ET_UNKNOWN = 0, + LE_ET_TCAM_CON = 1, + LE_ET_TCAM_SERVER = 2, + LE_ET_TCAM_FILTER = 3, + LE_ET_TCAM_CLIP = 4, + LE_ET_TCAM_ROUTING = 5, + LE_ET_HASH_CON = 6, + LE_ET_INVALID_TID = 8, +}; + +struct cudbg_tcam { + u32 filter_start; + u32 server_start; + u32 clip_start; + u32 routing_start; + u32 tid_hash_base; + u32 max_tid; +}; + +struct cudbg_tid_data { + u32 tid; + u32 dbig_cmd; + u32 dbig_conf; + u32 dbig_rsp_stat; + u32 data[NUM_LE_DB_DBGI_RSP_DATA_INSTANCES]; +}; + +#define CUDBG_NUM_ULPTX 11 +#define CUDBG_NUM_ULPTX_READ 512 + +struct cudbg_ulptx_la { + u32 rdptr[CUDBG_NUM_ULPTX]; + u32 wrptr[CUDBG_NUM_ULPTX]; + u32 rddata[CUDBG_NUM_ULPTX]; + u32 rd_data[CUDBG_NUM_ULPTX][CUDBG_NUM_ULPTX_READ]; +}; + +#define CUDBG_CHAC_PBT_ADDR 0x2800 +#define CUDBG_CHAC_PBT_LRF 0x3000 +#define CUDBG_CHAC_PBT_DATA 0x3800 +#define CUDBG_PBT_DYNAMIC_ENTRIES 8 +#define CUDBG_PBT_STATIC_ENTRIES 16 +#define CUDBG_LRF_ENTRIES 8 +#define CUDBG_PBT_DATA_ENTRIES 512 + +struct cudbg_pbt_tables { + u32 pbt_dynamic[CUDBG_PBT_DYNAMIC_ENTRIES]; + u32 pbt_static[CUDBG_PBT_STATIC_ENTRIES]; + u32 lrf_table[CUDBG_LRF_ENTRIES]; + u32 pbt_data[CUDBG_PBT_DATA_ENTRIES]; +}; + +#define IREG_NUM_ELEM 4 + +static const u32 t6_tp_pio_array[][IREG_NUM_ELEM] = { + {0x7e40, 0x7e44, 0x020, 28}, /* t6_tp_pio_regs_20_to_3b */ + {0x7e40, 0x7e44, 0x040, 10}, /* t6_tp_pio_regs_40_to_49 */ + {0x7e40, 0x7e44, 0x050, 10}, /* t6_tp_pio_regs_50_to_59 */ + {0x7e40, 0x7e44, 0x060, 14}, /* t6_tp_pio_regs_60_to_6d */ + {0x7e40, 0x7e44, 0x06F, 1}, /* t6_tp_pio_regs_6f */ + {0x7e40, 0x7e44, 0x070, 6}, /* t6_tp_pio_regs_70_to_75 */ + {0x7e40, 0x7e44, 0x130, 18}, /* t6_tp_pio_regs_130_to_141 */ + {0x7e40, 0x7e44, 0x145, 19}, /* t6_tp_pio_regs_145_to_157 */ + {0x7e40, 0x7e44, 0x160, 1}, /* t6_tp_pio_regs_160 */ + {0x7e40, 0x7e44, 0x230, 25}, /* t6_tp_pio_regs_230_to_248 */ + {0x7e40, 0x7e44, 0x24a, 3}, /* t6_tp_pio_regs_24c */ + {0x7e40, 0x7e44, 0x8C0, 1} /* t6_tp_pio_regs_8c0 */ +}; + +static const u32 t5_tp_pio_array[][IREG_NUM_ELEM] = { + {0x7e40, 0x7e44, 0x020, 28}, /* t5_tp_pio_regs_20_to_3b */ + {0x7e40, 0x7e44, 0x040, 19}, /* t5_tp_pio_regs_40_to_52 */ + {0x7e40, 0x7e44, 0x054, 2}, /* t5_tp_pio_regs_54_to_55 */ + {0x7e40, 0x7e44, 0x060, 13}, /* t5_tp_pio_regs_60_to_6c */ + {0x7e40, 0x7e44, 0x06F, 1}, /* t5_tp_pio_regs_6f */ + {0x7e40, 0x7e44, 0x120, 4}, /* t5_tp_pio_regs_120_to_123 */ + {0x7e40, 0x7e44, 0x12b, 2}, /* t5_tp_pio_regs_12b_to_12c */ + {0x7e40, 0x7e44, 0x12f, 21}, /* t5_tp_pio_regs_12f_to_143 */ + {0x7e40, 0x7e44, 0x145, 19}, /* t5_tp_pio_regs_145_to_157 */ + {0x7e40, 0x7e44, 0x230, 25}, /* t5_tp_pio_regs_230_to_248 */ + {0x7e40, 0x7e44, 0x8C0, 1} /* t5_tp_pio_regs_8c0 */ +}; + +static const u32 t6_tp_tm_pio_array[][IREG_NUM_ELEM] = { + {0x7e18, 0x7e1c, 0x0, 12} +}; + +static const u32 t5_tp_tm_pio_array[][IREG_NUM_ELEM] = { + {0x7e18, 0x7e1c, 0x0, 12} +}; + +static const u32 t6_tp_mib_index_array[6][IREG_NUM_ELEM] = { + {0x7e50, 0x7e54, 0x0, 13}, + {0x7e50, 0x7e54, 0x10, 6}, + {0x7e50, 0x7e54, 0x18, 21}, + {0x7e50, 0x7e54, 0x30, 32}, + {0x7e50, 0x7e54, 0x50, 22}, + {0x7e50, 0x7e54, 0x68, 12} +}; + +static const u32 t5_tp_mib_index_array[9][IREG_NUM_ELEM] = { + {0x7e50, 0x7e54, 0x0, 13}, + {0x7e50, 0x7e54, 0x10, 6}, + {0x7e50, 0x7e54, 0x18, 8}, + {0x7e50, 0x7e54, 0x20, 13}, + {0x7e50, 0x7e54, 0x30, 16}, + {0x7e50, 0x7e54, 0x40, 16}, + {0x7e50, 0x7e54, 0x50, 16}, + {0x7e50, 0x7e54, 0x60, 6}, + {0x7e50, 0x7e54, 0x68, 4} +}; + +static const u32 t5_sge_dbg_index_array[2][IREG_NUM_ELEM] = { + {0x10cc, 0x10d0, 0x0, 16}, + {0x10cc, 0x10d4, 0x0, 16}, +}; + +static const u32 t5_pcie_pdbg_array[][IREG_NUM_ELEM] = { + {0x5a04, 0x5a0c, 0x00, 0x20}, /* t5_pcie_pdbg_regs_00_to_20 */ + {0x5a04, 0x5a0c, 0x21, 0x20}, /* t5_pcie_pdbg_regs_21_to_40 */ + {0x5a04, 0x5a0c, 0x41, 0x10}, /* t5_pcie_pdbg_regs_41_to_50 */ +}; + +static const u32 t5_pcie_cdbg_array[][IREG_NUM_ELEM] = { + {0x5a10, 0x5a18, 0x00, 0x20}, /* t5_pcie_cdbg_regs_00_to_20 */ + {0x5a10, 0x5a18, 0x21, 0x18}, /* t5_pcie_cdbg_regs_21_to_37 */ +}; + +static const u32 t5_pm_rx_array[][IREG_NUM_ELEM] = { + {0x8FD0, 0x8FD4, 0x10000, 0x20}, /* t5_pm_rx_regs_10000_to_10020 */ + {0x8FD0, 0x8FD4, 0x10021, 0x0D}, /* t5_pm_rx_regs_10021_to_1002c */ +}; + +static const u32 t5_pm_tx_array[][IREG_NUM_ELEM] = { + {0x8FF0, 0x8FF4, 0x10000, 0x20}, /* t5_pm_tx_regs_10000_to_10020 */ + {0x8FF0, 0x8FF4, 0x10021, 0x1D}, /* t5_pm_tx_regs_10021_to_1003c */ +}; + +static const u32 t6_ma_ireg_array[][IREG_NUM_ELEM] = { + {0x78f8, 0x78fc, 0xa000, 23}, /* t6_ma_regs_a000_to_a016 */ + {0x78f8, 0x78fc, 0xa400, 30}, /* t6_ma_regs_a400_to_a41e */ + {0x78f8, 0x78fc, 0xa800, 20} /* t6_ma_regs_a800_to_a813 */ +}; + +static const u32 t6_ma_ireg_array2[][IREG_NUM_ELEM] = { + {0x78f8, 0x78fc, 0xe400, 17}, /* t6_ma_regs_e400_to_e600 */ + {0x78f8, 0x78fc, 0xe640, 13} /* t6_ma_regs_e640_to_e7c0 */ +}; + +static const u32 t6_up_cim_reg_array[][IREG_NUM_ELEM] = { + {0x7b50, 0x7b54, 0x2000, 0x20}, /* up_cim_2000_to_207c */ + {0x7b50, 0x7b54, 0x2080, 0x1d}, /* up_cim_2080_to_20fc */ + {0x7b50, 0x7b54, 0x00, 0x20}, /* up_cim_00_to_7c */ + {0x7b50, 0x7b54, 0x80, 0x20}, /* up_cim_80_to_fc */ + {0x7b50, 0x7b54, 0x100, 0x11}, /* up_cim_100_to_14c */ + {0x7b50, 0x7b54, 0x200, 0x10}, /* up_cim_200_to_23c */ + {0x7b50, 0x7b54, 0x240, 0x2}, /* up_cim_240_to_244 */ + {0x7b50, 0x7b54, 0x250, 0x2}, /* up_cim_250_to_254 */ + {0x7b50, 0x7b54, 0x260, 0x2}, /* up_cim_260_to_264 */ + {0x7b50, 0x7b54, 0x270, 0x2}, /* up_cim_270_to_274 */ + {0x7b50, 0x7b54, 0x280, 0x20}, /* up_cim_280_to_2fc */ + {0x7b50, 0x7b54, 0x300, 0x20}, /* up_cim_300_to_37c */ + {0x7b50, 0x7b54, 0x380, 0x14}, /* up_cim_380_to_3cc */ + +}; + +static const u32 t5_up_cim_reg_array[][IREG_NUM_ELEM] = { + {0x7b50, 0x7b54, 0x2000, 0x20}, /* up_cim_2000_to_207c */ + {0x7b50, 0x7b54, 0x2080, 0x19}, /* up_cim_2080_to_20ec */ + {0x7b50, 0x7b54, 0x00, 0x20}, /* up_cim_00_to_7c */ + {0x7b50, 0x7b54, 0x80, 0x20}, /* up_cim_80_to_fc */ + {0x7b50, 0x7b54, 0x100, 0x11}, /* up_cim_100_to_14c */ + {0x7b50, 0x7b54, 0x200, 0x10}, /* up_cim_200_to_23c */ + {0x7b50, 0x7b54, 0x240, 0x2}, /* up_cim_240_to_244 */ + {0x7b50, 0x7b54, 0x250, 0x2}, /* up_cim_250_to_254 */ + {0x7b50, 0x7b54, 0x260, 0x2}, /* up_cim_260_to_264 */ + {0x7b50, 0x7b54, 0x270, 0x2}, /* up_cim_270_to_274 */ + {0x7b50, 0x7b54, 0x280, 0x20}, /* up_cim_280_to_2fc */ + {0x7b50, 0x7b54, 0x300, 0x20}, /* up_cim_300_to_37c */ + {0x7b50, 0x7b54, 0x380, 0x14}, /* up_cim_380_to_3cc */ +}; + +static const u32 t6_hma_ireg_array[][IREG_NUM_ELEM] = { + {0x51320, 0x51324, 0xa000, 32} /* t6_hma_regs_a000_to_a01f */ +}; +#endif /* __CUDBG_ENTITY_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h new file mode 100644 index 000000000000..e10ff1ee62c5 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h @@ -0,0 +1,90 @@ +/* + * Copyright (C) 2017 Chelsio Communications. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +#ifndef __CUDBG_IF_H__ +#define __CUDBG_IF_H__ + +/* Error codes */ +#define CUDBG_STATUS_NO_MEM -19 +#define CUDBG_STATUS_ENTITY_NOT_FOUND -24 +#define CUDBG_SYSTEM_ERROR -29 +#define CUDBG_STATUS_CCLK_NOT_DEFINED -32 + +#define CUDBG_MAJOR_VERSION 1 +#define CUDBG_MINOR_VERSION 14 + +enum cudbg_dbg_entity_type { + CUDBG_REG_DUMP = 1, + CUDBG_DEV_LOG = 2, + CUDBG_CIM_LA = 3, + CUDBG_CIM_MA_LA = 4, + CUDBG_CIM_QCFG = 5, + CUDBG_CIM_IBQ_TP0 = 6, + CUDBG_CIM_IBQ_TP1 = 7, + CUDBG_CIM_IBQ_ULP = 8, + CUDBG_CIM_IBQ_SGE0 = 9, + CUDBG_CIM_IBQ_SGE1 = 10, + CUDBG_CIM_IBQ_NCSI = 11, + CUDBG_CIM_OBQ_ULP0 = 12, + CUDBG_CIM_OBQ_ULP1 = 13, + CUDBG_CIM_OBQ_ULP2 = 14, + CUDBG_CIM_OBQ_ULP3 = 15, + CUDBG_CIM_OBQ_SGE = 16, + CUDBG_CIM_OBQ_NCSI = 17, + CUDBG_EDC0 = 18, + CUDBG_EDC1 = 19, + CUDBG_RSS = 22, + CUDBG_RSS_VF_CONF = 25, + CUDBG_PATH_MTU = 27, + CUDBG_PM_STATS = 30, + CUDBG_HW_SCHED = 31, + CUDBG_TP_INDIRECT = 36, + CUDBG_SGE_INDIRECT = 37, + CUDBG_ULPRX_LA = 41, + CUDBG_TP_LA = 43, + CUDBG_CIM_PIF_LA = 45, + CUDBG_CLK = 46, + CUDBG_CIM_OBQ_RXQ0 = 47, + CUDBG_CIM_OBQ_RXQ1 = 48, + CUDBG_PCIE_INDIRECT = 50, + CUDBG_PM_INDIRECT = 51, + CUDBG_TID_INFO = 54, + CUDBG_DUMP_CONTEXT = 56, + CUDBG_MPS_TCAM = 57, + CUDBG_VPD_DATA = 58, + CUDBG_LE_TCAM = 59, + CUDBG_CCTRL = 60, + CUDBG_MA_INDIRECT = 61, + CUDBG_ULPTX_LA = 62, + CUDBG_UP_CIM_INDIRECT = 64, + CUDBG_PBT_TABLE = 65, + CUDBG_MBOX_LOG = 66, + CUDBG_HMA_INDIRECT = 67, + CUDBG_MAX_ENTITY = 70, +}; + +struct cudbg_init { + struct adapter *adap; /* Pointer to adapter structure */ + void *outbuf; /* Output buffer */ + u32 outbuf_size; /* Output buffer size */ +}; + +static inline unsigned int cudbg_mbytes_to_bytes(unsigned int size) +{ + return size * 1024 * 1024; +} +#endif /* __CUDBG_IF_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c new file mode 100644 index 000000000000..d699bf88d18f --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -0,0 +1,1929 @@ +/* + * Copyright (C) 2017 Chelsio Communications. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +#include "t4_regs.h" +#include "cxgb4.h" +#include "cudbg_if.h" +#include "cudbg_lib_common.h" +#include "cudbg_lib.h" +#include "cudbg_entity.h" + +static void cudbg_write_and_release_buff(struct cudbg_buffer *pin_buff, + struct cudbg_buffer *dbg_buff) +{ + cudbg_update_buff(pin_buff, dbg_buff); + cudbg_put_buff(pin_buff, dbg_buff); +} + +static int is_fw_attached(struct cudbg_init *pdbg_init) +{ + struct adapter *padap = pdbg_init->adap; + + if (!(padap->flags & FW_OK) || padap->use_bd) + return 0; + + return 1; +} + +/* This function will add additional padding bytes into debug_buffer to make it + * 4 byte aligned. + */ +void cudbg_align_debug_buffer(struct cudbg_buffer *dbg_buff, + struct cudbg_entity_hdr *entity_hdr) +{ + u8 zero_buf[4] = {0}; + u8 padding, remain; + + remain = (dbg_buff->offset - entity_hdr->start_offset) % 4; + padding = 4 - remain; + if (remain) { + memcpy(((u8 *)dbg_buff->data) + dbg_buff->offset, &zero_buf, + padding); + dbg_buff->offset += padding; + entity_hdr->num_pad = padding; + } + entity_hdr->size = dbg_buff->offset - entity_hdr->start_offset; +} + +struct cudbg_entity_hdr *cudbg_get_entity_hdr(void *outbuf, int i) +{ + struct cudbg_hdr *cudbg_hdr = (struct cudbg_hdr *)outbuf; + + return (struct cudbg_entity_hdr *) + ((char *)outbuf + cudbg_hdr->hdr_len + + (sizeof(struct cudbg_entity_hdr) * (i - 1))); +} + +static int cudbg_read_vpd_reg(struct adapter *padap, u32 addr, u32 len, + void *dest) +{ + int vaddr, rc; + + vaddr = t4_eeprom_ptov(addr, padap->pf, EEPROMPFSIZE); + if (vaddr < 0) + return vaddr; + + rc = pci_read_vpd(padap->pdev, vaddr, len, dest); + if (rc < 0) + return rc; + + return 0; +} + +int cudbg_collect_reg_dump(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + u32 buf_size = 0; + int rc = 0; + + if (is_t4(padap->params.chip)) + buf_size = T4_REGMAP_SIZE; + else if (is_t5(padap->params.chip) || is_t6(padap->params.chip)) + buf_size = T5_REGMAP_SIZE; + + rc = cudbg_get_buff(dbg_buff, buf_size, &temp_buff); + if (rc) + return rc; + t4_get_regs(padap, (void *)temp_buff.data, temp_buff.size); + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_fw_devlog(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct devlog_params *dparams; + int rc = 0; + + rc = t4_init_devlog_params(padap); + if (rc < 0) { + cudbg_err->sys_err = rc; + return rc; + } + + dparams = &padap->params.devlog; + rc = cudbg_get_buff(dbg_buff, dparams->size, &temp_buff); + if (rc) + return rc; + + /* Collect FW devlog */ + if (dparams->start != 0) { + spin_lock(&padap->win0_lock); + rc = t4_memory_rw(padap, padap->params.drv_memwin, + dparams->memtype, dparams->start, + dparams->size, + (__be32 *)(char *)temp_buff.data, + 1); + spin_unlock(&padap->win0_lock); + if (rc) { + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + } + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_cim_la(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + int size, rc; + u32 cfg = 0; + + if (is_t6(padap->params.chip)) { + size = padap->params.cim_la_size / 10 + 1; + size *= 11 * sizeof(u32); + } else { + size = padap->params.cim_la_size / 8; + size *= 8 * sizeof(u32); + } + + size += sizeof(cfg); + rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + if (rc) + return rc; + + rc = t4_cim_read(padap, UP_UP_DBG_LA_CFG_A, 1, &cfg); + if (rc) { + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + + memcpy((char *)temp_buff.data, &cfg, sizeof(cfg)); + rc = t4_cim_read_la(padap, + (u32 *)((char *)temp_buff.data + sizeof(cfg)), + NULL); + if (rc < 0) { + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_cim_ma_la(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + int size, rc; + + size = 2 * CIM_MALA_SIZE * 5 * sizeof(u32); + rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + if (rc) + return rc; + + t4_cim_read_ma_la(padap, + (u32 *)temp_buff.data, + (u32 *)((char *)temp_buff.data + + 5 * CIM_MALA_SIZE)); + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_cim_qcfg(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct cudbg_cim_qcfg *cim_qcfg_data; + int rc; + + rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_cim_qcfg), + &temp_buff); + if (rc) + return rc; + + cim_qcfg_data = (struct cudbg_cim_qcfg *)temp_buff.data; + cim_qcfg_data->chip = padap->params.chip; + rc = t4_cim_read(padap, UP_IBQ_0_RDADDR_A, + ARRAY_SIZE(cim_qcfg_data->stat), cim_qcfg_data->stat); + if (rc) { + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + + rc = t4_cim_read(padap, UP_OBQ_0_REALADDR_A, + ARRAY_SIZE(cim_qcfg_data->obq_wr), + cim_qcfg_data->obq_wr); + if (rc) { + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + + t4_read_cimq_cfg(padap, cim_qcfg_data->base, cim_qcfg_data->size, + cim_qcfg_data->thres); + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +static int cudbg_read_cim_ibq(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err, int qid) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + int no_of_read_words, rc = 0; + u32 qsize; + + /* collect CIM IBQ */ + qsize = CIM_IBQ_SIZE * 4 * sizeof(u32); + rc = cudbg_get_buff(dbg_buff, qsize, &temp_buff); + if (rc) + return rc; + + /* t4_read_cim_ibq will return no. of read words or error */ + no_of_read_words = t4_read_cim_ibq(padap, qid, + (u32 *)temp_buff.data, qsize); + /* no_of_read_words is less than or equal to 0 means error */ + if (no_of_read_words <= 0) { + if (!no_of_read_words) + rc = CUDBG_SYSTEM_ERROR; + else + rc = no_of_read_words; + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_cim_ibq_tp0(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + return cudbg_read_cim_ibq(pdbg_init, dbg_buff, cudbg_err, 0); +} + +int cudbg_collect_cim_ibq_tp1(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + return cudbg_read_cim_ibq(pdbg_init, dbg_buff, cudbg_err, 1); +} + +int cudbg_collect_cim_ibq_ulp(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + return cudbg_read_cim_ibq(pdbg_init, dbg_buff, cudbg_err, 2); +} + +int cudbg_collect_cim_ibq_sge0(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + return cudbg_read_cim_ibq(pdbg_init, dbg_buff, cudbg_err, 3); +} + +int cudbg_collect_cim_ibq_sge1(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + return cudbg_read_cim_ibq(pdbg_init, dbg_buff, cudbg_err, 4); +} + +int cudbg_collect_cim_ibq_ncsi(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + return cudbg_read_cim_ibq(pdbg_init, dbg_buff, cudbg_err, 5); +} + +u32 cudbg_cim_obq_size(struct adapter *padap, int qid) +{ + u32 value; + + t4_write_reg(padap, CIM_QUEUE_CONFIG_REF_A, OBQSELECT_F | + QUENUMSELECT_V(qid)); + value = t4_read_reg(padap, CIM_QUEUE_CONFIG_CTRL_A); + value = CIMQSIZE_G(value) * 64; /* size in number of words */ + return value * sizeof(u32); +} + +static int cudbg_read_cim_obq(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err, int qid) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + int no_of_read_words, rc = 0; + u32 qsize; + + /* collect CIM OBQ */ + qsize = cudbg_cim_obq_size(padap, qid); + rc = cudbg_get_buff(dbg_buff, qsize, &temp_buff); + if (rc) + return rc; + + /* t4_read_cim_obq will return no. of read words or error */ + no_of_read_words = t4_read_cim_obq(padap, qid, + (u32 *)temp_buff.data, qsize); + /* no_of_read_words is less than or equal to 0 means error */ + if (no_of_read_words <= 0) { + if (!no_of_read_words) + rc = CUDBG_SYSTEM_ERROR; + else + rc = no_of_read_words; + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_cim_obq_ulp0(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 0); +} + +int cudbg_collect_cim_obq_ulp1(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 1); +} + +int cudbg_collect_cim_obq_ulp2(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 2); +} + +int cudbg_collect_cim_obq_ulp3(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 3); +} + +int cudbg_collect_cim_obq_sge(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 4); +} + +int cudbg_collect_cim_obq_ncsi(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 5); +} + +int cudbg_collect_obq_sge_rx_q0(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 6); +} + +int cudbg_collect_obq_sge_rx_q1(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 7); +} + +static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, u8 mem_type, + unsigned long tot_len, + struct cudbg_error *cudbg_err) +{ + unsigned long bytes, bytes_left, bytes_read = 0; + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + int rc = 0; + + bytes_left = tot_len; + while (bytes_left > 0) { + bytes = min_t(unsigned long, bytes_left, + (unsigned long)CUDBG_CHUNK_SIZE); + rc = cudbg_get_buff(dbg_buff, bytes, &temp_buff); + if (rc) + return rc; + spin_lock(&padap->win0_lock); + rc = t4_memory_rw(padap, MEMWIN_NIC, mem_type, + bytes_read, bytes, + (__be32 *)temp_buff.data, + 1); + spin_unlock(&padap->win0_lock); + if (rc) { + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + bytes_left -= bytes; + bytes_read += bytes; + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + } + return rc; +} + +static void cudbg_collect_mem_info(struct cudbg_init *pdbg_init, + struct card_mem *mem_info) +{ + struct adapter *padap = pdbg_init->adap; + u32 value; + + value = t4_read_reg(padap, MA_EDRAM0_BAR_A); + value = EDRAM0_SIZE_G(value); + mem_info->size_edc0 = (u16)value; + + value = t4_read_reg(padap, MA_EDRAM1_BAR_A); + value = EDRAM1_SIZE_G(value); + mem_info->size_edc1 = (u16)value; + + value = t4_read_reg(padap, MA_TARGET_MEM_ENABLE_A); + if (value & EDRAM0_ENABLE_F) + mem_info->mem_flag |= (1 << EDC0_FLAG); + if (value & EDRAM1_ENABLE_F) + mem_info->mem_flag |= (1 << EDC1_FLAG); +} + +static void cudbg_t4_fwcache(struct cudbg_init *pdbg_init, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + int rc; + + if (is_fw_attached(pdbg_init)) { + /* Flush uP dcache before reading edcX/mcX */ + rc = t4_fwcache(padap, FW_PARAM_DEV_FWCACHE_FLUSH); + if (rc) + cudbg_err->sys_warn = rc; + } +} + +static int cudbg_collect_mem_region(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err, + u8 mem_type) +{ + struct card_mem mem_info = {0}; + unsigned long flag, size; + int rc; + + cudbg_t4_fwcache(pdbg_init, cudbg_err); + cudbg_collect_mem_info(pdbg_init, &mem_info); + switch (mem_type) { + case MEM_EDC0: + flag = (1 << EDC0_FLAG); + size = cudbg_mbytes_to_bytes(mem_info.size_edc0); + break; + case MEM_EDC1: + flag = (1 << EDC1_FLAG); + size = cudbg_mbytes_to_bytes(mem_info.size_edc1); + break; + default: + rc = CUDBG_STATUS_ENTITY_NOT_FOUND; + goto err; + } + + if (mem_info.mem_flag & flag) { + rc = cudbg_read_fw_mem(pdbg_init, dbg_buff, mem_type, + size, cudbg_err); + if (rc) + goto err; + } else { + rc = CUDBG_STATUS_ENTITY_NOT_FOUND; + goto err; + } +err: + return rc; +} + +int cudbg_collect_edc0_meminfo(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + return cudbg_collect_mem_region(pdbg_init, dbg_buff, cudbg_err, + MEM_EDC0); +} + +int cudbg_collect_edc1_meminfo(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + return cudbg_collect_mem_region(pdbg_init, dbg_buff, cudbg_err, + MEM_EDC1); +} + +int cudbg_collect_rss(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + int rc; + + rc = cudbg_get_buff(dbg_buff, RSS_NENTRIES * sizeof(u16), &temp_buff); + if (rc) + return rc; + + rc = t4_read_rss(padap, (u16 *)temp_buff.data); + if (rc) { + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_rss_vf_config(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct cudbg_rss_vf_conf *vfconf; + int vf, rc, vf_count; + + vf_count = padap->params.arch.vfcount; + rc = cudbg_get_buff(dbg_buff, + vf_count * sizeof(struct cudbg_rss_vf_conf), + &temp_buff); + if (rc) + return rc; + + vfconf = (struct cudbg_rss_vf_conf *)temp_buff.data; + for (vf = 0; vf < vf_count; vf++) + t4_read_rss_vf_config(padap, vf, &vfconf[vf].rss_vf_vfl, + &vfconf[vf].rss_vf_vfh, true); + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_path_mtu(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + int rc; + + rc = cudbg_get_buff(dbg_buff, NMTUS * sizeof(u16), &temp_buff); + if (rc) + return rc; + + t4_read_mtu_tbl(padap, (u16 *)temp_buff.data, NULL); + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_pm_stats(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct cudbg_pm_stats *pm_stats_buff; + int rc; + + rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_pm_stats), + &temp_buff); + if (rc) + return rc; + + pm_stats_buff = (struct cudbg_pm_stats *)temp_buff.data; + t4_pmtx_get_stats(padap, pm_stats_buff->tx_cnt, pm_stats_buff->tx_cyc); + t4_pmrx_get_stats(padap, pm_stats_buff->rx_cnt, pm_stats_buff->rx_cyc); + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_hw_sched(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct cudbg_hw_sched *hw_sched_buff; + int i, rc = 0; + + if (!padap->params.vpd.cclk) + return CUDBG_STATUS_CCLK_NOT_DEFINED; + + rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_hw_sched), + &temp_buff); + hw_sched_buff = (struct cudbg_hw_sched *)temp_buff.data; + hw_sched_buff->map = t4_read_reg(padap, TP_TX_MOD_QUEUE_REQ_MAP_A); + hw_sched_buff->mode = TIMERMODE_G(t4_read_reg(padap, TP_MOD_CONFIG_A)); + t4_read_pace_tbl(padap, hw_sched_buff->pace_tab); + for (i = 0; i < NTX_SCHED; ++i) + t4_get_tx_sched(padap, i, &hw_sched_buff->kbps[i], + &hw_sched_buff->ipg[i], true); + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_tp_indirect(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct ireg_buf *ch_tp_pio; + int i, rc, n = 0; + u32 size; + + if (is_t5(padap->params.chip)) + n = sizeof(t5_tp_pio_array) + + sizeof(t5_tp_tm_pio_array) + + sizeof(t5_tp_mib_index_array); + else + n = sizeof(t6_tp_pio_array) + + sizeof(t6_tp_tm_pio_array) + + sizeof(t6_tp_mib_index_array); + + n = n / (IREG_NUM_ELEM * sizeof(u32)); + size = sizeof(struct ireg_buf) * n; + rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + if (rc) + return rc; + + ch_tp_pio = (struct ireg_buf *)temp_buff.data; + + /* TP_PIO */ + if (is_t5(padap->params.chip)) + n = sizeof(t5_tp_pio_array) / (IREG_NUM_ELEM * sizeof(u32)); + else if (is_t6(padap->params.chip)) + n = sizeof(t6_tp_pio_array) / (IREG_NUM_ELEM * sizeof(u32)); + + for (i = 0; i < n; i++) { + struct ireg_field *tp_pio = &ch_tp_pio->tp_pio; + u32 *buff = ch_tp_pio->outbuf; + + if (is_t5(padap->params.chip)) { + tp_pio->ireg_addr = t5_tp_pio_array[i][0]; + tp_pio->ireg_data = t5_tp_pio_array[i][1]; + tp_pio->ireg_local_offset = t5_tp_pio_array[i][2]; + tp_pio->ireg_offset_range = t5_tp_pio_array[i][3]; + } else if (is_t6(padap->params.chip)) { + tp_pio->ireg_addr = t6_tp_pio_array[i][0]; + tp_pio->ireg_data = t6_tp_pio_array[i][1]; + tp_pio->ireg_local_offset = t6_tp_pio_array[i][2]; + tp_pio->ireg_offset_range = t6_tp_pio_array[i][3]; + } + t4_tp_pio_read(padap, buff, tp_pio->ireg_offset_range, + tp_pio->ireg_local_offset, true); + ch_tp_pio++; + } + + /* TP_TM_PIO */ + if (is_t5(padap->params.chip)) + n = sizeof(t5_tp_tm_pio_array) / (IREG_NUM_ELEM * sizeof(u32)); + else if (is_t6(padap->params.chip)) + n = sizeof(t6_tp_tm_pio_array) / (IREG_NUM_ELEM * sizeof(u32)); + + for (i = 0; i < n; i++) { + struct ireg_field *tp_pio = &ch_tp_pio->tp_pio; + u32 *buff = ch_tp_pio->outbuf; + + if (is_t5(padap->params.chip)) { + tp_pio->ireg_addr = t5_tp_tm_pio_array[i][0]; + tp_pio->ireg_data = t5_tp_tm_pio_array[i][1]; + tp_pio->ireg_local_offset = t5_tp_tm_pio_array[i][2]; + tp_pio->ireg_offset_range = t5_tp_tm_pio_array[i][3]; + } else if (is_t6(padap->params.chip)) { + tp_pio->ireg_addr = t6_tp_tm_pio_array[i][0]; + tp_pio->ireg_data = t6_tp_tm_pio_array[i][1]; + tp_pio->ireg_local_offset = t6_tp_tm_pio_array[i][2]; + tp_pio->ireg_offset_range = t6_tp_tm_pio_array[i][3]; + } + t4_tp_tm_pio_read(padap, buff, tp_pio->ireg_offset_range, + tp_pio->ireg_local_offset, true); + ch_tp_pio++; + } + + /* TP_MIB_INDEX */ + if (is_t5(padap->params.chip)) + n = sizeof(t5_tp_mib_index_array) / + (IREG_NUM_ELEM * sizeof(u32)); + else if (is_t6(padap->params.chip)) + n = sizeof(t6_tp_mib_index_array) / + (IREG_NUM_ELEM * sizeof(u32)); + + for (i = 0; i < n ; i++) { + struct ireg_field *tp_pio = &ch_tp_pio->tp_pio; + u32 *buff = ch_tp_pio->outbuf; + + if (is_t5(padap->params.chip)) { + tp_pio->ireg_addr = t5_tp_mib_index_array[i][0]; + tp_pio->ireg_data = t5_tp_mib_index_array[i][1]; + tp_pio->ireg_local_offset = + t5_tp_mib_index_array[i][2]; + tp_pio->ireg_offset_range = + t5_tp_mib_index_array[i][3]; + } else if (is_t6(padap->params.chip)) { + tp_pio->ireg_addr = t6_tp_mib_index_array[i][0]; + tp_pio->ireg_data = t6_tp_mib_index_array[i][1]; + tp_pio->ireg_local_offset = + t6_tp_mib_index_array[i][2]; + tp_pio->ireg_offset_range = + t6_tp_mib_index_array[i][3]; + } + t4_tp_mib_read(padap, buff, tp_pio->ireg_offset_range, + tp_pio->ireg_local_offset, true); + ch_tp_pio++; + } + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_sge_indirect(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct ireg_buf *ch_sge_dbg; + int i, rc; + + rc = cudbg_get_buff(dbg_buff, sizeof(*ch_sge_dbg) * 2, &temp_buff); + if (rc) + return rc; + + ch_sge_dbg = (struct ireg_buf *)temp_buff.data; + for (i = 0; i < 2; i++) { + struct ireg_field *sge_pio = &ch_sge_dbg->tp_pio; + u32 *buff = ch_sge_dbg->outbuf; + + sge_pio->ireg_addr = t5_sge_dbg_index_array[i][0]; + sge_pio->ireg_data = t5_sge_dbg_index_array[i][1]; + sge_pio->ireg_local_offset = t5_sge_dbg_index_array[i][2]; + sge_pio->ireg_offset_range = t5_sge_dbg_index_array[i][3]; + t4_read_indirect(padap, + sge_pio->ireg_addr, + sge_pio->ireg_data, + buff, + sge_pio->ireg_offset_range, + sge_pio->ireg_local_offset); + ch_sge_dbg++; + } + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_ulprx_la(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct cudbg_ulprx_la *ulprx_la_buff; + int rc; + + rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_ulprx_la), + &temp_buff); + if (rc) + return rc; + + ulprx_la_buff = (struct cudbg_ulprx_la *)temp_buff.data; + t4_ulprx_read_la(padap, (u32 *)ulprx_la_buff->data); + ulprx_la_buff->size = ULPRX_LA_SIZE; + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_tp_la(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct cudbg_tp_la *tp_la_buff; + int size, rc; + + size = sizeof(struct cudbg_tp_la) + TPLA_SIZE * sizeof(u64); + rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + if (rc) + return rc; + + tp_la_buff = (struct cudbg_tp_la *)temp_buff.data; + tp_la_buff->mode = DBGLAMODE_G(t4_read_reg(padap, TP_DBG_LA_CONFIG_A)); + t4_tp_read_la(padap, (u64 *)tp_la_buff->data, NULL); + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_cim_pif_la(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct cudbg_cim_pif_la *cim_pif_la_buff; + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + int size, rc; + + size = sizeof(struct cudbg_cim_pif_la) + + 2 * CIM_PIFLA_SIZE * 6 * sizeof(u32); + rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + if (rc) + return rc; + + cim_pif_la_buff = (struct cudbg_cim_pif_la *)temp_buff.data; + cim_pif_la_buff->size = CIM_PIFLA_SIZE; + t4_cim_read_pif_la(padap, (u32 *)cim_pif_la_buff->data, + (u32 *)cim_pif_la_buff->data + 6 * CIM_PIFLA_SIZE, + NULL, NULL); + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_clk_info(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct cudbg_clk_info *clk_info_buff; + u64 tp_tick_us; + int rc; + + if (!padap->params.vpd.cclk) + return CUDBG_STATUS_CCLK_NOT_DEFINED; + + rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_clk_info), + &temp_buff); + if (rc) + return rc; + + clk_info_buff = (struct cudbg_clk_info *)temp_buff.data; + clk_info_buff->cclk_ps = 1000000000 / padap->params.vpd.cclk; /* psec */ + clk_info_buff->res = t4_read_reg(padap, TP_TIMER_RESOLUTION_A); + clk_info_buff->tre = TIMERRESOLUTION_G(clk_info_buff->res); + clk_info_buff->dack_re = DELAYEDACKRESOLUTION_G(clk_info_buff->res); + tp_tick_us = (clk_info_buff->cclk_ps << clk_info_buff->tre) / 1000000; + + clk_info_buff->dack_timer = + (clk_info_buff->cclk_ps << clk_info_buff->dack_re) / 1000000 * + t4_read_reg(padap, TP_DACK_TIMER_A); + clk_info_buff->retransmit_min = + tp_tick_us * t4_read_reg(padap, TP_RXT_MIN_A); + clk_info_buff->retransmit_max = + tp_tick_us * t4_read_reg(padap, TP_RXT_MAX_A); + clk_info_buff->persist_timer_min = + tp_tick_us * t4_read_reg(padap, TP_PERS_MIN_A); + clk_info_buff->persist_timer_max = + tp_tick_us * t4_read_reg(padap, TP_PERS_MAX_A); + clk_info_buff->keepalive_idle_timer = + tp_tick_us * t4_read_reg(padap, TP_KEEP_IDLE_A); + clk_info_buff->keepalive_interval = + tp_tick_us * t4_read_reg(padap, TP_KEEP_INTVL_A); + clk_info_buff->initial_srtt = + tp_tick_us * INITSRTT_G(t4_read_reg(padap, TP_INIT_SRTT_A)); + clk_info_buff->finwait2_timer = + tp_tick_us * t4_read_reg(padap, TP_FINWAIT2_TIMER_A); + + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_pcie_indirect(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct ireg_buf *ch_pcie; + int i, rc, n; + u32 size; + + n = sizeof(t5_pcie_pdbg_array) / (IREG_NUM_ELEM * sizeof(u32)); + size = sizeof(struct ireg_buf) * n * 2; + rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + if (rc) + return rc; + + ch_pcie = (struct ireg_buf *)temp_buff.data; + /* PCIE_PDBG */ + for (i = 0; i < n; i++) { + struct ireg_field *pcie_pio = &ch_pcie->tp_pio; + u32 *buff = ch_pcie->outbuf; + + pcie_pio->ireg_addr = t5_pcie_pdbg_array[i][0]; + pcie_pio->ireg_data = t5_pcie_pdbg_array[i][1]; + pcie_pio->ireg_local_offset = t5_pcie_pdbg_array[i][2]; + pcie_pio->ireg_offset_range = t5_pcie_pdbg_array[i][3]; + t4_read_indirect(padap, + pcie_pio->ireg_addr, + pcie_pio->ireg_data, + buff, + pcie_pio->ireg_offset_range, + pcie_pio->ireg_local_offset); + ch_pcie++; + } + + /* PCIE_CDBG */ + n = sizeof(t5_pcie_cdbg_array) / (IREG_NUM_ELEM * sizeof(u32)); + for (i = 0; i < n; i++) { + struct ireg_field *pcie_pio = &ch_pcie->tp_pio; + u32 *buff = ch_pcie->outbuf; + + pcie_pio->ireg_addr = t5_pcie_cdbg_array[i][0]; + pcie_pio->ireg_data = t5_pcie_cdbg_array[i][1]; + pcie_pio->ireg_local_offset = t5_pcie_cdbg_array[i][2]; + pcie_pio->ireg_offset_range = t5_pcie_cdbg_array[i][3]; + t4_read_indirect(padap, + pcie_pio->ireg_addr, + pcie_pio->ireg_data, + buff, + pcie_pio->ireg_offset_range, + pcie_pio->ireg_local_offset); + ch_pcie++; + } + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_pm_indirect(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct ireg_buf *ch_pm; + int i, rc, n; + u32 size; + + n = sizeof(t5_pm_rx_array) / (IREG_NUM_ELEM * sizeof(u32)); + size = sizeof(struct ireg_buf) * n * 2; + rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + if (rc) + return rc; + + ch_pm = (struct ireg_buf *)temp_buff.data; + /* PM_RX */ + for (i = 0; i < n; i++) { + struct ireg_field *pm_pio = &ch_pm->tp_pio; + u32 *buff = ch_pm->outbuf; + + pm_pio->ireg_addr = t5_pm_rx_array[i][0]; + pm_pio->ireg_data = t5_pm_rx_array[i][1]; + pm_pio->ireg_local_offset = t5_pm_rx_array[i][2]; + pm_pio->ireg_offset_range = t5_pm_rx_array[i][3]; + t4_read_indirect(padap, + pm_pio->ireg_addr, + pm_pio->ireg_data, + buff, + pm_pio->ireg_offset_range, + pm_pio->ireg_local_offset); + ch_pm++; + } + + /* PM_TX */ + n = sizeof(t5_pm_tx_array) / (IREG_NUM_ELEM * sizeof(u32)); + for (i = 0; i < n; i++) { + struct ireg_field *pm_pio = &ch_pm->tp_pio; + u32 *buff = ch_pm->outbuf; + + pm_pio->ireg_addr = t5_pm_tx_array[i][0]; + pm_pio->ireg_data = t5_pm_tx_array[i][1]; + pm_pio->ireg_local_offset = t5_pm_tx_array[i][2]; + pm_pio->ireg_offset_range = t5_pm_tx_array[i][3]; + t4_read_indirect(padap, + pm_pio->ireg_addr, + pm_pio->ireg_data, + buff, + pm_pio->ireg_offset_range, + pm_pio->ireg_local_offset); + ch_pm++; + } + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_tid(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_tid_info_region_rev1 *tid1; + struct cudbg_buffer temp_buff = { 0 }; + struct cudbg_tid_info_region *tid; + u32 para[2], val[2]; + int rc; + + rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_tid_info_region_rev1), + &temp_buff); + if (rc) + return rc; + + tid1 = (struct cudbg_tid_info_region_rev1 *)temp_buff.data; + tid = &tid1->tid; + tid1->ver_hdr.signature = CUDBG_ENTITY_SIGNATURE; + tid1->ver_hdr.revision = CUDBG_TID_INFO_REV; + tid1->ver_hdr.size = sizeof(struct cudbg_tid_info_region_rev1) - + sizeof(struct cudbg_ver_hdr); + +#define FW_PARAM_PFVF_A(param) \ + (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) | \ + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_##param) | \ + FW_PARAMS_PARAM_Y_V(0) | \ + FW_PARAMS_PARAM_Z_V(0)) + + para[0] = FW_PARAM_PFVF_A(ETHOFLD_START); + para[1] = FW_PARAM_PFVF_A(ETHOFLD_END); + rc = t4_query_params(padap, padap->mbox, padap->pf, 0, 2, para, val); + if (rc < 0) { + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + tid->uotid_base = val[0]; + tid->nuotids = val[1] - val[0] + 1; + + if (is_t5(padap->params.chip)) { + tid->sb = t4_read_reg(padap, LE_DB_SERVER_INDEX_A) / 4; + } else if (is_t6(padap->params.chip)) { + tid1->tid_start = + t4_read_reg(padap, LE_DB_ACTIVE_TABLE_START_INDEX_A); + tid->sb = t4_read_reg(padap, LE_DB_SRVR_START_INDEX_A); + + para[0] = FW_PARAM_PFVF_A(HPFILTER_START); + para[1] = FW_PARAM_PFVF_A(HPFILTER_END); + rc = t4_query_params(padap, padap->mbox, padap->pf, 0, 2, + para, val); + if (rc < 0) { + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + tid->hpftid_base = val[0]; + tid->nhpftids = val[1] - val[0] + 1; + } + + tid->ntids = padap->tids.ntids; + tid->nstids = padap->tids.nstids; + tid->stid_base = padap->tids.stid_base; + tid->hash_base = padap->tids.hash_base; + + tid->natids = padap->tids.natids; + tid->nftids = padap->tids.nftids; + tid->ftid_base = padap->tids.ftid_base; + tid->aftid_base = padap->tids.aftid_base; + tid->aftid_end = padap->tids.aftid_end; + + tid->sftid_base = padap->tids.sftid_base; + tid->nsftids = padap->tids.nsftids; + + tid->flags = padap->flags; + tid->le_db_conf = t4_read_reg(padap, LE_DB_CONFIG_A); + tid->ip_users = t4_read_reg(padap, LE_DB_ACT_CNT_IPV4_A); + tid->ipv6_users = t4_read_reg(padap, LE_DB_ACT_CNT_IPV6_A); + +#undef FW_PARAM_PFVF_A + + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_dump_context_size(struct adapter *padap) +{ + u32 value, size; + u8 flq; + + value = t4_read_reg(padap, SGE_FLM_CFG_A); + + /* Get number of data freelist queues */ + flq = HDRSTARTFLQ_G(value); + size = CUDBG_MAX_FL_QIDS >> flq; + + /* Add extra space for congestion manager contexts. + * The number of CONM contexts are same as number of freelist + * queues. + */ + size += size; + return size * sizeof(struct cudbg_ch_cntxt); +} + +static void cudbg_read_sge_ctxt(struct cudbg_init *pdbg_init, u32 cid, + enum ctxt_type ctype, u32 *data) +{ + struct adapter *padap = pdbg_init->adap; + int rc = -1; + + /* Under heavy traffic, the SGE Queue contexts registers will be + * frequently accessed by firmware. + * + * To avoid conflicts with firmware, always ask firmware to fetch + * the SGE Queue contexts via mailbox. On failure, fallback to + * accessing hardware registers directly. + */ + if (is_fw_attached(pdbg_init)) + rc = t4_sge_ctxt_rd(padap, padap->mbox, cid, ctype, data); + if (rc) + t4_sge_ctxt_rd_bd(padap, cid, ctype, data); +} + +int cudbg_collect_dump_context(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct cudbg_ch_cntxt *buff; + u32 size, i = 0; + int rc; + + rc = cudbg_dump_context_size(padap); + if (rc <= 0) + return CUDBG_STATUS_ENTITY_NOT_FOUND; + + size = rc; + rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + if (rc) + return rc; + + buff = (struct cudbg_ch_cntxt *)temp_buff.data; + while (size > 0) { + buff->cntxt_type = CTXT_FLM; + buff->cntxt_id = i; + cudbg_read_sge_ctxt(pdbg_init, i, CTXT_FLM, buff->data); + buff++; + size -= sizeof(struct cudbg_ch_cntxt); + + buff->cntxt_type = CTXT_CNM; + buff->cntxt_id = i; + cudbg_read_sge_ctxt(pdbg_init, i, CTXT_CNM, buff->data); + buff++; + size -= sizeof(struct cudbg_ch_cntxt); + + i++; + } + + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +static inline void cudbg_tcamxy2valmask(u64 x, u64 y, u8 *addr, u64 *mask) +{ + *mask = x | y; + y = (__force u64)cpu_to_be64(y); + memcpy(addr, (char *)&y + 2, ETH_ALEN); +} + +static void cudbg_mps_rpl_backdoor(struct adapter *padap, + struct fw_ldst_mps_rplc *mps_rplc) +{ + if (is_t5(padap->params.chip)) { + mps_rplc->rplc255_224 = htonl(t4_read_reg(padap, + MPS_VF_RPLCT_MAP3_A)); + mps_rplc->rplc223_192 = htonl(t4_read_reg(padap, + MPS_VF_RPLCT_MAP2_A)); + mps_rplc->rplc191_160 = htonl(t4_read_reg(padap, + MPS_VF_RPLCT_MAP1_A)); + mps_rplc->rplc159_128 = htonl(t4_read_reg(padap, + MPS_VF_RPLCT_MAP0_A)); + } else { + mps_rplc->rplc255_224 = htonl(t4_read_reg(padap, + MPS_VF_RPLCT_MAP7_A)); + mps_rplc->rplc223_192 = htonl(t4_read_reg(padap, + MPS_VF_RPLCT_MAP6_A)); + mps_rplc->rplc191_160 = htonl(t4_read_reg(padap, + MPS_VF_RPLCT_MAP5_A)); + mps_rplc->rplc159_128 = htonl(t4_read_reg(padap, + MPS_VF_RPLCT_MAP4_A)); + } + mps_rplc->rplc127_96 = htonl(t4_read_reg(padap, MPS_VF_RPLCT_MAP3_A)); + mps_rplc->rplc95_64 = htonl(t4_read_reg(padap, MPS_VF_RPLCT_MAP2_A)); + mps_rplc->rplc63_32 = htonl(t4_read_reg(padap, MPS_VF_RPLCT_MAP1_A)); + mps_rplc->rplc31_0 = htonl(t4_read_reg(padap, MPS_VF_RPLCT_MAP0_A)); +} + +static int cudbg_collect_tcam_index(struct adapter *padap, + struct cudbg_mps_tcam *tcam, u32 idx) +{ + u64 tcamy, tcamx, val; + u32 ctl, data2; + int rc = 0; + + if (CHELSIO_CHIP_VERSION(padap->params.chip) >= CHELSIO_T6) { + /* CtlReqID - 1: use Host Driver Requester ID + * CtlCmdType - 0: Read, 1: Write + * CtlTcamSel - 0: TCAM0, 1: TCAM1 + * CtlXYBitSel- 0: Y bit, 1: X bit + */ + + /* Read tcamy */ + ctl = CTLREQID_V(1) | CTLCMDTYPE_V(0) | CTLXYBITSEL_V(0); + if (idx < 256) + ctl |= CTLTCAMINDEX_V(idx) | CTLTCAMSEL_V(0); + else + ctl |= CTLTCAMINDEX_V(idx - 256) | CTLTCAMSEL_V(1); + + t4_write_reg(padap, MPS_CLS_TCAM_DATA2_CTL_A, ctl); + val = t4_read_reg(padap, MPS_CLS_TCAM_RDATA1_REQ_ID1_A); + tcamy = DMACH_G(val) << 32; + tcamy |= t4_read_reg(padap, MPS_CLS_TCAM_RDATA0_REQ_ID1_A); + data2 = t4_read_reg(padap, MPS_CLS_TCAM_RDATA2_REQ_ID1_A); + tcam->lookup_type = DATALKPTYPE_G(data2); + + /* 0 - Outer header, 1 - Inner header + * [71:48] bit locations are overloaded for + * outer vs. inner lookup types. + */ + if (tcam->lookup_type && tcam->lookup_type != DATALKPTYPE_M) { + /* Inner header VNI */ + tcam->vniy = (data2 & DATAVIDH2_F) | DATAVIDH1_G(data2); + tcam->vniy = (tcam->vniy << 16) | VIDL_G(val); + tcam->dip_hit = data2 & DATADIPHIT_F; + } else { + tcam->vlan_vld = data2 & DATAVIDH2_F; + tcam->ivlan = VIDL_G(val); + } + + tcam->port_num = DATAPORTNUM_G(data2); + + /* Read tcamx. Change the control param */ + ctl |= CTLXYBITSEL_V(1); + t4_write_reg(padap, MPS_CLS_TCAM_DATA2_CTL_A, ctl); + val = t4_read_reg(padap, MPS_CLS_TCAM_RDATA1_REQ_ID1_A); + tcamx = DMACH_G(val) << 32; + tcamx |= t4_read_reg(padap, MPS_CLS_TCAM_RDATA0_REQ_ID1_A); + data2 = t4_read_reg(padap, MPS_CLS_TCAM_RDATA2_REQ_ID1_A); + if (tcam->lookup_type && tcam->lookup_type != DATALKPTYPE_M) { + /* Inner header VNI mask */ + tcam->vnix = (data2 & DATAVIDH2_F) | DATAVIDH1_G(data2); + tcam->vnix = (tcam->vnix << 16) | VIDL_G(val); + } + } else { + tcamy = t4_read_reg64(padap, MPS_CLS_TCAM_Y_L(idx)); + tcamx = t4_read_reg64(padap, MPS_CLS_TCAM_X_L(idx)); + } + + /* If no entry, return */ + if (tcamx & tcamy) + return rc; + + tcam->cls_lo = t4_read_reg(padap, MPS_CLS_SRAM_L(idx)); + tcam->cls_hi = t4_read_reg(padap, MPS_CLS_SRAM_H(idx)); + + if (is_t5(padap->params.chip)) + tcam->repli = (tcam->cls_lo & REPLICATE_F); + else if (is_t6(padap->params.chip)) + tcam->repli = (tcam->cls_lo & T6_REPLICATE_F); + + if (tcam->repli) { + struct fw_ldst_cmd ldst_cmd; + struct fw_ldst_mps_rplc mps_rplc; + + memset(&ldst_cmd, 0, sizeof(ldst_cmd)); + ldst_cmd.op_to_addrspace = + htonl(FW_CMD_OP_V(FW_LDST_CMD) | + FW_CMD_REQUEST_F | FW_CMD_READ_F | + FW_LDST_CMD_ADDRSPACE_V(FW_LDST_ADDRSPC_MPS)); + ldst_cmd.cycles_to_len16 = htonl(FW_LEN16(ldst_cmd)); + ldst_cmd.u.mps.rplc.fid_idx = + htons(FW_LDST_CMD_FID_V(FW_LDST_MPS_RPLC) | + FW_LDST_CMD_IDX_V(idx)); + + rc = t4_wr_mbox(padap, padap->mbox, &ldst_cmd, sizeof(ldst_cmd), + &ldst_cmd); + if (rc) + cudbg_mps_rpl_backdoor(padap, &mps_rplc); + else + mps_rplc = ldst_cmd.u.mps.rplc; + + tcam->rplc[0] = ntohl(mps_rplc.rplc31_0); + tcam->rplc[1] = ntohl(mps_rplc.rplc63_32); + tcam->rplc[2] = ntohl(mps_rplc.rplc95_64); + tcam->rplc[3] = ntohl(mps_rplc.rplc127_96); + if (padap->params.arch.mps_rplc_size > CUDBG_MAX_RPLC_SIZE) { + tcam->rplc[4] = ntohl(mps_rplc.rplc159_128); + tcam->rplc[5] = ntohl(mps_rplc.rplc191_160); + tcam->rplc[6] = ntohl(mps_rplc.rplc223_192); + tcam->rplc[7] = ntohl(mps_rplc.rplc255_224); + } + } + cudbg_tcamxy2valmask(tcamx, tcamy, tcam->addr, &tcam->mask); + tcam->idx = idx; + tcam->rplc_size = padap->params.arch.mps_rplc_size; + return rc; +} + +int cudbg_collect_mps_tcam(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + u32 size = 0, i, n, total_size = 0; + struct cudbg_mps_tcam *tcam; + int rc; + + n = padap->params.arch.mps_tcam_size; + size = sizeof(struct cudbg_mps_tcam) * n; + rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + if (rc) + return rc; + + tcam = (struct cudbg_mps_tcam *)temp_buff.data; + for (i = 0; i < n; i++) { + rc = cudbg_collect_tcam_index(padap, tcam, i); + if (rc) { + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + total_size += sizeof(struct cudbg_mps_tcam); + tcam++; + } + + if (!total_size) { + rc = CUDBG_SYSTEM_ERROR; + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_vpd_data(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + char vpd_str[CUDBG_VPD_VER_LEN + 1]; + u32 scfg_vers, vpd_vers, fw_vers; + struct cudbg_vpd_data *vpd_data; + struct vpd_params vpd = { 0 }; + int rc, ret; + + rc = t4_get_raw_vpd_params(padap, &vpd); + if (rc) + return rc; + + rc = t4_get_fw_version(padap, &fw_vers); + if (rc) + return rc; + + /* Serial Configuration Version is located beyond the PF's vpd size. + * Temporarily give access to entire EEPROM to get it. + */ + rc = pci_set_vpd_size(padap->pdev, EEPROMVSIZE); + if (rc < 0) + return rc; + + ret = cudbg_read_vpd_reg(padap, CUDBG_SCFG_VER_ADDR, CUDBG_SCFG_VER_LEN, + &scfg_vers); + + /* Restore back to original PF's vpd size */ + rc = pci_set_vpd_size(padap->pdev, CUDBG_VPD_PF_SIZE); + if (rc < 0) + return rc; + + if (ret) + return ret; + + rc = cudbg_read_vpd_reg(padap, CUDBG_VPD_VER_ADDR, CUDBG_VPD_VER_LEN, + vpd_str); + if (rc) + return rc; + + vpd_str[CUDBG_VPD_VER_LEN] = '\0'; + rc = kstrtouint(vpd_str, 0, &vpd_vers); + if (rc) + return rc; + + rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_vpd_data), + &temp_buff); + if (rc) + return rc; + + vpd_data = (struct cudbg_vpd_data *)temp_buff.data; + memcpy(vpd_data->sn, vpd.sn, SERNUM_LEN + 1); + memcpy(vpd_data->bn, vpd.pn, PN_LEN + 1); + memcpy(vpd_data->na, vpd.na, MACADDR_LEN + 1); + memcpy(vpd_data->mn, vpd.id, ID_LEN + 1); + vpd_data->scfg_vers = scfg_vers; + vpd_data->vpd_vers = vpd_vers; + vpd_data->fw_major = FW_HDR_FW_VER_MAJOR_G(fw_vers); + vpd_data->fw_minor = FW_HDR_FW_VER_MINOR_G(fw_vers); + vpd_data->fw_micro = FW_HDR_FW_VER_MICRO_G(fw_vers); + vpd_data->fw_build = FW_HDR_FW_VER_BUILD_G(fw_vers); + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +static int cudbg_read_tid(struct cudbg_init *pdbg_init, u32 tid, + struct cudbg_tid_data *tid_data) +{ + struct adapter *padap = pdbg_init->adap; + int i, cmd_retry = 8; + u32 val; + + /* Fill REQ_DATA regs with 0's */ + for (i = 0; i < NUM_LE_DB_DBGI_REQ_DATA_INSTANCES; i++) + t4_write_reg(padap, LE_DB_DBGI_REQ_DATA_A + (i << 2), 0); + + /* Write DBIG command */ + val = DBGICMD_V(4) | DBGITID_V(tid); + t4_write_reg(padap, LE_DB_DBGI_REQ_TCAM_CMD_A, val); + tid_data->dbig_cmd = val; + + val = DBGICMDSTRT_F | DBGICMDMODE_V(1); /* LE mode */ + t4_write_reg(padap, LE_DB_DBGI_CONFIG_A, val); + tid_data->dbig_conf = val; + + /* Poll the DBGICMDBUSY bit */ + val = 1; + while (val) { + val = t4_read_reg(padap, LE_DB_DBGI_CONFIG_A); + val = val & DBGICMDBUSY_F; + cmd_retry--; + if (!cmd_retry) + return CUDBG_SYSTEM_ERROR; + } + + /* Check RESP status */ + val = t4_read_reg(padap, LE_DB_DBGI_RSP_STATUS_A); + tid_data->dbig_rsp_stat = val; + if (!(val & 1)) + return CUDBG_SYSTEM_ERROR; + + /* Read RESP data */ + for (i = 0; i < NUM_LE_DB_DBGI_RSP_DATA_INSTANCES; i++) + tid_data->data[i] = t4_read_reg(padap, + LE_DB_DBGI_RSP_DATA_A + + (i << 2)); + tid_data->tid = tid; + return 0; +} + +static int cudbg_get_le_type(u32 tid, struct cudbg_tcam tcam_region) +{ + int type = LE_ET_UNKNOWN; + + if (tid < tcam_region.server_start) + type = LE_ET_TCAM_CON; + else if (tid < tcam_region.filter_start) + type = LE_ET_TCAM_SERVER; + else if (tid < tcam_region.clip_start) + type = LE_ET_TCAM_FILTER; + else if (tid < tcam_region.routing_start) + type = LE_ET_TCAM_CLIP; + else if (tid < tcam_region.tid_hash_base) + type = LE_ET_TCAM_ROUTING; + else if (tid < tcam_region.max_tid) + type = LE_ET_HASH_CON; + else + type = LE_ET_INVALID_TID; + + return type; +} + +static int cudbg_is_ipv6_entry(struct cudbg_tid_data *tid_data, + struct cudbg_tcam tcam_region) +{ + int ipv6 = 0; + int le_type; + + le_type = cudbg_get_le_type(tid_data->tid, tcam_region); + if (tid_data->tid & 1) + return 0; + + if (le_type == LE_ET_HASH_CON) { + ipv6 = tid_data->data[16] & 0x8000; + } else if (le_type == LE_ET_TCAM_CON) { + ipv6 = tid_data->data[16] & 0x8000; + if (ipv6) + ipv6 = tid_data->data[9] == 0x00C00000; + } else { + ipv6 = 0; + } + return ipv6; +} + +void cudbg_fill_le_tcam_info(struct adapter *padap, + struct cudbg_tcam *tcam_region) +{ + u32 value; + + /* Get the LE regions */ + value = t4_read_reg(padap, LE_DB_TID_HASHBASE_A); /* hash base index */ + tcam_region->tid_hash_base = value; + + /* Get routing table index */ + value = t4_read_reg(padap, LE_DB_ROUTING_TABLE_INDEX_A); + tcam_region->routing_start = value; + + /*Get clip table index */ + value = t4_read_reg(padap, LE_DB_CLIP_TABLE_INDEX_A); + tcam_region->clip_start = value; + + /* Get filter table index */ + value = t4_read_reg(padap, LE_DB_FILTER_TABLE_INDEX_A); + tcam_region->filter_start = value; + + /* Get server table index */ + value = t4_read_reg(padap, LE_DB_SERVER_INDEX_A); + tcam_region->server_start = value; + + /* Check whether hash is enabled and calculate the max tids */ + value = t4_read_reg(padap, LE_DB_CONFIG_A); + if ((value >> HASHEN_S) & 1) { + value = t4_read_reg(padap, LE_DB_HASH_CONFIG_A); + if (CHELSIO_CHIP_VERSION(padap->params.chip) > CHELSIO_T5) { + tcam_region->max_tid = (value & 0xFFFFF) + + tcam_region->tid_hash_base; + } else { + value = HASHTIDSIZE_G(value); + value = 1 << value; + tcam_region->max_tid = value + + tcam_region->tid_hash_base; + } + } else { /* hash not enabled */ + tcam_region->max_tid = CUDBG_MAX_TCAM_TID; + } +} + +int cudbg_collect_le_tcam(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct cudbg_tcam tcam_region = { 0 }; + struct cudbg_tid_data *tid_data; + u32 bytes = 0; + int rc, size; + u32 i; + + cudbg_fill_le_tcam_info(padap, &tcam_region); + + size = sizeof(struct cudbg_tid_data) * tcam_region.max_tid; + size += sizeof(struct cudbg_tcam); + rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + if (rc) + return rc; + + memcpy(temp_buff.data, &tcam_region, sizeof(struct cudbg_tcam)); + bytes = sizeof(struct cudbg_tcam); + tid_data = (struct cudbg_tid_data *)(temp_buff.data + bytes); + /* read all tid */ + for (i = 0; i < tcam_region.max_tid; ) { + rc = cudbg_read_tid(pdbg_init, i, tid_data); + if (rc) { + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + + /* ipv6 takes two tids */ + cudbg_is_ipv6_entry(tid_data, tcam_region) ? i += 2 : i++; + + tid_data++; + bytes += sizeof(struct cudbg_tid_data); + } + + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_cctrl(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + u32 size; + int rc; + + size = sizeof(u16) * NMTUS * NCCTRL_WIN; + rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + if (rc) + return rc; + + t4_read_cong_tbl(padap, (void *)temp_buff.data); + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_ma_indirect(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct ireg_buf *ma_indr; + int i, rc, n; + u32 size, j; + + if (CHELSIO_CHIP_VERSION(padap->params.chip) < CHELSIO_T6) + return CUDBG_STATUS_ENTITY_NOT_FOUND; + + n = sizeof(t6_ma_ireg_array) / (IREG_NUM_ELEM * sizeof(u32)); + size = sizeof(struct ireg_buf) * n * 2; + rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + if (rc) + return rc; + + ma_indr = (struct ireg_buf *)temp_buff.data; + for (i = 0; i < n; i++) { + struct ireg_field *ma_fli = &ma_indr->tp_pio; + u32 *buff = ma_indr->outbuf; + + ma_fli->ireg_addr = t6_ma_ireg_array[i][0]; + ma_fli->ireg_data = t6_ma_ireg_array[i][1]; + ma_fli->ireg_local_offset = t6_ma_ireg_array[i][2]; + ma_fli->ireg_offset_range = t6_ma_ireg_array[i][3]; + t4_read_indirect(padap, ma_fli->ireg_addr, ma_fli->ireg_data, + buff, ma_fli->ireg_offset_range, + ma_fli->ireg_local_offset); + ma_indr++; + } + + n = sizeof(t6_ma_ireg_array2) / (IREG_NUM_ELEM * sizeof(u32)); + for (i = 0; i < n; i++) { + struct ireg_field *ma_fli = &ma_indr->tp_pio; + u32 *buff = ma_indr->outbuf; + + ma_fli->ireg_addr = t6_ma_ireg_array2[i][0]; + ma_fli->ireg_data = t6_ma_ireg_array2[i][1]; + ma_fli->ireg_local_offset = t6_ma_ireg_array2[i][2]; + for (j = 0; j < t6_ma_ireg_array2[i][3]; j++) { + t4_read_indirect(padap, ma_fli->ireg_addr, + ma_fli->ireg_data, buff, 1, + ma_fli->ireg_local_offset); + buff++; + ma_fli->ireg_local_offset += 0x20; + } + ma_indr++; + } + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_ulptx_la(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct cudbg_ulptx_la *ulptx_la_buff; + u32 i, j; + int rc; + + rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_ulptx_la), + &temp_buff); + if (rc) + return rc; + + ulptx_la_buff = (struct cudbg_ulptx_la *)temp_buff.data; + for (i = 0; i < CUDBG_NUM_ULPTX; i++) { + ulptx_la_buff->rdptr[i] = t4_read_reg(padap, + ULP_TX_LA_RDPTR_0_A + + 0x10 * i); + ulptx_la_buff->wrptr[i] = t4_read_reg(padap, + ULP_TX_LA_WRPTR_0_A + + 0x10 * i); + ulptx_la_buff->rddata[i] = t4_read_reg(padap, + ULP_TX_LA_RDDATA_0_A + + 0x10 * i); + for (j = 0; j < CUDBG_NUM_ULPTX_READ; j++) + ulptx_la_buff->rd_data[i][j] = + t4_read_reg(padap, + ULP_TX_LA_RDDATA_0_A + 0x10 * i); + } + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_up_cim_indirect(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct ireg_buf *up_cim; + int i, rc, n; + u32 size; + + n = sizeof(t5_up_cim_reg_array) / (IREG_NUM_ELEM * sizeof(u32)); + size = sizeof(struct ireg_buf) * n; + rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + if (rc) + return rc; + + up_cim = (struct ireg_buf *)temp_buff.data; + for (i = 0; i < n; i++) { + struct ireg_field *up_cim_reg = &up_cim->tp_pio; + u32 *buff = up_cim->outbuf; + + if (is_t5(padap->params.chip)) { + up_cim_reg->ireg_addr = t5_up_cim_reg_array[i][0]; + up_cim_reg->ireg_data = t5_up_cim_reg_array[i][1]; + up_cim_reg->ireg_local_offset = + t5_up_cim_reg_array[i][2]; + up_cim_reg->ireg_offset_range = + t5_up_cim_reg_array[i][3]; + } else if (is_t6(padap->params.chip)) { + up_cim_reg->ireg_addr = t6_up_cim_reg_array[i][0]; + up_cim_reg->ireg_data = t6_up_cim_reg_array[i][1]; + up_cim_reg->ireg_local_offset = + t6_up_cim_reg_array[i][2]; + up_cim_reg->ireg_offset_range = + t6_up_cim_reg_array[i][3]; + } + + rc = t4_cim_read(padap, up_cim_reg->ireg_local_offset, + up_cim_reg->ireg_offset_range, buff); + if (rc) { + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + up_cim++; + } + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_pbt_tables(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct cudbg_pbt_tables *pbt; + int i, rc; + u32 addr; + + rc = cudbg_get_buff(dbg_buff, sizeof(struct cudbg_pbt_tables), + &temp_buff); + if (rc) + return rc; + + pbt = (struct cudbg_pbt_tables *)temp_buff.data; + /* PBT dynamic entries */ + addr = CUDBG_CHAC_PBT_ADDR; + for (i = 0; i < CUDBG_PBT_DYNAMIC_ENTRIES; i++) { + rc = t4_cim_read(padap, addr + (i * 4), 1, + &pbt->pbt_dynamic[i]); + if (rc) { + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + } + + /* PBT static entries */ + /* static entries start when bit 6 is set */ + addr = CUDBG_CHAC_PBT_ADDR + (1 << 6); + for (i = 0; i < CUDBG_PBT_STATIC_ENTRIES; i++) { + rc = t4_cim_read(padap, addr + (i * 4), 1, + &pbt->pbt_static[i]); + if (rc) { + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + } + + /* LRF entries */ + addr = CUDBG_CHAC_PBT_LRF; + for (i = 0; i < CUDBG_LRF_ENTRIES; i++) { + rc = t4_cim_read(padap, addr + (i * 4), 1, + &pbt->lrf_table[i]); + if (rc) { + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + } + + /* PBT data entries */ + addr = CUDBG_CHAC_PBT_DATA; + for (i = 0; i < CUDBG_PBT_DATA_ENTRIES; i++) { + rc = t4_cim_read(padap, addr + (i * 4), 1, + &pbt->pbt_data[i]); + if (rc) { + cudbg_err->sys_err = rc; + cudbg_put_buff(&temp_buff, dbg_buff); + return rc; + } + } + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_mbox_log(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_mbox_log *mboxlog = NULL; + struct cudbg_buffer temp_buff = { 0 }; + struct mbox_cmd_log *log = NULL; + struct mbox_cmd *entry; + unsigned int entry_idx; + u16 mbox_cmds; + int i, k, rc; + u64 flit; + u32 size; + + log = padap->mbox_log; + mbox_cmds = padap->mbox_log->size; + size = sizeof(struct cudbg_mbox_log) * mbox_cmds; + rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + if (rc) + return rc; + + mboxlog = (struct cudbg_mbox_log *)temp_buff.data; + for (k = 0; k < mbox_cmds; k++) { + entry_idx = log->cursor + k; + if (entry_idx >= log->size) + entry_idx -= log->size; + + entry = mbox_cmd_log_entry(log, entry_idx); + /* skip over unused entries */ + if (entry->timestamp == 0) + continue; + + memcpy(&mboxlog->entry, entry, sizeof(struct mbox_cmd)); + for (i = 0; i < MBOX_LEN / 8; i++) { + flit = entry->cmd[i]; + mboxlog->hi[i] = (u32)(flit >> 32); + mboxlog->lo[i] = (u32)flit; + } + mboxlog++; + } + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} + +int cudbg_collect_hma_indirect(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err) +{ + struct adapter *padap = pdbg_init->adap; + struct cudbg_buffer temp_buff = { 0 }; + struct ireg_buf *hma_indr; + int i, rc, n; + u32 size; + + if (CHELSIO_CHIP_VERSION(padap->params.chip) < CHELSIO_T6) + return CUDBG_STATUS_ENTITY_NOT_FOUND; + + n = sizeof(t6_hma_ireg_array) / (IREG_NUM_ELEM * sizeof(u32)); + size = sizeof(struct ireg_buf) * n; + rc = cudbg_get_buff(dbg_buff, size, &temp_buff); + if (rc) + return rc; + + hma_indr = (struct ireg_buf *)temp_buff.data; + for (i = 0; i < n; i++) { + struct ireg_field *hma_fli = &hma_indr->tp_pio; + u32 *buff = hma_indr->outbuf; + + hma_fli->ireg_addr = t6_hma_ireg_array[i][0]; + hma_fli->ireg_data = t6_hma_ireg_array[i][1]; + hma_fli->ireg_local_offset = t6_hma_ireg_array[i][2]; + hma_fli->ireg_offset_range = t6_hma_ireg_array[i][3]; + t4_read_indirect(padap, hma_fli->ireg_addr, hma_fli->ireg_data, + buff, hma_fli->ireg_offset_range, + hma_fli->ireg_local_offset); + hma_indr++; + } + cudbg_write_and_release_buff(&temp_buff, dbg_buff); + return rc; +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h new file mode 100644 index 000000000000..caeee8e33e86 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h @@ -0,0 +1,169 @@ +/* + * Copyright (C) 2017 Chelsio Communications. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +#ifndef __CUDBG_LIB_H__ +#define __CUDBG_LIB_H__ + +int cudbg_collect_reg_dump(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_fw_devlog(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_cim_la(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_cim_ma_la(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_cim_qcfg(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_cim_ibq_tp0(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_cim_ibq_tp1(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_cim_ibq_ulp(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_cim_ibq_sge0(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_cim_ibq_sge1(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_cim_ibq_ncsi(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_cim_obq_ulp0(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_cim_obq_ulp1(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_cim_obq_ulp2(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_cim_obq_ulp3(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_cim_obq_sge(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_cim_obq_ncsi(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_edc0_meminfo(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_edc1_meminfo(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_rss(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_rss_vf_config(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_tp_indirect(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_path_mtu(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_pm_stats(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_hw_sched(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_sge_indirect(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_ulprx_la(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_tp_la(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_cim_pif_la(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_clk_info(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_obq_sge_rx_q0(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_obq_sge_rx_q1(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_pcie_indirect(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_pm_indirect(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_tid(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_dump_context(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_mps_tcam(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_vpd_data(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_le_tcam(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_cctrl(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_ma_indirect(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_ulptx_la(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_up_cim_indirect(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_pbt_tables(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_mbox_log(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); +int cudbg_collect_hma_indirect(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); + +struct cudbg_entity_hdr *cudbg_get_entity_hdr(void *outbuf, int i); +void cudbg_align_debug_buffer(struct cudbg_buffer *dbg_buff, + struct cudbg_entity_hdr *entity_hdr); +u32 cudbg_cim_obq_size(struct adapter *padap, int qid); +int cudbg_dump_context_size(struct adapter *padap); + +struct cudbg_tcam; +void cudbg_fill_le_tcam_info(struct adapter *padap, + struct cudbg_tcam *tcam_region); +#endif /* __CUDBG_LIB_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h new file mode 100644 index 000000000000..24b33f28e548 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2017 Chelsio Communications. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +#ifndef __CUDBG_LIB_COMMON_H__ +#define __CUDBG_LIB_COMMON_H__ + +#define CUDBG_SIGNATURE 67856866 /* CUDB in ascii */ + +enum cudbg_dump_type { + CUDBG_DUMP_TYPE_MINI = 1, +}; + +enum cudbg_compression_type { + CUDBG_COMPRESSION_NONE = 1, +}; + +struct cudbg_hdr { + u32 signature; + u32 hdr_len; + u16 major_ver; + u16 minor_ver; + u32 data_len; + u32 hdr_flags; + u16 max_entities; + u8 chip_ver; + u8 dump_type:3; + u8 reserved1:1; + u8 compress_type:4; + u32 reserved[8]; +}; + +struct cudbg_entity_hdr { + u32 entity_type; + u32 start_offset; + u32 size; + int hdr_flags; + u32 sys_warn; + u32 sys_err; + u8 num_pad; + u8 flag; /* bit 0 is used to indicate ext data */ + u8 reserved1[2]; + u32 next_ext_offset; /* pointer to next extended entity meta data */ + u32 reserved[5]; +}; + +struct cudbg_ver_hdr { + u32 signature; + u16 revision; + u16 size; +}; + +struct cudbg_buffer { + u32 size; + u32 offset; + char *data; +}; + +struct cudbg_error { + int sys_err; + int sys_warn; + int app_err; +}; + +#define CDUMP_MAX_COMP_BUF_SIZE ((64 * 1024) - 1) +#define CUDBG_CHUNK_SIZE ((CDUMP_MAX_COMP_BUF_SIZE / 1024) * 1024) + +int cudbg_get_buff(struct cudbg_buffer *pdbg_buff, u32 size, + struct cudbg_buffer *pin_buff); +void cudbg_put_buff(struct cudbg_buffer *pin_buff, + struct cudbg_buffer *pdbg_buff); +void cudbg_update_buff(struct cudbg_buffer *pin_buff, + struct cudbg_buffer *pout_buff); +#endif /* __CUDBG_LIB_COMMON_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index ea72d2d2e1b4..6f9fa6e3c42a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -287,10 +287,18 @@ struct tp_params { * places we store their offsets here, or a -1 if the field isn't * present. */ - int vlan_shift; - int vnic_shift; + int fcoe_shift; int port_shift; + int vnic_shift; + int vlan_shift; + int tos_shift; int protocol_shift; + int ethertype_shift; + int macmatch_shift; + int matchtype_shift; + int frag_shift; + + u64 hash_filter_mask; }; struct vpd_params { @@ -358,6 +366,7 @@ struct adapter_params { unsigned char crypto; /* HW capability for crypto */ unsigned char bypass; + unsigned char hash_filter; unsigned int ofldq_wr_cred; bool ulptx_memwrite_dsgl; /* use of T5 DSGL allowed */ @@ -367,6 +376,7 @@ struct adapter_params { unsigned int max_ird_adapter; /* Max read depth per adapter */ bool fr_nsmr_tpte_wr_support; /* FW support for FR_NSMR_TPTE_WR */ u8 fw_caps_support; /* 32-bit Port Capabilities */ + bool filter2_wr_support; /* FW support for FILTER2_WR */ /* MPS Buffer Group Map[per Port]. Bit i is set if buffer group i is * used by the Port @@ -549,6 +559,7 @@ enum { /* adapter flags */ MASTER_PF = (1 << 7), FW_OFLD_CONN = (1 << 9), ROOT_NO_RELAXED_ORDERING = (1 << 10), + SHUTTING_DOWN = (1 << 11), }; enum { @@ -857,6 +868,7 @@ struct adapter { unsigned int clipt_start; unsigned int clipt_end; struct clip_tbl *clipt; + struct smt_data *smt; struct cxgb4_uld_info *uld; void *uld_handle[CXGB4_ULD_MAX]; unsigned int num_uld; @@ -904,6 +916,15 @@ struct adapter { /* TC u32 offload */ struct cxgb4_tc_u32_table *tc_u32; struct chcr_stats_debug chcr_stats; + + /* TC flower offload */ + struct rhashtable flower_tbl; + struct rhashtable_params flower_ht_params; + struct timer_list flower_stats_timer; + struct work_struct flower_stats_work; + + /* Ethtool Dump */ + struct ethtool_dump eth_dump; }; /* Support for "sched-class" command to allow a TX Scheduling Class to be @@ -1031,6 +1052,7 @@ struct ch_filter_specification { * matching that doesn't exist as a (value, mask) tuple. */ uint32_t type:1; /* 0 => IPv4, 1 => IPv6 */ + u32 hash:1; /* 0 => wild-card, 1 => exact-match */ /* Packet dispatch information. Ingress packets which match the * filter rules will be dropped, passed to the host or switched back @@ -1055,10 +1077,19 @@ struct ch_filter_specification { uint32_t newdmac:1; /* rewrite destination MAC address */ uint32_t newsmac:1; /* rewrite source MAC address */ uint32_t newvlan:2; /* rewrite VLAN Tag */ + uint32_t nat_mode:3; /* specify NAT operation mode */ uint8_t dmac[ETH_ALEN]; /* new destination MAC address */ uint8_t smac[ETH_ALEN]; /* new source MAC address */ uint16_t vlan; /* VLAN Tag to insert */ + u8 nat_lip[16]; /* local IP to use after NAT'ing */ + u8 nat_fip[16]; /* foreign IP to use after NAT'ing */ + u16 nat_lport; /* local port to use after NAT'ing */ + u16 nat_fport; /* foreign port to use after NAT'ing */ + + /* reservation for future additions */ + u8 rsvd[24]; + /* Filter rule value/mask pairs. */ struct ch_filter_tuple val; @@ -1078,6 +1109,17 @@ enum { VLAN_REWRITE }; +enum { + NAT_MODE_NONE = 0, /* No NAT performed */ + NAT_MODE_DIP, /* NAT on Dst IP */ + NAT_MODE_DIP_DP, /* NAT on Dst IP, Dst Port */ + NAT_MODE_DIP_DP_SIP, /* NAT on Dst IP, Dst Port and Src IP */ + NAT_MODE_DIP_DP_SP, /* NAT on Dst IP, Dst Port and Src Port */ + NAT_MODE_SIP_SP, /* NAT on Src IP and Src Port */ + NAT_MODE_DIP_SIP_SP, /* NAT on Dst IP, Src IP and Src Port */ + NAT_MODE_ALL /* NAT on entire 4-tuple */ +}; + /* Host shadow copy of ingress filter entry. This is in host native format * and doesn't match the ordering or bit order, etc. of the hardware of the * firmware command. The use of bit-field structure elements is purely to @@ -1090,9 +1132,9 @@ struct filter_entry { u32 locked:1; /* filter is administratively locked */ u32 pending:1; /* filter action is pending firmware reply */ - u32 smtidx:8; /* Source MAC Table index for smac */ struct filter_ctx *ctx; /* Caller's completion hook */ struct l2t_entry *l2t; /* Layer Two Table entry for dmac */ + struct smt_entry *smt; /* Source Mac Table entry for smac */ struct net_device *dev; /* Associated net device */ u32 tid; /* This will store the actual tid */ @@ -1109,6 +1151,11 @@ static inline int is_offload(const struct adapter *adap) return adap->params.offload; } +static inline int is_hashfilter(const struct adapter *adap) +{ + return adap->params.hash_filter; +} + static inline int is_pci_uld(const struct adapter *adap) { return adap->params.crypto; @@ -1312,6 +1359,12 @@ static inline unsigned int core_ticks_to_us(const struct adapter *adapter, adapter->params.vpd.cclk); } +static inline unsigned int dack_ticks_to_usec(const struct adapter *adap, + unsigned int ticks) +{ + return (ticks << adap->params.tp.dack_re) / core_ticks_per_usec(adap); +} + void t4_set_reg_field(struct adapter *adap, unsigned int addr, u32 mask, u32 val); @@ -1406,6 +1459,7 @@ static inline int t4_memory_write(struct adapter *adap, int mtype, u32 addr, unsigned int t4_get_regs_len(struct adapter *adapter); void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size); +int t4_eeprom_ptov(unsigned int phys_addr, unsigned int fn, unsigned int sz); int t4_seeprom_wp(struct adapter *adapter, bool enable); int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p); int t4_get_vpd_params(struct adapter *adapter, struct vpd_params *p); @@ -1451,7 +1505,7 @@ unsigned int qtimer_val(const struct adapter *adap, int t4_init_devlog_params(struct adapter *adapter); int t4_init_sge_params(struct adapter *adapter); -int t4_init_tp_params(struct adapter *adap); +int t4_init_tp_params(struct adapter *adap, bool sleep_ok); int t4_filter_field_shift(const struct adapter *adap, int filter_sel); int t4_init_rss_mode(struct adapter *adap, int mbox); int t4_init_portinfo(struct port_info *pi, int mbox, @@ -1465,14 +1519,15 @@ int t4_config_glbl_rss(struct adapter *adapter, int mbox, unsigned int mode, int t4_config_vi_rss(struct adapter *adapter, int mbox, unsigned int viid, unsigned int flags, unsigned int defq); int t4_read_rss(struct adapter *adapter, u16 *entries); -void t4_read_rss_key(struct adapter *adapter, u32 *key); -void t4_write_rss_key(struct adapter *adap, const u32 *key, int idx); +void t4_read_rss_key(struct adapter *adapter, u32 *key, bool sleep_ok); +void t4_write_rss_key(struct adapter *adap, const u32 *key, int idx, + bool sleep_ok); void t4_read_rss_pf_config(struct adapter *adapter, unsigned int index, - u32 *valp); + u32 *valp, bool sleep_ok); void t4_read_rss_vf_config(struct adapter *adapter, unsigned int index, - u32 *vfl, u32 *vfh); -u32 t4_read_rss_pf_map(struct adapter *adapter); -u32 t4_read_rss_pf_mask(struct adapter *adapter); + u32 *vfl, u32 *vfh, bool sleep_ok); +u32 t4_read_rss_pf_map(struct adapter *adapter, bool sleep_ok); +u32 t4_read_rss_pf_mask(struct adapter *adapter, bool sleep_ok); unsigned int t4_get_mps_bg_map(struct adapter *adapter, int pidx); unsigned int t4_get_tp_ch_map(struct adapter *adapter, int pidx); @@ -1503,14 +1558,18 @@ void t4_read_cong_tbl(struct adapter *adap, u16 incr[NMTUS][NCCTRL_WIN]); void t4_tp_wr_bits_indirect(struct adapter *adap, unsigned int addr, unsigned int mask, unsigned int val); void t4_tp_read_la(struct adapter *adap, u64 *la_buf, unsigned int *wrptr); -void t4_tp_get_err_stats(struct adapter *adap, struct tp_err_stats *st); -void t4_tp_get_cpl_stats(struct adapter *adap, struct tp_cpl_stats *st); -void t4_tp_get_rdma_stats(struct adapter *adap, struct tp_rdma_stats *st); -void t4_get_usm_stats(struct adapter *adap, struct tp_usm_stats *st); +void t4_tp_get_err_stats(struct adapter *adap, struct tp_err_stats *st, + bool sleep_ok); +void t4_tp_get_cpl_stats(struct adapter *adap, struct tp_cpl_stats *st, + bool sleep_ok); +void t4_tp_get_rdma_stats(struct adapter *adap, struct tp_rdma_stats *st, + bool sleep_ok); +void t4_get_usm_stats(struct adapter *adap, struct tp_usm_stats *st, + bool sleep_ok); void t4_tp_get_tcp_stats(struct adapter *adap, struct tp_tcp_stats *v4, - struct tp_tcp_stats *v6); + struct tp_tcp_stats *v6, bool sleep_ok); void t4_get_fcoe_stats(struct adapter *adap, unsigned int idx, - struct tp_fcoe_stats *st); + struct tp_fcoe_stats *st, bool sleep_ok); void t4_load_mtus(struct adapter *adap, const unsigned short *mtus, const unsigned short *alpha, const unsigned short *beta); @@ -1608,6 +1667,13 @@ void t4_get_trace_filter(struct adapter *adapter, struct trace_params *tp, int filter_index, int *enabled); int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox, u32 addr, u32 val); +void t4_read_pace_tbl(struct adapter *adap, unsigned int pace_vals[NTX_SCHED]); +void t4_get_tx_sched(struct adapter *adap, unsigned int sched, + unsigned int *kbps, unsigned int *ipg, bool sleep_ok); +int t4_sge_ctxt_rd(struct adapter *adap, unsigned int mbox, unsigned int cid, + enum ctxt_type ctype, u32 *data); +int t4_sge_ctxt_rd_bd(struct adapter *adap, unsigned int cid, + enum ctxt_type ctype, u32 *data); int t4_sched_params(struct adapter *adapter, int type, int level, int mode, int rateunit, int ratemode, int channel, int class, int minrate, int maxrate, int weight, int pktsize); @@ -1619,6 +1685,13 @@ void t4_idma_monitor(struct adapter *adapter, int hz, int ticks); int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf, unsigned int naddr, u8 *addr); +void t4_tp_pio_read(struct adapter *adap, u32 *buff, u32 nregs, + u32 start_index, bool sleep_ok); +void t4_tp_tm_pio_read(struct adapter *adap, u32 *buff, u32 nregs, + u32 start_index, bool sleep_ok); +void t4_tp_mib_read(struct adapter *adap, u32 *buff, u32 nregs, + u32 start_index, bool sleep_ok); + void t4_uld_mem_free(struct adapter *adap); int t4_uld_mem_alloc(struct adapter *adap); void t4_uld_clean_up(struct adapter *adap); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c new file mode 100644 index 000000000000..29cc625e9833 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c @@ -0,0 +1,403 @@ +/* + * Copyright (C) 2017 Chelsio Communications. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +#include "t4_regs.h" +#include "cxgb4.h" +#include "cxgb4_cudbg.h" +#include "cudbg_entity.h" + +static const struct cxgb4_collect_entity cxgb4_collect_mem_dump[] = { + { CUDBG_EDC0, cudbg_collect_edc0_meminfo }, + { CUDBG_EDC1, cudbg_collect_edc1_meminfo }, +}; + +static const struct cxgb4_collect_entity cxgb4_collect_hw_dump[] = { + { CUDBG_MBOX_LOG, cudbg_collect_mbox_log }, + { CUDBG_DEV_LOG, cudbg_collect_fw_devlog }, + { CUDBG_REG_DUMP, cudbg_collect_reg_dump }, + { CUDBG_CIM_LA, cudbg_collect_cim_la }, + { CUDBG_CIM_MA_LA, cudbg_collect_cim_ma_la }, + { CUDBG_CIM_QCFG, cudbg_collect_cim_qcfg }, + { CUDBG_CIM_IBQ_TP0, cudbg_collect_cim_ibq_tp0 }, + { CUDBG_CIM_IBQ_TP1, cudbg_collect_cim_ibq_tp1 }, + { CUDBG_CIM_IBQ_ULP, cudbg_collect_cim_ibq_ulp }, + { CUDBG_CIM_IBQ_SGE0, cudbg_collect_cim_ibq_sge0 }, + { CUDBG_CIM_IBQ_SGE1, cudbg_collect_cim_ibq_sge1 }, + { CUDBG_CIM_IBQ_NCSI, cudbg_collect_cim_ibq_ncsi }, + { CUDBG_CIM_OBQ_ULP0, cudbg_collect_cim_obq_ulp0 }, + { CUDBG_CIM_OBQ_ULP1, cudbg_collect_cim_obq_ulp1 }, + { CUDBG_CIM_OBQ_ULP2, cudbg_collect_cim_obq_ulp2 }, + { CUDBG_CIM_OBQ_ULP3, cudbg_collect_cim_obq_ulp3 }, + { CUDBG_CIM_OBQ_SGE, cudbg_collect_cim_obq_sge }, + { CUDBG_CIM_OBQ_NCSI, cudbg_collect_cim_obq_ncsi }, + { CUDBG_RSS, cudbg_collect_rss }, + { CUDBG_RSS_VF_CONF, cudbg_collect_rss_vf_config }, + { CUDBG_PATH_MTU, cudbg_collect_path_mtu }, + { CUDBG_PM_STATS, cudbg_collect_pm_stats }, + { CUDBG_HW_SCHED, cudbg_collect_hw_sched }, + { CUDBG_TP_INDIRECT, cudbg_collect_tp_indirect }, + { CUDBG_SGE_INDIRECT, cudbg_collect_sge_indirect }, + { CUDBG_ULPRX_LA, cudbg_collect_ulprx_la }, + { CUDBG_TP_LA, cudbg_collect_tp_la }, + { CUDBG_CIM_PIF_LA, cudbg_collect_cim_pif_la }, + { CUDBG_CLK, cudbg_collect_clk_info }, + { CUDBG_CIM_OBQ_RXQ0, cudbg_collect_obq_sge_rx_q0 }, + { CUDBG_CIM_OBQ_RXQ1, cudbg_collect_obq_sge_rx_q1 }, + { CUDBG_PCIE_INDIRECT, cudbg_collect_pcie_indirect }, + { CUDBG_PM_INDIRECT, cudbg_collect_pm_indirect }, + { CUDBG_TID_INFO, cudbg_collect_tid }, + { CUDBG_DUMP_CONTEXT, cudbg_collect_dump_context }, + { CUDBG_MPS_TCAM, cudbg_collect_mps_tcam }, + { CUDBG_VPD_DATA, cudbg_collect_vpd_data }, + { CUDBG_LE_TCAM, cudbg_collect_le_tcam }, + { CUDBG_CCTRL, cudbg_collect_cctrl }, + { CUDBG_MA_INDIRECT, cudbg_collect_ma_indirect }, + { CUDBG_ULPTX_LA, cudbg_collect_ulptx_la }, + { CUDBG_UP_CIM_INDIRECT, cudbg_collect_up_cim_indirect }, + { CUDBG_PBT_TABLE, cudbg_collect_pbt_tables }, + { CUDBG_HMA_INDIRECT, cudbg_collect_hma_indirect }, +}; + +static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity) +{ + struct cudbg_tcam tcam_region = { 0 }; + u32 value, n = 0, len = 0; + + switch (entity) { + case CUDBG_REG_DUMP: + switch (CHELSIO_CHIP_VERSION(adap->params.chip)) { + case CHELSIO_T4: + len = T4_REGMAP_SIZE; + break; + case CHELSIO_T5: + case CHELSIO_T6: + len = T5_REGMAP_SIZE; + break; + default: + break; + } + break; + case CUDBG_DEV_LOG: + len = adap->params.devlog.size; + break; + case CUDBG_CIM_LA: + if (is_t6(adap->params.chip)) { + len = adap->params.cim_la_size / 10 + 1; + len *= 11 * sizeof(u32); + } else { + len = adap->params.cim_la_size / 8; + len *= 8 * sizeof(u32); + } + len += sizeof(u32); /* for reading CIM LA configuration */ + break; + case CUDBG_CIM_MA_LA: + len = 2 * CIM_MALA_SIZE * 5 * sizeof(u32); + break; + case CUDBG_CIM_QCFG: + len = sizeof(struct cudbg_cim_qcfg); + break; + case CUDBG_CIM_IBQ_TP0: + case CUDBG_CIM_IBQ_TP1: + case CUDBG_CIM_IBQ_ULP: + case CUDBG_CIM_IBQ_SGE0: + case CUDBG_CIM_IBQ_SGE1: + case CUDBG_CIM_IBQ_NCSI: + len = CIM_IBQ_SIZE * 4 * sizeof(u32); + break; + case CUDBG_CIM_OBQ_ULP0: + len = cudbg_cim_obq_size(adap, 0); + break; + case CUDBG_CIM_OBQ_ULP1: + len = cudbg_cim_obq_size(adap, 1); + break; + case CUDBG_CIM_OBQ_ULP2: + len = cudbg_cim_obq_size(adap, 2); + break; + case CUDBG_CIM_OBQ_ULP3: + len = cudbg_cim_obq_size(adap, 3); + break; + case CUDBG_CIM_OBQ_SGE: + len = cudbg_cim_obq_size(adap, 4); + break; + case CUDBG_CIM_OBQ_NCSI: + len = cudbg_cim_obq_size(adap, 5); + break; + case CUDBG_CIM_OBQ_RXQ0: + len = cudbg_cim_obq_size(adap, 6); + break; + case CUDBG_CIM_OBQ_RXQ1: + len = cudbg_cim_obq_size(adap, 7); + break; + case CUDBG_EDC0: + value = t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A); + if (value & EDRAM0_ENABLE_F) { + value = t4_read_reg(adap, MA_EDRAM0_BAR_A); + len = EDRAM0_SIZE_G(value); + } + len = cudbg_mbytes_to_bytes(len); + break; + case CUDBG_EDC1: + value = t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A); + if (value & EDRAM1_ENABLE_F) { + value = t4_read_reg(adap, MA_EDRAM1_BAR_A); + len = EDRAM1_SIZE_G(value); + } + len = cudbg_mbytes_to_bytes(len); + break; + case CUDBG_RSS: + len = RSS_NENTRIES * sizeof(u16); + break; + case CUDBG_RSS_VF_CONF: + len = adap->params.arch.vfcount * + sizeof(struct cudbg_rss_vf_conf); + break; + case CUDBG_PATH_MTU: + len = NMTUS * sizeof(u16); + break; + case CUDBG_PM_STATS: + len = sizeof(struct cudbg_pm_stats); + break; + case CUDBG_HW_SCHED: + len = sizeof(struct cudbg_hw_sched); + break; + case CUDBG_TP_INDIRECT: + switch (CHELSIO_CHIP_VERSION(adap->params.chip)) { + case CHELSIO_T5: + n = sizeof(t5_tp_pio_array) + + sizeof(t5_tp_tm_pio_array) + + sizeof(t5_tp_mib_index_array); + break; + case CHELSIO_T6: + n = sizeof(t6_tp_pio_array) + + sizeof(t6_tp_tm_pio_array) + + sizeof(t6_tp_mib_index_array); + break; + default: + break; + } + n = n / (IREG_NUM_ELEM * sizeof(u32)); + len = sizeof(struct ireg_buf) * n; + break; + case CUDBG_SGE_INDIRECT: + len = sizeof(struct ireg_buf) * 2; + break; + case CUDBG_ULPRX_LA: + len = sizeof(struct cudbg_ulprx_la); + break; + case CUDBG_TP_LA: + len = sizeof(struct cudbg_tp_la) + TPLA_SIZE * sizeof(u64); + break; + case CUDBG_CIM_PIF_LA: + len = sizeof(struct cudbg_cim_pif_la); + len += 2 * CIM_PIFLA_SIZE * 6 * sizeof(u32); + break; + case CUDBG_CLK: + len = sizeof(struct cudbg_clk_info); + break; + case CUDBG_PCIE_INDIRECT: + n = sizeof(t5_pcie_pdbg_array) / (IREG_NUM_ELEM * sizeof(u32)); + len = sizeof(struct ireg_buf) * n * 2; + break; + case CUDBG_PM_INDIRECT: + n = sizeof(t5_pm_rx_array) / (IREG_NUM_ELEM * sizeof(u32)); + len = sizeof(struct ireg_buf) * n * 2; + break; + case CUDBG_TID_INFO: + len = sizeof(struct cudbg_tid_info_region_rev1); + break; + case CUDBG_DUMP_CONTEXT: + len = cudbg_dump_context_size(adap); + break; + case CUDBG_MPS_TCAM: + len = sizeof(struct cudbg_mps_tcam) * + adap->params.arch.mps_tcam_size; + break; + case CUDBG_VPD_DATA: + len = sizeof(struct cudbg_vpd_data); + break; + case CUDBG_LE_TCAM: + cudbg_fill_le_tcam_info(adap, &tcam_region); + len = sizeof(struct cudbg_tcam) + + sizeof(struct cudbg_tid_data) * tcam_region.max_tid; + break; + case CUDBG_CCTRL: + len = sizeof(u16) * NMTUS * NCCTRL_WIN; + break; + case CUDBG_MA_INDIRECT: + if (CHELSIO_CHIP_VERSION(adap->params.chip) > CHELSIO_T5) { + n = sizeof(t6_ma_ireg_array) / + (IREG_NUM_ELEM * sizeof(u32)); + len = sizeof(struct ireg_buf) * n * 2; + } + break; + case CUDBG_ULPTX_LA: + len = sizeof(struct cudbg_ulptx_la); + break; + case CUDBG_UP_CIM_INDIRECT: + n = sizeof(t5_up_cim_reg_array) / (IREG_NUM_ELEM * sizeof(u32)); + len = sizeof(struct ireg_buf) * n; + break; + case CUDBG_PBT_TABLE: + len = sizeof(struct cudbg_pbt_tables); + break; + case CUDBG_MBOX_LOG: + len = sizeof(struct cudbg_mbox_log) * adap->mbox_log->size; + break; + case CUDBG_HMA_INDIRECT: + if (CHELSIO_CHIP_VERSION(adap->params.chip) > CHELSIO_T5) { + n = sizeof(t6_hma_ireg_array) / + (IREG_NUM_ELEM * sizeof(u32)); + len = sizeof(struct ireg_buf) * n; + } + break; + default: + break; + } + + return len; +} + +u32 cxgb4_get_dump_length(struct adapter *adap, u32 flag) +{ + u32 i, entity; + u32 len = 0; + + if (flag & CXGB4_ETH_DUMP_HW) { + for (i = 0; i < ARRAY_SIZE(cxgb4_collect_hw_dump); i++) { + entity = cxgb4_collect_hw_dump[i].entity; + len += cxgb4_get_entity_length(adap, entity); + } + } + + if (flag & CXGB4_ETH_DUMP_MEM) { + for (i = 0; i < ARRAY_SIZE(cxgb4_collect_mem_dump); i++) { + entity = cxgb4_collect_mem_dump[i].entity; + len += cxgb4_get_entity_length(adap, entity); + } + } + + return len; +} + +static void cxgb4_cudbg_collect_entity(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + const struct cxgb4_collect_entity *e_arr, + u32 arr_size, void *buf, u32 *tot_size) +{ + struct adapter *adap = pdbg_init->adap; + struct cudbg_error cudbg_err = { 0 }; + struct cudbg_entity_hdr *entity_hdr; + u32 entity_size, i; + u32 total_size = 0; + int ret; + + for (i = 0; i < arr_size; i++) { + const struct cxgb4_collect_entity *e = &e_arr[i]; + + /* Skip entities that won't fit in output buffer */ + entity_size = cxgb4_get_entity_length(adap, e->entity); + if (entity_size > + pdbg_init->outbuf_size - *tot_size - total_size) + continue; + + entity_hdr = cudbg_get_entity_hdr(buf, e->entity); + entity_hdr->entity_type = e->entity; + entity_hdr->start_offset = dbg_buff->offset; + memset(&cudbg_err, 0, sizeof(struct cudbg_error)); + ret = e->collect_cb(pdbg_init, dbg_buff, &cudbg_err); + if (ret) { + entity_hdr->size = 0; + dbg_buff->offset = entity_hdr->start_offset; + } else { + cudbg_align_debug_buffer(dbg_buff, entity_hdr); + } + + /* Log error and continue with next entity */ + if (cudbg_err.sys_err) + ret = CUDBG_SYSTEM_ERROR; + + entity_hdr->hdr_flags = ret; + entity_hdr->sys_err = cudbg_err.sys_err; + entity_hdr->sys_warn = cudbg_err.sys_warn; + total_size += entity_hdr->size; + } + + *tot_size += total_size; +} + +int cxgb4_cudbg_collect(struct adapter *adap, void *buf, u32 *buf_size, + u32 flag) +{ + struct cudbg_init cudbg_init = { 0 }; + struct cudbg_buffer dbg_buff = { 0 }; + u32 size, min_size, total_size = 0; + struct cudbg_hdr *cudbg_hdr; + + size = *buf_size; + + cudbg_init.adap = adap; + cudbg_init.outbuf = buf; + cudbg_init.outbuf_size = size; + + dbg_buff.data = buf; + dbg_buff.size = size; + dbg_buff.offset = 0; + + cudbg_hdr = (struct cudbg_hdr *)buf; + cudbg_hdr->signature = CUDBG_SIGNATURE; + cudbg_hdr->hdr_len = sizeof(struct cudbg_hdr); + cudbg_hdr->major_ver = CUDBG_MAJOR_VERSION; + cudbg_hdr->minor_ver = CUDBG_MINOR_VERSION; + cudbg_hdr->max_entities = CUDBG_MAX_ENTITY; + cudbg_hdr->chip_ver = adap->params.chip; + cudbg_hdr->dump_type = CUDBG_DUMP_TYPE_MINI; + cudbg_hdr->compress_type = CUDBG_COMPRESSION_NONE; + + min_size = sizeof(struct cudbg_hdr) + + sizeof(struct cudbg_entity_hdr) * + cudbg_hdr->max_entities; + if (size < min_size) + return -ENOMEM; + + dbg_buff.offset += min_size; + total_size = dbg_buff.offset; + + if (flag & CXGB4_ETH_DUMP_HW) + cxgb4_cudbg_collect_entity(&cudbg_init, &dbg_buff, + cxgb4_collect_hw_dump, + ARRAY_SIZE(cxgb4_collect_hw_dump), + buf, + &total_size); + + if (flag & CXGB4_ETH_DUMP_MEM) + cxgb4_cudbg_collect_entity(&cudbg_init, &dbg_buff, + cxgb4_collect_mem_dump, + ARRAY_SIZE(cxgb4_collect_mem_dump), + buf, + &total_size); + + cudbg_hdr->data_len = total_size; + *buf_size = total_size; + return 0; +} + +void cxgb4_init_ethtool_dump(struct adapter *adapter) +{ + adapter->eth_dump.flag = CXGB4_ETH_DUMP_NONE; + adapter->eth_dump.version = adapter->params.fw_vers; + adapter->eth_dump.len = 0; +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h new file mode 100644 index 000000000000..c099b5aa2214 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2017 Chelsio Communications. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +#ifndef __CXGB4_CUDBG_H__ +#define __CXGB4_CUDBG_H__ + +#include "cudbg_if.h" +#include "cudbg_lib_common.h" +#include "cudbg_lib.h" + +typedef int (*cudbg_collect_callback_t)(struct cudbg_init *pdbg_init, + struct cudbg_buffer *dbg_buff, + struct cudbg_error *cudbg_err); + +struct cxgb4_collect_entity { + enum cudbg_dbg_entity_type entity; + cudbg_collect_callback_t collect_cb; +}; + +enum CXGB4_ETHTOOL_DUMP_FLAGS { + CXGB4_ETH_DUMP_NONE = ETH_FW_DUMP_DISABLE, + CXGB4_ETH_DUMP_MEM = (1 << 0), /* On-Chip Memory Dumps */ + CXGB4_ETH_DUMP_HW = (1 << 1), /* various FW and HW dumps */ +}; + +u32 cxgb4_get_dump_length(struct adapter *adap, u32 flag); +int cxgb4_cudbg_collect(struct adapter *adap, void *buf, u32 *buf_size, + u32 flag); +void cxgb4_init_ethtool_dump(struct adapter *adapter); +#endif /* __CXGB4_CUDBG_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c index 6ee2ed30626b..4e7f72b17e82 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c @@ -40,8 +40,7 @@ static inline bool cxgb4_dcb_state_synced(enum cxgb4_dcb_state state) return false; } -/* Initialize a port's Data Center Bridging state. Typically used after a - * Link Down event. +/* Initialize a port's Data Center Bridging state. */ void cxgb4_dcb_state_init(struct net_device *dev) { @@ -106,6 +105,15 @@ static void cxgb4_dcb_cleanup_apps(struct net_device *dev) } } +/* Reset a port's Data Center Bridging state. Typically used after a + * Link Down event. + */ +void cxgb4_dcb_reset(struct net_device *dev) +{ + cxgb4_dcb_cleanup_apps(dev); + cxgb4_dcb_state_init(dev); +} + /* Finite State machine for Data Center Bridging. */ void cxgb4_dcb_state_fsm(struct net_device *dev, @@ -194,8 +202,7 @@ void cxgb4_dcb_state_fsm(struct net_device *dev, * state. We need to reset back to a ground state * of incomplete. */ - cxgb4_dcb_cleanup_apps(dev); - cxgb4_dcb_state_init(dev); + cxgb4_dcb_reset(dev); dcb->state = CXGB4_DCB_STATE_FW_INCOMPLETE; dcb->supported = CXGB4_DCBX_FW_SUPPORT; linkwatch_fire_event(dev); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h index ccf24d3dc982..02040b99c78a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h @@ -131,6 +131,7 @@ struct port_dcb_info { void cxgb4_dcb_state_init(struct net_device *); void cxgb4_dcb_version_init(struct net_device *); +void cxgb4_dcb_reset(struct net_device *dev); void cxgb4_dcb_state_fsm(struct net_device *, enum cxgb4_dcb_state_input); void cxgb4_dcb_handle_fw_update(struct adapter *, const struct fw_port_cmd *); void cxgb4_dcb_set_caps(struct adapter *, const struct fw_port_cmd *); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 76540b0e082d..917663b35603 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -2211,7 +2211,7 @@ static int rss_key_show(struct seq_file *seq, void *v) { u32 key[10]; - t4_read_rss_key(seq->private, key); + t4_read_rss_key(seq->private, key, true); seq_printf(seq, "%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x\n", key[9], key[8], key[7], key[6], key[5], key[4], key[3], key[2], key[1], key[0]); @@ -2248,7 +2248,7 @@ static ssize_t rss_key_write(struct file *file, const char __user *buf, } } - t4_write_rss_key(adap, key, -1); + t4_write_rss_key(adap, key, -1, true); return count; } @@ -2325,12 +2325,13 @@ static int rss_pf_config_open(struct inode *inode, struct file *file) return -ENOMEM; pfconf = (struct rss_pf_conf *)p->data; - rss_pf_map = t4_read_rss_pf_map(adapter); - rss_pf_mask = t4_read_rss_pf_mask(adapter); + rss_pf_map = t4_read_rss_pf_map(adapter, true); + rss_pf_mask = t4_read_rss_pf_mask(adapter, true); for (pf = 0; pf < 8; pf++) { pfconf[pf].rss_pf_map = rss_pf_map; pfconf[pf].rss_pf_mask = rss_pf_mask; - t4_read_rss_pf_config(adapter, pf, &pfconf[pf].rss_pf_config); + t4_read_rss_pf_config(adapter, pf, &pfconf[pf].rss_pf_config, + true); } return 0; } @@ -2393,7 +2394,7 @@ static int rss_vf_config_open(struct inode *inode, struct file *file) vfconf = (struct rss_vf_conf *)p->data; for (vf = 0; vf < vfcount; vf++) { t4_read_rss_vf_config(adapter, vf, &vfconf[vf].rss_vf_vfl, - &vfconf[vf].rss_vf_vfh); + &vfconf[vf].rss_vf_vfh, true); } return 0; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c index a71af1e587e2..eb338212f5af 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c @@ -21,6 +21,7 @@ #include "cxgb4.h" #include "t4_regs.h" #include "t4fw_api.h" +#include "cxgb4_cudbg.h" #define EEPROM_MAGIC 0x38E2F10C @@ -335,10 +336,10 @@ static void collect_adapter_stats(struct adapter *adap, struct adapter_stats *s) memset(s, 0, sizeof(*s)); spin_lock(&adap->stats_lock); - t4_tp_get_tcp_stats(adap, &v4, &v6); - t4_tp_get_rdma_stats(adap, &rdma_stats); - t4_get_usm_stats(adap, &usm_stats); - t4_tp_get_err_stats(adap, &err_stats); + t4_tp_get_tcp_stats(adap, &v4, &v6, false); + t4_tp_get_rdma_stats(adap, &rdma_stats, false); + t4_get_usm_stats(adap, &usm_stats, false); + t4_tp_get_err_stats(adap, &err_stats, false); spin_unlock(&adap->stats_lock); s->db_drop = adap->db_stats.db_drop; @@ -388,9 +389,9 @@ static void collect_channel_stats(struct adapter *adap, struct channel_stats *s, memset(s, 0, sizeof(*s)); spin_lock(&adap->stats_lock); - t4_tp_get_cpl_stats(adap, &cpl_stats); - t4_tp_get_err_stats(adap, &err_stats); - t4_get_fcoe_stats(adap, i, &fcoe_stats); + t4_tp_get_cpl_stats(adap, &cpl_stats, false); + t4_tp_get_err_stats(adap, &err_stats, false); + t4_get_fcoe_stats(adap, i, &fcoe_stats, false); spin_unlock(&adap->stats_lock); s->cpl_req = cpl_stats.req[i]; @@ -1063,40 +1064,11 @@ static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c) return 0; } -/** - * eeprom_ptov - translate a physical EEPROM address to virtual - * @phys_addr: the physical EEPROM address - * @fn: the PCI function number - * @sz: size of function-specific area - * - * Translate a physical EEPROM address to virtual. The first 1K is - * accessed through virtual addresses starting at 31K, the rest is - * accessed through virtual addresses starting at 0. - * - * The mapping is as follows: - * [0..1K) -> [31K..32K) - * [1K..1K+A) -> [31K-A..31K) - * [1K+A..ES) -> [0..ES-A-1K) - * - * where A = @fn * @sz, and ES = EEPROM size. - */ -static int eeprom_ptov(unsigned int phys_addr, unsigned int fn, unsigned int sz) -{ - fn *= sz; - if (phys_addr < 1024) - return phys_addr + (31 << 10); - if (phys_addr < 1024 + fn) - return 31744 - fn + phys_addr - 1024; - if (phys_addr < EEPROMSIZE) - return phys_addr - 1024 - fn; - return -EINVAL; -} - /* The next two routines implement eeprom read/write from physical addresses. */ static int eeprom_rd_phys(struct adapter *adap, unsigned int phys_addr, u32 *v) { - int vaddr = eeprom_ptov(phys_addr, adap->pf, EEPROMPFSIZE); + int vaddr = t4_eeprom_ptov(phys_addr, adap->pf, EEPROMPFSIZE); if (vaddr >= 0) vaddr = pci_read_vpd(adap->pdev, vaddr, sizeof(u32), v); @@ -1105,7 +1077,7 @@ static int eeprom_rd_phys(struct adapter *adap, unsigned int phys_addr, u32 *v) static int eeprom_wr_phys(struct adapter *adap, unsigned int phys_addr, u32 v) { - int vaddr = eeprom_ptov(phys_addr, adap->pf, EEPROMPFSIZE); + int vaddr = t4_eeprom_ptov(phys_addr, adap->pf, EEPROMPFSIZE); if (vaddr >= 0) vaddr = pci_write_vpd(adap->pdev, vaddr, sizeof(u32), &v); @@ -1374,6 +1346,56 @@ static int get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, return -EOPNOTSUPP; } +static int set_dump(struct net_device *dev, struct ethtool_dump *eth_dump) +{ + struct adapter *adapter = netdev2adap(dev); + u32 len = 0; + + len = sizeof(struct cudbg_hdr) + + sizeof(struct cudbg_entity_hdr) * CUDBG_MAX_ENTITY; + len += cxgb4_get_dump_length(adapter, eth_dump->flag); + + adapter->eth_dump.flag = eth_dump->flag; + adapter->eth_dump.len = len; + return 0; +} + +static int get_dump_flag(struct net_device *dev, struct ethtool_dump *eth_dump) +{ + struct adapter *adapter = netdev2adap(dev); + + eth_dump->flag = adapter->eth_dump.flag; + eth_dump->len = adapter->eth_dump.len; + eth_dump->version = adapter->eth_dump.version; + return 0; +} + +static int get_dump_data(struct net_device *dev, struct ethtool_dump *eth_dump, + void *buf) +{ + struct adapter *adapter = netdev2adap(dev); + u32 len = 0; + int ret = 0; + + if (adapter->eth_dump.flag == CXGB4_ETH_DUMP_NONE) + return -ENOENT; + + len = sizeof(struct cudbg_hdr) + + sizeof(struct cudbg_entity_hdr) * CUDBG_MAX_ENTITY; + len += cxgb4_get_dump_length(adapter, adapter->eth_dump.flag); + if (eth_dump->len < len) + return -ENOMEM; + + ret = cxgb4_cudbg_collect(adapter, buf, &len, adapter->eth_dump.flag); + if (ret) + return ret; + + eth_dump->flag = adapter->eth_dump.flag; + eth_dump->len = len; + eth_dump->version = adapter->eth_dump.version; + return 0; +} + static const struct ethtool_ops cxgb_ethtool_ops = { .get_link_ksettings = get_link_ksettings, .set_link_ksettings = set_link_ksettings, @@ -1404,7 +1426,10 @@ static const struct ethtool_ops cxgb_ethtool_ops = { .get_rxfh = get_rss_table, .set_rxfh = set_rss_table, .flash_device = set_flash, - .get_ts_info = get_ts_info + .get_ts_info = get_ts_info, + .set_dump = set_dump, + .get_dump_flag = get_dump_flag, + .get_dump_data = get_dump_data, }; void cxgb4_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 45b5853ca2f1..5980f308a253 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -31,10 +31,15 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ +#include <net/ipv6.h> #include "cxgb4.h" #include "t4_regs.h" +#include "t4_tcb.h" +#include "t4_values.h" +#include "clip_tbl.h" #include "l2t.h" +#include "smt.h" #include "t4fw_api.h" #include "cxgb4_filter.h" @@ -48,6 +53,194 @@ static inline bool unsupported(u32 conf, u32 conf_mask, u32 val, u32 mask) return !(conf & conf_mask) && is_field_set(val, mask); } +static int set_tcb_field(struct adapter *adap, struct filter_entry *f, + unsigned int ftid, u16 word, u64 mask, u64 val, + int no_reply) +{ + struct cpl_set_tcb_field *req; + struct sk_buff *skb; + + skb = alloc_skb(sizeof(struct cpl_set_tcb_field), GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + req = (struct cpl_set_tcb_field *)__skb_put(skb, sizeof(*req)); + memset(req, 0, sizeof(*req)); + INIT_TP_WR_CPL(req, CPL_SET_TCB_FIELD, ftid); + req->reply_ctrl = htons(REPLY_CHAN_V(0) | + QUEUENO_V(adap->sge.fw_evtq.abs_id) | + NO_REPLY_V(no_reply)); + req->word_cookie = htons(TCB_WORD_V(word) | TCB_COOKIE_V(ftid)); + req->mask = cpu_to_be64(mask); + req->val = cpu_to_be64(val); + set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3); + t4_ofld_send(adap, skb); + return 0; +} + +/* Set one of the t_flags bits in the TCB. + */ +static int set_tcb_tflag(struct adapter *adap, struct filter_entry *f, + unsigned int ftid, unsigned int bit_pos, + unsigned int val, int no_reply) +{ + return set_tcb_field(adap, f, ftid, TCB_T_FLAGS_W, 1ULL << bit_pos, + (unsigned long long)val << bit_pos, no_reply); +} + +static void mk_abort_req_ulp(struct cpl_abort_req *abort_req, unsigned int tid) +{ + struct ulp_txpkt *txpkt = (struct ulp_txpkt *)abort_req; + struct ulptx_idata *sc = (struct ulptx_idata *)(txpkt + 1); + + txpkt->cmd_dest = htonl(ULPTX_CMD_V(ULP_TX_PKT) | ULP_TXPKT_DEST_V(0)); + txpkt->len = htonl(DIV_ROUND_UP(sizeof(*abort_req), 16)); + sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_IMM)); + sc->len = htonl(sizeof(*abort_req) - sizeof(struct work_request_hdr)); + OPCODE_TID(abort_req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid)); + abort_req->rsvd0 = htonl(0); + abort_req->rsvd1 = 0; + abort_req->cmd = CPL_ABORT_NO_RST; +} + +static void mk_abort_rpl_ulp(struct cpl_abort_rpl *abort_rpl, unsigned int tid) +{ + struct ulp_txpkt *txpkt = (struct ulp_txpkt *)abort_rpl; + struct ulptx_idata *sc = (struct ulptx_idata *)(txpkt + 1); + + txpkt->cmd_dest = htonl(ULPTX_CMD_V(ULP_TX_PKT) | ULP_TXPKT_DEST_V(0)); + txpkt->len = htonl(DIV_ROUND_UP(sizeof(*abort_rpl), 16)); + sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_IMM)); + sc->len = htonl(sizeof(*abort_rpl) - sizeof(struct work_request_hdr)); + OPCODE_TID(abort_rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, tid)); + abort_rpl->rsvd0 = htonl(0); + abort_rpl->rsvd1 = 0; + abort_rpl->cmd = CPL_ABORT_NO_RST; +} + +static void mk_set_tcb_ulp(struct filter_entry *f, + struct cpl_set_tcb_field *req, + unsigned int word, u64 mask, u64 val, + u8 cookie, int no_reply) +{ + struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req; + struct ulptx_idata *sc = (struct ulptx_idata *)(txpkt + 1); + + txpkt->cmd_dest = htonl(ULPTX_CMD_V(ULP_TX_PKT) | ULP_TXPKT_DEST_V(0)); + txpkt->len = htonl(DIV_ROUND_UP(sizeof(*req), 16)); + sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_IMM)); + sc->len = htonl(sizeof(*req) - sizeof(struct work_request_hdr)); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, f->tid)); + req->reply_ctrl = htons(NO_REPLY_V(no_reply) | REPLY_CHAN_V(0) | + QUEUENO_V(0)); + req->word_cookie = htons(TCB_WORD_V(word) | TCB_COOKIE_V(cookie)); + req->mask = cpu_to_be64(mask); + req->val = cpu_to_be64(val); + sc = (struct ulptx_idata *)(req + 1); + sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_NOOP)); + sc->len = htonl(0); +} + +static int configure_filter_smac(struct adapter *adap, struct filter_entry *f) +{ + int err; + + /* do a set-tcb for smac-sel and CWR bit.. */ + err = set_tcb_tflag(adap, f, f->tid, TF_CCTRL_CWR_S, 1, 1); + if (err) + goto smac_err; + + err = set_tcb_field(adap, f, f->tid, TCB_SMAC_SEL_W, + TCB_SMAC_SEL_V(TCB_SMAC_SEL_M), + TCB_SMAC_SEL_V(f->smt->idx), 1); + if (!err) + return 0; + +smac_err: + dev_err(adap->pdev_dev, "filter %u smac config failed with error %u\n", + f->tid, err); + return err; +} + +static void set_nat_params(struct adapter *adap, struct filter_entry *f, + unsigned int tid, bool dip, bool sip, bool dp, + bool sp) +{ + if (dip) { + if (f->fs.type) { + set_tcb_field(adap, f, tid, TCB_SND_UNA_RAW_W, + WORD_MASK, f->fs.nat_lip[15] | + f->fs.nat_lip[14] << 8 | + f->fs.nat_lip[13] << 16 | + f->fs.nat_lip[12] << 24, 1); + + set_tcb_field(adap, f, tid, TCB_SND_UNA_RAW_W + 1, + WORD_MASK, f->fs.nat_lip[11] | + f->fs.nat_lip[10] << 8 | + f->fs.nat_lip[9] << 16 | + f->fs.nat_lip[8] << 24, 1); + + set_tcb_field(adap, f, tid, TCB_SND_UNA_RAW_W + 2, + WORD_MASK, f->fs.nat_lip[7] | + f->fs.nat_lip[6] << 8 | + f->fs.nat_lip[5] << 16 | + f->fs.nat_lip[4] << 24, 1); + + set_tcb_field(adap, f, tid, TCB_SND_UNA_RAW_W + 3, + WORD_MASK, f->fs.nat_lip[3] | + f->fs.nat_lip[2] << 8 | + f->fs.nat_lip[1] << 16 | + f->fs.nat_lip[0] << 24, 1); + } else { + set_tcb_field(adap, f, tid, TCB_RX_FRAG3_LEN_RAW_W, + WORD_MASK, f->fs.nat_lip[3] | + f->fs.nat_lip[2] << 8 | + f->fs.nat_lip[1] << 16 | + f->fs.nat_lip[0] << 24, 1); + } + } + + if (sip) { + if (f->fs.type) { + set_tcb_field(adap, f, tid, TCB_RX_FRAG2_PTR_RAW_W, + WORD_MASK, f->fs.nat_fip[15] | + f->fs.nat_fip[14] << 8 | + f->fs.nat_fip[13] << 16 | + f->fs.nat_fip[12] << 24, 1); + + set_tcb_field(adap, f, tid, TCB_RX_FRAG2_PTR_RAW_W + 1, + WORD_MASK, f->fs.nat_fip[11] | + f->fs.nat_fip[10] << 8 | + f->fs.nat_fip[9] << 16 | + f->fs.nat_fip[8] << 24, 1); + + set_tcb_field(adap, f, tid, TCB_RX_FRAG2_PTR_RAW_W + 2, + WORD_MASK, f->fs.nat_fip[7] | + f->fs.nat_fip[6] << 8 | + f->fs.nat_fip[5] << 16 | + f->fs.nat_fip[4] << 24, 1); + + set_tcb_field(adap, f, tid, TCB_RX_FRAG2_PTR_RAW_W + 3, + WORD_MASK, f->fs.nat_fip[3] | + f->fs.nat_fip[2] << 8 | + f->fs.nat_fip[1] << 16 | + f->fs.nat_fip[0] << 24, 1); + + } else { + set_tcb_field(adap, f, tid, + TCB_RX_FRAG3_START_IDX_OFFSET_RAW_W, + WORD_MASK, f->fs.nat_fip[3] | + f->fs.nat_fip[2] << 8 | + f->fs.nat_fip[1] << 16 | + f->fs.nat_fip[0] << 24, 1); + } + } + + set_tcb_field(adap, f, tid, TCB_PDU_HDR_LEN_W, WORD_MASK, + (dp ? f->fs.nat_lport : 0) | + (sp ? f->fs.nat_fport << 16 : 0), 1); +} + /* Validate filter spec against configuration done on the card. */ static int validate_filter(struct net_device *dev, struct ch_filter_specification *fs) @@ -148,6 +341,116 @@ static int get_filter_steerq(struct net_device *dev, return iq; } +static int get_filter_count(struct adapter *adapter, unsigned int fidx, + u64 *pkts, u64 *bytes, bool hash) +{ + unsigned int tcb_base, tcbaddr; + unsigned int word_offset; + struct filter_entry *f; + __be64 be64_byte_count; + int ret; + + tcb_base = t4_read_reg(adapter, TP_CMM_TCB_BASE_A); + if (is_hashfilter(adapter) && hash) { + if (fidx < adapter->tids.ntids) { + f = adapter->tids.tid_tab[fidx]; + if (!f) + return -EINVAL; + } else { + return -E2BIG; + } + } else { + if ((fidx != (adapter->tids.nftids + + adapter->tids.nsftids - 1)) && + fidx >= adapter->tids.nftids) + return -E2BIG; + + f = &adapter->tids.ftid_tab[fidx]; + if (!f->valid) + return -EINVAL; + } + tcbaddr = tcb_base + f->tid * TCB_SIZE; + + spin_lock(&adapter->win0_lock); + if (is_t4(adapter->params.chip)) { + __be64 be64_count; + + /* T4 doesn't maintain byte counts in hw */ + *bytes = 0; + + /* Get pkts */ + word_offset = 4; + ret = t4_memory_rw(adapter, MEMWIN_NIC, MEM_EDC0, + tcbaddr + (word_offset * sizeof(__be32)), + sizeof(be64_count), + (__be32 *)&be64_count, + T4_MEMORY_READ); + if (ret < 0) + goto out; + *pkts = be64_to_cpu(be64_count); + } else { + __be32 be32_count; + + /* Get bytes */ + word_offset = 4; + ret = t4_memory_rw(adapter, MEMWIN_NIC, MEM_EDC0, + tcbaddr + (word_offset * sizeof(__be32)), + sizeof(be64_byte_count), + &be64_byte_count, + T4_MEMORY_READ); + if (ret < 0) + goto out; + *bytes = be64_to_cpu(be64_byte_count); + + /* Get pkts */ + word_offset = 6; + ret = t4_memory_rw(adapter, MEMWIN_NIC, MEM_EDC0, + tcbaddr + (word_offset * sizeof(__be32)), + sizeof(be32_count), + &be32_count, + T4_MEMORY_READ); + if (ret < 0) + goto out; + *pkts = (u64)be32_to_cpu(be32_count); + } + +out: + spin_unlock(&adapter->win0_lock); + return ret; +} + +int cxgb4_get_filter_counters(struct net_device *dev, unsigned int fidx, + u64 *hitcnt, u64 *bytecnt, bool hash) +{ + struct adapter *adapter = netdev2adap(dev); + + return get_filter_count(adapter, fidx, hitcnt, bytecnt, hash); +} + +int cxgb4_get_free_ftid(struct net_device *dev, int family) +{ + struct adapter *adap = netdev2adap(dev); + struct tid_info *t = &adap->tids; + int ftid; + + spin_lock_bh(&t->ftid_lock); + if (family == PF_INET) { + ftid = find_first_zero_bit(t->ftid_bmap, t->nftids); + if (ftid >= t->nftids) + ftid = -1; + } else { + ftid = bitmap_find_free_region(t->ftid_bmap, t->nftids, 2); + if (ftid < 0) + goto out_unlock; + + /* this is only a lookup, keep the found region unallocated */ + bitmap_release_region(t->ftid_bmap, ftid, 2); + } +out_unlock: + spin_unlock_bh(&t->ftid_lock); + return ftid; +} + static int cxgb4_set_ftid(struct tid_info *t, int fidx, int family) { spin_lock_bh(&t->ftid_lock); @@ -191,7 +494,8 @@ static int del_filter_wr(struct adapter *adapter, int fidx) return -ENOMEM; fwr = __skb_put(skb, len); - t4_mk_filtdelwr(f->tid, fwr, adapter->sge.fw_evtq.abs_id); + t4_mk_filtdelwr(f->tid, fwr, (adapter->flags & SHUTTING_DOWN) ? -1 + : adapter->sge.fw_evtq.abs_id); /* Mark the filter as "pending" and ship off the Filter Work Request. * When we get the Work Request Reply we'll clear the pending status. @@ -210,7 +514,7 @@ static int del_filter_wr(struct adapter *adapter, int fidx) int set_filter_wr(struct adapter *adapter, int fidx) { struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; - struct fw_filter_wr *fwr; + struct fw_filter2_wr *fwr; struct sk_buff *skb; skb = alloc_skb(sizeof(*fwr), GFP_KERNEL); @@ -231,6 +535,21 @@ int set_filter_wr(struct adapter *adapter, int fidx) } } + /* If the new filter requires loopback Source MAC rewriting then + * we need to allocate a SMT entry for the filter. + */ + if (f->fs.newsmac) { + f->smt = cxgb4_smt_alloc_switching(f->dev, f->fs.smac); + if (!f->smt) { + if (f->l2t) { + cxgb4_l2t_release(f->l2t); + f->l2t = NULL; + } + kfree_skb(skb); + return -ENOMEM; + } + } + fwr = __skb_put_zero(skb, sizeof(*fwr)); /* It would be nice to put most of the following in t4_hw.c but most @@ -241,7 +560,10 @@ int set_filter_wr(struct adapter *adapter, int fidx) * filter specification structure but for now it's easiest to simply * put this fairly direct code in line ... */ - fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR)); + if (adapter->params.filter2_wr_support) + fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER2_WR)); + else + fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR)); fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr) / 16)); fwr->tid_to_iq = htonl(FW_FILTER_WR_TID_V(f->tid) | @@ -256,7 +578,6 @@ int set_filter_wr(struct adapter *adapter, int fidx) FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) | FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) | FW_FILTER_WR_DMAC_V(f->fs.newdmac) | - FW_FILTER_WR_SMAC_V(f->fs.newsmac) | FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT || f->fs.newvlan == VLAN_REWRITE) | FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE || @@ -303,8 +624,18 @@ int set_filter_wr(struct adapter *adapter, int fidx) fwr->lpm = htons(f->fs.mask.lport); fwr->fp = htons(f->fs.val.fport); fwr->fpm = htons(f->fs.mask.fport); - if (f->fs.newsmac) - memcpy(fwr->sma, f->fs.smac, sizeof(fwr->sma)); + + if (adapter->params.filter2_wr_support) { + fwr->natmode_to_ulp_type = + FW_FILTER2_WR_ULP_TYPE_V(f->fs.nat_mode ? + ULP_MODE_TCPDDP : + ULP_MODE_NONE) | + FW_FILTER2_WR_NATMODE_V(f->fs.nat_mode); + memcpy(fwr->newlip, f->fs.nat_lip, sizeof(fwr->newlip)); + memcpy(fwr->newfip, f->fs.nat_fip, sizeof(fwr->newfip)); + fwr->newlport = htons(f->fs.nat_lport); + fwr->newfport = htons(f->fs.nat_fport); + } /* Mark the filter as "pending" and ship off the Filter Work Request. * When we get the Work Request Reply we'll clear the pending status. @@ -354,14 +685,18 @@ int delete_filter(struct adapter *adapter, unsigned int fidx) void clear_filter(struct adapter *adap, struct filter_entry *f) { /* If the new or old filter have loopback rewriteing rules then we'll - * need to free any existing Layer Two Table (L2T) entries of the old - * filter rule. The firmware will handle freeing up any Source MAC - * Table (SMT) entries used for rewriting Source MAC Addresses in - * loopback rules. + * need to free any existing L2T, SMT, CLIP entries of filter + * rule. */ if (f->l2t) cxgb4_l2t_release(f->l2t); + if (f->smt) + cxgb4_smt_release(f->smt); + + if (f->fs.hash && f->fs.type) + cxgb4_clip_release(f->dev, (const u32 *)&f->fs.val.lip, 1); + /* The zeroing of the filter rule below clears the filter valid, * pending, locked flags, l2t pointer, etc. so it's all we need for * this operation. @@ -431,6 +766,418 @@ static void fill_default_mask(struct ch_filter_specification *fs) fs->mask.fport = ~0; } +static bool is_addr_all_mask(u8 *ipmask, int family) +{ + if (family == AF_INET) { + struct in_addr *addr; + + addr = (struct in_addr *)ipmask; + if (addr->s_addr == 0xffffffff) + return true; + } else if (family == AF_INET6) { + struct in6_addr *addr6; + + addr6 = (struct in6_addr *)ipmask; + if (addr6->s6_addr32[0] == 0xffffffff && + addr6->s6_addr32[1] == 0xffffffff && + addr6->s6_addr32[2] == 0xffffffff && + addr6->s6_addr32[3] == 0xffffffff) + return true; + } + return false; +} + +static bool is_inaddr_any(u8 *ip, int family) +{ + int addr_type; + + if (family == AF_INET) { + struct in_addr *addr; + + addr = (struct in_addr *)ip; + if (addr->s_addr == htonl(INADDR_ANY)) + return true; + } else if (family == AF_INET6) { + struct in6_addr *addr6; + + addr6 = (struct in6_addr *)ip; + addr_type = ipv6_addr_type((const struct in6_addr *) + &addr6); + if (addr_type == IPV6_ADDR_ANY) + return true; + } + return false; +} + +bool is_filter_exact_match(struct adapter *adap, + struct ch_filter_specification *fs) +{ + struct tp_params *tp = &adap->params.tp; + u64 hash_filter_mask = tp->hash_filter_mask; + u32 mask; + + if (!is_hashfilter(adap)) + return false; + + if (fs->type) { + if (is_inaddr_any(fs->val.fip, AF_INET6) || + !is_addr_all_mask(fs->mask.fip, AF_INET6)) + return false; + + if (is_inaddr_any(fs->val.lip, AF_INET6) || + !is_addr_all_mask(fs->mask.lip, AF_INET6)) + return false; + } else { + if (is_inaddr_any(fs->val.fip, AF_INET) || + !is_addr_all_mask(fs->mask.fip, AF_INET)) + return false; + + if (is_inaddr_any(fs->val.lip, AF_INET) || + !is_addr_all_mask(fs->mask.lip, AF_INET)) + return false; + } + + if (!fs->val.lport || fs->mask.lport != 0xffff) + return false; + + if (!fs->val.fport || fs->mask.fport != 0xffff) + return false; + + if (tp->fcoe_shift >= 0) { + mask = (hash_filter_mask >> tp->fcoe_shift) & FT_FCOE_W; + if (mask && !fs->mask.fcoe) + return false; + } + + if (tp->port_shift >= 0) { + mask = (hash_filter_mask >> tp->port_shift) & FT_PORT_W; + if (mask && !fs->mask.iport) + return false; + } + + if (tp->vnic_shift >= 0) { + mask = (hash_filter_mask >> tp->vnic_shift) & FT_VNIC_ID_W; + + if ((adap->params.tp.ingress_config & VNIC_F)) { + if (mask && !fs->mask.pfvf_vld) + return false; + } else { + if (mask && !fs->mask.ovlan_vld) + return false; + } + } + + if (tp->vlan_shift >= 0) { + mask = (hash_filter_mask >> tp->vlan_shift) & FT_VLAN_W; + if (mask && !fs->mask.ivlan) + return false; + } + + if (tp->tos_shift >= 0) { + mask = (hash_filter_mask >> tp->tos_shift) & FT_TOS_W; + if (mask && !fs->mask.tos) + return false; + } + + if (tp->protocol_shift >= 0) { + mask = (hash_filter_mask >> tp->protocol_shift) & FT_PROTOCOL_W; + if (mask && !fs->mask.proto) + return false; + } + + if (tp->ethertype_shift >= 0) { + mask = (hash_filter_mask >> tp->ethertype_shift) & + FT_ETHERTYPE_W; + if (mask && !fs->mask.ethtype) + return false; + } + + if (tp->macmatch_shift >= 0) { + mask = (hash_filter_mask >> tp->macmatch_shift) & FT_MACMATCH_W; + if (mask && !fs->mask.macidx) + return false; + } + + if (tp->matchtype_shift >= 0) { + mask = (hash_filter_mask >> tp->matchtype_shift) & + FT_MPSHITTYPE_W; + if (mask && !fs->mask.matchtype) + return false; + } + if (tp->frag_shift >= 0) { + mask = (hash_filter_mask >> tp->frag_shift) & + FT_FRAGMENTATION_W; + if (mask && !fs->mask.frag) + return false; + } + return true; +} + +static u64 hash_filter_ntuple(struct ch_filter_specification *fs, + struct net_device *dev) +{ + struct adapter *adap = netdev2adap(dev); + struct tp_params *tp = &adap->params.tp; + u64 ntuple = 0; + + /* Initialize each of the fields which we care about which are present + * in the Compressed Filter Tuple. + */ + if (tp->vlan_shift >= 0 && fs->mask.ivlan) + ntuple |= (FT_VLAN_VLD_F | fs->val.ivlan) << tp->vlan_shift; + + if (tp->port_shift >= 0 && fs->mask.iport) + ntuple |= (u64)fs->val.iport << tp->port_shift; + + if (tp->protocol_shift >= 0) { + if (!fs->val.proto) + ntuple |= (u64)IPPROTO_TCP << tp->protocol_shift; + else + ntuple |= (u64)fs->val.proto << tp->protocol_shift; + } + + if (tp->tos_shift >= 0 && fs->mask.tos) + ntuple |= (u64)(fs->val.tos) << tp->tos_shift; + + if (tp->vnic_shift >= 0) { + if ((adap->params.tp.ingress_config & VNIC_F) && + fs->mask.pfvf_vld) + ntuple |= (u64)((fs->val.pfvf_vld << 16) | + (fs->val.pf << 13) | + (fs->val.vf)) << tp->vnic_shift; + else + ntuple |= (u64)((fs->val.ovlan_vld << 16) | + (fs->val.ovlan)) << tp->vnic_shift; + } + + if (tp->macmatch_shift >= 0 && fs->mask.macidx) + ntuple |= (u64)(fs->val.macidx) << tp->macmatch_shift; + + if (tp->ethertype_shift >= 0 && fs->mask.ethtype) + ntuple |= (u64)(fs->val.ethtype) << tp->ethertype_shift; + + if (tp->matchtype_shift >= 0 && fs->mask.matchtype) + ntuple |= (u64)(fs->val.matchtype) << tp->matchtype_shift; + + if (tp->frag_shift >= 0 && fs->mask.frag) + ntuple |= (u64)(fs->val.frag) << tp->frag_shift; + + if (tp->fcoe_shift >= 0 && fs->mask.fcoe) + ntuple |= (u64)(fs->val.fcoe) << tp->fcoe_shift; + return ntuple; +} + +static void mk_act_open_req6(struct filter_entry *f, struct sk_buff *skb, + unsigned int qid_filterid, struct adapter *adap) +{ + struct cpl_t6_act_open_req6 *t6req = NULL; + struct cpl_act_open_req6 *req = NULL; + + t6req = (struct cpl_t6_act_open_req6 *)__skb_put(skb, sizeof(*t6req)); + INIT_TP_WR(t6req, 0); + req = (struct cpl_act_open_req6 *)t6req; + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, qid_filterid)); + req->local_port = cpu_to_be16(f->fs.val.lport); + req->peer_port = cpu_to_be16(f->fs.val.fport); + req->local_ip_hi = *(__be64 *)(&f->fs.val.lip); + req->local_ip_lo = *(((__be64 *)&f->fs.val.lip) + 1); + req->peer_ip_hi = *(__be64 *)(&f->fs.val.fip); + req->peer_ip_lo = *(((__be64 *)&f->fs.val.fip) + 1); + req->opt0 = cpu_to_be64(NAGLE_V(f->fs.newvlan == VLAN_REMOVE || + f->fs.newvlan == VLAN_REWRITE) | + DELACK_V(f->fs.hitcnts) | + L2T_IDX_V(f->l2t ? f->l2t->idx : 0) | + SMAC_SEL_V((cxgb4_port_viid(f->dev) & + 0x7F) << 1) | + TX_CHAN_V(f->fs.eport) | + NO_CONG_V(f->fs.rpttid) | + ULP_MODE_V(f->fs.nat_mode ? + ULP_MODE_TCPDDP : ULP_MODE_NONE) | + TCAM_BYPASS_F | NON_OFFLOAD_F); + t6req->params = cpu_to_be64(FILTER_TUPLE_V(hash_filter_ntuple(&f->fs, + f->dev))); + t6req->opt2 = htonl(RSS_QUEUE_VALID_F | + RSS_QUEUE_V(f->fs.iq) | + TX_QUEUE_V(f->fs.nat_mode) | + T5_OPT_2_VALID_F | + RX_CHANNEL_F | + CONG_CNTRL_V((f->fs.action == FILTER_DROP) | + (f->fs.dirsteer << 1)) | + PACE_V((f->fs.maskhash) | + ((f->fs.dirsteerhash) << 1)) | + CCTRL_ECN_V(f->fs.action == FILTER_SWITCH)); +} + +static void mk_act_open_req(struct filter_entry *f, struct sk_buff *skb, + unsigned int qid_filterid, struct adapter *adap) +{ + struct cpl_t6_act_open_req *t6req = NULL; + struct cpl_act_open_req *req = NULL; + + t6req = (struct cpl_t6_act_open_req *)__skb_put(skb, sizeof(*t6req)); + INIT_TP_WR(t6req, 0); + req = (struct cpl_act_open_req *)t6req; + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, qid_filterid)); + req->local_port = cpu_to_be16(f->fs.val.lport); + req->peer_port = cpu_to_be16(f->fs.val.fport); + req->local_ip = f->fs.val.lip[0] | f->fs.val.lip[1] << 8 | + f->fs.val.lip[2] << 16 | f->fs.val.lip[3] << 24; + req->peer_ip = f->fs.val.fip[0] | f->fs.val.fip[1] << 8 | + f->fs.val.fip[2] << 16 | f->fs.val.fip[3] << 24; + req->opt0 = cpu_to_be64(NAGLE_V(f->fs.newvlan == VLAN_REMOVE || + f->fs.newvlan == VLAN_REWRITE) | + DELACK_V(f->fs.hitcnts) | + L2T_IDX_V(f->l2t ? f->l2t->idx : 0) | + SMAC_SEL_V((cxgb4_port_viid(f->dev) & + 0x7F) << 1) | + TX_CHAN_V(f->fs.eport) | + NO_CONG_V(f->fs.rpttid) | + ULP_MODE_V(f->fs.nat_mode ? + ULP_MODE_TCPDDP : ULP_MODE_NONE) | + TCAM_BYPASS_F | NON_OFFLOAD_F); + + t6req->params = cpu_to_be64(FILTER_TUPLE_V(hash_filter_ntuple(&f->fs, + f->dev))); + t6req->opt2 = htonl(RSS_QUEUE_VALID_F | + RSS_QUEUE_V(f->fs.iq) | + TX_QUEUE_V(f->fs.nat_mode) | + T5_OPT_2_VALID_F | + RX_CHANNEL_F | + CONG_CNTRL_V((f->fs.action == FILTER_DROP) | + (f->fs.dirsteer << 1)) | + PACE_V((f->fs.maskhash) | + ((f->fs.dirsteerhash) << 1)) | + CCTRL_ECN_V(f->fs.action == FILTER_SWITCH)); +} + +static int cxgb4_set_hash_filter(struct net_device *dev, + struct ch_filter_specification *fs, + struct filter_ctx *ctx) +{ + struct adapter *adapter = netdev2adap(dev); + struct tid_info *t = &adapter->tids; + struct filter_entry *f; + struct sk_buff *skb; + int iq, atid, size; + int ret = 0; + u32 iconf; + + fill_default_mask(fs); + ret = validate_filter(dev, fs); + if (ret) + return ret; + + iq = get_filter_steerq(dev, fs); + if (iq < 0) + return iq; + + f = kzalloc(sizeof(*f), GFP_KERNEL); + if (!f) + return -ENOMEM; + + f->fs = *fs; + f->ctx = ctx; + f->dev = dev; + f->fs.iq = iq; + + /* If the new filter requires loopback Destination MAC and/or VLAN + * rewriting then we need to allocate a Layer 2 Table (L2T) entry for + * the filter. + */ + if (f->fs.newdmac || f->fs.newvlan) { + /* allocate L2T entry for new filter */ + f->l2t = t4_l2t_alloc_switching(adapter, f->fs.vlan, + f->fs.eport, f->fs.dmac); + if (!f->l2t) { + ret = -ENOMEM; + goto out_err; + } + } + + /* If the new filter requires loopback Source MAC rewriting then + * we need to allocate a SMT entry for the filter. + */ + if (f->fs.newsmac) { + f->smt = cxgb4_smt_alloc_switching(f->dev, f->fs.smac); + if (!f->smt) { + if (f->l2t) { + cxgb4_l2t_release(f->l2t); + f->l2t = NULL; + } + ret = -ENOMEM; + goto free_l2t; + } + } + + atid = cxgb4_alloc_atid(t, f); + if (atid < 0) { + ret = atid; + goto free_smt; + } + + iconf = adapter->params.tp.ingress_config; + if (iconf & VNIC_F) { + f->fs.val.ovlan = (fs->val.pf << 13) | fs->val.vf; + f->fs.mask.ovlan = (fs->mask.pf << 13) | fs->mask.vf; + f->fs.val.ovlan_vld = fs->val.pfvf_vld; + f->fs.mask.ovlan_vld = fs->mask.pfvf_vld; + } + + size = sizeof(struct cpl_t6_act_open_req); + if (f->fs.type) { + ret = cxgb4_clip_get(f->dev, (const u32 *)&f->fs.val.lip, 1); + if (ret) + goto free_atid; + + skb = alloc_skb(size, GFP_KERNEL); + if (!skb) { + ret = -ENOMEM; + goto free_clip; + } + + mk_act_open_req6(f, skb, + ((adapter->sge.fw_evtq.abs_id << 14) | atid), + adapter); + } else { + skb = alloc_skb(size, GFP_KERNEL); + if (!skb) { + ret = -ENOMEM; + goto free_atid; + } + + mk_act_open_req(f, skb, + ((adapter->sge.fw_evtq.abs_id << 14) | atid), + adapter); + } + + f->pending = 1; + set_wr_txq(skb, CPL_PRIORITY_SETUP, f->fs.val.iport & 0x3); + t4_ofld_send(adapter, skb); + return 0; + +free_clip: + cxgb4_clip_release(f->dev, (const u32 *)&f->fs.val.lip, 1); + +free_atid: + cxgb4_free_atid(t, atid); + +free_smt: + if (f->smt) { + cxgb4_smt_release(f->smt); + f->smt = NULL; + } + +free_l2t: + if (f->l2t) { + cxgb4_l2t_release(f->l2t); + f->l2t = NULL; + } + +out_err: + kfree(f); + return ret; +} + /* Check a Chelsio Filter Request for validity, convert it into our internal * format and send it to the hardware. Return 0 on success, an error number * otherwise. We attach any provided filter operation context to the internal @@ -447,6 +1194,14 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, u32 iconf; int iq, ret; + if (fs->hash) { + if (is_hashfilter(adapter)) + return cxgb4_set_hash_filter(dev, fs, ctx); + netdev_err(dev, "%s: Exact-match filters only supported with Hash Filter configuration\n", + __func__); + return -EINVAL; + } + max_fidx = adapter->tids.nftids; if (filter_id != (max_fidx + adapter->tids.nsftids - 1) && filter_id >= max_fidx) @@ -568,12 +1323,74 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, return ret; } +static int cxgb4_del_hash_filter(struct net_device *dev, int filter_id, + struct filter_ctx *ctx) +{ + struct adapter *adapter = netdev2adap(dev); + struct tid_info *t = &adapter->tids; + struct cpl_abort_req *abort_req; + struct cpl_abort_rpl *abort_rpl; + struct cpl_set_tcb_field *req; + struct ulptx_idata *aligner; + struct work_request_hdr *wr; + struct filter_entry *f; + struct sk_buff *skb; + unsigned int wrlen; + int ret; + + netdev_dbg(dev, "%s: filter_id = %d ; nftids = %d\n", + __func__, filter_id, adapter->tids.nftids); + + if (filter_id > adapter->tids.ntids) + return -E2BIG; + + f = lookup_tid(t, filter_id); + if (!f) { + netdev_err(dev, "%s: no filter entry for filter_id = %d", + __func__, filter_id); + return -EINVAL; + } + + ret = writable_filter(f); + if (ret) + return ret; + + if (!f->valid) + return -EINVAL; + + f->ctx = ctx; + f->pending = 1; + wrlen = roundup(sizeof(*wr) + (sizeof(*req) + sizeof(*aligner)) + + sizeof(*abort_req) + sizeof(*abort_rpl), 16); + skb = alloc_skb(wrlen, GFP_KERNEL); + if (!skb) { + netdev_err(dev, "%s: could not allocate skb ..\n", __func__); + return -ENOMEM; + } + set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3); + req = (struct cpl_set_tcb_field *)__skb_put(skb, wrlen); + INIT_ULPTX_WR(req, wrlen, 0, 0); + wr = (struct work_request_hdr *)req; + wr++; + req = (struct cpl_set_tcb_field *)wr; + mk_set_tcb_ulp(f, req, TCB_RSS_INFO_W, TCB_RSS_INFO_V(TCB_RSS_INFO_M), + TCB_RSS_INFO_V(adapter->sge.fw_evtq.abs_id), 0, 1); + aligner = (struct ulptx_idata *)(req + 1); + abort_req = (struct cpl_abort_req *)(aligner + 1); + mk_abort_req_ulp(abort_req, f->tid); + abort_rpl = (struct cpl_abort_rpl *)(abort_req + 1); + mk_abort_rpl_ulp(abort_rpl, f->tid); + t4_ofld_send(adapter, skb); + return 0; +} + /* Check a delete filter request for validity and send it to the hardware. * Return 0 on success, an error number otherwise. We attach any provided * filter operation context to the internal filter specification in order to * facilitate signaling completion of the operation. */ int __cxgb4_del_filter(struct net_device *dev, int filter_id, + struct ch_filter_specification *fs, struct filter_ctx *ctx) { struct adapter *adapter = netdev2adap(dev); @@ -581,6 +1398,14 @@ int __cxgb4_del_filter(struct net_device *dev, int filter_id, unsigned int max_fidx; int ret; + if (fs && fs->hash) { + if (is_hashfilter(adapter)) + return cxgb4_del_hash_filter(dev, filter_id, ctx); + netdev_err(dev, "%s: Exact-match filters only supported with Hash Filter configuration\n", + __func__); + return -EINVAL; + } + max_fidx = adapter->tids.nftids; if (filter_id != (max_fidx + adapter->tids.nsftids - 1) && filter_id >= max_fidx) @@ -631,14 +1456,19 @@ out: return ret; } -int cxgb4_del_filter(struct net_device *dev, int filter_id) +int cxgb4_del_filter(struct net_device *dev, int filter_id, + struct ch_filter_specification *fs) { struct filter_ctx ctx; int ret; + /* If we are shutting down the adapter do not wait for completion */ + if (netdev2adap(dev)->flags & SHUTTING_DOWN) + return __cxgb4_del_filter(dev, filter_id, fs, NULL); + init_completion(&ctx.completion); - ret = __cxgb4_del_filter(dev, filter_id, &ctx); + ret = __cxgb4_del_filter(dev, filter_id, fs, &ctx); if (ret) goto out; @@ -652,6 +1482,157 @@ out: return ret; } +static int configure_filter_tcb(struct adapter *adap, unsigned int tid, + struct filter_entry *f) +{ + if (f->fs.hitcnts) + set_tcb_field(adap, f, tid, TCB_TIMESTAMP_W, + TCB_TIMESTAMP_V(TCB_TIMESTAMP_M) | + TCB_RTT_TS_RECENT_AGE_V(TCB_RTT_TS_RECENT_AGE_M), + TCB_TIMESTAMP_V(0ULL) | + TCB_RTT_TS_RECENT_AGE_V(0ULL), + 1); + + if (f->fs.newdmac) + set_tcb_tflag(adap, f, tid, TF_CCTRL_ECE_S, 1, + 1); + + if (f->fs.newvlan == VLAN_INSERT || + f->fs.newvlan == VLAN_REWRITE) + set_tcb_tflag(adap, f, tid, TF_CCTRL_RFR_S, 1, + 1); + if (f->fs.newsmac) + configure_filter_smac(adap, f); + + if (f->fs.nat_mode) { + switch (f->fs.nat_mode) { + case NAT_MODE_DIP: + set_nat_params(adap, f, tid, true, false, false, false); + break; + + case NAT_MODE_DIP_DP: + set_nat_params(adap, f, tid, true, false, true, false); + break; + + case NAT_MODE_DIP_DP_SIP: + set_nat_params(adap, f, tid, true, true, true, false); + break; + case NAT_MODE_DIP_DP_SP: + set_nat_params(adap, f, tid, true, false, true, true); + break; + + case NAT_MODE_SIP_SP: + set_nat_params(adap, f, tid, false, true, false, true); + break; + + case NAT_MODE_DIP_SIP_SP: + set_nat_params(adap, f, tid, true, true, false, true); + break; + + case NAT_MODE_ALL: + set_nat_params(adap, f, tid, true, true, true, true); + break; + + default: + pr_err("%s: Invalid NAT mode: %d\n", + __func__, f->fs.nat_mode); + return -EINVAL; + } + } + return 0; +} + +void hash_del_filter_rpl(struct adapter *adap, + const struct cpl_abort_rpl_rss *rpl) +{ + unsigned int status = rpl->status; + struct tid_info *t = &adap->tids; + unsigned int tid = GET_TID(rpl); + struct filter_ctx *ctx = NULL; + struct filter_entry *f; + + dev_dbg(adap->pdev_dev, "%s: status = %u; tid = %u\n", + __func__, status, tid); + + f = lookup_tid(t, tid); + if (!f) { + dev_err(adap->pdev_dev, "%s:could not find filter entry", + __func__); + return; + } + ctx = f->ctx; + f->ctx = NULL; + clear_filter(adap, f); + cxgb4_remove_tid(t, 0, tid, 0); + kfree(f); + if (ctx) { + ctx->result = 0; + complete(&ctx->completion); + } +} + +void hash_filter_rpl(struct adapter *adap, const struct cpl_act_open_rpl *rpl) +{ + unsigned int ftid = TID_TID_G(AOPEN_ATID_G(ntohl(rpl->atid_status))); + unsigned int status = AOPEN_STATUS_G(ntohl(rpl->atid_status)); + struct tid_info *t = &adap->tids; + unsigned int tid = GET_TID(rpl); + struct filter_ctx *ctx = NULL; + struct filter_entry *f; + + dev_dbg(adap->pdev_dev, "%s: tid = %u; atid = %u; status = %u\n", + __func__, tid, ftid, status); + + f = lookup_atid(t, ftid); + if (!f) { + dev_err(adap->pdev_dev, "%s:could not find filter entry", + __func__); + return; + } + ctx = f->ctx; + f->ctx = NULL; + + switch (status) { + case CPL_ERR_NONE: + f->tid = tid; + f->pending = 0; + f->valid = 1; + cxgb4_insert_tid(t, f, f->tid, 0); + cxgb4_free_atid(t, ftid); + if (ctx) { + ctx->tid = f->tid; + ctx->result = 0; + } + if (configure_filter_tcb(adap, tid, f)) { + clear_filter(adap, f); + cxgb4_remove_tid(t, 0, tid, 0); + kfree(f); + if (ctx) { + ctx->result = -EINVAL; + complete(&ctx->completion); + } + return; + } + break; + + default: + dev_err(adap->pdev_dev, "%s: filter creation PROBLEM; status = %u\n", + __func__, status); + + if (ctx) { + if (status == CPL_ERR_TCAM_FULL) + ctx->result = -EAGAIN; + else + ctx->result = -EINVAL; + } + clear_filter(adap, f); + cxgb4_free_atid(t, ftid); + kfree(f); + } + if (ctx) + complete(&ctx->completion); +} + /* Handle a filter write/deletion reply. */ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) { @@ -690,19 +1671,23 @@ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) clear_filter(adap, f); if (ctx) ctx->result = 0; - } else if (ret == FW_FILTER_WR_SMT_TBL_FULL) { - dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n", - idx); - clear_filter(adap, f); - if (ctx) - ctx->result = -ENOMEM; } else if (ret == FW_FILTER_WR_FLT_ADDED) { - f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff; - f->pending = 0; /* asynchronous setup completed */ - f->valid = 1; - if (ctx) { - ctx->result = 0; - ctx->tid = idx; + int err = 0; + + if (f->fs.newsmac) + err = configure_filter_smac(adap, f); + + if (!err) { + f->pending = 0; /* async setup completed */ + f->valid = 1; + if (ctx) { + ctx->result = 0; + ctx->tid = idx; + } + } else { + clear_filter(adap, f); + if (ctx) + ctx->result = err; } } else { /* Something went wrong. Issue a warning about the @@ -718,3 +1703,25 @@ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) complete(&ctx->completion); } } + +int init_hash_filter(struct adapter *adap) +{ + /* On T6, verify the necessary register configs and warn the user in + * case of improper config + */ + if (is_t6(adap->params.chip)) { + if (TCAM_ACTV_HIT_G(t4_read_reg(adap, LE_DB_RSP_CODE_0_A)) != 4) + goto err; + + if (HASH_ACTV_HIT_G(t4_read_reg(adap, LE_DB_RSP_CODE_1_A)) != 4) + goto err; + } else { + dev_err(adap->pdev_dev, "Hash filter supported only on T6\n"); + return -EINVAL; + } + adap->params.hash_filter = 1; + return 0; +err: + dev_warn(adap->pdev_dev, "Invalid hash filter config!\n"); + return -EINVAL; +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h index 23742cb1c69f..8db5fca6dcc9 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h @@ -37,7 +37,12 @@ #include "t4_msg.h" +#define WORD_MASK 0xffffffff + void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl); +void hash_filter_rpl(struct adapter *adap, const struct cpl_act_open_rpl *rpl); +void hash_del_filter_rpl(struct adapter *adap, + const struct cpl_abort_rpl_rss *rpl); void clear_filter(struct adapter *adap, struct filter_entry *f); int set_filter_wr(struct adapter *adapter, int fidx); @@ -45,4 +50,7 @@ int delete_filter(struct adapter *adapter, unsigned int fidx); int writable_filter(struct filter_entry *f); void clear_all_filters(struct adapter *adapter); +int init_hash_filter(struct adapter *adap); +bool is_filter_exact_match(struct adapter *adap, + struct ch_filter_specification *fs); #endif /* __CXGB4_FILTER_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 92d9d795d874..6f900ffe25cc 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -77,9 +77,12 @@ #include "cxgb4_debugfs.h" #include "clip_tbl.h" #include "l2t.h" +#include "smt.h" #include "sched.h" #include "cxgb4_tc_u32.h" +#include "cxgb4_tc_flower.h" #include "cxgb4_ptp.h" +#include "cxgb4_cudbg.h" char cxgb4_driver_name[] = KBUILD_MODNAME; @@ -280,7 +283,7 @@ void t4_os_link_changed(struct adapter *adapter, int port_id, int link_stat) else { #ifdef CONFIG_CHELSIO_T4_DCB if (cxgb4_dcb_enabled(dev)) { - cxgb4_dcb_state_init(dev); + cxgb4_dcb_reset(dev); dcb_tx_queue_prio_enable(dev, false); } #endif /* CONFIG_CHELSIO_T4_DCB */ @@ -561,10 +564,22 @@ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp, const struct cpl_l2t_write_rpl *p = (void *)rsp; do_l2t_write_rpl(q->adap, p); + } else if (opcode == CPL_SMT_WRITE_RPL) { + const struct cpl_smt_write_rpl *p = (void *)rsp; + + do_smt_write_rpl(q->adap, p); } else if (opcode == CPL_SET_TCB_RPL) { const struct cpl_set_tcb_rpl *p = (void *)rsp; filter_rpl(q->adap, p); + } else if (opcode == CPL_ACT_OPEN_RPL) { + const struct cpl_act_open_rpl *p = (void *)rsp; + + hash_filter_rpl(q->adap, p); + } else if (opcode == CPL_ABORT_RPL_RSS) { + const struct cpl_abort_rpl_rss *p = (void *)rsp; + + hash_del_filter_rpl(q->adap, p); } else dev_err(q->adap->pdev_dev, "unexpected CPL %#x on FW event queue\n", opcode); @@ -1637,7 +1652,7 @@ void cxgb4_get_tcp_stats(struct pci_dev *pdev, struct tp_tcp_stats *v4, struct adapter *adap = pci_get_drvdata(pdev); spin_lock(&adap->stats_lock); - t4_tp_get_tcp_stats(adap, v4, v6); + t4_tp_get_tcp_stats(adap, v4, v6, false); spin_unlock(&adap->stats_lock); } EXPORT_SYMBOL(cxgb4_get_tcp_stats); @@ -2303,10 +2318,16 @@ static int cxgb_close(struct net_device *dev) { struct port_info *pi = netdev_priv(dev); struct adapter *adapter = pi->adapter; + int ret; netif_tx_stop_all_queues(dev); netif_carrier_off(dev); - return t4_enable_vi(adapter, adapter->pf, pi->viid, false, false); + ret = t4_enable_vi(adapter, adapter->pf, pi->viid, false, false); +#ifdef CONFIG_CHELSIO_T4_DCB + cxgb4_dcb_reset(dev); + dcb_tx_queue_prio_enable(dev, false); +#endif + return ret; } int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid, @@ -2873,11 +2894,28 @@ static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate) return err; } +static int cxgb_setup_tc_flower(struct net_device *dev, + struct tc_cls_flower_offload *cls_flower) +{ + if (cls_flower->common.chain_index) + return -EOPNOTSUPP; + + switch (cls_flower->command) { + case TC_CLSFLOWER_REPLACE: + return cxgb4_tc_flower_replace(dev, cls_flower); + case TC_CLSFLOWER_DESTROY: + return cxgb4_tc_flower_destroy(dev, cls_flower); + case TC_CLSFLOWER_STATS: + return cxgb4_tc_flower_stats(dev, cls_flower); + default: + return -EOPNOTSUPP; + } +} + static int cxgb_setup_tc_cls_u32(struct net_device *dev, struct tc_cls_u32_offload *cls_u32) { - if (!is_classid_clsact_ingress(cls_u32->common.classid) || - cls_u32->common.chain_index) + if (cls_u32->common.chain_index) return -EOPNOTSUPP; switch (cls_u32->command) { @@ -2891,9 +2929,10 @@ static int cxgb_setup_tc_cls_u32(struct net_device *dev, } } -static int cxgb_setup_tc(struct net_device *dev, enum tc_setup_type type, - void *type_data) +static int cxgb_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) { + struct net_device *dev = cb_priv; struct port_info *pi = netdev2pinfo(dev); struct adapter *adap = netdev2adap(dev); @@ -2904,9 +2943,45 @@ static int cxgb_setup_tc(struct net_device *dev, enum tc_setup_type type, return -EINVAL; } + if (!tc_can_offload(dev)) + return -EOPNOTSUPP; + switch (type) { case TC_SETUP_CLSU32: return cxgb_setup_tc_cls_u32(dev, type_data); + case TC_SETUP_CLSFLOWER: + return cxgb_setup_tc_flower(dev, type_data); + default: + return -EOPNOTSUPP; + } +} + +static int cxgb_setup_tc_block(struct net_device *dev, + struct tc_block_offload *f) +{ + struct port_info *pi = netdev2pinfo(dev); + + if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + return tcf_block_cb_register(f->block, cxgb_setup_tc_block_cb, + pi, dev); + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, cxgb_setup_tc_block_cb, pi); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int cxgb_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + switch (type) { + case TC_SETUP_BLOCK: + return cxgb_setup_tc_block(dev, type_data); default: return -EOPNOTSUPP; } @@ -3876,6 +3951,16 @@ static int adap_init0(struct adapter *adap) 1, params, val); adap->params.fr_nsmr_tpte_wr_support = (ret == 0 && val[0] != 0); + /* See if FW supports FW_FILTER2 work request */ + if (is_t4(adap->params.chip)) { + adap->params.filter2_wr_support = 0; + } else { + params[0] = FW_PARAM_DEV(FILTER2_WR); + ret = t4_query_params(adap, adap->mbox, adap->pf, 0, + 1, params, val); + adap->params.filter2_wr_support = (ret == 0 && val[0] != 0); + } + /* * Get device capabilities so we can determine what resources we need * to manage. @@ -3889,7 +3974,8 @@ static int adap_init0(struct adapter *adap) if (ret < 0) goto bye; - if (caps_cmd.ofldcaps) { + if (caps_cmd.ofldcaps || + (caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_HASHFILTER))) { /* query offload-related parameters */ params[0] = FW_PARAM_DEV(NTID); params[1] = FW_PARAM_PFVF(SERVER_START); @@ -3926,8 +4012,13 @@ static int adap_init0(struct adapter *adap) adap->vres.ddp.size = val[4] - val[3] + 1; adap->params.ofldq_wr_cred = val[5]; - adap->params.offload = 1; - adap->num_ofld_uld += 1; + if (caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_HASHFILTER)) { + if (init_hash_filter(adap) < 0) + goto bye; + } else { + adap->params.offload = 1; + adap->num_ofld_uld += 1; + } } if (caps_cmd.rdmacaps) { params[0] = FW_PARAM_PFVF(STAG_START); @@ -4048,7 +4139,7 @@ static int adap_init0(struct adapter *adap) } t4_init_sge_params(adap); adap->flags |= FW_OK; - t4_init_tp_params(adap); + t4_init_tp_params(adap, true); return 0; /* @@ -4612,9 +4703,11 @@ static void free_some_resources(struct adapter *adapter) { unsigned int i; + kvfree(adapter->smt); kvfree(adapter->l2t); t4_cleanup_sched(adapter); kvfree(adapter->tids.tid_tab); + cxgb4_cleanup_tc_flower(adapter); cxgb4_cleanup_tc_u32(adapter); kfree(adapter->sge.egr_map); kfree(adapter->sge.ingr_map); @@ -4995,7 +5088,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->priv_flags |= IFF_UNICAST_FLT; /* MTU range: 81 - 9600 */ - netdev->min_mtu = 81; + netdev->min_mtu = 81; /* accommodate SACK */ netdev->max_mtu = MAX_MTU; netdev->netdev_ops = &cxgb4_netdev_ops; @@ -5006,6 +5099,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) cxgb4_set_ethtool_ops(netdev); } + cxgb4_init_ethtool_dump(adapter); + pci_set_drvdata(pdev, adapter); if (adapter->flags & FW_OK) { @@ -5035,6 +5130,12 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) */ cfg_queues(adapter); + adapter->smt = t4_init_smt(); + if (!adapter->smt) { + /* We tolerate a lack of SMT, giving up some functionality */ + dev_warn(&pdev->dev, "could not allocate SMT, continuing\n"); + } + adapter->l2t = t4_init_l2t(adapter->l2t_start, adapter->l2t_end); if (!adapter->l2t) { /* We tolerate a lack of L2T, giving up some functionality */ @@ -5083,9 +5184,13 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (!adapter->tc_u32) dev_warn(&pdev->dev, "could not offload tc u32, continuing\n"); + + if (cxgb4_init_tc_flower(adapter)) + dev_warn(&pdev->dev, + "could not offload tc flower, continuing\n"); } - if (is_offload(adapter)) { + if (is_offload(adapter) || is_hashfilter(adapter)) { if (t4_read_reg(adapter, LE_DB_CONFIG_A) & HASHEN_F) { u32 hash_base, hash_reg; @@ -5254,6 +5359,8 @@ static void remove_one(struct pci_dev *pdev) return; } + adapter->flags |= SHUTTING_DOWN; + if (adapter->pf == 4) { int i; @@ -5339,6 +5446,8 @@ static void shutdown_one(struct pci_dev *pdev) return; } + adapter->flags |= SHUTTING_DOWN; + if (adapter->pf == 4) { int i; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c new file mode 100644 index 000000000000..d4a548a6a55c --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -0,0 +1,876 @@ +/* + * This file is part of the Chelsio T4/T5/T6 Ethernet driver for Linux. + * + * Copyright (c) 2017 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <net/tc_act/tc_mirred.h> +#include <net/tc_act/tc_pedit.h> +#include <net/tc_act/tc_gact.h> +#include <net/tc_act/tc_vlan.h> + +#include "cxgb4.h" +#include "cxgb4_filter.h" +#include "cxgb4_tc_flower.h" + +#define STATS_CHECK_PERIOD (HZ / 2) + +struct ch_tc_pedit_fields pedits[] = { + PEDIT_FIELDS(ETH_, DMAC_31_0, 4, dmac, 0), + PEDIT_FIELDS(ETH_, DMAC_47_32, 2, dmac, 4), + PEDIT_FIELDS(ETH_, SMAC_15_0, 2, smac, 0), + PEDIT_FIELDS(ETH_, SMAC_47_16, 4, smac, 2), + PEDIT_FIELDS(IP4_, SRC, 4, nat_fip, 0), + PEDIT_FIELDS(IP4_, DST, 4, nat_lip, 0), + PEDIT_FIELDS(IP6_, SRC_31_0, 4, nat_fip, 0), + PEDIT_FIELDS(IP6_, SRC_63_32, 4, nat_fip, 4), + PEDIT_FIELDS(IP6_, SRC_95_64, 4, nat_fip, 8), + PEDIT_FIELDS(IP6_, SRC_127_96, 4, nat_fip, 12), + PEDIT_FIELDS(IP6_, DST_31_0, 4, nat_lip, 0), + PEDIT_FIELDS(IP6_, DST_63_32, 4, nat_lip, 4), + PEDIT_FIELDS(IP6_, DST_95_64, 4, nat_lip, 8), + PEDIT_FIELDS(IP6_, DST_127_96, 4, nat_lip, 12), + PEDIT_FIELDS(TCP_, SPORT, 2, nat_fport, 0), + PEDIT_FIELDS(TCP_, DPORT, 2, nat_lport, 0), + PEDIT_FIELDS(UDP_, SPORT, 2, nat_fport, 0), + PEDIT_FIELDS(UDP_, DPORT, 2, nat_lport, 0), +}; + +static struct ch_tc_flower_entry *allocate_flower_entry(void) +{ + struct ch_tc_flower_entry *new = kzalloc(sizeof(*new), GFP_KERNEL); + spin_lock_init(&new->lock); + return new; +} + +/* Must be called with either RTNL or rcu_read_lock */ +static struct ch_tc_flower_entry *ch_flower_lookup(struct adapter *adap, + unsigned long flower_cookie) +{ + return rhashtable_lookup_fast(&adap->flower_tbl, &flower_cookie, + adap->flower_ht_params); +} + +static void cxgb4_process_flow_match(struct net_device *dev, + struct tc_cls_flower_offload *cls, + struct ch_filter_specification *fs) +{ + u16 addr_type = 0; + + if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_dissector_key_control *key = + skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_CONTROL, + cls->key); + + addr_type = key->addr_type; + } + + if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_dissector_key_basic *key = + skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_BASIC, + cls->key); + struct flow_dissector_key_basic *mask = + skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_BASIC, + cls->mask); + u16 ethtype_key = ntohs(key->n_proto); + u16 ethtype_mask = ntohs(mask->n_proto); + + if (ethtype_key == ETH_P_ALL) { + ethtype_key = 0; + ethtype_mask = 0; + } + + fs->val.ethtype = ethtype_key; + fs->mask.ethtype = ethtype_mask; + fs->val.proto = key->ip_proto; + fs->mask.proto = mask->ip_proto; + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_dissector_key_ipv4_addrs *key = + skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, + cls->key); + struct flow_dissector_key_ipv4_addrs *mask = + skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, + cls->mask); + fs->type = 0; + memcpy(&fs->val.lip[0], &key->dst, sizeof(key->dst)); + memcpy(&fs->val.fip[0], &key->src, sizeof(key->src)); + memcpy(&fs->mask.lip[0], &mask->dst, sizeof(mask->dst)); + memcpy(&fs->mask.fip[0], &mask->src, sizeof(mask->src)); + + /* also initialize nat_lip/fip to same values */ + memcpy(&fs->nat_lip[0], &key->dst, sizeof(key->dst)); + memcpy(&fs->nat_fip[0], &key->src, sizeof(key->src)); + + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_dissector_key_ipv6_addrs *key = + skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, + cls->key); + struct flow_dissector_key_ipv6_addrs *mask = + skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, + cls->mask); + + fs->type = 1; + memcpy(&fs->val.lip[0], key->dst.s6_addr, sizeof(key->dst)); + memcpy(&fs->val.fip[0], key->src.s6_addr, sizeof(key->src)); + memcpy(&fs->mask.lip[0], mask->dst.s6_addr, sizeof(mask->dst)); + memcpy(&fs->mask.fip[0], mask->src.s6_addr, sizeof(mask->src)); + + /* also initialize nat_lip/fip to same values */ + memcpy(&fs->nat_lip[0], key->dst.s6_addr, sizeof(key->dst)); + memcpy(&fs->nat_fip[0], key->src.s6_addr, sizeof(key->src)); + } + + if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_dissector_key_ports *key, *mask; + + key = skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_PORTS, + cls->key); + mask = skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_PORTS, + cls->mask); + fs->val.lport = cpu_to_be16(key->dst); + fs->mask.lport = cpu_to_be16(mask->dst); + fs->val.fport = cpu_to_be16(key->src); + fs->mask.fport = cpu_to_be16(mask->src); + + /* also initialize nat_lport/fport to same values */ + fs->nat_lport = cpu_to_be16(key->dst); + fs->nat_fport = cpu_to_be16(key->src); + } + + if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_IP)) { + struct flow_dissector_key_ip *key, *mask; + + key = skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_IP, + cls->key); + mask = skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_IP, + cls->mask); + fs->val.tos = key->tos; + fs->mask.tos = mask->tos; + } + + if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_dissector_key_vlan *key, *mask; + u16 vlan_tci, vlan_tci_mask; + + key = skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_VLAN, + cls->key); + mask = skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_VLAN, + cls->mask); + vlan_tci = key->vlan_id | (key->vlan_priority << + VLAN_PRIO_SHIFT); + vlan_tci_mask = mask->vlan_id | (mask->vlan_priority << + VLAN_PRIO_SHIFT); + fs->val.ivlan = cpu_to_be16(vlan_tci); + fs->mask.ivlan = cpu_to_be16(vlan_tci_mask); + + /* Chelsio adapters use ivlan_vld bit to match vlan packets + * as 802.1Q. Also, when vlan tag is present in packets, + * ethtype match is used then to match on ethtype of inner + * header ie. the header following the vlan header. + * So, set the ivlan_vld based on ethtype info supplied by + * TC for vlan packets if its 802.1Q. And then reset the + * ethtype value else, hw will try to match the supplied + * ethtype value with ethtype of inner header. + */ + if (fs->val.ethtype == ETH_P_8021Q) { + fs->val.ivlan_vld = 1; + fs->mask.ivlan_vld = 1; + fs->val.ethtype = 0; + fs->mask.ethtype = 0; + } + } + + /* Match only packets coming from the ingress port where this + * filter will be created. + */ + fs->val.iport = netdev2pinfo(dev)->port_id; + fs->mask.iport = ~0; +} + +static int cxgb4_validate_flow_match(struct net_device *dev, + struct tc_cls_flower_offload *cls) +{ + u16 ethtype_mask = 0; + u16 ethtype_key = 0; + + if (cls->dissector->used_keys & + ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | + BIT(FLOW_DISSECTOR_KEY_BASIC) | + BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_PORTS) | + BIT(FLOW_DISSECTOR_KEY_VLAN) | + BIT(FLOW_DISSECTOR_KEY_IP))) { + netdev_warn(dev, "Unsupported key used: 0x%x\n", + cls->dissector->used_keys); + return -EOPNOTSUPP; + } + + if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_dissector_key_basic *key = + skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_BASIC, + cls->key); + struct flow_dissector_key_basic *mask = + skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_BASIC, + cls->mask); + ethtype_key = ntohs(key->n_proto); + ethtype_mask = ntohs(mask->n_proto); + } + + if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_IP)) { + u16 eth_ip_type = ethtype_key & ethtype_mask; + struct flow_dissector_key_ip *mask; + + if (eth_ip_type != ETH_P_IP && eth_ip_type != ETH_P_IPV6) { + netdev_err(dev, "IP Key supported only with IPv4/v6"); + return -EINVAL; + } + + mask = skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_IP, + cls->mask); + if (mask->ttl) { + netdev_warn(dev, "ttl match unsupported for offload"); + return -EOPNOTSUPP; + } + } + + return 0; +} + +static void offload_pedit(struct ch_filter_specification *fs, u32 val, u32 mask, + u8 field) +{ + u32 set_val = val & ~mask; + u32 offset = 0; + u8 size = 1; + int i; + + for (i = 0; i < ARRAY_SIZE(pedits); i++) { + if (pedits[i].field == field) { + offset = pedits[i].offset; + size = pedits[i].size; + break; + } + } + memcpy((u8 *)fs + offset, &set_val, size); +} + +static void process_pedit_field(struct ch_filter_specification *fs, u32 val, + u32 mask, u32 offset, u8 htype) +{ + switch (htype) { + case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH: + switch (offset) { + case PEDIT_ETH_DMAC_31_0: + fs->newdmac = 1; + offload_pedit(fs, val, mask, ETH_DMAC_31_0); + break; + case PEDIT_ETH_DMAC_47_32_SMAC_15_0: + if (~mask & PEDIT_ETH_DMAC_MASK) + offload_pedit(fs, val, mask, ETH_DMAC_47_32); + else + offload_pedit(fs, val >> 16, mask >> 16, + ETH_SMAC_15_0); + break; + case PEDIT_ETH_SMAC_47_16: + fs->newsmac = 1; + offload_pedit(fs, val, mask, ETH_SMAC_47_16); + } + break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4: + switch (offset) { + case PEDIT_IP4_SRC: + offload_pedit(fs, val, mask, IP4_SRC); + break; + case PEDIT_IP4_DST: + offload_pedit(fs, val, mask, IP4_DST); + } + fs->nat_mode = NAT_MODE_ALL; + break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6: + switch (offset) { + case PEDIT_IP6_SRC_31_0: + offload_pedit(fs, val, mask, IP6_SRC_31_0); + break; + case PEDIT_IP6_SRC_63_32: + offload_pedit(fs, val, mask, IP6_SRC_63_32); + break; + case PEDIT_IP6_SRC_95_64: + offload_pedit(fs, val, mask, IP6_SRC_95_64); + break; + case PEDIT_IP6_SRC_127_96: + offload_pedit(fs, val, mask, IP6_SRC_127_96); + break; + case PEDIT_IP6_DST_31_0: + offload_pedit(fs, val, mask, IP6_DST_31_0); + break; + case PEDIT_IP6_DST_63_32: + offload_pedit(fs, val, mask, IP6_DST_63_32); + break; + case PEDIT_IP6_DST_95_64: + offload_pedit(fs, val, mask, IP6_DST_95_64); + break; + case PEDIT_IP6_DST_127_96: + offload_pedit(fs, val, mask, IP6_DST_127_96); + } + fs->nat_mode = NAT_MODE_ALL; + break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP: + switch (offset) { + case PEDIT_TCP_SPORT_DPORT: + if (~mask & PEDIT_TCP_UDP_SPORT_MASK) + offload_pedit(fs, cpu_to_be32(val) >> 16, + cpu_to_be32(mask) >> 16, + TCP_SPORT); + else + offload_pedit(fs, cpu_to_be32(val), + cpu_to_be32(mask), TCP_DPORT); + } + fs->nat_mode = NAT_MODE_ALL; + break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP: + switch (offset) { + case PEDIT_UDP_SPORT_DPORT: + if (~mask & PEDIT_TCP_UDP_SPORT_MASK) + offload_pedit(fs, cpu_to_be32(val) >> 16, + cpu_to_be32(mask) >> 16, + UDP_SPORT); + else + offload_pedit(fs, cpu_to_be32(val), + cpu_to_be32(mask), UDP_DPORT); + } + fs->nat_mode = NAT_MODE_ALL; + } +} + +static void cxgb4_process_flow_actions(struct net_device *in, + struct tc_cls_flower_offload *cls, + struct ch_filter_specification *fs) +{ + const struct tc_action *a; + LIST_HEAD(actions); + + tcf_exts_to_list(cls->exts, &actions); + list_for_each_entry(a, &actions, list) { + if (is_tcf_gact_ok(a)) { + fs->action = FILTER_PASS; + } else if (is_tcf_gact_shot(a)) { + fs->action = FILTER_DROP; + } else if (is_tcf_mirred_egress_redirect(a)) { + int ifindex = tcf_mirred_ifindex(a); + struct net_device *out = __dev_get_by_index(dev_net(in), + ifindex); + struct port_info *pi = netdev_priv(out); + + fs->action = FILTER_SWITCH; + fs->eport = pi->port_id; + } else if (is_tcf_vlan(a)) { + u32 vlan_action = tcf_vlan_action(a); + u8 prio = tcf_vlan_push_prio(a); + u16 vid = tcf_vlan_push_vid(a); + u16 vlan_tci = (prio << VLAN_PRIO_SHIFT) | vid; + + switch (vlan_action) { + case TCA_VLAN_ACT_POP: + fs->newvlan |= VLAN_REMOVE; + break; + case TCA_VLAN_ACT_PUSH: + fs->newvlan |= VLAN_INSERT; + fs->vlan = vlan_tci; + break; + case TCA_VLAN_ACT_MODIFY: + fs->newvlan |= VLAN_REWRITE; + fs->vlan = vlan_tci; + break; + default: + break; + } + } else if (is_tcf_pedit(a)) { + u32 mask, val, offset; + int nkeys, i; + u8 htype; + + nkeys = tcf_pedit_nkeys(a); + for (i = 0; i < nkeys; i++) { + htype = tcf_pedit_htype(a, i); + mask = tcf_pedit_mask(a, i); + val = tcf_pedit_val(a, i); + offset = tcf_pedit_offset(a, i); + + process_pedit_field(fs, val, mask, offset, + htype); + } + } + } +} + +static bool valid_l4_mask(u32 mask) +{ + u16 hi, lo; + + /* Either the upper 16-bits (SPORT) OR the lower + * 16-bits (DPORT) can be set, but NOT BOTH. + */ + hi = (mask >> 16) & 0xFFFF; + lo = mask & 0xFFFF; + + return hi && lo ? false : true; +} + +static bool valid_pedit_action(struct net_device *dev, + const struct tc_action *a) +{ + u32 mask, offset; + u8 cmd, htype; + int nkeys, i; + + nkeys = tcf_pedit_nkeys(a); + for (i = 0; i < nkeys; i++) { + htype = tcf_pedit_htype(a, i); + cmd = tcf_pedit_cmd(a, i); + mask = tcf_pedit_mask(a, i); + offset = tcf_pedit_offset(a, i); + + if (cmd != TCA_PEDIT_KEY_EX_CMD_SET) { + netdev_err(dev, "%s: Unsupported pedit cmd\n", + __func__); + return false; + } + + switch (htype) { + case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH: + switch (offset) { + case PEDIT_ETH_DMAC_31_0: + case PEDIT_ETH_DMAC_47_32_SMAC_15_0: + case PEDIT_ETH_SMAC_47_16: + break; + default: + netdev_err(dev, "%s: Unsupported pedit field\n", + __func__); + return false; + } + break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4: + switch (offset) { + case PEDIT_IP4_SRC: + case PEDIT_IP4_DST: + break; + default: + netdev_err(dev, "%s: Unsupported pedit field\n", + __func__); + return false; + } + break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6: + switch (offset) { + case PEDIT_IP6_SRC_31_0: + case PEDIT_IP6_SRC_63_32: + case PEDIT_IP6_SRC_95_64: + case PEDIT_IP6_SRC_127_96: + case PEDIT_IP6_DST_31_0: + case PEDIT_IP6_DST_63_32: + case PEDIT_IP6_DST_95_64: + case PEDIT_IP6_DST_127_96: + break; + default: + netdev_err(dev, "%s: Unsupported pedit field\n", + __func__); + return false; + } + break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP: + switch (offset) { + case PEDIT_TCP_SPORT_DPORT: + if (!valid_l4_mask(~mask)) { + netdev_err(dev, "%s: Unsupported mask for TCP L4 ports\n", + __func__); + return false; + } + break; + default: + netdev_err(dev, "%s: Unsupported pedit field\n", + __func__); + return false; + } + break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP: + switch (offset) { + case PEDIT_UDP_SPORT_DPORT: + if (!valid_l4_mask(~mask)) { + netdev_err(dev, "%s: Unsupported mask for UDP L4 ports\n", + __func__); + return false; + } + break; + default: + netdev_err(dev, "%s: Unsupported pedit field\n", + __func__); + return false; + } + break; + default: + netdev_err(dev, "%s: Unsupported pedit type\n", + __func__); + return false; + } + } + return true; +} + +static int cxgb4_validate_flow_actions(struct net_device *dev, + struct tc_cls_flower_offload *cls) +{ + const struct tc_action *a; + bool act_redir = false; + bool act_pedit = false; + bool act_vlan = false; + LIST_HEAD(actions); + + tcf_exts_to_list(cls->exts, &actions); + list_for_each_entry(a, &actions, list) { + if (is_tcf_gact_ok(a)) { + /* Do nothing */ + } else if (is_tcf_gact_shot(a)) { + /* Do nothing */ + } else if (is_tcf_mirred_egress_redirect(a)) { + struct adapter *adap = netdev2adap(dev); + struct net_device *n_dev; + unsigned int i, ifindex; + bool found = false; + + ifindex = tcf_mirred_ifindex(a); + for_each_port(adap, i) { + n_dev = adap->port[i]; + if (ifindex == n_dev->ifindex) { + found = true; + break; + } + } + + /* If interface doesn't belong to our hw, then + * the provided output port is not valid + */ + if (!found) { + netdev_err(dev, "%s: Out port invalid\n", + __func__); + return -EINVAL; + } + act_redir = true; + } else if (is_tcf_vlan(a)) { + u16 proto = be16_to_cpu(tcf_vlan_push_proto(a)); + u32 vlan_action = tcf_vlan_action(a); + + switch (vlan_action) { + case TCA_VLAN_ACT_POP: + break; + case TCA_VLAN_ACT_PUSH: + case TCA_VLAN_ACT_MODIFY: + if (proto != ETH_P_8021Q) { + netdev_err(dev, "%s: Unsupported vlan proto\n", + __func__); + return -EOPNOTSUPP; + } + break; + default: + netdev_err(dev, "%s: Unsupported vlan action\n", + __func__); + return -EOPNOTSUPP; + } + act_vlan = true; + } else if (is_tcf_pedit(a)) { + bool pedit_valid = valid_pedit_action(dev, a); + + if (!pedit_valid) + return -EOPNOTSUPP; + act_pedit = true; + } else { + netdev_err(dev, "%s: Unsupported action\n", __func__); + return -EOPNOTSUPP; + } + } + + if ((act_pedit || act_vlan) && !act_redir) { + netdev_err(dev, "%s: pedit/vlan rewrite invalid without egress redirect\n", + __func__); + return -EINVAL; + } + + return 0; +} + +int cxgb4_tc_flower_replace(struct net_device *dev, + struct tc_cls_flower_offload *cls) +{ + struct adapter *adap = netdev2adap(dev); + struct ch_tc_flower_entry *ch_flower; + struct ch_filter_specification *fs; + struct filter_ctx ctx; + int fidx; + int ret; + + if (cxgb4_validate_flow_actions(dev, cls)) + return -EOPNOTSUPP; + + if (cxgb4_validate_flow_match(dev, cls)) + return -EOPNOTSUPP; + + ch_flower = allocate_flower_entry(); + if (!ch_flower) { + netdev_err(dev, "%s: ch_flower alloc failed.\n", __func__); + return -ENOMEM; + } + + fs = &ch_flower->fs; + fs->hitcnts = 1; + cxgb4_process_flow_match(dev, cls, fs); + cxgb4_process_flow_actions(dev, cls, fs); + + fs->hash = is_filter_exact_match(adap, fs); + if (fs->hash) { + fidx = 0; + } else { + fidx = cxgb4_get_free_ftid(dev, fs->type ? PF_INET6 : PF_INET); + if (fidx < 0) { + netdev_err(dev, "%s: No fidx for offload.\n", __func__); + ret = -ENOMEM; + goto free_entry; + } + } + + init_completion(&ctx.completion); + ret = __cxgb4_set_filter(dev, fidx, fs, &ctx); + if (ret) { + netdev_err(dev, "%s: filter creation err %d\n", + __func__, ret); + goto free_entry; + } + + /* Wait for reply */ + ret = wait_for_completion_timeout(&ctx.completion, 10 * HZ); + if (!ret) { + ret = -ETIMEDOUT; + goto free_entry; + } + + ret = ctx.result; + /* Check if hw returned error for filter creation */ + if (ret) { + netdev_err(dev, "%s: filter creation err %d\n", + __func__, ret); + goto free_entry; + } + + ch_flower->tc_flower_cookie = cls->cookie; + ch_flower->filter_id = ctx.tid; + ret = rhashtable_insert_fast(&adap->flower_tbl, &ch_flower->node, + adap->flower_ht_params); + if (ret) + goto del_filter; + + return 0; + +del_filter: + cxgb4_del_filter(dev, ch_flower->filter_id, &ch_flower->fs); + +free_entry: + kfree(ch_flower); + return ret; +} + +int cxgb4_tc_flower_destroy(struct net_device *dev, + struct tc_cls_flower_offload *cls) +{ + struct adapter *adap = netdev2adap(dev); + struct ch_tc_flower_entry *ch_flower; + int ret; + + ch_flower = ch_flower_lookup(adap, cls->cookie); + if (!ch_flower) + return -ENOENT; + + ret = cxgb4_del_filter(dev, ch_flower->filter_id, &ch_flower->fs); + if (ret) + goto err; + + ret = rhashtable_remove_fast(&adap->flower_tbl, &ch_flower->node, + adap->flower_ht_params); + if (ret) { + netdev_err(dev, "Flow remove from rhashtable failed"); + goto err; + } + kfree_rcu(ch_flower, rcu); + +err: + return ret; +} + +static void ch_flower_stats_handler(struct work_struct *work) +{ + struct adapter *adap = container_of(work, struct adapter, + flower_stats_work); + struct ch_tc_flower_entry *flower_entry; + struct ch_tc_flower_stats *ofld_stats; + struct rhashtable_iter iter; + u64 packets; + u64 bytes; + int ret; + + rhashtable_walk_enter(&adap->flower_tbl, &iter); + do { + flower_entry = ERR_PTR(rhashtable_walk_start(&iter)); + if (IS_ERR(flower_entry)) + goto walk_stop; + + while ((flower_entry = rhashtable_walk_next(&iter)) && + !IS_ERR(flower_entry)) { + ret = cxgb4_get_filter_counters(adap->port[0], + flower_entry->filter_id, + &packets, &bytes, + flower_entry->fs.hash); + if (!ret) { + spin_lock(&flower_entry->lock); + ofld_stats = &flower_entry->stats; + + if (ofld_stats->prev_packet_count != packets) { + ofld_stats->prev_packet_count = packets; + ofld_stats->last_used = jiffies; + } + spin_unlock(&flower_entry->lock); + } + } +walk_stop: + rhashtable_walk_stop(&iter); + } while (flower_entry == ERR_PTR(-EAGAIN)); + rhashtable_walk_exit(&iter); + mod_timer(&adap->flower_stats_timer, jiffies + STATS_CHECK_PERIOD); +} + +static void ch_flower_stats_cb(struct timer_list *t) +{ + struct adapter *adap = from_timer(adap, t, flower_stats_timer); + + schedule_work(&adap->flower_stats_work); +} + +int cxgb4_tc_flower_stats(struct net_device *dev, + struct tc_cls_flower_offload *cls) +{ + struct adapter *adap = netdev2adap(dev); + struct ch_tc_flower_stats *ofld_stats; + struct ch_tc_flower_entry *ch_flower; + u64 packets; + u64 bytes; + int ret; + + ch_flower = ch_flower_lookup(adap, cls->cookie); + if (!ch_flower) { + ret = -ENOENT; + goto err; + } + + ret = cxgb4_get_filter_counters(dev, ch_flower->filter_id, + &packets, &bytes, + ch_flower->fs.hash); + if (ret < 0) + goto err; + + spin_lock_bh(&ch_flower->lock); + ofld_stats = &ch_flower->stats; + if (ofld_stats->packet_count != packets) { + if (ofld_stats->prev_packet_count != packets) + ofld_stats->last_used = jiffies; + tcf_exts_stats_update(cls->exts, bytes - ofld_stats->byte_count, + packets - ofld_stats->packet_count, + ofld_stats->last_used); + + ofld_stats->packet_count = packets; + ofld_stats->byte_count = bytes; + ofld_stats->prev_packet_count = packets; + } + spin_unlock_bh(&ch_flower->lock); + return 0; + +err: + return ret; +} + +static const struct rhashtable_params cxgb4_tc_flower_ht_params = { + .nelem_hint = 384, + .head_offset = offsetof(struct ch_tc_flower_entry, node), + .key_offset = offsetof(struct ch_tc_flower_entry, tc_flower_cookie), + .key_len = sizeof(((struct ch_tc_flower_entry *)0)->tc_flower_cookie), + .max_size = 524288, + .min_size = 512, + .automatic_shrinking = true +}; + +int cxgb4_init_tc_flower(struct adapter *adap) +{ + int ret; + + adap->flower_ht_params = cxgb4_tc_flower_ht_params; + ret = rhashtable_init(&adap->flower_tbl, &adap->flower_ht_params); + if (ret) + return ret; + + INIT_WORK(&adap->flower_stats_work, ch_flower_stats_handler); + timer_setup(&adap->flower_stats_timer, ch_flower_stats_cb, 0); + mod_timer(&adap->flower_stats_timer, jiffies + STATS_CHECK_PERIOD); + return 0; +} + +void cxgb4_cleanup_tc_flower(struct adapter *adap) +{ + if (adap->flower_stats_timer.function) + del_timer_sync(&adap->flower_stats_timer); + cancel_work_sync(&adap->flower_stats_work); + rhashtable_destroy(&adap->flower_tbl); +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h new file mode 100644 index 000000000000..050c8a50ae41 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h @@ -0,0 +1,120 @@ +/* + * This file is part of the Chelsio T4/T5/T6 Ethernet driver for Linux. + * + * Copyright (c) 2017 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __CXGB4_TC_FLOWER_H +#define __CXGB4_TC_FLOWER_H + +#include <net/pkt_cls.h> + +struct ch_tc_flower_stats { + u64 prev_packet_count; + u64 packet_count; + u64 byte_count; + u64 last_used; +}; + +struct ch_tc_flower_entry { + struct ch_filter_specification fs; + struct ch_tc_flower_stats stats; + unsigned long tc_flower_cookie; + struct rhash_head node; + struct rcu_head rcu; + spinlock_t lock; /* lock for stats */ + u32 filter_id; +}; + +enum { + ETH_DMAC_31_0, /* dmac bits 0.. 31 */ + ETH_DMAC_47_32, /* dmac bits 32..47 */ + ETH_SMAC_15_0, /* smac bits 0.. 15 */ + ETH_SMAC_47_16, /* smac bits 16..47 */ + + IP4_SRC, /* 32-bit IPv4 src */ + IP4_DST, /* 32-bit IPv4 dst */ + + IP6_SRC_31_0, /* src bits 0.. 31 */ + IP6_SRC_63_32, /* src bits 63.. 32 */ + IP6_SRC_95_64, /* src bits 95.. 64 */ + IP6_SRC_127_96, /* src bits 127..96 */ + + IP6_DST_31_0, /* dst bits 0.. 31 */ + IP6_DST_63_32, /* dst bits 63.. 32 */ + IP6_DST_95_64, /* dst bits 95.. 64 */ + IP6_DST_127_96, /* dst bits 127..96 */ + + TCP_SPORT, /* 16-bit TCP sport */ + TCP_DPORT, /* 16-bit TCP dport */ + + UDP_SPORT, /* 16-bit UDP sport */ + UDP_DPORT, /* 16-bit UDP dport */ +}; + +struct ch_tc_pedit_fields { + u8 field; + u8 size; + u32 offset; +}; + +#define PEDIT_FIELDS(type, field, size, fs_field, offset) \ + { type## field, size, \ + offsetof(struct ch_filter_specification, fs_field) + (offset) } + +#define PEDIT_ETH_DMAC_MASK 0xffff +#define PEDIT_TCP_UDP_SPORT_MASK 0xffff +#define PEDIT_ETH_DMAC_31_0 0x0 +#define PEDIT_ETH_DMAC_47_32_SMAC_15_0 0x4 +#define PEDIT_ETH_SMAC_47_16 0x8 +#define PEDIT_IP4_SRC 0xC +#define PEDIT_IP4_DST 0x10 +#define PEDIT_IP6_SRC_31_0 0x8 +#define PEDIT_IP6_SRC_63_32 0xC +#define PEDIT_IP6_SRC_95_64 0x10 +#define PEDIT_IP6_SRC_127_96 0x14 +#define PEDIT_IP6_DST_31_0 0x18 +#define PEDIT_IP6_DST_63_32 0x1C +#define PEDIT_IP6_DST_95_64 0x20 +#define PEDIT_IP6_DST_127_96 0x24 +#define PEDIT_TCP_SPORT_DPORT 0x0 +#define PEDIT_UDP_SPORT_DPORT 0x0 + +int cxgb4_tc_flower_replace(struct net_device *dev, + struct tc_cls_flower_offload *cls); +int cxgb4_tc_flower_destroy(struct net_device *dev, + struct tc_cls_flower_offload *cls); +int cxgb4_tc_flower_stats(struct net_device *dev, + struct tc_cls_flower_offload *cls); + +int cxgb4_init_tc_flower(struct adapter *adap); +void cxgb4_cleanup_tc_flower(struct adapter *adap); +#endif /* __CXGB4_TC_FLOWER_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c index 48970ba08bdc..cd0cd13a964d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c @@ -380,7 +380,7 @@ int cxgb4_delete_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) return -EINVAL; } - ret = cxgb4_del_filter(dev, filter_id); + ret = cxgb4_del_filter(dev, filter_id, NULL); if (ret) goto out; @@ -399,7 +399,7 @@ int cxgb4_delete_knode(struct net_device *dev, struct tc_cls_u32_offload *cls) if (!test_bit(j, link->tid_map)) continue; - ret = __cxgb4_del_filter(dev, j, NULL); + ret = __cxgb4_del_filter(dev, j, NULL, NULL); if (ret) goto out; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 84541fce94c5..08e709ab6dd4 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -212,14 +212,19 @@ struct filter_ctx { struct ch_filter_specification; +int cxgb4_get_free_ftid(struct net_device *dev, int family); int __cxgb4_set_filter(struct net_device *dev, int filter_id, struct ch_filter_specification *fs, struct filter_ctx *ctx); int __cxgb4_del_filter(struct net_device *dev, int filter_id, + struct ch_filter_specification *fs, struct filter_ctx *ctx); int cxgb4_set_filter(struct net_device *dev, int filter_id, struct ch_filter_specification *fs); -int cxgb4_del_filter(struct net_device *dev, int filter_id); +int cxgb4_del_filter(struct net_device *dev, int filter_id, + struct ch_filter_specification *fs); +int cxgb4_get_filter_counters(struct net_device *dev, unsigned int fidx, + u64 *hitcnt, u64 *bytecnt, bool hash); static inline void set_wr_txq(struct sk_buff *skb, int prio, int queue) { diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c index f7ef8871dd0b..1817a0307d26 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c +++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c @@ -422,7 +422,7 @@ struct l2t_entry *cxgb4_l2t_get(struct l2t_data *d, struct neighbour *neigh, u8 lport; u16 vlan; struct l2t_entry *e; - int addr_len = neigh->tbl->key_len; + unsigned int addr_len = neigh->tbl->key_len; u32 *addr = (u32 *)neigh->primary_key; int ifidx = neigh->dev->ifindex; int hash = addr_hash(d, addr, addr_len, ifidx); @@ -536,7 +536,7 @@ void t4_l2t_update(struct adapter *adap, struct neighbour *neigh) struct l2t_entry *e; struct sk_buff_head *arpq = NULL; struct l2t_data *d = adap->l2t; - int addr_len = neigh->tbl->key_len; + unsigned int addr_len = neigh->tbl->key_len; u32 *addr = (u32 *) neigh->primary_key; int ifidx = neigh->dev->ifindex; int hash = addr_hash(d, addr, addr_len, ifidx); diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 43f52a8fe708..922f2f937789 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -1537,7 +1537,13 @@ int t4_mgmt_tx(struct adapter *adap, struct sk_buff *skb) */ static inline int is_ofld_imm(const struct sk_buff *skb) { - return skb->len <= MAX_IMM_TX_PKT_LEN; + struct work_request_hdr *req = (struct work_request_hdr *)skb->data; + unsigned long opcode = FW_WR_OP_G(ntohl(req->wr_hi)); + + if (opcode == FW_CRYPTO_LOOKASIDE_WR) + return skb->len <= SGE_MAX_WR_LEN; + else + return skb->len <= MAX_IMM_TX_PKT_LEN; } /** @@ -2583,11 +2589,11 @@ irq_handler_t t4_intr_handler(struct adapter *adap) return t4_intr_intx; } -static void sge_rx_timer_cb(unsigned long data) +static void sge_rx_timer_cb(struct timer_list *t) { unsigned long m; unsigned int i; - struct adapter *adap = (struct adapter *)data; + struct adapter *adap = from_timer(adap, t, sge.rx_timer); struct sge *s = &adap->sge; for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++) @@ -2620,11 +2626,11 @@ done: mod_timer(&s->rx_timer, jiffies + RX_QCHECK_PERIOD); } -static void sge_tx_timer_cb(unsigned long data) +static void sge_tx_timer_cb(struct timer_list *t) { unsigned long m; unsigned int i, budget; - struct adapter *adap = (struct adapter *)data; + struct adapter *adap = from_timer(adap, t, sge.tx_timer); struct sge *s = &adap->sge; for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++) @@ -3458,8 +3464,8 @@ int t4_sge_init(struct adapter *adap) /* Set up timers used for recuring callbacks to process RX and TX * administrative tasks. */ - setup_timer(&s->rx_timer, sge_rx_timer_cb, (unsigned long)adap); - setup_timer(&s->tx_timer, sge_tx_timer_cb, (unsigned long)adap); + timer_setup(&s->rx_timer, sge_rx_timer_cb, 0); + timer_setup(&s->tx_timer, sge_tx_timer_cb, 0); spin_lock_init(&s->intrq_lock); diff --git a/drivers/net/ethernet/chelsio/cxgb4/smt.c b/drivers/net/ethernet/chelsio/cxgb4/smt.c new file mode 100644 index 000000000000..7b2207a2a130 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/smt.c @@ -0,0 +1,247 @@ +/* + * This file is part of the Chelsio T4/T5/T6 Ethernet driver for Linux. + * + * Copyright (c) 2017 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "cxgb4.h" +#include "smt.h" +#include "t4_msg.h" +#include "t4fw_api.h" +#include "t4_regs.h" +#include "t4_values.h" + +struct smt_data *t4_init_smt(void) +{ + unsigned int smt_size; + struct smt_data *s; + int i; + + smt_size = SMT_SIZE; + + s = kvzalloc(sizeof(*s) + smt_size * sizeof(struct smt_entry), + GFP_KERNEL); + if (!s) + return NULL; + s->smt_size = smt_size; + rwlock_init(&s->lock); + for (i = 0; i < s->smt_size; ++i) { + s->smtab[i].idx = i; + s->smtab[i].state = SMT_STATE_UNUSED; + memset(&s->smtab[i].src_mac, 0, ETH_ALEN); + spin_lock_init(&s->smtab[i].lock); + atomic_set(&s->smtab[i].refcnt, 0); + } + return s; +} + +static struct smt_entry *find_or_alloc_smte(struct smt_data *s, u8 *smac) +{ + struct smt_entry *first_free = NULL; + struct smt_entry *e, *end; + + for (e = &s->smtab[0], end = &s->smtab[s->smt_size]; e != end; ++e) { + if (atomic_read(&e->refcnt) == 0) { + if (!first_free) + first_free = e; + } else { + if (e->state == SMT_STATE_SWITCHING) { + /* This entry is actually in use. See if we can + * re-use it ? + */ + if (memcmp(e->src_mac, smac, ETH_ALEN) == 0) + goto found_reuse; + } + } + } + + if (first_free) { + e = first_free; + goto found; + } + return NULL; + +found: + e->state = SMT_STATE_UNUSED; + +found_reuse: + return e; +} + +static void t4_smte_free(struct smt_entry *e) +{ + spin_lock_bh(&e->lock); + if (atomic_read(&e->refcnt) == 0) { /* hasn't been recycled */ + e->state = SMT_STATE_UNUSED; + } + spin_unlock_bh(&e->lock); +} + +/** + * @e: smt entry to release + * + * Releases ref count and frees up an smt entry from SMT table + */ +void cxgb4_smt_release(struct smt_entry *e) +{ + if (atomic_dec_and_test(&e->refcnt)) + t4_smte_free(e); +} +EXPORT_SYMBOL(cxgb4_smt_release); + +void do_smt_write_rpl(struct adapter *adap, const struct cpl_smt_write_rpl *rpl) +{ + unsigned int smtidx = TID_TID_G(GET_TID(rpl)); + struct smt_data *s = adap->smt; + + if (unlikely(rpl->status != CPL_ERR_NONE)) { + struct smt_entry *e = &s->smtab[smtidx]; + + dev_err(adap->pdev_dev, + "Unexpected SMT_WRITE_RPL status %u for entry %u\n", + rpl->status, smtidx); + spin_lock(&e->lock); + e->state = SMT_STATE_ERROR; + spin_unlock(&e->lock); + return; + } +} + +static int write_smt_entry(struct adapter *adapter, struct smt_entry *e) +{ + struct cpl_t6_smt_write_req *t6req; + struct smt_data *s = adapter->smt; + struct cpl_smt_write_req *req; + struct sk_buff *skb; + int size; + u8 row; + + if (CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5) { + size = sizeof(*req); + skb = alloc_skb(size, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + /* Source MAC Table (SMT) contains 256 SMAC entries + * organized in 128 rows of 2 entries each. + */ + req = (struct cpl_smt_write_req *)__skb_put(skb, size); + INIT_TP_WR(req, 0); + + /* Each row contains an SMAC pair. + * LSB selects the SMAC entry within a row + */ + row = (e->idx >> 1); + if (e->idx & 1) { + req->pfvf1 = 0x0; + memcpy(req->src_mac1, e->src_mac, ETH_ALEN); + + /* fill pfvf0/src_mac0 with entry + * at prev index from smt-tab. + */ + req->pfvf0 = 0x0; + memcpy(req->src_mac0, s->smtab[e->idx - 1].src_mac, + ETH_ALEN); + } else { + req->pfvf0 = 0x0; + memcpy(req->src_mac0, e->src_mac, ETH_ALEN); + + /* fill pfvf1/src_mac1 with entry + * at next index from smt-tab + */ + req->pfvf1 = 0x0; + memcpy(req->src_mac1, s->smtab[e->idx + 1].src_mac, + ETH_ALEN); + } + } else { + size = sizeof(*t6req); + skb = alloc_skb(size, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + /* Source MAC Table (SMT) contains 256 SMAC entries */ + t6req = (struct cpl_t6_smt_write_req *)__skb_put(skb, size); + INIT_TP_WR(t6req, 0); + req = (struct cpl_smt_write_req *)t6req; + + /* fill pfvf0/src_mac0 from smt-tab */ + req->pfvf0 = 0x0; + memcpy(req->src_mac0, s->smtab[e->idx].src_mac, ETH_ALEN); + row = e->idx; + } + + OPCODE_TID(req) = + htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, e->idx | + TID_QID_V(adapter->sge.fw_evtq.abs_id))); + req->params = htonl(SMTW_NORPL_V(0) | + SMTW_IDX_V(row) | + SMTW_OVLAN_IDX_V(0)); + t4_mgmt_tx(adapter, skb); + return 0; +} + +static struct smt_entry *t4_smt_alloc_switching(struct adapter *adap, u16 pfvf, + u8 *smac) +{ + struct smt_data *s = adap->smt; + struct smt_entry *e; + + write_lock_bh(&s->lock); + e = find_or_alloc_smte(s, smac); + if (e) { + spin_lock(&e->lock); + if (!atomic_read(&e->refcnt)) { + atomic_set(&e->refcnt, 1); + e->state = SMT_STATE_SWITCHING; + e->pfvf = pfvf; + memcpy(e->src_mac, smac, ETH_ALEN); + write_smt_entry(adap, e); + } else { + atomic_inc(&e->refcnt); + } + spin_unlock(&e->lock); + } + write_unlock_bh(&s->lock); + return e; +} + +/** + * @dev: net_device pointer + * @smac: MAC address to add to SMT + * Returns pointer to the SMT entry created + * + * Allocates an SMT entry to be used by switching rule of a filter. + */ +struct smt_entry *cxgb4_smt_alloc_switching(struct net_device *dev, u8 *smac) +{ + struct adapter *adap = netdev2adap(dev); + + return t4_smt_alloc_switching(adap, 0x0, smac); +} +EXPORT_SYMBOL(cxgb4_smt_alloc_switching); diff --git a/drivers/net/ethernet/chelsio/cxgb4/smt.h b/drivers/net/ethernet/chelsio/cxgb4/smt.h new file mode 100644 index 000000000000..d6c2cc271398 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/smt.h @@ -0,0 +1,76 @@ +/* + * This file is part of the Chelsio T4/T5/T6 Ethernet driver for Linux. + * + * Copyright (c) 2017 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __CXGB4_SMT_H +#define __CXGB4_SMT_H + +#include <linux/spinlock.h> +#include <linux/if_ether.h> +#include <linux/atomic.h> + +struct adapter; +struct cpl_smt_write_rpl; + +/* SMT related handling. Heavily adapted based on l2t ops in l2t.h/l2t.c + */ +enum { + SMT_STATE_SWITCHING, + SMT_STATE_UNUSED, + SMT_STATE_ERROR +}; + +enum { + SMT_SIZE = 256 +}; + +struct smt_entry { + u16 state; + u16 idx; + u16 pfvf; + u8 src_mac[ETH_ALEN]; + atomic_t refcnt; + spinlock_t lock; /* protect smt entry add,removal */ +}; + +struct smt_data { + unsigned int smt_size; + rwlock_t lock; + struct smt_entry smtab[0]; +}; + +struct smt_data *t4_init_smt(void); +struct smt_entry *cxgb4_smt_alloc_switching(struct net_device *dev, u8 *smac); +void cxgb4_smt_release(struct smt_entry *e); +void do_smt_write_rpl(struct adapter *p, const struct cpl_smt_write_rpl *rpl); +#endif /* __CXGB4_SMT_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index b65ce26ff72f..f63210f15579 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -2639,6 +2639,35 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) #define CHELSIO_VPD_UNIQUE_ID 0x82 /** + * t4_eeprom_ptov - translate a physical EEPROM address to virtual + * @phys_addr: the physical EEPROM address + * @fn: the PCI function number + * @sz: size of function-specific area + * + * Translate a physical EEPROM address to virtual. The first 1K is + * accessed through virtual addresses starting at 31K, the rest is + * accessed through virtual addresses starting at 0. + * + * The mapping is as follows: + * [0..1K) -> [31K..32K) + * [1K..1K+A) -> [31K-A..31K) + * [1K+A..ES) -> [0..ES-A-1K) + * + * where A = @fn * @sz, and ES = EEPROM size. + */ +int t4_eeprom_ptov(unsigned int phys_addr, unsigned int fn, unsigned int sz) +{ + fn *= sz; + if (phys_addr < 1024) + return phys_addr + (31 << 10); + if (phys_addr < 1024 + fn) + return 31744 - fn + phys_addr - 1024; + if (phys_addr < EEPROMSIZE) + return phys_addr - 1024 - fn; + return -EINVAL; +} + +/** * t4_seeprom_wp - enable/disable EEPROM write protection * @adapter: the adapter * @enable: whether to enable or disable write protection @@ -5052,23 +5081,26 @@ static unsigned int t4_use_ldst(struct adapter *adap) } /** - * t4_fw_tp_pio_rw - Access TP PIO through LDST - * @adap: the adapter - * @vals: where the indirect register values are stored/written - * @nregs: how many indirect registers to read/write - * @start_idx: index of first indirect register to read/write - * @rw: Read (1) or Write (0) + * t4_tp_fw_ldst_rw - Access TP indirect register through LDST + * @adap: the adapter + * @cmd: TP fw ldst address space type + * @vals: where the indirect register values are stored/written + * @nregs: how many indirect registers to read/write + * @start_idx: index of first indirect register to read/write + * @rw: Read (1) or Write (0) + * @sleep_ok: if true we may sleep while awaiting command completion * - * Access TP PIO registers through LDST + * Access TP indirect registers through LDST */ -static void t4_fw_tp_pio_rw(struct adapter *adap, u32 *vals, unsigned int nregs, - unsigned int start_index, unsigned int rw) +static int t4_tp_fw_ldst_rw(struct adapter *adap, int cmd, u32 *vals, + unsigned int nregs, unsigned int start_index, + unsigned int rw, bool sleep_ok) { - int ret, i; - int cmd = FW_LDST_ADDRSPC_TP_PIO; + int ret = 0; + unsigned int i; struct fw_ldst_cmd c; - for (i = 0 ; i < nregs; i++) { + for (i = 0; i < nregs; i++) { memset(&c, 0, sizeof(c)); c.op_to_addrspace = cpu_to_be32(FW_CMD_OP_V(FW_LDST_CMD) | FW_CMD_REQUEST_F | @@ -5079,26 +5111,147 @@ static void t4_fw_tp_pio_rw(struct adapter *adap, u32 *vals, unsigned int nregs, c.u.addrval.addr = cpu_to_be32(start_index + i); c.u.addrval.val = rw ? 0 : cpu_to_be32(vals[i]); - ret = t4_wr_mbox(adap, adap->mbox, &c, sizeof(c), &c); - if (!ret && rw) + ret = t4_wr_mbox_meat(adap, adap->mbox, &c, sizeof(c), &c, + sleep_ok); + if (ret) + return ret; + + if (rw) vals[i] = be32_to_cpu(c.u.addrval.val); } + return 0; +} + +/** + * t4_tp_indirect_rw - Read/Write TP indirect register through LDST or backdoor + * @adap: the adapter + * @reg_addr: Address Register + * @reg_data: Data register + * @buff: where the indirect register values are stored/written + * @nregs: how many indirect registers to read/write + * @start_index: index of first indirect register to read/write + * @rw: READ(1) or WRITE(0) + * @sleep_ok: if true we may sleep while awaiting command completion + * + * Read/Write TP indirect registers through LDST if possible. + * Else, use backdoor access + **/ +static void t4_tp_indirect_rw(struct adapter *adap, u32 reg_addr, u32 reg_data, + u32 *buff, u32 nregs, u32 start_index, int rw, + bool sleep_ok) +{ + int rc = -EINVAL; + int cmd; + + switch (reg_addr) { + case TP_PIO_ADDR_A: + cmd = FW_LDST_ADDRSPC_TP_PIO; + break; + case TP_TM_PIO_ADDR_A: + cmd = FW_LDST_ADDRSPC_TP_TM_PIO; + break; + case TP_MIB_INDEX_A: + cmd = FW_LDST_ADDRSPC_TP_MIB; + break; + default: + goto indirect_access; + } + + if (t4_use_ldst(adap)) + rc = t4_tp_fw_ldst_rw(adap, cmd, buff, nregs, start_index, rw, + sleep_ok); + +indirect_access: + + if (rc) { + if (rw) + t4_read_indirect(adap, reg_addr, reg_data, buff, nregs, + start_index); + else + t4_write_indirect(adap, reg_addr, reg_data, buff, nregs, + start_index); + } +} + +/** + * t4_tp_pio_read - Read TP PIO registers + * @adap: the adapter + * @buff: where the indirect register values are written + * @nregs: how many indirect registers to read + * @start_index: index of first indirect register to read + * @sleep_ok: if true we may sleep while awaiting command completion + * + * Read TP PIO Registers + **/ +void t4_tp_pio_read(struct adapter *adap, u32 *buff, u32 nregs, + u32 start_index, bool sleep_ok) +{ + t4_tp_indirect_rw(adap, TP_PIO_ADDR_A, TP_PIO_DATA_A, buff, nregs, + start_index, 1, sleep_ok); +} + +/** + * t4_tp_pio_write - Write TP PIO registers + * @adap: the adapter + * @buff: where the indirect register values are stored + * @nregs: how many indirect registers to write + * @start_index: index of first indirect register to write + * @sleep_ok: if true we may sleep while awaiting command completion + * + * Write TP PIO Registers + **/ +static void t4_tp_pio_write(struct adapter *adap, u32 *buff, u32 nregs, + u32 start_index, bool sleep_ok) +{ + t4_tp_indirect_rw(adap, TP_PIO_ADDR_A, TP_PIO_DATA_A, buff, nregs, + start_index, 0, sleep_ok); +} + +/** + * t4_tp_tm_pio_read - Read TP TM PIO registers + * @adap: the adapter + * @buff: where the indirect register values are written + * @nregs: how many indirect registers to read + * @start_index: index of first indirect register to read + * @sleep_ok: if true we may sleep while awaiting command completion + * + * Read TP TM PIO Registers + **/ +void t4_tp_tm_pio_read(struct adapter *adap, u32 *buff, u32 nregs, + u32 start_index, bool sleep_ok) +{ + t4_tp_indirect_rw(adap, TP_TM_PIO_ADDR_A, TP_TM_PIO_DATA_A, buff, + nregs, start_index, 1, sleep_ok); +} + +/** + * t4_tp_mib_read - Read TP MIB registers + * @adap: the adapter + * @buff: where the indirect register values are written + * @nregs: how many indirect registers to read + * @start_index: index of first indirect register to read + * @sleep_ok: if true we may sleep while awaiting command completion + * + * Read TP MIB Registers + **/ +void t4_tp_mib_read(struct adapter *adap, u32 *buff, u32 nregs, u32 start_index, + bool sleep_ok) +{ + t4_tp_indirect_rw(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, buff, nregs, + start_index, 1, sleep_ok); } /** * t4_read_rss_key - read the global RSS key * @adap: the adapter * @key: 10-entry array holding the 320-bit RSS key + * @sleep_ok: if true we may sleep while awaiting command completion * * Reads the global 320-bit RSS key. */ -void t4_read_rss_key(struct adapter *adap, u32 *key) +void t4_read_rss_key(struct adapter *adap, u32 *key, bool sleep_ok) { - if (t4_use_ldst(adap)) - t4_fw_tp_pio_rw(adap, key, 10, TP_RSS_SECRET_KEY0_A, 1); - else - t4_read_indirect(adap, TP_PIO_ADDR_A, TP_PIO_DATA_A, key, 10, - TP_RSS_SECRET_KEY0_A); + t4_tp_pio_read(adap, key, 10, TP_RSS_SECRET_KEY0_A, sleep_ok); } /** @@ -5106,12 +5259,14 @@ void t4_read_rss_key(struct adapter *adap, u32 *key) * @adap: the adapter * @key: 10-entry array holding the 320-bit RSS key * @idx: which RSS key to write + * @sleep_ok: if true we may sleep while awaiting command completion * * Writes one of the RSS keys with the given 320-bit value. If @idx is * 0..15 the corresponding entry in the RSS key table is written, * otherwise the global RSS key is written. */ -void t4_write_rss_key(struct adapter *adap, const u32 *key, int idx) +void t4_write_rss_key(struct adapter *adap, const u32 *key, int idx, + bool sleep_ok) { u8 rss_key_addr_cnt = 16; u32 vrt = t4_read_reg(adap, TP_RSS_CONFIG_VRT_A); @@ -5124,11 +5279,7 @@ void t4_write_rss_key(struct adapter *adap, const u32 *key, int idx) (vrt & KEYEXTEND_F) && (KEYMODE_G(vrt) == 3)) rss_key_addr_cnt = 32; - if (t4_use_ldst(adap)) - t4_fw_tp_pio_rw(adap, (void *)key, 10, TP_RSS_SECRET_KEY0_A, 0); - else - t4_write_indirect(adap, TP_PIO_ADDR_A, TP_PIO_DATA_A, key, 10, - TP_RSS_SECRET_KEY0_A); + t4_tp_pio_write(adap, (void *)key, 10, TP_RSS_SECRET_KEY0_A, sleep_ok); if (idx >= 0 && idx < rss_key_addr_cnt) { if (rss_key_addr_cnt > 16) @@ -5146,19 +5297,15 @@ void t4_write_rss_key(struct adapter *adap, const u32 *key, int idx) * @adapter: the adapter * @index: the entry in the PF RSS table to read * @valp: where to store the returned value + * @sleep_ok: if true we may sleep while awaiting command completion * * Reads the PF RSS Configuration Table at the specified index and returns * the value found there. */ void t4_read_rss_pf_config(struct adapter *adapter, unsigned int index, - u32 *valp) + u32 *valp, bool sleep_ok) { - if (t4_use_ldst(adapter)) - t4_fw_tp_pio_rw(adapter, valp, 1, - TP_RSS_PF0_CONFIG_A + index, 1); - else - t4_read_indirect(adapter, TP_PIO_ADDR_A, TP_PIO_DATA_A, - valp, 1, TP_RSS_PF0_CONFIG_A + index); + t4_tp_pio_read(adapter, valp, 1, TP_RSS_PF0_CONFIG_A + index, sleep_ok); } /** @@ -5167,12 +5314,13 @@ void t4_read_rss_pf_config(struct adapter *adapter, unsigned int index, * @index: the entry in the VF RSS table to read * @vfl: where to store the returned VFL * @vfh: where to store the returned VFH + * @sleep_ok: if true we may sleep while awaiting command completion * * Reads the VF RSS Configuration Table at the specified index and returns * the (VFL, VFH) values found there. */ void t4_read_rss_vf_config(struct adapter *adapter, unsigned int index, - u32 *vfl, u32 *vfh) + u32 *vfl, u32 *vfh, bool sleep_ok) { u32 vrt, mask, data; @@ -5193,50 +5341,37 @@ void t4_read_rss_vf_config(struct adapter *adapter, unsigned int index, /* Grab the VFL/VFH values ... */ - if (t4_use_ldst(adapter)) { - t4_fw_tp_pio_rw(adapter, vfl, 1, TP_RSS_VFL_CONFIG_A, 1); - t4_fw_tp_pio_rw(adapter, vfh, 1, TP_RSS_VFH_CONFIG_A, 1); - } else { - t4_read_indirect(adapter, TP_PIO_ADDR_A, TP_PIO_DATA_A, - vfl, 1, TP_RSS_VFL_CONFIG_A); - t4_read_indirect(adapter, TP_PIO_ADDR_A, TP_PIO_DATA_A, - vfh, 1, TP_RSS_VFH_CONFIG_A); - } + t4_tp_pio_read(adapter, vfl, 1, TP_RSS_VFL_CONFIG_A, sleep_ok); + t4_tp_pio_read(adapter, vfh, 1, TP_RSS_VFH_CONFIG_A, sleep_ok); } /** * t4_read_rss_pf_map - read PF RSS Map * @adapter: the adapter + * @sleep_ok: if true we may sleep while awaiting command completion * * Reads the PF RSS Map register and returns its value. */ -u32 t4_read_rss_pf_map(struct adapter *adapter) +u32 t4_read_rss_pf_map(struct adapter *adapter, bool sleep_ok) { u32 pfmap; - if (t4_use_ldst(adapter)) - t4_fw_tp_pio_rw(adapter, &pfmap, 1, TP_RSS_PF_MAP_A, 1); - else - t4_read_indirect(adapter, TP_PIO_ADDR_A, TP_PIO_DATA_A, - &pfmap, 1, TP_RSS_PF_MAP_A); + t4_tp_pio_read(adapter, &pfmap, 1, TP_RSS_PF_MAP_A, sleep_ok); return pfmap; } /** * t4_read_rss_pf_mask - read PF RSS Mask * @adapter: the adapter + * @sleep_ok: if true we may sleep while awaiting command completion * * Reads the PF RSS Mask register and returns its value. */ -u32 t4_read_rss_pf_mask(struct adapter *adapter) +u32 t4_read_rss_pf_mask(struct adapter *adapter, bool sleep_ok) { u32 pfmask; - if (t4_use_ldst(adapter)) - t4_fw_tp_pio_rw(adapter, &pfmask, 1, TP_RSS_PF_MSK_A, 1); - else - t4_read_indirect(adapter, TP_PIO_ADDR_A, TP_PIO_DATA_A, - &pfmask, 1, TP_RSS_PF_MSK_A); + t4_tp_pio_read(adapter, &pfmask, 1, TP_RSS_PF_MSK_A, sleep_ok); return pfmask; } @@ -5245,12 +5380,13 @@ u32 t4_read_rss_pf_mask(struct adapter *adapter) * @adap: the adapter * @v4: holds the TCP/IP counter values * @v6: holds the TCP/IPv6 counter values + * @sleep_ok: if true we may sleep while awaiting command completion * * Returns the values of TP's TCP/IP and TCP/IPv6 MIB counters. * Either @v4 or @v6 may be %NULL to skip the corresponding stats. */ void t4_tp_get_tcp_stats(struct adapter *adap, struct tp_tcp_stats *v4, - struct tp_tcp_stats *v6) + struct tp_tcp_stats *v6, bool sleep_ok) { u32 val[TP_MIB_TCP_RXT_SEG_LO_A - TP_MIB_TCP_OUT_RST_A + 1]; @@ -5259,16 +5395,16 @@ void t4_tp_get_tcp_stats(struct adapter *adap, struct tp_tcp_stats *v4, #define STAT64(x) (((u64)STAT(x##_HI) << 32) | STAT(x##_LO)) if (v4) { - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, val, - ARRAY_SIZE(val), TP_MIB_TCP_OUT_RST_A); + t4_tp_mib_read(adap, val, ARRAY_SIZE(val), + TP_MIB_TCP_OUT_RST_A, sleep_ok); v4->tcp_out_rsts = STAT(OUT_RST); v4->tcp_in_segs = STAT64(IN_SEG); v4->tcp_out_segs = STAT64(OUT_SEG); v4->tcp_retrans_segs = STAT64(RXT_SEG); } if (v6) { - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, val, - ARRAY_SIZE(val), TP_MIB_TCP_V6OUT_RST_A); + t4_tp_mib_read(adap, val, ARRAY_SIZE(val), + TP_MIB_TCP_V6OUT_RST_A, sleep_ok); v6->tcp_out_rsts = STAT(OUT_RST); v6->tcp_in_segs = STAT64(IN_SEG); v6->tcp_out_segs = STAT64(OUT_SEG); @@ -5283,63 +5419,66 @@ void t4_tp_get_tcp_stats(struct adapter *adap, struct tp_tcp_stats *v4, * t4_tp_get_err_stats - read TP's error MIB counters * @adap: the adapter * @st: holds the counter values + * @sleep_ok: if true we may sleep while awaiting command completion * * Returns the values of TP's error counters. */ -void t4_tp_get_err_stats(struct adapter *adap, struct tp_err_stats *st) +void t4_tp_get_err_stats(struct adapter *adap, struct tp_err_stats *st, + bool sleep_ok) { int nchan = adap->params.arch.nchan; - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, - st->mac_in_errs, nchan, TP_MIB_MAC_IN_ERR_0_A); - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, - st->hdr_in_errs, nchan, TP_MIB_HDR_IN_ERR_0_A); - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, - st->tcp_in_errs, nchan, TP_MIB_TCP_IN_ERR_0_A); - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, - st->tnl_cong_drops, nchan, TP_MIB_TNL_CNG_DROP_0_A); - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, - st->ofld_chan_drops, nchan, TP_MIB_OFD_CHN_DROP_0_A); - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, - st->tnl_tx_drops, nchan, TP_MIB_TNL_DROP_0_A); - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, - st->ofld_vlan_drops, nchan, TP_MIB_OFD_VLN_DROP_0_A); - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, - st->tcp6_in_errs, nchan, TP_MIB_TCP_V6IN_ERR_0_A); - - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, - &st->ofld_no_neigh, 2, TP_MIB_OFD_ARP_DROP_A); + t4_tp_mib_read(adap, st->mac_in_errs, nchan, TP_MIB_MAC_IN_ERR_0_A, + sleep_ok); + t4_tp_mib_read(adap, st->hdr_in_errs, nchan, TP_MIB_HDR_IN_ERR_0_A, + sleep_ok); + t4_tp_mib_read(adap, st->tcp_in_errs, nchan, TP_MIB_TCP_IN_ERR_0_A, + sleep_ok); + t4_tp_mib_read(adap, st->tnl_cong_drops, nchan, + TP_MIB_TNL_CNG_DROP_0_A, sleep_ok); + t4_tp_mib_read(adap, st->ofld_chan_drops, nchan, + TP_MIB_OFD_CHN_DROP_0_A, sleep_ok); + t4_tp_mib_read(adap, st->tnl_tx_drops, nchan, TP_MIB_TNL_DROP_0_A, + sleep_ok); + t4_tp_mib_read(adap, st->ofld_vlan_drops, nchan, + TP_MIB_OFD_VLN_DROP_0_A, sleep_ok); + t4_tp_mib_read(adap, st->tcp6_in_errs, nchan, + TP_MIB_TCP_V6IN_ERR_0_A, sleep_ok); + t4_tp_mib_read(adap, &st->ofld_no_neigh, 2, TP_MIB_OFD_ARP_DROP_A, + sleep_ok); } /** * t4_tp_get_cpl_stats - read TP's CPL MIB counters * @adap: the adapter * @st: holds the counter values + * @sleep_ok: if true we may sleep while awaiting command completion * * Returns the values of TP's CPL counters. */ -void t4_tp_get_cpl_stats(struct adapter *adap, struct tp_cpl_stats *st) +void t4_tp_get_cpl_stats(struct adapter *adap, struct tp_cpl_stats *st, + bool sleep_ok) { int nchan = adap->params.arch.nchan; - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, st->req, - nchan, TP_MIB_CPL_IN_REQ_0_A); - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, st->rsp, - nchan, TP_MIB_CPL_OUT_RSP_0_A); + t4_tp_mib_read(adap, st->req, nchan, TP_MIB_CPL_IN_REQ_0_A, sleep_ok); + t4_tp_mib_read(adap, st->rsp, nchan, TP_MIB_CPL_OUT_RSP_0_A, sleep_ok); } /** * t4_tp_get_rdma_stats - read TP's RDMA MIB counters * @adap: the adapter * @st: holds the counter values + * @sleep_ok: if true we may sleep while awaiting command completion * * Returns the values of TP's RDMA counters. */ -void t4_tp_get_rdma_stats(struct adapter *adap, struct tp_rdma_stats *st) +void t4_tp_get_rdma_stats(struct adapter *adap, struct tp_rdma_stats *st, + bool sleep_ok) { - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, &st->rqe_dfr_pkt, - 2, TP_MIB_RQE_DFR_PKT_A); + t4_tp_mib_read(adap, &st->rqe_dfr_pkt, 2, TP_MIB_RQE_DFR_PKT_A, + sleep_ok); } /** @@ -5347,20 +5486,24 @@ void t4_tp_get_rdma_stats(struct adapter *adap, struct tp_rdma_stats *st) * @adap: the adapter * @idx: the port index * @st: holds the counter values + * @sleep_ok: if true we may sleep while awaiting command completion * * Returns the values of TP's FCoE counters for the selected port. */ void t4_get_fcoe_stats(struct adapter *adap, unsigned int idx, - struct tp_fcoe_stats *st) + struct tp_fcoe_stats *st, bool sleep_ok) { u32 val[2]; - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, &st->frames_ddp, - 1, TP_MIB_FCOE_DDP_0_A + idx); - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, &st->frames_drop, - 1, TP_MIB_FCOE_DROP_0_A + idx); - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, val, - 2, TP_MIB_FCOE_BYTE_0_HI_A + 2 * idx); + t4_tp_mib_read(adap, &st->frames_ddp, 1, TP_MIB_FCOE_DDP_0_A + idx, + sleep_ok); + + t4_tp_mib_read(adap, &st->frames_drop, 1, + TP_MIB_FCOE_DROP_0_A + idx, sleep_ok); + + t4_tp_mib_read(adap, val, 2, TP_MIB_FCOE_BYTE_0_HI_A + 2 * idx, + sleep_ok); + st->octets_ddp = ((u64)val[0] << 32) | val[1]; } @@ -5368,15 +5511,16 @@ void t4_get_fcoe_stats(struct adapter *adap, unsigned int idx, * t4_get_usm_stats - read TP's non-TCP DDP MIB counters * @adap: the adapter * @st: holds the counter values + * @sleep_ok: if true we may sleep while awaiting command completion * * Returns the values of TP's counters for non-TCP directly-placed packets. */ -void t4_get_usm_stats(struct adapter *adap, struct tp_usm_stats *st) +void t4_get_usm_stats(struct adapter *adap, struct tp_usm_stats *st, + bool sleep_ok) { u32 val[4]; - t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, val, 4, - TP_MIB_USM_PKTS_A); + t4_tp_mib_read(adap, val, 4, TP_MIB_USM_PKTS_A, sleep_ok); st->frames = val[0]; st->drops = val[1]; st->octets = ((u64)val[2] << 32) | val[3]; @@ -8205,7 +8349,7 @@ struct flash_desc { u32 size_mb; }; -static int get_flash_params(struct adapter *adap) +static int t4_get_flash_params(struct adapter *adap) { /* Table for non-Numonix supported flash parts. Numonix parts are left * to the preexisting code. All flash parts have 64KB sectors. @@ -8214,40 +8358,137 @@ static int get_flash_params(struct adapter *adap) { 0x150201, 4 << 20 }, /* Spansion 4MB S25FL032P */ }; + unsigned int part, manufacturer; + unsigned int density, size; + u32 flashid = 0; int ret; - u32 info; + + /* Issue a Read ID Command to the Flash part. We decode supported + * Flash parts and their sizes from this. There's a newer Query + * Command which can retrieve detailed geometry information but many + * Flash parts don't support it. + */ ret = sf1_write(adap, 1, 1, 0, SF_RD_ID); if (!ret) - ret = sf1_read(adap, 3, 0, 1, &info); + ret = sf1_read(adap, 3, 0, 1, &flashid); t4_write_reg(adap, SF_OP_A, 0); /* unlock SF */ if (ret) return ret; - for (ret = 0; ret < ARRAY_SIZE(supported_flash); ++ret) - if (supported_flash[ret].vendor_and_model_id == info) { - adap->params.sf_size = supported_flash[ret].size_mb; + /* Check to see if it's one of our non-standard supported Flash parts. + */ + for (part = 0; part < ARRAY_SIZE(supported_flash); part++) + if (supported_flash[part].vendor_and_model_id == flashid) { + adap->params.sf_size = supported_flash[part].size_mb; adap->params.sf_nsec = adap->params.sf_size / SF_SEC_SIZE; - return 0; + goto found; } - if ((info & 0xff) != 0x20) /* not a Numonix flash */ - return -EINVAL; - info >>= 16; /* log2 of size */ - if (info >= 0x14 && info < 0x18) - adap->params.sf_nsec = 1 << (info - 16); - else if (info == 0x18) - adap->params.sf_nsec = 64; - else + /* Decode Flash part size. The code below looks repetative with + * common encodings, but that's not guaranteed in the JEDEC + * specification for the Read JADEC ID command. The only thing that + * we're guaranteed by the JADEC specification is where the + * Manufacturer ID is in the returned result. After that each + * Manufacturer ~could~ encode things completely differently. + * Note, all Flash parts must have 64KB sectors. + */ + manufacturer = flashid & 0xff; + switch (manufacturer) { + case 0x20: { /* Micron/Numonix */ + /* This Density -> Size decoding table is taken from Micron + * Data Sheets. + */ + density = (flashid >> 16) & 0xff; + switch (density) { + case 0x14: /* 1MB */ + size = 1 << 20; + break; + case 0x15: /* 2MB */ + size = 1 << 21; + break; + case 0x16: /* 4MB */ + size = 1 << 22; + break; + case 0x17: /* 8MB */ + size = 1 << 23; + break; + case 0x18: /* 16MB */ + size = 1 << 24; + break; + case 0x19: /* 32MB */ + size = 1 << 25; + break; + case 0x20: /* 64MB */ + size = 1 << 26; + break; + case 0x21: /* 128MB */ + size = 1 << 27; + break; + case 0x22: /* 256MB */ + size = 1 << 28; + break; + + default: + dev_err(adap->pdev_dev, "Micron Flash Part has bad size, ID = %#x, Density code = %#x\n", + flashid, density); + return -EINVAL; + } + break; + } + case 0xc2: { /* Macronix */ + /* This Density -> Size decoding table is taken from Macronix + * Data Sheets. + */ + density = (flashid >> 16) & 0xff; + switch (density) { + case 0x17: /* 8MB */ + size = 1 << 23; + break; + case 0x18: /* 16MB */ + size = 1 << 24; + break; + default: + dev_err(adap->pdev_dev, "Macronix Flash Part has bad size, ID = %#x, Density code = %#x\n", + flashid, density); + return -EINVAL; + } + break; + } + case 0xef: { /* Winbond */ + /* This Density -> Size decoding table is taken from Winbond + * Data Sheets. + */ + density = (flashid >> 16) & 0xff; + switch (density) { + case 0x17: /* 8MB */ + size = 1 << 23; + break; + case 0x18: /* 16MB */ + size = 1 << 24; + break; + default: + dev_err(adap->pdev_dev, "Winbond Flash Part has bad size, ID = %#x, Density code = %#x\n", + flashid, density); + return -EINVAL; + } + break; + } + default: + dev_err(adap->pdev_dev, "Unsupported Flash Part, ID = %#x\n", + flashid); return -EINVAL; - adap->params.sf_size = 1 << info; - adap->params.sf_fw_start = - t4_read_reg(adap, CIM_BOOT_CFG_A) & BOOTADDR_M; + } + + /* Store decoded Flash size and fall through into vetting code. */ + adap->params.sf_size = size; + adap->params.sf_nsec = size / SF_SEC_SIZE; +found: if (adap->params.sf_size < FLASH_MIN_SIZE) - dev_warn(adap->pdev_dev, "WARNING!!! FLASH size %#x < %#x!!!\n", - adap->params.sf_size, FLASH_MIN_SIZE); + dev_warn(adap->pdev_dev, "WARNING: Flash Part ID %#x, size %#x < %#x\n", + flashid, adap->params.sf_size, FLASH_MIN_SIZE); return 0; } @@ -8285,7 +8526,7 @@ int t4_prep_adapter(struct adapter *adapter) get_pci_mode(adapter, &adapter->params.pci); pl_rev = REV_G(t4_read_reg(adapter, PL_REV_A)); - ret = get_flash_params(adapter); + ret = t4_get_flash_params(adapter); if (ret < 0) { dev_err(adapter->pdev_dev, "error %d identifying flash\n", ret); return ret; @@ -8567,10 +8808,11 @@ int t4_init_sge_params(struct adapter *adapter) /** * t4_init_tp_params - initialize adap->params.tp * @adap: the adapter + * @sleep_ok: if true we may sleep while awaiting command completion * * Initialize various fields of the adapter's TP Parameters structure. */ -int t4_init_tp_params(struct adapter *adap) +int t4_init_tp_params(struct adapter *adap, bool sleep_ok) { int chan; u32 v; @@ -8586,19 +8828,11 @@ int t4_init_tp_params(struct adapter *adap) /* Cache the adapter's Compressed Filter Mode and global Incress * Configuration. */ - if (t4_use_ldst(adap)) { - t4_fw_tp_pio_rw(adap, &adap->params.tp.vlan_pri_map, 1, - TP_VLAN_PRI_MAP_A, 1); - t4_fw_tp_pio_rw(adap, &adap->params.tp.ingress_config, 1, - TP_INGRESS_CONFIG_A, 1); - } else { - t4_read_indirect(adap, TP_PIO_ADDR_A, TP_PIO_DATA_A, - &adap->params.tp.vlan_pri_map, 1, - TP_VLAN_PRI_MAP_A); - t4_read_indirect(adap, TP_PIO_ADDR_A, TP_PIO_DATA_A, - &adap->params.tp.ingress_config, 1, - TP_INGRESS_CONFIG_A); - } + t4_tp_pio_read(adap, &adap->params.tp.vlan_pri_map, 1, + TP_VLAN_PRI_MAP_A, sleep_ok); + t4_tp_pio_read(adap, &adap->params.tp.ingress_config, 1, + TP_INGRESS_CONFIG_A, sleep_ok); + /* For T6, cache the adapter's compressed error vector * and passing outer header info for encapsulated packets. */ @@ -8611,11 +8845,21 @@ int t4_init_tp_params(struct adapter *adap) * shift positions of several elements of the Compressed Filter Tuple * for this adapter which we need frequently ... */ - adap->params.tp.vlan_shift = t4_filter_field_shift(adap, VLAN_F); - adap->params.tp.vnic_shift = t4_filter_field_shift(adap, VNIC_ID_F); + adap->params.tp.fcoe_shift = t4_filter_field_shift(adap, FCOE_F); adap->params.tp.port_shift = t4_filter_field_shift(adap, PORT_F); + adap->params.tp.vnic_shift = t4_filter_field_shift(adap, VNIC_ID_F); + adap->params.tp.vlan_shift = t4_filter_field_shift(adap, VLAN_F); + adap->params.tp.tos_shift = t4_filter_field_shift(adap, TOS_F); adap->params.tp.protocol_shift = t4_filter_field_shift(adap, PROTOCOL_F); + adap->params.tp.ethertype_shift = t4_filter_field_shift(adap, + ETHERTYPE_F); + adap->params.tp.macmatch_shift = t4_filter_field_shift(adap, + MACMATCH_F); + adap->params.tp.matchtype_shift = t4_filter_field_shift(adap, + MPSHITTYPE_F); + adap->params.tp.frag_shift = t4_filter_field_shift(adap, + FRAGMENTATION_F); /* If TP_INGRESS_CONFIG.VNID == 0, then TP_VLAN_PRI_MAP.VNIC_ID * represents the presence of an Outer VLAN instead of a VNIC ID. @@ -8623,6 +8867,10 @@ int t4_init_tp_params(struct adapter *adap) if ((adap->params.tp.ingress_config & VNIC_F) == 0) adap->params.tp.vnic_shift = -1; + v = t4_read_reg(adap, LE_3_DB_HASH_MASK_GEN_IPV4_T6_A); + adap->params.tp.hash_filter_mask = v; + v = t4_read_reg(adap, LE_4_DB_HASH_MASK_GEN_IPV4_T6_A); + adap->params.tp.hash_filter_mask |= ((u64)v << 32); return 0; } @@ -9342,6 +9590,125 @@ int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf, return t4_wr_mbox(adapter, adapter->mbox, &cmd, sizeof(cmd), &cmd); } +/** + * t4_read_pace_tbl - read the pace table + * @adap: the adapter + * @pace_vals: holds the returned values + * + * Returns the values of TP's pace table in microseconds. + */ +void t4_read_pace_tbl(struct adapter *adap, unsigned int pace_vals[NTX_SCHED]) +{ + unsigned int i, v; + + for (i = 0; i < NTX_SCHED; i++) { + t4_write_reg(adap, TP_PACE_TABLE_A, 0xffff0000 + i); + v = t4_read_reg(adap, TP_PACE_TABLE_A); + pace_vals[i] = dack_ticks_to_usec(adap, v); + } +} + +/** + * t4_get_tx_sched - get the configuration of a Tx HW traffic scheduler + * @adap: the adapter + * @sched: the scheduler index + * @kbps: the byte rate in Kbps + * @ipg: the interpacket delay in tenths of nanoseconds + * @sleep_ok: if true we may sleep while awaiting command completion + * + * Return the current configuration of a HW Tx scheduler. + */ +void t4_get_tx_sched(struct adapter *adap, unsigned int sched, + unsigned int *kbps, unsigned int *ipg, bool sleep_ok) +{ + unsigned int v, addr, bpt, cpt; + + if (kbps) { + addr = TP_TX_MOD_Q1_Q0_RATE_LIMIT_A - sched / 2; + t4_tp_tm_pio_read(adap, &v, 1, addr, sleep_ok); + if (sched & 1) + v >>= 16; + bpt = (v >> 8) & 0xff; + cpt = v & 0xff; + if (!cpt) { + *kbps = 0; /* scheduler disabled */ + } else { + v = (adap->params.vpd.cclk * 1000) / cpt; /* ticks/s */ + *kbps = (v * bpt) / 125; + } + } + if (ipg) { + addr = TP_TX_MOD_Q1_Q0_TIMER_SEPARATOR_A - sched / 2; + t4_tp_tm_pio_read(adap, &v, 1, addr, sleep_ok); + if (sched & 1) + v >>= 16; + v &= 0xffff; + *ipg = (10000 * v) / core_ticks_per_usec(adap); + } +} + +/* t4_sge_ctxt_rd - read an SGE context through FW + * @adap: the adapter + * @mbox: mailbox to use for the FW command + * @cid: the context id + * @ctype: the context type + * @data: where to store the context data + * + * Issues a FW command through the given mailbox to read an SGE context. + */ +int t4_sge_ctxt_rd(struct adapter *adap, unsigned int mbox, unsigned int cid, + enum ctxt_type ctype, u32 *data) +{ + struct fw_ldst_cmd c; + int ret; + + if (ctype == CTXT_FLM) + ret = FW_LDST_ADDRSPC_SGE_FLMC; + else + ret = FW_LDST_ADDRSPC_SGE_CONMC; + + memset(&c, 0, sizeof(c)); + c.op_to_addrspace = cpu_to_be32(FW_CMD_OP_V(FW_LDST_CMD) | + FW_CMD_REQUEST_F | FW_CMD_READ_F | + FW_LDST_CMD_ADDRSPACE_V(ret)); + c.cycles_to_len16 = cpu_to_be32(FW_LEN16(c)); + c.u.idctxt.physid = cpu_to_be32(cid); + + ret = t4_wr_mbox(adap, mbox, &c, sizeof(c), &c); + if (ret == 0) { + data[0] = be32_to_cpu(c.u.idctxt.ctxt_data0); + data[1] = be32_to_cpu(c.u.idctxt.ctxt_data1); + data[2] = be32_to_cpu(c.u.idctxt.ctxt_data2); + data[3] = be32_to_cpu(c.u.idctxt.ctxt_data3); + data[4] = be32_to_cpu(c.u.idctxt.ctxt_data4); + data[5] = be32_to_cpu(c.u.idctxt.ctxt_data5); + } + return ret; +} + +/** + * t4_sge_ctxt_rd_bd - read an SGE context bypassing FW + * @adap: the adapter + * @cid: the context id + * @ctype: the context type + * @data: where to store the context data + * + * Reads an SGE context directly, bypassing FW. This is only for + * debugging when FW is unavailable. + */ +int t4_sge_ctxt_rd_bd(struct adapter *adap, unsigned int cid, + enum ctxt_type ctype, u32 *data) +{ + int i, ret; + + t4_write_reg(adap, SGE_CTXT_CMD_A, CTXTQID_V(cid) | CTXTTYPE_V(ctype)); + ret = t4_wait_op_done(adap, SGE_CTXT_CMD_A, BUSY_F, 0, 3, 1); + if (!ret) + for (i = SGE_CTXT_DATA0_A; i <= SGE_CTXT_DATA5_A; i += 4) + *data++ = t4_read_reg(adap, i); + return ret; +} + int t4_sched_params(struct adapter *adapter, int type, int level, int mode, int rateunit, int ratemode, int channel, int class, int minrate, int maxrate, int weight, int pktsize) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h index 7f59ca458431..a964ed184356 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h @@ -47,6 +47,7 @@ enum { TCB_SIZE = 128, /* TCB size */ NMTUS = 16, /* size of MTU table */ NCCTRL_WIN = 32, /* # of congestion control windows */ + NTX_SCHED = 8, /* # of HW Tx scheduling queues */ PM_NSTATS = 5, /* # of PM stats */ T6_PM_NSTATS = 7, /* # of PM stats in T6 */ MBOX_LEN = 64, /* mailbox size in bytes */ @@ -67,6 +68,12 @@ enum { ULPRX_LA_SIZE = 512, /* # of 256-bit words in ULP_RX LA */ }; +/* SGE context types */ +enum ctxt_type { + CTXT_FLM = 2, + CTXT_CNM, +}; + enum { SF_PAGE_SIZE = 256, /* serial flash page size */ SF_SEC_SIZE = 64 * 1024, /* serial flash sector size */ @@ -78,6 +85,7 @@ enum { MBOX_OWNER_NONE, MBOX_OWNER_FW, MBOX_OWNER_DRV }; /* mailbox owners */ enum { SGE_MAX_WR_LEN = 512, /* max WR size in bytes */ + SGE_CTXT_SIZE = 24, /* size of SGE context */ SGE_NTIMERS = 6, /* # of interrupt holdoff timer values */ SGE_NCOUNTERS = 4, /* # of interrupt packet counter values */ SGE_MAX_IQ_SIZE = 65520, diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h index b0ff78da8aa2..7e12f241145b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h @@ -50,6 +50,7 @@ enum { CPL_RX_DATA_ACK = 0xD, CPL_TX_PKT = 0xE, CPL_L2T_WRITE_REQ = 0x12, + CPL_SMT_WRITE_REQ = 0x14, CPL_TID_RELEASE = 0x1A, CPL_TX_DATA_ISO = 0x1F, @@ -60,6 +61,7 @@ enum { CPL_PEER_CLOSE = 0x26, CPL_ABORT_REQ_RSS = 0x2B, CPL_ABORT_RPL_RSS = 0x2D, + CPL_SMT_WRITE_RPL = 0x2E, CPL_RX_PHYS_ADDR = 0x30, CPL_CLOSE_CON_RPL = 0x32, @@ -284,6 +286,7 @@ struct work_request_hdr { #define RX_CHANNEL_S 26 #define RX_CHANNEL_V(x) ((x) << RX_CHANNEL_S) +#define RX_CHANNEL_F RX_CHANNEL_V(1U) #define WND_SCALE_EN_S 28 #define WND_SCALE_EN_V(x) ((x) << WND_SCALE_EN_S) @@ -313,6 +316,10 @@ struct cpl_pass_open_req { #define DELACK_V(x) ((x) << DELACK_S) #define DELACK_F DELACK_V(1U) +#define NON_OFFLOAD_S 7 +#define NON_OFFLOAD_V(x) ((x) << NON_OFFLOAD_S) +#define NON_OFFLOAD_F NON_OFFLOAD_V(1U) + #define DSCP_S 22 #define DSCP_M 0x3F #define DSCP_V(x) ((x) << DSCP_S) @@ -681,8 +688,8 @@ struct cpl_set_tcb_field { }; /* cpl_set_tcb_field.word_cookie fields */ -#define TCB_WORD_S 0 -#define TCB_WORD(x) ((x) << TCB_WORD_S) +#define TCB_WORD_S 0 +#define TCB_WORD_V(x) ((x) << TCB_WORD_S) #define TCB_COOKIE_S 5 #define TCB_COOKIE_M 0x7 @@ -1266,6 +1273,44 @@ struct cpl_l2t_write_rpl { u8 rsvd[3]; }; +struct cpl_smt_write_req { + WR_HDR; + union opcode_tid ot; + __be32 params; + __be16 pfvf1; + u8 src_mac1[6]; + __be16 pfvf0; + u8 src_mac0[6]; +}; + +struct cpl_t6_smt_write_req { + WR_HDR; + union opcode_tid ot; + __be32 params; + __be64 tag; + __be16 pfvf0; + u8 src_mac0[6]; + __be32 local_ip; + __be32 rsvd; +}; + +struct cpl_smt_write_rpl { + union opcode_tid ot; + u8 status; + u8 rsvd[3]; +}; + +/* cpl_smt_{read,write}_req.params fields */ +#define SMTW_OVLAN_IDX_S 16 +#define SMTW_OVLAN_IDX_V(x) ((x) << SMTW_OVLAN_IDX_S) + +#define SMTW_IDX_S 20 +#define SMTW_IDX_V(x) ((x) << SMTW_IDX_S) + +#define SMTW_NORPL_S 31 +#define SMTW_NORPL_V(x) ((x) << SMTW_NORPL_S) +#define SMTW_NORPL_F SMTW_NORPL_V(1U) + struct cpl_rdma_terminate { union opcode_tid ot; __be16 rsvd; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h index aa28299aef5f..60cf9e02de5d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h @@ -176,6 +176,13 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN CH_PCI_ID_TABLE_FENTRY(0x50a2), /* Custom T540-KR4 */ CH_PCI_ID_TABLE_FENTRY(0x50a3), /* Custom T580-KR4 */ CH_PCI_ID_TABLE_FENTRY(0x50a4), /* Custom 2x T540-CR */ + CH_PCI_ID_TABLE_FENTRY(0x50a5), /* Custom T522-BT */ + CH_PCI_ID_TABLE_FENTRY(0x50a6), /* Custom T522-BT-SO */ + CH_PCI_ID_TABLE_FENTRY(0x50a7), /* Custom T580-CR */ + CH_PCI_ID_TABLE_FENTRY(0x50a8), /* Custom T580-KR */ + CH_PCI_ID_TABLE_FENTRY(0x50a9), /* Custom T580-KR */ + CH_PCI_ID_TABLE_FENTRY(0x50aa), /* Custom T580-CR */ + CH_PCI_ID_TABLE_FENTRY(0x50ab), /* Custom T520-CR */ /* T6 adapters: */ @@ -197,6 +204,8 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN CH_PCI_ID_TABLE_FENTRY(0x6082), /* Custom T6225-CR SFP28 */ CH_PCI_ID_TABLE_FENTRY(0x6083), /* Custom T62100-CR QSFP28 */ CH_PCI_ID_TABLE_FENTRY(0x6084), /* Custom T64100-CR QSFP28 */ + CH_PCI_ID_TABLE_FENTRY(0x6085), /* Custom T6240-SO */ + CH_PCI_ID_TABLE_FENTRY(0x6086), /* Custom T6225-SO-CR */ CH_PCI_DEVICE_ID_TABLE_DEFINE_END; #endif /* __T4_PCI_ID_TBL_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index dac90837842b..a7cfece72828 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -65,6 +65,9 @@ #define PCIE_FW_REG(reg_addr, idx) ((reg_addr) + (idx) * 4) +#define NUM_LE_DB_DBGI_REQ_DATA_INSTANCES 17 +#define NUM_LE_DB_DBGI_RSP_DATA_INSTANCES 17 + #define SGE_PF_KDOORBELL_A 0x0 #define QID_S 15 @@ -150,6 +153,23 @@ #define T6_DBVFIFO_SIZE_M 0x1fffU #define T6_DBVFIFO_SIZE_G(x) (((x) >> T6_DBVFIFO_SIZE_S) & T6_DBVFIFO_SIZE_M) +#define SGE_CTXT_CMD_A 0x11fc + +#define BUSY_S 31 +#define BUSY_V(x) ((x) << BUSY_S) +#define BUSY_F BUSY_V(1U) + +#define CTXTTYPE_S 24 +#define CTXTTYPE_M 0x3U +#define CTXTTYPE_V(x) ((x) << CTXTTYPE_S) + +#define CTXTQID_S 0 +#define CTXTQID_M 0x1ffffU +#define CTXTQID_V(x) ((x) << CTXTQID_S) + +#define SGE_CTXT_DATA0_A 0x1200 +#define SGE_CTXT_DATA5_A 0x1214 + #define GLOBALENABLE_S 0 #define GLOBALENABLE_V(x) ((x) << GLOBALENABLE_S) #define GLOBALENABLE_F GLOBALENABLE_V(1U) @@ -319,6 +339,16 @@ #define SGE_IMSG_CTXT_BADDR_A 0x1088 #define SGE_FLM_CACHE_BADDR_A 0x108c +#define SGE_FLM_CFG_A 0x1090 + +#define NOHDR_S 18 +#define NOHDR_V(x) ((x) << NOHDR_S) +#define NOHDR_F NOHDR_V(1U) + +#define HDRSTARTFLQ_S 11 +#define HDRSTARTFLQ_M 0x7U +#define HDRSTARTFLQ_G(x) (((x) >> HDRSTARTFLQ_S) & HDRSTARTFLQ_M) + #define SGE_INGRESS_RX_THRESHOLD_A 0x10a0 #define THRESHOLD_0_S 24 @@ -1415,6 +1445,7 @@ #define ROWINDEX_V(x) ((x) << ROWINDEX_S) #define TP_CCTRL_TABLE_A 0x7ddc +#define TP_PACE_TABLE_A 0x7dd8 #define TP_MTU_TABLE_A 0x7de4 #define MTUINDEX_S 24 @@ -1447,6 +1478,17 @@ #define LKPTBLQUEUE0_M 0x3ffU #define LKPTBLQUEUE0_G(x) (((x) >> LKPTBLQUEUE0_S) & LKPTBLQUEUE0_M) +#define TP_TM_PIO_ADDR_A 0x7e18 +#define TP_TM_PIO_DATA_A 0x7e1c +#define TP_MOD_CONFIG_A 0x7e24 + +#define TIMERMODE_S 8 +#define TIMERMODE_M 0xffU +#define TIMERMODE_G(x) (((x) >> TIMERMODE_S) & TIMERMODE_M) + +#define TP_TX_MOD_Q1_Q0_TIMER_SEPARATOR_A 0x3 +#define TP_TX_MOD_Q1_Q0_RATE_LIMIT_A 0x8 + #define TP_PIO_ADDR_A 0x7e40 #define TP_PIO_DATA_A 0x7e44 #define TP_MIB_INDEX_A 0x7e50 @@ -1627,6 +1669,10 @@ #define IESPI_PAR_ERROR_V(x) ((x) << IESPI_PAR_ERROR_S) #define IESPI_PAR_ERROR_F IESPI_PAR_ERROR_V(1U) +#define ULP_TX_LA_RDPTR_0_A 0x8ec0 +#define ULP_TX_LA_RDDATA_0_A 0x8ec4 +#define ULP_TX_LA_WRPTR_0_A 0x8ec8 + #define PMRX_E_PCMD_PAR_ERROR_S 0 #define PMRX_E_PCMD_PAR_ERROR_V(x) ((x) << PMRX_E_PCMD_PAR_ERROR_S) #define PMRX_E_PCMD_PAR_ERROR_F PMRX_E_PCMD_PAR_ERROR_V(1U) @@ -2257,6 +2303,35 @@ #define CHNENABLE_V(x) ((x) << CHNENABLE_S) #define CHNENABLE_F CHNENABLE_V(1U) +#define LE_DB_DBGI_CONFIG_A 0x19cf0 + +#define DBGICMDBUSY_S 3 +#define DBGICMDBUSY_V(x) ((x) << DBGICMDBUSY_S) +#define DBGICMDBUSY_F DBGICMDBUSY_V(1U) + +#define DBGICMDSTRT_S 2 +#define DBGICMDSTRT_V(x) ((x) << DBGICMDSTRT_S) +#define DBGICMDSTRT_F DBGICMDSTRT_V(1U) + +#define DBGICMDMODE_S 0 +#define DBGICMDMODE_M 0x3U +#define DBGICMDMODE_V(x) ((x) << DBGICMDMODE_S) + +#define LE_DB_DBGI_REQ_TCAM_CMD_A 0x19cf4 + +#define DBGICMD_S 20 +#define DBGICMD_M 0xfU +#define DBGICMD_V(x) ((x) << DBGICMD_S) + +#define DBGITID_S 0 +#define DBGITID_M 0xfffffU +#define DBGITID_V(x) ((x) << DBGITID_S) + +#define LE_DB_DBGI_REQ_DATA_A 0x19d00 +#define LE_DB_DBGI_RSP_STATUS_A 0x19d94 + +#define LE_DB_DBGI_RSP_DATA_A 0x19da0 + #define PRTENABLE_S 29 #define PRTENABLE_V(x) ((x) << PRTENABLE_S) #define PRTENABLE_F PRTENABLE_V(1U) @@ -2433,6 +2508,18 @@ #define MPS_CLS_TCAM_DATA0_A 0xf000 #define MPS_CLS_TCAM_DATA1_A 0xf004 +#define CTLREQID_S 30 +#define CTLREQID_V(x) ((x) << CTLREQID_S) + +#define MPS_VF_RPLCT_MAP0_A 0x1111c +#define MPS_VF_RPLCT_MAP1_A 0x11120 +#define MPS_VF_RPLCT_MAP2_A 0x11124 +#define MPS_VF_RPLCT_MAP3_A 0x11128 +#define MPS_VF_RPLCT_MAP4_A 0x11300 +#define MPS_VF_RPLCT_MAP5_A 0x11304 +#define MPS_VF_RPLCT_MAP6_A 0x11308 +#define MPS_VF_RPLCT_MAP7_A 0x1130c + #define VIDL_S 16 #define VIDL_M 0xffffU #define VIDL_G(x) (((x) >> VIDL_S) & VIDL_M) @@ -2457,6 +2544,10 @@ #define DATAVIDH1_M 0x7fU #define DATAVIDH1_G(x) (((x) >> DATAVIDH1_S) & DATAVIDH1_M) +#define MPS_CLS_TCAM_RDATA0_REQ_ID1_A 0xf020 +#define MPS_CLS_TCAM_RDATA1_REQ_ID1_A 0xf024 +#define MPS_CLS_TCAM_RDATA2_REQ_ID1_A 0xf028 + #define USED_S 16 #define USED_M 0x7ffU #define USED_G(x) (((x) >> USED_S) & USED_M) @@ -2850,10 +2941,20 @@ #define T6_LIPMISS_F T6_LIPMISS_V(1U) #define LE_DB_CONFIG_A 0x19c04 +#define LE_DB_ROUTING_TABLE_INDEX_A 0x19c10 +#define LE_DB_ACTIVE_TABLE_START_INDEX_A 0x19c10 +#define LE_DB_FILTER_TABLE_INDEX_A 0x19c14 #define LE_DB_SERVER_INDEX_A 0x19c18 #define LE_DB_SRVR_START_INDEX_A 0x19c18 +#define LE_DB_CLIP_TABLE_INDEX_A 0x19c1c #define LE_DB_ACT_CNT_IPV4_A 0x19c20 #define LE_DB_ACT_CNT_IPV6_A 0x19c24 +#define LE_DB_HASH_CONFIG_A 0x19c28 + +#define HASHTIDSIZE_S 16 +#define HASHTIDSIZE_M 0x3fU +#define HASHTIDSIZE_G(x) (((x) >> HASHTIDSIZE_S) & HASHTIDSIZE_M) + #define LE_DB_HASH_TID_BASE_A 0x19c30 #define LE_DB_HASH_TBL_BASE_ADDR_A 0x19c30 #define LE_DB_INT_CAUSE_A 0x19c3c @@ -2900,6 +3001,23 @@ #define SSRAMINTPERR_V(x) ((x) << SSRAMINTPERR_S) #define SSRAMINTPERR_F SSRAMINTPERR_V(1U) +#define LE_DB_RSP_CODE_0_A 0x19c74 + +#define TCAM_ACTV_HIT_S 0 +#define TCAM_ACTV_HIT_M 0x1fU +#define TCAM_ACTV_HIT_V(x) ((x) << TCAM_ACTV_HIT_S) +#define TCAM_ACTV_HIT_G(x) (((x) >> TCAM_ACTV_HIT_S) & TCAM_ACTV_HIT_M) + +#define LE_DB_RSP_CODE_1_A 0x19c78 + +#define HASH_ACTV_HIT_S 25 +#define HASH_ACTV_HIT_M 0x1fU +#define HASH_ACTV_HIT_V(x) ((x) << HASH_ACTV_HIT_S) +#define HASH_ACTV_HIT_G(x) (((x) >> HASH_ACTV_HIT_S) & HASH_ACTV_HIT_M) + +#define LE_3_DB_HASH_MASK_GEN_IPV4_T6_A 0x19eac +#define LE_4_DB_HASH_MASK_GEN_IPV4_T6_A 0x19eb0 + #define NCSI_INT_CAUSE_A 0x1a0d8 #define CIM_DM_PRTY_ERR_S 8 diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h new file mode 100644 index 000000000000..3297ce025e8b --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h @@ -0,0 +1,69 @@ +/* + * This file is part of the Chelsio T4/T5/T6 Ethernet driver for Linux. + * + * Copyright (c) 2017 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __T4_TCB_H +#define __T4_TCB_H + +#define TCB_SMAC_SEL_W 0 +#define TCB_SMAC_SEL_S 24 +#define TCB_SMAC_SEL_M 0xffULL +#define TCB_SMAC_SEL_V(x) ((x) << TCB_SMAC_SEL_S) + +#define TCB_T_FLAGS_W 1 + +#define TF_CCTRL_ECE_S 60 +#define TF_CCTRL_CWR_S 61 +#define TF_CCTRL_RFR_S 62 + +#define TCB_RSS_INFO_W 3 +#define TCB_RSS_INFO_S 0 +#define TCB_RSS_INFO_M 0x3ffULL +#define TCB_RSS_INFO_V(x) ((x) << TCB_RSS_INFO_S) + +#define TCB_TIMESTAMP_W 5 +#define TCB_TIMESTAMP_S 0 +#define TCB_TIMESTAMP_M 0xffffffffULL +#define TCB_TIMESTAMP_V(x) ((x) << TCB_TIMESTAMP_S) + +#define TCB_RTT_TS_RECENT_AGE_W 6 +#define TCB_RTT_TS_RECENT_AGE_S 0 +#define TCB_RTT_TS_RECENT_AGE_M 0xffffffffULL +#define TCB_RTT_TS_RECENT_AGE_V(x) ((x) << TCB_RTT_TS_RECENT_AGE_S) + +#define TCB_SND_UNA_RAW_W 10 +#define TCB_RX_FRAG2_PTR_RAW_W 27 +#define TCB_RX_FRAG3_LEN_RAW_W 29 +#define TCB_RX_FRAG3_START_IDX_OFFSET_RAW_W 30 +#define TCB_PDU_HDR_LEN_W 31 +#endif /* __T4_TCB_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index ca2756dcefc5..57eb4ad3485d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -105,7 +105,8 @@ enum fw_wr_opcodes { FW_ISCSI_TX_DATA_WR = 0x45, FW_PTP_TX_PKT_WR = 0x46, FW_CRYPTO_LOOKASIDE_WR = 0X6d, - FW_LASTC2E_WR = 0x70 + FW_LASTC2E_WR = 0x70, + FW_FILTER2_WR = 0x77 }; struct fw_wr_hdr { @@ -201,6 +202,51 @@ struct fw_filter_wr { __u8 sma[6]; }; +struct fw_filter2_wr { + __be32 op_pkd; + __be32 len16_pkd; + __be64 r3; + __be32 tid_to_iq; + __be32 del_filter_to_l2tix; + __be16 ethtype; + __be16 ethtypem; + __u8 frag_to_ovlan_vldm; + __u8 smac_sel; + __be16 rx_chan_rx_rpl_iq; + __be32 maci_to_matchtypem; + __u8 ptcl; + __u8 ptclm; + __u8 ttyp; + __u8 ttypm; + __be16 ivlan; + __be16 ivlanm; + __be16 ovlan; + __be16 ovlanm; + __u8 lip[16]; + __u8 lipm[16]; + __u8 fip[16]; + __u8 fipm[16]; + __be16 lp; + __be16 lpm; + __be16 fp; + __be16 fpm; + __be16 r7; + __u8 sma[6]; + __be16 r8; + __u8 filter_type_swapmac; + __u8 natmode_to_ulp_type; + __be16 newlport; + __be16 newfport; + __u8 newlip[16]; + __u8 newfip[16]; + __be32 natseqcheck; + __be32 r9; + __be64 r10; + __be64 r11; + __be64 r12; + __be64 r13; +}; + #define FW_FILTER_WR_TID_S 12 #define FW_FILTER_WR_TID_M 0xfffff #define FW_FILTER_WR_TID_V(x) ((x) << FW_FILTER_WR_TID_S) @@ -385,6 +431,32 @@ struct fw_filter_wr { #define FW_FILTER_WR_RX_RPL_IQ_G(x) \ (((x) >> FW_FILTER_WR_RX_RPL_IQ_S) & FW_FILTER_WR_RX_RPL_IQ_M) +#define FW_FILTER2_WR_FILTER_TYPE_S 1 +#define FW_FILTER2_WR_FILTER_TYPE_M 0x1 +#define FW_FILTER2_WR_FILTER_TYPE_V(x) ((x) << FW_FILTER2_WR_FILTER_TYPE_S) +#define FW_FILTER2_WR_FILTER_TYPE_G(x) \ + (((x) >> FW_FILTER2_WR_FILTER_TYPE_S) & FW_FILTER2_WR_FILTER_TYPE_M) +#define FW_FILTER2_WR_FILTER_TYPE_F FW_FILTER2_WR_FILTER_TYPE_V(1U) + +#define FW_FILTER2_WR_NATMODE_S 5 +#define FW_FILTER2_WR_NATMODE_M 0x7 +#define FW_FILTER2_WR_NATMODE_V(x) ((x) << FW_FILTER2_WR_NATMODE_S) +#define FW_FILTER2_WR_NATMODE_G(x) \ + (((x) >> FW_FILTER2_WR_NATMODE_S) & FW_FILTER2_WR_NATMODE_M) + +#define FW_FILTER2_WR_NATFLAGCHECK_S 4 +#define FW_FILTER2_WR_NATFLAGCHECK_M 0x1 +#define FW_FILTER2_WR_NATFLAGCHECK_V(x) ((x) << FW_FILTER2_WR_NATFLAGCHECK_S) +#define FW_FILTER2_WR_NATFLAGCHECK_G(x) \ + (((x) >> FW_FILTER2_WR_NATFLAGCHECK_S) & FW_FILTER2_WR_NATFLAGCHECK_M) +#define FW_FILTER2_WR_NATFLAGCHECK_F FW_FILTER2_WR_NATFLAGCHECK_V(1U) + +#define FW_FILTER2_WR_ULP_TYPE_S 0 +#define FW_FILTER2_WR_ULP_TYPE_M 0xf +#define FW_FILTER2_WR_ULP_TYPE_V(x) ((x) << FW_FILTER2_WR_ULP_TYPE_S) +#define FW_FILTER2_WR_ULP_TYPE_G(x) \ + (((x) >> FW_FILTER2_WR_ULP_TYPE_S) & FW_FILTER2_WR_ULP_TYPE_M) + #define FW_FILTER_WR_MACI_S 23 #define FW_FILTER_WR_MACI_M 0x1ff #define FW_FILTER_WR_MACI_V(x) ((x) << FW_FILTER_WR_MACI_S) @@ -1020,6 +1092,7 @@ enum fw_caps_config_switch { enum fw_caps_config_nic { FW_CAPS_CONFIG_NIC = 0x00000001, FW_CAPS_CONFIG_NIC_VM = 0x00000002, + FW_CAPS_CONFIG_NIC_HASHFILTER = 0x00000020, }; enum fw_caps_config_ofld { @@ -1127,6 +1200,7 @@ enum fw_params_param_dev { FW_PARAMS_PARAM_DEV_SCFGREV = 0x1A, FW_PARAMS_PARAM_DEV_VPDREV = 0x1B, FW_PARAMS_PARAM_DEV_RI_FR_NSMR_TPTE_WR = 0x1C, + FW_PARAMS_PARAM_DEV_FILTER2_WR = 0x1D, FW_PARAMS_PARAM_DEV_MPSBGMAP = 0x1E, }; @@ -1171,9 +1245,12 @@ enum fw_params_param_pfvf { FW_PARAMS_PARAM_PFVF_EQ_END = 0x2C, FW_PARAMS_PARAM_PFVF_ACTIVE_FILTER_START = 0x2D, FW_PARAMS_PARAM_PFVF_ACTIVE_FILTER_END = 0x2E, + FW_PARAMS_PARAM_PFVF_ETHOFLD_START = 0x2F, FW_PARAMS_PARAM_PFVF_ETHOFLD_END = 0x30, FW_PARAMS_PARAM_PFVF_CPLFW4MSG_ENCAP = 0x31, - FW_PARAMS_PARAM_PFVF_NCRYPTO_LOOKASIDE = 0x32, + FW_PARAMS_PARAM_PFVF_HPFILTER_START = 0x32, + FW_PARAMS_PARAM_PFVF_HPFILTER_END = 0x33, + FW_PARAMS_PARAM_PFVF_NCRYPTO_LOOKASIDE = 0x39, FW_PARAMS_PARAM_PFVF_PORT_CAPS32 = 0x3A, }; diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index 8996ebbd222e..b48361cfdc78 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -1401,6 +1401,63 @@ static int cxgb4vf_get_link_ksettings(struct net_device *dev, return 0; } +/* Translate the Firmware FEC value into the ethtool value. */ +static inline unsigned int fwcap_to_eth_fec(unsigned int fw_fec) +{ + unsigned int eth_fec = 0; + + if (fw_fec & FW_PORT_CAP32_FEC_RS) + eth_fec |= ETHTOOL_FEC_RS; + if (fw_fec & FW_PORT_CAP32_FEC_BASER_RS) + eth_fec |= ETHTOOL_FEC_BASER; + + /* if nothing is set, then FEC is off */ + if (!eth_fec) + eth_fec = ETHTOOL_FEC_OFF; + + return eth_fec; +} + +/* Translate Common Code FEC value into ethtool value. */ +static inline unsigned int cc_to_eth_fec(unsigned int cc_fec) +{ + unsigned int eth_fec = 0; + + if (cc_fec & FEC_AUTO) + eth_fec |= ETHTOOL_FEC_AUTO; + if (cc_fec & FEC_RS) + eth_fec |= ETHTOOL_FEC_RS; + if (cc_fec & FEC_BASER_RS) + eth_fec |= ETHTOOL_FEC_BASER; + + /* if nothing is set, then FEC is off */ + if (!eth_fec) + eth_fec = ETHTOOL_FEC_OFF; + + return eth_fec; +} + +static int cxgb4vf_get_fecparam(struct net_device *dev, + struct ethtool_fecparam *fec) +{ + const struct port_info *pi = netdev_priv(dev); + const struct link_config *lc = &pi->link_cfg; + + /* Translate the Firmware FEC Support into the ethtool value. We + * always support IEEE 802.3 "automatic" selection of Link FEC type if + * any FEC is supported. + */ + fec->fec = fwcap_to_eth_fec(lc->pcaps); + if (fec->fec != ETHTOOL_FEC_OFF) + fec->fec |= ETHTOOL_FEC_AUTO; + + /* Translate the current internal FEC parameters into the + * ethtool values. + */ + fec->active_fec = cc_to_eth_fec(lc->fec); + return 0; +} + /* * Return our driver information. */ @@ -1774,6 +1831,7 @@ static void cxgb4vf_get_wol(struct net_device *dev, static const struct ethtool_ops cxgb4vf_ethtool_ops = { .get_link_ksettings = cxgb4vf_get_link_ksettings, + .get_fecparam = cxgb4vf_get_fecparam, .get_drvinfo = cxgb4vf_get_drvinfo, .get_msglevel = cxgb4vf_get_msglevel, .set_msglevel = cxgb4vf_set_msglevel, diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index 05498e7f2840..14d7e673c656 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -2058,9 +2058,9 @@ irq_handler_t t4vf_intr_handler(struct adapter *adapter) * when out of memory a queue can become empty. We schedule NAPI to do * the actual refill. */ -static void sge_rx_timer_cb(unsigned long data) +static void sge_rx_timer_cb(struct timer_list *t) { - struct adapter *adapter = (struct adapter *)data; + struct adapter *adapter = from_timer(adapter, t, sge.rx_timer); struct sge *s = &adapter->sge; unsigned int i; @@ -2117,9 +2117,9 @@ static void sge_rx_timer_cb(unsigned long data) * when no new packets are being submitted. This is essential for pktgen, * at least. */ -static void sge_tx_timer_cb(unsigned long data) +static void sge_tx_timer_cb(struct timer_list *t) { - struct adapter *adapter = (struct adapter *)data; + struct adapter *adapter = from_timer(adapter, t, sge.tx_timer); struct sge *s = &adapter->sge; unsigned int i, budget; @@ -2676,8 +2676,8 @@ int t4vf_sge_init(struct adapter *adapter) /* * Set up tasklet timers. */ - setup_timer(&s->rx_timer, sge_rx_timer_cb, (unsigned long)adapter); - setup_timer(&s->tx_timer, sge_tx_timer_cb, (unsigned long)adapter); + timer_setup(&s->rx_timer, sge_rx_timer_cb, 0); + timer_setup(&s->tx_timer, sge_tx_timer_cb, 0); /* * Initialize Forwarded Interrupt Queue lock. diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c index a8d94963b4d0..67aec59a14e6 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c @@ -1812,7 +1812,7 @@ int t4vf_eth_eq_free(struct adapter *adapter, unsigned int eqid) * * Returns a string representation of the Link Down Reason Code. */ -const char *t4vf_link_down_rc_str(unsigned char link_down_rc) +static const char *t4vf_link_down_rc_str(unsigned char link_down_rc) { static const char * const reason[] = { "Link Down", @@ -1838,8 +1838,8 @@ const char *t4vf_link_down_rc_str(unsigned char link_down_rc) * * Processes a GET_PORT_INFO FW reply message. */ -void t4vf_handle_get_port_info(struct port_info *pi, - const struct fw_port_cmd *cmd) +static void t4vf_handle_get_port_info(struct port_info *pi, + const struct fw_port_cmd *cmd) { int action = FW_PORT_CMD_ACTION_G(be32_to_cpu(cmd->action_to_len16)); struct adapter *adapter = pi->adapter; diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h index ba032ac9ae86..6a9527004cb1 100644 --- a/drivers/net/ethernet/cisco/enic/enic.h +++ b/drivers/net/ethernet/cisco/enic/enic.h @@ -33,7 +33,7 @@ #define DRV_NAME "enic" #define DRV_DESCRIPTION "Cisco VIC Ethernet NIC Driver" -#define DRV_VERSION "2.3.0.42" +#define DRV_VERSION "2.3.0.45" #define DRV_COPYRIGHT "Copyright 2008-2013 Cisco Systems, Inc" #define ENIC_BARS_MAX 6 diff --git a/drivers/net/ethernet/cisco/enic/enic_clsf.h b/drivers/net/ethernet/cisco/enic/enic_clsf.h index d3c4a1cb0610..0ae83e091a62 100644 --- a/drivers/net/ethernet/cisco/enic/enic_clsf.h +++ b/drivers/net/ethernet/cisco/enic/enic_clsf.h @@ -20,9 +20,8 @@ void enic_flow_may_expire(unsigned long data); static inline void enic_rfs_timer_start(struct enic *enic) { - init_timer(&enic->rfs_h.rfs_may_expire); - enic->rfs_h.rfs_may_expire.function = enic_flow_may_expire; - enic->rfs_h.rfs_may_expire.data = (unsigned long)enic; + setup_timer(&enic->rfs_h.rfs_may_expire, enic_flow_may_expire, + (unsigned long)enic); mod_timer(&enic->rfs_h.rfs_may_expire, jiffies + HZ/4); } diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c index fd3980cc1e34..462d0ce51240 100644 --- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c +++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c @@ -176,6 +176,81 @@ static void enic_get_strings(struct net_device *netdev, u32 stringset, } } +static void enic_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct enic *enic = netdev_priv(netdev); + struct vnic_enet_config *c = &enic->config; + + ring->rx_max_pending = ENIC_MAX_RQ_DESCS; + ring->rx_pending = c->rq_desc_count; + ring->tx_max_pending = ENIC_MAX_WQ_DESCS; + ring->tx_pending = c->wq_desc_count; +} + +static int enic_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct enic *enic = netdev_priv(netdev); + struct vnic_enet_config *c = &enic->config; + int running = netif_running(netdev); + unsigned int rx_pending; + unsigned int tx_pending; + int err = 0; + + if (ring->rx_mini_max_pending || ring->rx_mini_pending) { + netdev_info(netdev, + "modifying mini ring params is not supported"); + return -EINVAL; + } + if (ring->rx_jumbo_max_pending || ring->rx_jumbo_pending) { + netdev_info(netdev, + "modifying jumbo ring params is not supported"); + return -EINVAL; + } + rx_pending = c->rq_desc_count; + tx_pending = c->wq_desc_count; + if (ring->rx_pending > ENIC_MAX_RQ_DESCS || + ring->rx_pending < ENIC_MIN_RQ_DESCS) { + netdev_info(netdev, "rx pending (%u) not in range [%u,%u]", + ring->rx_pending, ENIC_MIN_RQ_DESCS, + ENIC_MAX_RQ_DESCS); + return -EINVAL; + } + if (ring->tx_pending > ENIC_MAX_WQ_DESCS || + ring->tx_pending < ENIC_MIN_WQ_DESCS) { + netdev_info(netdev, "tx pending (%u) not in range [%u,%u]", + ring->tx_pending, ENIC_MIN_WQ_DESCS, + ENIC_MAX_WQ_DESCS); + return -EINVAL; + } + if (running) + dev_close(netdev); + c->rq_desc_count = + ring->rx_pending & 0xffffffe0; /* must be aligned to groups of 32 */ + c->wq_desc_count = + ring->tx_pending & 0xffffffe0; /* must be aligned to groups of 32 */ + enic_free_vnic_resources(enic); + err = enic_alloc_vnic_resources(enic); + if (err) { + netdev_err(netdev, + "Failed to alloc vNIC resources, aborting\n"); + enic_free_vnic_resources(enic); + goto err_out; + } + enic_init_vnic_resources(enic); + if (running) { + err = dev_open(netdev); + if (err) + goto err_out; + } + return 0; +err_out: + c->rq_desc_count = rx_pending; + c->wq_desc_count = tx_pending; + return err; +} + static int enic_get_sset_count(struct net_device *netdev, int sset) { switch (sset) { @@ -509,6 +584,8 @@ static const struct ethtool_ops enic_ethtool_ops = { .set_msglevel = enic_set_msglevel, .get_link = ethtool_op_get_link, .get_strings = enic_get_strings, + .get_ringparam = enic_get_ringparam, + .set_ringparam = enic_set_ringparam, .get_sset_count = enic_get_sset_count, .get_ethtool_stats = enic_get_ethtool_stats, .get_coalesce = enic_get_coalesce, diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index d24ee1ad3be1..4a11baffe02d 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -2846,9 +2846,8 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Setup notification timer, HW reset task, and wq locks */ - init_timer(&enic->notify_timer); - enic->notify_timer.function = enic_notify_timer; - enic->notify_timer.data = (unsigned long)enic; + setup_timer(&enic->notify_timer, enic_notify_timer, + (unsigned long)enic); enic_set_rx_coal_setting(enic); INIT_WORK(&enic->reset, enic_reset); diff --git a/drivers/net/ethernet/cisco/enic/vnic_rq.c b/drivers/net/ethernet/cisco/enic/vnic_rq.c index 36bc2c71fba9..f8aa326d1d58 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_rq.c +++ b/drivers/net/ethernet/cisco/enic/vnic_rq.c @@ -139,20 +139,8 @@ void vnic_rq_init(struct vnic_rq *rq, unsigned int cq_index, unsigned int error_interrupt_enable, unsigned int error_interrupt_offset) { - u32 fetch_index = 0; - - /* Use current fetch_index as the ring starting point */ - fetch_index = ioread32(&rq->ctrl->fetch_index); - - if (fetch_index == 0xFFFFFFFF) { /* check for hardware gone */ - /* Hardware surprise removal: reset fetch_index */ - fetch_index = 0; - } - - vnic_rq_init_start(rq, cq_index, - fetch_index, fetch_index, - error_interrupt_enable, - error_interrupt_offset); + vnic_rq_init_start(rq, cq_index, 0, 0, error_interrupt_enable, + error_interrupt_offset); } unsigned int vnic_rq_error_status(struct vnic_rq *rq) diff --git a/drivers/net/ethernet/dec/tulip/de2104x.c b/drivers/net/ethernet/dec/tulip/de2104x.c index c87b8cc42963..13430f75496c 100644 --- a/drivers/net/ethernet/dec/tulip/de2104x.c +++ b/drivers/net/ethernet/dec/tulip/de2104x.c @@ -333,8 +333,8 @@ static void de_set_rx_mode (struct net_device *dev); static void de_tx (struct de_private *de); static void de_clean_rings (struct de_private *de); static void de_media_interrupt (struct de_private *de, u32 status); -static void de21040_media_timer (unsigned long data); -static void de21041_media_timer (unsigned long data); +static void de21040_media_timer (struct timer_list *t); +static void de21041_media_timer (struct timer_list *t); static unsigned int de_ok_to_advertise (struct de_private *de, u32 new_media); @@ -959,9 +959,9 @@ static void de_next_media (struct de_private *de, const u32 *media, } } -static void de21040_media_timer (unsigned long data) +static void de21040_media_timer (struct timer_list *t) { - struct de_private *de = (struct de_private *) data; + struct de_private *de = from_timer(de, t, media_timer); struct net_device *dev = de->dev; u32 status = dr32(SIAStatus); unsigned int carrier; @@ -1040,9 +1040,9 @@ static unsigned int de_ok_to_advertise (struct de_private *de, u32 new_media) return 1; } -static void de21041_media_timer (unsigned long data) +static void de21041_media_timer (struct timer_list *t) { - struct de_private *de = (struct de_private *) data; + struct de_private *de = from_timer(de, t, media_timer); struct net_device *dev = de->dev; u32 status = dr32(SIAStatus); unsigned int carrier; @@ -1999,12 +1999,9 @@ static int de_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) de->msg_enable = (debug < 0 ? DE_DEF_MSG_ENABLE : debug); de->board_idx = board_idx; spin_lock_init (&de->lock); - init_timer(&de->media_timer); - if (de->de21040) - de->media_timer.function = de21040_media_timer; - else - de->media_timer.function = de21041_media_timer; - de->media_timer.data = (unsigned long) de; + timer_setup(&de->media_timer, + de->de21040 ? de21040_media_timer : de21041_media_timer, + 0); netif_carrier_off(dev); diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c index 0affee9c8aa2..a31b4df3e7ff 100644 --- a/drivers/net/ethernet/dec/tulip/de4x5.c +++ b/drivers/net/ethernet/dec/tulip/de4x5.c @@ -912,7 +912,7 @@ static int de4x5_init(struct net_device *dev); static int de4x5_sw_reset(struct net_device *dev); static int de4x5_rx(struct net_device *dev); static int de4x5_tx(struct net_device *dev); -static void de4x5_ast(struct net_device *dev); +static void de4x5_ast(struct timer_list *t); static int de4x5_txur(struct net_device *dev); static int de4x5_rx_ovfc(struct net_device *dev); @@ -1147,9 +1147,7 @@ de4x5_hw_init(struct net_device *dev, u_long iobase, struct device *gendev) lp->timeout = -1; lp->gendev = gendev; spin_lock_init(&lp->lock); - init_timer(&lp->timer); - lp->timer.function = (void (*)(unsigned long))de4x5_ast; - lp->timer.data = (unsigned long)dev; + timer_setup(&lp->timer, de4x5_ast, 0); de4x5_parse_params(dev); /* @@ -1742,9 +1740,10 @@ de4x5_tx(struct net_device *dev) } static void -de4x5_ast(struct net_device *dev) +de4x5_ast(struct timer_list *t) { - struct de4x5_private *lp = netdev_priv(dev); + struct de4x5_private *lp = from_timer(lp, t, timer); + struct net_device *dev = dev_get_drvdata(lp->gendev); int next_tick = DE4X5_AUTOSENSE_MS; int dt; @@ -2370,7 +2369,7 @@ autoconf_media(struct net_device *dev) lp->media = INIT; lp->tcount = 0; - de4x5_ast(dev); + de4x5_ast(&lp->timer); return lp->media; } diff --git a/drivers/net/ethernet/dec/tulip/dmfe.c b/drivers/net/ethernet/dec/tulip/dmfe.c index 07e10a45beaa..17ef7a28873d 100644 --- a/drivers/net/ethernet/dec/tulip/dmfe.c +++ b/drivers/net/ethernet/dec/tulip/dmfe.c @@ -331,7 +331,7 @@ static void dmfe_phy_write_1bit(void __iomem *, u32); static u16 dmfe_phy_read_1bit(void __iomem *); static u8 dmfe_sense_speed(struct dmfe_board_info *); static void dmfe_process_mode(struct dmfe_board_info *); -static void dmfe_timer(unsigned long); +static void dmfe_timer(struct timer_list *); static inline u32 cal_CRC(unsigned char *, unsigned int, u8); static void dmfe_rx_packet(struct net_device *, struct dmfe_board_info *); static void dmfe_free_tx_pkt(struct net_device *, struct dmfe_board_info *); @@ -596,10 +596,8 @@ static int dmfe_open(struct net_device *dev) netif_wake_queue(dev); /* set and active a timer process */ - init_timer(&db->timer); + timer_setup(&db->timer, dmfe_timer, 0); db->timer.expires = DMFE_TIMER_WUT + HZ * 2; - db->timer.data = (unsigned long)dev; - db->timer.function = dmfe_timer; add_timer(&db->timer); return 0; @@ -1130,10 +1128,10 @@ static const struct ethtool_ops netdev_ethtool_ops = { * Dynamic media sense, allocate Rx buffer... */ -static void dmfe_timer(unsigned long data) +static void dmfe_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct dmfe_board_info *db = netdev_priv(dev); + struct dmfe_board_info *db = from_timer(db, t, timer); + struct net_device *dev = pci_get_drvdata(db->pdev); void __iomem *ioaddr = db->ioaddr; u32 tmp_cr8; unsigned char tmp_cr12; diff --git a/drivers/net/ethernet/dec/tulip/interrupt.c b/drivers/net/ethernet/dec/tulip/interrupt.c index 8df80880ecaa..c1ca0765d56d 100644 --- a/drivers/net/ethernet/dec/tulip/interrupt.c +++ b/drivers/net/ethernet/dec/tulip/interrupt.c @@ -102,10 +102,10 @@ int tulip_refill_rx(struct net_device *dev) #ifdef CONFIG_TULIP_NAPI -void oom_timer(unsigned long data) +void oom_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct tulip_private *tp = netdev_priv(dev); + struct tulip_private *tp = from_timer(tp, t, oom_timer); + napi_schedule(&tp->napi); } diff --git a/drivers/net/ethernet/dec/tulip/pnic.c b/drivers/net/ethernet/dec/tulip/pnic.c index 7bcccf5cac7a..3fb39e32e1b4 100644 --- a/drivers/net/ethernet/dec/tulip/pnic.c +++ b/drivers/net/ethernet/dec/tulip/pnic.c @@ -84,10 +84,10 @@ void pnic_lnk_change(struct net_device *dev, int csr5) } } -void pnic_timer(unsigned long data) +void pnic_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct tulip_private *tp = netdev_priv(dev); + struct tulip_private *tp = from_timer(tp, t, timer); + struct net_device *dev = tp->dev; void __iomem *ioaddr = tp->base_addr; int next_tick = 60*HZ; diff --git a/drivers/net/ethernet/dec/tulip/pnic2.c b/drivers/net/ethernet/dec/tulip/pnic2.c index 5895fc43f6e0..412adaa7fdf8 100644 --- a/drivers/net/ethernet/dec/tulip/pnic2.c +++ b/drivers/net/ethernet/dec/tulip/pnic2.c @@ -76,10 +76,10 @@ #include <linux/delay.h> -void pnic2_timer(unsigned long data) +void pnic2_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct tulip_private *tp = netdev_priv(dev); + struct tulip_private *tp = from_timer(tp, t, timer); + struct net_device *dev = tp->dev; void __iomem *ioaddr = tp->base_addr; int next_tick = 60*HZ; diff --git a/drivers/net/ethernet/dec/tulip/timer.c b/drivers/net/ethernet/dec/tulip/timer.c index 523d9dde50a2..642e9dfc5451 100644 --- a/drivers/net/ethernet/dec/tulip/timer.c +++ b/drivers/net/ethernet/dec/tulip/timer.c @@ -137,10 +137,10 @@ void tulip_media_task(struct work_struct *work) } -void mxic_timer(unsigned long data) +void mxic_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct tulip_private *tp = netdev_priv(dev); + struct tulip_private *tp = from_timer(tp, t, timer); + struct net_device *dev = tp->dev; void __iomem *ioaddr = tp->base_addr; int next_tick = 60*HZ; @@ -154,10 +154,10 @@ void mxic_timer(unsigned long data) } -void comet_timer(unsigned long data) +void comet_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct tulip_private *tp = netdev_priv(dev); + struct tulip_private *tp = from_timer(tp, t, timer); + struct net_device *dev = tp->dev; int next_tick = 2*HZ; if (tulip_debug > 1) diff --git a/drivers/net/ethernet/dec/tulip/tulip.h b/drivers/net/ethernet/dec/tulip/tulip.h index 06660dbc44b7..b458140aeaef 100644 --- a/drivers/net/ethernet/dec/tulip/tulip.h +++ b/drivers/net/ethernet/dec/tulip/tulip.h @@ -43,7 +43,7 @@ struct tulip_chip_table { int io_size; int valid_intrs; /* CSR7 interrupt enable settings */ int flags; - void (*media_timer) (unsigned long); + void (*media_timer) (struct timer_list *); work_func_t media_task; }; @@ -476,7 +476,7 @@ void t21142_lnk_change(struct net_device *dev, int csr5); /* PNIC2.c */ void pnic2_lnk_change(struct net_device *dev, int csr5); -void pnic2_timer(unsigned long data); +void pnic2_timer(struct timer_list *t); void pnic2_start_nway(struct net_device *dev); void pnic2_lnk_change(struct net_device *dev, int csr5); @@ -504,19 +504,19 @@ void tulip_find_mii (struct net_device *dev, int board_idx); /* pnic.c */ void pnic_do_nway(struct net_device *dev); void pnic_lnk_change(struct net_device *dev, int csr5); -void pnic_timer(unsigned long data); +void pnic_timer(struct timer_list *t); /* timer.c */ void tulip_media_task(struct work_struct *work); -void mxic_timer(unsigned long data); -void comet_timer(unsigned long data); +void mxic_timer(struct timer_list *t); +void comet_timer(struct timer_list *t); /* tulip_core.c */ extern int tulip_debug; extern const char * const medianame[]; extern const char tulip_media_cap[]; extern const struct tulip_chip_table tulip_tbl[]; -void oom_timer(unsigned long data); +void oom_timer(struct timer_list *t); extern u8 t21040_csr13[]; static inline void tulip_start_rxtx(struct tulip_private *tp) diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index 851b6d1f5a42..00d02a0967d0 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -123,10 +123,10 @@ int tulip_debug = TULIP_DEBUG; int tulip_debug = 1; #endif -static void tulip_timer(unsigned long data) +static void tulip_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct tulip_private *tp = netdev_priv(dev); + struct tulip_private *tp = from_timer(tp, t, timer); + struct net_device *dev = tp->dev; if (netif_running(dev)) schedule_work(&tp->media_work); @@ -505,7 +505,7 @@ media_picked: tp->timer.expires = RUN_AT(next_tick); add_timer(&tp->timer); #ifdef CONFIG_TULIP_NAPI - setup_timer(&tp->oom_timer, oom_timer, (unsigned long)dev); + timer_setup(&tp->oom_timer, oom_timer, 0); #endif } @@ -780,8 +780,7 @@ static void tulip_down (struct net_device *dev) spin_unlock_irqrestore (&tp->lock, flags); - setup_timer(&tp->timer, tulip_tbl[tp->chip_id].media_timer, - (unsigned long)dev); + timer_setup(&tp->timer, tulip_tbl[tp->chip_id].media_timer, 0); dev->if_port = tp->saved_if_port; @@ -1470,8 +1469,7 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp->csr0 = csr0; spin_lock_init(&tp->lock); spin_lock_init(&tp->mii_lock); - setup_timer(&tp->timer, tulip_tbl[tp->chip_id].media_timer, - (unsigned long)dev); + timer_setup(&tp->timer, tulip_tbl[tp->chip_id].media_timer, 0); INIT_WORK(&tp->media_work, tulip_tbl[tp->chip_id].media_task); diff --git a/drivers/net/ethernet/dec/tulip/uli526x.c b/drivers/net/ethernet/dec/tulip/uli526x.c index 7fc248efc4ba..488a744084c9 100644 --- a/drivers/net/ethernet/dec/tulip/uli526x.c +++ b/drivers/net/ethernet/dec/tulip/uli526x.c @@ -241,7 +241,7 @@ static void phy_write_1bit(struct uli526x_board_info *db, u32); static u16 phy_read_1bit(struct uli526x_board_info *db); static u8 uli526x_sense_speed(struct uli526x_board_info *); static void uli526x_process_mode(struct uli526x_board_info *); -static void uli526x_timer(unsigned long); +static void uli526x_timer(struct timer_list *t); static void uli526x_rx_packet(struct net_device *, struct uli526x_board_info *); static void uli526x_free_tx_pkt(struct net_device *, struct uli526x_board_info *); static void uli526x_reuse_skb(struct uli526x_board_info *, struct sk_buff *); @@ -491,10 +491,8 @@ static int uli526x_open(struct net_device *dev) netif_wake_queue(dev); /* set and active a timer process */ - init_timer(&db->timer); + timer_setup(&db->timer, uli526x_timer, 0); db->timer.expires = ULI526X_TIMER_WUT + HZ * 2; - db->timer.data = (unsigned long)dev; - db->timer.function = uli526x_timer; add_timer(&db->timer); return 0; @@ -1023,10 +1021,10 @@ static const struct ethtool_ops netdev_ethtool_ops = { * Dynamic media sense, allocate Rx buffer... */ -static void uli526x_timer(unsigned long data) +static void uli526x_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *) data; - struct uli526x_board_info *db = netdev_priv(dev); + struct uli526x_board_info *db = from_timer(db, t, timer); + struct net_device *dev = pci_get_drvdata(db->pdev); struct uli_phy_ops *phy = &db->phy; void __iomem *ioaddr = db->ioaddr; unsigned long flags; diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c index 32d7229544fa..70cb2d689c2c 100644 --- a/drivers/net/ethernet/dec/tulip/winbond-840.c +++ b/drivers/net/ethernet/dec/tulip/winbond-840.c @@ -327,7 +327,7 @@ static int mdio_read(struct net_device *dev, int phy_id, int location); static void mdio_write(struct net_device *dev, int phy_id, int location, int value); static int netdev_open(struct net_device *dev); static int update_link(struct net_device *dev); -static void netdev_timer(unsigned long data); +static void netdev_timer(struct timer_list *t); static void init_rxtx_rings(struct net_device *dev); static void free_rxtx_rings(struct netdev_private *np); static void init_registers(struct net_device *dev); @@ -655,10 +655,8 @@ static int netdev_open(struct net_device *dev) netdev_dbg(dev, "Done netdev_open()\n"); /* Set the timer to check for link beat. */ - init_timer(&np->timer); + timer_setup(&np->timer, netdev_timer, 0); np->timer.expires = jiffies + 1*HZ; - np->timer.data = (unsigned long)dev; - np->timer.function = netdev_timer; /* timer handler */ add_timer(&np->timer); return 0; out_err: @@ -774,10 +772,10 @@ static inline void update_csr6(struct net_device *dev, int new) np->mii_if.full_duplex = 1; } -static void netdev_timer(unsigned long data) +static void netdev_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct netdev_private *np = netdev_priv(dev); + struct netdev_private *np = from_timer(np, t, timer); + struct net_device *dev = pci_get_drvdata(np->pci_dev); void __iomem *ioaddr = np->base_addr; if (debug > 2) diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index 778f974e2928..f0536b16b3c3 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -68,7 +68,7 @@ static const int max_intrloop = 50; static const int multicast_filter_limit = 0x40; static int rio_open (struct net_device *dev); -static void rio_timer (unsigned long data); +static void rio_timer (struct timer_list *t); static void rio_tx_timeout (struct net_device *dev); static netdev_tx_t start_xmit (struct sk_buff *skb, struct net_device *dev); static irqreturn_t rio_interrupt (int irq, void *dev_instance); @@ -313,7 +313,7 @@ find_miiphy (struct net_device *dev) { struct netdev_private *np = netdev_priv(dev); int i, phy_found = 0; - np = netdev_priv(dev); + np->phy_addr = 1; for (i = 31; i >= 0; i--) { @@ -644,7 +644,7 @@ static int rio_open(struct net_device *dev) return i; } - setup_timer(&np->timer, rio_timer, (unsigned long)dev); + timer_setup(&np->timer, rio_timer, 0); np->timer.expires = jiffies + 1 * HZ; add_timer(&np->timer); @@ -655,10 +655,10 @@ static int rio_open(struct net_device *dev) } static void -rio_timer (unsigned long data) +rio_timer (struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct netdev_private *np = netdev_priv(dev); + struct netdev_private *np = from_timer(np, t, timer); + struct net_device *dev = pci_get_drvdata(np->pdev); unsigned int entry; int next_tick = 1*HZ; unsigned long flags; diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c index 2704bcf023be..1a27176381fb 100644 --- a/drivers/net/ethernet/dlink/sundance.c +++ b/drivers/net/ethernet/dlink/sundance.c @@ -431,7 +431,7 @@ static void mdio_write(struct net_device *dev, int phy_id, int location, int val static int mdio_wait_link(struct net_device *dev, int wait); static int netdev_open(struct net_device *dev); static void check_duplex(struct net_device *dev); -static void netdev_timer(unsigned long data); +static void netdev_timer(struct timer_list *t); static void tx_timeout(struct net_device *dev); static void init_ring(struct net_device *dev); static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); @@ -913,10 +913,8 @@ static int netdev_open(struct net_device *dev) ioread16(ioaddr + MACCtrl1), ioread16(ioaddr + MACCtrl0)); /* Set the timer to check for link beat. */ - init_timer(&np->timer); + timer_setup(&np->timer, netdev_timer, 0); np->timer.expires = jiffies + 3*HZ; - np->timer.data = (unsigned long)dev; - np->timer.function = netdev_timer; /* timer handler */ add_timer(&np->timer); /* Enable interrupts by setting the interrupt mask. */ @@ -953,10 +951,10 @@ static void check_duplex(struct net_device *dev) } } -static void netdev_timer(unsigned long data) +static void netdev_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct netdev_private *np = netdev_priv(dev); + struct netdev_private *np = from_timer(np, t, timer); + struct net_device *dev = np->mii_if.dev; void __iomem *ioaddr = np->base; int next_tick = 10*HZ; diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 9ed8e4b81530..78db8e62a83f 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -21,6 +21,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/clk.h> #include <linux/dma-mapping.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> @@ -59,6 +60,9 @@ /* Min number of tx ring entries before stopping queue */ #define TX_THRESHOLD (MAX_SKB_FRAGS + 1) +#define FTGMAC_100MHZ 100000000 +#define FTGMAC_25MHZ 25000000 + struct ftgmac100 { /* Registers */ struct resource *res; @@ -96,6 +100,7 @@ struct ftgmac100 { struct napi_struct napi; struct work_struct reset_task; struct mii_bus *mii_bus; + struct clk *clk; /* Link management */ int cur_speed; @@ -1734,6 +1739,22 @@ static void ftgmac100_ncsi_handler(struct ncsi_dev *nd) nd->link_up ? "up" : "down"); } +static void ftgmac100_setup_clk(struct ftgmac100 *priv) +{ + priv->clk = devm_clk_get(priv->dev, NULL); + if (IS_ERR(priv->clk)) + return; + + clk_prepare_enable(priv->clk); + + /* Aspeed specifies a 100MHz clock is required for up to + * 1000Mbit link speeds. As NCSI is limited to 100Mbit, 25MHz + * is sufficient + */ + clk_set_rate(priv->clk, priv->use_ncsi ? FTGMAC_25MHZ : + FTGMAC_100MHZ); +} + static int ftgmac100_probe(struct platform_device *pdev) { struct resource *res; @@ -1830,6 +1851,9 @@ static int ftgmac100_probe(struct platform_device *pdev) goto err_setup_mdio; } + if (priv->is_aspeed) + ftgmac100_setup_clk(priv); + /* Default ring sizes */ priv->rx_q_entries = priv->new_rx_q_entries = DEF_RX_QUEUE_ENTRIES; priv->tx_q_entries = priv->new_tx_q_entries = DEF_TX_QUEUE_ENTRIES; @@ -1883,6 +1907,8 @@ static int ftgmac100_remove(struct platform_device *pdev) unregister_netdev(netdev); + clk_disable_unprepare(priv->clk); + /* There's a small chance the reset task will have been re-queued, * during stop, make sure it's gone before we free the structure. */ diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c index 66928a922824..aecc76504b69 100644 --- a/drivers/net/ethernet/faraday/ftmac100.c +++ b/drivers/net/ethernet/faraday/ftmac100.c @@ -402,6 +402,7 @@ static bool ftmac100_rx_packet(struct ftmac100 *priv, int *processed) struct page *page; dma_addr_t map; int length; + bool ret; rxdes = ftmac100_rx_locate_first_segment(priv); if (!rxdes) @@ -416,8 +417,8 @@ static bool ftmac100_rx_packet(struct ftmac100 *priv, int *processed) * It is impossible to get multi-segment packets * because we always provide big enough receive buffers. */ - if (unlikely(!ftmac100_rxdes_last_segment(rxdes))) - BUG(); + ret = ftmac100_rxdes_last_segment(rxdes); + BUG_ON(!ret); /* start processing */ skb = netdev_alloc_skb_ip_align(netdev, 128); diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c index e92859dab7ae..23053919ebf5 100644 --- a/drivers/net/ethernet/fealnx.c +++ b/drivers/net/ethernet/fealnx.c @@ -426,8 +426,8 @@ static void mdio_write(struct net_device *dev, int phy_id, int location, int val static int netdev_open(struct net_device *dev); static void getlinktype(struct net_device *dev); static void getlinkstatus(struct net_device *dev); -static void netdev_timer(unsigned long data); -static void reset_timer(unsigned long data); +static void netdev_timer(struct timer_list *t); +static void reset_timer(struct timer_list *t); static void fealnx_tx_timeout(struct net_device *dev); static void init_ring(struct net_device *dev); static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); @@ -909,17 +909,13 @@ static int netdev_open(struct net_device *dev) printk(KERN_DEBUG "%s: Done netdev_open().\n", dev->name); /* Set the timer to check for link beat. */ - init_timer(&np->timer); + timer_setup(&np->timer, netdev_timer, 0); np->timer.expires = RUN_AT(3 * HZ); - np->timer.data = (unsigned long) dev; - np->timer.function = netdev_timer; /* timer handler */ add_timer(&np->timer); - init_timer(&np->reset_timer); - np->reset_timer.data = (unsigned long) dev; - np->reset_timer.function = reset_timer; + timer_setup(&np->reset_timer, reset_timer, 0); np->reset_timer_armed = 0; return rc; } @@ -1082,10 +1078,10 @@ static void allocate_rx_buffers(struct net_device *dev) } -static void netdev_timer(unsigned long data) +static void netdev_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *) data; - struct netdev_private *np = netdev_priv(dev); + struct netdev_private *np = from_timer(np, t, timer); + struct net_device *dev = np->mii.dev; void __iomem *ioaddr = np->mem; int old_crvalue = np->crvalue; unsigned int old_linkok = np->linkok; @@ -1171,10 +1167,10 @@ static void enable_rxtx(struct net_device *dev) } -static void reset_timer(unsigned long data) +static void reset_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *) data; - struct netdev_private *np = netdev_priv(dev); + struct netdev_private *np = from_timer(np, t, reset_timer); + struct net_device *dev = np->mii.dev; unsigned long flags; printk(KERN_WARNING "%s: resetting tx and rx machinery\n", dev->name); diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 42258060f142..7caa8da48421 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -351,7 +351,7 @@ static int dpaa_setup_tc(struct net_device *net_dev, enum tc_setup_type type, u8 num_tc; int i; - if (type != TC_SETUP_MQPRIO) + if (type != TC_SETUP_QDISC_MQPRIO) return -EOPNOTSUPP; mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; @@ -385,34 +385,19 @@ out: static struct mac_device *dpaa_mac_dev_get(struct platform_device *pdev) { - struct platform_device *of_dev; struct dpaa_eth_data *eth_data; - struct device *dpaa_dev, *dev; - struct device_node *mac_node; + struct device *dpaa_dev; struct mac_device *mac_dev; dpaa_dev = &pdev->dev; eth_data = dpaa_dev->platform_data; - if (!eth_data) + if (!eth_data) { + dev_err(dpaa_dev, "eth_data missing\n"); return ERR_PTR(-ENODEV); - - mac_node = eth_data->mac_node; - - of_dev = of_find_device_by_node(mac_node); - if (!of_dev) { - dev_err(dpaa_dev, "of_find_device_by_node(%pOF) failed\n", - mac_node); - of_node_put(mac_node); - return ERR_PTR(-EINVAL); } - of_node_put(mac_node); - - dev = &of_dev->dev; - - mac_dev = dev_get_drvdata(dev); + mac_dev = eth_data->mac_dev; if (!mac_dev) { - dev_err(dpaa_dev, "dev_get_drvdata(%s) failed\n", - dev_name(dev)); + dev_err(dpaa_dev, "mac_dev missing\n"); return ERR_PTR(-EINVAL); } @@ -1736,6 +1721,7 @@ static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv, /* Iterate through the SGT entries and add data buffers to the skb */ sgt = vaddr + fd_off; + skb = NULL; for (i = 0; i < DPAA_SGT_MAX_ENTRIES; i++) { /* Extension bit is not supported */ WARN_ON(qm_sg_entry_is_ext(&sgt[i])); @@ -1753,7 +1739,7 @@ static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv, count_ptr = this_cpu_ptr(dpaa_bp->percpu_count); dma_unmap_single(dpaa_bp->dev, sg_addr, dpaa_bp->size, DMA_FROM_DEVICE); - if (i == 0) { + if (!skb) { sz = dpaa_bp->size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); skb = build_skb(sg_vaddr, sz); @@ -2435,6 +2421,44 @@ static void dpaa_eth_napi_disable(struct dpaa_priv *priv) } } +static void dpaa_adjust_link(struct net_device *net_dev) +{ + struct mac_device *mac_dev; + struct dpaa_priv *priv; + + priv = netdev_priv(net_dev); + mac_dev = priv->mac_dev; + mac_dev->adjust_link(mac_dev); +} + +static int dpaa_phy_init(struct net_device *net_dev) +{ + struct mac_device *mac_dev; + struct phy_device *phy_dev; + struct dpaa_priv *priv; + + priv = netdev_priv(net_dev); + mac_dev = priv->mac_dev; + + phy_dev = of_phy_connect(net_dev, mac_dev->phy_node, + &dpaa_adjust_link, 0, + mac_dev->phy_if); + if (!phy_dev) { + netif_err(priv, ifup, net_dev, "init_phy() failed\n"); + return -ENODEV; + } + + /* Remove any features not supported by the controller */ + phy_dev->supported &= mac_dev->if_support; + phy_dev->supported |= (SUPPORTED_Pause | SUPPORTED_Asym_Pause); + phy_dev->advertising = phy_dev->supported; + + mac_dev->phy_dev = phy_dev; + net_dev->phydev = phy_dev; + + return 0; +} + static int dpaa_open(struct net_device *net_dev) { struct mac_device *mac_dev; @@ -2445,12 +2469,9 @@ static int dpaa_open(struct net_device *net_dev) mac_dev = priv->mac_dev; dpaa_eth_napi_enable(priv); - net_dev->phydev = mac_dev->init_phy(net_dev, priv->mac_dev); - if (!net_dev->phydev) { - netif_err(priv, ifup, net_dev, "init_phy() failed\n"); - err = -ENODEV; + err = dpaa_phy_init(net_dev); + if (err) goto phy_init_failed; - } for (i = 0; i < ARRAY_SIZE(mac_dev->port); i++) { err = fman_port_enable(mac_dev->port[i]); @@ -2649,7 +2670,6 @@ static inline u16 dpaa_get_headroom(struct dpaa_buffer_layout *bl) static int dpaa_eth_probe(struct platform_device *pdev) { struct dpaa_bp *dpaa_bps[DPAA_BPS_NUM] = {NULL}; - struct dpaa_percpu_priv *percpu_priv; struct net_device *net_dev = NULL; struct dpaa_fq *dpaa_fq, *tmp; struct dpaa_priv *priv = NULL; @@ -2658,7 +2678,13 @@ static int dpaa_eth_probe(struct platform_device *pdev) int err = 0, i, channel; struct device *dev; - dev = &pdev->dev; + /* device used for DMA mapping */ + dev = pdev->dev.parent; + err = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(40)); + if (err) { + dev_err(dev, "dma_coerce_mask_and_coherent() failed\n"); + return err; + } /* Allocate this early, so we can store relevant information in * the private area @@ -2666,7 +2692,7 @@ static int dpaa_eth_probe(struct platform_device *pdev) net_dev = alloc_etherdev_mq(sizeof(*priv), DPAA_ETH_TXQ_NUM); if (!net_dev) { dev_err(dev, "alloc_etherdev_mq() failed\n"); - goto alloc_etherdev_mq_failed; + return -ENOMEM; } /* Do this here, so we can be verbose early */ @@ -2682,7 +2708,7 @@ static int dpaa_eth_probe(struct platform_device *pdev) if (IS_ERR(mac_dev)) { dev_err(dev, "dpaa_mac_dev_get() failed\n"); err = PTR_ERR(mac_dev); - goto mac_probe_failed; + goto free_netdev; } /* If fsl_fm_max_frm is set to a higher value than the all-common 1500, @@ -2700,21 +2726,13 @@ static int dpaa_eth_probe(struct platform_device *pdev) priv->buf_layout[RX].priv_data_size = DPAA_RX_PRIV_DATA_SIZE; /* Rx */ priv->buf_layout[TX].priv_data_size = DPAA_TX_PRIV_DATA_SIZE; /* Tx */ - /* device used for DMA mapping */ - set_dma_ops(dev, get_dma_ops(&pdev->dev)); - err = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(40)); - if (err) { - dev_err(dev, "dma_coerce_mask_and_coherent() failed\n"); - goto dev_mask_failed; - } - /* bp init */ for (i = 0; i < DPAA_BPS_NUM; i++) { - int err; - dpaa_bps[i] = dpaa_bp_alloc(dev); - if (IS_ERR(dpaa_bps[i])) - return PTR_ERR(dpaa_bps[i]); + if (IS_ERR(dpaa_bps[i])) { + err = PTR_ERR(dpaa_bps[i]); + goto free_dpaa_bps; + } /* the raw size of the buffers used for reception */ dpaa_bps[i]->raw_size = bpool_buffer_raw_size(i, DPAA_BPS_NUM); /* avoid runtime computations by keeping the usable size here */ @@ -2722,11 +2740,8 @@ static int dpaa_eth_probe(struct platform_device *pdev) dpaa_bps[i]->dev = dev; err = dpaa_bp_alloc_pool(dpaa_bps[i]); - if (err < 0) { - dpaa_bps_free(priv); - priv->dpaa_bps[i] = NULL; - goto bp_create_failed; - } + if (err < 0) + goto free_dpaa_bps; priv->dpaa_bps[i] = dpaa_bps[i]; } @@ -2737,7 +2752,7 @@ static int dpaa_eth_probe(struct platform_device *pdev) err = dpaa_alloc_all_fqs(dev, &priv->dpaa_fq_list, &port_fqs); if (err < 0) { dev_err(dev, "dpaa_alloc_all_fqs() failed\n"); - goto fq_probe_failed; + goto free_dpaa_bps; } priv->mac_dev = mac_dev; @@ -2746,7 +2761,7 @@ static int dpaa_eth_probe(struct platform_device *pdev) if (channel < 0) { dev_err(dev, "dpaa_get_channel() failed\n"); err = channel; - goto get_channel_failed; + goto free_dpaa_bps; } priv->channel = (u16)channel; @@ -2766,20 +2781,20 @@ static int dpaa_eth_probe(struct platform_device *pdev) err = dpaa_eth_cgr_init(priv); if (err < 0) { dev_err(dev, "Error initializing CGR\n"); - goto tx_cgr_init_failed; + goto free_dpaa_bps; } err = dpaa_ingress_cgr_init(priv); if (err < 0) { dev_err(dev, "Error initializing ingress CGR\n"); - goto rx_cgr_init_failed; + goto delete_egress_cgr; } /* Add the FQs to the interface, and make them active */ list_for_each_entry_safe(dpaa_fq, tmp, &priv->dpaa_fq_list, list) { err = dpaa_fq_init(dpaa_fq, false); if (err < 0) - goto fq_alloc_failed; + goto free_dpaa_fqs; } priv->tx_headroom = dpaa_get_headroom(&priv->buf_layout[TX]); @@ -2789,7 +2804,7 @@ static int dpaa_eth_probe(struct platform_device *pdev) err = dpaa_eth_init_ports(mac_dev, dpaa_bps, DPAA_BPS_NUM, &port_fqs, &priv->buf_layout[0], dev); if (err) - goto init_ports_failed; + goto free_dpaa_fqs; /* Rx traffic distribution based on keygen hashing defaults to on */ priv->keygen_in_use = true; @@ -2798,11 +2813,7 @@ static int dpaa_eth_probe(struct platform_device *pdev) if (!priv->percpu_priv) { dev_err(dev, "devm_alloc_percpu() failed\n"); err = -ENOMEM; - goto alloc_percpu_failed; - } - for_each_possible_cpu(i) { - percpu_priv = per_cpu_ptr(priv->percpu_priv, i); - memset(percpu_priv, 0, sizeof(*percpu_priv)); + goto free_dpaa_fqs; } priv->num_tc = 1; @@ -2811,11 +2822,11 @@ static int dpaa_eth_probe(struct platform_device *pdev) /* Initialize NAPI */ err = dpaa_napi_add(net_dev); if (err < 0) - goto napi_add_failed; + goto delete_dpaa_napi; err = dpaa_netdev_init(net_dev, &dpaa_ops, tx_timeout); if (err < 0) - goto netdev_init_failed; + goto delete_dpaa_napi; dpaa_eth_sysfs_init(&net_dev->dev); @@ -2824,32 +2835,21 @@ static int dpaa_eth_probe(struct platform_device *pdev) return 0; -netdev_init_failed: -napi_add_failed: +delete_dpaa_napi: dpaa_napi_del(net_dev); -alloc_percpu_failed: -init_ports_failed: +free_dpaa_fqs: dpaa_fq_free(dev, &priv->dpaa_fq_list); -fq_alloc_failed: qman_delete_cgr_safe(&priv->ingress_cgr); qman_release_cgrid(priv->ingress_cgr.cgrid); -rx_cgr_init_failed: +delete_egress_cgr: qman_delete_cgr_safe(&priv->cgr_data.cgr); qman_release_cgrid(priv->cgr_data.cgr.cgrid); -tx_cgr_init_failed: -get_channel_failed: +free_dpaa_bps: dpaa_bps_free(priv); -bp_create_failed: -fq_probe_failed: -dev_mask_failed: -mac_probe_failed: +free_netdev: dev_set_drvdata(dev, NULL); free_netdev(net_dev); -alloc_etherdev_mq_failed: - for (i = 0; i < DPAA_BPS_NUM && dpaa_bps[i]; i++) { - if (atomic_read(&dpaa_bps[i]->refs) == 0) - devm_kfree(dev, dpaa_bps[i]); - } + return err; } diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index 44720f83af27..5385074b3b7d 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -583,12 +583,11 @@ struct fec_enet_private { u64 ethtool_stats[0]; }; -void fec_ptp_init(struct platform_device *pdev); +void fec_ptp_init(struct platform_device *pdev, int irq_idx); void fec_ptp_stop(struct platform_device *pdev); void fec_ptp_start_cyclecounter(struct net_device *ndev); int fec_ptp_set(struct net_device *ndev, struct ifreq *ifr); int fec_ptp_get(struct net_device *ndev, struct ifreq *ifr); -uint fec_ptp_check_pps_event(struct fec_enet_private *fep); /****************************************************************************/ #endif /* FEC_H */ diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 3dc2d771a222..610573855213 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1602,10 +1602,6 @@ fec_enet_interrupt(int irq, void *dev_id) ret = IRQ_HANDLED; complete(&fep->mdio_done); } - - if (fep->ptp_clock) - if (fec_ptp_check_pps_event(fep)) - ret = IRQ_HANDLED; return ret; } @@ -3312,6 +3308,19 @@ fec_enet_get_queue_num(struct platform_device *pdev, int *num_tx, int *num_rx) } +static int fec_enet_get_irq_cnt(struct platform_device *pdev) +{ + int irq_cnt = platform_irq_count(pdev); + + if (irq_cnt > FEC_IRQ_NUM) + irq_cnt = FEC_IRQ_NUM; /* last for pps */ + else if (irq_cnt == 2) + irq_cnt = 1; /* last for pps */ + else if (irq_cnt <= 0) + irq_cnt = 1; /* At least 1 irq is needed */ + return irq_cnt; +} + static int fec_probe(struct platform_device *pdev) { @@ -3325,6 +3334,8 @@ fec_probe(struct platform_device *pdev) struct device_node *np = pdev->dev.of_node, *phy_node; int num_tx_qs; int num_rx_qs; + char irq_name[8]; + int irq_cnt; fec_enet_get_queue_num(pdev, &num_tx_qs, &num_rx_qs); @@ -3465,18 +3476,20 @@ fec_probe(struct platform_device *pdev) if (ret) goto failed_reset; + irq_cnt = fec_enet_get_irq_cnt(pdev); if (fep->bufdesc_ex) - fec_ptp_init(pdev); + fec_ptp_init(pdev, irq_cnt); ret = fec_enet_init(ndev); if (ret) goto failed_init; - for (i = 0; i < FEC_IRQ_NUM; i++) { - irq = platform_get_irq(pdev, i); + for (i = 0; i < irq_cnt; i++) { + sprintf(irq_name, "int%d", i); + irq = platform_get_irq_byname(pdev, irq_name); + if (irq < 0) + irq = platform_get_irq(pdev, i); if (irq < 0) { - if (i) - break; ret = irq; goto failed_irq; } diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index 6ebad3fac81d..f81439796ac7 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -549,6 +549,37 @@ static void fec_time_keep(struct work_struct *work) schedule_delayed_work(&fep->time_keep, HZ); } +/* This function checks the pps event and reloads the timer compare counter. */ +static irqreturn_t fec_pps_interrupt(int irq, void *dev_id) +{ + struct net_device *ndev = dev_id; + struct fec_enet_private *fep = netdev_priv(ndev); + u32 val; + u8 channel = fep->pps_channel; + struct ptp_clock_event event; + + val = readl(fep->hwp + FEC_TCSR(channel)); + if (val & FEC_T_TF_MASK) { + /* Write the next next compare(not the next according the spec) + * value to the register + */ + writel(fep->next_counter, fep->hwp + FEC_TCCR(channel)); + do { + writel(val, fep->hwp + FEC_TCSR(channel)); + } while (readl(fep->hwp + FEC_TCSR(channel)) & FEC_T_TF_MASK); + + /* Update the counter; */ + fep->next_counter = (fep->next_counter + fep->reload_period) & + fep->cc.mask; + + event.type = PTP_CLOCK_PPS; + ptp_clock_event(fep->ptp_clock, &event); + return IRQ_HANDLED; + } + + return IRQ_NONE; +} + /** * fec_ptp_init * @ndev: The FEC network adapter @@ -558,10 +589,12 @@ static void fec_time_keep(struct work_struct *work) * cyclecounter init routine and exits. */ -void fec_ptp_init(struct platform_device *pdev) +void fec_ptp_init(struct platform_device *pdev, int irq_idx) { struct net_device *ndev = platform_get_drvdata(pdev); struct fec_enet_private *fep = netdev_priv(ndev); + int irq; + int ret; fep->ptp_caps.owner = THIS_MODULE; snprintf(fep->ptp_caps.name, 16, "fec ptp"); @@ -587,6 +620,20 @@ void fec_ptp_init(struct platform_device *pdev) INIT_DELAYED_WORK(&fep->time_keep, fec_time_keep); + irq = platform_get_irq_byname(pdev, "pps"); + if (irq < 0) + irq = platform_get_irq(pdev, irq_idx); + /* Failure to get an irq is not fatal, + * only the PTP_CLOCK_PPS clock events should stop + */ + if (irq >= 0) { + ret = devm_request_irq(&pdev->dev, irq, fec_pps_interrupt, + 0, pdev->name, ndev); + if (ret < 0) + dev_warn(&pdev->dev, "request for pps irq failed(%d)\n", + ret); + } + fep->ptp_clock = ptp_clock_register(&fep->ptp_caps, &pdev->dev); if (IS_ERR(fep->ptp_clock)) { fep->ptp_clock = NULL; @@ -605,36 +652,3 @@ void fec_ptp_stop(struct platform_device *pdev) if (fep->ptp_clock) ptp_clock_unregister(fep->ptp_clock); } - -/** - * fec_ptp_check_pps_event - * @fep: the fec_enet_private structure handle - * - * This function check the pps event and reload the timer compare counter. - */ -uint fec_ptp_check_pps_event(struct fec_enet_private *fep) -{ - u32 val; - u8 channel = fep->pps_channel; - struct ptp_clock_event event; - - val = readl(fep->hwp + FEC_TCSR(channel)); - if (val & FEC_T_TF_MASK) { - /* Write the next next compare(not the next according the spec) - * value to the register - */ - writel(fep->next_counter, fep->hwp + FEC_TCCR(channel)); - do { - writel(val, fep->hwp + FEC_TCSR(channel)); - } while (readl(fep->hwp + FEC_TCSR(channel)) & FEC_T_TF_MASK); - - /* Update the counter; */ - fep->next_counter = (fep->next_counter + fep->reload_period) & fep->cc.mask; - - event.type = PTP_CLOCK_PPS; - ptp_clock_event(fep->ptp_clock, &event); - return 1; - } - - return 0; -} diff --git a/drivers/net/ethernet/freescale/fman/Makefile b/drivers/net/ethernet/freescale/fman/Makefile index f83a3653b63b..b618091db091 100644 --- a/drivers/net/ethernet/freescale/fman/Makefile +++ b/drivers/net/ethernet/freescale/fman/Makefile @@ -1,10 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 subdir-ccflags-y += -I$(srctree)/drivers/net/ethernet/freescale/fman -obj-$(CONFIG_FSL_FMAN) += fsl_fman.o -obj-$(CONFIG_FSL_FMAN) += fsl_fman_port.o -obj-$(CONFIG_FSL_FMAN) += fsl_mac.o +obj-$(CONFIG_FSL_FMAN) += fsl_dpaa_fman.o +obj-$(CONFIG_FSL_FMAN) += fsl_dpaa_fman_port.o +obj-$(CONFIG_FSL_FMAN) += fsl_dpaa_mac.o -fsl_fman-objs := fman_muram.o fman.o fman_sp.o fman_keygen.o -fsl_fman_port-objs := fman_port.o -fsl_mac-objs:= mac.o fman_dtsec.o fman_memac.o fman_tgec.o +fsl_dpaa_fman-objs := fman_muram.o fman.o fman_sp.o fman_keygen.o +fsl_dpaa_fman_port-objs := fman_port.o +fsl_dpaa_mac-objs:= mac.o fman_dtsec.o fman_memac.o fman_tgec.o diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c index 1789b206be58..6552d68ea6e1 100644 --- a/drivers/net/ethernet/freescale/fman/fman_port.c +++ b/drivers/net/ethernet/freescale/fman/fman_port.c @@ -1339,8 +1339,10 @@ int fman_port_config(struct fman_port *port, struct fman_port_params *params) switch (port->port_type) { case FMAN_PORT_TYPE_RX: set_rx_dflt_cfg(port, params); + /* fall through */ case FMAN_PORT_TYPE_TX: set_tx_dflt_cfg(port, params, &port->dts_params); + /* fall through */ default: set_dflt_cfg(port, params); } diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index 387eb4a88b72..88c0a0636b44 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -57,9 +57,7 @@ struct mac_priv_s { struct device *dev; void __iomem *vaddr; u8 cell_index; - phy_interface_t phy_if; struct fman *fman; - struct device_node *phy_node; struct device_node *internal_phy_node; /* List of multicast addresses */ struct list_head mc_addr_list; @@ -106,7 +104,7 @@ static void set_fman_mac_params(struct mac_device *mac_dev, resource_size(mac_dev->res)); memcpy(¶ms->addr, mac_dev->addr, sizeof(mac_dev->addr)); params->max_speed = priv->max_speed; - params->phy_if = priv->phy_if; + params->phy_if = mac_dev->phy_if; params->basex_if = false; params->mac_id = priv->cell_index; params->fm = (void *)priv->fman; @@ -419,15 +417,12 @@ void fman_get_pause_cfg(struct mac_device *mac_dev, bool *rx_pause, } EXPORT_SYMBOL(fman_get_pause_cfg); -static void adjust_link_void(struct net_device *net_dev) +static void adjust_link_void(struct mac_device *mac_dev) { } -static void adjust_link_dtsec(struct net_device *net_dev) +static void adjust_link_dtsec(struct mac_device *mac_dev) { - struct device *dev = net_dev->dev.parent; - struct dpaa_eth_data *eth_data = dev->platform_data; - struct mac_device *mac_dev = eth_data->mac_dev; struct phy_device *phy_dev = mac_dev->phy_dev; struct fman_mac *fman_mac; bool rx_pause, tx_pause; @@ -444,14 +439,12 @@ static void adjust_link_dtsec(struct net_device *net_dev) fman_get_pause_cfg(mac_dev, &rx_pause, &tx_pause); err = fman_set_mac_active_pause(mac_dev, rx_pause, tx_pause); if (err < 0) - netdev_err(net_dev, "fman_set_mac_active_pause() = %d\n", err); + dev_err(mac_dev->priv->dev, "fman_set_mac_active_pause() = %d\n", + err); } -static void adjust_link_memac(struct net_device *net_dev) +static void adjust_link_memac(struct mac_device *mac_dev) { - struct device *dev = net_dev->dev.parent; - struct dpaa_eth_data *eth_data = dev->platform_data; - struct mac_device *mac_dev = eth_data->mac_dev; struct phy_device *phy_dev = mac_dev->phy_dev; struct fman_mac *fman_mac; bool rx_pause, tx_pause; @@ -463,60 +456,12 @@ static void adjust_link_memac(struct net_device *net_dev) fman_get_pause_cfg(mac_dev, &rx_pause, &tx_pause); err = fman_set_mac_active_pause(mac_dev, rx_pause, tx_pause); if (err < 0) - netdev_err(net_dev, "fman_set_mac_active_pause() = %d\n", err); -} - -/* Initializes driver's PHY state, and attaches to the PHY. - * Returns 0 on success. - */ -static struct phy_device *init_phy(struct net_device *net_dev, - struct mac_device *mac_dev, - void (*adj_lnk)(struct net_device *)) -{ - struct phy_device *phy_dev; - struct mac_priv_s *priv = mac_dev->priv; - - phy_dev = of_phy_connect(net_dev, priv->phy_node, adj_lnk, 0, - priv->phy_if); - if (!phy_dev) { - netdev_err(net_dev, "Could not connect to PHY\n"); - return NULL; - } - - /* Remove any features not supported by the controller */ - phy_dev->supported &= mac_dev->if_support; - /* Enable the symmetric and asymmetric PAUSE frame advertisements, - * as most of the PHY drivers do not enable them by default. - */ - phy_dev->supported |= (SUPPORTED_Pause | SUPPORTED_Asym_Pause); - phy_dev->advertising = phy_dev->supported; - - mac_dev->phy_dev = phy_dev; - - return phy_dev; -} - -static struct phy_device *dtsec_init_phy(struct net_device *net_dev, - struct mac_device *mac_dev) -{ - return init_phy(net_dev, mac_dev, &adjust_link_dtsec); -} - -static struct phy_device *tgec_init_phy(struct net_device *net_dev, - struct mac_device *mac_dev) -{ - return init_phy(net_dev, mac_dev, adjust_link_void); -} - -static struct phy_device *memac_init_phy(struct net_device *net_dev, - struct mac_device *mac_dev) -{ - return init_phy(net_dev, mac_dev, &adjust_link_memac); + dev_err(mac_dev->priv->dev, "fman_set_mac_active_pause() = %d\n", + err); } static void setup_dtsec(struct mac_device *mac_dev) { - mac_dev->init_phy = dtsec_init_phy; mac_dev->init = dtsec_initialization; mac_dev->set_promisc = dtsec_set_promiscuous; mac_dev->change_addr = dtsec_modify_mac_address; @@ -528,14 +473,13 @@ static void setup_dtsec(struct mac_device *mac_dev) mac_dev->set_multi = set_multi; mac_dev->start = start; mac_dev->stop = stop; - + mac_dev->adjust_link = adjust_link_dtsec; mac_dev->priv->enable = dtsec_enable; mac_dev->priv->disable = dtsec_disable; } static void setup_tgec(struct mac_device *mac_dev) { - mac_dev->init_phy = tgec_init_phy; mac_dev->init = tgec_initialization; mac_dev->set_promisc = tgec_set_promiscuous; mac_dev->change_addr = tgec_modify_mac_address; @@ -547,14 +491,13 @@ static void setup_tgec(struct mac_device *mac_dev) mac_dev->set_multi = set_multi; mac_dev->start = start; mac_dev->stop = stop; - + mac_dev->adjust_link = adjust_link_void; mac_dev->priv->enable = tgec_enable; mac_dev->priv->disable = tgec_disable; } static void setup_memac(struct mac_device *mac_dev) { - mac_dev->init_phy = memac_init_phy; mac_dev->init = memac_initialization; mac_dev->set_promisc = memac_set_promiscuous; mac_dev->change_addr = memac_modify_mac_address; @@ -566,7 +509,7 @@ static void setup_memac(struct mac_device *mac_dev) mac_dev->set_multi = set_multi; mac_dev->start = start; mac_dev->stop = stop; - + mac_dev->adjust_link = adjust_link_memac; mac_dev->priv->enable = memac_enable; mac_dev->priv->disable = memac_disable; } @@ -599,8 +542,7 @@ static const u16 phy2speed[] = { }; static struct platform_device *dpaa_eth_add_device(int fman_id, - struct mac_device *mac_dev, - struct device_node *node) + struct mac_device *mac_dev) { struct platform_device *pdev; struct dpaa_eth_data data; @@ -613,17 +555,14 @@ static struct platform_device *dpaa_eth_add_device(int fman_id, data.mac_dev = mac_dev; data.mac_hw_id = priv->cell_index; data.fman_hw_id = fman_id; - data.mac_node = node; mutex_lock(ð_lock); - pdev = platform_device_alloc("dpaa-ethernet", dpaa_eth_dev_cnt); if (!pdev) { ret = -ENOMEM; goto no_mem; } - pdev->dev.of_node = node; pdev->dev.parent = priv->dev; set_dma_ops(&pdev->dev, get_dma_ops(priv->dev)); @@ -676,7 +615,6 @@ static int mac_probe(struct platform_device *_of_dev) mac_dev = devm_kzalloc(dev, sizeof(*mac_dev), GFP_KERNEL); if (!mac_dev) { err = -ENOMEM; - dev_err(dev, "devm_kzalloc() = %d\n", err); goto _return; } priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); @@ -706,9 +644,6 @@ static int mac_probe(struct platform_device *_of_dev) goto _return; } - /* Register mac_dev */ - dev_set_drvdata(dev, mac_dev); - INIT_LIST_HEAD(&priv->mc_addr_list); /* Get the FM node */ @@ -717,7 +652,7 @@ static int mac_probe(struct platform_device *_of_dev) dev_err(dev, "of_get_parent(%pOF) failed\n", mac_node); err = -EINVAL; - goto _return_dev_set_drvdata; + goto _return_of_get_parent; } of_dev = of_find_device_by_node(dev_node); @@ -751,7 +686,7 @@ static int mac_probe(struct platform_device *_of_dev) if (err < 0) { dev_err(dev, "of_address_to_resource(%pOF) = %d\n", mac_node, err); - goto _return_dev_set_drvdata; + goto _return_of_get_parent; } mac_dev->res = __devm_request_region(dev, @@ -761,7 +696,7 @@ static int mac_probe(struct platform_device *_of_dev) if (!mac_dev->res) { dev_err(dev, "__devm_request_mem_region(mac) failed\n"); err = -EBUSY; - goto _return_dev_set_drvdata; + goto _return_of_get_parent; } priv->vaddr = devm_ioremap(dev, mac_dev->res->start, @@ -769,16 +704,12 @@ static int mac_probe(struct platform_device *_of_dev) if (!priv->vaddr) { dev_err(dev, "devm_ioremap() failed\n"); err = -EIO; - goto _return_dev_set_drvdata; + goto _return_of_get_parent; } if (!of_device_is_available(mac_node)) { - devm_iounmap(dev, priv->vaddr); - __devm_release_region(dev, fman_get_mem_region(priv->fman), - res.start, res.end + 1 - res.start); - devm_kfree(dev, mac_dev); - dev_set_drvdata(dev, NULL); - return -ENODEV; + err = -ENODEV; + goto _return_of_get_parent; } /* Get the cell-index */ @@ -786,7 +717,7 @@ static int mac_probe(struct platform_device *_of_dev) if (err) { dev_err(dev, "failed to read cell-index for %pOF\n", mac_node); err = -EINVAL; - goto _return_dev_set_drvdata; + goto _return_of_get_parent; } priv->cell_index = (u8)val; @@ -795,7 +726,7 @@ static int mac_probe(struct platform_device *_of_dev) if (!mac_addr) { dev_err(dev, "of_get_mac_address(%pOF) failed\n", mac_node); err = -EINVAL; - goto _return_dev_set_drvdata; + goto _return_of_get_parent; } memcpy(mac_dev->addr, mac_addr, sizeof(mac_dev->addr)); @@ -805,14 +736,14 @@ static int mac_probe(struct platform_device *_of_dev) dev_err(dev, "of_count_phandle_with_args(%pOF, fsl,fman-ports) failed\n", mac_node); err = nph; - goto _return_dev_set_drvdata; + goto _return_of_get_parent; } if (nph != ARRAY_SIZE(mac_dev->port)) { dev_err(dev, "Not supported number of fman-ports handles of mac node %pOF from device tree\n", mac_node); err = -EINVAL; - goto _return_dev_set_drvdata; + goto _return_of_get_parent; } for (i = 0; i < ARRAY_SIZE(mac_dev->port); i++) { @@ -851,13 +782,13 @@ static int mac_probe(struct platform_device *_of_dev) mac_node); phy_if = PHY_INTERFACE_MODE_SGMII; } - priv->phy_if = phy_if; + mac_dev->phy_if = phy_if; - priv->speed = phy2speed[priv->phy_if]; + priv->speed = phy2speed[mac_dev->phy_if]; priv->max_speed = priv->speed; mac_dev->if_support = DTSEC_SUPPORTED; /* We don't support half-duplex in SGMII mode */ - if (priv->phy_if == PHY_INTERFACE_MODE_SGMII) + if (mac_dev->phy_if == PHY_INTERFACE_MODE_SGMII) mac_dev->if_support &= ~(SUPPORTED_10baseT_Half | SUPPORTED_100baseT_Half); @@ -866,30 +797,31 @@ static int mac_probe(struct platform_device *_of_dev) mac_dev->if_support |= SUPPORTED_1000baseT_Full; /* The 10G interface only supports one mode */ - if (priv->phy_if == PHY_INTERFACE_MODE_XGMII) + if (mac_dev->phy_if == PHY_INTERFACE_MODE_XGMII) mac_dev->if_support = SUPPORTED_10000baseT_Full; /* Get the rest of the PHY information */ - priv->phy_node = of_parse_phandle(mac_node, "phy-handle", 0); - if (!priv->phy_node && of_phy_is_fixed_link(mac_node)) { + mac_dev->phy_node = of_parse_phandle(mac_node, "phy-handle", 0); + if (!mac_dev->phy_node && of_phy_is_fixed_link(mac_node)) { struct phy_device *phy; err = of_phy_register_fixed_link(mac_node); if (err) - goto _return_dev_set_drvdata; + goto _return_of_get_parent; priv->fixed_link = kzalloc(sizeof(*priv->fixed_link), GFP_KERNEL); if (!priv->fixed_link) { err = -ENOMEM; - goto _return_dev_set_drvdata; + goto _return_of_get_parent; } - priv->phy_node = of_node_get(mac_node); - phy = of_phy_find_device(priv->phy_node); + mac_dev->phy_node = of_node_get(mac_node); + phy = of_phy_find_device(mac_dev->phy_node); if (!phy) { err = -EINVAL; - goto _return_dev_set_drvdata; + of_node_put(mac_dev->phy_node); + goto _return_of_get_parent; } priv->fixed_link->link = phy->link; @@ -904,8 +836,8 @@ static int mac_probe(struct platform_device *_of_dev) err = mac_dev->init(mac_dev); if (err < 0) { dev_err(dev, "mac_dev->init() = %d\n", err); - of_node_put(priv->phy_node); - goto _return_dev_set_drvdata; + of_node_put(mac_dev->phy_node); + goto _return_of_get_parent; } /* pause frame autonegotiation enabled */ @@ -926,7 +858,7 @@ static int mac_probe(struct platform_device *_of_dev) mac_dev->addr[0], mac_dev->addr[1], mac_dev->addr[2], mac_dev->addr[3], mac_dev->addr[4], mac_dev->addr[5]); - priv->eth_dev = dpaa_eth_add_device(fman_id, mac_dev, mac_node); + priv->eth_dev = dpaa_eth_add_device(fman_id, mac_dev); if (IS_ERR(priv->eth_dev)) { dev_err(dev, "failed to add Ethernet platform device for MAC %d\n", priv->cell_index); @@ -937,9 +869,8 @@ static int mac_probe(struct platform_device *_of_dev) _return_of_node_put: of_node_put(dev_node); -_return_dev_set_drvdata: +_return_of_get_parent: kfree(priv->fixed_link); - dev_set_drvdata(dev, NULL); _return: return err; } diff --git a/drivers/net/ethernet/freescale/fman/mac.h b/drivers/net/ethernet/freescale/fman/mac.h index d7313f0c5135..eefb3357e304 100644 --- a/drivers/net/ethernet/freescale/fman/mac.h +++ b/drivers/net/ethernet/freescale/fman/mac.h @@ -50,6 +50,8 @@ struct mac_device { struct fman_port *port[2]; u32 if_support; struct phy_device *phy_dev; + phy_interface_t phy_if; + struct device_node *phy_node; bool autoneg_pause; bool rx_pause_req; @@ -58,11 +60,10 @@ struct mac_device { bool tx_pause_active; bool promisc; - struct phy_device *(*init_phy)(struct net_device *net_dev, - struct mac_device *mac_dev); int (*init)(struct mac_device *mac_dev); int (*start)(struct mac_device *mac_dev); int (*stop)(struct mac_device *mac_dev); + void (*adjust_link)(struct mac_device *mac_dev); int (*set_promisc)(struct fman_mac *mac_dev, bool enable); int (*change_addr)(struct fman_mac *mac_dev, enet_addr_t *enet_addr); int (*set_multi)(struct net_device *net_dev, @@ -82,7 +83,6 @@ struct mac_device { }; struct dpaa_eth_data { - struct device_node *mac_node; struct mac_device *mac_dev; int mac_hw_id; int fman_hw_id; diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index 753259091b22..7892f2f0c6b5 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -1023,8 +1023,6 @@ static int fs_enet_probe(struct platform_device *ofdev) ndev->ethtool_ops = &fs_ethtool_ops; - init_timer(&fep->phy_timer_list); - netif_carrier_off(ndev); ndev->features |= NETIF_F_SG; diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h index 168e10ea487f..92e06b37a199 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h @@ -138,7 +138,6 @@ struct fs_enet_private { cbd_t __iomem *cur_rx; cbd_t __iomem *cur_tx; int tx_free; - struct timer_list phy_timer_list; const struct phy_info *phy; u32 msg_enable; struct mii_if_info mii_if; diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index f77ba9fa257b..a96b838cffce 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -3857,8 +3857,9 @@ static int ucc_geth_probe(struct platform_device* ofdev) } if (netif_msg_probe(&debug)) - pr_info("UCC%1d at 0x%8x (irq = %d)\n", - ug_info->uf_info.ucc_num + 1, ug_info->uf_info.regs, + pr_info("UCC%1d at 0x%8llx (irq = %d)\n", + ug_info->uf_info.ucc_num + 1, + (u64)ug_info->uf_info.regs, ug_info->uf_info.irq); /* Create an ethernet device instance */ diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig index 91c7bdb9b43c..30000b6aa7b8 100644 --- a/drivers/net/ethernet/hisilicon/Kconfig +++ b/drivers/net/ethernet/hisilicon/Kconfig @@ -78,7 +78,7 @@ config HNS_ENET config HNS3 tristate "Hisilicon Network Subsystem Support HNS3 (Framework)" - depends on PCI + depends on PCI ---help--- This selects the framework support for Hisilicon Network Subsystem 3. This layer facilitates clients like ENET, RoCE and user-space ethernet @@ -87,7 +87,7 @@ config HNS3 config HNS3_HCLGE tristate "Hisilicon HNS3 HCLGE Acceleration Engine & Compatibility Layer Support" - depends on PCI_MSI + depends on PCI_MSI depends on HNS3 ---help--- This selects the HNS3_HCLGE network acceleration engine & its hardware @@ -96,11 +96,20 @@ config HNS3_HCLGE config HNS3_ENET tristate "Hisilicon HNS3 Ethernet Device Support" - depends on 64BIT && PCI + depends on 64BIT && PCI depends on HNS3 && HNS3_HCLGE ---help--- This selects the Ethernet Driver for Hisilicon Network Subsystem 3 for hip08 family of SoCs. This module depends upon HNAE3 driver to access the HNAE3 devices and their associated operations. +config HNS3_DCB + bool "Hisilicon HNS3 Data Center Bridge Support" + default n + depends on HNS3 && HNS3_HCLGE && DCB + ---help--- + Say Y here if you want to use Data Center Bridging (DCB) in the HNS3 driver. + + If unsure, say N. + endif # NET_VENDOR_HISILICON diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index e77192683dba..1ccb6443d2ed 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -2159,9 +2159,9 @@ static void hns_nic_task_schedule(struct hns_nic_priv *priv) (void)schedule_work(&priv->service_task); } -static void hns_nic_service_timer(unsigned long data) +static void hns_nic_service_timer(struct timer_list *t) { - struct hns_nic_priv *priv = (struct hns_nic_priv *)data; + struct hns_nic_priv *priv = from_timer(priv, t, service_timer); (void)mod_timer(&priv->service_timer, jiffies + SERVICE_TIMER_HZ); @@ -2451,8 +2451,7 @@ static int hns_nic_dev_probe(struct platform_device *pdev) /* carrier off reporting is important to ethtool even BEFORE open */ netif_carrier_off(ndev); - setup_timer(&priv->service_timer, hns_nic_service_timer, - (unsigned long)priv); + timer_setup(&priv->service_timer, hns_nic_service_timer, 0); INIT_WORK(&priv->service_task, hns_nic_service_task); set_bit(NIC_STATE_SERVICE_INITED, &priv->state); diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 1a01cadfe5f3..67c59e1039f2 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -28,6 +28,7 @@ */ #include <linux/acpi.h> +#include <linux/dcbnl.h> #include <linux/delay.h> #include <linux/device.h> #include <linux/module.h> @@ -109,6 +110,21 @@ enum hnae3_media_type { HNAE3_MEDIA_TYPE_BACKPLANE, }; +enum hnae3_reset_notify_type { + HNAE3_UP_CLIENT, + HNAE3_DOWN_CLIENT, + HNAE3_INIT_CLIENT, + HNAE3_UNINIT_CLIENT, +}; + +enum hnae3_reset_type { + HNAE3_FUNC_RESET, + HNAE3_CORE_RESET, + HNAE3_GLOBAL_RESET, + HNAE3_IMP_RESET, + HNAE3_NONE_RESET, +}; + struct hnae3_vector_info { u8 __iomem *io_addr; int vector; @@ -131,6 +147,9 @@ struct hnae3_client_ops { int (*init_instance)(struct hnae3_handle *handle); void (*uninit_instance)(struct hnae3_handle *handle, bool reset); void (*link_status_change)(struct hnae3_handle *handle, bool state); + int (*setup_tc)(struct hnae3_handle *handle, u8 tc); + int (*reset_notify)(struct hnae3_handle *handle, + enum hnae3_reset_notify_type type); }; #define HNAE3_CLIENT_NAME_LENGTH 16 @@ -337,6 +356,10 @@ struct hnae3_ae_ops { u8 *hfunc); int (*set_rss)(struct hnae3_handle *handle, const u32 *indir, const u8 *key, const u8 hfunc); + int (*set_rss_tuple)(struct hnae3_handle *handle, + struct ethtool_rxnfc *cmd); + int (*get_rss_tuple)(struct hnae3_handle *handle, + struct ethtool_rxnfc *cmd); int (*get_tc_size)(struct hnae3_handle *handle); @@ -361,6 +384,23 @@ struct hnae3_ae_ops { u16 vlan_id, bool is_kill); int (*set_vf_vlan_filter)(struct hnae3_handle *handle, int vfid, u16 vlan, u8 qos, __be16 proto); + void (*reset_event)(struct hnae3_handle *handle, + enum hnae3_reset_type reset); +}; + +struct hnae3_dcb_ops { + /* IEEE 802.1Qaz std */ + int (*ieee_getets)(struct hnae3_handle *, struct ieee_ets *); + int (*ieee_setets)(struct hnae3_handle *, struct ieee_ets *); + int (*ieee_getpfc)(struct hnae3_handle *, struct ieee_pfc *); + int (*ieee_setpfc)(struct hnae3_handle *, struct ieee_pfc *); + + /* DCBX configuration */ + u8 (*getdcbx)(struct hnae3_handle *); + u8 (*setdcbx)(struct hnae3_handle *, u8); + + int (*map_update)(struct hnae3_handle *); + int (*setup_tc)(struct hnae3_handle *, u8, u8 *); }; struct hnae3_ae_algo { @@ -394,6 +434,7 @@ struct hnae3_knic_private_info { u16 num_tqps; /* total number of TQPs in this handle */ struct hnae3_queue **tqp; /* array base of all TQPs in this instance */ + const struct hnae3_dcb_ops *dcb_ops; }; struct hnae3_roce_private_info { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile index 162e8a42acd0..d2b20d01a58c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile @@ -7,5 +7,9 @@ ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3 obj-$(CONFIG_HNS3_HCLGE) += hclge.o hclge-objs = hclge_main.o hclge_cmd.o hclge_mdio.o hclge_tm.o +hclge-$(CONFIG_HNS3_DCB) += hclge_dcb.o + obj-$(CONFIG_HNS3_ENET) += hns3.o hns3-objs = hns3_enet.o hns3_ethtool.o + +hns3-$(CONFIG_HNS3_DCB) += hns3_dcbnl.o diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c index 8b511e6e0ce9..ff13d1876d9e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c @@ -62,7 +62,7 @@ static void hclge_free_cmd_desc(struct hclge_cmq_ring *ring) ring->desc = NULL; } -static int hclge_init_cmd_queue(struct hclge_dev *hdev, int ring_type) +static int hclge_alloc_cmd_queue(struct hclge_dev *hdev, int ring_type) { struct hclge_hw *hw = &hdev->hw; struct hclge_cmq_ring *ring = @@ -79,12 +79,18 @@ static int hclge_init_cmd_queue(struct hclge_dev *hdev, int ring_type) return ret; } - ring->next_to_clean = 0; - ring->next_to_use = 0; - return 0; } +void hclge_cmd_reuse_desc(struct hclge_desc *desc, bool is_read) +{ + desc->flag = cpu_to_le16(HCLGE_CMD_FLAG_NO_INTR | HCLGE_CMD_FLAG_IN); + if (is_read) + desc->flag |= cpu_to_le16(HCLGE_CMD_FLAG_WR); + else + desc->flag &= cpu_to_le16(~HCLGE_CMD_FLAG_WR); +} + void hclge_cmd_setup_basic_desc(struct hclge_desc *desc, enum hclge_opcode_type opcode, bool is_read) { @@ -208,7 +214,7 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num) * which will be use for hardware to write back */ ntc = hw->cmq.csq.next_to_use; - opcode = desc[0].opcode; + opcode = le16_to_cpu(desc[0].opcode); while (handle < num) { desc_to_use = &hw->cmq.csq.desc[hw->cmq.csq.next_to_use]; *desc_to_use = desc[handle]; @@ -225,7 +231,7 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num) * If the command is sync, wait for the firmware to write back, * if multi descriptors to be sent, use the first one to check */ - if (HCLGE_SEND_SYNC(desc->flag)) { + if (HCLGE_SEND_SYNC(le16_to_cpu(desc->flag))) { do { if (hclge_cmd_csq_done(hw)) break; @@ -244,9 +250,9 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num) pr_debug("Get cmd desc:\n"); if (likely(!hclge_is_special_opcode(opcode))) - desc_ret = desc[handle].retval; + desc_ret = le16_to_cpu(desc[handle].retval); else - desc_ret = desc[0].retval; + desc_ret = le16_to_cpu(desc[0].retval); if ((enum hclge_cmd_return_status)desc_ret == HCLGE_CMD_EXEC_SUCCESS) @@ -276,15 +282,15 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num) return retval; } -enum hclge_cmd_status hclge_cmd_query_firmware_version(struct hclge_hw *hw, - u32 *version) +static enum hclge_cmd_status hclge_cmd_query_firmware_version( + struct hclge_hw *hw, u32 *version) { - struct hclge_query_version *resp; + struct hclge_query_version_cmd *resp; struct hclge_desc desc; int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_FW_VER, 1); - resp = (struct hclge_query_version *)desc.data; + resp = (struct hclge_query_version_cmd *)desc.data; ret = hclge_cmd_send(hw, &desc, 1); if (!ret) @@ -293,37 +299,52 @@ enum hclge_cmd_status hclge_cmd_query_firmware_version(struct hclge_hw *hw, return ret; } -int hclge_cmd_init(struct hclge_dev *hdev) +int hclge_cmd_queue_init(struct hclge_dev *hdev) { - u32 version; int ret; /* Setup the queue entries for use cmd queue */ hdev->hw.cmq.csq.desc_num = HCLGE_NIC_CMQ_DESC_NUM; hdev->hw.cmq.crq.desc_num = HCLGE_NIC_CMQ_DESC_NUM; - /* Setup the lock for command queue */ - spin_lock_init(&hdev->hw.cmq.csq.lock); - spin_lock_init(&hdev->hw.cmq.crq.lock); - /* Setup Tx write back timeout */ hdev->hw.cmq.tx_timeout = HCLGE_CMDQ_TX_TIMEOUT; /* Setup queue rings */ - ret = hclge_init_cmd_queue(hdev, HCLGE_TYPE_CSQ); + ret = hclge_alloc_cmd_queue(hdev, HCLGE_TYPE_CSQ); if (ret) { dev_err(&hdev->pdev->dev, "CSQ ring setup error %d\n", ret); return ret; } - ret = hclge_init_cmd_queue(hdev, HCLGE_TYPE_CRQ); + ret = hclge_alloc_cmd_queue(hdev, HCLGE_TYPE_CRQ); if (ret) { dev_err(&hdev->pdev->dev, "CRQ ring setup error %d\n", ret); goto err_csq; } + return 0; +err_csq: + hclge_free_cmd_desc(&hdev->hw.cmq.csq); + return ret; +} + +int hclge_cmd_init(struct hclge_dev *hdev) +{ + u32 version; + int ret; + + hdev->hw.cmq.csq.next_to_clean = 0; + hdev->hw.cmq.csq.next_to_use = 0; + hdev->hw.cmq.crq.next_to_clean = 0; + hdev->hw.cmq.crq.next_to_use = 0; + + /* Setup the lock for command queue */ + spin_lock_init(&hdev->hw.cmq.csq.lock); + spin_lock_init(&hdev->hw.cmq.crq.lock); + hclge_cmd_init_regs(&hdev->hw); ret = hclge_cmd_query_firmware_version(&hdev->hw, &version); @@ -337,9 +358,6 @@ int hclge_cmd_init(struct hclge_dev *hdev) dev_info(&hdev->pdev->dev, "The firmware version is %08x\n", version); return 0; -err_csq: - hclge_free_cmd_desc(&hdev->hw.cmq.csq); - return ret; } static void hclge_destroy_queue(struct hclge_cmq_ring *ring) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 758cf3948131..ce5ed8845042 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -63,6 +63,11 @@ enum hclge_cmd_status { HCLGE_ERR_CSQ_ERROR = -3, }; +struct hclge_misc_vector { + u8 __iomem *addr; + int vector_irq; +}; + struct hclge_cmq { struct hclge_cmq_ring csq; struct hclge_cmq_ring crq; @@ -221,12 +226,12 @@ enum hclge_opcode_type { #define HCLGE_RCB_INIT_QUERY_TIMEOUT 10 #define HCLGE_RCB_INIT_FLAG_EN_B 0 #define HCLGE_RCB_INIT_FLAG_FINI_B 8 -struct hclge_config_rcb_init { +struct hclge_config_rcb_init_cmd { __le16 rcb_init_flag; u8 rsv[22]; }; -struct hclge_tqp_map { +struct hclge_tqp_map_cmd { __le16 tqp_id; /* Absolute tqp id for in this pf */ u8 tqp_vf; /* VF id */ #define HCLGE_TQP_MAP_TYPE_PF 0 @@ -246,15 +251,15 @@ enum hclge_int_type { HCLGE_INT_EVENT, }; -struct hclge_ctrl_vector_chain { +struct hclge_ctrl_vector_chain_cmd { u8 int_vector_id; u8 int_cause_num; #define HCLGE_INT_TYPE_S 0 -#define HCLGE_INT_TYPE_M 0x3 +#define HCLGE_INT_TYPE_M GENMASK(1, 0) #define HCLGE_TQP_ID_S 2 -#define HCLGE_TQP_ID_M (0x7ff << HCLGE_TQP_ID_S) +#define HCLGE_TQP_ID_M GENMASK(12, 2) #define HCLGE_INT_GL_IDX_S 13 -#define HCLGE_INT_GL_IDX_M (0x3 << HCLGE_INT_GL_IDX_S) +#define HCLGE_INT_GL_IDX_M GENMASK(14, 13) __le16 tqp_type_and_id[HCLGE_VECTOR_ELEMENTS_PER_CMD]; u8 vfid; u8 rsv; @@ -263,18 +268,18 @@ struct hclge_ctrl_vector_chain { #define HCLGE_TC_NUM 8 #define HCLGE_TC0_PRI_BUF_EN_B 15 /* Bit 15 indicate enable or not */ #define HCLGE_BUF_UNIT_S 7 /* Buf size is united by 128 bytes */ -struct hclge_tx_buff_alloc { +struct hclge_tx_buff_alloc_cmd { __le16 tx_pkt_buff[HCLGE_TC_NUM]; u8 tx_buff_rsv[8]; }; -struct hclge_rx_priv_buff { +struct hclge_rx_priv_buff_cmd { __le16 buf_num[HCLGE_TC_NUM]; __le16 shared_buf; u8 rsv[6]; }; -struct hclge_query_version { +struct hclge_query_version_cmd { __le32 firmware; __le32 firmware_rsv[5]; }; @@ -311,6 +316,7 @@ struct hclge_tc_thrd { struct hclge_priv_buf { struct hclge_waterline wl; /* Waterline for low and high*/ u32 buf_size; /* TC private buffer size */ + u32 tx_buf_size; u32 enable; /* Enable TC private buffer or not */ }; @@ -321,15 +327,20 @@ struct hclge_shared_buf { u32 buf_size; }; +struct hclge_pkt_buf_alloc { + struct hclge_priv_buf priv_buf[HCLGE_MAX_TC_NUM]; + struct hclge_shared_buf s_buf; +}; + #define HCLGE_RX_COM_WL_EN_B 15 -struct hclge_rx_com_wl_buf { +struct hclge_rx_com_wl_buf_cmd { __le16 high_wl; __le16 low_wl; u8 rsv[20]; }; #define HCLGE_RX_PKT_EN_B 15 -struct hclge_rx_pkt_buf { +struct hclge_rx_pkt_buf_cmd { __le16 high_pkt; __le16 low_pkt; u8 rsv[20]; @@ -342,7 +353,7 @@ struct hclge_rx_pkt_buf { #define HCLGE_PF_MAC_NUM_MASK 0x3 #define HCLGE_PF_STATE_MAIN BIT(HCLGE_PF_STATE_MAIN_B) #define HCLGE_PF_STATE_DONE BIT(HCLGE_PF_STATE_DONE_B) -struct hclge_func_status { +struct hclge_func_status_cmd { __le32 vf_rst_state[4]; u8 pf_state; u8 mac_id; @@ -353,7 +364,7 @@ struct hclge_func_status { u8 rsv[2]; }; -struct hclge_pf_res { +struct hclge_pf_res_cmd { __le16 tqp_num; __le16 buf_size; __le16 msixcap_localid_ba_nic; @@ -366,30 +377,30 @@ struct hclge_pf_res { }; #define HCLGE_CFG_OFFSET_S 0 -#define HCLGE_CFG_OFFSET_M 0xfffff /* Byte (8-10.3) */ +#define HCLGE_CFG_OFFSET_M GENMASK(19, 0) #define HCLGE_CFG_RD_LEN_S 24 -#define HCLGE_CFG_RD_LEN_M (0xf << HCLGE_CFG_RD_LEN_S) +#define HCLGE_CFG_RD_LEN_M GENMASK(27, 24) #define HCLGE_CFG_RD_LEN_BYTES 16 #define HCLGE_CFG_RD_LEN_UNIT 4 #define HCLGE_CFG_VMDQ_S 0 -#define HCLGE_CFG_VMDQ_M (0xff << HCLGE_CFG_VMDQ_S) +#define HCLGE_CFG_VMDQ_M GENMASK(7, 0) #define HCLGE_CFG_TC_NUM_S 8 -#define HCLGE_CFG_TC_NUM_M (0xff << HCLGE_CFG_TC_NUM_S) +#define HCLGE_CFG_TC_NUM_M GENMASK(15, 8) #define HCLGE_CFG_TQP_DESC_N_S 16 -#define HCLGE_CFG_TQP_DESC_N_M (0xffff << HCLGE_CFG_TQP_DESC_N_S) +#define HCLGE_CFG_TQP_DESC_N_M GENMASK(31, 16) #define HCLGE_CFG_PHY_ADDR_S 0 -#define HCLGE_CFG_PHY_ADDR_M (0x1f << HCLGE_CFG_PHY_ADDR_S) +#define HCLGE_CFG_PHY_ADDR_M GENMASK(7, 0) #define HCLGE_CFG_MEDIA_TP_S 8 -#define HCLGE_CFG_MEDIA_TP_M (0xff << HCLGE_CFG_MEDIA_TP_S) +#define HCLGE_CFG_MEDIA_TP_M GENMASK(15, 8) #define HCLGE_CFG_RX_BUF_LEN_S 16 -#define HCLGE_CFG_RX_BUF_LEN_M (0xffff << HCLGE_CFG_RX_BUF_LEN_S) +#define HCLGE_CFG_RX_BUF_LEN_M GENMASK(31, 16) #define HCLGE_CFG_MAC_ADDR_H_S 0 -#define HCLGE_CFG_MAC_ADDR_H_M (0xffff << HCLGE_CFG_MAC_ADDR_H_S) +#define HCLGE_CFG_MAC_ADDR_H_M GENMASK(15, 0) #define HCLGE_CFG_DEFAULT_SPEED_S 16 -#define HCLGE_CFG_DEFAULT_SPEED_M (0xff << HCLGE_CFG_DEFAULT_SPEED_S) +#define HCLGE_CFG_DEFAULT_SPEED_M GENMASK(23, 16) -struct hclge_cfg_param { +struct hclge_cfg_param_cmd { __le32 offset; __le32 rsv; __le32 param[4]; @@ -399,7 +410,7 @@ struct hclge_cfg_param { #define HCLGE_DESC_NUM 0x40 #define HCLGE_ALLOC_VALID_B 0 -struct hclge_vf_num { +struct hclge_vf_num_cmd { u8 alloc_valid; u8 rsv[23]; }; @@ -407,13 +418,13 @@ struct hclge_vf_num { #define HCLGE_RSS_DEFAULT_OUTPORT_B 4 #define HCLGE_RSS_HASH_KEY_OFFSET_B 4 #define HCLGE_RSS_HASH_KEY_NUM 16 -struct hclge_rss_config { +struct hclge_rss_config_cmd { u8 hash_config; u8 rsv[7]; u8 hash_key[HCLGE_RSS_HASH_KEY_NUM]; }; -struct hclge_rss_input_tuple { +struct hclge_rss_input_tuple_cmd { u8 ipv4_tcp_en; u8 ipv4_udp_en; u8 ipv4_sctp_en; @@ -427,26 +438,26 @@ struct hclge_rss_input_tuple { #define HCLGE_RSS_CFG_TBL_SIZE 16 -struct hclge_rss_indirection_table { - u16 start_table_index; - u16 rss_set_bitmap; +struct hclge_rss_indirection_table_cmd { + __le16 start_table_index; + __le16 rss_set_bitmap; u8 rsv[4]; u8 rss_result[HCLGE_RSS_CFG_TBL_SIZE]; }; #define HCLGE_RSS_TC_OFFSET_S 0 -#define HCLGE_RSS_TC_OFFSET_M (0x3ff << HCLGE_RSS_TC_OFFSET_S) +#define HCLGE_RSS_TC_OFFSET_M GENMASK(9, 0) #define HCLGE_RSS_TC_SIZE_S 12 -#define HCLGE_RSS_TC_SIZE_M (0x7 << HCLGE_RSS_TC_SIZE_S) +#define HCLGE_RSS_TC_SIZE_M GENMASK(14, 12) #define HCLGE_RSS_TC_VALID_B 15 -struct hclge_rss_tc_mode { - u16 rss_tc_mode[HCLGE_MAX_TC_NUM]; +struct hclge_rss_tc_mode_cmd { + __le16 rss_tc_mode[HCLGE_MAX_TC_NUM]; u8 rsv[8]; }; #define HCLGE_LINK_STS_B 0 #define HCLGE_LINK_STATUS BIT(HCLGE_LINK_STS_B) -struct hclge_link_status { +struct hclge_link_status_cmd { u8 status; u8 rsv[23]; }; @@ -461,7 +472,7 @@ struct hclge_promisc_param { #define HCLGE_PROMISC_EN_UC 0x1 #define HCLGE_PROMISC_EN_MC 0x2 #define HCLGE_PROMISC_EN_BC 0x4 -struct hclge_promisc_cfg { +struct hclge_promisc_cfg_cmd { u8 flag; u8 vf_id; __le16 rsv0; @@ -489,18 +500,18 @@ enum hclge_promisc_type { #define HCLGE_MAC_TX_UNDER_MIN_ERR_B 21 #define HCLGE_MAC_TX_OVERSIZE_TRUNCATE_B 22 -struct hclge_config_mac_mode { +struct hclge_config_mac_mode_cmd { __le32 txrx_pad_fcs_loop_en; u8 rsv[20]; }; #define HCLGE_CFG_SPEED_S 0 -#define HCLGE_CFG_SPEED_M (0x3f << HCLGE_CFG_SPEED_S) +#define HCLGE_CFG_SPEED_M GENMASK(5, 0) #define HCLGE_CFG_DUPLEX_B 7 #define HCLGE_CFG_DUPLEX_M BIT(HCLGE_CFG_DUPLEX_B) -struct hclge_config_mac_speed_dup { +struct hclge_config_mac_speed_dup_cmd { u8 speed_dup; #define HCLGE_CFG_MAC_SPEED_CHANGE_EN_B 0 @@ -512,17 +523,17 @@ struct hclge_config_mac_speed_dup { #define HCLGE_QUERY_AN_B 0 #define HCLGE_QUERY_DUPLEX_B 2 -#define HCLGE_QUERY_SPEED_M (0x1f << HCLGE_QUERY_SPEED_S) +#define HCLGE_QUERY_SPEED_M GENMASK(4, 0) #define HCLGE_QUERY_AN_M BIT(HCLGE_QUERY_AN_B) #define HCLGE_QUERY_DUPLEX_M BIT(HCLGE_QUERY_DUPLEX_B) -struct hclge_query_an_speed_dup { +struct hclge_query_an_speed_dup_cmd { u8 an_syn_dup_speed; u8 pause; u8 rsv[23]; }; -#define HCLGE_RING_ID_MASK 0x3ff +#define HCLGE_RING_ID_MASK GENMASK(9, 0) #define HCLGE_TQP_ENABLE_B 0 #define HCLGE_MAC_CFG_AN_EN_B 0 @@ -533,7 +544,7 @@ struct hclge_query_an_speed_dup { #define HCLGE_MAC_CFG_AN_EN BIT(HCLGE_MAC_CFG_AN_EN_B) -struct hclge_config_auto_neg { +struct hclge_config_auto_neg_cmd { __le32 cfg_an_cmd_flag; u8 rsv[20]; }; @@ -542,7 +553,7 @@ struct hclge_config_auto_neg { #define HCLGE_MAC_MAX_MTU 9728 #define HCLGE_MAC_UPLINK_PORT 0x100 -struct hclge_config_max_frm_size { +struct hclge_config_max_frm_size_cmd { __le16 max_frm_size; u8 rsv[22]; }; @@ -559,10 +570,10 @@ enum hclge_mac_vlan_tbl_opcode { #define HCLGE_MAC_EPORT_SW_EN_B 0xc #define HCLGE_MAC_EPORT_TYPE_B 0xb #define HCLGE_MAC_EPORT_VFID_S 0x3 -#define HCLGE_MAC_EPORT_VFID_M (0xff << HCLGE_MAC_EPORT_VFID_S) +#define HCLGE_MAC_EPORT_VFID_M GENMASK(10, 3) #define HCLGE_MAC_EPORT_PFID_S 0x0 -#define HCLGE_MAC_EPORT_PFID_M (0x7 << HCLGE_MAC_EPORT_PFID_S) -struct hclge_mac_vlan_tbl_entry { +#define HCLGE_MAC_EPORT_PFID_M GENMASK(2, 0) +struct hclge_mac_vlan_tbl_entry_cmd { u8 flags; u8 resp_code; __le16 vlan_tag; @@ -577,15 +588,15 @@ struct hclge_mac_vlan_tbl_entry { }; #define HCLGE_CFG_MTA_MAC_SEL_S 0x0 -#define HCLGE_CFG_MTA_MAC_SEL_M (0x3 << HCLGE_CFG_MTA_MAC_SEL_S) +#define HCLGE_CFG_MTA_MAC_SEL_M GENMASK(1, 0) #define HCLGE_CFG_MTA_MAC_EN_B 0x7 -struct hclge_mta_filter_mode { +struct hclge_mta_filter_mode_cmd { u8 dmac_sel_en; /* Use lowest 2 bit as sel_mode, bit 7 as enable */ u8 rsv[23]; }; #define HCLGE_CFG_FUNC_MTA_ACCEPT_B 0x0 -struct hclge_cfg_func_mta_filter { +struct hclge_cfg_func_mta_filter_cmd { u8 accept; /* Only used lowest 1 bit */ u8 function_id; u8 rsv[22]; @@ -593,14 +604,14 @@ struct hclge_cfg_func_mta_filter { #define HCLGE_CFG_MTA_ITEM_ACCEPT_B 0x0 #define HCLGE_CFG_MTA_ITEM_IDX_S 0x0 -#define HCLGE_CFG_MTA_ITEM_IDX_M (0xfff << HCLGE_CFG_MTA_ITEM_IDX_S) -struct hclge_cfg_func_mta_item { - u16 item_idx; /* Only used lowest 12 bit */ +#define HCLGE_CFG_MTA_ITEM_IDX_M GENMASK(11, 0) +struct hclge_cfg_func_mta_item_cmd { + __le16 item_idx; /* Only used lowest 12 bit */ u8 accept; /* Only used lowest 1 bit */ u8 rsv[21]; }; -struct hclge_mac_vlan_add { +struct hclge_mac_vlan_add_cmd { __le16 flags; __le16 mac_addr_hi16; __le32 mac_addr_lo32; @@ -613,7 +624,7 @@ struct hclge_mac_vlan_add { }; #define HNS3_MAC_VLAN_CFG_FLAG_BIT 0 -struct hclge_mac_vlan_remove { +struct hclge_mac_vlan_remove_cmd { __le16 flags; __le16 mac_addr_hi16; __le32 mac_addr_lo32; @@ -625,21 +636,21 @@ struct hclge_mac_vlan_remove { u8 rsv[4]; }; -struct hclge_vlan_filter_ctrl { +struct hclge_vlan_filter_ctrl_cmd { u8 vlan_type; u8 vlan_fe; u8 rsv[22]; }; -struct hclge_vlan_filter_pf_cfg { +struct hclge_vlan_filter_pf_cfg_cmd { u8 vlan_offset; u8 vlan_cfg; u8 rsv[2]; u8 vlan_offset_bitmap[20]; }; -struct hclge_vlan_filter_vf_cfg { - u16 vlan_id; +struct hclge_vlan_filter_vf_cfg_cmd { + __le16 vlan_id; u8 resp_code; u8 rsv; u8 vlan_cfg; @@ -647,14 +658,14 @@ struct hclge_vlan_filter_vf_cfg { u8 vf_bitmap[16]; }; -struct hclge_cfg_com_tqp_queue { +struct hclge_cfg_com_tqp_queue_cmd { __le16 tqp_id; __le16 stream_id; u8 enable; u8 rsv[19]; }; -struct hclge_cfg_tx_queue_pointer { +struct hclge_cfg_tx_queue_pointer_cmd { __le16 tqp_id; __le16 tx_tail; __le16 tx_head; @@ -664,12 +675,12 @@ struct hclge_cfg_tx_queue_pointer { }; #define HCLGE_TSO_MSS_MIN_S 0 -#define HCLGE_TSO_MSS_MIN_M (0x3FFF << HCLGE_TSO_MSS_MIN_S) +#define HCLGE_TSO_MSS_MIN_M GENMASK(13, 0) #define HCLGE_TSO_MSS_MAX_S 16 -#define HCLGE_TSO_MSS_MAX_M (0x3FFF << HCLGE_TSO_MSS_MAX_S) +#define HCLGE_TSO_MSS_MAX_M GENMASK(29, 16) -struct hclge_cfg_tso_status { +struct hclge_cfg_tso_status_cmd { __le16 tso_mss_min; __le16 tso_mss_max; u8 rsv[20]; @@ -679,13 +690,20 @@ struct hclge_cfg_tso_status { #define HCLGE_TSO_MSS_MAX 9668 #define HCLGE_TQP_RESET_B 0 -struct hclge_reset_tqp_queue { +struct hclge_reset_tqp_queue_cmd { __le16 tqp_id; u8 reset_req; u8 ready_to_reset; u8 rsv[20]; }; +#define HCLGE_CFG_RESET_MAC_B 3 +#define HCLGE_CFG_RESET_FUNC_B 7 +struct hclge_reset_cmd { + u8 mac_func_reset; + u8 fun_reset_vfid; + u8 rsv[22]; +}; #define HCLGE_DEFAULT_TX_BUF 0x4000 /* 16k bytes */ #define HCLGE_TOTAL_PKT_BUF 0x108000 /* 1.03125M bytes */ #define HCLGE_DEFAULT_DV 0xA000 /* 40k byte */ @@ -733,6 +751,7 @@ struct hclge_hw; int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num); void hclge_cmd_setup_basic_desc(struct hclge_desc *desc, enum hclge_opcode_type opcode, bool is_read); +void hclge_cmd_reuse_desc(struct hclge_desc *desc, bool is_read); int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev, struct hclge_promisc_param *param); @@ -743,4 +762,5 @@ enum hclge_cmd_status hclge_cmd_mdio_read(struct hclge_hw *hw, struct hclge_desc *desc); void hclge_destroy_cmd_queue(struct hclge_hw *hw); +int hclge_cmd_queue_init(struct hclge_dev *hdev); #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c new file mode 100644 index 000000000000..5018d6633133 --- /dev/null +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c @@ -0,0 +1,347 @@ +/* + * Copyright (c) 2016-2017 Hisilicon Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include "hclge_main.h" +#include "hclge_tm.h" +#include "hnae3.h" + +#define BW_PERCENT 100 + +static int hclge_ieee_ets_to_tm_info(struct hclge_dev *hdev, + struct ieee_ets *ets) +{ + u8 i; + + for (i = 0; i < HNAE3_MAX_TC; i++) { + switch (ets->tc_tsa[i]) { + case IEEE_8021QAZ_TSA_STRICT: + hdev->tm_info.tc_info[i].tc_sch_mode = + HCLGE_SCH_MODE_SP; + hdev->tm_info.pg_info[0].tc_dwrr[i] = 0; + break; + case IEEE_8021QAZ_TSA_ETS: + hdev->tm_info.tc_info[i].tc_sch_mode = + HCLGE_SCH_MODE_DWRR; + hdev->tm_info.pg_info[0].tc_dwrr[i] = + ets->tc_tx_bw[i]; + break; + default: + /* Hardware only supports SP (strict priority) + * or ETS (enhanced transmission selection) + * algorithms, if we receive some other value + * from dcbnl, then throw an error. + */ + return -EINVAL; + } + } + + return hclge_tm_prio_tc_info_update(hdev, ets->prio_tc); +} + +static void hclge_tm_info_to_ieee_ets(struct hclge_dev *hdev, + struct ieee_ets *ets) +{ + u32 i; + + memset(ets, 0, sizeof(*ets)); + ets->willing = 1; + ets->ets_cap = hdev->tc_max; + + for (i = 0; i < HNAE3_MAX_TC; i++) { + ets->prio_tc[i] = hdev->tm_info.prio_tc[i]; + ets->tc_tx_bw[i] = hdev->tm_info.pg_info[0].tc_dwrr[i]; + + if (hdev->tm_info.tc_info[i].tc_sch_mode == + HCLGE_SCH_MODE_SP) + ets->tc_tsa[i] = IEEE_8021QAZ_TSA_STRICT; + else + ets->tc_tsa[i] = IEEE_8021QAZ_TSA_ETS; + } +} + +/* IEEE std */ +static int hclge_ieee_getets(struct hnae3_handle *h, struct ieee_ets *ets) +{ + struct hclge_vport *vport = hclge_get_vport(h); + struct hclge_dev *hdev = vport->back; + + hclge_tm_info_to_ieee_ets(hdev, ets); + + return 0; +} + +static int hclge_ets_validate(struct hclge_dev *hdev, struct ieee_ets *ets, + u8 *tc, bool *changed) +{ + u32 total_ets_bw = 0; + u8 max_tc = 0; + u8 i; + + for (i = 0; i < HNAE3_MAX_TC; i++) { + if (ets->prio_tc[i] >= hdev->tc_max || + i >= hdev->tc_max) + return -EINVAL; + + if (ets->prio_tc[i] != hdev->tm_info.prio_tc[i]) + *changed = true; + + if (ets->prio_tc[i] > max_tc) + max_tc = ets->prio_tc[i]; + + switch (ets->tc_tsa[i]) { + case IEEE_8021QAZ_TSA_STRICT: + if (hdev->tm_info.tc_info[i].tc_sch_mode != + HCLGE_SCH_MODE_SP) + *changed = true; + break; + case IEEE_8021QAZ_TSA_ETS: + if (hdev->tm_info.tc_info[i].tc_sch_mode != + HCLGE_SCH_MODE_DWRR) + *changed = true; + + total_ets_bw += ets->tc_tx_bw[i]; + break; + default: + return -EINVAL; + } + } + + if (total_ets_bw != BW_PERCENT) + return -EINVAL; + + *tc = max_tc + 1; + if (*tc != hdev->tm_info.num_tc) + *changed = true; + + return 0; +} + +static int hclge_map_update(struct hnae3_handle *h) +{ + struct hclge_vport *vport = hclge_get_vport(h); + struct hclge_dev *hdev = vport->back; + int ret; + + ret = hclge_tm_map_cfg(hdev); + if (ret) + return ret; + + ret = hclge_tm_schd_mode_hw(hdev); + if (ret) + return ret; + + ret = hclge_pause_setup_hw(hdev); + if (ret) + return ret; + + ret = hclge_buffer_alloc(hdev); + if (ret) + return ret; + + return hclge_rss_init_hw(hdev); +} + +static int hclge_client_setup_tc(struct hclge_dev *hdev) +{ + struct hclge_vport *vport = hdev->vport; + struct hnae3_client *client; + struct hnae3_handle *handle; + int ret; + u32 i; + + for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { + handle = &vport[i].nic; + client = handle->client; + + if (!client || !client->ops || !client->ops->setup_tc) + continue; + + ret = client->ops->setup_tc(handle, hdev->tm_info.num_tc); + if (ret) + return ret; + } + + return 0; +} + +static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets) +{ + struct hclge_vport *vport = hclge_get_vport(h); + struct hclge_dev *hdev = vport->back; + bool map_changed = false; + u8 num_tc = 0; + int ret; + + if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) || + hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE) + return -EINVAL; + + ret = hclge_ets_validate(hdev, ets, &num_tc, &map_changed); + if (ret) + return ret; + + hclge_tm_schd_info_update(hdev, num_tc); + + ret = hclge_ieee_ets_to_tm_info(hdev, ets); + if (ret) + return ret; + + if (map_changed) { + ret = hclge_client_setup_tc(hdev); + if (ret) + return ret; + } + + return hclge_tm_dwrr_cfg(hdev); +} + +static int hclge_ieee_getpfc(struct hnae3_handle *h, struct ieee_pfc *pfc) +{ + struct hclge_vport *vport = hclge_get_vport(h); + struct hclge_dev *hdev = vport->back; + u8 i, j, pfc_map, *prio_tc; + + memset(pfc, 0, sizeof(*pfc)); + pfc->pfc_cap = hdev->pfc_max; + prio_tc = hdev->tm_info.prio_tc; + pfc_map = hdev->tm_info.hw_pfc_map; + + /* Pfc setting is based on TC */ + for (i = 0; i < hdev->tm_info.num_tc; i++) { + for (j = 0; j < HNAE3_MAX_USER_PRIO; j++) { + if ((prio_tc[j] == i) && (pfc_map & BIT(i))) + pfc->pfc_en |= BIT(j); + } + } + + return 0; +} + +static int hclge_ieee_setpfc(struct hnae3_handle *h, struct ieee_pfc *pfc) +{ + struct hclge_vport *vport = hclge_get_vport(h); + struct hclge_dev *hdev = vport->back; + u8 i, j, pfc_map, *prio_tc; + + if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) || + hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE) + return -EINVAL; + + prio_tc = hdev->tm_info.prio_tc; + pfc_map = 0; + + for (i = 0; i < hdev->tm_info.num_tc; i++) { + for (j = 0; j < HNAE3_MAX_USER_PRIO; j++) { + if ((prio_tc[j] == i) && (pfc->pfc_en & BIT(j))) { + pfc_map |= BIT(i); + break; + } + } + } + + if (pfc_map == hdev->tm_info.hw_pfc_map) + return 0; + + hdev->tm_info.hw_pfc_map = pfc_map; + + return hclge_pause_setup_hw(hdev); +} + +/* DCBX configuration */ +static u8 hclge_getdcbx(struct hnae3_handle *h) +{ + struct hclge_vport *vport = hclge_get_vport(h); + struct hclge_dev *hdev = vport->back; + + if (hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE) + return 0; + + return hdev->dcbx_cap; +} + +static u8 hclge_setdcbx(struct hnae3_handle *h, u8 mode) +{ + struct hclge_vport *vport = hclge_get_vport(h); + struct hclge_dev *hdev = vport->back; + + /* No support for LLD_MANAGED modes or CEE */ + if ((mode & DCB_CAP_DCBX_LLD_MANAGED) || + (mode & DCB_CAP_DCBX_VER_CEE) || + !(mode & DCB_CAP_DCBX_HOST)) + return 1; + + hdev->dcbx_cap = mode; + + return 0; +} + +/* Set up TC for hardware offloaded mqprio in channel mode */ +static int hclge_setup_tc(struct hnae3_handle *h, u8 tc, u8 *prio_tc) +{ + struct hclge_vport *vport = hclge_get_vport(h); + struct hclge_dev *hdev = vport->back; + int ret; + + if (hdev->flag & HCLGE_FLAG_DCB_ENABLE) + return -EINVAL; + + if (tc > hdev->tc_max) { + dev_err(&hdev->pdev->dev, + "setup tc failed, tc(%u) > tc_max(%u)\n", + tc, hdev->tc_max); + return -EINVAL; + } + + hclge_tm_schd_info_update(hdev, tc); + + ret = hclge_tm_prio_tc_info_update(hdev, prio_tc); + if (ret) + return ret; + + ret = hclge_tm_init_hw(hdev); + if (ret) + return ret; + + hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE; + + if (tc > 1) + hdev->flag |= HCLGE_FLAG_MQPRIO_ENABLE; + else + hdev->flag &= ~HCLGE_FLAG_MQPRIO_ENABLE; + + return 0; +} + +static const struct hnae3_dcb_ops hns3_dcb_ops = { + .ieee_getets = hclge_ieee_getets, + .ieee_setets = hclge_ieee_setets, + .ieee_getpfc = hclge_ieee_getpfc, + .ieee_setpfc = hclge_ieee_setpfc, + .getdcbx = hclge_getdcbx, + .setdcbx = hclge_setdcbx, + .map_update = hclge_map_update, + .setup_tc = hclge_setup_tc, +}; + +void hclge_dcb_ops_set(struct hclge_dev *hdev) +{ + struct hclge_vport *vport = hdev->vport; + struct hnae3_knic_private_info *kinfo; + + /* Hdev does not support DCB or vport is + * not a pf, then dcb_ops is not set. + */ + if (!hnae3_dev_dcb_supported(hdev) || + vport->vport_id != 0) + return; + + kinfo = &vport->nic.kinfo; + kinfo->dcb_ops = &hns3_dcb_ops; + hdev->dcbx_cap = DCB_CAP_DCBX_VER_IEEE | DCB_CAP_DCBX_HOST; +} diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h new file mode 100644 index 000000000000..7d808ee96694 --- /dev/null +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h @@ -0,0 +1,21 @@ +/* + * Copyright (c) 2016~2017 Hisilicon Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __HCLGE_DCB_H__ +#define __HCLGE_DCB_H__ + +#include "hclge_main.h" + +#ifdef CONFIG_HNS3_DCB +void hclge_dcb_ops_set(struct hclge_dev *hdev); +#else +static inline void hclge_dcb_ops_set(struct hclge_dev *hdev) {} +#endif + +#endif /* __HCLGE_DCB_H__ */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index c1cdbfd83bdb..59ed806a52c3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -19,6 +19,7 @@ #include <linux/platform_device.h> #include "hclge_cmd.h" +#include "hclge_dcb.h" #include "hclge_main.h" #include "hclge_mdio.h" #include "hclge_tm.h" @@ -30,11 +31,11 @@ #define HCLGE_64BIT_STATS_FIELD_OFF(f) (offsetof(struct hclge_64_bit_stats, f)) #define HCLGE_32BIT_STATS_FIELD_OFF(f) (offsetof(struct hclge_32_bit_stats, f)) -static int hclge_rss_init_hw(struct hclge_dev *hdev); static int hclge_set_mta_filter_mode(struct hclge_dev *hdev, enum hclge_mta_dmac_sel_type mta_mac_sel, bool enable); static int hclge_init_vlan_config(struct hclge_dev *hdev); +static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev); static struct hnae3_ae_algo ae_algo; @@ -362,7 +363,7 @@ static int hclge_64_bit_update_stats(struct hclge_dev *hdev) #define HCLGE_64_BIT_RTN_DATANUM 4 u64 *data = (u64 *)(&hdev->hw_stats.all_64_bit_stats); struct hclge_desc desc[HCLGE_64_BIT_CMD_NUM]; - u64 *desc_data; + __le64 *desc_data; int i, k, n; int ret; @@ -376,14 +377,14 @@ static int hclge_64_bit_update_stats(struct hclge_dev *hdev) for (i = 0; i < HCLGE_64_BIT_CMD_NUM; i++) { if (unlikely(i == 0)) { - desc_data = (u64 *)(&desc[i].data[0]); + desc_data = (__le64 *)(&desc[i].data[0]); n = HCLGE_64_BIT_RTN_DATANUM - 1; } else { - desc_data = (u64 *)(&desc[i]); + desc_data = (__le64 *)(&desc[i]); n = HCLGE_64_BIT_RTN_DATANUM; } for (k = 0; k < n; k++) { - *data++ += cpu_to_le64(*desc_data); + *data++ += le64_to_cpu(*desc_data); desc_data++; } } @@ -411,7 +412,7 @@ static int hclge_32_bit_update_stats(struct hclge_dev *hdev) struct hclge_desc desc[HCLGE_32_BIT_CMD_NUM]; struct hclge_32_bit_stats *all_32_bit_stats; - u32 *desc_data; + __le32 *desc_data; int i, k, n; u64 *data; int ret; @@ -431,21 +432,27 @@ static int hclge_32_bit_update_stats(struct hclge_dev *hdev) hclge_reset_partial_32bit_counter(all_32_bit_stats); for (i = 0; i < HCLGE_32_BIT_CMD_NUM; i++) { if (unlikely(i == 0)) { + __le16 *desc_data_16bit; + all_32_bit_stats->igu_rx_err_pkt += - cpu_to_le32(desc[i].data[0]); + le32_to_cpu(desc[i].data[0]); + + desc_data_16bit = (__le16 *)&desc[i].data[1]; all_32_bit_stats->igu_rx_no_eof_pkt += - cpu_to_le32(desc[i].data[1] & 0xffff); + le16_to_cpu(*desc_data_16bit); + + desc_data_16bit++; all_32_bit_stats->igu_rx_no_sof_pkt += - cpu_to_le32((desc[i].data[1] >> 16) & 0xffff); + le16_to_cpu(*desc_data_16bit); - desc_data = (u32 *)(&desc[i].data[2]); + desc_data = &desc[i].data[2]; n = HCLGE_32_BIT_RTN_DATANUM - 4; } else { - desc_data = (u32 *)(&desc[i]); + desc_data = (__le32 *)&desc[i]; n = HCLGE_32_BIT_RTN_DATANUM; } for (k = 0; k < n; k++) { - *data++ += cpu_to_le32(*desc_data); + *data++ += le32_to_cpu(*desc_data); desc_data++; } } @@ -460,7 +467,7 @@ static int hclge_mac_update_stats(struct hclge_dev *hdev) u64 *data = (u64 *)(&hdev->hw_stats.mac_stats); struct hclge_desc desc[HCLGE_MAC_CMD_NUM]; - u64 *desc_data; + __le64 *desc_data; int i, k, n; int ret; @@ -475,14 +482,14 @@ static int hclge_mac_update_stats(struct hclge_dev *hdev) for (i = 0; i < HCLGE_MAC_CMD_NUM; i++) { if (unlikely(i == 0)) { - desc_data = (u64 *)(&desc[i].data[0]); + desc_data = (__le64 *)(&desc[i].data[0]); n = HCLGE_RTN_DATA_NUM - 2; } else { - desc_data = (u64 *)(&desc[i]); + desc_data = (__le64 *)(&desc[i]); n = HCLGE_RTN_DATA_NUM; } for (k = 0; k < n; k++) { - *data++ += cpu_to_le64(*desc_data); + *data++ += le64_to_cpu(*desc_data); desc_data++; } } @@ -508,7 +515,7 @@ static int hclge_tqps_update_stats(struct hnae3_handle *handle) HCLGE_OPC_QUERY_RX_STATUS, true); - desc[0].data[0] = (tqp->index & 0x1ff); + desc[0].data[0] = cpu_to_le32((tqp->index & 0x1ff)); ret = hclge_cmd_send(&hdev->hw, desc, 1); if (ret) { dev_err(&hdev->pdev->dev, @@ -517,7 +524,7 @@ static int hclge_tqps_update_stats(struct hnae3_handle *handle) return ret; } tqp->tqp_stats.rcb_rx_ring_pktnum_rcd += - cpu_to_le32(desc[0].data[4]); + le32_to_cpu(desc[0].data[4]); } for (i = 0; i < kinfo->num_tqps; i++) { @@ -528,7 +535,7 @@ static int hclge_tqps_update_stats(struct hnae3_handle *handle) HCLGE_OPC_QUERY_TX_STATUS, true); - desc[0].data[0] = (tqp->index & 0x1ff); + desc[0].data[0] = cpu_to_le32((tqp->index & 0x1ff)); ret = hclge_cmd_send(&hdev->hw, desc, 1); if (ret) { dev_err(&hdev->pdev->dev, @@ -537,7 +544,7 @@ static int hclge_tqps_update_stats(struct hnae3_handle *handle) return ret; } tqp->tqp_stats.rcb_tx_ring_pktnum_rcd += - cpu_to_le32(desc[0].data[4]); + le32_to_cpu(desc[0].data[4]); } return 0; @@ -552,12 +559,12 @@ static u64 *hclge_tqps_get_stats(struct hnae3_handle *handle, u64 *data) for (i = 0; i < kinfo->num_tqps; i++) { tqp = container_of(kinfo->tqp[i], struct hclge_tqp, q); - *buff++ = cpu_to_le64(tqp->tqp_stats.rcb_tx_ring_pktnum_rcd); + *buff++ = tqp->tqp_stats.rcb_tx_ring_pktnum_rcd; } for (i = 0; i < kinfo->num_tqps; i++) { tqp = container_of(kinfo->tqp[i], struct hclge_tqp, q); - *buff++ = cpu_to_le64(tqp->tqp_stats.rcb_rx_ring_pktnum_rcd); + *buff++ = tqp->tqp_stats.rcb_rx_ring_pktnum_rcd; } return buff; @@ -820,7 +827,7 @@ static void hclge_get_stats(struct hnae3_handle *handle, u64 *data) } static int hclge_parse_func_status(struct hclge_dev *hdev, - struct hclge_func_status *status) + struct hclge_func_status_cmd *status) { if (!(status->pf_state & HCLGE_PF_STATE_DONE)) return -EINVAL; @@ -831,19 +838,18 @@ static int hclge_parse_func_status(struct hclge_dev *hdev, else hdev->flag &= ~HCLGE_FLAG_MAIN; - hdev->num_req_vfs = status->vf_num / status->pf_num; return 0; } static int hclge_query_function_status(struct hclge_dev *hdev) { - struct hclge_func_status *req; + struct hclge_func_status_cmd *req; struct hclge_desc desc; int timeout = 0; int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_FUNC_STATUS, true); - req = (struct hclge_func_status *)desc.data; + req = (struct hclge_func_status_cmd *)desc.data; do { ret = hclge_cmd_send(&hdev->hw, &desc, 1); @@ -868,7 +874,7 @@ static int hclge_query_function_status(struct hclge_dev *hdev) static int hclge_query_pf_resource(struct hclge_dev *hdev) { - struct hclge_pf_res *req; + struct hclge_pf_res_cmd *req; struct hclge_desc desc; int ret; @@ -880,19 +886,19 @@ static int hclge_query_pf_resource(struct hclge_dev *hdev) return ret; } - req = (struct hclge_pf_res *)desc.data; + req = (struct hclge_pf_res_cmd *)desc.data; hdev->num_tqps = __le16_to_cpu(req->tqp_num); hdev->pkt_buf_size = __le16_to_cpu(req->buf_size) << HCLGE_BUF_UNIT_S; if (hnae3_dev_roce_supported(hdev)) { - hdev->num_roce_msix = + hdev->num_roce_msi = hnae_get_field(__le16_to_cpu(req->pf_intr_vector_number), HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S); /* PF should have NIC vectors and Roce vectors, * NIC vectors are queued before Roce vectors. */ - hdev->num_msi = hdev->num_roce_msix + HCLGE_ROCE_VECTOR_OFFSET; + hdev->num_msi = hdev->num_roce_msi + HCLGE_ROCE_VECTOR_OFFSET; } else { hdev->num_msi = hnae_get_field(__le16_to_cpu(req->pf_intr_vector_number), @@ -938,12 +944,12 @@ static int hclge_parse_speed(int speed_cmd, int *speed) static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc) { - struct hclge_cfg_param *req; + struct hclge_cfg_param_cmd *req; u64 mac_addr_tmp_high; u64 mac_addr_tmp; int i; - req = (struct hclge_cfg_param *)desc[0].data; + req = (struct hclge_cfg_param_cmd *)desc[0].data; /* get the configuration */ cfg->vmdq_vport_num = hnae_get_field(__le32_to_cpu(req->param[0]), @@ -978,7 +984,7 @@ static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc) for (i = 0; i < ETH_ALEN; i++) cfg->mac_addr[i] = (mac_addr_tmp >> (8 * i)) & 0xff; - req = (struct hclge_cfg_param *)desc[1].data; + req = (struct hclge_cfg_param_cmd *)desc[1].data; cfg->numa_node_map = __le32_to_cpu(req->param[0]); } @@ -989,20 +995,21 @@ static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc) static int hclge_get_cfg(struct hclge_dev *hdev, struct hclge_cfg *hcfg) { struct hclge_desc desc[HCLGE_PF_CFG_DESC_NUM]; - struct hclge_cfg_param *req; + struct hclge_cfg_param_cmd *req; int i, ret; for (i = 0; i < HCLGE_PF_CFG_DESC_NUM; i++) { - req = (struct hclge_cfg_param *)desc[i].data; + u32 offset = 0; + + req = (struct hclge_cfg_param_cmd *)desc[i].data; hclge_cmd_setup_basic_desc(&desc[i], HCLGE_OPC_GET_CFG_PARAM, true); - hnae_set_field(req->offset, HCLGE_CFG_OFFSET_M, + hnae_set_field(offset, HCLGE_CFG_OFFSET_M, HCLGE_CFG_OFFSET_S, i * HCLGE_CFG_RD_LEN_BYTES); /* Len should be united by 4 bytes when send to hardware */ - hnae_set_field(req->offset, HCLGE_CFG_RD_LEN_M, - HCLGE_CFG_RD_LEN_S, + hnae_set_field(offset, HCLGE_CFG_RD_LEN_M, HCLGE_CFG_RD_LEN_S, HCLGE_CFG_RD_LEN_BYTES / HCLGE_CFG_RD_LEN_UNIT); - req->offset = cpu_to_le32(req->offset); + req->offset = cpu_to_le32(offset); } ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_PF_CFG_DESC_NUM); @@ -1058,7 +1065,7 @@ static int hclge_configure(struct hclge_dev *hdev) hdev->hw.mac.phy_addr = cfg.phy_addr; hdev->num_desc = cfg.tqp_desc_num; hdev->tm_info.num_pg = 1; - hdev->tm_info.num_tc = cfg.tc_num; + hdev->tc_max = cfg.tc_num; hdev->tm_info.hw_pfc_map = 0; ret = hclge_parse_speed(cfg.default_speed, &hdev->hw.mac.speed); @@ -1067,15 +1074,25 @@ static int hclge_configure(struct hclge_dev *hdev) return ret; } - if ((hdev->tm_info.num_tc > HNAE3_MAX_TC) || - (hdev->tm_info.num_tc < 1)) { + if ((hdev->tc_max > HNAE3_MAX_TC) || + (hdev->tc_max < 1)) { dev_warn(&hdev->pdev->dev, "TC num = %d.\n", - hdev->tm_info.num_tc); - hdev->tm_info.num_tc = 1; + hdev->tc_max); + hdev->tc_max = 1; + } + + /* Dev does not support DCB */ + if (!hnae3_dev_dcb_supported(hdev)) { + hdev->tc_max = 1; + hdev->pfc_max = 0; + } else { + hdev->pfc_max = hdev->tc_max; } + hdev->tm_info.num_tc = hdev->tc_max; + /* Currently not support uncontiuous tc */ - for (i = 0; i < cfg.tc_num; i++) + for (i = 0; i < hdev->tm_info.num_tc; i++) hnae_set_bit(hdev->hw_tc_map, i, 1); if (!hdev->num_vmdq_vport && !hdev->num_req_vfs) @@ -1089,16 +1106,23 @@ static int hclge_configure(struct hclge_dev *hdev) static int hclge_config_tso(struct hclge_dev *hdev, int tso_mss_min, int tso_mss_max) { - struct hclge_cfg_tso_status *req; + struct hclge_cfg_tso_status_cmd *req; struct hclge_desc desc; + u16 tso_mss; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TSO_GENERIC_CONFIG, false); - req = (struct hclge_cfg_tso_status *)desc.data; - hnae_set_field(req->tso_mss_min, HCLGE_TSO_MSS_MIN_M, + req = (struct hclge_cfg_tso_status_cmd *)desc.data; + + tso_mss = 0; + hnae_set_field(tso_mss, HCLGE_TSO_MSS_MIN_M, HCLGE_TSO_MSS_MIN_S, tso_mss_min); - hnae_set_field(req->tso_mss_max, HCLGE_TSO_MSS_MIN_M, + req->tso_mss_min = cpu_to_le16(tso_mss); + + tso_mss = 0; + hnae_set_field(tso_mss, HCLGE_TSO_MSS_MIN_M, HCLGE_TSO_MSS_MIN_S, tso_mss_max); + req->tso_mss_max = cpu_to_le16(tso_mss); return hclge_cmd_send(&hdev->hw, &desc, 1); } @@ -1134,15 +1158,15 @@ static int hclge_alloc_tqps(struct hclge_dev *hdev) static int hclge_map_tqps_to_func(struct hclge_dev *hdev, u16 func_id, u16 tqp_pid, u16 tqp_vid, bool is_pf) { - struct hclge_tqp_map *req; + struct hclge_tqp_map_cmd *req; struct hclge_desc desc; int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_SET_TQP_MAP, false); - req = (struct hclge_tqp_map *)desc.data; + req = (struct hclge_tqp_map_cmd *)desc.data; req->tqp_id = cpu_to_le16(tqp_pid); - req->tqp_vf = cpu_to_le16(func_id); + req->tqp_vf = func_id; req->tqp_flag = !is_pf << HCLGE_TQP_MAP_TYPE_B | 1 << HCLGE_TQP_MAP_EN_B; req->tqp_vid = cpu_to_le16(tqp_vid); @@ -1161,11 +1185,7 @@ static int hclge_assign_tqp(struct hclge_vport *vport, struct hnae3_queue **tqp, u16 num_tqps) { struct hclge_dev *hdev = vport->back; - int i, alloced, func_id, ret; - bool is_pf; - - func_id = vport->vport_id; - is_pf = (vport->vport_id == 0) ? true : false; + int i, alloced; for (i = 0, alloced = 0; i < hdev->num_tqps && alloced < num_tqps; i++) { @@ -1174,12 +1194,6 @@ static int hclge_assign_tqp(struct hclge_vport *vport, hdev->htqp[i].q.tqp_index = alloced; tqp[alloced] = &hdev->htqp[i].q; hdev->htqp[i].alloced = true; - ret = hclge_map_tqps_to_func(hdev, func_id, - hdev->htqp[i].index, - alloced, is_pf); - if (ret) - return ret; - alloced++; } } @@ -1231,6 +1245,49 @@ static int hclge_knic_setup(struct hclge_vport *vport, u16 num_tqps) return 0; } +static int hclge_map_tqp_to_vport(struct hclge_dev *hdev, + struct hclge_vport *vport) +{ + struct hnae3_handle *nic = &vport->nic; + struct hnae3_knic_private_info *kinfo; + u16 i; + + kinfo = &nic->kinfo; + for (i = 0; i < kinfo->num_tqps; i++) { + struct hclge_tqp *q = + container_of(kinfo->tqp[i], struct hclge_tqp, q); + bool is_pf; + int ret; + + is_pf = !(vport->vport_id); + ret = hclge_map_tqps_to_func(hdev, vport->vport_id, q->index, + i, is_pf); + if (ret) + return ret; + } + + return 0; +} + +static int hclge_map_tqp(struct hclge_dev *hdev) +{ + struct hclge_vport *vport = hdev->vport; + u16 i, num_vport; + + num_vport = hdev->num_vmdq_vport + hdev->num_req_vfs + 1; + for (i = 0; i < num_vport; i++) { + int ret; + + ret = hclge_map_tqp_to_vport(hdev, vport); + if (ret) + return ret; + + vport++; + } + + return 0; +} + static void hclge_unic_setup(struct hclge_vport *vport, u16 num_tqps) { /* this would be initialized later */ @@ -1324,23 +1381,27 @@ static int hclge_alloc_vport(struct hclge_dev *hdev) return 0; } -static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev, u16 buf_size) +static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) { /* TX buffer size is unit by 128 byte */ #define HCLGE_BUF_SIZE_UNIT_SHIFT 7 #define HCLGE_BUF_SIZE_UPDATE_EN_MSK BIT(15) - struct hclge_tx_buff_alloc *req; + struct hclge_tx_buff_alloc_cmd *req; struct hclge_desc desc; int ret; u8 i; - req = (struct hclge_tx_buff_alloc *)desc.data; + req = (struct hclge_tx_buff_alloc_cmd *)desc.data; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TX_BUFF_ALLOC, 0); - for (i = 0; i < HCLGE_TC_NUM; i++) + for (i = 0; i < HCLGE_TC_NUM; i++) { + u32 buf_size = buf_alloc->priv_buf[i].tx_buf_size; + req->tx_pkt_buff[i] = cpu_to_le16((buf_size >> HCLGE_BUF_SIZE_UNIT_SHIFT) | HCLGE_BUF_SIZE_UPDATE_EN_MSK); + } ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { @@ -1352,9 +1413,10 @@ static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev, u16 buf_size) return 0; } -static int hclge_tx_buffer_alloc(struct hclge_dev *hdev, u32 buf_size) +static int hclge_tx_buffer_alloc(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) { - int ret = hclge_cmd_alloc_tx_buff(hdev, buf_size); + int ret = hclge_cmd_alloc_tx_buff(hdev, buf_alloc); if (ret) { dev_err(&hdev->pdev->dev, @@ -1387,13 +1449,14 @@ static int hclge_get_pfc_enalbe_num(struct hclge_dev *hdev) } /* Get the number of pfc enabled TCs, which have private buffer */ -static int hclge_get_pfc_priv_num(struct hclge_dev *hdev) +static int hclge_get_pfc_priv_num(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) { struct hclge_priv_buf *priv; int i, cnt = 0; for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { - priv = &hdev->priv_buf[i]; + priv = &buf_alloc->priv_buf[i]; if ((hdev->tm_info.hw_pfc_map & BIT(i)) && priv->enable) cnt++; @@ -1403,13 +1466,14 @@ static int hclge_get_pfc_priv_num(struct hclge_dev *hdev) } /* Get the number of pfc disabled TCs, which have private buffer */ -static int hclge_get_no_pfc_priv_num(struct hclge_dev *hdev) +static int hclge_get_no_pfc_priv_num(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) { struct hclge_priv_buf *priv; int i, cnt = 0; for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { - priv = &hdev->priv_buf[i]; + priv = &buf_alloc->priv_buf[i]; if (hdev->hw_tc_map & BIT(i) && !(hdev->tm_info.hw_pfc_map & BIT(i)) && priv->enable) @@ -1419,21 +1483,33 @@ static int hclge_get_no_pfc_priv_num(struct hclge_dev *hdev) return cnt; } -static u32 hclge_get_rx_priv_buff_alloced(struct hclge_dev *hdev) +static u32 hclge_get_rx_priv_buff_alloced(struct hclge_pkt_buf_alloc *buf_alloc) { struct hclge_priv_buf *priv; u32 rx_priv = 0; int i; for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { - priv = &hdev->priv_buf[i]; + priv = &buf_alloc->priv_buf[i]; if (priv->enable) rx_priv += priv->buf_size; } return rx_priv; } -static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev, u32 rx_all) +static u32 hclge_get_tx_buff_alloced(struct hclge_pkt_buf_alloc *buf_alloc) +{ + u32 i, total_tx_size = 0; + + for (i = 0; i < HCLGE_MAX_TC_NUM; i++) + total_tx_size += buf_alloc->priv_buf[i].tx_buf_size; + + return total_tx_size; +} + +static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc, + u32 rx_all) { u32 shared_buf_min, shared_buf_tc, shared_std; int tc_num, pfc_enable_num; @@ -1454,46 +1530,74 @@ static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev, u32 rx_all) hdev->mps; shared_std = max_t(u32, shared_buf_min, shared_buf_tc); - rx_priv = hclge_get_rx_priv_buff_alloced(hdev); + rx_priv = hclge_get_rx_priv_buff_alloced(buf_alloc); if (rx_all <= rx_priv + shared_std) return false; shared_buf = rx_all - rx_priv; - hdev->s_buf.buf_size = shared_buf; - hdev->s_buf.self.high = shared_buf; - hdev->s_buf.self.low = 2 * hdev->mps; + buf_alloc->s_buf.buf_size = shared_buf; + buf_alloc->s_buf.self.high = shared_buf; + buf_alloc->s_buf.self.low = 2 * hdev->mps; for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { if ((hdev->hw_tc_map & BIT(i)) && (hdev->tm_info.hw_pfc_map & BIT(i))) { - hdev->s_buf.tc_thrd[i].low = hdev->mps; - hdev->s_buf.tc_thrd[i].high = 2 * hdev->mps; + buf_alloc->s_buf.tc_thrd[i].low = hdev->mps; + buf_alloc->s_buf.tc_thrd[i].high = 2 * hdev->mps; } else { - hdev->s_buf.tc_thrd[i].low = 0; - hdev->s_buf.tc_thrd[i].high = hdev->mps; + buf_alloc->s_buf.tc_thrd[i].low = 0; + buf_alloc->s_buf.tc_thrd[i].high = hdev->mps; } } return true; } +static int hclge_tx_buffer_calc(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) +{ + u32 i, total_size; + + total_size = hdev->pkt_buf_size; + + /* alloc tx buffer for all enabled tc */ + for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { + struct hclge_priv_buf *priv = &buf_alloc->priv_buf[i]; + + if (total_size < HCLGE_DEFAULT_TX_BUF) + return -ENOMEM; + + if (hdev->hw_tc_map & BIT(i)) + priv->tx_buf_size = HCLGE_DEFAULT_TX_BUF; + else + priv->tx_buf_size = 0; + + total_size -= priv->tx_buf_size; + } + + return 0; +} + /* hclge_rx_buffer_calc: calculate the rx private buffer size for all TCs * @hdev: pointer to struct hclge_dev - * @tx_size: the allocated tx buffer for all TCs + * @buf_alloc: pointer to buffer calculation data * @return: 0: calculate sucessful, negative: fail */ -int hclge_rx_buffer_calc(struct hclge_dev *hdev, u32 tx_size) +static int hclge_rx_buffer_calc(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) { - u32 rx_all = hdev->pkt_buf_size - tx_size; + u32 rx_all = hdev->pkt_buf_size; int no_pfc_priv_num, pfc_priv_num; struct hclge_priv_buf *priv; int i; + rx_all -= hclge_get_tx_buff_alloced(buf_alloc); + /* When DCB is not supported, rx private * buffer is not allocated. */ if (!hnae3_dev_dcb_supported(hdev)) { - if (!hclge_is_rx_buf_ok(hdev, rx_all)) + if (!hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all)) return -ENOMEM; return 0; @@ -1501,7 +1605,7 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev, u32 tx_size) /* step 1, try to alloc private buffer for all enabled tc */ for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { - priv = &hdev->priv_buf[i]; + priv = &buf_alloc->priv_buf[i]; if (hdev->hw_tc_map & BIT(i)) { priv->enable = 1; if (hdev->tm_info.hw_pfc_map & BIT(i)) { @@ -1522,14 +1626,14 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev, u32 tx_size) } } - if (hclge_is_rx_buf_ok(hdev, rx_all)) + if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all)) return 0; /* step 2, try to decrease the buffer size of * no pfc TC's private buffer */ for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { - priv = &hdev->priv_buf[i]; + priv = &buf_alloc->priv_buf[i]; priv->enable = 0; priv->wl.low = 0; @@ -1552,18 +1656,18 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev, u32 tx_size) } } - if (hclge_is_rx_buf_ok(hdev, rx_all)) + if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all)) return 0; /* step 3, try to reduce the number of pfc disabled TCs, * which have private buffer */ /* get the total no pfc enable TC number, which have private buffer */ - no_pfc_priv_num = hclge_get_no_pfc_priv_num(hdev); + no_pfc_priv_num = hclge_get_no_pfc_priv_num(hdev, buf_alloc); /* let the last to be cleared first */ for (i = HCLGE_MAX_TC_NUM - 1; i >= 0; i--) { - priv = &hdev->priv_buf[i]; + priv = &buf_alloc->priv_buf[i]; if (hdev->hw_tc_map & BIT(i) && !(hdev->tm_info.hw_pfc_map & BIT(i))) { @@ -1575,22 +1679,22 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev, u32 tx_size) no_pfc_priv_num--; } - if (hclge_is_rx_buf_ok(hdev, rx_all) || + if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all) || no_pfc_priv_num == 0) break; } - if (hclge_is_rx_buf_ok(hdev, rx_all)) + if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all)) return 0; /* step 4, try to reduce the number of pfc enabled TCs * which have private buffer. */ - pfc_priv_num = hclge_get_pfc_priv_num(hdev); + pfc_priv_num = hclge_get_pfc_priv_num(hdev, buf_alloc); /* let the last to be cleared first */ for (i = HCLGE_MAX_TC_NUM - 1; i >= 0; i--) { - priv = &hdev->priv_buf[i]; + priv = &buf_alloc->priv_buf[i]; if (hdev->hw_tc_map & BIT(i) && hdev->tm_info.hw_pfc_map & BIT(i)) { @@ -1602,38 +1706,39 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev, u32 tx_size) pfc_priv_num--; } - if (hclge_is_rx_buf_ok(hdev, rx_all) || + if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all) || pfc_priv_num == 0) break; } - if (hclge_is_rx_buf_ok(hdev, rx_all)) + if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all)) return 0; return -ENOMEM; } -static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev) +static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) { - struct hclge_rx_priv_buff *req; + struct hclge_rx_priv_buff_cmd *req; struct hclge_desc desc; int ret; int i; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RX_PRIV_BUFF_ALLOC, false); - req = (struct hclge_rx_priv_buff *)desc.data; + req = (struct hclge_rx_priv_buff_cmd *)desc.data; /* Alloc private buffer TCs */ for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { - struct hclge_priv_buf *priv = &hdev->priv_buf[i]; + struct hclge_priv_buf *priv = &buf_alloc->priv_buf[i]; req->buf_num[i] = cpu_to_le16(priv->buf_size >> HCLGE_BUF_UNIT_S); req->buf_num[i] |= - cpu_to_le16(true << HCLGE_TC0_PRI_BUF_EN_B); + cpu_to_le16(1 << HCLGE_TC0_PRI_BUF_EN_B); } req->shared_buf = - cpu_to_le16((hdev->s_buf.buf_size >> HCLGE_BUF_UNIT_S) | + cpu_to_le16((buf_alloc->s_buf.buf_size >> HCLGE_BUF_UNIT_S) | (1 << HCLGE_TC0_PRI_BUF_EN_B)); ret = hclge_cmd_send(&hdev->hw, &desc, 1); @@ -1648,7 +1753,8 @@ static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev) #define HCLGE_PRIV_ENABLE(a) ((a) > 0 ? 1 : 0) -static int hclge_rx_priv_wl_config(struct hclge_dev *hdev) +static int hclge_rx_priv_wl_config(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) { struct hclge_rx_priv_wl_buf *req; struct hclge_priv_buf *priv; @@ -1668,7 +1774,9 @@ static int hclge_rx_priv_wl_config(struct hclge_dev *hdev) desc[i].flag &= ~cpu_to_le16(HCLGE_CMD_FLAG_NEXT); for (j = 0; j < HCLGE_TC_NUM_ONE_DESC; j++) { - priv = &hdev->priv_buf[i * HCLGE_TC_NUM_ONE_DESC + j]; + u32 idx = i * HCLGE_TC_NUM_ONE_DESC + j; + + priv = &buf_alloc->priv_buf[idx]; req->tc_wl[j].high = cpu_to_le16(priv->wl.high >> HCLGE_BUF_UNIT_S); req->tc_wl[j].high |= @@ -1693,9 +1801,10 @@ static int hclge_rx_priv_wl_config(struct hclge_dev *hdev) return 0; } -static int hclge_common_thrd_config(struct hclge_dev *hdev) +static int hclge_common_thrd_config(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) { - struct hclge_shared_buf *s_buf = &hdev->s_buf; + struct hclge_shared_buf *s_buf = &buf_alloc->s_buf; struct hclge_rx_com_thrd *req; struct hclge_desc desc[2]; struct hclge_tc_thrd *tc; @@ -1739,9 +1848,10 @@ static int hclge_common_thrd_config(struct hclge_dev *hdev) return 0; } -static int hclge_common_wl_config(struct hclge_dev *hdev) +static int hclge_common_wl_config(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) { - struct hclge_shared_buf *buf = &hdev->s_buf; + struct hclge_shared_buf *buf = &buf_alloc->s_buf; struct hclge_rx_com_wl *req; struct hclge_desc desc; int ret; @@ -1771,63 +1881,68 @@ static int hclge_common_wl_config(struct hclge_dev *hdev) int hclge_buffer_alloc(struct hclge_dev *hdev) { - u32 tx_buf_size = HCLGE_DEFAULT_TX_BUF; + struct hclge_pkt_buf_alloc *pkt_buf; int ret; - hdev->priv_buf = devm_kmalloc_array(&hdev->pdev->dev, HCLGE_MAX_TC_NUM, - sizeof(struct hclge_priv_buf), - GFP_KERNEL | __GFP_ZERO); - if (!hdev->priv_buf) + pkt_buf = kzalloc(sizeof(*pkt_buf), GFP_KERNEL); + if (!pkt_buf) return -ENOMEM; - ret = hclge_tx_buffer_alloc(hdev, tx_buf_size); + ret = hclge_tx_buffer_calc(hdev, pkt_buf); + if (ret) { + dev_err(&hdev->pdev->dev, + "could not calc tx buffer size for all TCs %d\n", ret); + goto out; + } + + ret = hclge_tx_buffer_alloc(hdev, pkt_buf); if (ret) { dev_err(&hdev->pdev->dev, "could not alloc tx buffers %d\n", ret); - return ret; + goto out; } - ret = hclge_rx_buffer_calc(hdev, tx_buf_size); + ret = hclge_rx_buffer_calc(hdev, pkt_buf); if (ret) { dev_err(&hdev->pdev->dev, "could not calc rx priv buffer size for all TCs %d\n", ret); - return ret; + goto out; } - ret = hclge_rx_priv_buf_alloc(hdev); + ret = hclge_rx_priv_buf_alloc(hdev, pkt_buf); if (ret) { dev_err(&hdev->pdev->dev, "could not alloc rx priv buffer %d\n", ret); - return ret; + goto out; } if (hnae3_dev_dcb_supported(hdev)) { - ret = hclge_rx_priv_wl_config(hdev); + ret = hclge_rx_priv_wl_config(hdev, pkt_buf); if (ret) { dev_err(&hdev->pdev->dev, "could not configure rx private waterline %d\n", ret); - return ret; + goto out; } - ret = hclge_common_thrd_config(hdev); + ret = hclge_common_thrd_config(hdev, pkt_buf); if (ret) { dev_err(&hdev->pdev->dev, "could not configure common threshold %d\n", ret); - return ret; + goto out; } } - ret = hclge_common_wl_config(hdev); - if (ret) { + ret = hclge_common_wl_config(hdev, pkt_buf); + if (ret) dev_err(&hdev->pdev->dev, "could not configure common waterline %d\n", ret); - return ret; - } - return 0; +out: + kfree(pkt_buf); + return ret; } static int hclge_init_roce_base_info(struct hclge_vport *vport) @@ -1835,7 +1950,7 @@ static int hclge_init_roce_base_info(struct hclge_vport *vport) struct hnae3_handle *roce = &vport->roce; struct hnae3_handle *nic = &vport->nic; - roce->rinfo.num_vectors = vport->back->num_roce_msix; + roce->rinfo.num_vectors = vport->back->num_roce_msi; if (vport->back->num_msi_left < vport->roce.rinfo.num_vectors || vport->back->num_msi_left == 0) @@ -1853,67 +1968,47 @@ static int hclge_init_roce_base_info(struct hclge_vport *vport) return 0; } -static int hclge_init_msix(struct hclge_dev *hdev) +static int hclge_init_msi(struct hclge_dev *hdev) { struct pci_dev *pdev = hdev->pdev; - int ret, i; - - hdev->msix_entries = devm_kcalloc(&pdev->dev, hdev->num_msi, - sizeof(struct msix_entry), - GFP_KERNEL); - if (!hdev->msix_entries) - return -ENOMEM; - - hdev->vector_status = devm_kcalloc(&pdev->dev, hdev->num_msi, - sizeof(u16), GFP_KERNEL); - if (!hdev->vector_status) - return -ENOMEM; + int vectors; + int i; - for (i = 0; i < hdev->num_msi; i++) { - hdev->msix_entries[i].entry = i; - hdev->vector_status[i] = HCLGE_INVALID_VPORT; + vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi, + PCI_IRQ_MSI | PCI_IRQ_MSIX); + if (vectors < 0) { + dev_err(&pdev->dev, + "failed(%d) to allocate MSI/MSI-X vectors\n", + vectors); + return vectors; } + if (vectors < hdev->num_msi) + dev_warn(&hdev->pdev->dev, + "requested %d MSI/MSI-X, but allocated %d MSI/MSI-X\n", + hdev->num_msi, vectors); - hdev->num_msi_left = hdev->num_msi; - hdev->base_msi_vector = hdev->pdev->irq; + hdev->num_msi = vectors; + hdev->num_msi_left = vectors; + hdev->base_msi_vector = pdev->irq; hdev->roce_base_vector = hdev->base_msi_vector + HCLGE_ROCE_VECTOR_OFFSET; - ret = pci_enable_msix_range(hdev->pdev, hdev->msix_entries, - hdev->num_msi, hdev->num_msi); - if (ret < 0) { - dev_info(&hdev->pdev->dev, - "MSI-X vector alloc failed: %d\n", ret); - return ret; - } - - return 0; -} - -static int hclge_init_msi(struct hclge_dev *hdev) -{ - struct pci_dev *pdev = hdev->pdev; - int vectors; - int i; - hdev->vector_status = devm_kcalloc(&pdev->dev, hdev->num_msi, sizeof(u16), GFP_KERNEL); - if (!hdev->vector_status) + if (!hdev->vector_status) { + pci_free_irq_vectors(pdev); return -ENOMEM; + } for (i = 0; i < hdev->num_msi; i++) hdev->vector_status[i] = HCLGE_INVALID_VPORT; - vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi, PCI_IRQ_MSI); - if (vectors < 0) { - dev_err(&pdev->dev, "MSI vectors enable failed %d\n", vectors); - return -EINVAL; + hdev->vector_irq = devm_kcalloc(&pdev->dev, hdev->num_msi, + sizeof(int), GFP_KERNEL); + if (!hdev->vector_irq) { + pci_free_irq_vectors(pdev); + return -ENOMEM; } - hdev->num_msi = vectors; - hdev->num_msi_left = vectors; - hdev->base_msi_vector = pdev->irq; - hdev->roce_base_vector = hdev->base_msi_vector + - HCLGE_ROCE_VECTOR_OFFSET; return 0; } @@ -1932,11 +2027,11 @@ static void hclge_check_speed_dup(struct hclge_dev *hdev, int duplex, int speed) int hclge_cfg_mac_speed_dup(struct hclge_dev *hdev, int speed, u8 duplex) { - struct hclge_config_mac_speed_dup *req; + struct hclge_config_mac_speed_dup_cmd *req; struct hclge_desc desc; int ret; - req = (struct hclge_config_mac_speed_dup *)desc.data; + req = (struct hclge_config_mac_speed_dup_cmd *)desc.data; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_SPEED_DUP, false); @@ -2007,12 +2102,12 @@ static int hclge_cfg_mac_speed_dup_h(struct hnae3_handle *handle, int speed, static int hclge_query_mac_an_speed_dup(struct hclge_dev *hdev, int *speed, u8 *duplex) { - struct hclge_query_an_speed_dup *req; + struct hclge_query_an_speed_dup_cmd *req; struct hclge_desc desc; int speed_tmp; int ret; - req = (struct hclge_query_an_speed_dup *)desc.data; + req = (struct hclge_query_an_speed_dup_cmd *)desc.data; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_AN_RESULT, true); ret = hclge_cmd_send(&hdev->hw, &desc, 1); @@ -2040,11 +2135,11 @@ static int hclge_query_mac_an_speed_dup(struct hclge_dev *hdev, int *speed, static int hclge_query_autoneg_result(struct hclge_dev *hdev) { struct hclge_mac *mac = &hdev->hw.mac; - struct hclge_query_an_speed_dup *req; + struct hclge_query_an_speed_dup_cmd *req; struct hclge_desc desc; int ret; - req = (struct hclge_query_an_speed_dup *)desc.data; + req = (struct hclge_query_an_speed_dup_cmd *)desc.data; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_AN_RESULT, true); ret = hclge_cmd_send(&hdev->hw, &desc, 1); @@ -2061,14 +2156,16 @@ static int hclge_query_autoneg_result(struct hclge_dev *hdev) static int hclge_set_autoneg_en(struct hclge_dev *hdev, bool enable) { - struct hclge_config_auto_neg *req; + struct hclge_config_auto_neg_cmd *req; struct hclge_desc desc; + u32 flag = 0; int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_AN_MODE, false); - req = (struct hclge_config_auto_neg *)desc.data; - hnae_set_bit(req->cfg_an_cmd_flag, HCLGE_MAC_CFG_AN_EN_B, !!enable); + req = (struct hclge_config_auto_neg_cmd *)desc.data; + hnae_set_bit(flag, HCLGE_MAC_CFG_AN_EN_B, !!enable); + req->cfg_an_cmd_flag = cpu_to_le32(flag); ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { @@ -2112,13 +2209,6 @@ static int hclge_mac_init(struct hclge_dev *hdev) mac->link = 0; - ret = hclge_mac_mdio_config(hdev); - if (ret) { - dev_warn(&hdev->pdev->dev, - "mdio config fail ret=%d\n", ret); - return ret; - } - /* Initialize the MTA table work mode */ hdev->accept_mta_mc = true; hdev->enable_mta = true; @@ -2146,7 +2236,7 @@ static void hclge_task_schedule(struct hclge_dev *hdev) static int hclge_get_mac_link_status(struct hclge_dev *hdev) { - struct hclge_link_status *req; + struct hclge_link_status_cmd *req; struct hclge_desc desc; int link_status; int ret; @@ -2159,7 +2249,7 @@ static int hclge_get_mac_link_status(struct hclge_dev *hdev) return ret; } - req = (struct hclge_link_status *)desc.data; + req = (struct hclge_link_status_cmd *)desc.data; link_status = req->status & HCLGE_LINK_STATUS; return !!link_status; @@ -2215,18 +2305,7 @@ static int hclge_update_speed_duplex(struct hclge_dev *hdev) /* get the speed and duplex as autoneg'result from mac cmd when phy * doesn't exit. */ - if (mac.phydev) - return 0; - - /* update mac->antoneg. */ - ret = hclge_query_autoneg_result(hdev); - if (ret) { - dev_err(&hdev->pdev->dev, - "autoneg result query failed %d\n", ret); - return ret; - } - - if (!mac.autoneg) + if (mac.phydev || !mac.autoneg) return 0; ret = hclge_query_mac_an_speed_dup(hdev, &speed, &duplex); @@ -2266,11 +2345,11 @@ static int hclge_get_status(struct hnae3_handle *handle) return hdev->hw.mac.link; } -static void hclge_service_timer(unsigned long data) +static void hclge_service_timer(struct timer_list *t) { - struct hclge_dev *hdev = (struct hclge_dev *)data; - (void)mod_timer(&hdev->service_timer, jiffies + HZ); + struct hclge_dev *hdev = from_timer(hdev, t, service_timer); + mod_timer(&hdev->service_timer, jiffies + HZ); hclge_task_schedule(hdev); } @@ -2283,11 +2362,275 @@ static void hclge_service_complete(struct hclge_dev *hdev) clear_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state); } +static void hclge_enable_vector(struct hclge_misc_vector *vector, bool enable) +{ + writel(enable ? 1 : 0, vector->addr); +} + +static irqreturn_t hclge_misc_irq_handle(int irq, void *data) +{ + struct hclge_dev *hdev = data; + + hclge_enable_vector(&hdev->misc_vector, false); + if (!test_and_set_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state)) + schedule_work(&hdev->service_task); + + return IRQ_HANDLED; +} + +static void hclge_free_vector(struct hclge_dev *hdev, int vector_id) +{ + hdev->vector_status[vector_id] = HCLGE_INVALID_VPORT; + hdev->num_msi_left += 1; + hdev->num_msi_used -= 1; +} + +static void hclge_get_misc_vector(struct hclge_dev *hdev) +{ + struct hclge_misc_vector *vector = &hdev->misc_vector; + + vector->vector_irq = pci_irq_vector(hdev->pdev, 0); + + vector->addr = hdev->hw.io_base + HCLGE_MISC_VECTOR_REG_BASE; + hdev->vector_status[0] = 0; + + hdev->num_msi_left -= 1; + hdev->num_msi_used += 1; +} + +static int hclge_misc_irq_init(struct hclge_dev *hdev) +{ + int ret; + + hclge_get_misc_vector(hdev); + + ret = devm_request_irq(&hdev->pdev->dev, + hdev->misc_vector.vector_irq, + hclge_misc_irq_handle, 0, "hclge_misc", hdev); + if (ret) { + hclge_free_vector(hdev, 0); + dev_err(&hdev->pdev->dev, "request misc irq(%d) fail\n", + hdev->misc_vector.vector_irq); + } + + return ret; +} + +static int hclge_notify_client(struct hclge_dev *hdev, + enum hnae3_reset_notify_type type) +{ + struct hnae3_client *client = hdev->nic_client; + u16 i; + + if (!client->ops->reset_notify) + return -EOPNOTSUPP; + + for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { + struct hnae3_handle *handle = &hdev->vport[i].nic; + int ret; + + ret = client->ops->reset_notify(handle, type); + if (ret) + return ret; + } + + return 0; +} + +static int hclge_reset_wait(struct hclge_dev *hdev) +{ +#define HCLGE_RESET_WATI_MS 100 +#define HCLGE_RESET_WAIT_CNT 5 + u32 val, reg, reg_bit; + u32 cnt = 0; + + switch (hdev->reset_type) { + case HNAE3_GLOBAL_RESET: + reg = HCLGE_GLOBAL_RESET_REG; + reg_bit = HCLGE_GLOBAL_RESET_BIT; + break; + case HNAE3_CORE_RESET: + reg = HCLGE_GLOBAL_RESET_REG; + reg_bit = HCLGE_CORE_RESET_BIT; + break; + case HNAE3_FUNC_RESET: + reg = HCLGE_FUN_RST_ING; + reg_bit = HCLGE_FUN_RST_ING_B; + break; + default: + dev_err(&hdev->pdev->dev, + "Wait for unsupported reset type: %d\n", + hdev->reset_type); + return -EINVAL; + } + + val = hclge_read_dev(&hdev->hw, reg); + while (hnae_get_bit(val, reg_bit) && cnt < HCLGE_RESET_WAIT_CNT) { + msleep(HCLGE_RESET_WATI_MS); + val = hclge_read_dev(&hdev->hw, reg); + cnt++; + } + + /* must clear reset status register to + * prevent driver detect reset interrupt again + */ + reg = hclge_read_dev(&hdev->hw, HCLGE_MISC_RESET_STS_REG); + hclge_write_dev(&hdev->hw, HCLGE_MISC_RESET_STS_REG, reg); + + if (cnt >= HCLGE_RESET_WAIT_CNT) { + dev_warn(&hdev->pdev->dev, + "Wait for reset timeout: %d\n", hdev->reset_type); + return -EBUSY; + } + + return 0; +} + +static int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id) +{ + struct hclge_desc desc; + struct hclge_reset_cmd *req = (struct hclge_reset_cmd *)desc.data; + int ret; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_RST_TRIGGER, false); + hnae_set_bit(req->mac_func_reset, HCLGE_CFG_RESET_MAC_B, 0); + hnae_set_bit(req->mac_func_reset, HCLGE_CFG_RESET_FUNC_B, 1); + req->fun_reset_vfid = func_id; + + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) + dev_err(&hdev->pdev->dev, + "send function reset cmd fail, status =%d\n", ret); + + return ret; +} + +static void hclge_do_reset(struct hclge_dev *hdev, enum hnae3_reset_type type) +{ + struct pci_dev *pdev = hdev->pdev; + u32 val; + + switch (type) { + case HNAE3_GLOBAL_RESET: + val = hclge_read_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG); + hnae_set_bit(val, HCLGE_GLOBAL_RESET_BIT, 1); + hclge_write_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG, val); + dev_info(&pdev->dev, "Global Reset requested\n"); + break; + case HNAE3_CORE_RESET: + val = hclge_read_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG); + hnae_set_bit(val, HCLGE_CORE_RESET_BIT, 1); + hclge_write_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG, val); + dev_info(&pdev->dev, "Core Reset requested\n"); + break; + case HNAE3_FUNC_RESET: + dev_info(&pdev->dev, "PF Reset requested\n"); + hclge_func_reset_cmd(hdev, 0); + break; + default: + dev_warn(&pdev->dev, + "Unsupported reset type: %d\n", type); + break; + } +} + +static enum hnae3_reset_type hclge_detected_reset_event(struct hclge_dev *hdev) +{ + enum hnae3_reset_type rst_level = HNAE3_NONE_RESET; + u32 rst_reg_val; + + rst_reg_val = hclge_read_dev(&hdev->hw, HCLGE_MISC_RESET_STS_REG); + if (BIT(HCLGE_VECTOR0_GLOBALRESET_INT_B) & rst_reg_val) + rst_level = HNAE3_GLOBAL_RESET; + else if (BIT(HCLGE_VECTOR0_CORERESET_INT_B) & rst_reg_val) + rst_level = HNAE3_CORE_RESET; + else if (BIT(HCLGE_VECTOR0_IMPRESET_INT_B) & rst_reg_val) + rst_level = HNAE3_IMP_RESET; + + return rst_level; +} + +static void hclge_reset_event(struct hnae3_handle *handle, + enum hnae3_reset_type reset) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + + dev_info(&hdev->pdev->dev, + "Receive reset event , reset_type is %d", reset); + + switch (reset) { + case HNAE3_FUNC_RESET: + case HNAE3_CORE_RESET: + case HNAE3_GLOBAL_RESET: + if (test_bit(HCLGE_STATE_RESET_INT, &hdev->state)) { + dev_err(&hdev->pdev->dev, "Already in reset state"); + return; + } + hdev->reset_type = reset; + set_bit(HCLGE_STATE_RESET_INT, &hdev->state); + set_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state); + schedule_work(&hdev->service_task); + break; + default: + dev_warn(&hdev->pdev->dev, "Unsupported reset event:%d", reset); + break; + } +} + +static void hclge_reset_subtask(struct hclge_dev *hdev) +{ + bool do_reset; + + do_reset = hdev->reset_type != HNAE3_NONE_RESET; + + /* Reset is detected by interrupt */ + if (hdev->reset_type == HNAE3_NONE_RESET) + hdev->reset_type = hclge_detected_reset_event(hdev); + + if (hdev->reset_type == HNAE3_NONE_RESET) + return; + + switch (hdev->reset_type) { + case HNAE3_FUNC_RESET: + case HNAE3_CORE_RESET: + case HNAE3_GLOBAL_RESET: + case HNAE3_IMP_RESET: + hclge_notify_client(hdev, HNAE3_DOWN_CLIENT); + + if (do_reset) + hclge_do_reset(hdev, hdev->reset_type); + else + set_bit(HCLGE_STATE_RESET_INT, &hdev->state); + + if (!hclge_reset_wait(hdev)) { + hclge_notify_client(hdev, HNAE3_UNINIT_CLIENT); + hclge_reset_ae_dev(hdev->ae_dev); + hclge_notify_client(hdev, HNAE3_INIT_CLIENT); + clear_bit(HCLGE_STATE_RESET_INT, &hdev->state); + } + hclge_notify_client(hdev, HNAE3_UP_CLIENT); + break; + default: + dev_err(&hdev->pdev->dev, "Unsupported reset type:%d\n", + hdev->reset_type); + break; + } + hdev->reset_type = HNAE3_NONE_RESET; +} + +static void hclge_misc_irq_service_task(struct hclge_dev *hdev) +{ + hclge_reset_subtask(hdev); + hclge_enable_vector(&hdev->misc_vector, true); +} + static void hclge_service_task(struct work_struct *work) { struct hclge_dev *hdev = container_of(work, struct hclge_dev, service_task); + hclge_misc_irq_service_task(hdev); hclge_update_speed_duplex(hdev); hclge_update_link_status(hdev); hclge_update_stats_for_all(hdev); @@ -2341,6 +2684,7 @@ static int hclge_get_vector(struct hnae3_handle *handle, u16 vector_num, vport->vport_id * HCLGE_VECTOR_VF_OFFSET; hdev->vector_status[i] = vport->vport_id; + hdev->vector_irq[i] = vector->vector; vector++; alloc++; @@ -2359,15 +2703,10 @@ static int hclge_get_vector_index(struct hclge_dev *hdev, int vector) { int i; - for (i = 0; i < hdev->num_msi; i++) { - if (hdev->msix_entries) { - if (vector == hdev->msix_entries[i].vector) - return i; - } else { - if (vector == (hdev->base_msi_vector + i)) - return i; - } - } + for (i = 0; i < hdev->num_msi; i++) + if (vector == hdev->vector_irq[i]) + return i; + return -EINVAL; } @@ -2383,7 +2722,7 @@ static u32 hclge_get_rss_indir_size(struct hnae3_handle *handle) static int hclge_get_rss_algo(struct hclge_dev *hdev) { - struct hclge_rss_config *req; + struct hclge_rss_config_cmd *req; struct hclge_desc desc; int rss_hash_algo; int ret; @@ -2397,7 +2736,7 @@ static int hclge_get_rss_algo(struct hclge_dev *hdev) return ret; } - req = (struct hclge_rss_config *)desc.data; + req = (struct hclge_rss_config_cmd *)desc.data; rss_hash_algo = (req->hash_config & HCLGE_RSS_HASH_ALGO_MASK); if (rss_hash_algo == HCLGE_RSS_HASH_ALGO_TOEPLITZ) @@ -2409,13 +2748,13 @@ static int hclge_get_rss_algo(struct hclge_dev *hdev) static int hclge_set_rss_algo_key(struct hclge_dev *hdev, const u8 hfunc, const u8 *key) { - struct hclge_rss_config *req; + struct hclge_rss_config_cmd *req; struct hclge_desc desc; int key_offset; int key_size; int ret; - req = (struct hclge_rss_config *)desc.data; + req = (struct hclge_rss_config_cmd *)desc.data; for (key_offset = 0; key_offset < 3; key_offset++) { hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_GENERIC_CONFIG, @@ -2446,19 +2785,20 @@ static int hclge_set_rss_algo_key(struct hclge_dev *hdev, static int hclge_set_rss_indir_table(struct hclge_dev *hdev, const u32 *indir) { - struct hclge_rss_indirection_table *req; + struct hclge_rss_indirection_table_cmd *req; struct hclge_desc desc; int i, j; int ret; - req = (struct hclge_rss_indirection_table *)desc.data; + req = (struct hclge_rss_indirection_table_cmd *)desc.data; for (i = 0; i < HCLGE_RSS_CFG_TBL_NUM; i++) { hclge_cmd_setup_basic_desc (&desc, HCLGE_OPC_RSS_INDIR_TABLE, false); - req->start_table_index = i * HCLGE_RSS_CFG_TBL_SIZE; - req->rss_set_bitmap = HCLGE_RSS_SET_BITMAP_MSK; + req->start_table_index = + cpu_to_le16(i * HCLGE_RSS_CFG_TBL_SIZE); + req->rss_set_bitmap = cpu_to_le16(HCLGE_RSS_SET_BITMAP_MSK); for (j = 0; j < HCLGE_RSS_CFG_TBL_SIZE; j++) req->rss_result[j] = @@ -2478,21 +2818,24 @@ static int hclge_set_rss_indir_table(struct hclge_dev *hdev, const u32 *indir) static int hclge_set_rss_tc_mode(struct hclge_dev *hdev, u16 *tc_valid, u16 *tc_size, u16 *tc_offset) { - struct hclge_rss_tc_mode *req; + struct hclge_rss_tc_mode_cmd *req; struct hclge_desc desc; int ret; int i; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_TC_MODE, false); - req = (struct hclge_rss_tc_mode *)desc.data; + req = (struct hclge_rss_tc_mode_cmd *)desc.data; for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { - hnae_set_bit(req->rss_tc_mode[i], HCLGE_RSS_TC_VALID_B, - (tc_valid[i] & 0x1)); - hnae_set_field(req->rss_tc_mode[i], HCLGE_RSS_TC_SIZE_M, + u16 mode = 0; + + hnae_set_bit(mode, HCLGE_RSS_TC_VALID_B, (tc_valid[i] & 0x1)); + hnae_set_field(mode, HCLGE_RSS_TC_SIZE_M, HCLGE_RSS_TC_SIZE_S, tc_size[i]); - hnae_set_field(req->rss_tc_mode[i], HCLGE_RSS_TC_OFFSET_M, + hnae_set_field(mode, HCLGE_RSS_TC_OFFSET_M, HCLGE_RSS_TC_OFFSET_S, tc_offset[i]); + + req->rss_tc_mode[i] = cpu_to_le16(mode); } ret = hclge_cmd_send(&hdev->hw, &desc, 1); @@ -2507,15 +2850,13 @@ static int hclge_set_rss_tc_mode(struct hclge_dev *hdev, u16 *tc_valid, static int hclge_set_rss_input_tuple(struct hclge_dev *hdev) { -#define HCLGE_RSS_INPUT_TUPLE_OTHER 0xf -#define HCLGE_RSS_INPUT_TUPLE_SCTP 0x1f - struct hclge_rss_input_tuple *req; + struct hclge_rss_input_tuple_cmd *req; struct hclge_desc desc; int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, false); - req = (struct hclge_rss_input_tuple *)desc.data; + req = (struct hclge_rss_input_tuple_cmd *)desc.data; req->ipv4_tcp_en = HCLGE_RSS_INPUT_TUPLE_OTHER; req->ipv4_udp_en = HCLGE_RSS_INPUT_TUPLE_OTHER; req->ipv4_sctp_en = HCLGE_RSS_INPUT_TUPLE_SCTP; @@ -2589,6 +2930,161 @@ static int hclge_set_rss(struct hnae3_handle *handle, const u32 *indir, return ret; } +static u8 hclge_get_rss_hash_bits(struct ethtool_rxnfc *nfc) +{ + u8 hash_sets = nfc->data & RXH_L4_B_0_1 ? HCLGE_S_PORT_BIT : 0; + + if (nfc->data & RXH_L4_B_2_3) + hash_sets |= HCLGE_D_PORT_BIT; + else + hash_sets &= ~HCLGE_D_PORT_BIT; + + if (nfc->data & RXH_IP_SRC) + hash_sets |= HCLGE_S_IP_BIT; + else + hash_sets &= ~HCLGE_S_IP_BIT; + + if (nfc->data & RXH_IP_DST) + hash_sets |= HCLGE_D_IP_BIT; + else + hash_sets &= ~HCLGE_D_IP_BIT; + + if (nfc->flow_type == SCTP_V4_FLOW || nfc->flow_type == SCTP_V6_FLOW) + hash_sets |= HCLGE_V_TAG_BIT; + + return hash_sets; +} + +static int hclge_set_rss_tuple(struct hnae3_handle *handle, + struct ethtool_rxnfc *nfc) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + struct hclge_rss_input_tuple_cmd *req; + struct hclge_desc desc; + u8 tuple_sets; + int ret; + + if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST | + RXH_L4_B_0_1 | RXH_L4_B_2_3)) + return -EINVAL; + + req = (struct hclge_rss_input_tuple_cmd *)desc.data; + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, true); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "Read rss tuple fail, status = %d\n", ret); + return ret; + } + + hclge_cmd_reuse_desc(&desc, false); + + tuple_sets = hclge_get_rss_hash_bits(nfc); + switch (nfc->flow_type) { + case TCP_V4_FLOW: + req->ipv4_tcp_en = tuple_sets; + break; + case TCP_V6_FLOW: + req->ipv6_tcp_en = tuple_sets; + break; + case UDP_V4_FLOW: + req->ipv4_udp_en = tuple_sets; + break; + case UDP_V6_FLOW: + req->ipv6_udp_en = tuple_sets; + break; + case SCTP_V4_FLOW: + req->ipv4_sctp_en = tuple_sets; + break; + case SCTP_V6_FLOW: + if ((nfc->data & RXH_L4_B_0_1) || + (nfc->data & RXH_L4_B_2_3)) + return -EINVAL; + + req->ipv6_sctp_en = tuple_sets; + break; + case IPV4_FLOW: + req->ipv4_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER; + break; + case IPV6_FLOW: + req->ipv6_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER; + break; + default: + return -EINVAL; + } + + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) + dev_err(&hdev->pdev->dev, + "Set rss tuple fail, status = %d\n", ret); + + return ret; +} + +static int hclge_get_rss_tuple(struct hnae3_handle *handle, + struct ethtool_rxnfc *nfc) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + struct hclge_rss_input_tuple_cmd *req; + struct hclge_desc desc; + u8 tuple_sets; + int ret; + + nfc->data = 0; + + req = (struct hclge_rss_input_tuple_cmd *)desc.data; + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, true); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "Read rss tuple fail, status = %d\n", ret); + return ret; + } + + switch (nfc->flow_type) { + case TCP_V4_FLOW: + tuple_sets = req->ipv4_tcp_en; + break; + case UDP_V4_FLOW: + tuple_sets = req->ipv4_udp_en; + break; + case TCP_V6_FLOW: + tuple_sets = req->ipv6_tcp_en; + break; + case UDP_V6_FLOW: + tuple_sets = req->ipv6_udp_en; + break; + case SCTP_V4_FLOW: + tuple_sets = req->ipv4_sctp_en; + break; + case SCTP_V6_FLOW: + tuple_sets = req->ipv6_sctp_en; + break; + case IPV4_FLOW: + case IPV6_FLOW: + tuple_sets = HCLGE_S_IP_BIT | HCLGE_D_IP_BIT; + break; + default: + return -EINVAL; + } + + if (!tuple_sets) + return 0; + + if (tuple_sets & HCLGE_D_PORT_BIT) + nfc->data |= RXH_L4_B_2_3; + if (tuple_sets & HCLGE_S_PORT_BIT) + nfc->data |= RXH_L4_B_0_1; + if (tuple_sets & HCLGE_D_IP_BIT) + nfc->data |= RXH_IP_DST; + if (tuple_sets & HCLGE_S_IP_BIT) + nfc->data |= RXH_IP_SRC; + + return 0; +} + static int hclge_get_tc_size(struct hnae3_handle *handle) { struct hclge_vport *vport = hclge_get_vport(handle); @@ -2597,7 +3093,7 @@ static int hclge_get_tc_size(struct hnae3_handle *handle) return hdev->rss_size_max; } -static int hclge_rss_init_hw(struct hclge_dev *hdev) +int hclge_rss_init_hw(struct hclge_dev *hdev) { const u8 hfunc = HCLGE_RSS_HASH_ALGO_TOEPLITZ; struct hclge_vport *vport = hdev->vport; @@ -2682,7 +3178,7 @@ int hclge_map_vport_ring_to_vector(struct hclge_vport *vport, int vector_id, struct hnae3_ring_chain_node *ring_chain) { struct hclge_dev *hdev = vport->back; - struct hclge_ctrl_vector_chain *req; + struct hclge_ctrl_vector_chain_cmd *req; struct hnae3_ring_chain_node *node; struct hclge_desc desc; int ret; @@ -2690,20 +3186,21 @@ int hclge_map_vport_ring_to_vector(struct hclge_vport *vport, int vector_id, hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_ADD_RING_TO_VECTOR, false); - req = (struct hclge_ctrl_vector_chain *)desc.data; + req = (struct hclge_ctrl_vector_chain_cmd *)desc.data; req->int_vector_id = vector_id; i = 0; for (node = ring_chain; node; node = node->next) { - hnae_set_field(req->tqp_type_and_id[i], HCLGE_INT_TYPE_M, - HCLGE_INT_TYPE_S, + u16 type_and_id = 0; + + hnae_set_field(type_and_id, HCLGE_INT_TYPE_M, HCLGE_INT_TYPE_S, hnae_get_bit(node->flag, HNAE3_RING_TYPE_B)); - hnae_set_field(req->tqp_type_and_id[i], HCLGE_TQP_ID_M, - HCLGE_TQP_ID_S, node->tqp_index); - hnae_set_field(req->tqp_type_and_id[i], HCLGE_INT_GL_IDX_M, + hnae_set_field(type_and_id, HCLGE_TQP_ID_M, HCLGE_TQP_ID_S, + node->tqp_index); + hnae_set_field(type_and_id, HCLGE_INT_GL_IDX_M, HCLGE_INT_GL_IDX_S, hnae_get_bit(node->flag, HNAE3_RING_TYPE_B)); - req->tqp_type_and_id[i] = cpu_to_le16(req->tqp_type_and_id[i]); + req->tqp_type_and_id[i] = cpu_to_le16(type_and_id); req->vfid = vport->vport_id; if (++i >= HCLGE_VECTOR_ELEMENTS_PER_CMD) { @@ -2739,9 +3236,9 @@ int hclge_map_vport_ring_to_vector(struct hclge_vport *vport, int vector_id, return 0; } -int hclge_map_handle_ring_to_vector(struct hnae3_handle *handle, - int vector, - struct hnae3_ring_chain_node *ring_chain) +static int hclge_map_handle_ring_to_vector( + struct hnae3_handle *handle, int vector, + struct hnae3_ring_chain_node *ring_chain) { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; @@ -2763,7 +3260,7 @@ static int hclge_unmap_ring_from_vector( { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; - struct hclge_ctrl_vector_chain *req; + struct hclge_ctrl_vector_chain_cmd *req; struct hnae3_ring_chain_node *node; struct hclge_desc desc; int i, vector_id; @@ -2778,21 +3275,22 @@ static int hclge_unmap_ring_from_vector( hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_DEL_RING_TO_VECTOR, false); - req = (struct hclge_ctrl_vector_chain *)desc.data; + req = (struct hclge_ctrl_vector_chain_cmd *)desc.data; req->int_vector_id = vector_id; i = 0; for (node = ring_chain; node; node = node->next) { - hnae_set_field(req->tqp_type_and_id[i], HCLGE_INT_TYPE_M, - HCLGE_INT_TYPE_S, + u16 type_and_id = 0; + + hnae_set_field(type_and_id, HCLGE_INT_TYPE_M, HCLGE_INT_TYPE_S, hnae_get_bit(node->flag, HNAE3_RING_TYPE_B)); - hnae_set_field(req->tqp_type_and_id[i], HCLGE_TQP_ID_M, - HCLGE_TQP_ID_S, node->tqp_index); - hnae_set_field(req->tqp_type_and_id[i], HCLGE_INT_GL_IDX_M, + hnae_set_field(type_and_id, HCLGE_TQP_ID_M, HCLGE_TQP_ID_S, + node->tqp_index); + hnae_set_field(type_and_id, HCLGE_INT_GL_IDX_M, HCLGE_INT_GL_IDX_S, hnae_get_bit(node->flag, HNAE3_RING_TYPE_B)); - req->tqp_type_and_id[i] = cpu_to_le16(req->tqp_type_and_id[i]); + req->tqp_type_and_id[i] = cpu_to_le16(type_and_id); req->vfid = vport->vport_id; if (++i >= HCLGE_VECTOR_ELEMENTS_PER_CMD) { @@ -2830,13 +3328,13 @@ static int hclge_unmap_ring_from_vector( int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev, struct hclge_promisc_param *param) { - struct hclge_promisc_cfg *req; + struct hclge_promisc_cfg_cmd *req; struct hclge_desc desc; int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_PROMISC_MODE, false); - req = (struct hclge_promisc_cfg *)desc.data; + req = (struct hclge_promisc_cfg_cmd *)desc.data; req->vf_id = param->vf_id; req->flag = (param->enable << HCLGE_PROMISC_EN_B); @@ -2878,29 +3376,27 @@ static void hclge_set_promisc_mode(struct hnae3_handle *handle, u32 en) static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable) { struct hclge_desc desc; - struct hclge_config_mac_mode *req = - (struct hclge_config_mac_mode *)desc.data; + struct hclge_config_mac_mode_cmd *req = + (struct hclge_config_mac_mode_cmd *)desc.data; + u32 loop_en = 0; int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_MAC_MODE, false); - hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_TX_EN_B, enable); - hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_RX_EN_B, enable); - hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_PAD_TX_B, enable); - hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_PAD_RX_B, enable); - hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_1588_TX_B, 0); - hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_1588_RX_B, 0); - hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_APP_LP_B, 0); - hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_LINE_LP_B, 0); - hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_FCS_TX_B, enable); - hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_RX_FCS_B, enable); - hnae_set_bit(req->txrx_pad_fcs_loop_en, - HCLGE_MAC_RX_FCS_STRIP_B, enable); - hnae_set_bit(req->txrx_pad_fcs_loop_en, - HCLGE_MAC_TX_OVERSIZE_TRUNCATE_B, enable); - hnae_set_bit(req->txrx_pad_fcs_loop_en, - HCLGE_MAC_RX_OVERSIZE_TRUNCATE_B, enable); - hnae_set_bit(req->txrx_pad_fcs_loop_en, - HCLGE_MAC_TX_UNDER_MIN_ERR_B, enable); + hnae_set_bit(loop_en, HCLGE_MAC_TX_EN_B, enable); + hnae_set_bit(loop_en, HCLGE_MAC_RX_EN_B, enable); + hnae_set_bit(loop_en, HCLGE_MAC_PAD_TX_B, enable); + hnae_set_bit(loop_en, HCLGE_MAC_PAD_RX_B, enable); + hnae_set_bit(loop_en, HCLGE_MAC_1588_TX_B, 0); + hnae_set_bit(loop_en, HCLGE_MAC_1588_RX_B, 0); + hnae_set_bit(loop_en, HCLGE_MAC_APP_LP_B, 0); + hnae_set_bit(loop_en, HCLGE_MAC_LINE_LP_B, 0); + hnae_set_bit(loop_en, HCLGE_MAC_FCS_TX_B, enable); + hnae_set_bit(loop_en, HCLGE_MAC_RX_FCS_B, enable); + hnae_set_bit(loop_en, HCLGE_MAC_RX_FCS_STRIP_B, enable); + hnae_set_bit(loop_en, HCLGE_MAC_TX_OVERSIZE_TRUNCATE_B, enable); + hnae_set_bit(loop_en, HCLGE_MAC_RX_OVERSIZE_TRUNCATE_B, enable); + hnae_set_bit(loop_en, HCLGE_MAC_TX_UNDER_MIN_ERR_B, enable); + req->txrx_pad_fcs_loop_en = cpu_to_le32(loop_en); ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) @@ -2908,12 +3404,65 @@ static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable) "mac enable fail, ret =%d.\n", ret); } +static int hclge_set_loopback(struct hnae3_handle *handle, + enum hnae3_loop loop_mode, bool en) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_config_mac_mode_cmd *req; + struct hclge_dev *hdev = vport->back; + struct hclge_desc desc; + u32 loop_en; + int ret; + + switch (loop_mode) { + case HNAE3_MAC_INTER_LOOP_MAC: + req = (struct hclge_config_mac_mode_cmd *)&desc.data[0]; + /* 1 Read out the MAC mode config at first */ + hclge_cmd_setup_basic_desc(&desc, + HCLGE_OPC_CONFIG_MAC_MODE, + true); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "mac loopback get fail, ret =%d.\n", + ret); + return ret; + } + + /* 2 Then setup the loopback flag */ + loop_en = le32_to_cpu(req->txrx_pad_fcs_loop_en); + if (en) + hnae_set_bit(loop_en, HCLGE_MAC_APP_LP_B, 1); + else + hnae_set_bit(loop_en, HCLGE_MAC_APP_LP_B, 0); + + req->txrx_pad_fcs_loop_en = cpu_to_le32(loop_en); + + /* 3 Config mac work mode with loopback flag + * and its original configure parameters + */ + hclge_cmd_reuse_desc(&desc, false); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) + dev_err(&hdev->pdev->dev, + "mac loopback set fail, ret =%d.\n", ret); + break; + default: + ret = -ENOTSUPP; + dev_err(&hdev->pdev->dev, + "loop_mode %d is not supported\n", loop_mode); + break; + } + + return ret; +} + static int hclge_tqp_enable(struct hclge_dev *hdev, int tqp_id, int stream_id, bool enable) { struct hclge_desc desc; - struct hclge_cfg_com_tqp_queue *req = - (struct hclge_cfg_com_tqp_queue *)desc.data; + struct hclge_cfg_com_tqp_queue_cmd *req = + (struct hclge_cfg_com_tqp_queue_cmd *)desc.data; int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_COM_TQP_QUEUE, false); @@ -2963,7 +3512,7 @@ static int hclge_ae_start(struct hnae3_handle *handle) /* mac enable */ hclge_cfg_mac_mode(hdev, true); clear_bit(HCLGE_STATE_DOWN, &hdev->state); - (void)mod_timer(&hdev->service_timer, jiffies + HZ); + mod_timer(&hdev->service_timer, jiffies + HZ); ret = hclge_mac_start_phy(hdev); if (ret) @@ -3077,16 +3626,16 @@ static int hclge_update_desc_vfid(struct hclge_desc *desc, int vfid, bool clr) word_num = vfid / 32; bit_num = vfid % 32; if (clr) - desc[1].data[word_num] &= ~(1 << bit_num); + desc[1].data[word_num] &= cpu_to_le32(~(1 << bit_num)); else - desc[1].data[word_num] |= (1 << bit_num); + desc[1].data[word_num] |= cpu_to_le32(1 << bit_num); } else { word_num = (vfid - 192) / 32; bit_num = vfid % 32; if (clr) - desc[2].data[word_num] &= ~(1 << bit_num); + desc[2].data[word_num] &= cpu_to_le32(~(1 << bit_num)); else - desc[2].data[word_num] |= (1 << bit_num); + desc[2].data[word_num] |= cpu_to_le32(1 << bit_num); } return 0; @@ -3106,7 +3655,7 @@ static bool hclge_is_all_function_id_zero(struct hclge_desc *desc) return true; } -static void hclge_prepare_mac_addr(struct hclge_mac_vlan_tbl_entry *new_req, +static void hclge_prepare_mac_addr(struct hclge_mac_vlan_tbl_entry_cmd *new_req, const u8 *addr) { const unsigned char *mac_addr = addr; @@ -3118,8 +3667,8 @@ static void hclge_prepare_mac_addr(struct hclge_mac_vlan_tbl_entry *new_req, new_req->mac_addr_lo16 = cpu_to_le16(low_val & 0xffff); } -u16 hclge_get_mac_addr_to_mta_index(struct hclge_vport *vport, - const u8 *addr) +static u16 hclge_get_mac_addr_to_mta_index(struct hclge_vport *vport, + const u8 *addr) { u16 high_val = addr[1] | (addr[0] << 8); struct hclge_dev *hdev = vport->back; @@ -3133,11 +3682,11 @@ static int hclge_set_mta_filter_mode(struct hclge_dev *hdev, enum hclge_mta_dmac_sel_type mta_mac_sel, bool enable) { - struct hclge_mta_filter_mode *req; + struct hclge_mta_filter_mode_cmd *req; struct hclge_desc desc; int ret; - req = (struct hclge_mta_filter_mode *)desc.data; + req = (struct hclge_mta_filter_mode_cmd *)desc.data; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MTA_MAC_MODE_CFG, false); hnae_set_bit(req->dmac_sel_en, HCLGE_CFG_MTA_MAC_EN_B, @@ -3160,11 +3709,11 @@ int hclge_cfg_func_mta_filter(struct hclge_dev *hdev, u8 func_id, bool enable) { - struct hclge_cfg_func_mta_filter *req; + struct hclge_cfg_func_mta_filter_cmd *req; struct hclge_desc desc; int ret; - req = (struct hclge_cfg_func_mta_filter *)desc.data; + req = (struct hclge_cfg_func_mta_filter_cmd *)desc.data; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MTA_MAC_FUNC_CFG, false); hnae_set_bit(req->accept, HCLGE_CFG_FUNC_MTA_ACCEPT_B, @@ -3187,17 +3736,18 @@ static int hclge_set_mta_table_item(struct hclge_vport *vport, bool enable) { struct hclge_dev *hdev = vport->back; - struct hclge_cfg_func_mta_item *req; + struct hclge_cfg_func_mta_item_cmd *req; struct hclge_desc desc; + u16 item_idx = 0; int ret; - req = (struct hclge_cfg_func_mta_item *)desc.data; + req = (struct hclge_cfg_func_mta_item_cmd *)desc.data; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MTA_TBL_ITEM_CFG, false); hnae_set_bit(req->accept, HCLGE_CFG_MTA_ITEM_ACCEPT_B, enable); - hnae_set_field(req->item_idx, HCLGE_CFG_MTA_ITEM_IDX_M, + hnae_set_field(item_idx, HCLGE_CFG_MTA_ITEM_IDX_M, HCLGE_CFG_MTA_ITEM_IDX_S, idx); - req->item_idx = cpu_to_le16(req->item_idx); + req->item_idx = cpu_to_le16(item_idx); ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { @@ -3211,16 +3761,17 @@ static int hclge_set_mta_table_item(struct hclge_vport *vport, } static int hclge_remove_mac_vlan_tbl(struct hclge_vport *vport, - struct hclge_mac_vlan_tbl_entry *req) + struct hclge_mac_vlan_tbl_entry_cmd *req) { struct hclge_dev *hdev = vport->back; struct hclge_desc desc; u8 resp_code; + u16 retval; int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_VLAN_REMOVE, false); - memcpy(desc.data, req, sizeof(struct hclge_mac_vlan_tbl_entry)); + memcpy(desc.data, req, sizeof(struct hclge_mac_vlan_tbl_entry_cmd)); ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { @@ -3229,19 +3780,21 @@ static int hclge_remove_mac_vlan_tbl(struct hclge_vport *vport, ret); return ret; } - resp_code = (desc.data[0] >> 8) & 0xff; + resp_code = (le32_to_cpu(desc.data[0]) >> 8) & 0xff; + retval = le16_to_cpu(desc.retval); - return hclge_get_mac_vlan_cmd_status(vport, desc.retval, resp_code, + return hclge_get_mac_vlan_cmd_status(vport, retval, resp_code, HCLGE_MAC_VLAN_REMOVE); } static int hclge_lookup_mac_vlan_tbl(struct hclge_vport *vport, - struct hclge_mac_vlan_tbl_entry *req, + struct hclge_mac_vlan_tbl_entry_cmd *req, struct hclge_desc *desc, bool is_mc) { struct hclge_dev *hdev = vport->back; u8 resp_code; + u16 retval; int ret; hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_MAC_VLAN_ADD, true); @@ -3249,7 +3802,7 @@ static int hclge_lookup_mac_vlan_tbl(struct hclge_vport *vport, desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); memcpy(desc[0].data, req, - sizeof(struct hclge_mac_vlan_tbl_entry)); + sizeof(struct hclge_mac_vlan_tbl_entry_cmd)); hclge_cmd_setup_basic_desc(&desc[1], HCLGE_OPC_MAC_VLAN_ADD, true); @@ -3261,7 +3814,7 @@ static int hclge_lookup_mac_vlan_tbl(struct hclge_vport *vport, } else { memcpy(desc[0].data, req, - sizeof(struct hclge_mac_vlan_tbl_entry)); + sizeof(struct hclge_mac_vlan_tbl_entry_cmd)); ret = hclge_cmd_send(&hdev->hw, desc, 1); } if (ret) { @@ -3270,19 +3823,21 @@ static int hclge_lookup_mac_vlan_tbl(struct hclge_vport *vport, ret); return ret; } - resp_code = (desc[0].data[0] >> 8) & 0xff; + resp_code = (le32_to_cpu(desc[0].data[0]) >> 8) & 0xff; + retval = le16_to_cpu(desc[0].retval); - return hclge_get_mac_vlan_cmd_status(vport, desc[0].retval, resp_code, + return hclge_get_mac_vlan_cmd_status(vport, retval, resp_code, HCLGE_MAC_VLAN_LKUP); } static int hclge_add_mac_vlan_tbl(struct hclge_vport *vport, - struct hclge_mac_vlan_tbl_entry *req, + struct hclge_mac_vlan_tbl_entry_cmd *req, struct hclge_desc *mc_desc) { struct hclge_dev *hdev = vport->back; int cfg_status; u8 resp_code; + u16 retval; int ret; if (!mc_desc) { @@ -3291,25 +3846,29 @@ static int hclge_add_mac_vlan_tbl(struct hclge_vport *vport, hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_VLAN_ADD, false); - memcpy(desc.data, req, sizeof(struct hclge_mac_vlan_tbl_entry)); + memcpy(desc.data, req, + sizeof(struct hclge_mac_vlan_tbl_entry_cmd)); ret = hclge_cmd_send(&hdev->hw, &desc, 1); - resp_code = (desc.data[0] >> 8) & 0xff; - cfg_status = hclge_get_mac_vlan_cmd_status(vport, desc.retval, + resp_code = (le32_to_cpu(desc.data[0]) >> 8) & 0xff; + retval = le16_to_cpu(desc.retval); + + cfg_status = hclge_get_mac_vlan_cmd_status(vport, retval, resp_code, HCLGE_MAC_VLAN_ADD); } else { - mc_desc[0].flag &= cpu_to_le16(~HCLGE_CMD_FLAG_WR); + hclge_cmd_reuse_desc(&mc_desc[0], false); mc_desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); - mc_desc[1].flag &= cpu_to_le16(~HCLGE_CMD_FLAG_WR); + hclge_cmd_reuse_desc(&mc_desc[1], false); mc_desc[1].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); - mc_desc[2].flag &= cpu_to_le16(~HCLGE_CMD_FLAG_WR); + hclge_cmd_reuse_desc(&mc_desc[2], false); mc_desc[2].flag &= cpu_to_le16(~HCLGE_CMD_FLAG_NEXT); memcpy(mc_desc[0].data, req, - sizeof(struct hclge_mac_vlan_tbl_entry)); + sizeof(struct hclge_mac_vlan_tbl_entry_cmd)); ret = hclge_cmd_send(&hdev->hw, mc_desc, 3); - resp_code = (mc_desc[0].data[0] >> 8) & 0xff; - cfg_status = hclge_get_mac_vlan_cmd_status(vport, - mc_desc[0].retval, + resp_code = (le32_to_cpu(mc_desc[0].data[0]) >> 8) & 0xff; + retval = le16_to_cpu(mc_desc[0].retval); + + cfg_status = hclge_get_mac_vlan_cmd_status(vport, retval, resp_code, HCLGE_MAC_VLAN_ADD); } @@ -3336,8 +3895,9 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport, const unsigned char *addr) { struct hclge_dev *hdev = vport->back; - struct hclge_mac_vlan_tbl_entry req; + struct hclge_mac_vlan_tbl_entry_cmd req; enum hclge_cmd_status status; + u16 egress_port = 0; /* mac addr check */ if (is_zero_ether_addr(addr) || @@ -3357,15 +3917,15 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport, hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0); hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT1_EN_B, 0); hnae_set_bit(req.mc_mac_en, HCLGE_MAC_VLAN_BIT0_EN_B, 0); - hnae_set_bit(req.egress_port, - HCLGE_MAC_EPORT_SW_EN_B, 0); - hnae_set_bit(req.egress_port, - HCLGE_MAC_EPORT_TYPE_B, 0); - hnae_set_field(req.egress_port, HCLGE_MAC_EPORT_VFID_M, + + hnae_set_bit(egress_port, HCLGE_MAC_EPORT_SW_EN_B, 0); + hnae_set_bit(egress_port, HCLGE_MAC_EPORT_TYPE_B, 0); + hnae_set_field(egress_port, HCLGE_MAC_EPORT_VFID_M, HCLGE_MAC_EPORT_VFID_S, vport->vport_id); - hnae_set_field(req.egress_port, HCLGE_MAC_EPORT_PFID_M, + hnae_set_field(egress_port, HCLGE_MAC_EPORT_PFID_M, HCLGE_MAC_EPORT_PFID_S, 0); - req.egress_port = cpu_to_le16(req.egress_port); + + req.egress_port = cpu_to_le16(egress_port); hclge_prepare_mac_addr(&req, addr); @@ -3386,7 +3946,7 @@ int hclge_rm_uc_addr_common(struct hclge_vport *vport, const unsigned char *addr) { struct hclge_dev *hdev = vport->back; - struct hclge_mac_vlan_tbl_entry req; + struct hclge_mac_vlan_tbl_entry_cmd req; enum hclge_cmd_status status; /* mac addr check */ @@ -3420,7 +3980,7 @@ int hclge_add_mc_addr_common(struct hclge_vport *vport, const unsigned char *addr) { struct hclge_dev *hdev = vport->back; - struct hclge_mac_vlan_tbl_entry req; + struct hclge_mac_vlan_tbl_entry_cmd req; struct hclge_desc desc[3]; u16 tbl_idx; int status; @@ -3471,7 +4031,7 @@ int hclge_rm_mc_addr_common(struct hclge_vport *vport, const unsigned char *addr) { struct hclge_dev *hdev = vport->back; - struct hclge_mac_vlan_tbl_entry req; + struct hclge_mac_vlan_tbl_entry_cmd req; enum hclge_cmd_status status; struct hclge_desc desc[3]; u16 tbl_idx; @@ -3554,13 +4114,13 @@ static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p) static int hclge_set_vlan_filter_ctrl(struct hclge_dev *hdev, u8 vlan_type, bool filter_en) { - struct hclge_vlan_filter_ctrl *req; + struct hclge_vlan_filter_ctrl_cmd *req; struct hclge_desc desc; int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_FILTER_CTRL, false); - req = (struct hclge_vlan_filter_ctrl *)desc.data; + req = (struct hclge_vlan_filter_ctrl_cmd *)desc.data; req->vlan_type = vlan_type; req->vlan_fe = filter_en; @@ -3578,8 +4138,8 @@ int hclge_set_vf_vlan_common(struct hclge_dev *hdev, int vfid, bool is_kill, u16 vlan, u8 qos, __be16 proto) { #define HCLGE_MAX_VF_BYTES 16 - struct hclge_vlan_filter_vf_cfg *req0; - struct hclge_vlan_filter_vf_cfg *req1; + struct hclge_vlan_filter_vf_cfg_cmd *req0; + struct hclge_vlan_filter_vf_cfg_cmd *req1; struct hclge_desc desc[2]; u8 vf_byte_val; u8 vf_byte_off; @@ -3595,10 +4155,10 @@ int hclge_set_vf_vlan_common(struct hclge_dev *hdev, int vfid, vf_byte_off = vfid / 8; vf_byte_val = 1 << (vfid % 8); - req0 = (struct hclge_vlan_filter_vf_cfg *)desc[0].data; - req1 = (struct hclge_vlan_filter_vf_cfg *)desc[1].data; + req0 = (struct hclge_vlan_filter_vf_cfg_cmd *)desc[0].data; + req1 = (struct hclge_vlan_filter_vf_cfg_cmd *)desc[1].data; - req0->vlan_id = vlan; + req0->vlan_id = cpu_to_le16(vlan); req0->vlan_cfg = is_kill; if (vf_byte_off < HCLGE_MAX_VF_BYTES) @@ -3639,7 +4199,7 @@ static int hclge_set_port_vlan_filter(struct hnae3_handle *handle, { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; - struct hclge_vlan_filter_pf_cfg *req; + struct hclge_vlan_filter_pf_cfg_cmd *req; struct hclge_desc desc; u8 vlan_offset_byte_val; u8 vlan_offset_byte; @@ -3652,7 +4212,7 @@ static int hclge_set_port_vlan_filter(struct hnae3_handle *handle, vlan_offset_byte = (vlan_id % 160) / 8; vlan_offset_byte_val = 1 << (vlan_id % 8); - req = (struct hclge_vlan_filter_pf_cfg *)desc.data; + req = (struct hclge_vlan_filter_pf_cfg_cmd *)desc.data; req->vlan_offset = vlan_offset_160; req->vlan_cfg = is_kill; req->vlan_offset_bitmap[vlan_offset_byte] = vlan_offset_byte_val; @@ -3714,7 +4274,7 @@ static int hclge_init_vlan_config(struct hclge_dev *hdev) static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu) { struct hclge_vport *vport = hclge_get_vport(handle); - struct hclge_config_max_frm_size *req; + struct hclge_config_max_frm_size_cmd *req; struct hclge_dev *hdev = vport->back; struct hclge_desc desc; int ret; @@ -3725,7 +4285,7 @@ static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu) hdev->mps = new_mtu; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_MAX_FRM_SIZE, false); - req = (struct hclge_config_max_frm_size *)desc.data; + req = (struct hclge_config_max_frm_size_cmd *)desc.data; req->max_frm_size = cpu_to_le16(new_mtu); ret = hclge_cmd_send(&hdev->hw, &desc, 1); @@ -3740,13 +4300,13 @@ static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu) static int hclge_send_reset_tqp_cmd(struct hclge_dev *hdev, u16 queue_id, bool enable) { - struct hclge_reset_tqp_queue *req; + struct hclge_reset_tqp_queue_cmd *req; struct hclge_desc desc; int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RESET_TQP_QUEUE, false); - req = (struct hclge_reset_tqp_queue *)desc.data; + req = (struct hclge_reset_tqp_queue_cmd *)desc.data; req->tqp_id = cpu_to_le16(queue_id & HCLGE_RING_ID_MASK); hnae_set_bit(req->reset_req, HCLGE_TQP_RESET_B, enable); @@ -3762,13 +4322,13 @@ static int hclge_send_reset_tqp_cmd(struct hclge_dev *hdev, u16 queue_id, static int hclge_get_reset_status(struct hclge_dev *hdev, u16 queue_id) { - struct hclge_reset_tqp_queue *req; + struct hclge_reset_tqp_queue_cmd *req; struct hclge_desc desc; int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RESET_TQP_QUEUE, true); - req = (struct hclge_reset_tqp_queue *)desc.data; + req = (struct hclge_reset_tqp_queue_cmd *)desc.data; req->tqp_id = cpu_to_le16(queue_id & HCLGE_RING_ID_MASK); ret = hclge_cmd_send(&hdev->hw, &desc, 1); @@ -3981,7 +4541,7 @@ static int hclge_init_client_instance(struct hnae3_client *client, vport->roce.client = client; } - if (hdev->roce_client) { + if (hdev->roce_client && hdev->nic_client) { ret = hclge_init_roce_base_info(vport); if (ret) goto err; @@ -4007,13 +4567,19 @@ static void hclge_uninit_client_instance(struct hnae3_client *client, for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { vport = &hdev->vport[i]; - if (hdev->roce_client) + if (hdev->roce_client) { hdev->roce_client->ops->uninit_instance(&vport->roce, 0); + hdev->roce_client = NULL; + vport->roce.client = NULL; + } if (client->type == HNAE3_CLIENT_ROCE) return; - if (client->ops->uninit_instance) + if (client->ops->uninit_instance) { client->ops->uninit_instance(&vport->nic, 0); + hdev->nic_client = NULL; + vport->nic.client = NULL; + } } } @@ -4056,6 +4622,8 @@ static int hclge_pci_init(struct hclge_dev *hdev) goto err_clr_master; } + hdev->num_req_vfs = pci_sriov_get_totalvfs(pdev); + return 0; err_clr_master: pci_clear_master(pdev); @@ -4072,14 +4640,7 @@ static void hclge_pci_uninit(struct hclge_dev *hdev) { struct pci_dev *pdev = hdev->pdev; - if (hdev->flag & HCLGE_FLAG_USE_MSIX) { - pci_disable_msix(pdev); - devm_kfree(&pdev->dev, hdev->msix_entries); - hdev->msix_entries = NULL; - } else { - pci_disable_msi(pdev); - } - + pci_free_irq_vectors(pdev); pci_clear_master(pdev); pci_release_mem_regions(pdev); pci_disable_device(pdev); @@ -4097,9 +4658,9 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) goto err_hclge_dev; } - hdev->flag |= HCLGE_FLAG_USE_MSIX; hdev->pdev = pdev; hdev->ae_dev = ae_dev; + hdev->reset_type = HNAE3_NONE_RESET; ae_dev->priv = hdev; ret = hclge_pci_init(hdev); @@ -4108,7 +4669,14 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) goto err_pci_init; } - /* Command queue initialize */ + /* Firmware command queue initialize */ + ret = hclge_cmd_queue_init(hdev); + if (ret) { + dev_err(&pdev->dev, "Cmd queue init failed, ret = %d.\n", ret); + return ret; + } + + /* Firmware command initialize */ ret = hclge_cmd_init(hdev); if (ret) goto err_cmd_init; @@ -4126,12 +4694,17 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) return ret; } - if (hdev->flag & HCLGE_FLAG_USE_MSIX) - ret = hclge_init_msix(hdev); - else - ret = hclge_init_msi(hdev); + ret = hclge_init_msi(hdev); if (ret) { - dev_err(&pdev->dev, "Init msix/msi error, ret = %d.\n", ret); + dev_err(&pdev->dev, "Init MSI/MSI-X error, ret = %d.\n", ret); + return ret; + } + + ret = hclge_misc_irq_init(hdev); + if (ret) { + dev_err(&pdev->dev, + "Misc IRQ(vector0) init error, ret = %d.\n", + ret); return ret; } @@ -4147,6 +4720,19 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) return ret; } + ret = hclge_map_tqp(hdev); + if (ret) { + dev_err(&pdev->dev, "Map tqp error, ret = %d.\n", ret); + return ret; + } + + ret = hclge_mac_mdio_config(hdev); + if (ret) { + dev_warn(&hdev->pdev->dev, + "mdio config fail ret=%d\n", ret); + return ret; + } + ret = hclge_mac_init(hdev); if (ret) { dev_err(&pdev->dev, "Mac init error, ret = %d\n", ret); @@ -4182,10 +4768,14 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) return ret; } - setup_timer(&hdev->service_timer, hclge_service_timer, - (unsigned long)hdev); + hclge_dcb_ops_set(hdev); + + timer_setup(&hdev->service_timer, hclge_service_timer, 0); INIT_WORK(&hdev->service_task, hclge_service_task); + /* Enable MISC vector(vector0) */ + hclge_enable_vector(&hdev->misc_vector, true); + set_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state); set_bit(HCLGE_STATE_DOWN, &hdev->state); @@ -4200,6 +4790,91 @@ err_hclge_dev: return ret; } +static void hclge_stats_clear(struct hclge_dev *hdev) +{ + memset(&hdev->hw_stats, 0, sizeof(hdev->hw_stats)); +} + +static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev) +{ + struct hclge_dev *hdev = ae_dev->priv; + struct pci_dev *pdev = ae_dev->pdev; + int ret; + + set_bit(HCLGE_STATE_DOWN, &hdev->state); + + hclge_stats_clear(hdev); + + ret = hclge_cmd_init(hdev); + if (ret) { + dev_err(&pdev->dev, "Cmd queue init failed\n"); + return ret; + } + + ret = hclge_get_cap(hdev); + if (ret) { + dev_err(&pdev->dev, "get hw capability error, ret = %d.\n", + ret); + return ret; + } + + ret = hclge_configure(hdev); + if (ret) { + dev_err(&pdev->dev, "Configure dev error, ret = %d.\n", ret); + return ret; + } + + ret = hclge_map_tqp(hdev); + if (ret) { + dev_err(&pdev->dev, "Map tqp error, ret = %d.\n", ret); + return ret; + } + + ret = hclge_mac_init(hdev); + if (ret) { + dev_err(&pdev->dev, "Mac init error, ret = %d\n", ret); + return ret; + } + + ret = hclge_buffer_alloc(hdev); + if (ret) { + dev_err(&pdev->dev, "Buffer allocate fail, ret =%d\n", ret); + return ret; + } + + ret = hclge_config_tso(hdev, HCLGE_TSO_MSS_MIN, HCLGE_TSO_MSS_MAX); + if (ret) { + dev_err(&pdev->dev, "Enable tso fail, ret =%d\n", ret); + return ret; + } + + ret = hclge_init_vlan_config(hdev); + if (ret) { + dev_err(&pdev->dev, "VLAN init fail, ret =%d\n", ret); + return ret; + } + + ret = hclge_tm_schd_init(hdev); + if (ret) { + dev_err(&pdev->dev, "tm schd init fail, ret =%d\n", ret); + return ret; + } + + ret = hclge_rss_init_hw(hdev); + if (ret) { + dev_err(&pdev->dev, "Rss init fail, ret =%d\n", ret); + return ret; + } + + /* Enable MISC vector(vector0) */ + hclge_enable_vector(&hdev->misc_vector, true); + + dev_info(&pdev->dev, "Reset done, %s driver initialization finished.\n", + HCLGE_DRIVER_NAME); + + return 0; +} + static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) { struct hclge_dev *hdev = ae_dev->priv; @@ -4210,7 +4885,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) if (IS_ENABLED(CONFIG_PCI_IOV)) hclge_disable_sriov(hdev); - if (hdev->service_timer.data) + if (hdev->service_timer.function) del_timer_sync(&hdev->service_timer); if (hdev->service_task.func) cancel_work_sync(&hdev->service_task); @@ -4218,6 +4893,9 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) if (mac->phydev) mdiobus_unregister(mac->mdio_bus); + /* Disable MISC vector(vector0) */ + hclge_enable_vector(&hdev->misc_vector, false); + hclge_free_vector(hdev, 0); hclge_destroy_cmd_queue(&hdev->hw); hclge_pci_uninit(hdev); ae_dev->priv = NULL; @@ -4232,6 +4910,7 @@ static const struct hnae3_ae_ops hclge_ops = { .unmap_ring_from_vector = hclge_unmap_ring_from_vector, .get_vector = hclge_get_vector, .set_promisc_mode = hclge_set_promisc_mode, + .set_loopback = hclge_set_loopback, .start = hclge_ae_start, .stop = hclge_ae_stop, .get_status = hclge_get_status, @@ -4243,6 +4922,8 @@ static const struct hnae3_ae_ops hclge_ops = { .get_rss_indir_size = hclge_get_rss_indir_size, .get_rss = hclge_get_rss, .set_rss = hclge_set_rss, + .set_rss_tuple = hclge_set_rss_tuple, + .get_rss_tuple = hclge_get_rss_tuple, .get_tc_size = hclge_get_tc_size, .get_mac_addr = hclge_get_mac_addr, .set_mac_addr = hclge_set_mac_addr, @@ -4263,6 +4944,7 @@ static const struct hnae3_ae_ops hclge_ops = { .get_mdix_mode = hclge_get_mdix_mode, .set_vlan_filter = hclge_set_port_vlan_filter, .set_vf_vlan_filter = hclge_set_vf_vlan_filter, + .reset_event = hclge_reset_event, }; static struct hnae3_ae_algo ae_algo = { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 9fcfd9395424..7027814ea5d7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -27,12 +27,13 @@ (HCLGE_PF_CFG_BLOCK_SIZE / HCLGE_CFG_RD_LEN_BYTES) #define HCLGE_VECTOR_REG_BASE 0x20000 +#define HCLGE_MISC_VECTOR_REG_BASE 0x20400 #define HCLGE_VECTOR_REG_OFFSET 0x4 #define HCLGE_VECTOR_VF_OFFSET 0x100000 #define HCLGE_RSS_IND_TBL_SIZE 512 -#define HCLGE_RSS_SET_BITMAP_MSK 0xffff +#define HCLGE_RSS_SET_BITMAP_MSK GENMASK(15, 0) #define HCLGE_RSS_KEY_SIZE 40 #define HCLGE_RSS_HASH_ALGO_TOEPLITZ 0 #define HCLGE_RSS_HASH_ALGO_SIMPLE 1 @@ -41,6 +42,14 @@ #define HCLGE_RSS_CFG_TBL_NUM \ (HCLGE_RSS_IND_TBL_SIZE / HCLGE_RSS_CFG_TBL_SIZE) +#define HCLGE_RSS_INPUT_TUPLE_OTHER GENMASK(3, 0) +#define HCLGE_RSS_INPUT_TUPLE_SCTP GENMASK(4, 0) +#define HCLGE_D_PORT_BIT BIT(0) +#define HCLGE_S_PORT_BIT BIT(1) +#define HCLGE_D_IP_BIT BIT(2) +#define HCLGE_S_IP_BIT BIT(3) +#define HCLGE_V_TAG_BIT BIT(4) + #define HCLGE_RSS_TC_SIZE_0 1 #define HCLGE_RSS_TC_SIZE_1 2 #define HCLGE_RSS_TC_SIZE_2 4 @@ -65,11 +74,24 @@ #define HCLGE_PHY_CSS_REG 17 #define HCLGE_PHY_MDIX_CTRL_S (5) -#define HCLGE_PHY_MDIX_CTRL_M (3 << HCLGE_PHY_MDIX_CTRL_S) +#define HCLGE_PHY_MDIX_CTRL_M GENMASK(6, 5) #define HCLGE_PHY_MDIX_STATUS_B (6) #define HCLGE_PHY_SPEED_DUP_RESOLVE_B (11) +/* Reset related Registers */ +#define HCLGE_MISC_RESET_STS_REG 0x20700 +#define HCLGE_GLOBAL_RESET_REG 0x20A00 +#define HCLGE_GLOBAL_RESET_BIT 0x0 +#define HCLGE_CORE_RESET_BIT 0x1 +#define HCLGE_FUN_RST_ING 0x20C00 +#define HCLGE_FUN_RST_ING_B 0 + +/* Vector0 register bits define */ +#define HCLGE_VECTOR0_GLOBALRESET_INT_B 5 +#define HCLGE_VECTOR0_CORERESET_INT_B 6 +#define HCLGE_VECTOR0_IMPRESET_INT_B 7 + enum HCLGE_DEV_STATE { HCLGE_STATE_REINITING, HCLGE_STATE_DOWN, @@ -79,6 +101,7 @@ enum HCLGE_DEV_STATE { HCLGE_STATE_SERVICE_SCHED, HCLGE_STATE_MBX_HANDLING, HCLGE_STATE_MBX_IRQ, + HCLGE_STATE_RESET_INT, HCLGE_STATE_MAX }; @@ -392,17 +415,16 @@ struct hclge_dev { struct pci_dev *pdev; struct hnae3_ae_dev *ae_dev; struct hclge_hw hw; + struct hclge_misc_vector misc_vector; struct hclge_hw_stats hw_stats; unsigned long state; + enum hnae3_reset_type reset_type; u32 fw_version; u16 num_vmdq_vport; /* Num vmdq vport this PF has set up */ u16 num_tqps; /* Num task queue pairs of this PF */ u16 num_req_vfs; /* Num VFs requested for this PF */ - u16 num_roce_msix; /* Num of roce vectors for this PF */ - int roce_base_vector; - /* Base task tqp physical id of this PF */ u16 base_tqp_pid; u16 alloc_rss_size; /* Allocated RSS task queue */ @@ -421,16 +443,21 @@ struct hclge_dev { #define HCLGE_FLAG_TC_BASE_SCH_MODE 1 #define HCLGE_FLAG_VNET_BASE_SCH_MODE 2 u8 tx_sch_mode; + u8 tc_max; + u8 pfc_max; u8 default_up; + u8 dcbx_cap; struct hclge_tm_info tm_info; u16 num_msi; u16 num_msi_left; u16 num_msi_used; u32 base_msi_vector; - struct msix_entry *msix_entries; u16 *vector_status; + int *vector_irq; + u16 num_roce_msi; /* Num of roce vectors for this PF */ + int roce_base_vector; u16 pending_udp_bitmap; @@ -454,17 +481,14 @@ struct hclge_dev { struct hnae3_client *nic_client; struct hnae3_client *roce_client; -#define HCLGE_FLAG_USE_MSI 0x00000001 -#define HCLGE_FLAG_USE_MSIX 0x00000002 -#define HCLGE_FLAG_MAIN 0x00000004 -#define HCLGE_FLAG_DCB_CAPABLE 0x00000008 -#define HCLGE_FLAG_DCB_ENABLE 0x00000010 +#define HCLGE_FLAG_MAIN BIT(0) +#define HCLGE_FLAG_DCB_CAPABLE BIT(1) +#define HCLGE_FLAG_DCB_ENABLE BIT(2) +#define HCLGE_FLAG_MQPRIO_ENABLE BIT(3) u32 flag; u32 pkt_buf_size; /* Total pf buf size for tx/rx */ u32 mps; /* Max packet size */ - struct hclge_priv_buf *priv_buf; - struct hclge_shared_buf s_buf; enum hclge_mta_dmac_sel_type mta_mac_sel_type; bool enable_mta; /* Mutilcast filter enable */ @@ -517,4 +541,7 @@ static inline int hclge_get_queue_id(struct hnae3_queue *queue) int hclge_cfg_mac_speed_dup(struct hclge_dev *hdev, int speed, u8 duplex); int hclge_set_vf_vlan_common(struct hclge_dev *vport, int vfid, bool is_kill, u16 vlan, u8 qos, __be16 proto); + +int hclge_buffer_alloc(struct hclge_dev *hdev); +int hclge_rss_init_hw(struct hclge_dev *hdev); #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c index f32d719c4f77..7069e9408d7d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c @@ -14,6 +14,13 @@ #include "hclge_main.h" #include "hclge_mdio.h" +#define HCLGE_PHY_SUPPORTED_FEATURES (SUPPORTED_Autoneg | \ + SUPPORTED_TP | \ + SUPPORTED_Pause | \ + PHY_10BT_FEATURES | \ + PHY_100BT_FEATURES | \ + PHY_1000BT_FEATURES) + enum hclge_mdio_c22_op_seq { HCLGE_MDIO_C22_WRITE = 1, HCLGE_MDIO_C22_READ = 2 @@ -195,6 +202,9 @@ int hclge_mac_start_phy(struct hclge_dev *hdev) return ret; } + phydev->supported &= HCLGE_PHY_SUPPORTED_FEATURES; + phydev->advertising = phydev->supported; + phy_start(phydev); return 0; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 73a75d7cc551..7bfa2e5497cb 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -124,6 +124,20 @@ static int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx) return hclge_cmd_send(&hdev->hw, &desc, 1); } +static int hclge_pfc_pause_en_cfg(struct hclge_dev *hdev, u8 tx_rx_bitmap, + u8 pfc_bitmap) +{ + struct hclge_desc desc; + struct hclge_pfc_en_cmd *pfc = (struct hclge_pfc_en_cmd *)&desc.data; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_PFC_PAUSE_EN, false); + + pfc->tx_rx_en_bitmap = tx_rx_bitmap; + pfc->pri_en_bitmap = pfc_bitmap; + + return hclge_cmd_send(&hdev->hw, &desc, 1); +} + static int hclge_fill_pri_array(struct hclge_dev *hdev, u8 *pri, u8 pri_id) { u8 tc; @@ -269,6 +283,7 @@ static int hclge_tm_pg_shapping_cfg(struct hclge_dev *hdev, struct hclge_pg_shapping_cmd *shap_cfg_cmd; enum hclge_opcode_type opcode; struct hclge_desc desc; + u32 shapping_para = 0; opcode = bucket ? HCLGE_OPC_TM_PG_P_SHAPPING : HCLGE_OPC_TM_PG_C_SHAPPING; @@ -278,11 +293,41 @@ static int hclge_tm_pg_shapping_cfg(struct hclge_dev *hdev, shap_cfg_cmd->pg_id = pg_id; - hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, IR_B, ir_b); - hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, IR_U, ir_u); - hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, IR_S, ir_s); - hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, BS_B, bs_b); - hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, BS_S, bs_s); + hclge_tm_set_field(shapping_para, IR_B, ir_b); + hclge_tm_set_field(shapping_para, IR_U, ir_u); + hclge_tm_set_field(shapping_para, IR_S, ir_s); + hclge_tm_set_field(shapping_para, BS_B, bs_b); + hclge_tm_set_field(shapping_para, BS_S, bs_s); + + shap_cfg_cmd->pg_shapping_para = cpu_to_le32(shapping_para); + + return hclge_cmd_send(&hdev->hw, &desc, 1); +} + +static int hclge_tm_port_shaper_cfg(struct hclge_dev *hdev) +{ + struct hclge_port_shapping_cmd *shap_cfg_cmd; + struct hclge_desc desc; + u32 shapping_para = 0; + u8 ir_u, ir_b, ir_s; + int ret; + + ret = hclge_shaper_para_calc(HCLGE_ETHER_MAX_RATE, + HCLGE_SHAPER_LVL_PORT, + &ir_b, &ir_u, &ir_s); + if (ret) + return ret; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_PORT_SHAPPING, false); + shap_cfg_cmd = (struct hclge_port_shapping_cmd *)desc.data; + + hclge_tm_set_field(shapping_para, IR_B, ir_b); + hclge_tm_set_field(shapping_para, IR_U, ir_u); + hclge_tm_set_field(shapping_para, IR_S, ir_s); + hclge_tm_set_field(shapping_para, BS_B, HCLGE_SHAPER_BS_U_DEF); + hclge_tm_set_field(shapping_para, BS_S, HCLGE_SHAPER_BS_S_DEF); + + shap_cfg_cmd->port_shapping_para = cpu_to_le32(shapping_para); return hclge_cmd_send(&hdev->hw, &desc, 1); } @@ -295,6 +340,7 @@ static int hclge_tm_pri_shapping_cfg(struct hclge_dev *hdev, struct hclge_pri_shapping_cmd *shap_cfg_cmd; enum hclge_opcode_type opcode; struct hclge_desc desc; + u32 shapping_para = 0; opcode = bucket ? HCLGE_OPC_TM_PRI_P_SHAPPING : HCLGE_OPC_TM_PRI_C_SHAPPING; @@ -305,11 +351,13 @@ static int hclge_tm_pri_shapping_cfg(struct hclge_dev *hdev, shap_cfg_cmd->pri_id = pri_id; - hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, IR_B, ir_b); - hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, IR_U, ir_u); - hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, IR_S, ir_s); - hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, BS_B, bs_b); - hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, BS_S, bs_s); + hclge_tm_set_field(shapping_para, IR_B, ir_b); + hclge_tm_set_field(shapping_para, IR_U, ir_u); + hclge_tm_set_field(shapping_para, IR_S, ir_s); + hclge_tm_set_field(shapping_para, BS_B, bs_b); + hclge_tm_set_field(shapping_para, BS_S, bs_s); + + shap_cfg_cmd->pri_shapping_para = cpu_to_le32(shapping_para); return hclge_cmd_send(&hdev->hw, &desc, 1); } @@ -346,13 +394,13 @@ static int hclge_tm_pri_schd_mode_cfg(struct hclge_dev *hdev, u8 pri_id) return hclge_cmd_send(&hdev->hw, &desc, 1); } -static int hclge_tm_qs_schd_mode_cfg(struct hclge_dev *hdev, u16 qs_id) +static int hclge_tm_qs_schd_mode_cfg(struct hclge_dev *hdev, u16 qs_id, u8 mode) { struct hclge_desc desc; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_QS_SCH_MODE_CFG, false); - if (hdev->tm_info.tc_info[qs_id].tc_sch_mode == HCLGE_SCH_MODE_DWRR) + if (mode == HCLGE_SCH_MODE_DWRR) desc.data[1] = cpu_to_le32(HCLGE_TM_TX_SCHD_DWRR_MSK); else desc.data[1] = 0; @@ -386,7 +434,6 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport) struct hclge_dev *hdev = vport->back; u8 i; - kinfo = &vport->nic.kinfo; vport->bw_limit = hdev->tm_info.pg_info[0].bw_limit; kinfo->num_tc = min_t(u16, kinfo->num_tqps, hdev->tm_info.num_tc); @@ -444,7 +491,11 @@ static void hclge_tm_tc_info_init(struct hclge_dev *hdev) hdev->tm_info.prio_tc[i] = (i >= hdev->tm_info.num_tc) ? 0 : i; - hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE; + /* DCB is enabled if we have more than 1 TC */ + if (hdev->tm_info.num_tc > 1) + hdev->flag |= HCLGE_FLAG_DCB_ENABLE; + else + hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE; } static void hclge_tm_pg_info_init(struct hclge_dev *hdev) @@ -470,6 +521,24 @@ static void hclge_tm_pg_info_init(struct hclge_dev *hdev) } } +static void hclge_pfc_info_init(struct hclge_dev *hdev) +{ + if (!(hdev->flag & HCLGE_FLAG_DCB_ENABLE)) { + if (hdev->fc_mode_last_time == HCLGE_FC_PFC) + dev_warn(&hdev->pdev->dev, + "DCB is disable, but last mode is FC_PFC\n"); + + hdev->tm_info.fc_mode = hdev->fc_mode_last_time; + } else if (hdev->tm_info.fc_mode != HCLGE_FC_PFC) { + /* fc_mode_last_time record the last fc_mode when + * DCB is enabled, so that fc_mode can be set to + * the correct value when DCB is disabled. + */ + hdev->fc_mode_last_time = hdev->tm_info.fc_mode; + hdev->tm_info.fc_mode = HCLGE_FC_PFC; + } +} + static int hclge_tm_schd_info_init(struct hclge_dev *hdev) { if ((hdev->tx_sch_mode != HCLGE_FLAG_TC_BASE_SCH_MODE) && @@ -482,8 +551,7 @@ static int hclge_tm_schd_info_init(struct hclge_dev *hdev) hclge_tm_vport_info_update(hdev); - hdev->tm_info.fc_mode = HCLGE_FC_NONE; - hdev->fc_mode_last_time = hdev->tm_info.fc_mode; + hclge_pfc_info_init(hdev); return 0; } @@ -596,17 +664,18 @@ static int hclge_tm_pri_q_qs_cfg(struct hclge_dev *hdev) { struct hclge_vport *vport = hdev->vport; int ret; - u32 i; + u32 i, k; if (hdev->tx_sch_mode == HCLGE_FLAG_TC_BASE_SCH_MODE) { /* Cfg qs -> pri mapping, one by one mapping */ - for (i = 0; i < hdev->tm_info.num_tc; i++) { - ret = hclge_tm_qs_to_pri_map_cfg(hdev, i, i); - if (ret) - return ret; - } + for (k = 0; k < hdev->num_alloc_vport; k++) + for (i = 0; i < hdev->tm_info.num_tc; i++) { + ret = hclge_tm_qs_to_pri_map_cfg( + hdev, vport[k].qs_offset + i, i); + if (ret) + return ret; + } } else if (hdev->tx_sch_mode == HCLGE_FLAG_VNET_BASE_SCH_MODE) { - int k; /* Cfg qs -> pri mapping, qs = tc, pri = vf, 8 qs -> 1 pri */ for (k = 0; k < hdev->num_alloc_vport; k++) for (i = 0; i < HNAE3_MAX_TC; i++) { @@ -696,13 +765,11 @@ static int hclge_tm_pri_vnet_base_shaper_qs_cfg(struct hclge_vport *vport) { struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo; struct hclge_dev *hdev = vport->back; - struct hnae3_tc_info *v_tc_info; u8 ir_u, ir_b, ir_s; u32 i; int ret; for (i = 0; i < kinfo->num_tc; i++) { - v_tc_info = &kinfo->tc_info[i]; ret = hclge_shaper_para_calc( hdev->tm_info.tc_info[i].bw_limit, HCLGE_SHAPER_LVL_QSET, @@ -755,10 +822,11 @@ static int hclge_tm_pri_shaper_cfg(struct hclge_dev *hdev) static int hclge_tm_pri_tc_base_dwrr_cfg(struct hclge_dev *hdev) { + struct hclge_vport *vport = hdev->vport; struct hclge_pg_info *pg_info; u8 dwrr; int ret; - u32 i; + u32 i, k; for (i = 0; i < hdev->tm_info.num_tc; i++) { pg_info = @@ -769,9 +837,13 @@ static int hclge_tm_pri_tc_base_dwrr_cfg(struct hclge_dev *hdev) if (ret) return ret; - ret = hclge_tm_qs_weight_cfg(hdev, i, dwrr); - if (ret) - return ret; + for (k = 0; k < hdev->num_alloc_vport; k++) { + ret = hclge_tm_qs_weight_cfg( + hdev, vport[k].qs_offset + i, + vport[k].dwrr); + if (ret) + return ret; + } } return 0; @@ -835,10 +907,14 @@ static int hclge_tm_pri_dwrr_cfg(struct hclge_dev *hdev) return 0; } -static int hclge_tm_map_cfg(struct hclge_dev *hdev) +int hclge_tm_map_cfg(struct hclge_dev *hdev) { int ret; + ret = hclge_up_to_tc_map(hdev); + if (ret) + return ret; + ret = hclge_tm_pg_to_pri_map(hdev); if (ret) return ret; @@ -850,6 +926,10 @@ static int hclge_tm_shaper_cfg(struct hclge_dev *hdev) { int ret; + ret = hclge_tm_port_shaper_cfg(hdev); + if (ret) + return ret; + ret = hclge_tm_pg_shaper_cfg(hdev); if (ret) return ret; @@ -898,7 +978,10 @@ static int hclge_tm_schd_mode_vnet_base_cfg(struct hclge_vport *vport) return ret; for (i = 0; i < kinfo->num_tc; i++) { - ret = hclge_tm_qs_schd_mode_cfg(hdev, vport->qs_offset + i); + u8 sch_mode = hdev->tm_info.tc_info[i].tc_sch_mode; + + ret = hclge_tm_qs_schd_mode_cfg(hdev, vport->qs_offset + i, + sch_mode); if (ret) return ret; } @@ -910,7 +993,7 @@ static int hclge_tm_lvl34_schd_mode_cfg(struct hclge_dev *hdev) { struct hclge_vport *vport = hdev->vport; int ret; - u8 i; + u8 i, k; if (hdev->tx_sch_mode == HCLGE_FLAG_TC_BASE_SCH_MODE) { for (i = 0; i < hdev->tm_info.num_tc; i++) { @@ -918,9 +1001,13 @@ static int hclge_tm_lvl34_schd_mode_cfg(struct hclge_dev *hdev) if (ret) return ret; - ret = hclge_tm_qs_schd_mode_cfg(hdev, i); - if (ret) - return ret; + for (k = 0; k < hdev->num_alloc_vport; k++) { + ret = hclge_tm_qs_schd_mode_cfg( + hdev, vport[k].qs_offset + i, + HCLGE_SCH_MODE_DWRR); + if (ret) + return ret; + } } } else { for (i = 0; i < hdev->num_alloc_vport; i++) { @@ -935,7 +1022,7 @@ static int hclge_tm_lvl34_schd_mode_cfg(struct hclge_dev *hdev) return 0; } -static int hclge_tm_schd_mode_hw(struct hclge_dev *hdev) +int hclge_tm_schd_mode_hw(struct hclge_dev *hdev) { int ret; @@ -969,27 +1056,109 @@ static int hclge_tm_schd_setup_hw(struct hclge_dev *hdev) return hclge_tm_schd_mode_hw(hdev); } +static int hclge_pfc_setup_hw(struct hclge_dev *hdev) +{ + u8 enable_bitmap = 0; + + if (hdev->tm_info.fc_mode == HCLGE_FC_PFC) + enable_bitmap = HCLGE_TX_MAC_PAUSE_EN_MSK | + HCLGE_RX_MAC_PAUSE_EN_MSK; + + return hclge_pfc_pause_en_cfg(hdev, enable_bitmap, + hdev->tm_info.hw_pfc_map); +} + +static int hclge_mac_pause_setup_hw(struct hclge_dev *hdev) +{ + bool tx_en, rx_en; + + switch (hdev->tm_info.fc_mode) { + case HCLGE_FC_NONE: + tx_en = false; + rx_en = false; + break; + case HCLGE_FC_RX_PAUSE: + tx_en = false; + rx_en = true; + break; + case HCLGE_FC_TX_PAUSE: + tx_en = true; + rx_en = false; + break; + case HCLGE_FC_FULL: + tx_en = true; + rx_en = true; + break; + default: + tx_en = true; + rx_en = true; + } + + return hclge_mac_pause_en_cfg(hdev, tx_en, rx_en); +} + int hclge_pause_setup_hw(struct hclge_dev *hdev) { - bool en = hdev->tm_info.fc_mode != HCLGE_FC_PFC; int ret; u8 i; - ret = hclge_mac_pause_en_cfg(hdev, en, en); - if (ret) - return ret; + if (hdev->tm_info.fc_mode != HCLGE_FC_PFC) + return hclge_mac_pause_setup_hw(hdev); - /* Only DCB-supported dev supports qset back pressure setting */ + /* Only DCB-supported dev supports qset back pressure and pfc cmd */ if (!hnae3_dev_dcb_supported(hdev)) return 0; + /* When MAC is GE Mode, hdev does not support pfc setting */ + ret = hclge_pfc_setup_hw(hdev); + if (ret) + dev_warn(&hdev->pdev->dev, "set pfc pause failed:%d\n", ret); + for (i = 0; i < hdev->tm_info.num_tc; i++) { ret = hclge_tm_qs_bp_cfg(hdev, i); if (ret) return ret; } - return hclge_up_to_tc_map(hdev); + return 0; +} + +int hclge_tm_prio_tc_info_update(struct hclge_dev *hdev, u8 *prio_tc) +{ + struct hclge_vport *vport = hdev->vport; + struct hnae3_knic_private_info *kinfo; + u32 i, k; + + for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) { + if (prio_tc[i] >= hdev->tm_info.num_tc) + return -EINVAL; + hdev->tm_info.prio_tc[i] = prio_tc[i]; + + for (k = 0; k < hdev->num_alloc_vport; k++) { + kinfo = &vport[k].nic.kinfo; + kinfo->prio_tc[i] = prio_tc[i]; + } + } + return 0; +} + +void hclge_tm_schd_info_update(struct hclge_dev *hdev, u8 num_tc) +{ + u8 i, bit_map = 0; + + hdev->tm_info.num_tc = num_tc; + + for (i = 0; i < hdev->tm_info.num_tc; i++) + bit_map |= BIT(i); + + if (!bit_map) { + bit_map = 1; + hdev->tm_info.num_tc = 1; + } + + hdev->hw_tc_map = bit_map; + + hclge_tm_schd_info_init(hdev); } int hclge_tm_init_hw(struct hclge_dev *hdev) @@ -1013,8 +1182,13 @@ int hclge_tm_init_hw(struct hclge_dev *hdev) int hclge_tm_schd_init(struct hclge_dev *hdev) { - int ret = hclge_tm_schd_info_init(hdev); + int ret; + + /* fc_mode is HCLGE_FC_FULL on reset */ + hdev->tm_info.fc_mode = HCLGE_FC_FULL; + hdev->fc_mode_last_time = hdev->tm_info.fc_mode; + ret = hclge_tm_schd_info_init(hdev); if (ret) return ret; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h index 85158b0d73fe..bf59961918ab 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h @@ -94,6 +94,15 @@ struct hclge_bp_to_qs_map_cmd { u32 rsvd1; }; +struct hclge_pfc_en_cmd { + u8 tx_rx_en_bitmap; + u8 pri_en_bitmap; +}; + +struct hclge_port_shapping_cmd { + __le32 port_shapping_para; +}; + #define hclge_tm_set_field(dest, string, val) \ hnae_set_field((dest), (HCLGE_TM_SHAP_##string##_MSK), \ (HCLGE_TM_SHAP_##string##_LSH), val) @@ -103,4 +112,10 @@ struct hclge_bp_to_qs_map_cmd { int hclge_tm_schd_init(struct hclge_dev *hdev); int hclge_pause_setup_hw(struct hclge_dev *hdev); +int hclge_tm_schd_mode_hw(struct hclge_dev *hdev); +int hclge_tm_prio_tc_info_update(struct hclge_dev *hdev, u8 *prio_tc); +void hclge_tm_schd_info_update(struct hclge_dev *hdev, u8 num_tc); +int hclge_tm_dwrr_cfg(struct hclge_dev *hdev); +int hclge_tm_map_cfg(struct hclge_dev *hdev); +int hclge_tm_init_hw(struct hclge_dev *hdev); #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c new file mode 100644 index 000000000000..925619a7c50a --- /dev/null +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2016-2017 Hisilicon Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include "hnae3.h" +#include "hns3_enet.h" + +static +int hns3_dcbnl_ieee_getets(struct net_device *ndev, struct ieee_ets *ets) +{ + struct hnae3_handle *h = hns3_get_handle(ndev); + + if (h->kinfo.dcb_ops->ieee_getets) + return h->kinfo.dcb_ops->ieee_getets(h, ets); + + return -EOPNOTSUPP; +} + +static +int hns3_dcbnl_ieee_setets(struct net_device *ndev, struct ieee_ets *ets) +{ + struct hnae3_handle *h = hns3_get_handle(ndev); + + if (h->kinfo.dcb_ops->ieee_setets) + return h->kinfo.dcb_ops->ieee_setets(h, ets); + + return -EOPNOTSUPP; +} + +static +int hns3_dcbnl_ieee_getpfc(struct net_device *ndev, struct ieee_pfc *pfc) +{ + struct hnae3_handle *h = hns3_get_handle(ndev); + + if (h->kinfo.dcb_ops->ieee_getpfc) + return h->kinfo.dcb_ops->ieee_getpfc(h, pfc); + + return -EOPNOTSUPP; +} + +static +int hns3_dcbnl_ieee_setpfc(struct net_device *ndev, struct ieee_pfc *pfc) +{ + struct hnae3_handle *h = hns3_get_handle(ndev); + + if (h->kinfo.dcb_ops->ieee_setpfc) + return h->kinfo.dcb_ops->ieee_setpfc(h, pfc); + + return -EOPNOTSUPP; +} + +/* DCBX configuration */ +static u8 hns3_dcbnl_getdcbx(struct net_device *ndev) +{ + struct hnae3_handle *h = hns3_get_handle(ndev); + + if (h->kinfo.dcb_ops->getdcbx) + return h->kinfo.dcb_ops->getdcbx(h); + + return 0; +} + +/* return 0 if successful, otherwise fail */ +static u8 hns3_dcbnl_setdcbx(struct net_device *ndev, u8 mode) +{ + struct hnae3_handle *h = hns3_get_handle(ndev); + + if (h->kinfo.dcb_ops->setdcbx) + return h->kinfo.dcb_ops->setdcbx(h, mode); + + return 1; +} + +static const struct dcbnl_rtnl_ops hns3_dcbnl_ops = { + .ieee_getets = hns3_dcbnl_ieee_getets, + .ieee_setets = hns3_dcbnl_ieee_setets, + .ieee_getpfc = hns3_dcbnl_ieee_getpfc, + .ieee_setpfc = hns3_dcbnl_ieee_setpfc, + .getdcbx = hns3_dcbnl_getdcbx, + .setdcbx = hns3_dcbnl_setdcbx, +}; + +/* hclge_dcbnl_setup - DCBNL setup + * @handle: the corresponding vport handle + * Set up DCBNL + */ +void hns3_dcbnl_setup(struct hnae3_handle *handle) +{ + struct net_device *dev = handle->kinfo.netdev; + + if (!handle->kinfo.dcb_ops) + return; + + dev->dcbnl_ops = &hns3_dcbnl_ops; +} diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index 35369e1c8036..59415090ff0f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -19,12 +19,13 @@ #include <linux/sctp.h> #include <linux/vermagic.h> #include <net/gre.h> +#include <net/pkt_cls.h> #include <net/vxlan.h> #include "hnae3.h" #include "hns3_enet.h" -const char hns3_driver_name[] = "hns3"; +static const char hns3_driver_name[] = "hns3"; const char hns3_driver_version[] = VERMAGIC_STRING; static const char hns3_driver_string[] = "Hisilicon Ethernet Network Driver for Hip08 Family"; @@ -196,6 +197,31 @@ static void hns3_vector_gl_rl_init(struct hns3_enet_tqp_vector *tqp_vector) tqp_vector->tx_group.flow_level = HNS3_FLOW_LOW; } +static int hns3_nic_set_real_num_queue(struct net_device *netdev) +{ + struct hnae3_handle *h = hns3_get_handle(netdev); + struct hnae3_knic_private_info *kinfo = &h->kinfo; + unsigned int queue_size = kinfo->rss_size * kinfo->num_tc; + int ret; + + ret = netif_set_real_num_tx_queues(netdev, queue_size); + if (ret) { + netdev_err(netdev, + "netif_set_real_num_tx_queues fail, ret=%d!\n", + ret); + return ret; + } + + ret = netif_set_real_num_rx_queues(netdev, queue_size); + if (ret) { + netdev_err(netdev, + "netif_set_real_num_rx_queues fail, ret=%d!\n", ret); + return ret; + } + + return 0; +} + static int hns3_nic_net_up(struct net_device *netdev) { struct hns3_nic_priv *priv = netdev_priv(netdev); @@ -233,25 +259,13 @@ out_start_err: static int hns3_nic_net_open(struct net_device *netdev) { struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; int ret; netif_carrier_off(netdev); - ret = netif_set_real_num_tx_queues(netdev, h->kinfo.num_tqps); - if (ret) { - netdev_err(netdev, - "netif_set_real_num_tx_queues fail, ret=%d!\n", - ret); + ret = hns3_nic_set_real_num_queue(netdev); + if (ret) return ret; - } - - ret = netif_set_real_num_rx_queues(netdev, h->kinfo.num_tqps); - if (ret) { - netdev_err(netdev, - "netif_set_real_num_rx_queues fail, ret=%d!\n", ret); - return ret; - } ret = hns3_nic_net_up(netdev); if (ret) { @@ -260,6 +274,7 @@ static int hns3_nic_net_open(struct net_device *netdev) return ret; } + priv->last_reset_time = jiffies; return 0; } @@ -292,24 +307,10 @@ static int hns3_nic_net_stop(struct net_device *netdev) return 0; } -void hns3_set_multicast_list(struct net_device *netdev) -{ - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; - struct netdev_hw_addr *ha = NULL; - - if (h->ae_algo->ops->set_mc_addr) { - netdev_for_each_mc_addr(ha, netdev) - if (h->ae_algo->ops->set_mc_addr(h, ha->addr)) - netdev_err(netdev, "set multicast fail\n"); - } -} - static int hns3_nic_uc_sync(struct net_device *netdev, const unsigned char *addr) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); if (h->ae_algo->ops->add_uc_addr) return h->ae_algo->ops->add_uc_addr(h, addr); @@ -320,8 +321,7 @@ static int hns3_nic_uc_sync(struct net_device *netdev, static int hns3_nic_uc_unsync(struct net_device *netdev, const unsigned char *addr) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); if (h->ae_algo->ops->rm_uc_addr) return h->ae_algo->ops->rm_uc_addr(h, addr); @@ -332,8 +332,7 @@ static int hns3_nic_uc_unsync(struct net_device *netdev, static int hns3_nic_mc_sync(struct net_device *netdev, const unsigned char *addr) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); if (h->ae_algo->ops->add_mc_addr) return h->ae_algo->ops->add_mc_addr(h, addr); @@ -344,8 +343,7 @@ static int hns3_nic_mc_sync(struct net_device *netdev, static int hns3_nic_mc_unsync(struct net_device *netdev, const unsigned char *addr) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); if (h->ae_algo->ops->rm_mc_addr) return h->ae_algo->ops->rm_mc_addr(h, addr); @@ -353,10 +351,9 @@ static int hns3_nic_mc_unsync(struct net_device *netdev, return 0; } -void hns3_nic_set_rx_mode(struct net_device *netdev) +static void hns3_nic_set_rx_mode(struct net_device *netdev) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); if (h->ae_algo->ops->set_promisc_mode) { if (netdev->flags & IFF_PROMISC) @@ -721,7 +718,7 @@ static void hns3_set_txbd_baseinfo(u16 *bdtp_fe_sc_vld_ra_ri, int frag_end) HNS3_TXD_BDTYPE_M, 0); hnae_set_bit(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_FE_B, !!frag_end); hnae_set_bit(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_VLD_B, 1); - hnae_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_SC_M, HNS3_TXD_SC_S, 1); + hnae_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_SC_M, HNS3_TXD_SC_S, 0); } static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, @@ -755,7 +752,7 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, if (type == DESC_TYPE_SKB) { skb = (struct sk_buff *)priv; - paylen = cpu_to_le16(skb->len); + paylen = skb->len; if (skb->ip_summed == CHECKSUM_PARTIAL) { skb_reset_mac_len(skb); @@ -789,7 +786,7 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, cpu_to_le32(ol_type_vlan_len_msec); desc->tx.type_cs_vlan_tso_len = cpu_to_le32(type_cs_vlan_tso); - desc->tx.paylen = cpu_to_le16(paylen); + desc->tx.paylen = cpu_to_le32(paylen); desc->tx.mss = cpu_to_le16(mss); } @@ -905,8 +902,7 @@ static void hns_nic_dma_unmap(struct hns3_enet_ring *ring, int next_to_use_orig) } } -static netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, - struct net_device *netdev) +netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev) { struct hns3_nic_priv *priv = netdev_priv(netdev); struct hns3_nic_ring_data *ring_data = @@ -1012,8 +1008,7 @@ out_net_tx_busy: static int hns3_nic_net_set_mac_address(struct net_device *netdev, void *p) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); struct sockaddr *mac_addr = p; int ret; @@ -1193,61 +1188,80 @@ static void hns3_nic_udp_tunnel_del(struct net_device *netdev, } } -static int hns3_setup_tc(struct net_device *netdev, u8 tc) +static int hns3_setup_tc(struct net_device *netdev, void *type_data) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct tc_mqprio_qopt_offload *mqprio_qopt = type_data; + struct hnae3_handle *h = hns3_get_handle(netdev); struct hnae3_knic_private_info *kinfo = &h->kinfo; + u8 *prio_tc = mqprio_qopt->qopt.prio_tc_map; + u8 tc = mqprio_qopt->qopt.num_tc; + u16 mode = mqprio_qopt->mode; + u8 hw = mqprio_qopt->qopt.hw; + bool if_running; unsigned int i; int ret; + if (!((hw == TC_MQPRIO_HW_OFFLOAD_TCS && + mode == TC_MQPRIO_MODE_CHANNEL) || (!hw && tc == 0))) + return -EOPNOTSUPP; + if (tc > HNAE3_MAX_TC) return -EINVAL; - if (kinfo->num_tc == tc) - return 0; - if (!netdev) return -EINVAL; - if (!tc) { - netdev_reset_tc(netdev); - return 0; + if_running = netif_running(netdev); + if (if_running) { + hns3_nic_net_stop(netdev); + msleep(100); } - /* Set num_tc for netdev */ - ret = netdev_set_num_tc(netdev, tc); + ret = (kinfo->dcb_ops && kinfo->dcb_ops->setup_tc) ? + kinfo->dcb_ops->setup_tc(h, tc, prio_tc) : -EOPNOTSUPP; if (ret) - return ret; + goto out; + + if (tc <= 1) { + netdev_reset_tc(netdev); + } else { + ret = netdev_set_num_tc(netdev, tc); + if (ret) + goto out; + + for (i = 0; i < HNAE3_MAX_TC; i++) { + if (!kinfo->tc_info[i].enable) + continue; - /* Set per TC queues for the VSI */ - for (i = 0; i < HNAE3_MAX_TC; i++) { - if (kinfo->tc_info[i].enable) netdev_set_tc_queue(netdev, kinfo->tc_info[i].tc, kinfo->tc_info[i].tqp_count, kinfo->tc_info[i].tqp_offset); + } } - return 0; + ret = hns3_nic_set_real_num_queue(netdev); + +out: + if (if_running) + hns3_nic_net_open(netdev); + + return ret; } static int hns3_nic_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { - struct tc_mqprio_qopt *mqprio = type_data; - - if (type != TC_SETUP_MQPRIO) + if (type != TC_SETUP_QDISC_MQPRIO) return -EOPNOTSUPP; - return hns3_setup_tc(dev, mqprio->num_tc); + return hns3_setup_tc(dev, type_data); } static int hns3_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); int ret = -EIO; if (h->ae_algo->ops->set_vlan_filter) @@ -1259,8 +1273,7 @@ static int hns3_vlan_rx_add_vid(struct net_device *netdev, static int hns3_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); int ret = -EIO; if (h->ae_algo->ops->set_vlan_filter) @@ -1272,8 +1285,7 @@ static int hns3_vlan_rx_kill_vid(struct net_device *netdev, static int hns3_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos, __be16 vlan_proto) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); int ret = -EIO; if (h->ae_algo->ops->set_vf_vlan_filter) @@ -1285,8 +1297,7 @@ static int hns3_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, static int hns3_nic_change_mtu(struct net_device *netdev, int new_mtu) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); bool if_running = netif_running(netdev); int ret; @@ -1313,10 +1324,91 @@ static int hns3_nic_change_mtu(struct net_device *netdev, int new_mtu) return ret; } +static bool hns3_get_tx_timeo_queue_info(struct net_device *ndev) +{ + struct hns3_nic_priv *priv = netdev_priv(ndev); + struct hns3_enet_ring *tx_ring = NULL; + int timeout_queue = 0; + int hw_head, hw_tail; + int i; + + /* Find the stopped queue the same way the stack does */ + for (i = 0; i < ndev->real_num_tx_queues; i++) { + struct netdev_queue *q; + unsigned long trans_start; + + q = netdev_get_tx_queue(ndev, i); + trans_start = q->trans_start; + if (netif_xmit_stopped(q) && + time_after(jiffies, + (trans_start + ndev->watchdog_timeo))) { + timeout_queue = i; + break; + } + } + + if (i == ndev->num_tx_queues) { + netdev_info(ndev, + "no netdev TX timeout queue found, timeout count: %llu\n", + priv->tx_timeout_count); + return false; + } + + tx_ring = priv->ring_data[timeout_queue].ring; + + hw_head = readl_relaxed(tx_ring->tqp->io_base + + HNS3_RING_TX_RING_HEAD_REG); + hw_tail = readl_relaxed(tx_ring->tqp->io_base + + HNS3_RING_TX_RING_TAIL_REG); + netdev_info(ndev, + "tx_timeout count: %llu, queue id: %d, SW_NTU: 0x%x, SW_NTC: 0x%x, HW_HEAD: 0x%x, HW_TAIL: 0x%x, INT: 0x%x\n", + priv->tx_timeout_count, + timeout_queue, + tx_ring->next_to_use, + tx_ring->next_to_clean, + hw_head, + hw_tail, + readl(tx_ring->tqp_vector->mask_addr)); + + return true; +} + +static void hns3_nic_net_timeout(struct net_device *ndev) +{ + struct hns3_nic_priv *priv = netdev_priv(ndev); + unsigned long last_reset_time = priv->last_reset_time; + struct hnae3_handle *h = priv->ae_handle; + + if (!hns3_get_tx_timeo_queue_info(ndev)) + return; + + priv->tx_timeout_count++; + + /* This timeout is far away enough from last timeout, + * if timeout again,set the reset type to PF reset + */ + if (time_after(jiffies, (last_reset_time + 20 * HZ))) + priv->reset_level = HNAE3_FUNC_RESET; + + /* Don't do any new action before the next timeout */ + else if (time_before(jiffies, (last_reset_time + ndev->watchdog_timeo))) + return; + + priv->last_reset_time = jiffies; + + if (h->ae_algo->ops->reset_event) + h->ae_algo->ops->reset_event(h, priv->reset_level); + + priv->reset_level++; + if (priv->reset_level > HNAE3_GLOBAL_RESET) + priv->reset_level = HNAE3_GLOBAL_RESET; +} + static const struct net_device_ops hns3_nic_netdev_ops = { .ndo_open = hns3_nic_net_open, .ndo_stop = hns3_nic_net_stop, .ndo_start_xmit = hns3_nic_net_xmit, + .ndo_tx_timeout = hns3_nic_net_timeout, .ndo_set_mac_address = hns3_nic_net_set_mac_address, .ndo_change_mtu = hns3_nic_change_mtu, .ndo_set_features = hns3_nic_set_features, @@ -1435,8 +1527,6 @@ static int hns3_alloc_buffer(struct hns3_enet_ring *ring, cb->length = hnae_page_size(ring); cb->type = DESC_TYPE_PAGE; - memset(cb->buf, 0, cb->length); - return 0; } @@ -1546,7 +1636,7 @@ static int hns3_reserve_buffer_map(struct hns3_enet_ring *ring, return 0; out_with_buf: - hns3_free_buffers(ring); + hns3_free_buffer(ring, cb); out: return ret; } @@ -1586,7 +1676,7 @@ out_buffer_fail: static void hns3_replace_buffer(struct hns3_enet_ring *ring, int i, struct hns3_desc_cb *res_cb) { - hns3_map_buffer(ring, &ring->desc_cb[i]); + hns3_unmap_buffer(ring, &ring->desc_cb[i]); ring->desc_cb[i] = *res_cb; ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma); } @@ -1622,7 +1712,7 @@ static int is_valid_clean_head(struct hns3_enet_ring *ring, int h) return u > c ? (h > c && h <= u) : (h > c || h <= u); } -int hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget) +bool hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget) { struct net_device *netdev = ring->tqp->handle->kinfo.netdev; struct netdev_queue *dev_queue; @@ -1633,7 +1723,7 @@ int hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget) rmb(); /* Make sure head is ready before touch any data */ if (is_ring_empty(ring) || head == ring->next_to_clean) - return 0; /* no data to poll */ + return true; /* no data to poll */ if (!is_valid_clean_head(ring, head)) { netdev_err(netdev, "wrong head (%d, %d-%d)\n", head, @@ -1642,7 +1732,7 @@ int hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget) u64_stats_update_begin(&ring->syncp); ring->stats.io_err_cnt++; u64_stats_update_end(&ring->syncp); - return -EIO; + return true; } bytes = 0; @@ -1933,6 +2023,11 @@ static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb, } } +static void hns3_rx_skb(struct hns3_enet_ring *ring, struct sk_buff *skb) +{ + napi_gro_receive(&ring->tqp_vector->napi, skb); +} + static int hns3_handle_rx_bd(struct hns3_enet_ring *ring, struct sk_buff **out_skb, int *out_bnum) { @@ -2067,7 +2162,9 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring, return 0; } -static int hns3_clean_rx_ring(struct hns3_enet_ring *ring, int budget) +int hns3_clean_rx_ring( + struct hns3_enet_ring *ring, int budget, + void (*rx_fn)(struct hns3_enet_ring *, struct sk_buff *)) { #define RCB_NOF_ALLOC_RX_BUFF_ONCE 16 struct net_device *netdev = ring->tqp->handle->kinfo.netdev; @@ -2105,7 +2202,7 @@ static int hns3_clean_rx_ring(struct hns3_enet_ring *ring, int budget) /* Do update ip stack process */ skb->protocol = eth_type_trans(skb, netdev); - (void)napi_gro_receive(&ring->tqp_vector->napi, skb); + rx_fn(ring, skb); recv_pkts++; } @@ -2248,7 +2345,8 @@ static int hns3_nic_common_poll(struct napi_struct *napi, int budget) rx_budget = max(budget / tqp_vector->num_tqps, 1); hns3_for_each_ring(ring, tqp_vector->rx_group) { - int rx_cleaned = hns3_clean_rx_ring(ring, rx_budget); + int rx_cleaned = hns3_clean_rx_ring(ring, rx_budget, + hns3_rx_skb); if (rx_cleaned >= rx_budget) clean_complete = false; @@ -2460,9 +2558,8 @@ static int hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv) (void)irq_set_affinity_hint( priv->tqp_vector[i].vector_irq, NULL); - devm_free_irq(&pdev->dev, - priv->tqp_vector[i].vector_irq, - &priv->tqp_vector[i]); + free_irq(priv->tqp_vector[i].vector_irq, + &priv->tqp_vector[i]); } priv->ring_data[i].ring->irq_init_flag = HNS3_VECTOR_NOT_INITED; @@ -2489,16 +2586,16 @@ static int hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv, if (ring_type == HNAE3_RING_TYPE_TX) { ring_data[q->tqp_index].ring = ring; + ring_data[q->tqp_index].queue_index = q->tqp_index; ring->io_base = (u8 __iomem *)q->io_base + HNS3_TX_REG_OFFSET; } else { ring_data[q->tqp_index + queue_num].ring = ring; + ring_data[q->tqp_index + queue_num].queue_index = q->tqp_index; ring->io_base = q->io_base; } hnae_set_bit(ring->flag, HNAE3_RING_TYPE_B, ring_type); - ring_data[q->tqp_index].queue_index = q->tqp_index; - ring->tqp = q; ring->desc = NULL; ring->desc_cb = NULL; @@ -2596,7 +2693,7 @@ static void hns3_fini_ring(struct hns3_enet_ring *ring) ring->next_to_use = 0; } -int hns3_buf_size2type(u32 buf_size) +static int hns3_buf_size2type(u32 buf_size) { int bd_size_type; @@ -2649,7 +2746,7 @@ static void hns3_init_ring_hw(struct hns3_enet_ring *ring) } } -static int hns3_init_all_ring(struct hns3_nic_priv *priv) +int hns3_init_all_ring(struct hns3_nic_priv *priv) { struct hnae3_handle *h = priv->ae_handle; int ring_num = h->kinfo.num_tqps * 2; @@ -2673,12 +2770,12 @@ static int hns3_init_all_ring(struct hns3_nic_priv *priv) out_when_alloc_ring_memory: for (j = i - 1; j >= 0; j--) - hns3_fini_ring(priv->ring_data[i].ring); + hns3_fini_ring(priv->ring_data[j].ring); return -ENOMEM; } -static int hns3_uninit_all_ring(struct hns3_nic_priv *priv) +int hns3_uninit_all_ring(struct hns3_nic_priv *priv) { struct hnae3_handle *h = priv->ae_handle; int i; @@ -2748,6 +2845,9 @@ static int hns3_client_init(struct hnae3_handle *handle) priv->dev = &pdev->dev; priv->netdev = netdev; priv->ae_handle = handle; + priv->last_reset_time = jiffies; + priv->reset_level = HNAE3_FUNC_RESET; + priv->tx_timeout_count = 0; handle->kinfo.netdev = netdev; handle->priv = (void *)priv; @@ -2790,6 +2890,8 @@ static int hns3_client_init(struct hnae3_handle *handle) goto out_reg_netdev_fail; } + hns3_dcbnl_setup(handle); + /* MTU range: (ETH_MIN_MTU(kernel default) - 9706) */ netdev->max_mtu = HNS3_MAX_MTU - (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN); @@ -2846,10 +2948,224 @@ static void hns3_link_status_change(struct hnae3_handle *handle, bool linkup) } } -const struct hnae3_client_ops client_ops = { +static int hns3_client_setup_tc(struct hnae3_handle *handle, u8 tc) +{ + struct hnae3_knic_private_info *kinfo = &handle->kinfo; + struct net_device *ndev = kinfo->netdev; + bool if_running; + int ret; + u8 i; + + if (tc > HNAE3_MAX_TC) + return -EINVAL; + + if (!ndev) + return -ENODEV; + + if_running = netif_running(ndev); + + ret = netdev_set_num_tc(ndev, tc); + if (ret) + return ret; + + if (if_running) { + (void)hns3_nic_net_stop(ndev); + msleep(100); + } + + ret = (kinfo->dcb_ops && kinfo->dcb_ops->map_update) ? + kinfo->dcb_ops->map_update(handle) : -EOPNOTSUPP; + if (ret) + goto err_out; + + if (tc <= 1) { + netdev_reset_tc(ndev); + goto out; + } + + for (i = 0; i < HNAE3_MAX_TC; i++) { + struct hnae3_tc_info *tc_info = &kinfo->tc_info[i]; + + if (tc_info->enable) + netdev_set_tc_queue(ndev, + tc_info->tc, + tc_info->tqp_count, + tc_info->tqp_offset); + } + + for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) { + netdev_set_prio_tc_map(ndev, i, + kinfo->prio_tc[i]); + } + +out: + ret = hns3_nic_set_real_num_queue(ndev); + +err_out: + if (if_running) + (void)hns3_nic_net_open(ndev); + + return ret; +} + +static void hns3_recover_hw_addr(struct net_device *ndev) +{ + struct netdev_hw_addr_list *list; + struct netdev_hw_addr *ha, *tmp; + + /* go through and sync uc_addr entries to the device */ + list = &ndev->uc; + list_for_each_entry_safe(ha, tmp, &list->list, list) + hns3_nic_uc_sync(ndev, ha->addr); + + /* go through and sync mc_addr entries to the device */ + list = &ndev->mc; + list_for_each_entry_safe(ha, tmp, &list->list, list) + hns3_nic_mc_sync(ndev, ha->addr); +} + +static void hns3_drop_skb_data(struct hns3_enet_ring *ring, struct sk_buff *skb) +{ + dev_kfree_skb_any(skb); +} + +static void hns3_clear_all_ring(struct hnae3_handle *h) +{ + struct net_device *ndev = h->kinfo.netdev; + struct hns3_nic_priv *priv = netdev_priv(ndev); + u32 i; + + for (i = 0; i < h->kinfo.num_tqps; i++) { + struct netdev_queue *dev_queue; + struct hns3_enet_ring *ring; + + ring = priv->ring_data[i].ring; + hns3_clean_tx_ring(ring, ring->desc_num); + dev_queue = netdev_get_tx_queue(ndev, + priv->ring_data[i].queue_index); + netdev_tx_reset_queue(dev_queue); + + ring = priv->ring_data[i + h->kinfo.num_tqps].ring; + hns3_clean_rx_ring(ring, ring->desc_num, hns3_drop_skb_data); + } +} + +static int hns3_reset_notify_down_enet(struct hnae3_handle *handle) +{ + struct hnae3_knic_private_info *kinfo = &handle->kinfo; + struct net_device *ndev = kinfo->netdev; + + if (!netif_running(ndev)) + return -EIO; + + return hns3_nic_net_stop(ndev); +} + +static int hns3_reset_notify_up_enet(struct hnae3_handle *handle) +{ + struct hnae3_knic_private_info *kinfo = &handle->kinfo; + struct hns3_nic_priv *priv = netdev_priv(kinfo->netdev); + int ret = 0; + + if (netif_running(kinfo->netdev)) { + ret = hns3_nic_net_up(kinfo->netdev); + if (ret) { + netdev_err(kinfo->netdev, + "hns net up fail, ret=%d!\n", ret); + return ret; + } + + priv->last_reset_time = jiffies; + } + + return ret; +} + +static int hns3_reset_notify_init_enet(struct hnae3_handle *handle) +{ + struct net_device *netdev = handle->kinfo.netdev; + struct hns3_nic_priv *priv = netdev_priv(netdev); + int ret; + + priv->reset_level = 1; + hns3_init_mac_addr(netdev); + hns3_nic_set_rx_mode(netdev); + hns3_recover_hw_addr(netdev); + + /* Carrier off reporting is important to ethtool even BEFORE open */ + netif_carrier_off(netdev); + + ret = hns3_get_ring_config(priv); + if (ret) + return ret; + + ret = hns3_nic_init_vector_data(priv); + if (ret) + return ret; + + ret = hns3_init_all_ring(priv); + if (ret) { + hns3_nic_uninit_vector_data(priv); + priv->ring_data = NULL; + } + + return ret; +} + +static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle) +{ + struct net_device *netdev = handle->kinfo.netdev; + struct hns3_nic_priv *priv = netdev_priv(netdev); + int ret; + + hns3_clear_all_ring(handle); + + ret = hns3_nic_uninit_vector_data(priv); + if (ret) { + netdev_err(netdev, "uninit vector error\n"); + return ret; + } + + ret = hns3_uninit_all_ring(priv); + if (ret) + netdev_err(netdev, "uninit ring error\n"); + + priv->ring_data = NULL; + + return ret; +} + +static int hns3_reset_notify(struct hnae3_handle *handle, + enum hnae3_reset_notify_type type) +{ + int ret = 0; + + switch (type) { + case HNAE3_UP_CLIENT: + ret = hns3_reset_notify_up_enet(handle); + break; + case HNAE3_DOWN_CLIENT: + ret = hns3_reset_notify_down_enet(handle); + break; + case HNAE3_INIT_CLIENT: + ret = hns3_reset_notify_init_enet(handle); + break; + case HNAE3_UNINIT_CLIENT: + ret = hns3_reset_notify_uninit_enet(handle); + break; + default: + break; + } + + return ret; +} + +static const struct hnae3_client_ops client_ops = { .init_instance = hns3_client_init, .uninit_instance = hns3_client_uninit, .link_status_change = hns3_link_status_change, + .setup_tc = hns3_client_setup_tc, + .reset_notify = hns3_reset_notify, }; /* hns3_init_module - Driver registration routine diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h index 7e8746189747..8a9de759957b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h @@ -76,6 +76,8 @@ enum hns3_nic_state { #define HNS3_RING_NAME_LEN 16 #define HNS3_BUFFER_SIZE_2048 2048 #define HNS3_RING_MAX_PENDING 32768 +#define HNS3_RING_MIN_PENDING 8 +#define HNS3_RING_BD_MULTIPLE 8 #define HNS3_MAX_MTU 9728 #define HNS3_BD_SIZE_512_TYPE 0 @@ -516,6 +518,8 @@ struct hns3_nic_priv { /* The most recently read link state */ int link; u64 tx_timeout_count; + enum hnae3_reset_type reset_level; + unsigned long last_reset_time; unsigned long state; @@ -587,7 +591,23 @@ static inline void hns3_write_reg(void __iomem *base, u32 reg, u32 value) #define hns3_for_each_ring(pos, head) \ for (pos = (head).ring; pos; pos = pos->next) +#define hns3_get_handle(ndev) \ + (((struct hns3_nic_priv *)netdev_priv(ndev))->ae_handle) + void hns3_ethtool_set_ops(struct net_device *netdev); -int hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget); +bool hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget); +int hns3_init_all_ring(struct hns3_nic_priv *priv); +int hns3_uninit_all_ring(struct hns3_nic_priv *priv); +netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev); +int hns3_clean_rx_ring( + struct hns3_enet_ring *ring, int budget, + void (*rx_fn)(struct hns3_enet_ring *, struct sk_buff *)); + +#ifdef CONFIG_HNS3_DCB +void hns3_dcbnl_setup(struct hnae3_handle *handle); +#else +static inline void hns3_dcbnl_setup(struct hnae3_handle *handle) {} +#endif + #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c index d636399232fb..a21470c72da3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c @@ -9,6 +9,7 @@ #include <linux/etherdevice.h> #include <linux/string.h> +#include <linux/phy.h> #include "hns3_enet.h" @@ -59,6 +60,16 @@ static const struct hns3_stats hns3_rxq_stats[] = { #define HNS3_TQP_STATS_COUNT (HNS3_TXQ_STATS_COUNT + HNS3_RXQ_STATS_COUNT) +#define HNS3_SELF_TEST_TPYE_NUM 1 +#define HNS3_NIC_LB_TEST_PKT_NUM 1 +#define HNS3_NIC_LB_TEST_RING_ID 0 +#define HNS3_NIC_LB_TEST_PACKET_SIZE 128 + +/* Nic loopback test err */ +#define HNS3_NIC_LB_TEST_NO_MEM_ERR 1 +#define HNS3_NIC_LB_TEST_TX_CNT_ERR 2 +#define HNS3_NIC_LB_TEST_RX_CNT_ERR 3 + struct hns3_link_mode_mapping { u32 hns3_link_mode; u32 ethtool_link_mode; @@ -77,6 +88,268 @@ static const struct hns3_link_mode_mapping hns3_lm_map[] = { {HNS3_LM_1000BASET_FULL_BIT, ETHTOOL_LINK_MODE_1000baseT_Full_BIT}, }; +static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop) +{ + struct hnae3_handle *h = hns3_get_handle(ndev); + int ret; + + if (!h->ae_algo->ops->set_loopback || + !h->ae_algo->ops->set_promisc_mode) + return -EOPNOTSUPP; + + switch (loop) { + case HNAE3_MAC_INTER_LOOP_MAC: + ret = h->ae_algo->ops->set_loopback(h, loop, true); + break; + case HNAE3_MAC_LOOP_NONE: + ret = h->ae_algo->ops->set_loopback(h, + HNAE3_MAC_INTER_LOOP_MAC, false); + break; + default: + ret = -ENOTSUPP; + break; + } + + if (ret) + return ret; + + if (loop == HNAE3_MAC_LOOP_NONE) + h->ae_algo->ops->set_promisc_mode(h, ndev->flags & IFF_PROMISC); + else + h->ae_algo->ops->set_promisc_mode(h, 1); + + return ret; +} + +static int hns3_lp_up(struct net_device *ndev, enum hnae3_loop loop_mode) +{ + struct hnae3_handle *h = hns3_get_handle(ndev); + int ret; + + if (!h->ae_algo->ops->start) + return -EOPNOTSUPP; + + ret = h->ae_algo->ops->start(h); + if (ret) { + netdev_err(ndev, + "hns3_lb_up ae start return error: %d\n", ret); + return ret; + } + + ret = hns3_lp_setup(ndev, loop_mode); + usleep_range(10000, 20000); + + return ret; +} + +static int hns3_lp_down(struct net_device *ndev) +{ + struct hnae3_handle *h = hns3_get_handle(ndev); + int ret; + + if (!h->ae_algo->ops->stop) + return -EOPNOTSUPP; + + ret = hns3_lp_setup(ndev, HNAE3_MAC_LOOP_NONE); + if (ret) { + netdev_err(ndev, "lb_setup return error: %d\n", ret); + return ret; + } + + h->ae_algo->ops->stop(h); + usleep_range(10000, 20000); + + return 0; +} + +static void hns3_lp_setup_skb(struct sk_buff *skb) +{ + struct net_device *ndev = skb->dev; + unsigned char *packet; + struct ethhdr *ethh; + unsigned int i; + + skb_reserve(skb, NET_IP_ALIGN); + ethh = skb_put(skb, sizeof(struct ethhdr)); + packet = skb_put(skb, HNS3_NIC_LB_TEST_PACKET_SIZE); + + memcpy(ethh->h_dest, ndev->dev_addr, ETH_ALEN); + eth_zero_addr(ethh->h_source); + ethh->h_proto = htons(ETH_P_ARP); + skb_reset_mac_header(skb); + + for (i = 0; i < HNS3_NIC_LB_TEST_PACKET_SIZE; i++) + packet[i] = (unsigned char)(i & 0xff); +} + +static void hns3_lb_check_skb_data(struct hns3_enet_ring *ring, + struct sk_buff *skb) +{ + struct hns3_enet_tqp_vector *tqp_vector = ring->tqp_vector; + unsigned char *packet = skb->data; + u32 i; + + for (i = 0; i < skb->len; i++) + if (packet[i] != (unsigned char)(i & 0xff)) + break; + + /* The packet is correctly received */ + if (i == skb->len) + tqp_vector->rx_group.total_packets++; + else + print_hex_dump(KERN_ERR, "selftest:", DUMP_PREFIX_OFFSET, 16, 1, + skb->data, skb->len, true); + + dev_kfree_skb_any(skb); +} + +static u32 hns3_lb_check_rx_ring(struct hns3_nic_priv *priv, u32 budget) +{ + struct hnae3_handle *h = priv->ae_handle; + struct hnae3_knic_private_info *kinfo; + u32 i, rcv_good_pkt_total = 0; + + kinfo = &h->kinfo; + for (i = kinfo->num_tqps; i < kinfo->num_tqps * 2; i++) { + struct hns3_enet_ring *ring = priv->ring_data[i].ring; + struct hns3_enet_ring_group *rx_group; + u64 pre_rx_pkt; + + rx_group = &ring->tqp_vector->rx_group; + pre_rx_pkt = rx_group->total_packets; + + hns3_clean_rx_ring(ring, budget, hns3_lb_check_skb_data); + + rcv_good_pkt_total += (rx_group->total_packets - pre_rx_pkt); + rx_group->total_packets = pre_rx_pkt; + } + return rcv_good_pkt_total; +} + +static void hns3_lb_clear_tx_ring(struct hns3_nic_priv *priv, u32 start_ringid, + u32 end_ringid, u32 budget) +{ + u32 i; + + for (i = start_ringid; i <= end_ringid; i++) { + struct hns3_enet_ring *ring = priv->ring_data[i].ring; + + hns3_clean_tx_ring(ring, budget); + } +} + +/** + * hns3_lp_run_test - run loopback test + * @ndev: net device + * @mode: loopback type + */ +static int hns3_lp_run_test(struct net_device *ndev, enum hnae3_loop mode) +{ + struct hns3_nic_priv *priv = netdev_priv(ndev); + struct sk_buff *skb; + u32 i, good_cnt; + int ret_val = 0; + + skb = alloc_skb(HNS3_NIC_LB_TEST_PACKET_SIZE + ETH_HLEN + NET_IP_ALIGN, + GFP_KERNEL); + if (!skb) + return HNS3_NIC_LB_TEST_NO_MEM_ERR; + + skb->dev = ndev; + hns3_lp_setup_skb(skb); + skb->queue_mapping = HNS3_NIC_LB_TEST_RING_ID; + + good_cnt = 0; + for (i = 0; i < HNS3_NIC_LB_TEST_PKT_NUM; i++) { + netdev_tx_t tx_ret; + + skb_get(skb); + tx_ret = hns3_nic_net_xmit(skb, ndev); + if (tx_ret == NETDEV_TX_OK) + good_cnt++; + else + netdev_err(ndev, "hns3_lb_run_test xmit failed: %d\n", + tx_ret); + } + if (good_cnt != HNS3_NIC_LB_TEST_PKT_NUM) { + ret_val = HNS3_NIC_LB_TEST_TX_CNT_ERR; + netdev_err(ndev, "mode %d sent fail, cnt=0x%x, budget=0x%x\n", + mode, good_cnt, HNS3_NIC_LB_TEST_PKT_NUM); + goto out; + } + + /* Allow 200 milliseconds for packets to go from Tx to Rx */ + msleep(200); + + good_cnt = hns3_lb_check_rx_ring(priv, HNS3_NIC_LB_TEST_PKT_NUM); + if (good_cnt != HNS3_NIC_LB_TEST_PKT_NUM) { + ret_val = HNS3_NIC_LB_TEST_RX_CNT_ERR; + netdev_err(ndev, "mode %d recv fail, cnt=0x%x, budget=0x%x\n", + mode, good_cnt, HNS3_NIC_LB_TEST_PKT_NUM); + } + +out: + hns3_lb_clear_tx_ring(priv, HNS3_NIC_LB_TEST_RING_ID, + HNS3_NIC_LB_TEST_RING_ID, + HNS3_NIC_LB_TEST_PKT_NUM); + + kfree_skb(skb); + return ret_val; +} + +/** + * hns3_nic_self_test - self test + * @ndev: net device + * @eth_test: test cmd + * @data: test result + */ +static void hns3_self_test(struct net_device *ndev, + struct ethtool_test *eth_test, u64 *data) +{ + struct hns3_nic_priv *priv = netdev_priv(ndev); + struct hnae3_handle *h = priv->ae_handle; + int st_param[HNS3_SELF_TEST_TPYE_NUM][2]; + bool if_running = netif_running(ndev); + int test_index = 0; + u32 i; + + /* Only do offline selftest, or pass by default */ + if (eth_test->flags != ETH_TEST_FL_OFFLINE) + return; + + st_param[HNAE3_MAC_INTER_LOOP_MAC][0] = HNAE3_MAC_INTER_LOOP_MAC; + st_param[HNAE3_MAC_INTER_LOOP_MAC][1] = + h->flags & HNAE3_SUPPORT_MAC_LOOPBACK; + + if (if_running) + dev_close(ndev); + + set_bit(HNS3_NIC_STATE_TESTING, &priv->state); + + for (i = 0; i < HNS3_SELF_TEST_TPYE_NUM; i++) { + enum hnae3_loop loop_type = (enum hnae3_loop)st_param[i][0]; + + if (!st_param[i][1]) + continue; + + data[test_index] = hns3_lp_up(ndev, loop_type); + if (!data[test_index]) { + data[test_index] = hns3_lp_run_test(ndev, loop_type); + hns3_lp_down(ndev); + } + + if (data[test_index]) + eth_test->flags |= ETH_TEST_FL_FAILED; + + test_index++; + } + + clear_bit(HNS3_NIC_STATE_TESTING, &priv->state); + + if (if_running) + dev_open(ndev); +} + static void hns3_driv_to_eth_caps(u32 caps, struct ethtool_link_ksettings *cmd, bool is_advertised) { @@ -86,24 +359,18 @@ static void hns3_driv_to_eth_caps(u32 caps, struct ethtool_link_ksettings *cmd, if (!(caps & hns3_lm_map[i].hns3_link_mode)) continue; - if (is_advertised) { - ethtool_link_ksettings_zero_link_mode(cmd, - advertising); + if (is_advertised) __set_bit(hns3_lm_map[i].ethtool_link_mode, cmd->link_modes.advertising); - } else { - ethtool_link_ksettings_zero_link_mode(cmd, - supported); + else __set_bit(hns3_lm_map[i].ethtool_link_mode, cmd->link_modes.supported); - } } } static int hns3_get_sset_count(struct net_device *netdev, int stringset) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); const struct hnae3_ae_ops *ops = h->ae_algo->ops; if (!ops->get_sset_count) @@ -164,8 +431,7 @@ static u8 *hns3_get_strings_tqps(struct hnae3_handle *handle, u8 *data) static void hns3_get_strings(struct net_device *netdev, u32 stringset, u8 *data) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); const struct hnae3_ae_ops *ops = h->ae_algo->ops; char *buff = (char *)data; @@ -217,11 +483,10 @@ static u64 *hns3_get_stats_tqps(struct hnae3_handle *handle, u64 *data) * @stats: statistics info. * @data: statistics data. */ -void hns3_get_stats(struct net_device *netdev, struct ethtool_stats *stats, - u64 *data) +static void hns3_get_stats(struct net_device *netdev, + struct ethtool_stats *stats, u64 *data) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); u64 *p = data; if (!h->ae_algo->ops->get_stats || !h->ae_algo->ops->update_stats) { @@ -262,10 +527,7 @@ static void hns3_get_drvinfo(struct net_device *netdev, static u32 hns3_get_link(struct net_device *netdev) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h; - - h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); if (h->ae_algo && h->ae_algo->ops && h->ae_algo->ops->get_status) return h->ae_algo->ops->get_status(h); @@ -277,7 +539,8 @@ static void hns3_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *param) { struct hns3_nic_priv *priv = netdev_priv(netdev); - int queue_num = priv->ae_handle->kinfo.num_tqps; + struct hnae3_handle *h = priv->ae_handle; + int queue_num = h->kinfo.num_tqps; param->tx_max_pending = HNS3_RING_MAX_PENDING; param->rx_max_pending = HNS3_RING_MAX_PENDING; @@ -289,8 +552,7 @@ static void hns3_get_ringparam(struct net_device *netdev, static void hns3_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *param) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); if (h->ae_algo && h->ae_algo->ops && h->ae_algo->ops->get_pauseparam) h->ae_algo->ops->get_pauseparam(h, ¶m->autoneg, @@ -300,32 +562,30 @@ static void hns3_get_pauseparam(struct net_device *netdev, static int hns3_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *cmd) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); u32 supported_caps; u32 advertised_caps; u8 media_type = HNAE3_MEDIA_TYPE_UNKNOWN; u8 link_stat; - u8 auto_neg; - u8 duplex; - u32 speed; if (!h->ae_algo || !h->ae_algo->ops) return -EOPNOTSUPP; /* 1.auto_neg & speed & duplex from cmd */ - if (h->ae_algo->ops->get_ksettings_an_result) { - h->ae_algo->ops->get_ksettings_an_result(h, &auto_neg, - &speed, &duplex); - cmd->base.autoneg = auto_neg; - cmd->base.speed = speed; - cmd->base.duplex = duplex; - - link_stat = hns3_get_link(netdev); - if (!link_stat) { - cmd->base.speed = (u32)SPEED_UNKNOWN; - cmd->base.duplex = DUPLEX_UNKNOWN; - } + if (netdev->phydev) + phy_ethtool_ksettings_get(netdev->phydev, cmd); + else if (h->ae_algo->ops->get_ksettings_an_result) + h->ae_algo->ops->get_ksettings_an_result(h, + &cmd->base.autoneg, + &cmd->base.speed, + &cmd->base.duplex); + else + return -EOPNOTSUPP; + + link_stat = hns3_get_link(netdev); + if (!link_stat) { + cmd->base.speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; } /* 2.media_type get from bios parameter block */ @@ -375,6 +635,9 @@ static int hns3_get_link_ksettings(struct net_device *netdev, break; } + if (!cmd->base.autoneg) + advertised_caps &= ~HNS3_LM_AUTONEG_BIT; + /* now, map driver link modes to ethtool link modes */ hns3_driv_to_eth_caps(supported_caps, cmd, false); hns3_driv_to_eth_caps(advertised_caps, cmd, true); @@ -390,10 +653,19 @@ static int hns3_get_link_ksettings(struct net_device *netdev, return 0; } +static int hns3_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *cmd) +{ + /* Only support ksettings_set for netdev with phy attached for now */ + if (netdev->phydev) + return phy_ethtool_ksettings_set(netdev->phydev, cmd); + + return -EOPNOTSUPP; +} + static u32 hns3_get_rss_key_size(struct net_device *netdev) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->get_rss_key_size) @@ -404,8 +676,7 @@ static u32 hns3_get_rss_key_size(struct net_device *netdev) static u32 hns3_get_rss_indir_size(struct net_device *netdev) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->get_rss_indir_size) @@ -417,8 +688,7 @@ static u32 hns3_get_rss_indir_size(struct net_device *netdev) static int hns3_get_rss(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->get_rss) return -EOPNOTSUPP; @@ -429,8 +699,7 @@ static int hns3_get_rss(struct net_device *netdev, u32 *indir, u8 *key, static int hns3_set_rss(struct net_device *netdev, const u32 *indir, const u8 *key, const u8 hfunc) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->set_rss) return -EOPNOTSUPP; @@ -454,16 +723,17 @@ static int hns3_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, u32 *rule_locs) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; + struct hnae3_handle *h = hns3_get_handle(netdev); - if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->get_tc_size) + if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->get_rss_tuple) return -EOPNOTSUPP; switch (cmd->cmd) { case ETHTOOL_GRXRINGS: - cmd->data = h->ae_algo->ops->get_tc_size(h); + cmd->data = h->kinfo.num_tc * h->kinfo.rss_size; break; + case ETHTOOL_GRXFH: + return h->ae_algo->ops->get_rss_tuple(h, cmd); default: return -EOPNOTSUPP; } @@ -471,20 +741,133 @@ static int hns3_get_rxnfc(struct net_device *netdev, return 0; } +static int hns3_change_all_ring_bd_num(struct hns3_nic_priv *priv, + u32 new_desc_num) +{ + struct hnae3_handle *h = priv->ae_handle; + int i; + + h->kinfo.num_desc = new_desc_num; + + for (i = 0; i < h->kinfo.num_tqps * 2; i++) + priv->ring_data[i].ring->desc_num = new_desc_num; + + return hns3_init_all_ring(priv); +} + +static int hns3_set_ringparam(struct net_device *ndev, + struct ethtool_ringparam *param) +{ + struct hns3_nic_priv *priv = netdev_priv(ndev); + struct hnae3_handle *h = priv->ae_handle; + bool if_running = netif_running(ndev); + u32 old_desc_num, new_desc_num; + int ret; + + if (param->rx_mini_pending || param->rx_jumbo_pending) + return -EINVAL; + + if (param->tx_pending != param->rx_pending) { + netdev_err(ndev, + "Descriptors of tx and rx must be equal"); + return -EINVAL; + } + + if (param->tx_pending > HNS3_RING_MAX_PENDING || + param->tx_pending < HNS3_RING_MIN_PENDING) { + netdev_err(ndev, + "Descriptors requested (Tx/Rx: %d) out of range [%d-%d]\n", + param->tx_pending, HNS3_RING_MIN_PENDING, + HNS3_RING_MAX_PENDING); + return -EINVAL; + } + + new_desc_num = param->tx_pending; + + /* Hardware requires that its descriptors must be multiple of eight */ + new_desc_num = ALIGN(new_desc_num, HNS3_RING_BD_MULTIPLE); + old_desc_num = h->kinfo.num_desc; + if (old_desc_num == new_desc_num) + return 0; + + netdev_info(ndev, + "Changing descriptor count from %d to %d.\n", + old_desc_num, new_desc_num); + + if (if_running) + dev_close(ndev); + + ret = hns3_uninit_all_ring(priv); + if (ret) + return ret; + + ret = hns3_change_all_ring_bd_num(priv, new_desc_num); + if (ret) { + ret = hns3_change_all_ring_bd_num(priv, old_desc_num); + if (ret) { + netdev_err(ndev, + "Revert to old bd num fail, ret=%d.\n", ret); + return ret; + } + } + + if (if_running) + ret = dev_open(ndev); + + return ret; +} + +static int hns3_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd) +{ + struct hnae3_handle *h = hns3_get_handle(netdev); + + if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->set_rss_tuple) + return -EOPNOTSUPP; + + switch (cmd->cmd) { + case ETHTOOL_SRXFH: + return h->ae_algo->ops->set_rss_tuple(h, cmd); + default: + return -EOPNOTSUPP; + } +} + +static int hns3_nway_reset(struct net_device *netdev) +{ + struct phy_device *phy = netdev->phydev; + + if (!netif_running(netdev)) + return 0; + + /* Only support nway_reset for netdev with phy attached for now */ + if (!phy) + return -EOPNOTSUPP; + + if (phy->autoneg != AUTONEG_ENABLE) + return -EINVAL; + + return genphy_restart_aneg(phy); +} + static const struct ethtool_ops hns3_ethtool_ops = { + .self_test = hns3_self_test, .get_drvinfo = hns3_get_drvinfo, .get_link = hns3_get_link, .get_ringparam = hns3_get_ringparam, + .set_ringparam = hns3_set_ringparam, .get_pauseparam = hns3_get_pauseparam, .get_strings = hns3_get_strings, .get_ethtool_stats = hns3_get_stats, .get_sset_count = hns3_get_sset_count, .get_rxnfc = hns3_get_rxnfc, + .set_rxnfc = hns3_set_rxnfc, .get_rxfh_key_size = hns3_get_rss_key_size, .get_rxfh_indir_size = hns3_get_rss_indir_size, .get_rxfh = hns3_get_rss, .set_rxfh = hns3_set_rss, .get_link_ksettings = hns3_get_link_ksettings, + .set_link_ksettings = hns3_set_link_ksettings, + .nway_reset = hns3_nway_reset, }; void hns3_ethtool_set_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c index 1d4f712b15a8..e2e5cdc7119c 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c @@ -26,6 +26,7 @@ #include <linux/skbuff.h> #include <linux/dma-mapping.h> #include <linux/prefetch.h> +#include <linux/cpumask.h> #include <asm/barrier.h> #include "hinic_common.h" @@ -171,11 +172,10 @@ static int rx_alloc_pkts(struct hinic_rxq *rxq) struct hinic_sge sge; dma_addr_t dma_addr; struct sk_buff *skb; - int i, alloc_more; u16 prod_idx; + int i; free_wqebbs = hinic_get_rq_free_wqebbs(rxq->rq); - alloc_more = 0; /* Limit the allocation chunks */ if (free_wqebbs > nic_dev->rx_weight) @@ -185,7 +185,6 @@ static int rx_alloc_pkts(struct hinic_rxq *rxq) skb = rx_alloc_skb(rxq, &dma_addr); if (!skb) { netdev_err(rxq->netdev, "Failed to alloc Rx skb\n"); - alloc_more = 1; goto skb_out; } @@ -195,7 +194,6 @@ static int rx_alloc_pkts(struct hinic_rxq *rxq) &prod_idx); if (!rq_wqe) { rx_free_skb(rxq, skb, dma_addr); - alloc_more = 1; goto skb_out; } @@ -211,9 +209,7 @@ skb_out: hinic_rq_update(rxq->rq, prod_idx); } - if (alloc_more) - tasklet_schedule(&rxq->rx_task); - + tasklet_schedule(&rxq->rx_task); return i; } @@ -357,7 +353,7 @@ static int rxq_recv(struct hinic_rxq *rxq, int budget) } if (pkts) - tasklet_schedule(&rxq->rx_task); /* hinic_rx_alloc_pkts */ + tasklet_schedule(&rxq->rx_task); /* rx_alloc_pkts */ u64_stats_update_begin(&rxq->rxq_stats.syncp); rxq->rxq_stats.pkts += pkts; @@ -417,6 +413,8 @@ static int rx_request_irq(struct hinic_rxq *rxq) struct hinic_dev *nic_dev = netdev_priv(rxq->netdev); struct hinic_hwdev *hwdev = nic_dev->hwdev; struct hinic_rq *rq = rxq->rq; + struct hinic_qp *qp; + struct cpumask mask; int err; rx_add_napi(rxq); @@ -432,7 +430,9 @@ static int rx_request_irq(struct hinic_rxq *rxq) return err; } - return 0; + qp = container_of(rq, struct hinic_qp, rq); + cpumask_set_cpu(qp->q_id % num_online_cpus(), &mask); + return irq_set_affinity_hint(rq->irq, &mask); } static void rx_free_irq(struct hinic_rxq *rxq) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c index abe3e38cd342..9128858479c4 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c @@ -212,10 +212,19 @@ netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) sq_wqe = hinic_sq_get_wqe(txq->sq, wqe_size, &prod_idx); if (!sq_wqe) { - tx_unmap_skb(nic_dev, skb, txq->sges); - netif_stop_subqueue(netdev, qp->q_id); + /* Check for the case free_tx_poll is called in another cpu + * and we stopped the subqueue after free_tx_poll check. + */ + sq_wqe = hinic_sq_get_wqe(txq->sq, wqe_size, &prod_idx); + if (sq_wqe) { + netif_wake_subqueue(nic_dev->netdev, qp->q_id); + goto process_sq_wqe; + } + + tx_unmap_skb(nic_dev, skb, txq->sges); + u64_stats_update_begin(&txq->txq_stats.syncp); txq->txq_stats.tx_busy++; u64_stats_update_end(&txq->txq_stats.syncp); @@ -223,6 +232,7 @@ netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) goto flush_skbs; } +process_sq_wqe: hinic_sq_prepare_wqe(txq->sq, prod_idx, sq_wqe, txq->sges, nr_sges); hinic_sq_write_wqe(txq->sq, prod_idx, sq_wqe, skb, wqe_size); diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index c66abd476023..04aaacbc3d45 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -75,6 +75,7 @@ #include <asm/firmware.h> #include <linux/workqueue.h> #include <linux/if_vlan.h> +#include <linux/utsname.h> #include "ibmvnic.h" @@ -115,6 +116,7 @@ static int init_sub_crqs(struct ibmvnic_adapter *); static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter); static int ibmvnic_init(struct ibmvnic_adapter *); static void release_crq_queue(struct ibmvnic_adapter *); +static int __ibmvnic_set_mac(struct net_device *netdev, struct sockaddr *p); struct ibmvnic_stat { char name[ETH_GSTRING_LEN]; @@ -553,6 +555,10 @@ static int reset_tx_pools(struct ibmvnic_adapter *adapter) if (rc) return rc; + rc = reset_long_term_buff(adapter, &tx_pool->tso_ltb); + if (rc) + return rc; + memset(tx_pool->tx_buff, 0, adapter->req_tx_entries_per_subcrq * sizeof(struct ibmvnic_tx_buff)); @@ -562,11 +568,21 @@ static int reset_tx_pools(struct ibmvnic_adapter *adapter) tx_pool->consumer_index = 0; tx_pool->producer_index = 0; + tx_pool->tso_index = 0; } return 0; } +static void release_vpd_data(struct ibmvnic_adapter *adapter) +{ + if (!adapter->vpd) + return; + + kfree(adapter->vpd->buff); + kfree(adapter->vpd); +} + static void release_tx_pools(struct ibmvnic_adapter *adapter) { struct ibmvnic_tx_pool *tx_pool; @@ -581,6 +597,7 @@ static void release_tx_pools(struct ibmvnic_adapter *adapter) tx_pool = &adapter->tx_pool[i]; kfree(tx_pool->tx_buff); free_long_term_buff(adapter, &tx_pool->long_term_buff); + free_long_term_buff(adapter, &tx_pool->tso_ltb); kfree(tx_pool->free_map); } @@ -625,6 +642,16 @@ static int init_tx_pools(struct net_device *netdev) return -1; } + /* alloc TSO ltb */ + if (alloc_long_term_buff(adapter, &tx_pool->tso_ltb, + IBMVNIC_TSO_BUFS * + IBMVNIC_TSO_BUF_SZ)) { + release_tx_pools(adapter); + return -1; + } + + tx_pool->tso_index = 0; + tx_pool->free_map = kcalloc(adapter->req_tx_entries_per_subcrq, sizeof(int), GFP_KERNEL); if (!tx_pool->free_map) { @@ -736,6 +763,8 @@ static void release_resources(struct ibmvnic_adapter *adapter) { int i; + release_vpd_data(adapter); + release_tx_pools(adapter); release_rx_pools(adapter); @@ -816,6 +845,57 @@ static int set_real_num_queues(struct net_device *netdev) return rc; } +static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter) +{ + struct device *dev = &adapter->vdev->dev; + union ibmvnic_crq crq; + dma_addr_t dma_addr; + int len = 0; + + if (adapter->vpd->buff) + len = adapter->vpd->len; + + reinit_completion(&adapter->fw_done); + crq.get_vpd_size.first = IBMVNIC_CRQ_CMD; + crq.get_vpd_size.cmd = GET_VPD_SIZE; + ibmvnic_send_crq(adapter, &crq); + wait_for_completion(&adapter->fw_done); + + if (!adapter->vpd->len) + return -ENODATA; + + if (!adapter->vpd->buff) + adapter->vpd->buff = kzalloc(adapter->vpd->len, GFP_KERNEL); + else if (adapter->vpd->len != len) + adapter->vpd->buff = + krealloc(adapter->vpd->buff, + adapter->vpd->len, GFP_KERNEL); + + if (!adapter->vpd->buff) { + dev_err(dev, "Could allocate VPD buffer\n"); + return -ENOMEM; + } + + adapter->vpd->dma_addr = + dma_map_single(dev, adapter->vpd->buff, adapter->vpd->len, + DMA_FROM_DEVICE); + if (dma_mapping_error(dev, dma_addr)) { + dev_err(dev, "Could not map VPD buffer\n"); + kfree(adapter->vpd->buff); + return -ENOMEM; + } + + reinit_completion(&adapter->fw_done); + crq.get_vpd.first = IBMVNIC_CRQ_CMD; + crq.get_vpd.cmd = GET_VPD; + crq.get_vpd.ioba = cpu_to_be32(adapter->vpd->dma_addr); + crq.get_vpd.len = cpu_to_be32((u32)adapter->vpd->len); + ibmvnic_send_crq(adapter, &crq); + wait_for_completion(&adapter->fw_done); + + return 0; +} + static int init_resources(struct ibmvnic_adapter *adapter) { struct net_device *netdev = adapter->netdev; @@ -833,6 +913,10 @@ static int init_resources(struct ibmvnic_adapter *adapter) if (rc) return rc; + adapter->vpd = kzalloc(sizeof(*adapter->vpd), GFP_KERNEL); + if (!adapter->vpd) + return -ENOMEM; + adapter->map_id = 1; adapter->napi = kcalloc(adapter->req_rx_queues, sizeof(struct napi_struct), GFP_KERNEL); @@ -906,10 +990,15 @@ static int __ibmvnic_open(struct net_device *netdev) static int ibmvnic_open(struct net_device *netdev) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); - int rc; + int rc, vpd; mutex_lock(&adapter->reset_lock); + if (adapter->mac_change_pending) { + __ibmvnic_set_mac(netdev, &adapter->desired.mac); + adapter->mac_change_pending = false; + } + if (adapter->state != VNIC_CLOSED) { rc = ibmvnic_login(netdev); if (rc) { @@ -927,6 +1016,13 @@ static int ibmvnic_open(struct net_device *netdev) } rc = __ibmvnic_open(netdev); + netif_carrier_on(netdev); + + /* Vital Product Data (VPD) */ + vpd = ibmvnic_get_vpd(adapter); + if (vpd) + netdev_err(netdev, "failed to initialize Vital Product Data (VPD)\n"); + mutex_unlock(&adapter->reset_lock); return rc; @@ -1200,11 +1296,41 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) be32_to_cpu(adapter->login_rsp_buf->off_txsubm_subcrqs)); index = tx_pool->free_map[tx_pool->consumer_index]; - offset = index * adapter->req_mtu; - dst = tx_pool->long_term_buff.buff + offset; - memset(dst, 0, adapter->req_mtu); - skb_copy_from_linear_data(skb, dst, skb->len); - data_dma_addr = tx_pool->long_term_buff.addr + offset; + + if (skb_is_gso(skb)) { + offset = tx_pool->tso_index * IBMVNIC_TSO_BUF_SZ; + dst = tx_pool->tso_ltb.buff + offset; + memset(dst, 0, IBMVNIC_TSO_BUF_SZ); + data_dma_addr = tx_pool->tso_ltb.addr + offset; + tx_pool->tso_index++; + if (tx_pool->tso_index == IBMVNIC_TSO_BUFS) + tx_pool->tso_index = 0; + } else { + offset = index * adapter->req_mtu; + dst = tx_pool->long_term_buff.buff + offset; + memset(dst, 0, adapter->req_mtu); + data_dma_addr = tx_pool->long_term_buff.addr + offset; + } + + if (skb_shinfo(skb)->nr_frags) { + int cur, i; + + /* Copy the head */ + skb_copy_from_linear_data(skb, dst, skb_headlen(skb)); + cur = skb_headlen(skb); + + /* Copy the frags */ + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + memcpy(dst + cur, + page_address(skb_frag_page(frag)) + + frag->page_offset, skb_frag_size(frag)); + cur += skb_frag_size(frag); + } + } else { + skb_copy_from_linear_data(skb, dst, skb->len); + } tx_pool->consumer_index = (tx_pool->consumer_index + 1) % @@ -1225,7 +1351,10 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_crq.v1.n_sge = 1; tx_crq.v1.flags1 = IBMVNIC_TX_COMP_NEEDED; tx_crq.v1.correlator = cpu_to_be32(index); - tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->long_term_buff.map_id); + if (skb_is_gso(skb)) + tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->tso_ltb.map_id); + else + tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->long_term_buff.map_id); tx_crq.v1.sge_len = cpu_to_be32(skb->len); tx_crq.v1.ioba = cpu_to_be64(data_dma_addr); @@ -1250,6 +1379,11 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_crq.v1.flags1 |= IBMVNIC_TX_CHKSUM_OFFLOAD; hdrs += 2; } + if (skb_is_gso(skb)) { + tx_crq.v1.flags1 |= IBMVNIC_TX_LSO; + tx_crq.v1.mss = cpu_to_be16(skb_shinfo(skb)->gso_size); + hdrs += 2; + } /* determine if l2/3/4 headers are sent to firmware */ if ((*hdrs >> 7) & 1 && (skb->protocol == htons(ETH_P_IP) || @@ -1371,7 +1505,7 @@ static void ibmvnic_set_multi(struct net_device *netdev) } } -static int ibmvnic_set_mac(struct net_device *netdev, void *p) +static int __ibmvnic_set_mac(struct net_device *netdev, struct sockaddr *p) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); struct sockaddr *addr = p; @@ -1389,6 +1523,22 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p) return 0; } +static int ibmvnic_set_mac(struct net_device *netdev, void *p) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + struct sockaddr *addr = p; + + if (adapter->state != VNIC_OPEN) { + memcpy(&adapter->desired.mac, addr, sizeof(struct sockaddr)); + adapter->mac_change_pending = true; + return 0; + } + + __ibmvnic_set_mac(netdev, addr); + + return 0; +} + /** * do_reset returns zero if we are able to keep processing reset events, or * non-zero if we hit a fatal error and must halt. @@ -1415,6 +1565,13 @@ static int do_reset(struct ibmvnic_adapter *adapter, if (rc) return rc; + if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM || + adapter->wait_for_reset) { + release_resources(adapter); + release_sub_crqs(adapter); + release_crq_queue(adapter); + } + if (adapter->reset_reason != VNIC_RESET_NON_FATAL) { /* remove the closed state so when we call open it appears * we are coming from the probed state. @@ -1423,7 +1580,7 @@ static int do_reset(struct ibmvnic_adapter *adapter, rc = ibmvnic_init(adapter); if (rc) - return 0; + return IBMVNIC_INIT_FAILED; /* If the adapter was in PROBE state prior to the reset, * exit here. @@ -1437,16 +1594,23 @@ static int do_reset(struct ibmvnic_adapter *adapter, return 0; } - rc = reset_tx_pools(adapter); - if (rc) - return rc; + if (adapter->reset_reason == VNIC_RESET_CHANGE_PARAM || + adapter->wait_for_reset) { + rc = init_resources(adapter); + if (rc) + return rc; + } else { + rc = reset_tx_pools(adapter); + if (rc) + return rc; - rc = reset_rx_pools(adapter); - if (rc) - return rc; + rc = reset_rx_pools(adapter); + if (rc) + return rc; - if (reset_state == VNIC_CLOSED) - return 0; + if (reset_state == VNIC_CLOSED) + return 0; + } } rc = __ibmvnic_open(netdev); @@ -1506,7 +1670,7 @@ static void __ibmvnic_reset(struct work_struct *work) struct ibmvnic_adapter *adapter; struct net_device *netdev; u32 reset_state; - int rc; + int rc = 0; adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset); netdev = adapter->netdev; @@ -1519,12 +1683,18 @@ static void __ibmvnic_reset(struct work_struct *work) while (rwi) { rc = do_reset(adapter, rwi, reset_state); kfree(rwi); - if (rc) + if (rc && rc != IBMVNIC_INIT_FAILED) break; rwi = get_next_rwi(adapter); } + if (adapter->wait_for_reset) { + adapter->wait_for_reset = false; + adapter->reset_done_rc = rc; + complete(&adapter->reset_done); + } + if (rc) { netdev_dbg(adapter->netdev, "Reset failed\n"); free_all_rwi(adapter); @@ -1704,9 +1874,42 @@ static void ibmvnic_netpoll_controller(struct net_device *dev) } #endif +static int wait_for_reset(struct ibmvnic_adapter *adapter) +{ + adapter->fallback.mtu = adapter->req_mtu; + adapter->fallback.rx_queues = adapter->req_rx_queues; + adapter->fallback.tx_queues = adapter->req_tx_queues; + adapter->fallback.rx_entries = adapter->req_rx_add_entries_per_subcrq; + adapter->fallback.tx_entries = adapter->req_tx_entries_per_subcrq; + + init_completion(&adapter->reset_done); + ibmvnic_reset(adapter, VNIC_RESET_CHANGE_PARAM); + adapter->wait_for_reset = true; + wait_for_completion(&adapter->reset_done); + + if (adapter->reset_done_rc) { + adapter->desired.mtu = adapter->fallback.mtu; + adapter->desired.rx_queues = adapter->fallback.rx_queues; + adapter->desired.tx_queues = adapter->fallback.tx_queues; + adapter->desired.rx_entries = adapter->fallback.rx_entries; + adapter->desired.tx_entries = adapter->fallback.tx_entries; + + init_completion(&adapter->reset_done); + ibmvnic_reset(adapter, VNIC_RESET_CHANGE_PARAM); + wait_for_completion(&adapter->reset_done); + } + adapter->wait_for_reset = false; + + return adapter->reset_done_rc; +} + static int ibmvnic_change_mtu(struct net_device *netdev, int new_mtu) { - return -EOPNOTSUPP; + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + + adapter->desired.mtu = new_mtu + ETH_HLEN; + + return wait_for_reset(adapter); } static const struct net_device_ops ibmvnic_netdev_ops = { @@ -1748,11 +1951,15 @@ static int ibmvnic_get_link_ksettings(struct net_device *netdev, return 0; } -static void ibmvnic_get_drvinfo(struct net_device *dev, +static void ibmvnic_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info) { + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + strlcpy(info->driver, ibmvnic_driver_name, sizeof(info->driver)); strlcpy(info->version, IBMVNIC_DRIVER_VERSION, sizeof(info->version)); + strlcpy(info->fw_version, adapter->fw_version, + sizeof(info->fw_version)); } static u32 ibmvnic_get_msglevel(struct net_device *netdev) @@ -1794,6 +2001,27 @@ static void ibmvnic_get_ringparam(struct net_device *netdev, ring->rx_jumbo_pending = 0; } +static int ibmvnic_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + + if (ring->rx_pending > adapter->max_rx_add_entries_per_subcrq || + ring->tx_pending > adapter->max_tx_entries_per_subcrq) { + netdev_err(netdev, "Invalid request.\n"); + netdev_err(netdev, "Max tx buffers = %llu\n", + adapter->max_rx_add_entries_per_subcrq); + netdev_err(netdev, "Max rx buffers = %llu\n", + adapter->max_tx_entries_per_subcrq); + return -EINVAL; + } + + adapter->desired.rx_entries = ring->rx_pending; + adapter->desired.tx_entries = ring->tx_pending; + + return wait_for_reset(adapter); +} + static void ibmvnic_get_channels(struct net_device *netdev, struct ethtool_channels *channels) { @@ -1809,6 +2037,17 @@ static void ibmvnic_get_channels(struct net_device *netdev, channels->combined_count = 0; } +static int ibmvnic_set_channels(struct net_device *netdev, + struct ethtool_channels *channels) +{ + struct ibmvnic_adapter *adapter = netdev_priv(netdev); + + adapter->desired.rx_queues = channels->rx_count; + adapter->desired.tx_queues = channels->tx_count; + + return wait_for_reset(adapter); +} + static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data) { struct ibmvnic_adapter *adapter = netdev_priv(dev); @@ -1905,7 +2144,9 @@ static const struct ethtool_ops ibmvnic_ethtool_ops = { .set_msglevel = ibmvnic_set_msglevel, .get_link = ibmvnic_get_link, .get_ringparam = ibmvnic_get_ringparam, + .set_ringparam = ibmvnic_set_ringparam, .get_channels = ibmvnic_get_channels, + .set_channels = ibmvnic_set_channels, .get_strings = ibmvnic_get_strings, .get_sset_count = ibmvnic_get_sset_count, .get_ethtool_stats = ibmvnic_get_ethtool_stats, @@ -2371,6 +2612,7 @@ static void ibmvnic_send_req_caps(struct ibmvnic_adapter *adapter, int retry) { struct device *dev = &adapter->vdev->dev; union ibmvnic_crq crq; + int max_entries; if (!retry) { /* Sub-CRQ entries are 32 byte long */ @@ -2382,21 +2624,60 @@ static void ibmvnic_send_req_caps(struct ibmvnic_adapter *adapter, int retry) return; } - /* Get the minimum between the queried max and the entries - * that fit in our PAGE_SIZE - */ - adapter->req_tx_entries_per_subcrq = - adapter->max_tx_entries_per_subcrq > entries_page ? - entries_page : adapter->max_tx_entries_per_subcrq; - adapter->req_rx_add_entries_per_subcrq = - adapter->max_rx_add_entries_per_subcrq > entries_page ? - entries_page : adapter->max_rx_add_entries_per_subcrq; - - adapter->req_tx_queues = adapter->opt_tx_comp_sub_queues; - adapter->req_rx_queues = adapter->opt_rx_comp_queues; - adapter->req_rx_add_queues = adapter->max_rx_add_queues; + if (adapter->desired.mtu) + adapter->req_mtu = adapter->desired.mtu; + else + adapter->req_mtu = adapter->netdev->mtu + ETH_HLEN; + + if (!adapter->desired.tx_entries) + adapter->desired.tx_entries = + adapter->max_tx_entries_per_subcrq; + if (!adapter->desired.rx_entries) + adapter->desired.rx_entries = + adapter->max_rx_add_entries_per_subcrq; - adapter->req_mtu = adapter->netdev->mtu + ETH_HLEN; + max_entries = IBMVNIC_MAX_LTB_SIZE / + (adapter->req_mtu + IBMVNIC_BUFFER_HLEN); + + if ((adapter->req_mtu + IBMVNIC_BUFFER_HLEN) * + adapter->desired.tx_entries > IBMVNIC_MAX_LTB_SIZE) { + adapter->desired.tx_entries = max_entries; + } + + if ((adapter->req_mtu + IBMVNIC_BUFFER_HLEN) * + adapter->desired.rx_entries > IBMVNIC_MAX_LTB_SIZE) { + adapter->desired.rx_entries = max_entries; + } + + if (adapter->desired.tx_entries) + adapter->req_tx_entries_per_subcrq = + adapter->desired.tx_entries; + else + adapter->req_tx_entries_per_subcrq = + adapter->max_tx_entries_per_subcrq; + + if (adapter->desired.rx_entries) + adapter->req_rx_add_entries_per_subcrq = + adapter->desired.rx_entries; + else + adapter->req_rx_add_entries_per_subcrq = + adapter->max_rx_add_entries_per_subcrq; + + if (adapter->desired.tx_queues) + adapter->req_tx_queues = + adapter->desired.tx_queues; + else + adapter->req_tx_queues = + adapter->opt_tx_comp_sub_queues; + + if (adapter->desired.rx_queues) + adapter->req_rx_queues = + adapter->desired.rx_queues; + else + adapter->req_rx_queues = + adapter->opt_rx_comp_queues; + + adapter->req_rx_add_queues = adapter->max_rx_add_queues; } memset(&crq, 0, sizeof(crq)); @@ -2609,6 +2890,55 @@ static int send_version_xchg(struct ibmvnic_adapter *adapter) return ibmvnic_send_crq(adapter, &crq); } +struct vnic_login_client_data { + u8 type; + __be16 len; + char name; +} __packed; + +static int vnic_client_data_len(struct ibmvnic_adapter *adapter) +{ + int len; + + /* Calculate the amount of buffer space needed for the + * vnic client data in the login buffer. There are four entries, + * OS name, LPAR name, device name, and a null last entry. + */ + len = 4 * sizeof(struct vnic_login_client_data); + len += 6; /* "Linux" plus NULL */ + len += strlen(utsname()->nodename) + 1; + len += strlen(adapter->netdev->name) + 1; + + return len; +} + +static void vnic_add_client_data(struct ibmvnic_adapter *adapter, + struct vnic_login_client_data *vlcd) +{ + const char *os_name = "Linux"; + int len; + + /* Type 1 - LPAR OS */ + vlcd->type = 1; + len = strlen(os_name) + 1; + vlcd->len = cpu_to_be16(len); + strncpy(&vlcd->name, os_name, len); + vlcd = (struct vnic_login_client_data *)((char *)&vlcd->name + len); + + /* Type 2 - LPAR name */ + vlcd->type = 2; + len = strlen(utsname()->nodename) + 1; + vlcd->len = cpu_to_be16(len); + strncpy(&vlcd->name, utsname()->nodename, len); + vlcd = (struct vnic_login_client_data *)((char *)&vlcd->name + len); + + /* Type 3 - device name */ + vlcd->type = 3; + len = strlen(adapter->netdev->name) + 1; + vlcd->len = cpu_to_be16(len); + strncpy(&vlcd->name, adapter->netdev->name, len); +} + static void send_login(struct ibmvnic_adapter *adapter) { struct ibmvnic_login_rsp_buffer *login_rsp_buffer; @@ -2621,13 +2951,18 @@ static void send_login(struct ibmvnic_adapter *adapter) size_t buffer_size; __be64 *tx_list_p; __be64 *rx_list_p; + int client_data_len; + struct vnic_login_client_data *vlcd; int i; + client_data_len = vnic_client_data_len(adapter); + buffer_size = sizeof(struct ibmvnic_login_buffer) + - sizeof(u64) * (adapter->req_tx_queues + adapter->req_rx_queues); + sizeof(u64) * (adapter->req_tx_queues + adapter->req_rx_queues) + + client_data_len; - login_buffer = kmalloc(buffer_size, GFP_ATOMIC); + login_buffer = kzalloc(buffer_size, GFP_ATOMIC); if (!login_buffer) goto buf_alloc_failed; @@ -2694,6 +3029,15 @@ static void send_login(struct ibmvnic_adapter *adapter) } } + /* Insert vNIC login client data */ + vlcd = (struct vnic_login_client_data *) + ((char *)rx_list_p + (sizeof(u64) * adapter->req_rx_queues)); + login_buffer->client_data_offset = + cpu_to_be32((char *)vlcd - (char *)login_buffer); + login_buffer->client_data_len = cpu_to_be32(client_data_len); + + vnic_add_client_data(adapter, vlcd); + netdev_dbg(adapter->netdev, "Login Buffer:\n"); for (i = 0; i < (adapter->login_buf_sz - 1) / 8 + 1; i++) { netdev_dbg(adapter->netdev, "%016lx\n", @@ -2872,6 +3216,73 @@ static void send_cap_queries(struct ibmvnic_adapter *adapter) ibmvnic_send_crq(adapter, &crq); } +static void handle_vpd_size_rsp(union ibmvnic_crq *crq, + struct ibmvnic_adapter *adapter) +{ + struct device *dev = &adapter->vdev->dev; + + if (crq->get_vpd_size_rsp.rc.code) { + dev_err(dev, "Error retrieving VPD size, rc=%x\n", + crq->get_vpd_size_rsp.rc.code); + complete(&adapter->fw_done); + return; + } + + adapter->vpd->len = be64_to_cpu(crq->get_vpd_size_rsp.len); + complete(&adapter->fw_done); +} + +static void handle_vpd_rsp(union ibmvnic_crq *crq, + struct ibmvnic_adapter *adapter) +{ + struct device *dev = &adapter->vdev->dev; + unsigned char *substr = NULL, *ptr = NULL; + u8 fw_level_len = 0; + + memset(adapter->fw_version, 0, 32); + + dma_unmap_single(dev, adapter->vpd->dma_addr, adapter->vpd->len, + DMA_FROM_DEVICE); + + if (crq->get_vpd_rsp.rc.code) { + dev_err(dev, "Error retrieving VPD from device, rc=%x\n", + crq->get_vpd_rsp.rc.code); + goto complete; + } + + /* get the position of the firmware version info + * located after the ASCII 'RM' substring in the buffer + */ + substr = strnstr(adapter->vpd->buff, "RM", adapter->vpd->len); + if (!substr) { + dev_info(dev, "No FW level provided by VPD\n"); + goto complete; + } + + /* get length of firmware level ASCII substring */ + if ((substr + 2) < (adapter->vpd->buff + adapter->vpd->len)) { + fw_level_len = *(substr + 2); + } else { + dev_info(dev, "Length of FW substr extrapolated VDP buff\n"); + goto complete; + } + + /* copy firmware version string from vpd into adapter */ + if ((substr + 3 + fw_level_len) < + (adapter->vpd->buff + adapter->vpd->len)) { + ptr = strncpy((char *)adapter->fw_version, + substr + 3, fw_level_len); + + if (!ptr) + dev_err(dev, "Failed to isolate FW level string\n"); + } else { + dev_info(dev, "FW substr extrapolated VPD buff\n"); + } + +complete: + complete(&adapter->fw_done); +} + static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) { struct device *dev = &adapter->vdev->dev; @@ -2940,14 +3351,14 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) adapter->ip_offload_ctrl.udp_ipv4_chksum = buf->udp_ipv4_chksum; adapter->ip_offload_ctrl.tcp_ipv6_chksum = buf->tcp_ipv6_chksum; adapter->ip_offload_ctrl.udp_ipv6_chksum = buf->udp_ipv6_chksum; + adapter->ip_offload_ctrl.large_tx_ipv4 = buf->large_tx_ipv4; + adapter->ip_offload_ctrl.large_tx_ipv6 = buf->large_tx_ipv6; - /* large_tx/rx disabled for now, additional features needed */ - adapter->ip_offload_ctrl.large_tx_ipv4 = 0; - adapter->ip_offload_ctrl.large_tx_ipv6 = 0; + /* large_rx disabled for now, additional features needed */ adapter->ip_offload_ctrl.large_rx_ipv4 = 0; adapter->ip_offload_ctrl.large_rx_ipv6 = 0; - adapter->netdev->features = NETIF_F_GSO; + adapter->netdev->features = NETIF_F_SG | NETIF_F_GSO; if (buf->tcp_ipv4_chksum || buf->udp_ipv4_chksum) adapter->netdev->features |= NETIF_F_IP_CSUM; @@ -2959,6 +3370,13 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter) (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) adapter->netdev->features |= NETIF_F_RXCSUM; + if (buf->large_tx_ipv4) + adapter->netdev->features |= NETIF_F_TSO; + if (buf->large_tx_ipv6) + adapter->netdev->features |= NETIF_F_TSO6; + + adapter->netdev->hw_features |= adapter->netdev->features; + memset(&crq, 0, sizeof(crq)); crq.control_ip_offload.first = IBMVNIC_CRQ_CMD; crq.control_ip_offload.cmd = CONTROL_IP_OFFLOAD; @@ -3210,6 +3628,7 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, struct ibmvnic_adapter *adapter) { struct device *dev = &adapter->vdev->dev; + struct net_device *netdev = adapter->netdev; struct ibmvnic_login_rsp_buffer *login_rsp = adapter->login_rsp_buf; struct ibmvnic_login_buffer *login = adapter->login_buf; int i; @@ -3229,6 +3648,8 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, return 0; } + netdev->mtu = adapter->req_mtu - ETH_HLEN; + netdev_dbg(adapter->netdev, "Login Response Buffer:\n"); for (i = 0; i < (adapter->login_rsp_buf_sz - 1) / 8 + 1; i++) { netdev_dbg(adapter->netdev, "%016lx\n", @@ -3593,6 +4014,12 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, netdev_dbg(netdev, "Got Collect firmware trace Response\n"); complete(&adapter->fw_done); break; + case GET_VPD_SIZE_RSP: + handle_vpd_size_rsp(crq, adapter); + break; + case GET_VPD_RSP: + handle_vpd_rsp(crq, adapter); + break; default: netdev_err(netdev, "Got an invalid cmd type 0x%02x\n", gen_crq->cmd); @@ -3784,7 +4211,7 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter) unsigned long timeout = msecs_to_jiffies(30000); int rc; - if (adapter->resetting) { + if (adapter->resetting && !adapter->wait_for_reset) { rc = ibmvnic_reset_crq(adapter); if (!rc) rc = vio_enable_interrupts(adapter->vdev); @@ -3818,7 +4245,7 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter) return -1; } - if (adapter->resetting) + if (adapter->resetting && !adapter->wait_for_reset) rc = reset_sub_crq_queues(adapter); else rc = init_sub_crqs(adapter); @@ -3887,6 +4314,8 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) mutex_init(&adapter->rwi_lock); adapter->resetting = false; + adapter->mac_change_pending = false; + do { rc = ibmvnic_init(adapter); if (rc && rc != EAGAIN) @@ -3894,11 +4323,14 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) } while (rc == EAGAIN); netdev->mtu = adapter->req_mtu - ETH_HLEN; + netdev->min_mtu = adapter->min_mtu - ETH_HLEN; + netdev->max_mtu = adapter->max_mtu - ETH_HLEN; rc = device_create_file(&dev->dev, &dev_attr_failover); if (rc) goto ibmvnic_init_fail; + netif_carrier_off(netdev); rc = register_netdev(netdev); if (rc) { dev_err(&dev->dev, "failed to register netdev rc=%d\n", rc); @@ -3907,6 +4339,9 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) dev_info(&dev->dev, "ibmvnic registered\n"); adapter->state = VNIC_PROBED; + + adapter->wait_for_reset = false; + return 0; ibmvnic_register_fail: diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index d02257ccc377..4487f1e2c266 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -30,6 +30,8 @@ #define IBMVNIC_DRIVER_VERSION "1.0.1" #define IBMVNIC_INVALID_MAP -1 #define IBMVNIC_STATS_TIMEOUT 1 +#define IBMVNIC_INIT_FAILED 2 + /* basic structures plus 100 2k buffers */ #define IBMVNIC_IO_ENTITLEMENT_DEFAULT 610305 @@ -39,6 +41,12 @@ #define IBMVNIC_BUFFS_PER_POOL 100 #define IBMVNIC_MAX_TX_QUEUES 5 +#define IBMVNIC_TSO_BUF_SZ 65536 +#define IBMVNIC_TSO_BUFS 64 + +#define IBMVNIC_MAX_LTB_SIZE ((1 << (MAX_ORDER - 1)) * PAGE_SIZE) +#define IBMVNIC_BUFFER_HLEN 500 + struct ibmvnic_login_buffer { __be32 len; __be32 version; @@ -49,6 +57,8 @@ struct ibmvnic_login_buffer { __be32 off_rxcomp_subcrqs; __be32 login_rsp_ioba; __be32 login_rsp_len; + __be32 client_data_offset; + __be32 client_data_len; } __packed __aligned(8); struct ibmvnic_login_rsp_buffer { @@ -550,6 +560,12 @@ struct ibmvnic_multicast_ctrl { struct ibmvnic_rc rc; } __packed __aligned(8); +struct ibmvnic_get_vpd_size { + u8 first; + u8 cmd; + u8 reserved[14]; +} __packed __aligned(8); + struct ibmvnic_get_vpd_size_rsp { u8 first; u8 cmd; @@ -567,6 +583,13 @@ struct ibmvnic_get_vpd { u8 reserved[4]; } __packed __aligned(8); +struct ibmvnic_get_vpd_rsp { + u8 first; + u8 cmd; + u8 reserved[10]; + struct ibmvnic_rc rc; +} __packed __aligned(8); + struct ibmvnic_acl_change_indication { u8 first; u8 cmd; @@ -692,10 +715,10 @@ union ibmvnic_crq { struct ibmvnic_change_mac_addr change_mac_addr_rsp; struct ibmvnic_multicast_ctrl multicast_ctrl; struct ibmvnic_multicast_ctrl multicast_ctrl_rsp; - struct ibmvnic_generic_crq get_vpd_size; + struct ibmvnic_get_vpd_size get_vpd_size; struct ibmvnic_get_vpd_size_rsp get_vpd_size_rsp; struct ibmvnic_get_vpd get_vpd; - struct ibmvnic_generic_crq get_vpd_rsp; + struct ibmvnic_get_vpd_rsp get_vpd_rsp; struct ibmvnic_acl_change_indication acl_change_indication; struct ibmvnic_acl_query acl_query; struct ibmvnic_generic_crq acl_query_rsp; @@ -896,6 +919,8 @@ struct ibmvnic_tx_pool { wait_queue_head_t ibmvnic_tx_comp_q; struct task_struct *work_thread; struct ibmvnic_long_term_buff long_term_buff; + struct ibmvnic_long_term_buff tso_ltb; + int tso_index; }; struct ibmvnic_rx_buff { @@ -927,6 +952,12 @@ struct ibmvnic_error_buff { __be32 error_id; }; +struct ibmvnic_vpd { + unsigned char *buff; + dma_addr_t dma_addr; + u64 len; +}; + enum vnic_state {VNIC_PROBING = 1, VNIC_PROBED, VNIC_OPENING, @@ -940,13 +971,23 @@ enum ibmvnic_reset_reason {VNIC_RESET_FAILOVER = 1, VNIC_RESET_MOBILITY, VNIC_RESET_FATAL, VNIC_RESET_NON_FATAL, - VNIC_RESET_TIMEOUT}; + VNIC_RESET_TIMEOUT, + VNIC_RESET_CHANGE_PARAM}; struct ibmvnic_rwi { enum ibmvnic_reset_reason reset_reason; struct list_head list; }; +struct ibmvnic_tunables { + u64 rx_queues; + u64 tx_queues; + u64 rx_entries; + u64 tx_entries; + u64 mtu; + struct sockaddr mac; +}; + struct ibmvnic_adapter { struct vio_dev *vdev; struct net_device *netdev; @@ -958,6 +999,10 @@ struct ibmvnic_adapter { dma_addr_t ip_offload_ctrl_tok; u32 msg_enable; + /* Vital Product Data (VPD) */ + struct ibmvnic_vpd *vpd; + char fw_version[32]; + /* Statistics */ struct ibmvnic_statistics stats; dma_addr_t stats_token; @@ -1007,6 +1052,10 @@ struct ibmvnic_adapter { struct completion fw_done; int fw_done_rc; + struct completion reset_done; + int reset_done_rc; + bool wait_for_reset; + /* partner capabilities */ u64 min_tx_queues; u64 min_rx_queues; @@ -1051,4 +1100,9 @@ struct ibmvnic_adapter { struct work_struct ibmvnic_reset; bool resetting; bool napi_enabled, from_passive_init; + + bool mac_change_pending; + + struct ibmvnic_tunables desired; + struct ibmvnic_tunables fallback; }; diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 4d10270ddf8f..44b3937f7e81 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -1710,9 +1710,9 @@ static void e100_adjust_adaptive_ifs(struct nic *nic, int speed, int duplex) } } -static void e100_watchdog(unsigned long data) +static void e100_watchdog(struct timer_list *t) { - struct nic *nic = (struct nic *)data; + struct nic *nic = from_timer(nic, t, watchdog); struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET }; u32 speed; @@ -1910,11 +1910,10 @@ static int e100_alloc_cbs(struct nic *nic) nic->cb_to_use = nic->cb_to_send = nic->cb_to_clean = NULL; nic->cbs_avail = 0; - nic->cbs = pci_pool_alloc(nic->cbs_pool, GFP_KERNEL, - &nic->cbs_dma_addr); + nic->cbs = pci_pool_zalloc(nic->cbs_pool, GFP_KERNEL, + &nic->cbs_dma_addr); if (!nic->cbs) return -ENOMEM; - memset(nic->cbs, 0, count * sizeof(struct cb)); for (cb = nic->cbs, i = 0; i < count; cb++, i++) { cb->next = (i + 1 < count) ? cb + 1 : nic->cbs; @@ -2921,7 +2920,7 @@ static int e100_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_master(pdev); - setup_timer(&nic->watchdog, e100_watchdog, (unsigned long)nic); + timer_setup(&nic->watchdog, e100_watchdog, 0); INIT_WORK(&nic->tx_timeout_task, e100_tx_timeout_task); diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h index 0641c0098738..afb7ebe20b24 100644 --- a/drivers/net/ethernet/intel/e1000e/defines.h +++ b/drivers/net/ethernet/intel/e1000e/defines.h @@ -398,6 +398,7 @@ #define E1000_ICR_LSC 0x00000004 /* Link Status Change */ #define E1000_ICR_RXSEQ 0x00000008 /* Rx sequence error */ #define E1000_ICR_RXDMT0 0x00000010 /* Rx desc min. threshold (0) */ +#define E1000_ICR_RXO 0x00000040 /* Receiver Overrun */ #define E1000_ICR_RXT0 0x00000080 /* Rx timer intr (ring 0) */ #define E1000_ICR_ECCER 0x00400000 /* Uncorrectable ECC Error */ /* If this bit asserted, the driver should claim the interrupt */ diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index 98e68888abb1..2311b31bdcac 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -94,10 +94,6 @@ struct e1000_info; */ #define E1000_CHECK_RESET_COUNT 25 -#define DEFAULT_RDTR 0 -#define DEFAULT_RADV 8 -#define BURST_RDTR 0x20 -#define BURST_RADV 0x20 #define PCICFG_DESC_RING_STATUS 0xe4 #define FLUSH_DESC_REQUIRED 0x100 diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c index b322011ec282..f457c5703d0c 100644 --- a/drivers/net/ethernet/intel/e1000e/mac.c +++ b/drivers/net/ethernet/intel/e1000e/mac.c @@ -410,6 +410,9 @@ void e1000e_clear_hw_cntrs_base(struct e1000_hw *hw) * Checks to see of the link status of the hardware has changed. If a * change in link status has been detected, then we read the PHY registers * to get the current speed/duplex if link exists. + * + * Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link + * up). **/ s32 e1000e_check_for_copper_link(struct e1000_hw *hw) { @@ -423,7 +426,7 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw) * Change or Rx Sequence Error interrupt. */ if (!mac->get_link_status) - return 0; + return 1; /* First we want to see if the MII Status Register reports * link. If so, then we want to get the current speed/duplex @@ -461,10 +464,12 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw) * different link partner. */ ret_val = e1000e_config_fc_after_link_up(hw); - if (ret_val) + if (ret_val) { e_dbg("Error configuring flow control\n"); + return ret_val; + } - return ret_val; + return 1; } /** diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 327dfe5bedc0..f2f49239b015 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -1071,7 +1071,8 @@ next_desc: } static void e1000_put_txbuf(struct e1000_ring *tx_ring, - struct e1000_buffer *buffer_info) + struct e1000_buffer *buffer_info, + bool drop) { struct e1000_adapter *adapter = tx_ring->adapter; @@ -1085,7 +1086,10 @@ static void e1000_put_txbuf(struct e1000_ring *tx_ring, buffer_info->dma = 0; } if (buffer_info->skb) { - dev_kfree_skb_any(buffer_info->skb); + if (drop) + dev_kfree_skb_any(buffer_info->skb); + else + dev_consume_skb_any(buffer_info->skb); buffer_info->skb = NULL; } buffer_info->time_stamp = 0; @@ -1199,7 +1203,7 @@ static void e1000e_tx_hwtstamp_work(struct work_struct *work) wmb(); /* force write prior to skb_tstamp_tx */ skb_tstamp_tx(skb, &shhwtstamps); - dev_kfree_skb_any(skb); + dev_consume_skb_any(skb); } else if (time_after(jiffies, adapter->tx_hwtstamp_start + adapter->tx_timeout_factor * HZ)) { dev_kfree_skb_any(adapter->tx_hwtstamp_skb); @@ -1254,7 +1258,7 @@ static bool e1000_clean_tx_irq(struct e1000_ring *tx_ring) } } - e1000_put_txbuf(tx_ring, buffer_info); + e1000_put_txbuf(tx_ring, buffer_info, false); tx_desc->upper.data = 0; i++; @@ -1910,14 +1914,30 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data) struct net_device *netdev = data; struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; + u32 icr; + bool enable = true; + + icr = er32(ICR); + if (icr & E1000_ICR_RXO) { + ew32(ICR, E1000_ICR_RXO); + enable = false; + /* napi poll will re-enable Other, make sure it runs */ + if (napi_schedule_prep(&adapter->napi)) { + adapter->total_rx_bytes = 0; + adapter->total_rx_packets = 0; + __napi_schedule(&adapter->napi); + } + } + if (icr & E1000_ICR_LSC) { + ew32(ICR, E1000_ICR_LSC); + hw->mac.get_link_status = true; + /* guard against interrupt when we're going down */ + if (!test_bit(__E1000_DOWN, &adapter->state)) + mod_timer(&adapter->watchdog_timer, jiffies + 1); + } - hw->mac.get_link_status = true; - - /* guard against interrupt when we're going down */ - if (!test_bit(__E1000_DOWN, &adapter->state)) { - mod_timer(&adapter->watchdog_timer, jiffies + 1); + if (enable && !test_bit(__E1000_DOWN, &adapter->state)) ew32(IMS, E1000_IMS_OTHER); - } return IRQ_HANDLED; } @@ -2421,7 +2441,7 @@ static void e1000_clean_tx_ring(struct e1000_ring *tx_ring) for (i = 0; i < tx_ring->count; i++) { buffer_info = &tx_ring->buffer_info[i]; - e1000_put_txbuf(tx_ring, buffer_info); + e1000_put_txbuf(tx_ring, buffer_info, false); } netdev_reset_queue(adapter->netdev); @@ -2687,7 +2707,8 @@ static int e1000e_poll(struct napi_struct *napi, int weight) napi_complete_done(napi, work_done); if (!test_bit(__E1000_DOWN, &adapter->state)) { if (adapter->msix_entries) - ew32(IMS, adapter->rx_ring->ims_val); + ew32(IMS, adapter->rx_ring->ims_val | + E1000_IMS_OTHER); else e1000_irq_enable(adapter); } @@ -3004,8 +3025,8 @@ static void e1000_configure_tx(struct e1000_adapter *adapter) hw->mac.ops.config_collision_dist(hw); - /* SPT and CNP Si errata workaround to avoid data corruption */ - if (hw->mac.type >= e1000_pch_spt) { + /* SPT and KBL Si errata workaround to avoid data corruption */ + if (hw->mac.type == e1000_pch_spt) { u32 reg_val; reg_val = er32(IOSFPC); @@ -3013,7 +3034,9 @@ static void e1000_configure_tx(struct e1000_adapter *adapter) ew32(IOSFPC, reg_val); reg_val = er32(TARC(0)); - reg_val |= E1000_TARC0_CB_MULTIQ_3_REQ; + /* SPT and KBL Si errata workaround to avoid Tx hang */ + reg_val &= ~BIT(28); + reg_val |= BIT(29); ew32(TARC(0), reg_val); } } @@ -3223,14 +3246,6 @@ static void e1000_configure_rx(struct e1000_adapter *adapter) */ ew32(RXDCTL(0), E1000_RXDCTL_DMA_BURST_ENABLE); ew32(RXDCTL(1), E1000_RXDCTL_DMA_BURST_ENABLE); - - /* override the delay timers for enabling bursting, only if - * the value was not set by the user via module options - */ - if (adapter->rx_int_delay == DEFAULT_RDTR) - adapter->rx_int_delay = BURST_RDTR; - if (adapter->rx_abs_int_delay == DEFAULT_RADV) - adapter->rx_abs_int_delay = BURST_RADV; } /* set the Receive Delay Timer Register */ @@ -4204,7 +4219,7 @@ static void e1000e_trigger_lsc(struct e1000_adapter *adapter) struct e1000_hw *hw = &adapter->hw; if (adapter->msix_entries) - ew32(ICS, E1000_ICS_OTHER); + ew32(ICS, E1000_ICS_LSC | E1000_ICS_OTHER); else ew32(ICS, E1000_ICS_LSC); } @@ -4808,9 +4823,9 @@ static void e1000e_update_phy_task(struct work_struct *work) * Need to wait a few seconds after link up to get diagnostic information from * the phy **/ -static void e1000_update_phy_info(unsigned long data) +static void e1000_update_phy_info(struct timer_list *t) { - struct e1000_adapter *adapter = (struct e1000_adapter *)data; + struct e1000_adapter *adapter = from_timer(adapter, t, phy_info_timer); if (test_bit(__E1000_DOWN, &adapter->state)) return; @@ -5074,14 +5089,14 @@ static bool e1000e_has_link(struct e1000_adapter *adapter) /* get_link_status is set on LSC (link status) interrupt or * Rx sequence error interrupt. get_link_status will stay - * false until the check_for_link establishes link + * true until the check_for_link establishes link * for copper adapters ONLY */ switch (hw->phy.media_type) { case e1000_media_type_copper: if (hw->mac.get_link_status) { ret_val = hw->mac.ops.check_for_link(hw); - link_active = !hw->mac.get_link_status; + link_active = ret_val > 0; } else { link_active = true; } @@ -5092,14 +5107,14 @@ static bool e1000e_has_link(struct e1000_adapter *adapter) break; case e1000_media_type_internal_serdes: ret_val = hw->mac.ops.check_for_link(hw); - link_active = adapter->hw.mac.serdes_has_link; + link_active = hw->mac.serdes_has_link; break; default: case e1000_media_type_unknown: break; } - if ((ret_val == E1000_ERR_PHY) && (hw->phy.type == e1000_phy_igp_3) && + if ((ret_val == -E1000_ERR_PHY) && (hw->phy.type == e1000_phy_igp_3) && (er32(CTRL) & E1000_PHY_CTRL_GBE_DISABLE)) { /* See e1000_kmrn_lock_loss_workaround_ich8lan() */ e_info("Gigabit has been disabled, downgrading speed\n"); @@ -5144,9 +5159,9 @@ static void e1000e_check_82574_phy_workaround(struct e1000_adapter *adapter) * e1000_watchdog - Timer Call-back * @data: pointer to adapter cast into an unsigned long **/ -static void e1000_watchdog(unsigned long data) +static void e1000_watchdog(struct timer_list *t) { - struct e1000_adapter *adapter = (struct e1000_adapter *)data; + struct e1000_adapter *adapter = from_timer(adapter, t, watchdog_timer); /* Do the rest outside of interrupt context */ schedule_work(&adapter->watchdog_task); @@ -5614,7 +5629,7 @@ dma_error: i += tx_ring->count; i--; buffer_info = &tx_ring->buffer_info[i]; - e1000_put_txbuf(tx_ring, buffer_info); + e1000_put_txbuf(tx_ring, buffer_info, true); } return 0; @@ -7252,13 +7267,8 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_eeprom; } - init_timer(&adapter->watchdog_timer); - adapter->watchdog_timer.function = e1000_watchdog; - adapter->watchdog_timer.data = (unsigned long)adapter; - - init_timer(&adapter->phy_info_timer); - adapter->phy_info_timer.function = e1000_update_phy_info; - adapter->phy_info_timer.data = (unsigned long)adapter; + timer_setup(&adapter->watchdog_timer, e1000_watchdog, 0); + timer_setup(&adapter->phy_info_timer, e1000_update_phy_info, 0); INIT_WORK(&adapter->reset_task, e1000_reset_task); INIT_WORK(&adapter->watchdog_task, e1000_watchdog_task); @@ -7411,7 +7421,7 @@ static void e1000_remove(struct pci_dev *pdev) if (adapter->flags & FLAG_HAS_HW_TIMESTAMP) { cancel_work_sync(&adapter->tx_hwtstamp_work); if (adapter->tx_hwtstamp_skb) { - dev_kfree_skb_any(adapter->tx_hwtstamp_skb); + dev_consume_skb_any(adapter->tx_hwtstamp_skb); adapter->tx_hwtstamp_skb = NULL; } } diff --git a/drivers/net/ethernet/intel/e1000e/param.c b/drivers/net/ethernet/intel/e1000e/param.c index 6d8c39abee16..47da51864543 100644 --- a/drivers/net/ethernet/intel/e1000e/param.c +++ b/drivers/net/ethernet/intel/e1000e/param.c @@ -73,17 +73,25 @@ E1000_PARAM(TxAbsIntDelay, "Transmit Absolute Interrupt Delay"); /* Receive Interrupt Delay in units of 1.024 microseconds * hardware will likely hang if you set this to anything but zero. * + * Burst variant is used as default if device has FLAG2_DMA_BURST. + * * Valid Range: 0-65535 */ E1000_PARAM(RxIntDelay, "Receive Interrupt Delay"); +#define DEFAULT_RDTR 0 +#define BURST_RDTR 0x20 #define MAX_RXDELAY 0xFFFF #define MIN_RXDELAY 0 /* Receive Absolute Interrupt Delay in units of 1.024 microseconds * + * Burst variant is used as default if device has FLAG2_DMA_BURST. + * * Valid Range: 0-65535 */ E1000_PARAM(RxAbsIntDelay, "Receive Absolute Interrupt Delay"); +#define DEFAULT_RADV 8 +#define BURST_RADV 0x20 #define MAX_RXABSDELAY 0xFFFF #define MIN_RXABSDELAY 0 @@ -297,6 +305,9 @@ void e1000e_check_options(struct e1000_adapter *adapter) .max = MAX_RXDELAY } } }; + if (adapter->flags2 & FLAG2_DMA_BURST) + opt.def = BURST_RDTR; + if (num_RxIntDelay > bd) { adapter->rx_int_delay = RxIntDelay[bd]; e1000_validate_option(&adapter->rx_int_delay, &opt, @@ -307,7 +318,7 @@ void e1000e_check_options(struct e1000_adapter *adapter) } /* Receive Absolute Interrupt Delay */ { - static const struct e1000_option opt = { + static struct e1000_option opt = { .type = range_option, .name = "Receive Absolute Interrupt Delay", .err = "using default of " @@ -317,6 +328,9 @@ void e1000e_check_options(struct e1000_adapter *adapter) .max = MAX_RXABSDELAY } } }; + if (adapter->flags2 & FLAG2_DMA_BURST) + opt.def = BURST_RADV; + if (num_RxAbsIntDelay > bd) { adapter->rx_abs_int_delay = RxAbsIntDelay[bd]; e1000_validate_option(&adapter->rx_abs_int_delay, &opt, diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c index d78d47b41a71..86ff0969efb6 100644 --- a/drivers/net/ethernet/intel/e1000e/phy.c +++ b/drivers/net/ethernet/intel/e1000e/phy.c @@ -1744,6 +1744,7 @@ s32 e1000e_phy_has_link_generic(struct e1000_hw *hw, u32 iterations, s32 ret_val = 0; u16 i, phy_status; + *success = false; for (i = 0; i < iterations; i++) { /* Some PHYs require the MII_BMSR register to be read * twice due to the link bit being sticky. No harm doing @@ -1763,16 +1764,16 @@ s32 e1000e_phy_has_link_generic(struct e1000_hw *hw, u32 iterations, ret_val = e1e_rphy(hw, MII_BMSR, &phy_status); if (ret_val) break; - if (phy_status & BMSR_LSTATUS) + if (phy_status & BMSR_LSTATUS) { + *success = true; break; + } if (usec_interval >= 1000) msleep(usec_interval / 1000); else udelay(usec_interval); } - *success = (i < iterations); - return ret_val; } diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h index 689c413b7782..46973fb234c5 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h @@ -248,6 +248,29 @@ struct fm10k_udp_port { __be16 port; }; +enum fm10k_macvlan_request_type { + FM10K_UC_MAC_REQUEST, + FM10K_MC_MAC_REQUEST, + FM10K_VLAN_REQUEST +}; + +struct fm10k_macvlan_request { + enum fm10k_macvlan_request_type type; + struct list_head list; + union { + struct fm10k_mac_request { + u8 addr[ETH_ALEN]; + u16 glort; + u16 vid; + } mac; + struct fm10k_vlan_request { + u32 vid; + u8 vsi; + } vlan; + }; + bool set; +}; + /* one work queue for entire driver */ extern struct workqueue_struct *fm10k_workqueue; @@ -270,11 +293,15 @@ enum fm10k_flags_t { enum fm10k_state_t { __FM10K_RESETTING, + __FM10K_RESET_DETACHED, + __FM10K_RESET_SUSPENDED, __FM10K_DOWN, __FM10K_SERVICE_SCHED, __FM10K_SERVICE_REQUEST, __FM10K_SERVICE_DISABLE, - __FM10K_MBX_LOCK, + __FM10K_MACVLAN_SCHED, + __FM10K_MACVLAN_REQUEST, + __FM10K_MACVLAN_DISABLE, __FM10K_LINK_DOWN, __FM10K_UPDATING_STATS, /* This value must be last and determines the BITMAP size */ @@ -344,6 +371,8 @@ struct fm10k_intfc { struct fm10k_hw_stats stats; struct fm10k_hw hw; + /* Mailbox lock */ + spinlock_t mbx_lock; u32 __iomem *uc_addr; u32 __iomem *sw_addr; u16 msg_enable; @@ -365,6 +394,12 @@ struct fm10k_intfc { struct list_head vxlan_port; struct list_head geneve_port; + /* MAC/VLAN update queue */ + struct list_head macvlan_requests; + struct delayed_work macvlan_task; + /* MAC/VLAN update queue lock */ + spinlock_t macvlan_lock; + #ifdef CONFIG_DEBUG_FS struct dentry *dbg_intfc; #endif /* CONFIG_DEBUG_FS */ @@ -384,23 +419,17 @@ struct fm10k_intfc { static inline void fm10k_mbx_lock(struct fm10k_intfc *interface) { - /* busy loop if we cannot obtain the lock as some calls - * such as ndo_set_rx_mode may be made in atomic context - */ - while (test_and_set_bit(__FM10K_MBX_LOCK, interface->state)) - udelay(20); + spin_lock(&interface->mbx_lock); } static inline void fm10k_mbx_unlock(struct fm10k_intfc *interface) { - /* flush memory to make sure state is correct */ - smp_mb__before_atomic(); - clear_bit(__FM10K_MBX_LOCK, interface->state); + spin_unlock(&interface->mbx_lock); } static inline int fm10k_mbx_trylock(struct fm10k_intfc *interface) { - return !test_and_set_bit(__FM10K_MBX_LOCK, interface->state); + return spin_trylock(&interface->mbx_lock); } /* fm10k_test_staterr - test bits in Rx descriptor status and error fields */ @@ -490,6 +519,7 @@ void fm10k_up(struct fm10k_intfc *interface); void fm10k_down(struct fm10k_intfc *interface); void fm10k_update_stats(struct fm10k_intfc *interface); void fm10k_service_event_schedule(struct fm10k_intfc *interface); +void fm10k_macvlan_schedule(struct fm10k_intfc *interface); void fm10k_update_rx_drop_en(struct fm10k_intfc *interface); #ifdef CONFIG_NET_POLL_CONTROLLER void fm10k_netpoll(struct net_device *netdev); @@ -510,6 +540,12 @@ void fm10k_reset_rx_state(struct fm10k_intfc *); int fm10k_setup_tc(struct net_device *dev, u8 tc); int fm10k_open(struct net_device *netdev); int fm10k_close(struct net_device *netdev); +int fm10k_queue_vlan_request(struct fm10k_intfc *interface, u32 vid, + u8 vsi, bool set); +int fm10k_queue_mac_request(struct fm10k_intfc *interface, u16 glort, + const unsigned char *addr, u16 vid, bool set); +void fm10k_clear_macvlan_queue(struct fm10k_intfc *interface, + u16 glort, bool vlans); /* Ethtool */ void fm10k_set_ethtool_ops(struct net_device *dev); @@ -526,8 +562,8 @@ s32 fm10k_iov_update_pvid(struct fm10k_intfc *interface, u16 glort, u16 pvid); int fm10k_ndo_set_vf_mac(struct net_device *netdev, int vf_idx, u8 *mac); int fm10k_ndo_set_vf_vlan(struct net_device *netdev, int vf_idx, u16 vid, u8 qos, __be16 vlan_proto); -int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, int rate, - int unused); +int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, + int __always_unused min_rate, int max_rate); int fm10k_ndo_get_vf_config(struct net_device *netdev, int vf_idx, struct ifla_vf_info *ivi); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_common.c b/drivers/net/ethernet/intel/fm10k/fm10k_common.c index 62a6ad9b3eed..736a9f087bc9 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_common.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_common.c @@ -1,5 +1,5 @@ /* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2016 Intel Corporation. + * Copyright(c) 2013 - 2017 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -517,8 +517,8 @@ s32 fm10k_get_host_state_generic(struct fm10k_hw *hw, bool *host_ready) goto out; } - /* verify Mailbox is still valid */ - if (!mbx->ops.tx_ready(mbx, FM10K_VFMBX_MSG_MTU)) + /* verify Mailbox is still open */ + if (mbx->state != FM10K_STATE_OPEN) goto out; /* interface cannot receive traffic without logical ports */ diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c index 5116fd043630..14df09e2d964 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c @@ -52,9 +52,9 @@ static void fm10k_dbg_desc_seq_stop(struct seq_file __always_unused *s, static void fm10k_dbg_desc_break(struct seq_file *s, int i) { while (i--) - seq_puts(s, "-"); + seq_putc(s, '-'); - seq_puts(s, "\n"); + seq_putc(s, '\n'); } static int fm10k_dbg_tx_desc_seq_show(struct seq_file *s, void *v) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c index 5f4dac0d36ef..ea3ab24265ee 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c @@ -1,5 +1,5 @@ /* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2016 Intel Corporation. + * Copyright(c) 2013 - 2017 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -35,10 +35,133 @@ static s32 fm10k_iov_msg_error(struct fm10k_hw *hw, u32 **results, return fm10k_tlv_msg_error(hw, results, mbx); } +/** + * fm10k_iov_msg_queue_mac_vlan - Message handler for MAC/VLAN request from VF + * @hw: Pointer to hardware structure + * @results: Pointer array to message, results[0] is pointer to message + * @mbx: Pointer to mailbox information structure + * + * This function is a custom handler for MAC/VLAN requests from the VF. The + * assumption is that it is acceptable to directly hand off the message from + * the VF to the PF's switch manager. However, we use a MAC/VLAN message + * queue to avoid overloading the mailbox when a large number of requests + * come in. + **/ +static s32 fm10k_iov_msg_queue_mac_vlan(struct fm10k_hw *hw, u32 **results, + struct fm10k_mbx_info *mbx) +{ + struct fm10k_vf_info *vf_info = (struct fm10k_vf_info *)mbx; + struct fm10k_intfc *interface = hw->back; + u8 mac[ETH_ALEN]; + u32 *result; + int err = 0; + bool set; + u16 vlan; + u32 vid; + + /* we shouldn't be updating rules on a disabled interface */ + if (!FM10K_VF_FLAG_ENABLED(vf_info)) + err = FM10K_ERR_PARAM; + + if (!err && !!results[FM10K_MAC_VLAN_MSG_VLAN]) { + result = results[FM10K_MAC_VLAN_MSG_VLAN]; + + /* record VLAN id requested */ + err = fm10k_tlv_attr_get_u32(result, &vid); + if (err) + return err; + + set = !(vid & FM10K_VLAN_CLEAR); + vid &= ~FM10K_VLAN_CLEAR; + + /* if the length field has been set, this is a multi-bit + * update request. For multi-bit requests, simply disallow + * them when the pf_vid has been set. In this case, the PF + * should have already cleared the VLAN_TABLE, and if we + * allowed them, it could allow a rogue VF to receive traffic + * on a VLAN it was not assigned. In the single-bit case, we + * need to modify requests for VLAN 0 to use the default PF or + * SW vid when assigned. + */ + + if (vid >> 16) { + /* prevent multi-bit requests when PF has + * administratively set the VLAN for this VF + */ + if (vf_info->pf_vid) + return FM10K_ERR_PARAM; + } else { + err = fm10k_iov_select_vid(vf_info, (u16)vid); + if (err < 0) + return err; + + vid = err; + } + + /* update VSI info for VF in regards to VLAN table */ + err = hw->mac.ops.update_vlan(hw, vid, vf_info->vsi, set); + } + + if (!err && !!results[FM10K_MAC_VLAN_MSG_MAC]) { + result = results[FM10K_MAC_VLAN_MSG_MAC]; + + /* record unicast MAC address requested */ + err = fm10k_tlv_attr_get_mac_vlan(result, mac, &vlan); + if (err) + return err; + + /* block attempts to set MAC for a locked device */ + if (is_valid_ether_addr(vf_info->mac) && + !ether_addr_equal(mac, vf_info->mac)) + return FM10K_ERR_PARAM; + + set = !(vlan & FM10K_VLAN_CLEAR); + vlan &= ~FM10K_VLAN_CLEAR; + + err = fm10k_iov_select_vid(vf_info, vlan); + if (err < 0) + return err; + + vlan = (u16)err; + + /* Add this request to the MAC/VLAN queue */ + err = fm10k_queue_mac_request(interface, vf_info->glort, + mac, vlan, set); + } + + if (!err && !!results[FM10K_MAC_VLAN_MSG_MULTICAST]) { + result = results[FM10K_MAC_VLAN_MSG_MULTICAST]; + + /* record multicast MAC address requested */ + err = fm10k_tlv_attr_get_mac_vlan(result, mac, &vlan); + if (err) + return err; + + /* verify that the VF is allowed to request multicast */ + if (!(vf_info->vf_flags & FM10K_VF_FLAG_MULTI_ENABLED)) + return FM10K_ERR_PARAM; + + set = !(vlan & FM10K_VLAN_CLEAR); + vlan &= ~FM10K_VLAN_CLEAR; + + err = fm10k_iov_select_vid(vf_info, vlan); + if (err < 0) + return err; + + vlan = (u16)err; + + /* Add this request to the MAC/VLAN queue */ + err = fm10k_queue_mac_request(interface, vf_info->glort, + mac, vlan, set); + } + + return err; +} + static const struct fm10k_msg_data iov_mbx_data[] = { FM10K_TLV_MSG_TEST_HANDLER(fm10k_tlv_msg_test), FM10K_VF_MSG_MSIX_HANDLER(fm10k_iov_msg_msix_pf), - FM10K_VF_MSG_MAC_VLAN_HANDLER(fm10k_iov_msg_mac_vlan_pf), + FM10K_VF_MSG_MAC_VLAN_HANDLER(fm10k_iov_msg_queue_mac_vlan), FM10K_VF_MSG_LPORT_STATE_HANDLER(fm10k_iov_msg_lport_state_pf), FM10K_TLV_MSG_ERROR_HANDLER(fm10k_iov_msg_error), }; @@ -66,25 +189,21 @@ s32 fm10k_iov_event(struct fm10k_intfc *interface) goto read_unlock; /* read VFLRE to determine if any VFs have been reset */ - do { - vflre = fm10k_read_reg(hw, FM10K_PFVFLRE(0)); - vflre <<= 32; - vflre |= fm10k_read_reg(hw, FM10K_PFVFLRE(1)); - vflre = (vflre << 32) | (vflre >> 32); - vflre |= fm10k_read_reg(hw, FM10K_PFVFLRE(0)); + vflre = fm10k_read_reg(hw, FM10K_PFVFLRE(1)); + vflre <<= 32; + vflre |= fm10k_read_reg(hw, FM10K_PFVFLRE(0)); - i = iov_data->num_vfs; + i = iov_data->num_vfs; - for (vflre <<= 64 - i; vflre && i--; vflre += vflre) { - struct fm10k_vf_info *vf_info = &iov_data->vf_info[i]; + for (vflre <<= 64 - i; vflre && i--; vflre += vflre) { + struct fm10k_vf_info *vf_info = &iov_data->vf_info[i]; - if (vflre >= 0) - continue; + if (vflre >= 0) + continue; - hw->iov.ops.reset_resources(hw, vf_info); - vf_info->mbx.ops.connect(hw, &vf_info->mbx); - } - } while (i != iov_data->num_vfs); + hw->iov.ops.reset_resources(hw, vf_info); + vf_info->mbx.ops.connect(hw, &vf_info->mbx); + } read_unlock: rcu_read_unlock(); @@ -126,9 +245,14 @@ process_mbx: struct fm10k_mbx_info *mbx = &vf_info->mbx; u16 glort = vf_info->glort; + /* process the SM mailbox first to drain outgoing messages */ + hw->mbx.ops.process(hw, &hw->mbx); + /* verify port mapping is valid, if not reset port */ - if (vf_info->vf_flags && !fm10k_glort_valid_pf(hw, glort)) + if (vf_info->vf_flags && !fm10k_glort_valid_pf(hw, glort)) { hw->iov.ops.reset_lport(hw, vf_info); + fm10k_clear_macvlan_queue(interface, glort, false); + } /* reset VFs that have mailbox timed out */ if (!mbx->timeout) { @@ -140,6 +264,10 @@ process_mbx: if (!hw->mbx.ops.tx_ready(&hw->mbx, FM10K_VFMBX_MSG_MTU)) { /* keep track of how many times this occurs */ interface->hw_sm_mbx_full++; + + /* make sure we try again momentarily */ + fm10k_service_event_schedule(interface); + break; } @@ -187,6 +315,7 @@ void fm10k_iov_suspend(struct pci_dev *pdev) hw->iov.ops.reset_resources(hw, vf_info); hw->iov.ops.reset_lport(hw, vf_info); + fm10k_clear_macvlan_queue(interface, vf_info->glort, false); } } @@ -411,6 +540,8 @@ static inline void fm10k_reset_vf_info(struct fm10k_intfc *interface, /* disable LPORT for this VF which clears switch rules */ hw->iov.ops.reset_lport(hw, vf_info); + fm10k_clear_macvlan_queue(interface, vf_info->glort, false); + /* assign new MAC+VLAN for this VF */ hw->iov.ops.assign_default_mac_vlan(hw, vf_info); @@ -482,7 +613,7 @@ int fm10k_ndo_set_vf_vlan(struct net_device *netdev, int vf_idx, u16 vid, } int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, - int __always_unused unused, int rate) + int __always_unused min_rate, int max_rate) { struct fm10k_intfc *interface = netdev_priv(netdev); struct fm10k_iov_data *iov_data = interface->iov_data; @@ -493,14 +624,15 @@ int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, return -EINVAL; /* rate limit cannot be less than 10Mbs or greater than link speed */ - if (rate && ((rate < FM10K_VF_TC_MIN) || rate > FM10K_VF_TC_MAX)) + if (max_rate && + (max_rate < FM10K_VF_TC_MIN || max_rate > FM10K_VF_TC_MAX)) return -EINVAL; /* store values */ - iov_data->vf_info[vf_idx].rate = rate; + iov_data->vf_info[vf_idx].rate = max_rate; /* update hardware configuration */ - hw->iov.ops.configure_tc(hw, vf_idx, rate); + hw->iov.ops.configure_tc(hw, vf_idx, max_rate); return 0; } diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 9dffaba85ae6..dbd69310f263 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -28,7 +28,7 @@ #include "fm10k.h" -#define DRV_VERSION "0.21.7-k" +#define DRV_VERSION "0.22.1-k" #define DRV_SUMMARY "Intel(R) Ethernet Switch Host Interface Driver" const char fm10k_driver_version[] = DRV_VERSION; char fm10k_driver_name[] = "fm10k"; @@ -806,9 +806,10 @@ static int fm10k_tso(struct fm10k_ring *tx_ring, tx_desc->mss = cpu_to_le16(skb_shinfo(skb)->gso_size); return 1; + err_vxlan: tx_ring->netdev->features &= ~NETIF_F_GSO_UDP_TUNNEL; - if (!net_ratelimit()) + if (net_ratelimit()) netdev_err(tx_ring->netdev, "TSO requested for unsupported tunnel, disabling offload\n"); return -1; @@ -876,6 +877,7 @@ static void fm10k_tx_csum(struct fm10k_ring *tx_ring, case IPPROTO_GRE: if (skb->encapsulation) break; + /* fall through */ default: if (unlikely(net_ratelimit())) { dev_warn(tx_ring->dev, diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c index 334088a101c3..244d3ad58ca7 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c @@ -1,5 +1,5 @@ /* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2016 Intel Corporation. + * Copyright(c) 2013 - 2017 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -1586,7 +1586,7 @@ s32 fm10k_pfvf_mbx_init(struct fm10k_hw *hw, struct fm10k_mbx_info *mbx, mbx->mbmem_reg = FM10K_MBMEM_VF(id, 0); break; } - /* fallthough */ + /* fall through */ default: return FM10K_MBX_ERR_NO_MBX; } diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index e69d49d91d67..adc62fb38c49 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -643,9 +643,13 @@ int fm10k_close(struct net_device *netdev) static netdev_tx_t fm10k_xmit_frame(struct sk_buff *skb, struct net_device *dev) { struct fm10k_intfc *interface = netdev_priv(dev); + int num_tx_queues = READ_ONCE(interface->num_tx_queues); unsigned int r_idx = skb->queue_mapping; int err; + if (!num_tx_queues) + return NETDEV_TX_BUSY; + if ((skb->protocol == htons(ETH_P_8021Q)) && !skb_vlan_tag_present(skb)) { /* FM10K only supports hardware tagging, any tags in frame @@ -698,8 +702,8 @@ static netdev_tx_t fm10k_xmit_frame(struct sk_buff *skb, struct net_device *dev) __skb_put(skb, pad_len); } - if (r_idx >= interface->num_tx_queues) - r_idx %= interface->num_tx_queues; + if (r_idx >= num_tx_queues) + r_idx %= num_tx_queues; err = fm10k_xmit_frame_ring(skb, interface->tx_ring[r_idx]); @@ -754,11 +758,132 @@ static bool fm10k_host_mbx_ready(struct fm10k_intfc *interface) return (hw->mac.type == fm10k_mac_vf || interface->host_ready); } +/** + * fm10k_queue_vlan_request - Queue a VLAN update request + * @interface: the fm10k interface structure + * @vid: the VLAN vid + * @vsi: VSI index number + * @set: whether to set or clear + * + * This function queues up a VLAN update. For VFs, this must be sent to the + * managing PF over the mailbox. For PFs, we'll use the same handling so that + * it's similar to the VF. This avoids storming the PF<->VF mailbox with too + * many VLAN updates during reset. + */ +int fm10k_queue_vlan_request(struct fm10k_intfc *interface, + u32 vid, u8 vsi, bool set) +{ + struct fm10k_macvlan_request *request; + unsigned long flags; + + /* This must be atomic since we may be called while the netdev + * addr_list_lock is held + */ + request = kzalloc(sizeof(*request), GFP_ATOMIC); + if (!request) + return -ENOMEM; + + request->type = FM10K_VLAN_REQUEST; + request->vlan.vid = vid; + request->vlan.vsi = vsi; + request->set = set; + + spin_lock_irqsave(&interface->macvlan_lock, flags); + list_add_tail(&request->list, &interface->macvlan_requests); + spin_unlock_irqrestore(&interface->macvlan_lock, flags); + + fm10k_macvlan_schedule(interface); + + return 0; +} + +/** + * fm10k_queue_mac_request - Queue a MAC update request + * @interface: the fm10k interface structure + * @glort: the target glort for this update + * @addr: the address to update + * @vid: the vid to update + * @sync: whether to add or remove + * + * This function queues up a MAC request for sending to the switch manager. + * A separate thread monitors the queue and sends updates to the switch + * manager. Return 0 on success, and negative error code on failure. + **/ +int fm10k_queue_mac_request(struct fm10k_intfc *interface, u16 glort, + const unsigned char *addr, u16 vid, bool set) +{ + struct fm10k_macvlan_request *request; + unsigned long flags; + + /* This must be atomic since we may be called while the netdev + * addr_list_lock is held + */ + request = kzalloc(sizeof(*request), GFP_ATOMIC); + if (!request) + return -ENOMEM; + + if (is_multicast_ether_addr(addr)) + request->type = FM10K_MC_MAC_REQUEST; + else + request->type = FM10K_UC_MAC_REQUEST; + + ether_addr_copy(request->mac.addr, addr); + request->mac.glort = glort; + request->mac.vid = vid; + request->set = set; + + spin_lock_irqsave(&interface->macvlan_lock, flags); + list_add_tail(&request->list, &interface->macvlan_requests); + spin_unlock_irqrestore(&interface->macvlan_lock, flags); + + fm10k_macvlan_schedule(interface); + + return 0; +} + +/** + * fm10k_clear_macvlan_queue - Cancel pending updates for a given glort + * @interface: the fm10k interface structure + * @glort: the target glort to clear + * @vlans: true to clear VLAN messages, false to ignore them + * + * Cancel any outstanding MAC/VLAN requests for a given glort. This is + * expected to be called when a logical port goes down. + **/ +void fm10k_clear_macvlan_queue(struct fm10k_intfc *interface, + u16 glort, bool vlans) + +{ + struct fm10k_macvlan_request *r, *tmp; + unsigned long flags; + + spin_lock_irqsave(&interface->macvlan_lock, flags); + + /* Free any outstanding MAC/VLAN requests for this interface */ + list_for_each_entry_safe(r, tmp, &interface->macvlan_requests, list) { + switch (r->type) { + case FM10K_MC_MAC_REQUEST: + case FM10K_UC_MAC_REQUEST: + /* Don't free requests for other interfaces */ + if (r->mac.glort != glort) + break; + /* fall through */ + case FM10K_VLAN_REQUEST: + if (vlans) { + list_del(&r->list); + kfree(r); + } + break; + } + } + + spin_unlock_irqrestore(&interface->macvlan_lock, flags); +} + static int fm10k_uc_vlan_unsync(struct net_device *netdev, const unsigned char *uc_addr) { struct fm10k_intfc *interface = netdev_priv(netdev); - struct fm10k_hw *hw = &interface->hw; u16 glort = interface->glort; u16 vid = interface->vid; bool set = !!(vid / VLAN_N_VID); @@ -767,10 +892,7 @@ static int fm10k_uc_vlan_unsync(struct net_device *netdev, /* drop any leading bits on the VLAN ID */ vid &= VLAN_N_VID - 1; - if (fm10k_host_mbx_ready(interface)) - err = hw->mac.ops.update_uc_addr(hw, glort, uc_addr, - vid, set, 0); - + err = fm10k_queue_mac_request(interface, glort, uc_addr, vid, set); if (err) return err; @@ -782,7 +904,6 @@ static int fm10k_mc_vlan_unsync(struct net_device *netdev, const unsigned char *mc_addr) { struct fm10k_intfc *interface = netdev_priv(netdev); - struct fm10k_hw *hw = &interface->hw; u16 glort = interface->glort; u16 vid = interface->vid; bool set = !!(vid / VLAN_N_VID); @@ -791,9 +912,7 @@ static int fm10k_mc_vlan_unsync(struct net_device *netdev, /* drop any leading bits on the VLAN ID */ vid &= VLAN_N_VID - 1; - if (fm10k_host_mbx_ready(interface)) - err = hw->mac.ops.update_mc_addr(hw, glort, mc_addr, vid, set); - + err = fm10k_queue_mac_request(interface, glort, mc_addr, vid, set); if (err) return err; @@ -851,18 +970,14 @@ static int fm10k_update_vid(struct net_device *netdev, u16 vid, bool set) /* only need to update the VLAN if not in promiscuous mode */ if (!(netdev->flags & IFF_PROMISC)) { - err = hw->mac.ops.update_vlan(hw, vid, 0, set); + err = fm10k_queue_vlan_request(interface, vid, 0, set); if (err) goto err_out; } - /* update our base MAC address if host's mailbox is ready */ - if (fm10k_host_mbx_ready(interface)) - err = hw->mac.ops.update_uc_addr(hw, interface->glort, - hw->mac.addr, vid, set, 0); - else - err = -EHOSTDOWN; - + /* Update our base MAC address */ + err = fm10k_queue_mac_request(interface, interface->glort, + hw->mac.addr, vid, set); if (err) goto err_out; @@ -906,7 +1021,6 @@ static u16 fm10k_find_next_vlan(struct fm10k_intfc *interface, u16 vid) static void fm10k_clear_unused_vlans(struct fm10k_intfc *interface) { - struct fm10k_hw *hw = &interface->hw; u32 vid, prev_vid; /* loop through and find any gaps in the table */ @@ -918,7 +1032,7 @@ static void fm10k_clear_unused_vlans(struct fm10k_intfc *interface) /* send request to clear multiple bits at a time */ prev_vid += (vid - prev_vid - 1) << FM10K_VLAN_LENGTH_SHIFT; - hw->mac.ops.update_vlan(hw, prev_vid, 0, false); + fm10k_queue_vlan_request(interface, prev_vid, 0, false); } } @@ -933,15 +1047,11 @@ static int __fm10k_uc_sync(struct net_device *dev, if (!is_valid_ether_addr(addr)) return -EADDRNOTAVAIL; - /* update table with current entries if host's mailbox is ready */ - if (!fm10k_host_mbx_ready(interface)) - return -EHOSTDOWN; - for (vid = hw->mac.default_vid ? fm10k_find_next_vlan(interface, 0) : 1; vid < VLAN_N_VID; vid = fm10k_find_next_vlan(interface, vid)) { - err = hw->mac.ops.update_uc_addr(hw, glort, addr, - vid, sync, 0); + err = fm10k_queue_mac_request(interface, glort, + addr, vid, sync); if (err) return err; } @@ -998,15 +1108,18 @@ static int __fm10k_mc_sync(struct net_device *dev, struct fm10k_intfc *interface = netdev_priv(dev); struct fm10k_hw *hw = &interface->hw; u16 vid, glort = interface->glort; + s32 err; - /* update table with current entries if host's mailbox is ready */ - if (!fm10k_host_mbx_ready(interface)) - return 0; + if (!is_multicast_ether_addr(addr)) + return -EADDRNOTAVAIL; for (vid = hw->mac.default_vid ? fm10k_find_next_vlan(interface, 0) : 1; vid < VLAN_N_VID; vid = fm10k_find_next_vlan(interface, vid)) { - hw->mac.ops.update_mc_addr(hw, glort, addr, vid, sync); + err = fm10k_queue_mac_request(interface, glort, + addr, vid, sync); + if (err) + return err; } return 0; @@ -1046,7 +1159,8 @@ static void fm10k_set_rx_mode(struct net_device *dev) if (interface->xcast_mode != xcast_mode) { /* update VLAN table */ if (xcast_mode == FM10K_XCAST_MODE_PROMISC) - hw->mac.ops.update_vlan(hw, FM10K_VLAN_ALL, 0, true); + fm10k_queue_vlan_request(interface, FM10K_VLAN_ALL, + 0, true); if (interface->xcast_mode == FM10K_XCAST_MODE_PROMISC) fm10k_clear_unused_vlans(interface); @@ -1094,22 +1208,20 @@ void fm10k_restore_rx_state(struct fm10k_intfc *interface) interface->glort_count, true); /* update VLAN table */ - hw->mac.ops.update_vlan(hw, FM10K_VLAN_ALL, 0, - xcast_mode == FM10K_XCAST_MODE_PROMISC); + fm10k_queue_vlan_request(interface, FM10K_VLAN_ALL, 0, + xcast_mode == FM10K_XCAST_MODE_PROMISC); /* Add filter for VLAN 0 */ - hw->mac.ops.update_vlan(hw, 0, 0, true); + fm10k_queue_vlan_request(interface, 0, 0, true); /* update table with current entries */ for (vid = hw->mac.default_vid ? fm10k_find_next_vlan(interface, 0) : 1; vid < VLAN_N_VID; vid = fm10k_find_next_vlan(interface, vid)) { - hw->mac.ops.update_vlan(hw, vid, 0, true); + fm10k_queue_vlan_request(interface, vid, 0, true); - /* Update unicast entries if host's mailbox is ready */ - if (fm10k_host_mbx_ready(interface)) - hw->mac.ops.update_uc_addr(hw, glort, hw->mac.addr, - vid, true, 0); + fm10k_queue_mac_request(interface, glort, + hw->mac.addr, vid, true); } /* update xcast mode before synchronizing addresses if host's mailbox @@ -1136,6 +1248,13 @@ void fm10k_reset_rx_state(struct fm10k_intfc *interface) struct net_device *netdev = interface->netdev; struct fm10k_hw *hw = &interface->hw; + /* Wait for MAC/VLAN work to finish */ + while (test_bit(__FM10K_MACVLAN_SCHED, interface->state)) + usleep_range(1000, 2000); + + /* Cancel pending MAC/VLAN requests */ + fm10k_clear_macvlan_queue(interface, interface->glort, true); + fm10k_mbx_lock(interface); /* clear the logical port state on lower device if host's mailbox is @@ -1270,7 +1389,7 @@ static int __fm10k_setup_tc(struct net_device *dev, enum tc_setup_type type, { struct tc_mqprio_qopt *mqprio = type_data; - if (type != TC_SETUP_MQPRIO) + if (type != TC_SETUP_QDISC_MQPRIO) return -EOPNOTSUPP; mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; @@ -1370,8 +1489,8 @@ static void *fm10k_dfwd_add_station(struct net_device *dev, if (fm10k_host_mbx_ready(interface)) { hw->mac.ops.update_xcast_mode(hw, glort, FM10K_XCAST_MODE_MULTI); - hw->mac.ops.update_uc_addr(hw, glort, sdev->dev_addr, - 0, true, 0); + fm10k_queue_mac_request(interface, glort, sdev->dev_addr, + 0, true); } fm10k_mbx_unlock(interface); @@ -1410,8 +1529,8 @@ static void fm10k_dfwd_del_station(struct net_device *dev, void *priv) if (fm10k_host_mbx_ready(interface)) { hw->mac.ops.update_xcast_mode(hw, glort, FM10K_XCAST_MODE_NONE); - hw->mac.ops.update_uc_addr(hw, glort, sdev->dev_addr, - 0, false, 0); + fm10k_queue_mac_request(interface, glort, sdev->dev_addr, + 0, false); } fm10k_mbx_unlock(interface); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 63784576ae8b..7f605221a686 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -91,6 +91,76 @@ static int fm10k_hw_ready(struct fm10k_intfc *interface) return FM10K_REMOVED(hw->hw_addr) ? -ENODEV : 0; } +/** + * fm10k_macvlan_schedule - Schedule MAC/VLAN queue task + * @interface: fm10k private interface structure + * + * Schedule the MAC/VLAN queue monitor task. If the MAC/VLAN task cannot be + * started immediately, request that it be restarted when possible. + */ +void fm10k_macvlan_schedule(struct fm10k_intfc *interface) +{ + /* Avoid processing the MAC/VLAN queue when the service task is + * disabled, or when we're resetting the device. + */ + if (!test_bit(__FM10K_MACVLAN_DISABLE, interface->state) && + !test_and_set_bit(__FM10K_MACVLAN_SCHED, interface->state)) { + clear_bit(__FM10K_MACVLAN_REQUEST, interface->state); + /* We delay the actual start of execution in order to allow + * multiple MAC/VLAN updates to accumulate before handling + * them, and to allow some time to let the mailbox drain + * between runs. + */ + queue_delayed_work(fm10k_workqueue, + &interface->macvlan_task, 10); + } else { + set_bit(__FM10K_MACVLAN_REQUEST, interface->state); + } +} + +/** + * fm10k_stop_macvlan_task - Stop the MAC/VLAN queue monitor + * @interface: fm10k private interface structure + * + * Wait until the MAC/VLAN queue task has stopped, and cancel any future + * requests. + */ +static void fm10k_stop_macvlan_task(struct fm10k_intfc *interface) +{ + /* Disable the MAC/VLAN work item */ + set_bit(__FM10K_MACVLAN_DISABLE, interface->state); + + /* Make sure we waited until any current invocations have stopped */ + cancel_delayed_work_sync(&interface->macvlan_task); + + /* We set the __FM10K_MACVLAN_SCHED bit when we schedule the task. + * However, it may not be unset of the MAC/VLAN task never actually + * got a chance to run. Since we've canceled the task here, and it + * cannot be rescheuled right now, we need to ensure the scheduled bit + * gets unset. + */ + clear_bit(__FM10K_MACVLAN_SCHED, interface->state); +} + +/** + * fm10k_resume_macvlan_task - Restart the MAC/VLAN queue monitor + * @interface: fm10k private interface structure + * + * Clear the __FM10K_MACVLAN_DISABLE bit and, if a request occurred, schedule + * the MAC/VLAN work monitor. + */ +static void fm10k_resume_macvlan_task(struct fm10k_intfc *interface) +{ + /* Re-enable the MAC/VLAN work item */ + clear_bit(__FM10K_MACVLAN_DISABLE, interface->state); + + /* We might have received a MAC/VLAN request while disabled. If so, + * kick off the queue now. + */ + if (test_bit(__FM10K_MACVLAN_REQUEST, interface->state)) + fm10k_macvlan_schedule(interface); +} + void fm10k_service_event_schedule(struct fm10k_intfc *interface) { if (!test_bit(__FM10K_SERVICE_DISABLE, interface->state) && @@ -118,13 +188,35 @@ static void fm10k_service_event_complete(struct fm10k_intfc *interface) fm10k_service_event_schedule(interface); } +static void fm10k_stop_service_event(struct fm10k_intfc *interface) +{ + set_bit(__FM10K_SERVICE_DISABLE, interface->state); + cancel_work_sync(&interface->service_task); + + /* It's possible that cancel_work_sync stopped the service task from + * running before it could actually start. In this case the + * __FM10K_SERVICE_SCHED bit will never be cleared. Since we know that + * the service task cannot be running at this point, we need to clear + * the scheduled bit, as otherwise the service task may never be + * restarted. + */ + clear_bit(__FM10K_SERVICE_SCHED, interface->state); +} + +static void fm10k_start_service_event(struct fm10k_intfc *interface) +{ + clear_bit(__FM10K_SERVICE_DISABLE, interface->state); + fm10k_service_event_schedule(interface); +} + /** * fm10k_service_timer - Timer Call-back * @data: pointer to interface cast into an unsigned long **/ -static void fm10k_service_timer(unsigned long data) +static void fm10k_service_timer(struct timer_list *t) { - struct fm10k_intfc *interface = (struct fm10k_intfc *)data; + struct fm10k_intfc *interface = from_timer(interface, t, + service_timer); /* Reset the timer */ mod_timer(&interface->service_timer, (HZ * 2) + jiffies); @@ -132,36 +224,15 @@ static void fm10k_service_timer(unsigned long data) fm10k_service_event_schedule(interface); } -static void fm10k_detach_subtask(struct fm10k_intfc *interface) -{ - struct net_device *netdev = interface->netdev; - u32 __iomem *hw_addr; - u32 value; - - /* do nothing if device is still present or hw_addr is set */ - if (netif_device_present(netdev) || interface->hw.hw_addr) - return; - - /* check the real address space to see if we've recovered */ - hw_addr = READ_ONCE(interface->uc_addr); - value = readl(hw_addr); - if (~value) { - interface->hw.hw_addr = interface->uc_addr; - netif_device_attach(netdev); - set_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags); - netdev_warn(netdev, "PCIe link restored, device now attached\n"); - return; - } - - rtnl_lock(); - - if (netif_running(netdev)) - dev_close(netdev); - - rtnl_unlock(); -} - -static void fm10k_prepare_for_reset(struct fm10k_intfc *interface) +/** + * fm10k_prepare_for_reset - Prepare the driver and device for a pending reset + * @interface: fm10k private data structure + * + * This function prepares for a device reset by shutting as much down as we + * can. It does nothing and returns false if __FM10K_RESETTING was already set + * prior to calling this function. It returns true if it actually did work. + */ +static bool fm10k_prepare_for_reset(struct fm10k_intfc *interface) { struct net_device *netdev = interface->netdev; @@ -170,8 +241,15 @@ static void fm10k_prepare_for_reset(struct fm10k_intfc *interface) /* put off any impending NetWatchDogTimeout */ netif_trans_update(netdev); - while (test_and_set_bit(__FM10K_RESETTING, interface->state)) - usleep_range(1000, 2000); + /* Nothing to do if a reset is already in progress */ + if (test_and_set_bit(__FM10K_RESETTING, interface->state)) + return false; + + /* As the MAC/VLAN task will be accessing registers it must not be + * running while we reset. Although the task will not be scheduled + * once we start resetting it may already be running + */ + fm10k_stop_macvlan_task(interface); rtnl_lock(); @@ -189,6 +267,8 @@ static void fm10k_prepare_for_reset(struct fm10k_intfc *interface) interface->last_reset = jiffies + (10 * HZ); rtnl_unlock(); + + return true; } static int fm10k_handle_reset(struct fm10k_intfc *interface) @@ -197,6 +277,8 @@ static int fm10k_handle_reset(struct fm10k_intfc *interface) struct fm10k_hw *hw = &interface->hw; int err; + WARN_ON(!test_bit(__FM10K_RESETTING, interface->state)); + rtnl_lock(); pci_set_master(interface->pdev); @@ -253,6 +335,8 @@ static int fm10k_handle_reset(struct fm10k_intfc *interface) rtnl_unlock(); + fm10k_resume_macvlan_task(interface); + clear_bit(__FM10K_RESETTING, interface->state); return err; @@ -270,27 +354,80 @@ reinit_err: return err; } -static void fm10k_reinit(struct fm10k_intfc *interface) +static void fm10k_detach_subtask(struct fm10k_intfc *interface) { + struct net_device *netdev = interface->netdev; + u32 __iomem *hw_addr; + u32 value; int err; - fm10k_prepare_for_reset(interface); + /* do nothing if netdev is still present or hw_addr is set */ + if (netif_device_present(netdev) || interface->hw.hw_addr) + return; - err = fm10k_handle_reset(interface); - if (err) - dev_err(&interface->pdev->dev, - "fm10k_handle_reset failed: %d\n", err); + /* We've lost the PCIe register space, and can no longer access the + * device. Shut everything except the detach subtask down and prepare + * to reset the device in case we recover. If we actually prepare for + * reset, indicate that we're detached. + */ + if (fm10k_prepare_for_reset(interface)) + set_bit(__FM10K_RESET_DETACHED, interface->state); + + /* check the real address space to see if we've recovered */ + hw_addr = READ_ONCE(interface->uc_addr); + value = readl(hw_addr); + if (~value) { + /* Make sure the reset was initiated because we detached, + * otherwise we might race with a different reset flow. + */ + if (!test_and_clear_bit(__FM10K_RESET_DETACHED, + interface->state)) + return; + + /* Restore the hardware address */ + interface->hw.hw_addr = interface->uc_addr; + + /* PCIe link has been restored, and the device is active + * again. Restore everything and reset the device. + */ + err = fm10k_handle_reset(interface); + if (err) { + netdev_err(netdev, "Unable to reset device: %d\n", err); + interface->hw.hw_addr = NULL; + return; + } + + /* Re-attach the netdev */ + netif_device_attach(netdev); + netdev_warn(netdev, "PCIe link restored, device now attached\n"); + return; + } } static void fm10k_reset_subtask(struct fm10k_intfc *interface) { + int err; + if (!test_and_clear_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags)) return; + /* If another thread has already prepared to reset the device, we + * should not attempt to handle a reset here, since we'd race with + * that thread. This may happen if we suspend the device or if the + * PCIe link is lost. In this case, we'll just ignore the RESET + * request, as it will (eventually) be taken care of when the thread + * which actually started the reset is finished. + */ + if (!fm10k_prepare_for_reset(interface)) + return; + netdev_err(interface->netdev, "Reset interface\n"); - fm10k_reinit(interface); + err = fm10k_handle_reset(interface); + if (err) + dev_err(&interface->pdev->dev, + "fm10k_handle_reset failed: %d\n", err); } /** @@ -360,6 +497,10 @@ static void fm10k_watchdog_update_host_state(struct fm10k_intfc *interface) **/ static void fm10k_mbx_subtask(struct fm10k_intfc *interface) { + /* If we're resetting, bail out */ + if (test_bit(__FM10K_RESETTING, interface->state)) + return; + /* process upstream mailbox and update device state */ fm10k_watchdog_update_host_state(interface); @@ -609,9 +750,11 @@ static void fm10k_service_task(struct work_struct *work) interface = container_of(work, struct fm10k_intfc, service_task); + /* Check whether we're detached first */ + fm10k_detach_subtask(interface); + /* tasks run even when interface is down */ fm10k_mbx_subtask(interface); - fm10k_detach_subtask(interface); fm10k_reset_subtask(interface); /* tasks only run when interface is up */ @@ -623,6 +766,112 @@ static void fm10k_service_task(struct work_struct *work) } /** + * fm10k_macvlan_task - send queued MAC/VLAN requests to switch manager + * @work: pointer to work_struct containing our data + * + * This work item handles sending MAC/VLAN updates to the switch manager. When + * the interface is up, it will attempt to queue mailbox messages to the + * switch manager requesting updates for MAC/VLAN pairs. If the Tx fifo of the + * mailbox is full, it will reschedule itself to try again in a short while. + * This ensures that the driver does not overload the switch mailbox with too + * many simultaneous requests, causing an unnecessary reset. + **/ +static void fm10k_macvlan_task(struct work_struct *work) +{ + struct fm10k_macvlan_request *item; + struct fm10k_intfc *interface; + struct delayed_work *dwork; + struct list_head *requests; + struct fm10k_hw *hw; + unsigned long flags; + + dwork = to_delayed_work(work); + interface = container_of(dwork, struct fm10k_intfc, macvlan_task); + hw = &interface->hw; + requests = &interface->macvlan_requests; + + do { + /* Pop the first item off the list */ + spin_lock_irqsave(&interface->macvlan_lock, flags); + item = list_first_entry_or_null(requests, + struct fm10k_macvlan_request, + list); + if (item) + list_del_init(&item->list); + + spin_unlock_irqrestore(&interface->macvlan_lock, flags); + + /* We have no more items to process */ + if (!item) + goto done; + + fm10k_mbx_lock(interface); + + /* Check that we have plenty of space to send the message. We + * want to ensure that the mailbox stays low enough to avoid a + * change in the host state, otherwise we may see spurious + * link up / link down notifications. + */ + if (!hw->mbx.ops.tx_ready(&hw->mbx, FM10K_VFMBX_MSG_MTU + 5)) { + hw->mbx.ops.process(hw, &hw->mbx); + set_bit(__FM10K_MACVLAN_REQUEST, interface->state); + fm10k_mbx_unlock(interface); + + /* Put the request back on the list */ + spin_lock_irqsave(&interface->macvlan_lock, flags); + list_add(&item->list, requests); + spin_unlock_irqrestore(&interface->macvlan_lock, flags); + break; + } + + switch (item->type) { + case FM10K_MC_MAC_REQUEST: + hw->mac.ops.update_mc_addr(hw, + item->mac.glort, + item->mac.addr, + item->mac.vid, + item->set); + break; + case FM10K_UC_MAC_REQUEST: + hw->mac.ops.update_uc_addr(hw, + item->mac.glort, + item->mac.addr, + item->mac.vid, + item->set, + 0); + break; + case FM10K_VLAN_REQUEST: + hw->mac.ops.update_vlan(hw, + item->vlan.vid, + item->vlan.vsi, + item->set); + break; + default: + break; + } + + fm10k_mbx_unlock(interface); + + /* Free the item now that we've sent the update */ + kfree(item); + } while (true); + +done: + WARN_ON(!test_bit(__FM10K_MACVLAN_SCHED, interface->state)); + + /* flush memory to make sure state is correct */ + smp_mb__before_atomic(); + clear_bit(__FM10K_MACVLAN_SCHED, interface->state); + + /* If a MAC/VLAN request was scheduled since we started, we should + * re-schedule. However, there is no reason to re-schedule if there is + * no work to do. + */ + if (test_bit(__FM10K_MACVLAN_REQUEST, interface->state)) + fm10k_macvlan_schedule(interface); +} + +/** * fm10k_configure_tx_ring - Configure Tx ring after Reset * @interface: board private structure * @ring: structure containing ring specific data @@ -1544,7 +1793,7 @@ int fm10k_qv_request_irq(struct fm10k_intfc *interface) struct net_device *dev = interface->netdev; struct fm10k_hw *hw = &interface->hw; struct msix_entry *entry; - int ri = 0, ti = 0; + unsigned int ri = 0, ti = 0; int vector, err; entry = &interface->msix_entries[NON_Q_VECTORS(hw)]; @@ -1554,15 +1803,15 @@ int fm10k_qv_request_irq(struct fm10k_intfc *interface) /* name the vector */ if (q_vector->tx.count && q_vector->rx.count) { - snprintf(q_vector->name, sizeof(q_vector->name) - 1, - "%s-TxRx-%d", dev->name, ri++); + snprintf(q_vector->name, sizeof(q_vector->name), + "%s-TxRx-%u", dev->name, ri++); ti++; } else if (q_vector->rx.count) { - snprintf(q_vector->name, sizeof(q_vector->name) - 1, - "%s-rx-%d", dev->name, ri++); + snprintf(q_vector->name, sizeof(q_vector->name), + "%s-rx-%u", dev->name, ri++); } else if (q_vector->tx.count) { - snprintf(q_vector->name, sizeof(q_vector->name) - 1, - "%s-tx-%d", dev->name, ti++); + snprintf(q_vector->name, sizeof(q_vector->name), + "%s-tx-%u", dev->name, ti++); } else { /* skip this unused q_vector */ continue; @@ -1800,9 +2049,6 @@ static int fm10k_sw_init(struct fm10k_intfc *interface, netdev->vlan_features |= NETIF_F_HIGHDMA; } - /* delay any future reset requests */ - interface->last_reset = jiffies + (10 * HZ); - /* reset and initialize the hardware so it is in a known state */ err = hw->mac.ops.reset_hw(hw); if (err) { @@ -1857,9 +2103,16 @@ static int fm10k_sw_init(struct fm10k_intfc *interface, INIT_LIST_HEAD(&interface->vxlan_port); INIT_LIST_HEAD(&interface->geneve_port); + /* Initialize the MAC/VLAN queue */ + INIT_LIST_HEAD(&interface->macvlan_requests); + netdev_rss_key_fill(rss_key, sizeof(rss_key)); memcpy(interface->rssrk, rss_key, sizeof(rss_key)); + /* Initialize the mailbox lock */ + spin_lock_init(&interface->mbx_lock); + spin_lock_init(&interface->macvlan_lock); + /* Start off interface as being down */ set_bit(__FM10K_DOWN, interface->state); set_bit(__FM10K_UPDATING_STATS, interface->state); @@ -2063,10 +2316,12 @@ static int fm10k_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Initialize service timer and service task late in order to avoid * cleanup issues. */ - setup_timer(&interface->service_timer, &fm10k_service_timer, - (unsigned long)interface); + timer_setup(&interface->service_timer, fm10k_service_timer, 0); INIT_WORK(&interface->service_task, fm10k_service_task); + /* Setup the MAC/VLAN queue */ + INIT_DELAYED_WORK(&interface->macvlan_task, fm10k_macvlan_task); + /* kick off service timer now, even when interface is down */ mod_timer(&interface->service_timer, (HZ * 2) + jiffies); @@ -2079,8 +2334,9 @@ static int fm10k_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* enable SR-IOV after registering netdev to enforce PF/VF ordering */ fm10k_iov_configure(pdev, 0); - /* clear the service task disable bit to allow service task to start */ + /* clear the service task disable bit and kick off service task */ clear_bit(__FM10K_SERVICE_DISABLE, interface->state); + fm10k_service_event_schedule(interface); return 0; @@ -2118,8 +2374,11 @@ static void fm10k_remove(struct pci_dev *pdev) del_timer_sync(&interface->service_timer); - set_bit(__FM10K_SERVICE_DISABLE, interface->state); - cancel_work_sync(&interface->service_task); + fm10k_stop_service_event(interface); + fm10k_stop_macvlan_task(interface); + + /* Remove all pending MAC/VLAN requests */ + fm10k_clear_macvlan_queue(interface, interface->glort, true); /* free netdev, this may bounce the interrupts due to setup_tc */ if (netdev->reg_state == NETREG_REGISTERED) @@ -2156,11 +2415,14 @@ static void fm10k_prepare_suspend(struct fm10k_intfc *interface) * a surprise remove if the PCIe device is disabled while we're * stopped. We stop the watchdog task until after we resume software * activity. + * + * Note that the MAC/VLAN task will be stopped as part of preparing + * for reset so we don't need to handle it here. */ - set_bit(__FM10K_SERVICE_DISABLE, interface->state); - cancel_work_sync(&interface->service_task); + fm10k_stop_service_event(interface); - fm10k_prepare_for_reset(interface); + if (fm10k_prepare_for_reset(interface)) + set_bit(__FM10K_RESET_SUSPENDED, interface->state); } static int fm10k_handle_resume(struct fm10k_intfc *interface) @@ -2168,6 +2430,13 @@ static int fm10k_handle_resume(struct fm10k_intfc *interface) struct fm10k_hw *hw = &interface->hw; int err; + /* Even if we didn't properly prepare for reset in + * fm10k_prepare_suspend, we'll attempt to resume anyways. + */ + if (!test_and_clear_bit(__FM10K_RESET_SUSPENDED, interface->state)) + dev_warn(&interface->pdev->dev, + "Device was shut down as part of suspend... Attempting to recover\n"); + /* reset statistics starting values */ hw->mac.ops.rebind_hw_stats(hw, &interface->stats); @@ -2185,45 +2454,30 @@ static int fm10k_handle_resume(struct fm10k_intfc *interface) interface->link_down_event = jiffies + (HZ); set_bit(__FM10K_LINK_DOWN, interface->state); - /* clear the service task disable bit to allow service task to start */ - clear_bit(__FM10K_SERVICE_DISABLE, interface->state); - fm10k_service_event_schedule(interface); + /* restart the service task */ + fm10k_start_service_event(interface); + + /* Restart the MAC/VLAN request queue in-case of outstanding events */ + fm10k_macvlan_schedule(interface); return err; } #ifdef CONFIG_PM /** - * fm10k_resume - Restore device to pre-sleep state - * @pdev: PCI device information struct + * fm10k_resume - Generic PM resume hook + * @dev: generic device structure * - * fm10k_resume is called after the system has powered back up from a sleep - * state and is ready to resume operation. This function is meant to restore - * the device back to its pre-sleep state. + * Generic PM hook used when waking the device from a low power state after + * suspend or hibernation. This function does not need to handle lower PCIe + * device state as the stack takes care of that for us. **/ -static int fm10k_resume(struct pci_dev *pdev) +static int fm10k_resume(struct device *dev) { - struct fm10k_intfc *interface = pci_get_drvdata(pdev); + struct fm10k_intfc *interface = pci_get_drvdata(to_pci_dev(dev)); struct net_device *netdev = interface->netdev; struct fm10k_hw *hw = &interface->hw; - u32 err; - - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); - - /* pci_restore_state clears dev->state_saved so call - * pci_save_state to restore it. - */ - pci_save_state(pdev); - - err = pci_enable_device_mem(pdev); - if (err) { - dev_err(&pdev->dev, "Cannot enable PCI device from suspend\n"); - return err; - } - pci_set_master(pdev); - - pci_wake_from_d3(pdev, false); + int err; /* refresh hw_addr in case it was dropped */ hw->hw_addr = interface->uc_addr; @@ -2238,36 +2492,27 @@ static int fm10k_resume(struct pci_dev *pdev) } /** - * fm10k_suspend - Prepare the device for a system sleep state - * @pdev: PCI device information struct + * fm10k_suspend - Generic PM suspend hook + * @dev: generic device structure * - * fm10k_suspend is meant to shutdown the device prior to the system entering - * a sleep state. The fm10k hardware does not support wake on lan so the - * driver simply needs to shut down the device so it is in a low power state. + * Generic PM hook used when setting the device into a low power state for + * system suspend or hibernation. This function does not need to handle lower + * PCIe device state as the stack takes care of that for us. **/ -static int fm10k_suspend(struct pci_dev *pdev, - pm_message_t __always_unused state) +static int fm10k_suspend(struct device *dev) { - struct fm10k_intfc *interface = pci_get_drvdata(pdev); + struct fm10k_intfc *interface = pci_get_drvdata(to_pci_dev(dev)); struct net_device *netdev = interface->netdev; - int err = 0; netif_device_detach(netdev); fm10k_prepare_suspend(interface); - err = pci_save_state(pdev); - if (err) - return err; - - pci_disable_device(pdev); - pci_wake_from_d3(pdev, false); - pci_set_power_state(pdev, PCI_D3hot); - return 0; } #endif /* CONFIG_PM */ + /** * fm10k_io_error_detected - called when PCI error is detected * @pdev: Pointer to PCI device @@ -2343,11 +2588,18 @@ static void fm10k_io_resume(struct pci_dev *pdev) if (err) dev_warn(&pdev->dev, - "fm10k_io_resume failed: %d\n", err); + "%s failed: %d\n", __func__, err); else netif_device_attach(netdev); } +/** + * fm10k_io_reset_prepare - called when PCI function is about to be reset + * @pdev: Pointer to PCI device + * + * This callback is called when the PCI function is about to be reset, + * allowing the device driver to prepare for it. + */ static void fm10k_io_reset_prepare(struct pci_dev *pdev) { /* warn incase we have any active VF devices */ @@ -2357,6 +2609,13 @@ static void fm10k_io_reset_prepare(struct pci_dev *pdev) fm10k_prepare_suspend(pci_get_drvdata(pdev)); } +/** + * fm10k_io_reset_done - called when PCI function has finished resetting + * @pdev: Pointer to PCI device + * + * This callback is called just after the PCI function is reset, such as via + * /sys/class/net/<enpX>/device/reset or similar. + */ static void fm10k_io_reset_done(struct pci_dev *pdev) { struct fm10k_intfc *interface = pci_get_drvdata(pdev); @@ -2364,7 +2623,7 @@ static void fm10k_io_reset_done(struct pci_dev *pdev) if (err) { dev_warn(&pdev->dev, - "fm10k_io_reset_notify failed: %d\n", err); + "%s failed: %d\n", __func__, err); netif_device_detach(interface->netdev); } } @@ -2377,15 +2636,18 @@ static const struct pci_error_handlers fm10k_err_handler = { .reset_done = fm10k_io_reset_done, }; +static SIMPLE_DEV_PM_OPS(fm10k_pm_ops, fm10k_suspend, fm10k_resume); + static struct pci_driver fm10k_driver = { .name = fm10k_driver_name, .id_table = fm10k_pci_tbl, .probe = fm10k_probe, .remove = fm10k_remove, #ifdef CONFIG_PM - .suspend = fm10k_suspend, - .resume = fm10k_resume, -#endif + .driver = { + .pm = &fm10k_pm_ops, + }, +#endif /* CONFIG_PM */ .sriov_configure = fm10k_iov_configure, .err_handler = &fm10k_err_handler }; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c index 40ee0242a80a..425d814aed4d 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c @@ -1,5 +1,5 @@ /* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2016 Intel Corporation. + * Copyright(c) 2013 - 2017 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -1186,7 +1186,7 @@ s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *hw, u32 **results, * Will report an error if the VLAN ID is out of range. For VID = 0, it will * return either the pf_vid or sw_vid depending on which one is set. */ -static s32 fm10k_iov_select_vid(struct fm10k_vf_info *vf_info, u16 vid) +s32 fm10k_iov_select_vid(struct fm10k_vf_info *vf_info, u16 vid) { if (!vid) return vf_info->pf_vid ? vf_info->pf_vid : vf_info->sw_vid; @@ -1334,19 +1334,19 @@ static u8 fm10k_iov_supported_xcast_mode_pf(struct fm10k_vf_info *vf_info, case FM10K_XCAST_MODE_PROMISC: if (vf_flags & FM10K_VF_FLAG_PROMISC_CAPABLE) return FM10K_XCAST_MODE_PROMISC; - /* fallthough */ + /* fall through */ case FM10K_XCAST_MODE_ALLMULTI: if (vf_flags & FM10K_VF_FLAG_ALLMULTI_CAPABLE) return FM10K_XCAST_MODE_ALLMULTI; - /* fallthough */ + /* fall through */ case FM10K_XCAST_MODE_MULTI: if (vf_flags & FM10K_VF_FLAG_MULTI_CAPABLE) return FM10K_XCAST_MODE_MULTI; - /* fallthough */ + /* fall through */ case FM10K_XCAST_MODE_NONE: if (vf_flags & FM10K_VF_FLAG_NONE_CAPABLE) return FM10K_XCAST_MODE_NONE; - /* fallthough */ + /* fall through */ default: break; } diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.h b/drivers/net/ethernet/intel/fm10k/fm10k_pf.h index 3336d3c10760..e04d41f1a532 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.h @@ -1,5 +1,5 @@ /* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2016 Intel Corporation. + * Copyright(c) 2013 - 2017 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -114,6 +114,7 @@ extern const struct fm10k_tlv_attr fm10k_err_msg_attr[]; #define FM10K_PF_MSG_ERR_HANDLER(msg, func) \ FM10K_MSG_HANDLER(FM10K_PF_MSG_ID_##msg, fm10k_err_msg_attr, func) +s32 fm10k_iov_select_vid(struct fm10k_vf_info *vf_info, u16 vid); s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *, u32 **, struct fm10k_mbx_info *); s32 fm10k_iov_msg_mac_vlan_pf(struct fm10k_hw *, u32 **, struct fm10k_mbx_info *); diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index d0c1bf5441d8..5829715fa342 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -54,6 +54,9 @@ #include <linux/clocksource.h> #include <linux/net_tstamp.h> #include <linux/ptp_clock_kernel.h> +#include <net/pkt_cls.h> +#include <net/tc_act/tc_gact.h> +#include <net/tc_act/tc_mirred.h> #include "i40e_type.h" #include "i40e_prototype.h" #include "i40e_client.h" @@ -77,6 +80,7 @@ #define i40e_default_queues_per_vmdq(pf) \ (((pf)->hw_features & I40E_HW_RSS_AQ_CAPABLE) ? 4 : 1) #define I40E_DEFAULT_QUEUES_PER_VF 4 +#define I40E_MAX_VF_QUEUES 16 #define I40E_DEFAULT_QUEUES_PER_TC 1 /* should be a power of 2 */ #define i40e_pf_get_max_q_per_tc(pf) \ (((pf)->hw_features & I40E_HW_128_QP_RSS_CAPABLE) ? 128 : 64) @@ -86,6 +90,7 @@ #define I40E_AQ_LEN 256 #define I40E_AQ_WORK_LIMIT 66 /* max number of VFs + a little */ #define I40E_MAX_USER_PRIORITY 8 +#define I40E_MAX_QUEUES_PER_CH 64 #define I40E_DEFAULT_TRAFFIC_CLASS BIT(0) #define I40E_DEFAULT_MSG_ENABLE 4 #define I40E_QUEUE_WAIT_RETRY_LIMIT 10 @@ -125,6 +130,11 @@ /* default to trying for four seconds */ #define I40E_TRY_LINK_TIMEOUT (4 * HZ) +/* BW rate limiting */ +#define I40E_BW_CREDIT_DIVISOR 50 /* 50Mbps per BW credit */ +#define I40E_BW_MBPS_DIVISOR 125000 /* rate / (1000000 / 8) Mbps */ +#define I40E_MAX_BW_INACTIVE_ACCUM 4 /* accumulate 4 credits max */ + /* driver state flags */ enum i40e_state_t { __I40E_TESTING, @@ -136,6 +146,7 @@ enum i40e_state_t { __I40E_MDD_EVENT_PENDING, __I40E_VFLR_EVENT_PENDING, __I40E_RESET_RECOVERY_PENDING, + __I40E_MISC_IRQ_REQUESTED, __I40E_RESET_INTR_RECEIVED, __I40E_REINIT_REQUESTED, __I40E_PF_RESET_REQUESTED, @@ -155,6 +166,8 @@ enum i40e_state_t { __I40E_STATE_SIZE__, }; +#define I40E_PF_RESET_FLAG BIT_ULL(__I40E_PF_RESET_REQUESTED) + /* VSI state flags */ enum i40e_vsi_state_t { __I40E_VSI_DOWN, @@ -242,6 +255,58 @@ struct i40e_fdir_filter { u32 fd_id; }; +#define I40E_CLOUD_FIELD_OMAC 0x01 +#define I40E_CLOUD_FIELD_IMAC 0x02 +#define I40E_CLOUD_FIELD_IVLAN 0x04 +#define I40E_CLOUD_FIELD_TEN_ID 0x08 +#define I40E_CLOUD_FIELD_IIP 0x10 + +#define I40E_CLOUD_FILTER_FLAGS_OMAC I40E_CLOUD_FIELD_OMAC +#define I40E_CLOUD_FILTER_FLAGS_IMAC I40E_CLOUD_FIELD_IMAC +#define I40E_CLOUD_FILTER_FLAGS_IMAC_IVLAN (I40E_CLOUD_FIELD_IMAC | \ + I40E_CLOUD_FIELD_IVLAN) +#define I40E_CLOUD_FILTER_FLAGS_IMAC_TEN_ID (I40E_CLOUD_FIELD_IMAC | \ + I40E_CLOUD_FIELD_TEN_ID) +#define I40E_CLOUD_FILTER_FLAGS_OMAC_TEN_ID_IMAC (I40E_CLOUD_FIELD_OMAC | \ + I40E_CLOUD_FIELD_IMAC | \ + I40E_CLOUD_FIELD_TEN_ID) +#define I40E_CLOUD_FILTER_FLAGS_IMAC_IVLAN_TEN_ID (I40E_CLOUD_FIELD_IMAC | \ + I40E_CLOUD_FIELD_IVLAN | \ + I40E_CLOUD_FIELD_TEN_ID) +#define I40E_CLOUD_FILTER_FLAGS_IIP I40E_CLOUD_FIELD_IIP + +struct i40e_cloud_filter { + struct hlist_node cloud_node; + unsigned long cookie; + /* cloud filter input set follows */ + u8 dst_mac[ETH_ALEN]; + u8 src_mac[ETH_ALEN]; + __be16 vlan_id; + u16 seid; /* filter control */ + __be16 dst_port; + __be16 src_port; + u32 tenant_id; + union { + struct { + struct in_addr dst_ip; + struct in_addr src_ip; + } v4; + struct { + struct in6_addr dst_ip6; + struct in6_addr src_ip6; + } v6; + } ip; +#define dst_ipv6 ip.v6.dst_ip6.s6_addr32 +#define src_ipv6 ip.v6.src_ip6.s6_addr32 +#define dst_ipv4 ip.v4.dst_ip.s_addr +#define src_ipv4 ip.v4.src_ip.s_addr + u16 n_proto; /* Ethernet Protocol */ + u8 ip_proto; /* IPPROTO value */ + u8 flags; +#define I40E_CLOUD_TNL_TYPE_NONE 0xff + u8 tunnel_type; +}; + #define I40E_ETH_P_LLDP 0x88cc #define I40E_DCB_PRIO_TYPE_STRICT 0 @@ -336,6 +401,25 @@ struct i40e_flex_pit { u8 pit_index; }; +struct i40e_channel { + struct list_head list; + bool initialized; + u8 type; + u16 vsi_number; /* Assigned VSI number from AQ 'Add VSI' response */ + u16 stat_counter_idx; + u16 base_queue; + u16 num_queue_pairs; /* Requested by user */ + u16 seid; + + u8 enabled_tc; + struct i40e_aqc_vsi_properties_data info; + + u64 max_tx_rate; + + /* track this channel belongs to which VSI */ + struct i40e_vsi *parent_vsi; +}; + /* struct that defines the Ethernet device */ struct i40e_pf { struct pci_dev *pdev; @@ -348,7 +432,7 @@ struct i40e_pf { u16 num_vmdq_vsis; /* num vmdq vsis this PF has set up */ u16 num_vmdq_qps; /* num queue pairs per vmdq pool */ u16 num_vmdq_msix; /* num queue vectors per vmdq pool */ - u16 num_req_vfs; /* num VFs requested for this VF */ + u16 num_req_vfs; /* num VFs requested for this PF */ u16 num_vf_qps; /* num queue pairs per VF */ u16 num_lan_qps; /* num lan queues this PF has set up */ u16 num_lan_msix; /* num queue vectors for the base PF vsi */ @@ -390,6 +474,9 @@ struct i40e_pf { struct i40e_udp_port_config udp_ports[I40E_MAX_PF_UDP_OFFLOAD_PORTS]; u16 pending_udp_bitmap; + struct hlist_head cloud_filter_list; + u16 num_cloud_filters; + enum i40e_interrupt_policy int_policy; u16 rx_itr_default; u16 tx_itr_default; @@ -401,55 +488,60 @@ struct i40e_pf { struct timer_list service_timer; struct work_struct service_task; - u64 hw_features; -#define I40E_HW_RSS_AQ_CAPABLE BIT_ULL(0) -#define I40E_HW_128_QP_RSS_CAPABLE BIT_ULL(1) -#define I40E_HW_ATR_EVICT_CAPABLE BIT_ULL(2) -#define I40E_HW_WB_ON_ITR_CAPABLE BIT_ULL(3) -#define I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE BIT_ULL(4) -#define I40E_HW_NO_PCI_LINK_CHECK BIT_ULL(5) -#define I40E_HW_100M_SGMII_CAPABLE BIT_ULL(6) -#define I40E_HW_NO_DCB_SUPPORT BIT_ULL(7) -#define I40E_HW_USE_SET_LLDP_MIB BIT_ULL(8) -#define I40E_HW_GENEVE_OFFLOAD_CAPABLE BIT_ULL(9) -#define I40E_HW_PTP_L4_CAPABLE BIT_ULL(10) -#define I40E_HW_WOL_MC_MAGIC_PKT_WAKE BIT_ULL(11) -#define I40E_HW_MPLS_HDR_OFFLOAD_CAPABLE BIT_ULL(12) -#define I40E_HW_HAVE_CRT_RETIMER BIT_ULL(13) -#define I40E_HW_OUTER_UDP_CSUM_CAPABLE BIT_ULL(14) -#define I40E_HW_PHY_CONTROLS_LEDS BIT_ULL(15) -#define I40E_HW_STOP_FW_LLDP BIT_ULL(16) -#define I40E_HW_PORT_ID_VALID BIT_ULL(17) -#define I40E_HW_RESTART_AUTONEG BIT_ULL(18) - - u64 flags; -#define I40E_FLAG_RX_CSUM_ENABLED BIT_ULL(1) -#define I40E_FLAG_MSI_ENABLED BIT_ULL(2) -#define I40E_FLAG_MSIX_ENABLED BIT_ULL(3) -#define I40E_FLAG_HW_ATR_EVICT_ENABLED BIT_ULL(4) -#define I40E_FLAG_RSS_ENABLED BIT_ULL(6) -#define I40E_FLAG_VMDQ_ENABLED BIT_ULL(7) -#define I40E_FLAG_IWARP_ENABLED BIT_ULL(10) -#define I40E_FLAG_FILTER_SYNC BIT_ULL(15) -#define I40E_FLAG_SERVICE_CLIENT_REQUESTED BIT_ULL(16) -#define I40E_FLAG_SRIOV_ENABLED BIT_ULL(19) -#define I40E_FLAG_DCB_ENABLED BIT_ULL(20) -#define I40E_FLAG_FD_SB_ENABLED BIT_ULL(21) -#define I40E_FLAG_FD_ATR_ENABLED BIT_ULL(22) -#define I40E_FLAG_FD_SB_AUTO_DISABLED BIT_ULL(23) -#define I40E_FLAG_FD_ATR_AUTO_DISABLED BIT_ULL(24) -#define I40E_FLAG_PTP BIT_ULL(25) -#define I40E_FLAG_MFP_ENABLED BIT_ULL(26) -#define I40E_FLAG_UDP_FILTER_SYNC BIT_ULL(27) -#define I40E_FLAG_DCB_CAPABLE BIT_ULL(29) -#define I40E_FLAG_VEB_STATS_ENABLED BIT_ULL(37) -#define I40E_FLAG_LINK_POLLING_ENABLED BIT_ULL(39) -#define I40E_FLAG_VEB_MODE_ENABLED BIT_ULL(40) -#define I40E_FLAG_TRUE_PROMISC_SUPPORT BIT_ULL(51) -#define I40E_FLAG_CLIENT_RESET BIT_ULL(54) -#define I40E_FLAG_TEMP_LINK_POLLING BIT_ULL(55) -#define I40E_FLAG_CLIENT_L2_CHANGE BIT_ULL(56) -#define I40E_FLAG_LEGACY_RX BIT_ULL(58) + u32 hw_features; +#define I40E_HW_RSS_AQ_CAPABLE BIT(0) +#define I40E_HW_128_QP_RSS_CAPABLE BIT(1) +#define I40E_HW_ATR_EVICT_CAPABLE BIT(2) +#define I40E_HW_WB_ON_ITR_CAPABLE BIT(3) +#define I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE BIT(4) +#define I40E_HW_NO_PCI_LINK_CHECK BIT(5) +#define I40E_HW_100M_SGMII_CAPABLE BIT(6) +#define I40E_HW_NO_DCB_SUPPORT BIT(7) +#define I40E_HW_USE_SET_LLDP_MIB BIT(8) +#define I40E_HW_GENEVE_OFFLOAD_CAPABLE BIT(9) +#define I40E_HW_PTP_L4_CAPABLE BIT(10) +#define I40E_HW_WOL_MC_MAGIC_PKT_WAKE BIT(11) +#define I40E_HW_MPLS_HDR_OFFLOAD_CAPABLE BIT(12) +#define I40E_HW_HAVE_CRT_RETIMER BIT(13) +#define I40E_HW_OUTER_UDP_CSUM_CAPABLE BIT(14) +#define I40E_HW_PHY_CONTROLS_LEDS BIT(15) +#define I40E_HW_STOP_FW_LLDP BIT(16) +#define I40E_HW_PORT_ID_VALID BIT(17) +#define I40E_HW_RESTART_AUTONEG BIT(18) + + u32 flags; +#define I40E_FLAG_RX_CSUM_ENABLED BIT(0) +#define I40E_FLAG_MSI_ENABLED BIT(1) +#define I40E_FLAG_MSIX_ENABLED BIT(2) +#define I40E_FLAG_RSS_ENABLED BIT(3) +#define I40E_FLAG_VMDQ_ENABLED BIT(4) +#define I40E_FLAG_FILTER_SYNC BIT(5) +#define I40E_FLAG_SRIOV_ENABLED BIT(6) +#define I40E_FLAG_DCB_CAPABLE BIT(7) +#define I40E_FLAG_DCB_ENABLED BIT(8) +#define I40E_FLAG_FD_SB_ENABLED BIT(9) +#define I40E_FLAG_FD_ATR_ENABLED BIT(10) +#define I40E_FLAG_FD_SB_AUTO_DISABLED BIT(11) +#define I40E_FLAG_FD_ATR_AUTO_DISABLED BIT(12) +#define I40E_FLAG_MFP_ENABLED BIT(13) +#define I40E_FLAG_UDP_FILTER_SYNC BIT(14) +#define I40E_FLAG_HW_ATR_EVICT_ENABLED BIT(15) +#define I40E_FLAG_VEB_MODE_ENABLED BIT(16) +#define I40E_FLAG_VEB_STATS_ENABLED BIT(17) +#define I40E_FLAG_LINK_POLLING_ENABLED BIT(18) +#define I40E_FLAG_TRUE_PROMISC_SUPPORT BIT(19) +#define I40E_FLAG_TEMP_LINK_POLLING BIT(20) +#define I40E_FLAG_LEGACY_RX BIT(21) +#define I40E_FLAG_PTP BIT(22) +#define I40E_FLAG_IWARP_ENABLED BIT(23) +#define I40E_FLAG_SERVICE_CLIENT_REQUESTED BIT(24) +#define I40E_FLAG_CLIENT_L2_CHANGE BIT(25) +#define I40E_FLAG_CLIENT_RESET BIT(26) +#define I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED BIT(27) +#define I40E_FLAG_SOURCE_PRUNING_DISABLED BIT(28) +#define I40E_FLAG_TC_MQPRIO BIT(29) +#define I40E_FLAG_FD_SB_INACTIVE BIT(30) +#define I40E_FLAG_FD_SB_TO_CLOUD_FILTER BIT(31) struct i40e_client_instance *cinst; bool stat_offsets_loaded; @@ -530,6 +622,10 @@ struct i40e_pf { u32 ioremap_len; u32 fd_inv; u16 phy_led_val; + + u16 override_q_count; + u16 last_sw_conf_flags; + u16 last_sw_conf_valid_flags; }; /** @@ -673,6 +769,7 @@ struct i40e_vsi { enum i40e_vsi_type type; /* VSI type, e.g., LAN, FCoE, etc */ s16 vf_id; /* Virtual function ID for SRIOV VSIs */ + struct tc_mqprio_qopt_offload mqprio_qopt; /* queue parameters */ struct i40e_tc_configuration tc_config; struct i40e_aqc_vsi_properties_data info; @@ -694,6 +791,17 @@ struct i40e_vsi { bool current_isup; /* Sync 'link up' logging */ enum i40e_aq_link_speed current_speed; /* Sync link speed logging */ + /* channel specific fields */ + u16 cnt_q_avail; /* num of queues available for channel usage */ + u16 orig_rss_size; + u16 current_rss_size; + bool reconfig_rss; + + u16 next_base_queue; /* next queue to be used for channel setup */ + + struct list_head ch_list; + u16 tc_seid_map[I40E_MAX_TRAFFIC_CLASS]; + void *priv; /* client driver data reference. */ /* VSI specific handlers */ @@ -945,9 +1053,6 @@ static inline void i40e_irq_dynamic_enable(struct i40e_vsi *vsi, int vector) struct i40e_hw *hw = &pf->hw; u32 val; - /* definitely clear the PBA here, as this function is meant to - * clean out all previous interrupts AND enable the interrupt - */ val = I40E_PFINT_DYN_CTLN_INTENA_MASK | I40E_PFINT_DYN_CTLN_CLEARPBA_MASK | (I40E_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT); @@ -956,7 +1061,7 @@ static inline void i40e_irq_dynamic_enable(struct i40e_vsi *vsi, int vector) } void i40e_irq_dynamic_disable_icr0(struct i40e_pf *pf); -void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf, bool clearpba); +void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf); int i40e_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd); int i40e_open(struct net_device *netdev); int i40e_close(struct net_device *netdev); @@ -1001,4 +1106,7 @@ static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi) { return !!vsi->xdp_prog; } + +int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch); +int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate); #endif /* _I40E_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c index ba04988e0598..9dcb2a961197 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c @@ -607,6 +607,18 @@ i40e_status i40e_init_adminq(struct i40e_hw *hw) &oem_lo); hw->nvm.oem_ver = ((u32)oem_hi << 16) | oem_lo; + if (hw->mac.type == I40E_MAC_XL710 && + hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && + hw->aq.api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_XL710) { + hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE; + } + + /* The ability to RX (not drop) 802.1ad frames was added in API 1.7 */ + if (hw->aq.api_maj_ver > 1 || + (hw->aq.api_maj_ver == 1 && + hw->aq.api_min_ver >= 7)) + hw->flags |= I40E_HW_FLAG_802_1AD_CAPABLE; + if (hw->aq.api_maj_ver > I40E_FW_API_VERSION_MAJOR) { ret_code = I40E_ERR_FIRMWARE_API_VERSION; goto init_adminq_free_arq; diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index 5d5f422cbae5..b0188b8f91ba 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -34,7 +34,15 @@ */ #define I40E_FW_API_VERSION_MAJOR 0x0001 -#define I40E_FW_API_VERSION_MINOR 0x0005 +#define I40E_FW_API_VERSION_MINOR_X722 0x0005 +#define I40E_FW_API_VERSION_MINOR_X710 0x0007 + +#define I40E_FW_MINOR_VERSION(_h) ((_h)->mac.type == I40E_MAC_XL710 ? \ + I40E_FW_API_VERSION_MINOR_X710 : \ + I40E_FW_API_VERSION_MINOR_X722) + +/* API version 1.7 implements additional link and PHY-specific APIs */ +#define I40E_MINOR_VER_GET_LINK_INFO_XL710 0x0007 struct i40e_aq_desc { __le16 flags; @@ -236,6 +244,8 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_set_phy_debug = 0x0622, i40e_aqc_opc_upload_ext_phy_fm = 0x0625, i40e_aqc_opc_run_phy_activity = 0x0626, + i40e_aqc_opc_set_phy_register = 0x0628, + i40e_aqc_opc_get_phy_register = 0x0629, /* NVM commands */ i40e_aqc_opc_nvm_read = 0x0701, @@ -765,7 +775,50 @@ struct i40e_aqc_set_switch_config { #define I40E_AQ_SET_SWITCH_CFG_PROMISC 0x0001 #define I40E_AQ_SET_SWITCH_CFG_L2_FILTER 0x0002 __le16 valid_flags; - u8 reserved[12]; + /* The ethertype in switch_tag is dropped on ingress and used + * internally by the switch. Set this to zero for the default + * of 0x88a8 (802.1ad). Should be zero for firmware API + * versions lower than 1.7. + */ + __le16 switch_tag; + /* The ethertypes in first_tag and second_tag are used to + * match the outer and inner VLAN tags (respectively) when HW + * double VLAN tagging is enabled via the set port parameters + * AQ command. Otherwise these are both ignored. Set them to + * zero for their defaults of 0x8100 (802.1Q). Should be zero + * for firmware API versions lower than 1.7. + */ + __le16 first_tag; + __le16 second_tag; + /* Next byte is split into following: + * Bit 7 : 0 : No action, 1: Switch to mode defined by bits 6:0 + * Bit 6 : 0 : Destination Port, 1: source port + * Bit 5..4 : L4 type + * 0: rsvd + * 1: TCP + * 2: UDP + * 3: Both TCP and UDP + * Bits 3:0 Mode + * 0: default mode + * 1: L4 port only mode + * 2: non-tunneled mode + * 3: tunneled mode + */ +#define I40E_AQ_SET_SWITCH_BIT7_VALID 0x80 + +#define I40E_AQ_SET_SWITCH_L4_SRC_PORT 0x40 + +#define I40E_AQ_SET_SWITCH_L4_TYPE_RSVD 0x00 +#define I40E_AQ_SET_SWITCH_L4_TYPE_TCP 0x10 +#define I40E_AQ_SET_SWITCH_L4_TYPE_UDP 0x20 +#define I40E_AQ_SET_SWITCH_L4_TYPE_BOTH 0x30 + +#define I40E_AQ_SET_SWITCH_MODE_DEFAULT 0x00 +#define I40E_AQ_SET_SWITCH_MODE_L4_PORT 0x01 +#define I40E_AQ_SET_SWITCH_MODE_NON_TUNNEL 0x02 +#define I40E_AQ_SET_SWITCH_MODE_TUNNEL 0x03 + u8 mode; + u8 rsvd5[5]; }; I40E_CHECK_CMD_LENGTH(i40e_aqc_set_switch_config); @@ -1318,14 +1371,16 @@ struct i40e_aqc_add_remove_cloud_filters { #define I40E_AQC_ADD_CLOUD_CMD_SEID_NUM_SHIFT 0 #define I40E_AQC_ADD_CLOUD_CMD_SEID_NUM_MASK (0x3FF << \ I40E_AQC_ADD_CLOUD_CMD_SEID_NUM_SHIFT) - u8 reserved2[4]; + u8 big_buffer_flag; +#define I40E_AQC_ADD_CLOUD_CMD_BB 1 + u8 reserved2[3]; __le32 addr_high; __le32 addr_low; }; I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_cloud_filters); -struct i40e_aqc_add_remove_cloud_filters_element_data { +struct i40e_aqc_cloud_filters_element_data { u8 outer_mac[6]; u8 inner_mac[6]; __le16 inner_vlan; @@ -1337,6 +1392,9 @@ struct i40e_aqc_add_remove_cloud_filters_element_data { struct { u8 data[16]; } v6; + struct { + __le16 data[8]; + } raw_v6; } ipaddr; __le16 flags; #define I40E_AQC_ADD_CLOUD_FILTER_SHIFT 0 @@ -1355,6 +1413,10 @@ struct i40e_aqc_add_remove_cloud_filters_element_data { #define I40E_AQC_ADD_CLOUD_FILTER_IMAC 0x000A #define I40E_AQC_ADD_CLOUD_FILTER_OMAC_TEN_ID_IMAC 0x000B #define I40E_AQC_ADD_CLOUD_FILTER_IIP 0x000C +/* 0x0010 to 0x0017 is for custom filters */ +#define I40E_AQC_ADD_CLOUD_FILTER_IP_PORT 0x0010 /* Dest IP + L4 Port */ +#define I40E_AQC_ADD_CLOUD_FILTER_MAC_PORT 0x0011 /* Dest MAC + L4 Port */ +#define I40E_AQC_ADD_CLOUD_FILTER_MAC_VLAN_PORT 0x0012 /* Dest MAC + VLAN + L4 Port */ #define I40E_AQC_ADD_CLOUD_FLAGS_TO_QUEUE 0x0080 #define I40E_AQC_ADD_CLOUD_VNK_SHIFT 6 @@ -1389,6 +1451,49 @@ struct i40e_aqc_add_remove_cloud_filters_element_data { u8 response_reserved[7]; }; +I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_cloud_filters_element_data); + +/* i40e_aqc_cloud_filters_element_bb is used when + * I40E_AQC_CLOUD_CMD_BB flag is set. + */ +struct i40e_aqc_cloud_filters_element_bb { + struct i40e_aqc_cloud_filters_element_data element; + u16 general_fields[32]; +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X10_WORD0 0 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X10_WORD1 1 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X10_WORD2 2 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X11_WORD0 3 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X11_WORD1 4 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X11_WORD2 5 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X12_WORD0 6 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X12_WORD1 7 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X12_WORD2 8 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X13_WORD0 9 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X13_WORD1 10 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X13_WORD2 11 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X14_WORD0 12 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X14_WORD1 13 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X14_WORD2 14 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD0 15 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD1 16 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD2 17 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD3 18 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD4 19 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD5 20 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD6 21 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD7 22 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD0 23 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD1 24 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD2 25 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD3 26 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD4 27 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD5 28 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD6 29 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD7 30 +}; + +I40E_CHECK_STRUCT_LEN(0x80, i40e_aqc_cloud_filters_element_bb); + struct i40e_aqc_remove_cloud_filters_completion { __le16 perfect_ovlan_used; __le16 perfect_ovlan_free; @@ -1400,6 +1505,60 @@ struct i40e_aqc_remove_cloud_filters_completion { I40E_CHECK_CMD_LENGTH(i40e_aqc_remove_cloud_filters_completion); +/* Replace filter Command 0x025F + * uses the i40e_aqc_replace_cloud_filters, + * and the generic indirect completion structure + */ +struct i40e_filter_data { + u8 filter_type; + u8 input[3]; +}; + +I40E_CHECK_STRUCT_LEN(4, i40e_filter_data); + +struct i40e_aqc_replace_cloud_filters_cmd { + u8 valid_flags; +#define I40E_AQC_REPLACE_L1_FILTER 0x0 +#define I40E_AQC_REPLACE_CLOUD_FILTER 0x1 +#define I40E_AQC_GET_CLOUD_FILTERS 0x2 +#define I40E_AQC_MIRROR_CLOUD_FILTER 0x4 +#define I40E_AQC_HIGH_PRIORITY_CLOUD_FILTER 0x8 + u8 old_filter_type; + u8 new_filter_type; + u8 tr_bit; + u8 reserved[4]; + __le32 addr_high; + __le32 addr_low; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_replace_cloud_filters_cmd); + +struct i40e_aqc_replace_cloud_filters_cmd_buf { + u8 data[32]; +/* Filter type INPUT codes*/ +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_ENTRIES_MAX 3 +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_VALIDATED BIT(7) + +/* Field Vector offsets */ +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_MAC_DA 0 +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG_ETH 6 +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG 7 +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_VLAN 8 +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG_OVLAN 9 +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG_IVLAN 10 +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_TUNNLE_KEY 11 +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_IMAC 12 +/* big FLU */ +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_IP_DA 14 +/* big FLU */ +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_OIP_DA 15 + +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_INNER_VLAN 37 + struct i40e_filter_data filters[8]; +}; + +I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_replace_cloud_filters_cmd_buf); + /* Add Mirror Rule (indirect or direct 0x0260) * Delete Mirror Rule (indirect or direct 0x0261) * note: some rule types (4,5) do not use an external buffer. @@ -1726,6 +1885,8 @@ enum i40e_aq_phy_type { I40E_PHY_TYPE_10GBASE_CR1_CU = 0xB, I40E_PHY_TYPE_10GBASE_AOC = 0xC, I40E_PHY_TYPE_40GBASE_AOC = 0xD, + I40E_PHY_TYPE_UNRECOGNIZED = 0xE, + I40E_PHY_TYPE_UNSUPPORTED = 0xF, I40E_PHY_TYPE_100BASE_TX = 0x11, I40E_PHY_TYPE_1000BASE_T = 0x12, I40E_PHY_TYPE_10GBASE_T = 0x13, @@ -1744,7 +1905,12 @@ enum i40e_aq_phy_type { I40E_PHY_TYPE_25GBASE_CR = 0x20, I40E_PHY_TYPE_25GBASE_SR = 0x21, I40E_PHY_TYPE_25GBASE_LR = 0x22, - I40E_PHY_TYPE_MAX + I40E_PHY_TYPE_25GBASE_AOC = 0x23, + I40E_PHY_TYPE_25GBASE_ACC = 0x24, + I40E_PHY_TYPE_MAX, + I40E_PHY_TYPE_NOT_SUPPORTED_HIGH_TEMP = 0xFD, + I40E_PHY_TYPE_EMPTY = 0xFE, + I40E_PHY_TYPE_DEFAULT = 0xFF, }; #define I40E_LINK_SPEED_100MB_SHIFT 0x1 @@ -1801,6 +1967,8 @@ struct i40e_aq_get_phy_abilities_resp { #define I40E_AQ_PHY_TYPE_EXT_25G_CR 0X02 #define I40E_AQ_PHY_TYPE_EXT_25G_SR 0x04 #define I40E_AQ_PHY_TYPE_EXT_25G_LR 0x08 +#define I40E_AQ_PHY_TYPE_EXT_25G_AOC 0x10 +#define I40E_AQ_PHY_TYPE_EXT_25G_ACC 0x20 u8 fec_cfg_curr_mod_ext_info; #define I40E_AQ_ENABLE_FEC_KR 0x01 #define I40E_AQ_ENABLE_FEC_RS 0x02 @@ -1934,19 +2102,31 @@ struct i40e_aqc_get_link_status { #define I40E_AQ_25G_SERDES_UCODE_ERR 0X04 #define I40E_AQ_25G_NIMB_UCODE_ERR 0X05 u8 loopback; /* use defines from i40e_aqc_set_lb_mode */ +/* Since firmware API 1.7 loopback field keeps power class info as well */ +#define I40E_AQ_LOOPBACK_MASK 0x07 +#define I40E_AQ_PWR_CLASS_SHIFT_LB 6 +#define I40E_AQ_PWR_CLASS_MASK_LB (0x03 << I40E_AQ_PWR_CLASS_SHIFT_LB) __le16 max_frame_size; u8 config; #define I40E_AQ_CONFIG_FEC_KR_ENA 0x01 #define I40E_AQ_CONFIG_FEC_RS_ENA 0x02 #define I40E_AQ_CONFIG_CRC_ENA 0x04 #define I40E_AQ_CONFIG_PACING_MASK 0x78 - u8 power_desc; + union { + struct { + u8 power_desc; #define I40E_AQ_LINK_POWER_CLASS_1 0x00 #define I40E_AQ_LINK_POWER_CLASS_2 0x01 #define I40E_AQ_LINK_POWER_CLASS_3 0x02 #define I40E_AQ_LINK_POWER_CLASS_4 0x03 #define I40E_AQ_PWR_CLASS_MASK 0x03 - u8 reserved[4]; + u8 reserved[4]; + }; + struct { + u8 link_type[4]; + u8 link_type_ext; + }; + }; }; I40E_CHECK_CMD_LENGTH(i40e_aqc_get_link_status); @@ -2029,6 +2209,22 @@ struct i40e_aqc_run_phy_activity { I40E_CHECK_CMD_LENGTH(i40e_aqc_run_phy_activity); +/* Set PHY Register command (0x0628) */ +/* Get PHY Register command (0x0629) */ +struct i40e_aqc_phy_register_access { + u8 phy_interface; +#define I40E_AQ_PHY_REG_ACCESS_INTERNAL 0 +#define I40E_AQ_PHY_REG_ACCESS_EXTERNAL 1 +#define I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE 2 + u8 dev_address; + u8 reserved1[2]; + __le32 reg_address; + __le32 reg_value; + u8 reserved2[4]; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_phy_register_access); + /* NVM Read command (indirect 0x0701) * NVM Erase commands (direct 0x0702) * NVM Update commands (indirect 0x0703) diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 111426ba5fbc..0203665cb53c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1180,6 +1180,8 @@ static enum i40e_media_type i40e_get_media_type(struct i40e_hw *hw) case I40E_PHY_TYPE_40GBASE_AOC: case I40E_PHY_TYPE_10GBASE_AOC: case I40E_PHY_TYPE_25GBASE_CR: + case I40E_PHY_TYPE_25GBASE_AOC: + case I40E_PHY_TYPE_25GBASE_ACC: media = I40E_MEDIA_TYPE_DA; break; case I40E_PHY_TYPE_1000BASE_KX: @@ -1567,34 +1569,57 @@ i40e_status i40e_aq_get_phy_capabilities(struct i40e_hw *hw, struct i40e_aq_desc desc; i40e_status status; u16 abilities_size = sizeof(struct i40e_aq_get_phy_abilities_resp); + u16 max_delay = I40E_MAX_PHY_TIMEOUT, total_delay = 0; if (!abilities) return I40E_ERR_PARAM; - i40e_fill_default_direct_cmd_desc(&desc, - i40e_aqc_opc_get_phy_abilities); + do { + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_get_phy_abilities); - desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF); - if (abilities_size > I40E_AQ_LARGE_BUF) - desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); + desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF); + if (abilities_size > I40E_AQ_LARGE_BUF) + desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); - if (qualified_modules) - desc.params.external.param0 |= + if (qualified_modules) + desc.params.external.param0 |= cpu_to_le32(I40E_AQ_PHY_REPORT_QUALIFIED_MODULES); - if (report_init) - desc.params.external.param0 |= + if (report_init) + desc.params.external.param0 |= cpu_to_le32(I40E_AQ_PHY_REPORT_INITIAL_VALUES); - status = i40e_asq_send_command(hw, &desc, abilities, abilities_size, - cmd_details); + status = i40e_asq_send_command(hw, &desc, abilities, + abilities_size, cmd_details); - if (hw->aq.asq_last_status == I40E_AQ_RC_EIO) - status = I40E_ERR_UNKNOWN_PHY; + if (status) + break; + + if (hw->aq.asq_last_status == I40E_AQ_RC_EIO) { + status = I40E_ERR_UNKNOWN_PHY; + break; + } else if (hw->aq.asq_last_status == I40E_AQ_RC_EAGAIN) { + usleep_range(1000, 2000); + total_delay++; + status = I40E_ERR_TIMEOUT; + } + } while ((hw->aq.asq_last_status != I40E_AQ_RC_OK) && + (total_delay < max_delay)); + + if (status) + return status; if (report_init) { - hw->phy.phy_types = le32_to_cpu(abilities->phy_type); - hw->phy.phy_types |= ((u64)abilities->phy_type_ext << 32); + if (hw->mac.type == I40E_MAC_XL710 && + hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && + hw->aq.api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_XL710) { + status = i40e_aq_get_link_info(hw, true, NULL, NULL); + } else { + hw->phy.phy_types = le32_to_cpu(abilities->phy_type); + hw->phy.phy_types |= + ((u64)abilities->phy_type_ext << 32); + } } return status; @@ -1819,7 +1844,7 @@ i40e_status i40e_aq_get_link_info(struct i40e_hw *hw, hw_link_info->fec_info = resp->config & (I40E_AQ_CONFIG_FEC_KR_ENA | I40E_AQ_CONFIG_FEC_RS_ENA); hw_link_info->ext_info = resp->ext_info; - hw_link_info->loopback = resp->loopback; + hw_link_info->loopback = resp->loopback & I40E_AQ_LOOPBACK_MASK; hw_link_info->max_frame_size = le16_to_cpu(resp->max_frame_size); hw_link_info->pacing = resp->config & I40E_AQ_CONFIG_PACING_MASK; @@ -1850,6 +1875,15 @@ i40e_status i40e_aq_get_link_info(struct i40e_hw *hw, hw->aq.fw_min_ver < 40)) && hw_link_info->phy_type == 0xE) hw_link_info->phy_type = I40E_PHY_TYPE_10GBASE_SFPP_CU; + if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && + hw->aq.api_min_ver >= 7) { + __le32 tmp; + + memcpy(&tmp, resp->link_type, sizeof(tmp)); + hw->phy.phy_types = le32_to_cpu(tmp); + hw->phy.phy_types |= ((u64)resp->link_type_ext << 32); + } + /* save link status information */ if (link) *link = *hw_link_info; @@ -2373,13 +2407,14 @@ i40e_status i40e_aq_get_switch_config(struct i40e_hw *hw, * @hw: pointer to the hardware structure * @flags: bit flag values to set * @valid_flags: which bit flags to set + * @mode: cloud filter mode * @cmd_details: pointer to command details structure or NULL * * Set switch configuration bits **/ enum i40e_status_code i40e_aq_set_switch_config(struct i40e_hw *hw, u16 flags, - u16 valid_flags, + u16 valid_flags, u8 mode, struct i40e_asq_cmd_details *cmd_details) { struct i40e_aq_desc desc; @@ -2391,7 +2426,12 @@ enum i40e_status_code i40e_aq_set_switch_config(struct i40e_hw *hw, i40e_aqc_opc_set_switch_config); scfg->flags = cpu_to_le16(flags); scfg->valid_flags = cpu_to_le16(valid_flags); - + scfg->mode = mode; + if (hw->flags & I40E_HW_FLAG_802_1AD_CAPABLE) { + scfg->switch_tag = cpu_to_le16(hw->switch_tag); + scfg->first_tag = cpu_to_le16(hw->first_tag); + scfg->second_tag = cpu_to_le16(hw->second_tag); + } status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); return status; @@ -4826,6 +4866,74 @@ phy_blinking_end: } /** + * i40e_led_get_reg - read LED register + * @hw: pointer to the HW structure + * @led_addr: LED register address + * @reg_val: read register value + **/ +static enum i40e_status_code i40e_led_get_reg(struct i40e_hw *hw, u16 led_addr, + u32 *reg_val) +{ + enum i40e_status_code status; + u8 phy_addr = 0; + u8 port_num; + u32 i; + + *reg_val = 0; + if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) { + status = + i40e_aq_get_phy_register(hw, + I40E_AQ_PHY_REG_ACCESS_EXTERNAL, + I40E_PHY_COM_REG_PAGE, + I40E_PHY_LED_PROV_REG_1, + reg_val, NULL); + } else { + i = rd32(hw, I40E_PFGEN_PORTNUM); + port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK); + phy_addr = i40e_get_phy_address(hw, port_num); + status = i40e_read_phy_register_clause45(hw, + I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, + (u16 *)reg_val); + } + return status; +} + +/** + * i40e_led_set_reg - write LED register + * @hw: pointer to the HW structure + * @led_addr: LED register address + * @reg_val: register value to write + **/ +static enum i40e_status_code i40e_led_set_reg(struct i40e_hw *hw, u16 led_addr, + u32 reg_val) +{ + enum i40e_status_code status; + u8 phy_addr = 0; + u8 port_num; + u32 i; + + if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) { + status = + i40e_aq_set_phy_register(hw, + I40E_AQ_PHY_REG_ACCESS_EXTERNAL, + I40E_PHY_COM_REG_PAGE, + I40E_PHY_LED_PROV_REG_1, + reg_val, NULL); + } else { + i = rd32(hw, I40E_PFGEN_PORTNUM); + port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK); + phy_addr = i40e_get_phy_address(hw, port_num); + status = i40e_write_phy_register_clause45(hw, + I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, + (u16)reg_val); + } + + return status; +} + +/** * i40e_led_get_phy - return current on/off mode * @hw: pointer to the hw struct * @led_addr: address of led register to use @@ -4842,7 +4950,19 @@ i40e_status i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr, u16 temp_addr; u8 port_num; u32 i; - + u32 reg_val_aq; + + if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) { + status = + i40e_aq_get_phy_register(hw, + I40E_AQ_PHY_REG_ACCESS_EXTERNAL, + I40E_PHY_COM_REG_PAGE, + I40E_PHY_LED_PROV_REG_1, + ®_val_aq, NULL); + if (status == I40E_SUCCESS) + *val = (u16)reg_val_aq; + return status; + } temp_addr = I40E_PHY_LED_PROV_REG_1; i = rd32(hw, I40E_PFGEN_PORTNUM); port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK); @@ -4877,51 +4997,38 @@ i40e_status i40e_led_set_phy(struct i40e_hw *hw, bool on, u16 led_addr, u32 mode) { i40e_status status = 0; - u16 led_ctl = 0; - u16 led_reg = 0; - u8 phy_addr = 0; - u8 port_num; - u32 i; + u32 led_ctl = 0; + u32 led_reg = 0; - i = rd32(hw, I40E_PFGEN_PORTNUM); - port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK); - phy_addr = i40e_get_phy_address(hw, port_num); - status = i40e_read_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, &led_reg); + status = i40e_led_get_reg(hw, led_addr, &led_reg); if (status) return status; led_ctl = led_reg; if (led_reg & I40E_PHY_LED_LINK_MODE_MASK) { led_reg = 0; - status = i40e_write_phy_register_clause45(hw, - I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, - led_reg); + status = i40e_led_set_reg(hw, led_addr, led_reg); if (status) return status; } - status = i40e_read_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, &led_reg); + status = i40e_led_get_reg(hw, led_addr, &led_reg); if (status) goto restore_config; if (on) led_reg = I40E_PHY_LED_MANUAL_ON; else led_reg = 0; - status = i40e_write_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, led_reg); + + status = i40e_led_set_reg(hw, led_addr, led_reg); if (status) goto restore_config; if (mode & I40E_PHY_LED_MODE_ORIG) { led_ctl = (mode & I40E_PHY_LED_MODE_MASK); - status = i40e_write_phy_register_clause45(hw, - I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, led_ctl); + status = i40e_led_set_reg(hw, led_addr, led_ctl); } return status; + restore_config: - status = i40e_write_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, led_ctl); + status = i40e_led_set_reg(hw, led_addr, led_ctl); return status; } @@ -5052,6 +5159,75 @@ do_retry: } /** + * i40e_aq_set_phy_register + * @hw: pointer to the hw struct + * @phy_select: select which phy should be accessed + * @dev_addr: PHY device address + * @reg_addr: PHY register address + * @reg_val: new register value + * @cmd_details: pointer to command details structure or NULL + * + * Write the external PHY register. + **/ +i40e_status i40e_aq_set_phy_register(struct i40e_hw *hw, + u8 phy_select, u8 dev_addr, + u32 reg_addr, u32 reg_val, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_phy_register_access *cmd = + (struct i40e_aqc_phy_register_access *)&desc.params.raw; + i40e_status status; + + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_set_phy_register); + + cmd->phy_interface = phy_select; + cmd->dev_address = dev_addr; + cmd->reg_address = cpu_to_le32(reg_addr); + cmd->reg_value = cpu_to_le32(reg_val); + + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + return status; +} + +/** + * i40e_aq_get_phy_register + * @hw: pointer to the hw struct + * @phy_select: select which phy should be accessed + * @dev_addr: PHY device address + * @reg_addr: PHY register address + * @reg_val: read register value + * @cmd_details: pointer to command details structure or NULL + * + * Read the external PHY register. + **/ +i40e_status i40e_aq_get_phy_register(struct i40e_hw *hw, + u8 phy_select, u8 dev_addr, + u32 reg_addr, u32 *reg_val, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_phy_register_access *cmd = + (struct i40e_aqc_phy_register_access *)&desc.params.raw; + i40e_status status; + + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_get_phy_register); + + cmd->phy_interface = phy_select; + cmd->dev_address = dev_addr; + cmd->reg_address = cpu_to_le32(reg_addr); + + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + if (!status) + *reg_val = le32_to_cpu(cmd->reg_value); + + return status; +} + +/** * i40e_aq_write_ppp - Write pipeline personalization profile (ppp) * @hw: pointer to the hw struct * @buff: command buffer (size in bytes = buff_size) @@ -5260,5 +5436,194 @@ i40e_add_pinfo_to_list(struct i40e_hw *hw, status = i40e_aq_write_ppp(hw, (void *)sec, sec->data_end, track_id, &offset, &info, NULL); + + return status; +} + +/** + * i40e_aq_add_cloud_filters + * @hw: pointer to the hardware structure + * @seid: VSI seid to add cloud filters from + * @filters: Buffer which contains the filters to be added + * @filter_count: number of filters contained in the buffer + * + * Set the cloud filters for a given VSI. The contents of the + * i40e_aqc_cloud_filters_element_data are filled in by the caller + * of the function. + * + **/ +enum i40e_status_code +i40e_aq_add_cloud_filters(struct i40e_hw *hw, u16 seid, + struct i40e_aqc_cloud_filters_element_data *filters, + u8 filter_count) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_add_remove_cloud_filters *cmd = + (struct i40e_aqc_add_remove_cloud_filters *)&desc.params.raw; + enum i40e_status_code status; + u16 buff_len; + + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_add_cloud_filters); + + buff_len = filter_count * sizeof(*filters); + desc.datalen = cpu_to_le16(buff_len); + desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); + cmd->num_filters = filter_count; + cmd->seid = cpu_to_le16(seid); + + status = i40e_asq_send_command(hw, &desc, filters, buff_len, NULL); + + return status; +} + +/** + * i40e_aq_add_cloud_filters_bb + * @hw: pointer to the hardware structure + * @seid: VSI seid to add cloud filters from + * @filters: Buffer which contains the filters in big buffer to be added + * @filter_count: number of filters contained in the buffer + * + * Set the big buffer cloud filters for a given VSI. The contents of the + * i40e_aqc_cloud_filters_element_bb are filled in by the caller of the + * function. + * + **/ +i40e_status +i40e_aq_add_cloud_filters_bb(struct i40e_hw *hw, u16 seid, + struct i40e_aqc_cloud_filters_element_bb *filters, + u8 filter_count) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_add_remove_cloud_filters *cmd = + (struct i40e_aqc_add_remove_cloud_filters *)&desc.params.raw; + i40e_status status; + u16 buff_len; + int i; + + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_add_cloud_filters); + + buff_len = filter_count * sizeof(*filters); + desc.datalen = cpu_to_le16(buff_len); + desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); + cmd->num_filters = filter_count; + cmd->seid = cpu_to_le16(seid); + cmd->big_buffer_flag = I40E_AQC_ADD_CLOUD_CMD_BB; + + for (i = 0; i < filter_count; i++) { + u16 tnl_type; + u32 ti; + + tnl_type = (le16_to_cpu(filters[i].element.flags) & + I40E_AQC_ADD_CLOUD_TNL_TYPE_MASK) >> + I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT; + + /* Due to hardware eccentricities, the VNI for Geneve is shifted + * one more byte further than normally used for Tenant ID in + * other tunnel types. + */ + if (tnl_type == I40E_AQC_ADD_CLOUD_TNL_TYPE_GENEVE) { + ti = le32_to_cpu(filters[i].element.tenant_id); + filters[i].element.tenant_id = cpu_to_le32(ti << 8); + } + } + + status = i40e_asq_send_command(hw, &desc, filters, buff_len, NULL); + + return status; +} + +/** + * i40e_aq_rem_cloud_filters + * @hw: pointer to the hardware structure + * @seid: VSI seid to remove cloud filters from + * @filters: Buffer which contains the filters to be removed + * @filter_count: number of filters contained in the buffer + * + * Remove the cloud filters for a given VSI. The contents of the + * i40e_aqc_cloud_filters_element_data are filled in by the caller + * of the function. + * + **/ +enum i40e_status_code +i40e_aq_rem_cloud_filters(struct i40e_hw *hw, u16 seid, + struct i40e_aqc_cloud_filters_element_data *filters, + u8 filter_count) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_add_remove_cloud_filters *cmd = + (struct i40e_aqc_add_remove_cloud_filters *)&desc.params.raw; + enum i40e_status_code status; + u16 buff_len; + + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_remove_cloud_filters); + + buff_len = filter_count * sizeof(*filters); + desc.datalen = cpu_to_le16(buff_len); + desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); + cmd->num_filters = filter_count; + cmd->seid = cpu_to_le16(seid); + + status = i40e_asq_send_command(hw, &desc, filters, buff_len, NULL); + + return status; +} + +/** + * i40e_aq_rem_cloud_filters_bb + * @hw: pointer to the hardware structure + * @seid: VSI seid to remove cloud filters from + * @filters: Buffer which contains the filters in big buffer to be removed + * @filter_count: number of filters contained in the buffer + * + * Remove the big buffer cloud filters for a given VSI. The contents of the + * i40e_aqc_cloud_filters_element_bb are filled in by the caller of the + * function. + * + **/ +i40e_status +i40e_aq_rem_cloud_filters_bb(struct i40e_hw *hw, u16 seid, + struct i40e_aqc_cloud_filters_element_bb *filters, + u8 filter_count) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_add_remove_cloud_filters *cmd = + (struct i40e_aqc_add_remove_cloud_filters *)&desc.params.raw; + i40e_status status; + u16 buff_len; + int i; + + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_remove_cloud_filters); + + buff_len = filter_count * sizeof(*filters); + desc.datalen = cpu_to_le16(buff_len); + desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD)); + cmd->num_filters = filter_count; + cmd->seid = cpu_to_le16(seid); + cmd->big_buffer_flag = I40E_AQC_ADD_CLOUD_CMD_BB; + + for (i = 0; i < filter_count; i++) { + u16 tnl_type; + u32 ti; + + tnl_type = (le16_to_cpu(filters[i].element.flags) & + I40E_AQC_ADD_CLOUD_TNL_TYPE_MASK) >> + I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT; + + /* Due to hardware eccentricities, the VNI for Geneve is shifted + * one more byte further than normally used for Tenant ID in + * other tunnel types. + */ + if (tnl_type == I40E_AQC_ADD_CLOUD_TNL_TYPE_GENEVE) { + ti = le32_to_cpu(filters[i].element.tenant_id); + filters[i].element.tenant_id = cpu_to_le32(ti << 8); + } + } + + status = i40e_asq_send_command(hw, &desc, filters, buff_len, NULL); + return status; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 2cb9539c931e..4c3b4243cf65 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -278,8 +278,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) rx_ring->netdev, rx_ring->rx_bi); dev_info(&pf->pdev->dev, - " rx_rings[%i]: state = %li, queue_index = %d, reg_idx = %d\n", - i, rx_ring->state, + " rx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n", + i, *rx_ring->state, rx_ring->queue_index, rx_ring->reg_idx); dev_info(&pf->pdev->dev, @@ -334,8 +334,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) tx_ring->netdev, tx_ring->tx_bi); dev_info(&pf->pdev->dev, - " tx_rings[%i]: state = %li, queue_index = %d, reg_idx = %d\n", - i, tx_ring->state, + " tx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n", + i, *tx_ring->state, tx_ring->queue_index, tx_ring->reg_idx); dev_info(&pf->pdev->dev, @@ -798,8 +798,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp, */ if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) { pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; - i40e_do_reset_safe(pf, - BIT_ULL(__I40E_PF_RESET_REQUESTED)); + i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG); } vsi = i40e_vsi_setup(pf, I40E_VSI_VMDQ2, vsi_seid, 0); diff --git a/drivers/net/ethernet/intel/i40e/i40e_diag.c b/drivers/net/ethernet/intel/i40e/i40e_diag.c index f141e78d409e..76ed56641864 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_diag.c +++ b/drivers/net/ethernet/intel/i40e/i40e_diag.c @@ -36,7 +36,9 @@ static i40e_status i40e_diag_reg_pattern_test(struct i40e_hw *hw, u32 reg, u32 mask) { - const u32 patterns[] = {0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF}; + static const u32 patterns[] = { + 0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF + }; u32 pat, val, orig_val; int i; diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index e9e04a485e0a..5f6cf7212d4f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -227,6 +227,8 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = { I40E_PRIV_FLAG("veb-stats", I40E_FLAG_VEB_STATS_ENABLED, 0), I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_ENABLED, 0), I40E_PRIV_FLAG("legacy-rx", I40E_FLAG_LEGACY_RX, 0), + I40E_PRIV_FLAG("disable-source-pruning", + I40E_FLAG_SOURCE_PRUNING_DISABLED, 0), }; #define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gstrings_priv_flags) @@ -251,428 +253,557 @@ static void i40e_partition_setting_complaint(struct i40e_pf *pf) /** * i40e_phy_type_to_ethtool - convert the phy_types to ethtool link modes - * @phy_types: PHY types to convert - * @supported: pointer to the ethtool supported variable to fill in - * @advertising: pointer to the ethtool advertising variable to fill in + * @pf: PF struct with phy_types + * @ks: ethtool link ksettings struct to fill out * **/ -static void i40e_phy_type_to_ethtool(struct i40e_pf *pf, u32 *supported, - u32 *advertising) +static void i40e_phy_type_to_ethtool(struct i40e_pf *pf, + struct ethtool_link_ksettings *ks) { struct i40e_link_status *hw_link_info = &pf->hw.phy.link_info; u64 phy_types = pf->hw.phy.phy_types; - *supported = 0x0; - *advertising = 0x0; + ethtool_link_ksettings_zero_link_mode(ks, supported); + ethtool_link_ksettings_zero_link_mode(ks, advertising); if (phy_types & I40E_CAP_PHY_TYPE_SGMII) { - *supported |= SUPPORTED_Autoneg | - SUPPORTED_1000baseT_Full; - *advertising |= ADVERTISED_Autoneg; + ethtool_link_ksettings_add_link_mode(ks, supported, + 1000baseT_Full); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB) - *advertising |= ADVERTISED_1000baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, advertising, + 1000baseT_Full); if (pf->hw_features & I40E_HW_100M_SGMII_CAPABLE) { - *supported |= SUPPORTED_100baseT_Full; - *advertising |= ADVERTISED_100baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, + 100baseT_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 100baseT_Full); } } if (phy_types & I40E_CAP_PHY_TYPE_XAUI || phy_types & I40E_CAP_PHY_TYPE_XFI || phy_types & I40E_CAP_PHY_TYPE_SFI || phy_types & I40E_CAP_PHY_TYPE_10GBASE_SFPP_CU || - phy_types & I40E_CAP_PHY_TYPE_10GBASE_AOC) - *supported |= SUPPORTED_10000baseT_Full; - if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1_CU || - phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1 || - phy_types & I40E_CAP_PHY_TYPE_10GBASE_T || - phy_types & I40E_CAP_PHY_TYPE_10GBASE_SR || - phy_types & I40E_CAP_PHY_TYPE_10GBASE_LR) { - *supported |= SUPPORTED_Autoneg | - SUPPORTED_10000baseT_Full; - *advertising |= ADVERTISED_Autoneg; + phy_types & I40E_CAP_PHY_TYPE_10GBASE_AOC) { + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseT_Full); + if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB) + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseT_Full); + } + if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_T) { + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseT_Full); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB) - *advertising |= ADVERTISED_10000baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseT_Full); } if (phy_types & I40E_CAP_PHY_TYPE_XLAUI || phy_types & I40E_CAP_PHY_TYPE_XLPPI || phy_types & I40E_CAP_PHY_TYPE_40GBASE_AOC) - *supported |= SUPPORTED_40000baseCR4_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, + 40000baseCR4_Full); if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4_CU || phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4) { - *supported |= SUPPORTED_Autoneg | - SUPPORTED_40000baseCR4_Full; - *advertising |= ADVERTISED_Autoneg; + ethtool_link_ksettings_add_link_mode(ks, supported, + 40000baseCR4_Full); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_40GB) - *advertising |= ADVERTISED_40000baseCR4_Full; + ethtool_link_ksettings_add_link_mode(ks, advertising, + 40000baseCR4_Full); } if (phy_types & I40E_CAP_PHY_TYPE_100BASE_TX) { - *supported |= SUPPORTED_Autoneg | - SUPPORTED_100baseT_Full; - *advertising |= ADVERTISED_Autoneg; + ethtool_link_ksettings_add_link_mode(ks, supported, + 100baseT_Full); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_100MB) - *advertising |= ADVERTISED_100baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, advertising, + 100baseT_Full); } - if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_T || - phy_types & I40E_CAP_PHY_TYPE_1000BASE_SX || - phy_types & I40E_CAP_PHY_TYPE_1000BASE_LX || - phy_types & I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL) { - *supported |= SUPPORTED_Autoneg | - SUPPORTED_1000baseT_Full; - *advertising |= ADVERTISED_Autoneg; + if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_T) { + ethtool_link_ksettings_add_link_mode(ks, supported, + 1000baseT_Full); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB) - *advertising |= ADVERTISED_1000baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, advertising, + 1000baseT_Full); } if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_SR4) - *supported |= SUPPORTED_40000baseSR4_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, + 40000baseSR4_Full); if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_LR4) - *supported |= SUPPORTED_40000baseLR4_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, + 40000baseLR4_Full); if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_KR4) { - *supported |= SUPPORTED_40000baseKR4_Full | - SUPPORTED_Autoneg; - *advertising |= ADVERTISED_40000baseKR4_Full | - ADVERTISED_Autoneg; + ethtool_link_ksettings_add_link_mode(ks, supported, + 40000baseLR4_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 40000baseLR4_Full); } if (phy_types & I40E_CAP_PHY_TYPE_20GBASE_KR2) { - *supported |= SUPPORTED_20000baseKR2_Full | - SUPPORTED_Autoneg; - *advertising |= ADVERTISED_Autoneg; + ethtool_link_ksettings_add_link_mode(ks, supported, + 20000baseKR2_Full); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_20GB) - *advertising |= ADVERTISED_20000baseKR2_Full; + ethtool_link_ksettings_add_link_mode(ks, advertising, + 20000baseKR2_Full); } - if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KR) { - if (!(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER)) - *supported |= SUPPORTED_10000baseKR_Full | - SUPPORTED_Autoneg; - *advertising |= ADVERTISED_Autoneg; + if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KX4) { + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseKX4_Full); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB) - if (!(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER)) - *advertising |= ADVERTISED_10000baseKR_Full; + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseKX4_Full); } - if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KX4) { - *supported |= SUPPORTED_10000baseKX4_Full | - SUPPORTED_Autoneg; - *advertising |= ADVERTISED_Autoneg; + if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KR && + !(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER)) { + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseKR_Full); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB) - *advertising |= ADVERTISED_10000baseKX4_Full; + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseKR_Full); } - if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_KX) { - if (!(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER)) - *supported |= SUPPORTED_1000baseKX_Full | - SUPPORTED_Autoneg; - *advertising |= ADVERTISED_Autoneg; + if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_KX && + !(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER)) { + ethtool_link_ksettings_add_link_mode(ks, supported, + 1000baseKX_Full); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB) - if (!(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER)) - *advertising |= ADVERTISED_1000baseKX_Full; + ethtool_link_ksettings_add_link_mode(ks, advertising, + 1000baseKX_Full); } - if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_KR || - phy_types & I40E_CAP_PHY_TYPE_25GBASE_CR || - phy_types & I40E_CAP_PHY_TYPE_25GBASE_SR || + /* need to add 25G PHY types */ + if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_KR) { + ethtool_link_ksettings_add_link_mode(ks, supported, + 25000baseKR_Full); + if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB) + ethtool_link_ksettings_add_link_mode(ks, advertising, + 25000baseKR_Full); + } + if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_CR) { + ethtool_link_ksettings_add_link_mode(ks, supported, + 25000baseCR_Full); + if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB) + ethtool_link_ksettings_add_link_mode(ks, advertising, + 25000baseCR_Full); + } + if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_SR || phy_types & I40E_CAP_PHY_TYPE_25GBASE_LR) { - *supported |= SUPPORTED_Autoneg; - *advertising |= ADVERTISED_Autoneg; + ethtool_link_ksettings_add_link_mode(ks, supported, + 25000baseSR_Full); + if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB) + ethtool_link_ksettings_add_link_mode(ks, advertising, + 25000baseSR_Full); + } + if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_AOC || + phy_types & I40E_CAP_PHY_TYPE_25GBASE_ACC) { + ethtool_link_ksettings_add_link_mode(ks, supported, + 25000baseCR_Full); + if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB) + ethtool_link_ksettings_add_link_mode(ks, advertising, + 25000baseCR_Full); + } + /* need to add new 10G PHY types */ + if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1 || + phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1_CU) { + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseCR_Full); + if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB) + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseCR_Full); + } + if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_SR) { + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseSR_Full); + if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB) + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseSR_Full); + } + if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_LR) { + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseLR_Full); + if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB) + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseLR_Full); + } + if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_SX || + phy_types & I40E_CAP_PHY_TYPE_1000BASE_LX || + phy_types & I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL) { + ethtool_link_ksettings_add_link_mode(ks, supported, + 1000baseX_Full); + if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB) + ethtool_link_ksettings_add_link_mode(ks, advertising, + 1000baseX_Full); + } + /* Autoneg PHY types */ + if (phy_types & I40E_CAP_PHY_TYPE_SGMII || + phy_types & I40E_CAP_PHY_TYPE_40GBASE_KR4 || + phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4_CU || + phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4 || + phy_types & I40E_CAP_PHY_TYPE_25GBASE_SR || + phy_types & I40E_CAP_PHY_TYPE_25GBASE_LR || + phy_types & I40E_CAP_PHY_TYPE_25GBASE_KR || + phy_types & I40E_CAP_PHY_TYPE_25GBASE_CR || + phy_types & I40E_CAP_PHY_TYPE_20GBASE_KR2 || + phy_types & I40E_CAP_PHY_TYPE_10GBASE_T || + phy_types & I40E_CAP_PHY_TYPE_10GBASE_SR || + phy_types & I40E_CAP_PHY_TYPE_10GBASE_LR || + phy_types & I40E_CAP_PHY_TYPE_10GBASE_KX4 || + phy_types & I40E_CAP_PHY_TYPE_10GBASE_KR || + phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1_CU || + phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1 || + phy_types & I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL || + phy_types & I40E_CAP_PHY_TYPE_1000BASE_T || + phy_types & I40E_CAP_PHY_TYPE_1000BASE_SX || + phy_types & I40E_CAP_PHY_TYPE_1000BASE_LX || + phy_types & I40E_CAP_PHY_TYPE_1000BASE_KX || + phy_types & I40E_CAP_PHY_TYPE_100BASE_TX) { + ethtool_link_ksettings_add_link_mode(ks, supported, + Autoneg); + ethtool_link_ksettings_add_link_mode(ks, advertising, + Autoneg); } } /** * i40e_get_settings_link_up - Get the Link settings for when link is up * @hw: hw structure - * @ecmd: ethtool command to fill in + * @ks: ethtool ksettings to fill in * @netdev: network interface device structure - * + * @pf: pointer to physical function struct **/ static void i40e_get_settings_link_up(struct i40e_hw *hw, - struct ethtool_link_ksettings *cmd, + struct ethtool_link_ksettings *ks, struct net_device *netdev, struct i40e_pf *pf) { struct i40e_link_status *hw_link_info = &hw->phy.link_info; + struct ethtool_link_ksettings cap_ksettings; u32 link_speed = hw_link_info->link_speed; - u32 e_advertising = 0x0; - u32 e_supported = 0x0; - u32 supported, advertising; - - ethtool_convert_link_mode_to_legacy_u32(&supported, - cmd->link_modes.supported); - ethtool_convert_link_mode_to_legacy_u32(&advertising, - cmd->link_modes.advertising); /* Initialize supported and advertised settings based on phy settings */ switch (hw_link_info->phy_type) { case I40E_PHY_TYPE_40GBASE_CR4: case I40E_PHY_TYPE_40GBASE_CR4_CU: - supported = SUPPORTED_Autoneg | - SUPPORTED_40000baseCR4_Full; - advertising = ADVERTISED_Autoneg | - ADVERTISED_40000baseCR4_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, supported, + 40000baseCR4_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 40000baseCR4_Full); break; case I40E_PHY_TYPE_XLAUI: case I40E_PHY_TYPE_XLPPI: case I40E_PHY_TYPE_40GBASE_AOC: - supported = SUPPORTED_40000baseCR4_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, + 40000baseCR4_Full); break; case I40E_PHY_TYPE_40GBASE_SR4: - supported = SUPPORTED_40000baseSR4_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, + 40000baseSR4_Full); break; case I40E_PHY_TYPE_40GBASE_LR4: - supported = SUPPORTED_40000baseLR4_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, + 40000baseLR4_Full); break; + case I40E_PHY_TYPE_25GBASE_SR: + case I40E_PHY_TYPE_25GBASE_LR: case I40E_PHY_TYPE_10GBASE_SR: case I40E_PHY_TYPE_10GBASE_LR: case I40E_PHY_TYPE_1000BASE_SX: case I40E_PHY_TYPE_1000BASE_LX: - supported = SUPPORTED_10000baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, supported, + 25000baseSR_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 25000baseSR_Full); + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseSR_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseSR_Full); + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseLR_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseLR_Full); + ethtool_link_ksettings_add_link_mode(ks, supported, + 1000baseX_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 1000baseX_Full); + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseT_Full); if (hw_link_info->module_type[2] & I40E_MODULE_TYPE_1000BASE_SX || hw_link_info->module_type[2] & I40E_MODULE_TYPE_1000BASE_LX) { - supported |= SUPPORTED_1000baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, + 1000baseT_Full); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB) - advertising |= ADVERTISED_1000baseT_Full; + ethtool_link_ksettings_add_link_mode( + ks, advertising, 1000baseT_Full); } if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB) - advertising |= ADVERTISED_10000baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseT_Full); break; case I40E_PHY_TYPE_10GBASE_T: case I40E_PHY_TYPE_1000BASE_T: case I40E_PHY_TYPE_100BASE_TX: - supported = SUPPORTED_Autoneg | - SUPPORTED_10000baseT_Full | - SUPPORTED_1000baseT_Full | - SUPPORTED_100baseT_Full; - advertising = ADVERTISED_Autoneg; + ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseT_Full); + ethtool_link_ksettings_add_link_mode(ks, supported, + 1000baseT_Full); + ethtool_link_ksettings_add_link_mode(ks, supported, + 100baseT_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB) - advertising |= ADVERTISED_10000baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseT_Full); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB) - advertising |= ADVERTISED_1000baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, advertising, + 1000baseT_Full); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_100MB) - advertising |= ADVERTISED_100baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, advertising, + 100baseT_Full); break; case I40E_PHY_TYPE_1000BASE_T_OPTICAL: - supported = SUPPORTED_Autoneg | - SUPPORTED_1000baseT_Full; - advertising = ADVERTISED_Autoneg | - ADVERTISED_1000baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, supported, + 1000baseT_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 1000baseT_Full); break; case I40E_PHY_TYPE_10GBASE_CR1_CU: case I40E_PHY_TYPE_10GBASE_CR1: - supported = SUPPORTED_Autoneg | - SUPPORTED_10000baseT_Full; - advertising = ADVERTISED_Autoneg | - ADVERTISED_10000baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseT_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseT_Full); break; case I40E_PHY_TYPE_XAUI: case I40E_PHY_TYPE_XFI: case I40E_PHY_TYPE_SFI: case I40E_PHY_TYPE_10GBASE_SFPP_CU: case I40E_PHY_TYPE_10GBASE_AOC: - supported = SUPPORTED_10000baseT_Full; - advertising = SUPPORTED_10000baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseT_Full); + if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB) + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseT_Full); break; case I40E_PHY_TYPE_SGMII: - supported = SUPPORTED_Autoneg | - SUPPORTED_1000baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, supported, + 1000baseT_Full); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB) - advertising |= ADVERTISED_1000baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, advertising, + 1000baseT_Full); if (pf->hw_features & I40E_HW_100M_SGMII_CAPABLE) { - supported |= SUPPORTED_100baseT_Full; + ethtool_link_ksettings_add_link_mode(ks, supported, + 100baseT_Full); if (hw_link_info->requested_speeds & I40E_LINK_SPEED_100MB) - advertising |= ADVERTISED_100baseT_Full; + ethtool_link_ksettings_add_link_mode( + ks, advertising, 100baseT_Full); } break; case I40E_PHY_TYPE_40GBASE_KR4: + case I40E_PHY_TYPE_25GBASE_KR: case I40E_PHY_TYPE_20GBASE_KR2: case I40E_PHY_TYPE_10GBASE_KR: case I40E_PHY_TYPE_10GBASE_KX4: case I40E_PHY_TYPE_1000BASE_KX: - supported |= SUPPORTED_40000baseKR4_Full | - SUPPORTED_20000baseKR2_Full | - SUPPORTED_10000baseKR_Full | - SUPPORTED_10000baseKX4_Full | - SUPPORTED_1000baseKX_Full | - SUPPORTED_Autoneg; - advertising |= ADVERTISED_40000baseKR4_Full | - ADVERTISED_20000baseKR2_Full | - ADVERTISED_10000baseKR_Full | - ADVERTISED_10000baseKX4_Full | - ADVERTISED_1000baseKX_Full | - ADVERTISED_Autoneg; + ethtool_link_ksettings_add_link_mode(ks, supported, + 40000baseKR4_Full); + ethtool_link_ksettings_add_link_mode(ks, supported, + 25000baseKR_Full); + ethtool_link_ksettings_add_link_mode(ks, supported, + 20000baseKR2_Full); + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseKR_Full); + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseKX4_Full); + ethtool_link_ksettings_add_link_mode(ks, supported, + 1000baseKX_Full); + ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 40000baseKR4_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 25000baseKR_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 20000baseKR2_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseKR_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseKX4_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 1000baseKX_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); break; - case I40E_PHY_TYPE_25GBASE_KR: case I40E_PHY_TYPE_25GBASE_CR: - case I40E_PHY_TYPE_25GBASE_SR: - case I40E_PHY_TYPE_25GBASE_LR: - supported = SUPPORTED_Autoneg; - advertising = ADVERTISED_Autoneg; - /* TODO: add speeds when ethtool is ready to support*/ + ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, supported, + 25000baseCR_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 25000baseCR_Full); + break; + case I40E_PHY_TYPE_25GBASE_AOC: + case I40E_PHY_TYPE_25GBASE_ACC: + ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, supported, + 25000baseCR_Full); + + ethtool_link_ksettings_add_link_mode(ks, advertising, + 25000baseCR_Full); + ethtool_link_ksettings_add_link_mode(ks, supported, + 10000baseCR_Full); + ethtool_link_ksettings_add_link_mode(ks, advertising, + 10000baseCR_Full); break; default: /* if we got here and link is up something bad is afoot */ - netdev_info(netdev, "WARNING: Link is up but PHY type 0x%x is not recognized.\n", + netdev_info(netdev, + "WARNING: Link is up but PHY type 0x%x is not recognized.\n", hw_link_info->phy_type); } /* Now that we've worked out everything that could be supported by the - * current PHY type, get what is supported by the NVM and them to - * get what is truly supported + * current PHY type, get what is supported by the NVM and intersect + * them to get what is truly supported */ - i40e_phy_type_to_ethtool(pf, &e_supported, - &e_advertising); - - supported = supported & e_supported; - advertising = advertising & e_advertising; + memset(&cap_ksettings, 0, sizeof(struct ethtool_link_ksettings)); + i40e_phy_type_to_ethtool(pf, &cap_ksettings); + ethtool_intersect_link_masks(ks, &cap_ksettings); /* Set speed and duplex */ switch (link_speed) { case I40E_LINK_SPEED_40GB: - cmd->base.speed = SPEED_40000; + ks->base.speed = SPEED_40000; break; case I40E_LINK_SPEED_25GB: -#ifdef SPEED_25000 - cmd->base.speed = SPEED_25000; -#else - netdev_info(netdev, - "Speed is 25G, display not supported by this version of ethtool.\n"); -#endif + ks->base.speed = SPEED_25000; break; case I40E_LINK_SPEED_20GB: - cmd->base.speed = SPEED_20000; + ks->base.speed = SPEED_20000; break; case I40E_LINK_SPEED_10GB: - cmd->base.speed = SPEED_10000; + ks->base.speed = SPEED_10000; break; case I40E_LINK_SPEED_1GB: - cmd->base.speed = SPEED_1000; + ks->base.speed = SPEED_1000; break; case I40E_LINK_SPEED_100MB: - cmd->base.speed = SPEED_100; + ks->base.speed = SPEED_100; break; default: break; } - cmd->base.duplex = DUPLEX_FULL; - - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, - supported); - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, - advertising); + ks->base.duplex = DUPLEX_FULL; } /** * i40e_get_settings_link_down - Get the Link settings for when link is down * @hw: hw structure - * @ecmd: ethtool command to fill in + * @ks: ethtool ksettings to fill in + * @pf: pointer to physical function struct * * Reports link settings that can be determined when link is down **/ static void i40e_get_settings_link_down(struct i40e_hw *hw, - struct ethtool_link_ksettings *cmd, + struct ethtool_link_ksettings *ks, struct i40e_pf *pf) { - u32 supported, advertising; - /* link is down and the driver needs to fall back on * supported phy types to figure out what info to display */ - i40e_phy_type_to_ethtool(pf, &supported, &advertising); - - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, - supported); - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, - advertising); + i40e_phy_type_to_ethtool(pf, ks); /* With no link speed and duplex are unknown */ - cmd->base.speed = SPEED_UNKNOWN; - cmd->base.duplex = DUPLEX_UNKNOWN; + ks->base.speed = SPEED_UNKNOWN; + ks->base.duplex = DUPLEX_UNKNOWN; } /** - * i40e_get_settings - Get Link Speed and Duplex settings + * i40e_get_link_ksettings - Get Link Speed and Duplex settings * @netdev: network interface device structure - * @ecmd: ethtool command + * @ks: ethtool ksettings * * Reports speed/duplex settings based on media_type **/ static int i40e_get_link_ksettings(struct net_device *netdev, - struct ethtool_link_ksettings *cmd) + struct ethtool_link_ksettings *ks) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_pf *pf = np->vsi->back; struct i40e_hw *hw = &pf->hw; struct i40e_link_status *hw_link_info = &hw->phy.link_info; bool link_up = hw_link_info->link_info & I40E_AQ_LINK_UP; - u32 advertising; + + ethtool_link_ksettings_zero_link_mode(ks, supported); + ethtool_link_ksettings_zero_link_mode(ks, advertising); if (link_up) - i40e_get_settings_link_up(hw, cmd, netdev, pf); + i40e_get_settings_link_up(hw, ks, netdev, pf); else - i40e_get_settings_link_down(hw, cmd, pf); + i40e_get_settings_link_down(hw, ks, pf); /* Now set the settings that don't rely on link being up/down */ /* Set autoneg settings */ - cmd->base.autoneg = ((hw_link_info->an_info & I40E_AQ_AN_COMPLETED) ? - AUTONEG_ENABLE : AUTONEG_DISABLE); + ks->base.autoneg = ((hw_link_info->an_info & I40E_AQ_AN_COMPLETED) ? + AUTONEG_ENABLE : AUTONEG_DISABLE); + /* Set media type settings */ switch (hw->phy.media_type) { case I40E_MEDIA_TYPE_BACKPLANE: - ethtool_link_ksettings_add_link_mode(cmd, supported, - Autoneg); - ethtool_link_ksettings_add_link_mode(cmd, supported, - Backplane); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - Autoneg); - ethtool_link_ksettings_add_link_mode(cmd, advertising, + ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, supported, Backplane); + ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg); + ethtool_link_ksettings_add_link_mode(ks, advertising, Backplane); - cmd->base.port = PORT_NONE; + ks->base.port = PORT_NONE; break; case I40E_MEDIA_TYPE_BASET: - ethtool_link_ksettings_add_link_mode(cmd, supported, TP); - ethtool_link_ksettings_add_link_mode(cmd, advertising, TP); - cmd->base.port = PORT_TP; + ethtool_link_ksettings_add_link_mode(ks, supported, TP); + ethtool_link_ksettings_add_link_mode(ks, advertising, TP); + ks->base.port = PORT_TP; break; case I40E_MEDIA_TYPE_DA: case I40E_MEDIA_TYPE_CX4: - ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); - ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE); - cmd->base.port = PORT_DA; + ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE); + ethtool_link_ksettings_add_link_mode(ks, advertising, FIBRE); + ks->base.port = PORT_DA; break; case I40E_MEDIA_TYPE_FIBER: - ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); - cmd->base.port = PORT_FIBRE; + ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE); + ks->base.port = PORT_FIBRE; break; case I40E_MEDIA_TYPE_UNKNOWN: default: - cmd->base.port = PORT_OTHER; + ks->base.port = PORT_OTHER; break; } /* Set flow control settings */ - ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); + ethtool_link_ksettings_add_link_mode(ks, supported, Pause); switch (hw->fc.requested_mode) { case I40E_FC_FULL: - ethtool_link_ksettings_add_link_mode(cmd, advertising, - Pause); + ethtool_link_ksettings_add_link_mode(ks, advertising, Pause); break; case I40E_FC_TX_PAUSE: - ethtool_link_ksettings_add_link_mode(cmd, advertising, + ethtool_link_ksettings_add_link_mode(ks, advertising, Asym_Pause); break; case I40E_FC_RX_PAUSE: - ethtool_link_ksettings_add_link_mode(cmd, advertising, - Pause); - ethtool_link_ksettings_add_link_mode(cmd, advertising, + ethtool_link_ksettings_add_link_mode(ks, advertising, Pause); + ethtool_link_ksettings_add_link_mode(ks, advertising, Asym_Pause); break; default: - ethtool_convert_link_mode_to_legacy_u32( - &advertising, cmd->link_modes.advertising); - - advertising &= ~(ADVERTISED_Pause | ADVERTISED_Asym_Pause); - - ethtool_convert_legacy_u32_to_link_mode( - cmd->link_modes.advertising, advertising); + ethtool_link_ksettings_del_link_mode(ks, advertising, Pause); + ethtool_link_ksettings_del_link_mode(ks, advertising, + Asym_Pause); break; } @@ -680,30 +811,28 @@ static int i40e_get_link_ksettings(struct net_device *netdev, } /** - * i40e_set_settings - Set Speed and Duplex + * i40e_set_link_ksettings - Set Speed and Duplex * @netdev: network interface device structure - * @ecmd: ethtool command + * @ks: ethtool ksettings * * Set speed/duplex per media_types advertised/forced **/ static int i40e_set_link_ksettings(struct net_device *netdev, - const struct ethtool_link_ksettings *cmd) + const struct ethtool_link_ksettings *ks) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_aq_get_phy_abilities_resp abilities; + struct ethtool_link_ksettings safe_ks; + struct ethtool_link_ksettings copy_ks; struct i40e_aq_set_phy_config config; struct i40e_pf *pf = np->vsi->back; struct i40e_vsi *vsi = np->vsi; struct i40e_hw *hw = &pf->hw; - struct ethtool_link_ksettings safe_cmd; - struct ethtool_link_ksettings copy_cmd; + bool autoneg_changed = false; i40e_status status = 0; - bool change = false; int timeout = 50; int err = 0; - u32 autoneg; - u32 advertise; - u32 tmp; + u8 autoneg; /* Changing port settings is not supported if this isn't the * port's controlling PF @@ -712,17 +841,14 @@ static int i40e_set_link_ksettings(struct net_device *netdev, i40e_partition_setting_complaint(pf); return -EOPNOTSUPP; } - if (vsi != pf->vsi[pf->lan_vsi]) return -EOPNOTSUPP; - if (hw->phy.media_type != I40E_MEDIA_TYPE_BASET && hw->phy.media_type != I40E_MEDIA_TYPE_FIBER && hw->phy.media_type != I40E_MEDIA_TYPE_BACKPLANE && hw->phy.media_type != I40E_MEDIA_TYPE_DA && hw->phy.link_info.link_info & I40E_AQ_LINK_UP) return -EOPNOTSUPP; - if (hw->device_id == I40E_DEV_ID_KX_B || hw->device_id == I40E_DEV_ID_KX_C || hw->device_id == I40E_DEV_ID_20G_KR2 || @@ -731,31 +857,37 @@ static int i40e_set_link_ksettings(struct net_device *netdev, return -EOPNOTSUPP; } - /* copy the cmd to copy_cmd to avoid modifying the origin */ - memcpy(©_cmd, cmd, sizeof(struct ethtool_link_ksettings)); + /* copy the ksettings to copy_ks to avoid modifying the origin */ + memcpy(©_ks, ks, sizeof(struct ethtool_link_ksettings)); - /* get our own copy of the bits to check against */ - memset(&safe_cmd, 0, sizeof(struct ethtool_link_ksettings)); - i40e_get_link_ksettings(netdev, &safe_cmd); + /* save autoneg out of ksettings */ + autoneg = copy_ks.base.autoneg; - /* save autoneg and speed out of cmd */ - autoneg = cmd->base.autoneg; - ethtool_convert_link_mode_to_legacy_u32(&advertise, - cmd->link_modes.advertising); + memset(&safe_ks, 0, sizeof(safe_ks)); + /* Get link modes supported by hardware and check against modes + * requested by the user. Return an error if unsupported mode was set. + */ + i40e_phy_type_to_ethtool(pf, &safe_ks); + if (!bitmap_subset(copy_ks.link_modes.advertising, + safe_ks.link_modes.supported, + __ETHTOOL_LINK_MODE_MASK_NBITS)) + return -EINVAL; - /* set autoneg and speed back to what they currently are */ - copy_cmd.base.autoneg = safe_cmd.base.autoneg; - ethtool_convert_link_mode_to_legacy_u32( - &tmp, safe_cmd.link_modes.advertising); - ethtool_convert_legacy_u32_to_link_mode( - copy_cmd.link_modes.advertising, tmp); + /* get our own copy of the bits to check against */ + memset(&safe_ks, 0, sizeof(struct ethtool_link_ksettings)); + safe_ks.base.cmd = copy_ks.base.cmd; + safe_ks.base.link_mode_masks_nwords = + copy_ks.base.link_mode_masks_nwords; + i40e_get_link_ksettings(netdev, &safe_ks); - copy_cmd.base.cmd = safe_cmd.base.cmd; + /* set autoneg back to what it currently is */ + copy_ks.base.autoneg = safe_ks.base.autoneg; - /* If copy_cmd and safe_cmd are not the same now, then they are - * trying to set something that we do not support + /* If copy_ks.base and safe_ks.base are not the same now, then they are + * trying to set something that we do not support. */ - if (memcmp(©_cmd, &safe_cmd, sizeof(struct ethtool_link_ksettings))) + if (memcmp(©_ks.base, &safe_ks.base, + sizeof(struct ethtool_link_settings))) return -EOPNOTSUPP; while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) { @@ -784,8 +916,9 @@ static int i40e_set_link_ksettings(struct net_device *netdev, /* If autoneg was not already enabled */ if (!(hw->phy.link_info.an_info & I40E_AQ_AN_COMPLETED)) { /* If autoneg is not supported, return error */ - if (!ethtool_link_ksettings_test_link_mode( - &safe_cmd, supported, Autoneg)) { + if (!ethtool_link_ksettings_test_link_mode(&safe_ks, + supported, + Autoneg)) { netdev_info(netdev, "Autoneg not supported on this phy\n"); err = -EINVAL; goto done; @@ -793,7 +926,7 @@ static int i40e_set_link_ksettings(struct net_device *netdev, /* Autoneg is allowed to change */ config.abilities = abilities.abilities | I40E_AQ_PHY_ENABLE_AN; - change = true; + autoneg_changed = true; } } else { /* If autoneg is currently enabled */ @@ -801,8 +934,9 @@ static int i40e_set_link_ksettings(struct net_device *netdev, /* If autoneg is supported 10GBASE_T is the only PHY * that can disable it, so otherwise return error */ - if (ethtool_link_ksettings_test_link_mode( - &safe_cmd, supported, Autoneg) && + if (ethtool_link_ksettings_test_link_mode(&safe_ks, + supported, + Autoneg) && hw->phy.link_info.phy_type != I40E_PHY_TYPE_10GBASE_T) { netdev_info(netdev, "Autoneg cannot be disabled on this phy\n"); @@ -812,32 +946,49 @@ static int i40e_set_link_ksettings(struct net_device *netdev, /* Autoneg is allowed to change */ config.abilities = abilities.abilities & ~I40E_AQ_PHY_ENABLE_AN; - change = true; + autoneg_changed = true; } } - ethtool_convert_link_mode_to_legacy_u32(&tmp, - safe_cmd.link_modes.supported); - if (advertise & ~tmp) { - err = -EINVAL; - goto done; - } - - if (advertise & ADVERTISED_100baseT_Full) + if (ethtool_link_ksettings_test_link_mode(ks, advertising, + 100baseT_Full)) config.link_speed |= I40E_LINK_SPEED_100MB; - if (advertise & ADVERTISED_1000baseT_Full || - advertise & ADVERTISED_1000baseKX_Full) + if (ethtool_link_ksettings_test_link_mode(ks, advertising, + 1000baseT_Full) || + ethtool_link_ksettings_test_link_mode(ks, advertising, + 1000baseX_Full) || + ethtool_link_ksettings_test_link_mode(ks, advertising, + 1000baseKX_Full)) config.link_speed |= I40E_LINK_SPEED_1GB; - if (advertise & ADVERTISED_10000baseT_Full || - advertise & ADVERTISED_10000baseKX4_Full || - advertise & ADVERTISED_10000baseKR_Full) + if (ethtool_link_ksettings_test_link_mode(ks, advertising, + 10000baseT_Full) || + ethtool_link_ksettings_test_link_mode(ks, advertising, + 10000baseKX4_Full) || + ethtool_link_ksettings_test_link_mode(ks, advertising, + 10000baseKR_Full) || + ethtool_link_ksettings_test_link_mode(ks, advertising, + 10000baseCR_Full) || + ethtool_link_ksettings_test_link_mode(ks, advertising, + 10000baseSR_Full)) config.link_speed |= I40E_LINK_SPEED_10GB; - if (advertise & ADVERTISED_20000baseKR2_Full) + if (ethtool_link_ksettings_test_link_mode(ks, advertising, + 20000baseKR2_Full)) config.link_speed |= I40E_LINK_SPEED_20GB; - if (advertise & ADVERTISED_40000baseKR4_Full || - advertise & ADVERTISED_40000baseCR4_Full || - advertise & ADVERTISED_40000baseSR4_Full || - advertise & ADVERTISED_40000baseLR4_Full) + if (ethtool_link_ksettings_test_link_mode(ks, advertising, + 25000baseCR_Full) || + ethtool_link_ksettings_test_link_mode(ks, advertising, + 25000baseKR_Full) || + ethtool_link_ksettings_test_link_mode(ks, advertising, + 25000baseSR_Full)) + config.link_speed |= I40E_LINK_SPEED_25GB; + if (ethtool_link_ksettings_test_link_mode(ks, advertising, + 40000baseKR4_Full) || + ethtool_link_ksettings_test_link_mode(ks, advertising, + 40000baseCR4_Full) || + ethtool_link_ksettings_test_link_mode(ks, advertising, + 40000baseSR4_Full) || + ethtool_link_ksettings_test_link_mode(ks, advertising, + 40000baseLR4_Full)) config.link_speed |= I40E_LINK_SPEED_40GB; /* If speed didn't get set, set it to what it currently is. @@ -846,8 +997,7 @@ static int i40e_set_link_ksettings(struct net_device *netdev, */ if (!config.link_speed) config.link_speed = abilities.link_speed; - - if (change || (abilities.link_speed != config.link_speed)) { + if (autoneg_changed || abilities.link_speed != config.link_speed) { /* copy over the rest of the abilities */ config.phy_type = abilities.phy_type; config.phy_type_ext = abilities.phy_type_ext; @@ -874,7 +1024,8 @@ static int i40e_set_link_ksettings(struct net_device *netdev, /* make the aq call */ status = i40e_aq_set_phy_config(hw, &config, NULL); if (status) { - netdev_info(netdev, "Set phy config failed, err %s aq_err %s\n", + netdev_info(netdev, + "Set phy config failed, err %s aq_err %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); err = -EAGAIN; @@ -883,7 +1034,8 @@ static int i40e_set_link_ksettings(struct net_device *netdev, status = i40e_update_link_info(hw); if (status) - netdev_dbg(netdev, "Updating link info failed with err %s aq_err %s\n", + netdev_dbg(netdev, + "Updating link info failed with err %s aq_err %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); @@ -2008,7 +2160,9 @@ static int i40e_set_phys_id(struct net_device *netdev, if (!(pf->hw_features & I40E_HW_PHY_CONTROLS_LEDS)) { pf->led_status = i40e_led_get(hw); } else { - i40e_aq_set_phy_debug(hw, I40E_PHY_DEBUG_ALL, NULL); + if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE)) + i40e_aq_set_phy_debug(hw, I40E_PHY_DEBUG_ALL, + NULL); ret = i40e_led_get_phy(hw, &temp_status, &pf->phy_led_val); pf->led_status = temp_status; @@ -2033,7 +2187,8 @@ static int i40e_set_phys_id(struct net_device *netdev, ret = i40e_led_set_phy(hw, false, pf->led_status, (pf->phy_led_val | I40E_PHY_LED_MODE_ORIG)); - i40e_aq_set_phy_debug(hw, 0, NULL); + if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE)) + i40e_aq_set_phy_debug(hw, 0, NULL); } break; default: @@ -2071,14 +2226,13 @@ static int __i40e_get_coalesce(struct net_device *netdev, ec->tx_max_coalesced_frames_irq = vsi->work_limit; ec->rx_max_coalesced_frames_irq = vsi->work_limit; - /* rx and tx usecs has per queue value. If user doesn't specify the queue, - * return queue 0's value to represent. + /* rx and tx usecs has per queue value. If user doesn't specify the + * queue, return queue 0's value to represent. */ - if (queue < 0) { + if (queue < 0) queue = 0; - } else if (queue >= vsi->num_queue_pairs) { + else if (queue >= vsi->num_queue_pairs) return -EINVAL; - } rx_ring = vsi->rx_rings[queue]; tx_ring = vsi->tx_rings[queue]; @@ -2092,7 +2246,6 @@ static int __i40e_get_coalesce(struct net_device *netdev, ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC; ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC; - /* we use the _usecs_high to store/set the interrupt rate limit * that the hardware supports, that almost but not quite * fits the original intent of the ethtool variable, @@ -2142,7 +2295,6 @@ static int i40e_get_per_queue_coalesce(struct net_device *netdev, u32 queue, * * Change the ITR settings for a specific queue. **/ - static void i40e_set_itr_per_queue(struct i40e_vsi *vsi, struct ethtool_coalesce *ec, int queue) @@ -2264,8 +2416,8 @@ static int __i40e_set_coalesce(struct net_device *netdev, vsi->int_rate_limit); } - /* rx and tx usecs has per queue value. If user doesn't specify the queue, - * apply to all queues. + /* rx and tx usecs has per queue value. If user doesn't specify the + * queue, apply to all queues. */ if (queue < 0) { for (i = 0; i < vsi->num_queue_pairs; i++) @@ -2647,7 +2799,7 @@ static int i40e_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, switch (cmd->cmd) { case ETHTOOL_GRXRINGS: - cmd->data = vsi->num_queue_pairs; + cmd->data = vsi->rss_size; ret = 0; break; case ETHTOOL_GRXFH: @@ -3892,6 +4044,12 @@ static int i40e_set_channels(struct net_device *dev, if (vsi->type != I40E_VSI_MAIN) return -EINVAL; + /* We do not support setting channels via ethtool when TCs are + * configured through mqprio + */ + if (pf->flags & I40E_FLAG_TC_MQPRIO) + return -EINVAL; + /* verify they are not requesting separate vectors */ if (!count || ch->rx_count || ch->tx_count) return -EINVAL; @@ -3959,6 +4117,16 @@ static u32 i40e_get_rxfh_indir_size(struct net_device *netdev) return I40E_HLUT_ARRAY_SIZE; } +/** + * i40e_get_rxfh - get the rx flow hash indirection table + * @netdev: network interface device structure + * @indir: indirection table + * @key: hash key + * @hfunc: hash function + * + * Reads the indirection table directly from the hardware. Returns 0 on + * success. + **/ static int i40e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) { @@ -4090,7 +4258,7 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) struct i40e_netdev_priv *np = netdev_priv(dev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; - u64 orig_flags, new_flags, changed_flags; + u32 orig_flags, new_flags, changed_flags; u32 i, j; orig_flags = READ_ONCE(pf->flags); @@ -4142,12 +4310,12 @@ flags_complete: return -EOPNOTSUPP; /* Compare and exchange the new flags into place. If we failed, that - * is if cmpxchg64 returns anything but the old value, this means that + * is if cmpxchg returns anything but the old value, this means that * something else has modified the flags variable since we copied it * originally. We'll just punt with an error and log something in the * message buffer. */ - if (cmpxchg64(&pf->flags, orig_flags, new_flags) != orig_flags) { + if (cmpxchg(&pf->flags, orig_flags, new_flags) != orig_flags) { dev_warn(&pf->pdev->dev, "Unable to update pf->flags as it was modified by another thread...\n"); return -EAGAIN; @@ -4175,7 +4343,7 @@ flags_complete: sw_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC; valid_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC; ret = i40e_aq_set_switch_config(&pf->hw, sw_flags, valid_flags, - NULL); + 0, NULL); if (ret && pf->hw.aq.asq_last_status != I40E_AQ_RC_ESRCH) { dev_info(&pf->pdev->dev, "couldn't set switch config bits, err %s aq_err %s\n", @@ -4189,13 +4357,166 @@ flags_complete: /* Issue reset to cause things to take effect, as additional bits * are added we will need to create a mask of bits requiring reset */ - if ((changed_flags & I40E_FLAG_VEB_STATS_ENABLED) || - ((changed_flags & I40E_FLAG_LEGACY_RX) && netif_running(dev))) + if (changed_flags & (I40E_FLAG_VEB_STATS_ENABLED | + I40E_FLAG_LEGACY_RX | + I40E_FLAG_SOURCE_PRUNING_DISABLED)) i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED), true); return 0; } +/** + * i40e_get_module_info - get (Q)SFP+ module type info + * @netdev: network interface device structure + * @modinfo: module EEPROM size and layout information structure + **/ +static int i40e_get_module_info(struct net_device *netdev, + struct ethtool_modinfo *modinfo) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_vsi *vsi = np->vsi; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + u32 sff8472_comp = 0; + u32 sff8472_swap = 0; + u32 sff8636_rev = 0; + i40e_status status; + u32 type = 0; + + /* Check if firmware supports reading module EEPROM. */ + if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE)) { + netdev_err(vsi->netdev, "Module EEPROM memory read not supported. Please update the NVM image.\n"); + return -EINVAL; + } + + status = i40e_update_link_info(hw); + if (status) + return -EIO; + + if (hw->phy.link_info.phy_type == I40E_PHY_TYPE_EMPTY) { + netdev_err(vsi->netdev, "Cannot read module EEPROM memory. No module connected.\n"); + return -EINVAL; + } + + type = hw->phy.link_info.module_type[0]; + + switch (type) { + case I40E_MODULE_TYPE_SFP: + status = i40e_aq_get_phy_register(hw, + I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE, + I40E_I2C_EEPROM_DEV_ADDR, + I40E_MODULE_SFF_8472_COMP, + &sff8472_comp, NULL); + if (status) + return -EIO; + + status = i40e_aq_get_phy_register(hw, + I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE, + I40E_I2C_EEPROM_DEV_ADDR, + I40E_MODULE_SFF_8472_SWAP, + &sff8472_swap, NULL); + if (status) + return -EIO; + + /* Check if the module requires address swap to access + * the other EEPROM memory page. + */ + if (sff8472_swap & I40E_MODULE_SFF_ADDR_MODE) { + netdev_warn(vsi->netdev, "Module address swap to access page 0xA2 is not supported.\n"); + modinfo->type = ETH_MODULE_SFF_8079; + modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; + } else if (sff8472_comp == 0x00) { + /* Module is not SFF-8472 compliant */ + modinfo->type = ETH_MODULE_SFF_8079; + modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8472; + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + } + break; + case I40E_MODULE_TYPE_QSFP_PLUS: + /* Read from memory page 0. */ + status = i40e_aq_get_phy_register(hw, + I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE, + 0, + I40E_MODULE_REVISION_ADDR, + &sff8636_rev, NULL); + if (status) + return -EIO; + /* Determine revision compliance byte */ + if (sff8636_rev > 0x02) { + /* Module is SFF-8636 compliant */ + modinfo->type = ETH_MODULE_SFF_8636; + modinfo->eeprom_len = I40E_MODULE_QSFP_MAX_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = I40E_MODULE_QSFP_MAX_LEN; + } + break; + case I40E_MODULE_TYPE_QSFP28: + modinfo->type = ETH_MODULE_SFF_8636; + modinfo->eeprom_len = I40E_MODULE_QSFP_MAX_LEN; + break; + default: + netdev_err(vsi->netdev, "Module type unrecognized\n"); + return -EINVAL; + } + return 0; +} + +/** + * i40e_get_module_eeprom - fills buffer with (Q)SFP+ module memory contents + * @netdev: network interface device structure + * @ee: EEPROM dump request structure + * @data: buffer to be filled with EEPROM contents + **/ +static int i40e_get_module_eeprom(struct net_device *netdev, + struct ethtool_eeprom *ee, + u8 *data) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_vsi *vsi = np->vsi; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + bool is_sfp = false; + i40e_status status; + u32 value = 0; + int i; + + if (!ee || !ee->len || !data) + return -EINVAL; + + if (hw->phy.link_info.module_type[0] == I40E_MODULE_TYPE_SFP) + is_sfp = true; + + for (i = 0; i < ee->len; i++) { + u32 offset = i + ee->offset; + u32 addr = is_sfp ? I40E_I2C_EEPROM_DEV_ADDR : 0; + + /* Check if we need to access the other memory page */ + if (is_sfp) { + if (offset >= ETH_MODULE_SFF_8079_LEN) { + offset -= ETH_MODULE_SFF_8079_LEN; + addr = I40E_I2C_EEPROM_DEV_ADDR2; + } + } else { + while (offset >= ETH_MODULE_SFF_8436_LEN) { + /* Compute memory page number and offset. */ + offset -= ETH_MODULE_SFF_8436_LEN / 2; + addr++; + } + } + + status = i40e_aq_get_phy_register(hw, + I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE, + addr, offset, &value, NULL); + if (status) + return -EIO; + data[i] = value; + } + return 0; +} + static const struct ethtool_ops i40e_ethtool_ops = { .get_drvinfo = i40e_get_drvinfo, .get_regs_len = i40e_get_regs_len, @@ -4228,6 +4549,8 @@ static const struct ethtool_ops i40e_ethtool_ops = { .set_rxfh = i40e_set_rxfh, .get_channels = i40e_get_channels, .set_channels = i40e_set_channels, + .get_module_info = i40e_get_module_info, + .get_module_eeprom = i40e_get_module_eeprom, .get_ts_info = i40e_get_ts_info, .get_priv_flags = i40e_get_priv_flags, .set_priv_flags = i40e_set_priv_flags, diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index de1fcac7834d..4a964d6e4a9e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -69,6 +69,15 @@ static int i40e_reset(struct i40e_pf *pf); static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired); static void i40e_fdir_sb_setup(struct i40e_pf *pf); static int i40e_veb_get_bw_info(struct i40e_veb *veb); +static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi, + struct i40e_cloud_filter *filter, + bool add); +static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, + struct i40e_cloud_filter *filter, + bool add); +static int i40e_get_capabilities(struct i40e_pf *pf, + enum i40e_admin_queue_opc list_type); + /* i40e_pci_tbl - PCI Device ID Table * @@ -600,6 +609,20 @@ static void i40e_stat_update32(struct i40e_hw *hw, u32 reg, } /** + * i40e_stat_update_and_clear32 - read and clear hw reg, update a 32 bit stat + * @hw: ptr to the hardware info + * @reg: the hw reg to read and clear + * @stat: ptr to the stat + **/ +static void i40e_stat_update_and_clear32(struct i40e_hw *hw, u32 reg, u64 *stat) +{ + u32 new_data = rd32(hw, reg); + + wr32(hw, reg, 1); /* must write a nonzero value to clear register */ + *stat += new_data; +} + +/** * i40e_update_eth_stats - Update VSI-specific ethernet statistics counters. * @vsi: the VSI to be updated **/ @@ -1040,18 +1063,15 @@ static void i40e_update_pf_stats(struct i40e_pf *pf) &osd->rx_jabber, &nsd->rx_jabber); /* FDIR stats */ - i40e_stat_update32(hw, - I40E_GLQF_PCNT(I40E_FD_ATR_STAT_IDX(pf->hw.pf_id)), - pf->stat_offsets_loaded, - &osd->fd_atr_match, &nsd->fd_atr_match); - i40e_stat_update32(hw, - I40E_GLQF_PCNT(I40E_FD_SB_STAT_IDX(pf->hw.pf_id)), - pf->stat_offsets_loaded, - &osd->fd_sb_match, &nsd->fd_sb_match); - i40e_stat_update32(hw, - I40E_GLQF_PCNT(I40E_FD_ATR_TUNNEL_STAT_IDX(pf->hw.pf_id)), - pf->stat_offsets_loaded, - &osd->fd_atr_tunnel_match, &nsd->fd_atr_tunnel_match); + i40e_stat_update_and_clear32(hw, + I40E_GLQF_PCNT(I40E_FD_ATR_STAT_IDX(hw->pf_id)), + &nsd->fd_atr_match); + i40e_stat_update_and_clear32(hw, + I40E_GLQF_PCNT(I40E_FD_SB_STAT_IDX(hw->pf_id)), + &nsd->fd_sb_match); + i40e_stat_update_and_clear32(hw, + I40E_GLQF_PCNT(I40E_FD_ATR_TUNNEL_STAT_IDX(hw->pf_id)), + &nsd->fd_atr_tunnel_match); val = rd32(hw, I40E_PRTPM_EEE_STAT); nsd->tx_lpi_status = @@ -1578,6 +1598,170 @@ static int i40e_set_mac(struct net_device *netdev, void *p) } /** + * i40e_config_rss_aq - Prepare for RSS using AQ commands + * @vsi: vsi structure + * @seed: RSS hash seed + **/ +static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed, + u8 *lut, u16 lut_size) +{ + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + int ret = 0; + + if (seed) { + struct i40e_aqc_get_set_rss_key_data *seed_dw = + (struct i40e_aqc_get_set_rss_key_data *)seed; + ret = i40e_aq_set_rss_key(hw, vsi->id, seed_dw); + if (ret) { + dev_info(&pf->pdev->dev, + "Cannot set RSS key, err %s aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, hw->aq.asq_last_status)); + return ret; + } + } + if (lut) { + bool pf_lut = vsi->type == I40E_VSI_MAIN ? true : false; + + ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, lut, lut_size); + if (ret) { + dev_info(&pf->pdev->dev, + "Cannot set RSS lut, err %s aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, hw->aq.asq_last_status)); + return ret; + } + } + return ret; +} + +/** + * i40e_vsi_config_rss - Prepare for VSI(VMDq) RSS if used + * @vsi: VSI structure + **/ +static int i40e_vsi_config_rss(struct i40e_vsi *vsi) +{ + struct i40e_pf *pf = vsi->back; + u8 seed[I40E_HKEY_ARRAY_SIZE]; + u8 *lut; + int ret; + + if (!(pf->hw_features & I40E_HW_RSS_AQ_CAPABLE)) + return 0; + if (!vsi->rss_size) + vsi->rss_size = min_t(int, pf->alloc_rss_size, + vsi->num_queue_pairs); + if (!vsi->rss_size) + return -EINVAL; + lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); + if (!lut) + return -ENOMEM; + + /* Use the user configured hash keys and lookup table if there is one, + * otherwise use default + */ + if (vsi->rss_lut_user) + memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size); + else + i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size); + if (vsi->rss_hkey_user) + memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE); + else + netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE); + ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size); + kfree(lut); + return ret; +} + +/** + * i40e_vsi_setup_queue_map_mqprio - Prepares mqprio based tc_config + * @vsi: the VSI being configured, + * @ctxt: VSI context structure + * @enabled_tc: number of traffic classes to enable + * + * Prepares VSI tc_config to have queue configurations based on MQPRIO options. + **/ +static int i40e_vsi_setup_queue_map_mqprio(struct i40e_vsi *vsi, + struct i40e_vsi_context *ctxt, + u8 enabled_tc) +{ + u16 qcount = 0, max_qcount, qmap, sections = 0; + int i, override_q, pow, num_qps, ret; + u8 netdev_tc = 0, offset = 0; + + if (vsi->type != I40E_VSI_MAIN) + return -EINVAL; + sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID; + sections |= I40E_AQ_VSI_PROP_SCHED_VALID; + vsi->tc_config.numtc = vsi->mqprio_qopt.qopt.num_tc; + vsi->tc_config.enabled_tc = enabled_tc ? enabled_tc : 1; + num_qps = vsi->mqprio_qopt.qopt.count[0]; + + /* find the next higher power-of-2 of num queue pairs */ + pow = ilog2(num_qps); + if (!is_power_of_2(num_qps)) + pow++; + qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) | + (pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT); + + /* Setup queue offset/count for all TCs for given VSI */ + max_qcount = vsi->mqprio_qopt.qopt.count[0]; + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { + /* See if the given TC is enabled for the given VSI */ + if (vsi->tc_config.enabled_tc & BIT(i)) { + offset = vsi->mqprio_qopt.qopt.offset[i]; + qcount = vsi->mqprio_qopt.qopt.count[i]; + if (qcount > max_qcount) + max_qcount = qcount; + vsi->tc_config.tc_info[i].qoffset = offset; + vsi->tc_config.tc_info[i].qcount = qcount; + vsi->tc_config.tc_info[i].netdev_tc = netdev_tc++; + } else { + /* TC is not enabled so set the offset to + * default queue and allocate one queue + * for the given TC. + */ + vsi->tc_config.tc_info[i].qoffset = 0; + vsi->tc_config.tc_info[i].qcount = 1; + vsi->tc_config.tc_info[i].netdev_tc = 0; + } + } + + /* Set actual Tx/Rx queue pairs */ + vsi->num_queue_pairs = offset + qcount; + + /* Setup queue TC[0].qmap for given VSI context */ + ctxt->info.tc_mapping[0] = cpu_to_le16(qmap); + ctxt->info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG); + ctxt->info.queue_mapping[0] = cpu_to_le16(vsi->base_queue); + ctxt->info.valid_sections |= cpu_to_le16(sections); + + /* Reconfigure RSS for main VSI with max queue count */ + vsi->rss_size = max_qcount; + ret = i40e_vsi_config_rss(vsi); + if (ret) { + dev_info(&vsi->back->pdev->dev, + "Failed to reconfig rss for num_queues (%u)\n", + max_qcount); + return ret; + } + vsi->reconfig_rss = true; + dev_dbg(&vsi->back->pdev->dev, + "Reconfigured rss with num_queues (%u)\n", max_qcount); + + /* Find queue count available for channel VSIs and starting offset + * for channel VSIs + */ + override_q = vsi->mqprio_qopt.qopt.count[0]; + if (override_q && override_q < vsi->num_queue_pairs) { + vsi->cnt_q_avail = vsi->num_queue_pairs - override_q; + vsi->next_base_queue = override_q; + } + return 0; +} + +/** * i40e_vsi_setup_queue_map - Setup a VSI queue map based on enabled_tc * @vsi: the VSI being setup * @ctxt: VSI context structure @@ -1615,7 +1799,7 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, numtc = 1; } } else { - /* At least TC0 is enabled in case of non-DCB case */ + /* At least TC0 is enabled in non-DCB, non-MQPRIO case */ numtc = 1; } @@ -1765,11 +1949,6 @@ static void i40e_set_rx_mode(struct net_device *netdev) vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; vsi->back->flags |= I40E_FLAG_FILTER_SYNC; } - - /* schedule our worker thread which will take care of - * applying the new filter changes - */ - i40e_service_event_schedule(vsi->back); } /** @@ -2873,22 +3052,18 @@ static void i40e_vsi_free_rx_resources(struct i40e_vsi *vsi) **/ static void i40e_config_xps_tx_ring(struct i40e_ring *ring) { - struct i40e_vsi *vsi = ring->vsi; + int cpu; - if (!ring->q_vector || !ring->netdev) + if (!ring->q_vector || !ring->netdev || ring->ch) return; - if ((vsi->tc_config.numtc <= 1) && - !test_and_set_bit(__I40E_TX_XPS_INIT_DONE, &ring->state)) { - netif_set_xps_queue(ring->netdev, - get_cpu_mask(ring->q_vector->v_idx), - ring->queue_index); - } + /* We only initialize XPS once, so as not to overwrite user settings */ + if (test_and_set_bit(__I40E_TX_XPS_INIT_DONE, ring->state)) + return; - /* schedule our worker thread which will take care of - * applying the new filter changes - */ - i40e_service_event_schedule(vsi->back); + cpu = cpumask_local_spread(ring->q_vector->v_idx, -1); + netif_set_xps_queue(ring->netdev, get_cpu_mask(cpu), + ring->queue_index); } /** @@ -2942,7 +3117,14 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring) * initialization. This has to be done regardless of * DCB as by default everything is mapped to TC0. */ - tx_ctx.rdylist = le16_to_cpu(vsi->info.qs_handle[ring->dcb_tc]); + + if (ring->ch) + tx_ctx.rdylist = + le16_to_cpu(ring->ch->info.qs_handle[ring->dcb_tc]); + + else + tx_ctx.rdylist = le16_to_cpu(vsi->info.qs_handle[ring->dcb_tc]); + tx_ctx.rdylist_act = 0; /* clear the context in the HMC */ @@ -2964,12 +3146,23 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring) } /* Now associate this queue with this PCI function */ - if (vsi->type == I40E_VSI_VMDQ2) { - qtx_ctl = I40E_QTX_CTL_VM_QUEUE; - qtx_ctl |= ((vsi->id) << I40E_QTX_CTL_VFVM_INDX_SHIFT) & - I40E_QTX_CTL_VFVM_INDX_MASK; + if (ring->ch) { + if (ring->ch->type == I40E_VSI_VMDQ2) + qtx_ctl = I40E_QTX_CTL_VM_QUEUE; + else + return -EINVAL; + + qtx_ctl |= (ring->ch->vsi_number << + I40E_QTX_CTL_VFVM_INDX_SHIFT) & + I40E_QTX_CTL_VFVM_INDX_MASK; } else { - qtx_ctl = I40E_QTX_CTL_PF_QUEUE; + if (vsi->type == I40E_VSI_VMDQ2) { + qtx_ctl = I40E_QTX_CTL_VM_QUEUE; + qtx_ctl |= ((vsi->id) << I40E_QTX_CTL_VFVM_INDX_SHIFT) & + I40E_QTX_CTL_VFVM_INDX_MASK; + } else { + qtx_ctl = I40E_QTX_CTL_PF_QUEUE; + } } qtx_ctl |= ((hw->pf_id << I40E_QTX_CTL_PF_INDX_SHIFT) & @@ -2998,7 +3191,7 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) struct i40e_hmc_obj_rxq rx_ctx; i40e_status err = 0; - ring->state = 0; + bitmap_zero(ring->state, __I40E_RING_STATE_NBITS); /* clear the context structure first */ memset(&rx_ctx, 0, sizeof(rx_ctx)); @@ -3023,7 +3216,7 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) if (hw->revision_id == 0) rx_ctx.lrxqthresh = 0; else - rx_ctx.lrxqthresh = 2; + rx_ctx.lrxqthresh = 1; rx_ctx.crcstrip = 1; rx_ctx.l2tsel = 1; /* this controls whether VLAN is stripped from inner headers */ @@ -3138,6 +3331,7 @@ static void i40e_vsi_config_dcb_rings(struct i40e_vsi *vsi) rx_ring->dcb_tc = 0; tx_ring->dcb_tc = 0; } + return; } for (n = 0; n < I40E_MAX_TRAFFIC_CLASS; n++) { @@ -3396,15 +3590,14 @@ void i40e_irq_dynamic_disable_icr0(struct i40e_pf *pf) /** * i40e_irq_dynamic_enable_icr0 - Enable default interrupt generation for icr0 * @pf: board private structure - * @clearpba: true when all pending interrupt events should be cleared **/ -void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf, bool clearpba) +void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf) { struct i40e_hw *hw = &pf->hw; u32 val; val = I40E_PFINT_DYN_CTL0_INTENA_MASK | - (clearpba ? I40E_PFINT_DYN_CTL0_CLEARPBA_MASK : 0) | + I40E_PFINT_DYN_CTL0_CLEARPBA_MASK | (I40E_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT); wr32(hw, I40E_PFINT_DYN_CTL0, val); @@ -3471,6 +3664,7 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename) int tx_int_idx = 0; int vector, err; int irq_num; + int cpu; for (vector = 0; vector < q_vectors; vector++) { struct i40e_q_vector *q_vector = vsi->q_vectors[vector]; @@ -3506,10 +3700,14 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename) q_vector->affinity_notify.notify = i40e_irq_affinity_notify; q_vector->affinity_notify.release = i40e_irq_affinity_release; irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify); - /* get_cpu_mask returns a static constant mask with - * a permanent lifetime so it's ok to use here. + /* Spread affinity hints out across online CPUs. + * + * get_cpu_mask returns a static constant mask with + * a permanent lifetime so it's ok to pass to + * irq_set_affinity_hint without making a copy. */ - irq_set_affinity_hint(irq_num, get_cpu_mask(q_vector->v_idx)); + cpu = cpumask_local_spread(q_vector->v_idx, -1); + irq_set_affinity_hint(irq_num, get_cpu_mask(cpu)); } vsi->irqs_ready = true; @@ -3585,7 +3783,7 @@ static int i40e_vsi_enable_irq(struct i40e_vsi *vsi) for (i = 0; i < vsi->num_q_vectors; i++) i40e_irq_dynamic_enable(vsi, i); } else { - i40e_irq_dynamic_enable_icr0(pf, true); + i40e_irq_dynamic_enable_icr0(pf); } i40e_flush(&pf->hw); @@ -3593,14 +3791,20 @@ static int i40e_vsi_enable_irq(struct i40e_vsi *vsi) } /** - * i40e_stop_misc_vector - Stop the vector that handles non-queue events + * i40e_free_misc_vector - Free the vector that handles non-queue events * @pf: board private structure **/ -static void i40e_stop_misc_vector(struct i40e_pf *pf) +static void i40e_free_misc_vector(struct i40e_pf *pf) { /* Disable ICR 0 */ wr32(&pf->hw, I40E_PFINT_ICR0_ENA, 0); i40e_flush(&pf->hw); + + if (pf->flags & I40E_FLAG_MSIX_ENABLED && pf->msix_entries) { + synchronize_irq(pf->msix_entries[0].vector); + free_irq(pf->msix_entries[0].vector, pf); + clear_bit(__I40E_MISC_IRQ_REQUESTED, pf->state); + } } /** @@ -3728,7 +3932,7 @@ enable_intr: wr32(hw, I40E_PFINT_ICR0_ENA, ena_mask); if (!test_bit(__I40E_DOWN, pf->state)) { i40e_service_event_schedule(pf); - i40e_irq_dynamic_enable_icr0(pf, false); + i40e_irq_dynamic_enable_icr0(pf); } return ret; @@ -4455,11 +4659,7 @@ static void i40e_clear_interrupt_scheme(struct i40e_pf *pf) { int i; - i40e_stop_misc_vector(pf); - if (pf->flags & I40E_FLAG_MSIX_ENABLED && pf->msix_entries) { - synchronize_irq(pf->msix_entries[0].vector); - free_irq(pf->msix_entries[0].vector, pf); - } + i40e_free_misc_vector(pf); i40e_put_lump(pf->irq_pile, pf->iwarp_base_vector, I40E_IWARP_IRQ_PILE_ID); @@ -4848,6 +5048,24 @@ static u8 i40e_dcb_get_enabled_tc(struct i40e_dcbx_config *dcbcfg) } /** + * i40e_mqprio_get_enabled_tc - Get enabled traffic classes + * @pf: PF being queried + * + * Query the current MQPRIO configuration and return the number of + * traffic classes enabled. + **/ +static u8 i40e_mqprio_get_enabled_tc(struct i40e_pf *pf) +{ + struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; + u8 num_tc = vsi->mqprio_qopt.qopt.num_tc; + u8 enabled_tc = 1, i; + + for (i = 1; i < num_tc; i++) + enabled_tc |= BIT(i); + return enabled_tc; +} + +/** * i40e_pf_get_num_tc - Get enabled traffic classes for PF * @pf: PF being queried * @@ -4860,7 +5078,10 @@ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf) u8 num_tc = 0; struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config; - /* If DCB is not enabled then always in single TC */ + if (pf->flags & I40E_FLAG_TC_MQPRIO) + return pf->vsi[pf->lan_vsi]->mqprio_qopt.qopt.num_tc; + + /* If neither MQPRIO nor DCB is enabled, then always use single TC */ if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) return 1; @@ -4889,7 +5110,12 @@ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf) **/ static u8 i40e_pf_get_tc_map(struct i40e_pf *pf) { - /* If DCB is not enabled for this PF then just return default TC */ + if (pf->flags & I40E_FLAG_TC_MQPRIO) + return i40e_mqprio_get_enabled_tc(pf); + + /* If neither MQPRIO nor DCB is enabled for this PF then just return + * default TC + */ if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) return I40E_DEFAULT_TRAFFIC_CLASS; @@ -4979,6 +5205,16 @@ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc, i40e_status ret; int i; + if (vsi->back->flags & I40E_FLAG_TC_MQPRIO) + return 0; + if (!vsi->mqprio_qopt.qopt.hw) { + ret = i40e_set_bw_limit(vsi, vsi->seid, 0); + if (ret) + dev_info(&vsi->back->pdev->dev, + "Failed to reset tx rate for vsi->seid %u\n", + vsi->seid); + return ret; + } bw_data.tc_valid_bits = enabled_tc; for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) bw_data.tc_bw_credits[i] = bw_share[i]; @@ -5041,6 +5277,9 @@ static void i40e_vsi_config_netdev_tc(struct i40e_vsi *vsi, u8 enabled_tc) vsi->tc_config.tc_info[i].qoffset); } + if (pf->flags & I40E_FLAG_TC_MQPRIO) + return; + /* Assign UP2TC map for the VSI */ for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) { /* Get the actual TC# for the UP */ @@ -5091,7 +5330,8 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc) int i; /* Check if enabled_tc is same as existing or new TCs */ - if (vsi->tc_config.enabled_tc == enabled_tc) + if (vsi->tc_config.enabled_tc == enabled_tc && + vsi->mqprio_qopt.mode != TC_MQPRIO_MODE_CHANNEL) return ret; /* Enable ETS TCs with equal BW Share for now across all VSIs */ @@ -5114,15 +5354,37 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc) ctxt.vf_num = 0; ctxt.uplink_seid = vsi->uplink_seid; ctxt.info = vsi->info; - i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false); + if (vsi->back->flags & I40E_FLAG_TC_MQPRIO) { + ret = i40e_vsi_setup_queue_map_mqprio(vsi, &ctxt, enabled_tc); + if (ret) + goto out; + } else { + i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false); + } + /* On destroying the qdisc, reset vsi->rss_size, as number of enabled + * queues changed. + */ + if (!vsi->mqprio_qopt.qopt.hw && vsi->reconfig_rss) { + vsi->rss_size = min_t(int, vsi->back->alloc_rss_size, + vsi->num_queue_pairs); + ret = i40e_vsi_config_rss(vsi); + if (ret) { + dev_info(&vsi->back->pdev->dev, + "Failed to reconfig rss for num_queues\n"); + return ret; + } + vsi->reconfig_rss = false; + } if (vsi->back->flags & I40E_FLAG_IWARP_ENABLED) { ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID); ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA; } - /* Update the VSI after updating the VSI queue-mapping information */ + /* Update the VSI after updating the VSI queue-mapping + * information + */ ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL); if (ret) { dev_info(&vsi->back->pdev->dev, @@ -5154,6 +5416,825 @@ out: } /** + * i40e_get_link_speed - Returns link speed for the interface + * @vsi: VSI to be configured + * + **/ +int i40e_get_link_speed(struct i40e_vsi *vsi) +{ + struct i40e_pf *pf = vsi->back; + + switch (pf->hw.phy.link_info.link_speed) { + case I40E_LINK_SPEED_40GB: + return 40000; + case I40E_LINK_SPEED_25GB: + return 25000; + case I40E_LINK_SPEED_20GB: + return 20000; + case I40E_LINK_SPEED_10GB: + return 10000; + case I40E_LINK_SPEED_1GB: + return 1000; + default: + return -EINVAL; + } +} + +/** + * i40e_set_bw_limit - setup BW limit for Tx traffic based on max_tx_rate + * @vsi: VSI to be configured + * @seid: seid of the channel/VSI + * @max_tx_rate: max TX rate to be configured as BW limit + * + * Helper function to set BW limit for a given VSI + **/ +int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate) +{ + struct i40e_pf *pf = vsi->back; + u64 credits = 0; + int speed = 0; + int ret = 0; + + speed = i40e_get_link_speed(vsi); + if (max_tx_rate > speed) { + dev_err(&pf->pdev->dev, + "Invalid max tx rate %llu specified for VSI seid %d.", + max_tx_rate, seid); + return -EINVAL; + } + if (max_tx_rate && max_tx_rate < 50) { + dev_warn(&pf->pdev->dev, + "Setting max tx rate to minimum usable value of 50Mbps.\n"); + max_tx_rate = 50; + } + + /* Tx rate credits are in values of 50Mbps, 0 is disabled */ + credits = max_tx_rate; + do_div(credits, I40E_BW_CREDIT_DIVISOR); + ret = i40e_aq_config_vsi_bw_limit(&pf->hw, seid, credits, + I40E_MAX_BW_INACTIVE_ACCUM, NULL); + if (ret) + dev_err(&pf->pdev->dev, + "Failed set tx rate (%llu Mbps) for vsi->seid %u, err %s aq_err %s\n", + max_tx_rate, seid, i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + return ret; +} + +/** + * i40e_remove_queue_channels - Remove queue channels for the TCs + * @vsi: VSI to be configured + * + * Remove queue channels for the TCs + **/ +static void i40e_remove_queue_channels(struct i40e_vsi *vsi) +{ + enum i40e_admin_queue_err last_aq_status; + struct i40e_cloud_filter *cfilter; + struct i40e_channel *ch, *ch_tmp; + struct i40e_pf *pf = vsi->back; + struct hlist_node *node; + int ret, i; + + /* Reset rss size that was stored when reconfiguring rss for + * channel VSIs with non-power-of-2 queue count. + */ + vsi->current_rss_size = 0; + + /* perform cleanup for channels if they exist */ + if (list_empty(&vsi->ch_list)) + return; + + list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) { + struct i40e_vsi *p_vsi; + + list_del(&ch->list); + p_vsi = ch->parent_vsi; + if (!p_vsi || !ch->initialized) { + kfree(ch); + continue; + } + /* Reset queue contexts */ + for (i = 0; i < ch->num_queue_pairs; i++) { + struct i40e_ring *tx_ring, *rx_ring; + u16 pf_q; + + pf_q = ch->base_queue + i; + tx_ring = vsi->tx_rings[pf_q]; + tx_ring->ch = NULL; + + rx_ring = vsi->rx_rings[pf_q]; + rx_ring->ch = NULL; + } + + /* Reset BW configured for this VSI via mqprio */ + ret = i40e_set_bw_limit(vsi, ch->seid, 0); + if (ret) + dev_info(&vsi->back->pdev->dev, + "Failed to reset tx rate for ch->seid %u\n", + ch->seid); + + /* delete cloud filters associated with this channel */ + hlist_for_each_entry_safe(cfilter, node, + &pf->cloud_filter_list, cloud_node) { + if (cfilter->seid != ch->seid) + continue; + + hash_del(&cfilter->cloud_node); + if (cfilter->dst_port) + ret = i40e_add_del_cloud_filter_big_buf(vsi, + cfilter, + false); + else + ret = i40e_add_del_cloud_filter(vsi, cfilter, + false); + last_aq_status = pf->hw.aq.asq_last_status; + if (ret) + dev_info(&pf->pdev->dev, + "Failed to delete cloud filter, err %s aq_err %s\n", + i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, last_aq_status)); + kfree(cfilter); + } + + /* delete VSI from FW */ + ret = i40e_aq_delete_element(&vsi->back->hw, ch->seid, + NULL); + if (ret) + dev_err(&vsi->back->pdev->dev, + "unable to remove channel (%d) for parent VSI(%d)\n", + ch->seid, p_vsi->seid); + kfree(ch); + } + INIT_LIST_HEAD(&vsi->ch_list); +} + +/** + * i40e_is_any_channel - channel exist or not + * @vsi: ptr to VSI to which channels are associated with + * + * Returns true or false if channel(s) exist for associated VSI or not + **/ +static bool i40e_is_any_channel(struct i40e_vsi *vsi) +{ + struct i40e_channel *ch, *ch_tmp; + + list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) { + if (ch->initialized) + return true; + } + + return false; +} + +/** + * i40e_get_max_queues_for_channel + * @vsi: ptr to VSI to which channels are associated with + * + * Helper function which returns max value among the queue counts set on the + * channels/TCs created. + **/ +static int i40e_get_max_queues_for_channel(struct i40e_vsi *vsi) +{ + struct i40e_channel *ch, *ch_tmp; + int max = 0; + + list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) { + if (!ch->initialized) + continue; + if (ch->num_queue_pairs > max) + max = ch->num_queue_pairs; + } + + return max; +} + +/** + * i40e_validate_num_queues - validate num_queues w.r.t channel + * @pf: ptr to PF device + * @num_queues: number of queues + * @vsi: the parent VSI + * @reconfig_rss: indicates should the RSS be reconfigured or not + * + * This function validates number of queues in the context of new channel + * which is being established and determines if RSS should be reconfigured + * or not for parent VSI. + **/ +static int i40e_validate_num_queues(struct i40e_pf *pf, int num_queues, + struct i40e_vsi *vsi, bool *reconfig_rss) +{ + int max_ch_queues; + + if (!reconfig_rss) + return -EINVAL; + + *reconfig_rss = false; + + if (num_queues > I40E_MAX_QUEUES_PER_CH) { + dev_err(&pf->pdev->dev, + "Failed to create VMDq VSI. User requested num_queues (%d) > I40E_MAX_QUEUES_PER_VSI (%u)\n", + num_queues, I40E_MAX_QUEUES_PER_CH); + return -EINVAL; + } + + if (vsi->current_rss_size) { + if (num_queues > vsi->current_rss_size) { + dev_dbg(&pf->pdev->dev, + "Error: num_queues (%d) > vsi's current_size(%d)\n", + num_queues, vsi->current_rss_size); + return -EINVAL; + } else if ((num_queues < vsi->current_rss_size) && + (!is_power_of_2(num_queues))) { + dev_dbg(&pf->pdev->dev, + "Error: num_queues (%d) < vsi's current_size(%d), but not power of 2\n", + num_queues, vsi->current_rss_size); + return -EINVAL; + } + } + + if (!is_power_of_2(num_queues)) { + /* Find the max num_queues configured for channel if channel + * exist. + * if channel exist, then enforce 'num_queues' to be more than + * max ever queues configured for channel. + */ + max_ch_queues = i40e_get_max_queues_for_channel(vsi); + if (num_queues < max_ch_queues) { + dev_dbg(&pf->pdev->dev, + "Error: num_queues (%d) < max queues configured for channel(%d)\n", + num_queues, max_ch_queues); + return -EINVAL; + } + *reconfig_rss = true; + } + + return 0; +} + +/** + * i40e_vsi_reconfig_rss - reconfig RSS based on specified rss_size + * @vsi: the VSI being setup + * @rss_size: size of RSS, accordingly LUT gets reprogrammed + * + * This function reconfigures RSS by reprogramming LUTs using 'rss_size' + **/ +static int i40e_vsi_reconfig_rss(struct i40e_vsi *vsi, u16 rss_size) +{ + struct i40e_pf *pf = vsi->back; + u8 seed[I40E_HKEY_ARRAY_SIZE]; + struct i40e_hw *hw = &pf->hw; + int local_rss_size; + u8 *lut; + int ret; + + if (!vsi->rss_size) + return -EINVAL; + + if (rss_size > vsi->rss_size) + return -EINVAL; + + local_rss_size = min_t(int, vsi->rss_size, rss_size); + lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); + if (!lut) + return -ENOMEM; + + /* Ignoring user configured lut if there is one */ + i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, local_rss_size); + + /* Use user configured hash key if there is one, otherwise + * use default. + */ + if (vsi->rss_hkey_user) + memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE); + else + netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE); + + ret = i40e_config_rss(vsi, seed, lut, vsi->rss_table_size); + if (ret) { + dev_info(&pf->pdev->dev, + "Cannot set RSS lut, err %s aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, hw->aq.asq_last_status)); + kfree(lut); + return ret; + } + kfree(lut); + + /* Do the update w.r.t. storing rss_size */ + if (!vsi->orig_rss_size) + vsi->orig_rss_size = vsi->rss_size; + vsi->current_rss_size = local_rss_size; + + return ret; +} + +/** + * i40e_channel_setup_queue_map - Setup a channel queue map + * @pf: ptr to PF device + * @vsi: the VSI being setup + * @ctxt: VSI context structure + * @ch: ptr to channel structure + * + * Setup queue map for a specific channel + **/ +static void i40e_channel_setup_queue_map(struct i40e_pf *pf, + struct i40e_vsi_context *ctxt, + struct i40e_channel *ch) +{ + u16 qcount, qmap, sections = 0; + u8 offset = 0; + int pow; + + sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID; + sections |= I40E_AQ_VSI_PROP_SCHED_VALID; + + qcount = min_t(int, ch->num_queue_pairs, pf->num_lan_msix); + ch->num_queue_pairs = qcount; + + /* find the next higher power-of-2 of num queue pairs */ + pow = ilog2(qcount); + if (!is_power_of_2(qcount)) + pow++; + + qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) | + (pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT); + + /* Setup queue TC[0].qmap for given VSI context */ + ctxt->info.tc_mapping[0] = cpu_to_le16(qmap); + + ctxt->info.up_enable_bits = 0x1; /* TC0 enabled */ + ctxt->info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG); + ctxt->info.queue_mapping[0] = cpu_to_le16(ch->base_queue); + ctxt->info.valid_sections |= cpu_to_le16(sections); +} + +/** + * i40e_add_channel - add a channel by adding VSI + * @pf: ptr to PF device + * @uplink_seid: underlying HW switching element (VEB) ID + * @ch: ptr to channel structure + * + * Add a channel (VSI) using add_vsi and queue_map + **/ +static int i40e_add_channel(struct i40e_pf *pf, u16 uplink_seid, + struct i40e_channel *ch) +{ + struct i40e_hw *hw = &pf->hw; + struct i40e_vsi_context ctxt; + u8 enabled_tc = 0x1; /* TC0 enabled */ + int ret; + + if (ch->type != I40E_VSI_VMDQ2) { + dev_info(&pf->pdev->dev, + "add new vsi failed, ch->type %d\n", ch->type); + return -EINVAL; + } + + memset(&ctxt, 0, sizeof(ctxt)); + ctxt.pf_num = hw->pf_id; + ctxt.vf_num = 0; + ctxt.uplink_seid = uplink_seid; + ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL; + if (ch->type == I40E_VSI_VMDQ2) + ctxt.flags = I40E_AQ_VSI_TYPE_VMDQ2; + + if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED) { + ctxt.info.valid_sections |= + cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID); + ctxt.info.switch_id = + cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB); + } + + /* Set queue map for a given VSI context */ + i40e_channel_setup_queue_map(pf, &ctxt, ch); + + /* Now time to create VSI */ + ret = i40e_aq_add_vsi(hw, &ctxt, NULL); + if (ret) { + dev_info(&pf->pdev->dev, + "add new vsi failed, err %s aq_err %s\n", + i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, + pf->hw.aq.asq_last_status)); + return -ENOENT; + } + + /* Success, update channel */ + ch->enabled_tc = enabled_tc; + ch->seid = ctxt.seid; + ch->vsi_number = ctxt.vsi_number; + ch->stat_counter_idx = cpu_to_le16(ctxt.info.stat_counter_idx); + + /* copy just the sections touched not the entire info + * since not all sections are valid as returned by + * update vsi params + */ + ch->info.mapping_flags = ctxt.info.mapping_flags; + memcpy(&ch->info.queue_mapping, + &ctxt.info.queue_mapping, sizeof(ctxt.info.queue_mapping)); + memcpy(&ch->info.tc_mapping, ctxt.info.tc_mapping, + sizeof(ctxt.info.tc_mapping)); + + return 0; +} + +static int i40e_channel_config_bw(struct i40e_vsi *vsi, struct i40e_channel *ch, + u8 *bw_share) +{ + struct i40e_aqc_configure_vsi_tc_bw_data bw_data; + i40e_status ret; + int i; + + bw_data.tc_valid_bits = ch->enabled_tc; + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) + bw_data.tc_bw_credits[i] = bw_share[i]; + + ret = i40e_aq_config_vsi_tc_bw(&vsi->back->hw, ch->seid, + &bw_data, NULL); + if (ret) { + dev_info(&vsi->back->pdev->dev, + "Config VSI BW allocation per TC failed, aq_err: %d for new_vsi->seid %u\n", + vsi->back->hw.aq.asq_last_status, ch->seid); + return -EINVAL; + } + + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) + ch->info.qs_handle[i] = bw_data.qs_handles[i]; + + return 0; +} + +/** + * i40e_channel_config_tx_ring - config TX ring associated with new channel + * @pf: ptr to PF device + * @vsi: the VSI being setup + * @ch: ptr to channel structure + * + * Configure TX rings associated with channel (VSI) since queues are being + * from parent VSI. + **/ +static int i40e_channel_config_tx_ring(struct i40e_pf *pf, + struct i40e_vsi *vsi, + struct i40e_channel *ch) +{ + i40e_status ret; + int i; + u8 bw_share[I40E_MAX_TRAFFIC_CLASS] = {0}; + + /* Enable ETS TCs with equal BW Share for now across all VSIs */ + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { + if (ch->enabled_tc & BIT(i)) + bw_share[i] = 1; + } + + /* configure BW for new VSI */ + ret = i40e_channel_config_bw(vsi, ch, bw_share); + if (ret) { + dev_info(&vsi->back->pdev->dev, + "Failed configuring TC map %d for channel (seid %u)\n", + ch->enabled_tc, ch->seid); + return ret; + } + + for (i = 0; i < ch->num_queue_pairs; i++) { + struct i40e_ring *tx_ring, *rx_ring; + u16 pf_q; + + pf_q = ch->base_queue + i; + + /* Get to TX ring ptr of main VSI, for re-setup TX queue + * context + */ + tx_ring = vsi->tx_rings[pf_q]; + tx_ring->ch = ch; + + /* Get the RX ring ptr */ + rx_ring = vsi->rx_rings[pf_q]; + rx_ring->ch = ch; + } + + return 0; +} + +/** + * i40e_setup_hw_channel - setup new channel + * @pf: ptr to PF device + * @vsi: the VSI being setup + * @ch: ptr to channel structure + * @uplink_seid: underlying HW switching element (VEB) ID + * @type: type of channel to be created (VMDq2/VF) + * + * Setup new channel (VSI) based on specified type (VMDq2/VF) + * and configures TX rings accordingly + **/ +static inline int i40e_setup_hw_channel(struct i40e_pf *pf, + struct i40e_vsi *vsi, + struct i40e_channel *ch, + u16 uplink_seid, u8 type) +{ + int ret; + + ch->initialized = false; + ch->base_queue = vsi->next_base_queue; + ch->type = type; + + /* Proceed with creation of channel (VMDq2) VSI */ + ret = i40e_add_channel(pf, uplink_seid, ch); + if (ret) { + dev_info(&pf->pdev->dev, + "failed to add_channel using uplink_seid %u\n", + uplink_seid); + return ret; + } + + /* Mark the successful creation of channel */ + ch->initialized = true; + + /* Reconfigure TX queues using QTX_CTL register */ + ret = i40e_channel_config_tx_ring(pf, vsi, ch); + if (ret) { + dev_info(&pf->pdev->dev, + "failed to configure TX rings for channel %u\n", + ch->seid); + return ret; + } + + /* update 'next_base_queue' */ + vsi->next_base_queue = vsi->next_base_queue + ch->num_queue_pairs; + dev_dbg(&pf->pdev->dev, + "Added channel: vsi_seid %u, vsi_number %u, stat_counter_idx %u, num_queue_pairs %u, pf->next_base_queue %d\n", + ch->seid, ch->vsi_number, ch->stat_counter_idx, + ch->num_queue_pairs, + vsi->next_base_queue); + return ret; +} + +/** + * i40e_setup_channel - setup new channel using uplink element + * @pf: ptr to PF device + * @type: type of channel to be created (VMDq2/VF) + * @uplink_seid: underlying HW switching element (VEB) ID + * @ch: ptr to channel structure + * + * Setup new channel (VSI) based on specified type (VMDq2/VF) + * and uplink switching element (uplink_seid) + **/ +static bool i40e_setup_channel(struct i40e_pf *pf, struct i40e_vsi *vsi, + struct i40e_channel *ch) +{ + u8 vsi_type; + u16 seid; + int ret; + + if (vsi->type == I40E_VSI_MAIN) { + vsi_type = I40E_VSI_VMDQ2; + } else { + dev_err(&pf->pdev->dev, "unsupported parent vsi type(%d)\n", + vsi->type); + return false; + } + + /* underlying switching element */ + seid = pf->vsi[pf->lan_vsi]->uplink_seid; + + /* create channel (VSI), configure TX rings */ + ret = i40e_setup_hw_channel(pf, vsi, ch, seid, vsi_type); + if (ret) { + dev_err(&pf->pdev->dev, "failed to setup hw_channel\n"); + return false; + } + + return ch->initialized ? true : false; +} + +/** + * i40e_validate_and_set_switch_mode - sets up switch mode correctly + * @vsi: ptr to VSI which has PF backing + * + * Sets up switch mode correctly if it needs to be changed and perform + * what are allowed modes. + **/ +static int i40e_validate_and_set_switch_mode(struct i40e_vsi *vsi) +{ + u8 mode; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + int ret; + + ret = i40e_get_capabilities(pf, i40e_aqc_opc_list_dev_capabilities); + if (ret) + return -EINVAL; + + if (hw->dev_caps.switch_mode) { + /* if switch mode is set, support mode2 (non-tunneled for + * cloud filter) for now + */ + u32 switch_mode = hw->dev_caps.switch_mode & + I40E_SWITCH_MODE_MASK; + if (switch_mode >= I40E_CLOUD_FILTER_MODE1) { + if (switch_mode == I40E_CLOUD_FILTER_MODE2) + return 0; + dev_err(&pf->pdev->dev, + "Invalid switch_mode (%d), only non-tunneled mode for cloud filter is supported\n", + hw->dev_caps.switch_mode); + return -EINVAL; + } + } + + /* Set Bit 7 to be valid */ + mode = I40E_AQ_SET_SWITCH_BIT7_VALID; + + /* Set L4type to both TCP and UDP support */ + mode |= I40E_AQ_SET_SWITCH_L4_TYPE_BOTH; + + /* Set cloud filter mode */ + mode |= I40E_AQ_SET_SWITCH_MODE_NON_TUNNEL; + + /* Prep mode field for set_switch_config */ + ret = i40e_aq_set_switch_config(hw, pf->last_sw_conf_flags, + pf->last_sw_conf_valid_flags, + mode, NULL); + if (ret && hw->aq.asq_last_status != I40E_AQ_RC_ESRCH) + dev_err(&pf->pdev->dev, + "couldn't set switch config bits, err %s aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, + hw->aq.asq_last_status)); + + return ret; +} + +/** + * i40e_create_queue_channel - function to create channel + * @vsi: VSI to be configured + * @ch: ptr to channel (it contains channel specific params) + * + * This function creates channel (VSI) using num_queues specified by user, + * reconfigs RSS if needed. + **/ +int i40e_create_queue_channel(struct i40e_vsi *vsi, + struct i40e_channel *ch) +{ + struct i40e_pf *pf = vsi->back; + bool reconfig_rss; + int err; + + if (!ch) + return -EINVAL; + + if (!ch->num_queue_pairs) { + dev_err(&pf->pdev->dev, "Invalid num_queues requested: %d\n", + ch->num_queue_pairs); + return -EINVAL; + } + + /* validate user requested num_queues for channel */ + err = i40e_validate_num_queues(pf, ch->num_queue_pairs, vsi, + &reconfig_rss); + if (err) { + dev_info(&pf->pdev->dev, "Failed to validate num_queues (%d)\n", + ch->num_queue_pairs); + return -EINVAL; + } + + /* By default we are in VEPA mode, if this is the first VF/VMDq + * VSI to be added switch to VEB mode. + */ + if ((!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) || + (!i40e_is_any_channel(vsi))) { + if (!is_power_of_2(vsi->tc_config.tc_info[0].qcount)) { + dev_dbg(&pf->pdev->dev, + "Failed to create channel. Override queues (%u) not power of 2\n", + vsi->tc_config.tc_info[0].qcount); + return -EINVAL; + } + + if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) { + pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; + + if (vsi->type == I40E_VSI_MAIN) { + if (pf->flags & I40E_FLAG_TC_MQPRIO) + i40e_do_reset(pf, I40E_PF_RESET_FLAG, + true); + else + i40e_do_reset_safe(pf, + I40E_PF_RESET_FLAG); + } + } + /* now onwards for main VSI, number of queues will be value + * of TC0's queue count + */ + } + + /* By this time, vsi->cnt_q_avail shall be set to non-zero and + * it should be more than num_queues + */ + if (!vsi->cnt_q_avail || vsi->cnt_q_avail < ch->num_queue_pairs) { + dev_dbg(&pf->pdev->dev, + "Error: cnt_q_avail (%u) less than num_queues %d\n", + vsi->cnt_q_avail, ch->num_queue_pairs); + return -EINVAL; + } + + /* reconfig_rss only if vsi type is MAIN_VSI */ + if (reconfig_rss && (vsi->type == I40E_VSI_MAIN)) { + err = i40e_vsi_reconfig_rss(vsi, ch->num_queue_pairs); + if (err) { + dev_info(&pf->pdev->dev, + "Error: unable to reconfig rss for num_queues (%u)\n", + ch->num_queue_pairs); + return -EINVAL; + } + } + + if (!i40e_setup_channel(pf, vsi, ch)) { + dev_info(&pf->pdev->dev, "Failed to setup channel\n"); + return -EINVAL; + } + + dev_info(&pf->pdev->dev, + "Setup channel (id:%u) utilizing num_queues %d\n", + ch->seid, ch->num_queue_pairs); + + /* configure VSI for BW limit */ + if (ch->max_tx_rate) { + u64 credits = ch->max_tx_rate; + + if (i40e_set_bw_limit(vsi, ch->seid, ch->max_tx_rate)) + return -EINVAL; + + do_div(credits, I40E_BW_CREDIT_DIVISOR); + dev_dbg(&pf->pdev->dev, + "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n", + ch->max_tx_rate, + credits, + ch->seid); + } + + /* in case of VF, this will be main SRIOV VSI */ + ch->parent_vsi = vsi; + + /* and update main_vsi's count for queue_available to use */ + vsi->cnt_q_avail -= ch->num_queue_pairs; + + return 0; +} + +/** + * i40e_configure_queue_channels - Add queue channel for the given TCs + * @vsi: VSI to be configured + * + * Configures queue channel mapping to the given TCs + **/ +static int i40e_configure_queue_channels(struct i40e_vsi *vsi) +{ + struct i40e_channel *ch; + u64 max_rate = 0; + int ret = 0, i; + + /* Create app vsi with the TCs. Main VSI with TC0 is already set up */ + vsi->tc_seid_map[0] = vsi->seid; + for (i = 1; i < I40E_MAX_TRAFFIC_CLASS; i++) { + if (vsi->tc_config.enabled_tc & BIT(i)) { + ch = kzalloc(sizeof(*ch), GFP_KERNEL); + if (!ch) { + ret = -ENOMEM; + goto err_free; + } + + INIT_LIST_HEAD(&ch->list); + ch->num_queue_pairs = + vsi->tc_config.tc_info[i].qcount; + ch->base_queue = + vsi->tc_config.tc_info[i].qoffset; + + /* Bandwidth limit through tc interface is in bytes/s, + * change to Mbit/s + */ + max_rate = vsi->mqprio_qopt.max_rate[i]; + do_div(max_rate, I40E_BW_MBPS_DIVISOR); + ch->max_tx_rate = max_rate; + + list_add_tail(&ch->list, &vsi->ch_list); + + ret = i40e_create_queue_channel(vsi, ch); + if (ret) { + dev_err(&vsi->back->pdev->dev, + "Failed creating queue channel with TC%d: queues %d\n", + i, ch->num_queue_pairs); + goto err_free; + } + vsi->tc_seid_map[i] = ch->seid; + } + } + return ret; + +err_free: + i40e_remove_queue_channels(vsi); + return ret; +} + +/** * i40e_veb_config_tc - Configure TCs for given VEB * @veb: given VEB * @enabled_tc: TC bitmap @@ -5346,13 +6427,14 @@ out: void i40e_print_link_message(struct i40e_vsi *vsi, bool isup) { enum i40e_aq_link_speed new_speed; + struct i40e_pf *pf = vsi->back; char *speed = "Unknown"; char *fc = "Unknown"; char *fec = ""; char *req_fec = ""; char *an = ""; - new_speed = vsi->back->hw.phy.link_info.link_speed; + new_speed = pf->hw.phy.link_info.link_speed; if ((vsi->current_isup == isup) && (vsi->current_speed == new_speed)) return; @@ -5366,13 +6448,13 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup) /* Warn user if link speed on NPAR enabled partition is not at * least 10GB */ - if (vsi->back->hw.func_caps.npar_enable && - (vsi->back->hw.phy.link_info.link_speed == I40E_LINK_SPEED_1GB || - vsi->back->hw.phy.link_info.link_speed == I40E_LINK_SPEED_100MB)) + if (pf->hw.func_caps.npar_enable && + (pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_1GB || + pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_100MB)) netdev_warn(vsi->netdev, "The partition detected link speed that is less than 10Gbps\n"); - switch (vsi->back->hw.phy.link_info.link_speed) { + switch (pf->hw.phy.link_info.link_speed) { case I40E_LINK_SPEED_40GB: speed = "40 G"; break; @@ -5395,7 +6477,7 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup) break; } - switch (vsi->back->hw.fc.current_mode) { + switch (pf->hw.fc.current_mode) { case I40E_FC_FULL: fc = "RX/TX"; break; @@ -5410,18 +6492,18 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup) break; } - if (vsi->back->hw.phy.link_info.link_speed == I40E_LINK_SPEED_25GB) { + if (pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_25GB) { req_fec = ", Requested FEC: None"; fec = ", FEC: None"; an = ", Autoneg: False"; - if (vsi->back->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED) + if (pf->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED) an = ", Autoneg: True"; - if (vsi->back->hw.phy.link_info.fec_info & + if (pf->hw.phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_KR_ENA) fec = ", FEC: CL74 FC-FEC/BASE-R"; - else if (vsi->back->hw.phy.link_info.fec_info & + else if (pf->hw.phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_RS_ENA) fec = ", FEC: CL108 RS-FEC"; @@ -5470,15 +6552,6 @@ static int i40e_up_complete(struct i40e_vsi *vsi) i40e_print_link_message(vsi, true); netif_tx_start_all_queues(vsi->netdev); netif_carrier_on(vsi->netdev); - } else if (vsi->netdev) { - i40e_print_link_message(vsi, false); - /* need to check for qualified module here*/ - if ((pf->hw.phy.link_info.link_info & - I40E_AQ_MEDIA_AVAILABLE) && - (!(pf->hw.phy.link_info.an_info & - I40E_AQ_QUALIFIED_MODULE))) - netdev_err(vsi->netdev, - "the driver failed to link because an unqualified module was detected."); } /* replay FDIR SB filters */ @@ -5562,74 +6635,928 @@ void i40e_down(struct i40e_vsi *vsi) } /** + * i40e_validate_mqprio_qopt- validate queue mapping info + * @vsi: the VSI being configured + * @mqprio_qopt: queue parametrs + **/ +static int i40e_validate_mqprio_qopt(struct i40e_vsi *vsi, + struct tc_mqprio_qopt_offload *mqprio_qopt) +{ + u64 sum_max_rate = 0; + u64 max_rate = 0; + int i; + + if (mqprio_qopt->qopt.offset[0] != 0 || + mqprio_qopt->qopt.num_tc < 1 || + mqprio_qopt->qopt.num_tc > I40E_MAX_TRAFFIC_CLASS) + return -EINVAL; + for (i = 0; ; i++) { + if (!mqprio_qopt->qopt.count[i]) + return -EINVAL; + if (mqprio_qopt->min_rate[i]) { + dev_err(&vsi->back->pdev->dev, + "Invalid min tx rate (greater than 0) specified\n"); + return -EINVAL; + } + max_rate = mqprio_qopt->max_rate[i]; + do_div(max_rate, I40E_BW_MBPS_DIVISOR); + sum_max_rate += max_rate; + + if (i >= mqprio_qopt->qopt.num_tc - 1) + break; + if (mqprio_qopt->qopt.offset[i + 1] != + (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) + return -EINVAL; + } + if (vsi->num_queue_pairs < + (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) { + return -EINVAL; + } + if (sum_max_rate > i40e_get_link_speed(vsi)) { + dev_err(&vsi->back->pdev->dev, + "Invalid max tx rate specified\n"); + return -EINVAL; + } + return 0; +} + +/** + * i40e_vsi_set_default_tc_config - set default values for tc configuration + * @vsi: the VSI being configured + **/ +static void i40e_vsi_set_default_tc_config(struct i40e_vsi *vsi) +{ + u16 qcount; + int i; + + /* Only TC0 is enabled */ + vsi->tc_config.numtc = 1; + vsi->tc_config.enabled_tc = 1; + qcount = min_t(int, vsi->alloc_queue_pairs, + i40e_pf_get_max_q_per_tc(vsi->back)); + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { + /* For the TC that is not enabled set the offset to to default + * queue and allocate one queue for the given TC. + */ + vsi->tc_config.tc_info[i].qoffset = 0; + if (i == 0) + vsi->tc_config.tc_info[i].qcount = qcount; + else + vsi->tc_config.tc_info[i].qcount = 1; + vsi->tc_config.tc_info[i].netdev_tc = 0; + } +} + +/** * i40e_setup_tc - configure multiple traffic classes * @netdev: net device to configure - * @tc: number of traffic classes to enable + * @type_data: tc offload data **/ -static int i40e_setup_tc(struct net_device *netdev, u8 tc) +static int i40e_setup_tc(struct net_device *netdev, void *type_data) { + struct tc_mqprio_qopt_offload *mqprio_qopt = type_data; struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; - u8 enabled_tc = 0; + u8 enabled_tc = 0, num_tc, hw; + bool need_reset = false; int ret = -EINVAL; + u16 mode; int i; - /* Check if DCB enabled to continue */ - if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) { - netdev_info(netdev, "DCB is not enabled for adapter\n"); - goto exit; + num_tc = mqprio_qopt->qopt.num_tc; + hw = mqprio_qopt->qopt.hw; + mode = mqprio_qopt->mode; + if (!hw) { + pf->flags &= ~I40E_FLAG_TC_MQPRIO; + memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt)); + goto config_tc; } /* Check if MFP enabled */ if (pf->flags & I40E_FLAG_MFP_ENABLED) { - netdev_info(netdev, "Configuring TC not supported in MFP mode\n"); - goto exit; + netdev_info(netdev, + "Configuring TC not supported in MFP mode\n"); + return ret; } + switch (mode) { + case TC_MQPRIO_MODE_DCB: + pf->flags &= ~I40E_FLAG_TC_MQPRIO; - /* Check whether tc count is within enabled limit */ - if (tc > i40e_pf_get_num_tc(pf)) { - netdev_info(netdev, "TC count greater than enabled on link for adapter\n"); - goto exit; + /* Check if DCB enabled to continue */ + if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) { + netdev_info(netdev, + "DCB is not enabled for adapter\n"); + return ret; + } + + /* Check whether tc count is within enabled limit */ + if (num_tc > i40e_pf_get_num_tc(pf)) { + netdev_info(netdev, + "TC count greater than enabled on link for adapter\n"); + return ret; + } + break; + case TC_MQPRIO_MODE_CHANNEL: + if (pf->flags & I40E_FLAG_DCB_ENABLED) { + netdev_info(netdev, + "Full offload of TC Mqprio options is not supported when DCB is enabled\n"); + return ret; + } + if (!(pf->flags & I40E_FLAG_MSIX_ENABLED)) + return ret; + ret = i40e_validate_mqprio_qopt(vsi, mqprio_qopt); + if (ret) + return ret; + memcpy(&vsi->mqprio_qopt, mqprio_qopt, + sizeof(*mqprio_qopt)); + pf->flags |= I40E_FLAG_TC_MQPRIO; + pf->flags &= ~I40E_FLAG_DCB_ENABLED; + break; + default: + return -EINVAL; } +config_tc: /* Generate TC map for number of tc requested */ - for (i = 0; i < tc; i++) + for (i = 0; i < num_tc; i++) enabled_tc |= BIT(i); /* Requesting same TC configuration as already enabled */ - if (enabled_tc == vsi->tc_config.enabled_tc) + if (enabled_tc == vsi->tc_config.enabled_tc && + mode != TC_MQPRIO_MODE_CHANNEL) return 0; /* Quiesce VSI queues */ i40e_quiesce_vsi(vsi); + if (!hw && !(pf->flags & I40E_FLAG_TC_MQPRIO)) + i40e_remove_queue_channels(vsi); + /* Configure VSI for enabled TCs */ ret = i40e_vsi_config_tc(vsi, enabled_tc); if (ret) { netdev_info(netdev, "Failed configuring TC for VSI seid=%d\n", vsi->seid); + need_reset = true; goto exit; } + if (pf->flags & I40E_FLAG_TC_MQPRIO) { + if (vsi->mqprio_qopt.max_rate[0]) { + u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0]; + + do_div(max_tx_rate, I40E_BW_MBPS_DIVISOR); + ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate); + if (!ret) { + u64 credits = max_tx_rate; + + do_div(credits, I40E_BW_CREDIT_DIVISOR); + dev_dbg(&vsi->back->pdev->dev, + "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n", + max_tx_rate, + credits, + vsi->seid); + } else { + need_reset = true; + goto exit; + } + } + ret = i40e_configure_queue_channels(vsi); + if (ret) { + netdev_info(netdev, + "Failed configuring queue channels\n"); + need_reset = true; + goto exit; + } + } + +exit: + /* Reset the configuration data to defaults, only TC0 is enabled */ + if (need_reset) { + i40e_vsi_set_default_tc_config(vsi); + need_reset = false; + } + /* Unquiesce VSI */ i40e_unquiesce_vsi(vsi); + return ret; +} -exit: +/** + * i40e_set_cld_element - sets cloud filter element data + * @filter: cloud filter rule + * @cld: ptr to cloud filter element data + * + * This is helper function to copy data into cloud filter element + **/ +static inline void +i40e_set_cld_element(struct i40e_cloud_filter *filter, + struct i40e_aqc_cloud_filters_element_data *cld) +{ + int i, j; + u32 ipa; + + memset(cld, 0, sizeof(*cld)); + ether_addr_copy(cld->outer_mac, filter->dst_mac); + ether_addr_copy(cld->inner_mac, filter->src_mac); + + if (filter->n_proto != ETH_P_IP && filter->n_proto != ETH_P_IPV6) + return; + + if (filter->n_proto == ETH_P_IPV6) { +#define IPV6_MAX_INDEX (ARRAY_SIZE(filter->dst_ipv6) - 1) + for (i = 0, j = 0; i < ARRAY_SIZE(filter->dst_ipv6); + i++, j += 2) { + ipa = be32_to_cpu(filter->dst_ipv6[IPV6_MAX_INDEX - i]); + ipa = cpu_to_le32(ipa); + memcpy(&cld->ipaddr.raw_v6.data[j], &ipa, sizeof(ipa)); + } + } else { + ipa = be32_to_cpu(filter->dst_ipv4); + memcpy(&cld->ipaddr.v4.data, &ipa, sizeof(ipa)); + } + + cld->inner_vlan = cpu_to_le16(ntohs(filter->vlan_id)); + + /* tenant_id is not supported by FW now, once the support is enabled + * fill the cld->tenant_id with cpu_to_le32(filter->tenant_id) + */ + if (filter->tenant_id) + return; +} + +/** + * i40e_add_del_cloud_filter - Add/del cloud filter + * @vsi: pointer to VSI + * @filter: cloud filter rule + * @add: if true, add, if false, delete + * + * Add or delete a cloud filter for a specific flow spec. + * Returns 0 if the filter were successfully added. + **/ +static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi, + struct i40e_cloud_filter *filter, bool add) +{ + struct i40e_aqc_cloud_filters_element_data cld_filter; + struct i40e_pf *pf = vsi->back; + int ret; + static const u16 flag_table[128] = { + [I40E_CLOUD_FILTER_FLAGS_OMAC] = + I40E_AQC_ADD_CLOUD_FILTER_OMAC, + [I40E_CLOUD_FILTER_FLAGS_IMAC] = + I40E_AQC_ADD_CLOUD_FILTER_IMAC, + [I40E_CLOUD_FILTER_FLAGS_IMAC_IVLAN] = + I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN, + [I40E_CLOUD_FILTER_FLAGS_IMAC_TEN_ID] = + I40E_AQC_ADD_CLOUD_FILTER_IMAC_TEN_ID, + [I40E_CLOUD_FILTER_FLAGS_OMAC_TEN_ID_IMAC] = + I40E_AQC_ADD_CLOUD_FILTER_OMAC_TEN_ID_IMAC, + [I40E_CLOUD_FILTER_FLAGS_IMAC_IVLAN_TEN_ID] = + I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN_TEN_ID, + [I40E_CLOUD_FILTER_FLAGS_IIP] = + I40E_AQC_ADD_CLOUD_FILTER_IIP, + }; + + if (filter->flags >= ARRAY_SIZE(flag_table)) + return I40E_ERR_CONFIG; + + /* copy element needed to add cloud filter from filter */ + i40e_set_cld_element(filter, &cld_filter); + + if (filter->tunnel_type != I40E_CLOUD_TNL_TYPE_NONE) + cld_filter.flags = cpu_to_le16(filter->tunnel_type << + I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT); + + if (filter->n_proto == ETH_P_IPV6) + cld_filter.flags |= cpu_to_le16(flag_table[filter->flags] | + I40E_AQC_ADD_CLOUD_FLAGS_IPV6); + else + cld_filter.flags |= cpu_to_le16(flag_table[filter->flags] | + I40E_AQC_ADD_CLOUD_FLAGS_IPV4); + + if (add) + ret = i40e_aq_add_cloud_filters(&pf->hw, filter->seid, + &cld_filter, 1); + else + ret = i40e_aq_rem_cloud_filters(&pf->hw, filter->seid, + &cld_filter, 1); + if (ret) + dev_dbg(&pf->pdev->dev, + "Failed to %s cloud filter using l4 port %u, err %d aq_err %d\n", + add ? "add" : "delete", filter->dst_port, ret, + pf->hw.aq.asq_last_status); + else + dev_info(&pf->pdev->dev, + "%s cloud filter for VSI: %d\n", + add ? "Added" : "Deleted", filter->seid); return ret; } -static int __i40e_setup_tc(struct net_device *netdev, enum tc_setup_type type, - void *type_data) +/** + * i40e_add_del_cloud_filter_big_buf - Add/del cloud filter using big_buf + * @vsi: pointer to VSI + * @filter: cloud filter rule + * @add: if true, add, if false, delete + * + * Add or delete a cloud filter for a specific flow spec using big buffer. + * Returns 0 if the filter were successfully added. + **/ +static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, + struct i40e_cloud_filter *filter, + bool add) { - struct tc_mqprio_qopt *mqprio = type_data; + struct i40e_aqc_cloud_filters_element_bb cld_filter; + struct i40e_pf *pf = vsi->back; + int ret; + + /* Both (src/dst) valid mac_addr are not supported */ + if ((is_valid_ether_addr(filter->dst_mac) && + is_valid_ether_addr(filter->src_mac)) || + (is_multicast_ether_addr(filter->dst_mac) && + is_multicast_ether_addr(filter->src_mac))) + return -EINVAL; + + /* Make sure port is specified, otherwise bail out, for channel + * specific cloud filter needs 'L4 port' to be non-zero + */ + if (!filter->dst_port) + return -EINVAL; + + /* adding filter using src_port/src_ip is not supported at this stage */ + if (filter->src_port || filter->src_ipv4 || + !ipv6_addr_any(&filter->ip.v6.src_ip6)) + return -EINVAL; + + /* copy element needed to add cloud filter from filter */ + i40e_set_cld_element(filter, &cld_filter.element); + + if (is_valid_ether_addr(filter->dst_mac) || + is_valid_ether_addr(filter->src_mac) || + is_multicast_ether_addr(filter->dst_mac) || + is_multicast_ether_addr(filter->src_mac)) { + /* MAC + IP : unsupported mode */ + if (filter->dst_ipv4) + return -EINVAL; + + /* since we validated that L4 port must be valid before + * we get here, start with respective "flags" value + * and update if vlan is present or not + */ + cld_filter.element.flags = + cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_MAC_PORT); + + if (filter->vlan_id) { + cld_filter.element.flags = + cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_MAC_VLAN_PORT); + } + + } else if (filter->dst_ipv4 || + !ipv6_addr_any(&filter->ip.v6.dst_ip6)) { + cld_filter.element.flags = + cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_IP_PORT); + if (filter->n_proto == ETH_P_IPV6) + cld_filter.element.flags |= + cpu_to_le16(I40E_AQC_ADD_CLOUD_FLAGS_IPV6); + else + cld_filter.element.flags |= + cpu_to_le16(I40E_AQC_ADD_CLOUD_FLAGS_IPV4); + } else { + dev_err(&pf->pdev->dev, + "either mac or ip has to be valid for cloud filter\n"); + return -EINVAL; + } + + /* Now copy L4 port in Byte 6..7 in general fields */ + cld_filter.general_fields[I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD0] = + be16_to_cpu(filter->dst_port); + + if (add) { + /* Validate current device switch mode, change if necessary */ + ret = i40e_validate_and_set_switch_mode(vsi); + if (ret) { + dev_err(&pf->pdev->dev, + "failed to set switch mode, ret %d\n", + ret); + return ret; + } - if (type != TC_SETUP_MQPRIO) + ret = i40e_aq_add_cloud_filters_bb(&pf->hw, filter->seid, + &cld_filter, 1); + } else { + ret = i40e_aq_rem_cloud_filters_bb(&pf->hw, filter->seid, + &cld_filter, 1); + } + + if (ret) + dev_dbg(&pf->pdev->dev, + "Failed to %s cloud filter(big buffer) err %d aq_err %d\n", + add ? "add" : "delete", ret, pf->hw.aq.asq_last_status); + else + dev_info(&pf->pdev->dev, + "%s cloud filter for VSI: %d, L4 port: %d\n", + add ? "add" : "delete", filter->seid, + ntohs(filter->dst_port)); + return ret; +} + +/** + * i40e_parse_cls_flower - Parse tc flower filters provided by kernel + * @vsi: Pointer to VSI + * @cls_flower: Pointer to struct tc_cls_flower_offload + * @filter: Pointer to cloud filter structure + * + **/ +static int i40e_parse_cls_flower(struct i40e_vsi *vsi, + struct tc_cls_flower_offload *f, + struct i40e_cloud_filter *filter) +{ + u16 n_proto_mask = 0, n_proto_key = 0, addr_type = 0; + struct i40e_pf *pf = vsi->back; + u8 field_flags = 0; + + if (f->dissector->used_keys & + ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | + BIT(FLOW_DISSECTOR_KEY_BASIC) | + BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_VLAN) | + BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_PORTS) | + BIT(FLOW_DISSECTOR_KEY_ENC_KEYID))) { + dev_err(&pf->pdev->dev, "Unsupported key used: 0x%x\n", + f->dissector->used_keys); return -EOPNOTSUPP; + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_dissector_key_keyid *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID, + f->key); + + struct flow_dissector_key_keyid *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID, + f->mask); + + if (mask->keyid != 0) + field_flags |= I40E_CLOUD_FIELD_TEN_ID; + + filter->tenant_id = be32_to_cpu(key->keyid); + } - mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_dissector_key_basic *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_BASIC, + f->key); - return i40e_setup_tc(netdev, mqprio->num_tc); + struct flow_dissector_key_basic *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_BASIC, + f->mask); + + n_proto_key = ntohs(key->n_proto); + n_proto_mask = ntohs(mask->n_proto); + + if (n_proto_key == ETH_P_ALL) { + n_proto_key = 0; + n_proto_mask = 0; + } + filter->n_proto = n_proto_key & n_proto_mask; + filter->ip_proto = key->ip_proto; + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_dissector_key_eth_addrs *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ETH_ADDRS, + f->key); + + struct flow_dissector_key_eth_addrs *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ETH_ADDRS, + f->mask); + + /* use is_broadcast and is_zero to check for all 0xf or 0 */ + if (!is_zero_ether_addr(mask->dst)) { + if (is_broadcast_ether_addr(mask->dst)) { + field_flags |= I40E_CLOUD_FIELD_OMAC; + } else { + dev_err(&pf->pdev->dev, "Bad ether dest mask %pM\n", + mask->dst); + return I40E_ERR_CONFIG; + } + } + + if (!is_zero_ether_addr(mask->src)) { + if (is_broadcast_ether_addr(mask->src)) { + field_flags |= I40E_CLOUD_FIELD_IMAC; + } else { + dev_err(&pf->pdev->dev, "Bad ether src mask %pM\n", + mask->src); + return I40E_ERR_CONFIG; + } + } + ether_addr_copy(filter->dst_mac, key->dst); + ether_addr_copy(filter->src_mac, key->src); + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_dissector_key_vlan *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_VLAN, + f->key); + struct flow_dissector_key_vlan *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_VLAN, + f->mask); + + if (mask->vlan_id) { + if (mask->vlan_id == VLAN_VID_MASK) { + field_flags |= I40E_CLOUD_FIELD_IVLAN; + + } else { + dev_err(&pf->pdev->dev, "Bad vlan mask 0x%04x\n", + mask->vlan_id); + return I40E_ERR_CONFIG; + } + } + + filter->vlan_id = cpu_to_be16(key->vlan_id); + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_dissector_key_control *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_CONTROL, + f->key); + + addr_type = key->addr_type; + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_dissector_key_ipv4_addrs *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, + f->key); + struct flow_dissector_key_ipv4_addrs *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, + f->mask); + + if (mask->dst) { + if (mask->dst == cpu_to_be32(0xffffffff)) { + field_flags |= I40E_CLOUD_FIELD_IIP; + } else { + mask->dst = be32_to_cpu(mask->dst); + dev_err(&pf->pdev->dev, "Bad ip dst mask %pI4\n", + &mask->dst); + return I40E_ERR_CONFIG; + } + } + + if (mask->src) { + if (mask->src == cpu_to_be32(0xffffffff)) { + field_flags |= I40E_CLOUD_FIELD_IIP; + } else { + mask->src = be32_to_cpu(mask->src); + dev_err(&pf->pdev->dev, "Bad ip src mask %pI4\n", + &mask->src); + return I40E_ERR_CONFIG; + } + } + + if (field_flags & I40E_CLOUD_FIELD_TEN_ID) { + dev_err(&pf->pdev->dev, "Tenant id not allowed for ip filter\n"); + return I40E_ERR_CONFIG; + } + filter->dst_ipv4 = key->dst; + filter->src_ipv4 = key->src; + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_dissector_key_ipv6_addrs *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, + f->key); + struct flow_dissector_key_ipv6_addrs *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, + f->mask); + + /* src and dest IPV6 address should not be LOOPBACK + * (0:0:0:0:0:0:0:1), which can be represented as ::1 + */ + if (ipv6_addr_loopback(&key->dst) || + ipv6_addr_loopback(&key->src)) { + dev_err(&pf->pdev->dev, + "Bad ipv6, addr is LOOPBACK\n"); + return I40E_ERR_CONFIG; + } + if (!ipv6_addr_any(&mask->dst) || !ipv6_addr_any(&mask->src)) + field_flags |= I40E_CLOUD_FIELD_IIP; + + memcpy(&filter->src_ipv6, &key->src.s6_addr32, + sizeof(filter->src_ipv6)); + memcpy(&filter->dst_ipv6, &key->dst.s6_addr32, + sizeof(filter->dst_ipv6)); + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_dissector_key_ports *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_PORTS, + f->key); + struct flow_dissector_key_ports *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_PORTS, + f->mask); + + if (mask->src) { + if (mask->src == cpu_to_be16(0xffff)) { + field_flags |= I40E_CLOUD_FIELD_IIP; + } else { + dev_err(&pf->pdev->dev, "Bad src port mask 0x%04x\n", + be16_to_cpu(mask->src)); + return I40E_ERR_CONFIG; + } + } + + if (mask->dst) { + if (mask->dst == cpu_to_be16(0xffff)) { + field_flags |= I40E_CLOUD_FIELD_IIP; + } else { + dev_err(&pf->pdev->dev, "Bad dst port mask 0x%04x\n", + be16_to_cpu(mask->dst)); + return I40E_ERR_CONFIG; + } + } + + filter->dst_port = key->dst; + filter->src_port = key->src; + + switch (filter->ip_proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + break; + default: + dev_err(&pf->pdev->dev, + "Only UDP and TCP transport are supported\n"); + return -EINVAL; + } + } + filter->flags = field_flags; + return 0; +} + +/** + * i40e_handle_tclass: Forward to a traffic class on the device + * @vsi: Pointer to VSI + * @tc: traffic class index on the device + * @filter: Pointer to cloud filter structure + * + **/ +static int i40e_handle_tclass(struct i40e_vsi *vsi, u32 tc, + struct i40e_cloud_filter *filter) +{ + struct i40e_channel *ch, *ch_tmp; + + /* direct to a traffic class on the same device */ + if (tc == 0) { + filter->seid = vsi->seid; + return 0; + } else if (vsi->tc_config.enabled_tc & BIT(tc)) { + if (!filter->dst_port) { + dev_err(&vsi->back->pdev->dev, + "Specify destination port to direct to traffic class that is not default\n"); + return -EINVAL; + } + if (list_empty(&vsi->ch_list)) + return -EINVAL; + list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, + list) { + if (ch->seid == vsi->tc_seid_map[tc]) + filter->seid = ch->seid; + } + return 0; + } + dev_err(&vsi->back->pdev->dev, "TC is not enabled\n"); + return -EINVAL; +} + +/** + * i40e_configure_clsflower - Configure tc flower filters + * @vsi: Pointer to VSI + * @cls_flower: Pointer to struct tc_cls_flower_offload + * + **/ +static int i40e_configure_clsflower(struct i40e_vsi *vsi, + struct tc_cls_flower_offload *cls_flower) +{ + int tc = tc_classid_to_hwtc(vsi->netdev, cls_flower->classid); + struct i40e_cloud_filter *filter = NULL; + struct i40e_pf *pf = vsi->back; + int err = 0; + + if (tc < 0) { + dev_err(&vsi->back->pdev->dev, "Invalid traffic class\n"); + return -EINVAL; + } + + if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) || + test_bit(__I40E_RESET_INTR_RECEIVED, pf->state)) + return -EBUSY; + + if (pf->fdir_pf_active_filters || + (!hlist_empty(&pf->fdir_filter_list))) { + dev_err(&vsi->back->pdev->dev, + "Flow Director Sideband filters exists, turn ntuple off to configure cloud filters\n"); + return -EINVAL; + } + + if (vsi->back->flags & I40E_FLAG_FD_SB_ENABLED) { + dev_err(&vsi->back->pdev->dev, + "Disable Flow Director Sideband, configuring Cloud filters via tc-flower\n"); + vsi->back->flags &= ~I40E_FLAG_FD_SB_ENABLED; + vsi->back->flags |= I40E_FLAG_FD_SB_TO_CLOUD_FILTER; + } + + filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (!filter) + return -ENOMEM; + + filter->cookie = cls_flower->cookie; + + err = i40e_parse_cls_flower(vsi, cls_flower, filter); + if (err < 0) + goto err; + + err = i40e_handle_tclass(vsi, tc, filter); + if (err < 0) + goto err; + + /* Add cloud filter */ + if (filter->dst_port) + err = i40e_add_del_cloud_filter_big_buf(vsi, filter, true); + else + err = i40e_add_del_cloud_filter(vsi, filter, true); + + if (err) { + dev_err(&pf->pdev->dev, + "Failed to add cloud filter, err %s\n", + i40e_stat_str(&pf->hw, err)); + err = i40e_aq_rc_to_posix(err, pf->hw.aq.asq_last_status); + goto err; + } + + /* add filter to the ordered list */ + INIT_HLIST_NODE(&filter->cloud_node); + + hlist_add_head(&filter->cloud_node, &pf->cloud_filter_list); + + pf->num_cloud_filters++; + + return err; +err: + kfree(filter); + return err; +} + +/** + * i40e_find_cloud_filter - Find the could filter in the list + * @vsi: Pointer to VSI + * @cookie: filter specific cookie + * + **/ +static struct i40e_cloud_filter *i40e_find_cloud_filter(struct i40e_vsi *vsi, + unsigned long *cookie) +{ + struct i40e_cloud_filter *filter = NULL; + struct hlist_node *node2; + + hlist_for_each_entry_safe(filter, node2, + &vsi->back->cloud_filter_list, cloud_node) + if (!memcmp(cookie, &filter->cookie, sizeof(filter->cookie))) + return filter; + return NULL; +} + +/** + * i40e_delete_clsflower - Remove tc flower filters + * @vsi: Pointer to VSI + * @cls_flower: Pointer to struct tc_cls_flower_offload + * + **/ +static int i40e_delete_clsflower(struct i40e_vsi *vsi, + struct tc_cls_flower_offload *cls_flower) +{ + struct i40e_cloud_filter *filter = NULL; + struct i40e_pf *pf = vsi->back; + int err = 0; + + filter = i40e_find_cloud_filter(vsi, &cls_flower->cookie); + + if (!filter) + return -EINVAL; + + hash_del(&filter->cloud_node); + + if (filter->dst_port) + err = i40e_add_del_cloud_filter_big_buf(vsi, filter, false); + else + err = i40e_add_del_cloud_filter(vsi, filter, false); + + kfree(filter); + if (err) { + dev_err(&pf->pdev->dev, + "Failed to delete cloud filter, err %s\n", + i40e_stat_str(&pf->hw, err)); + return i40e_aq_rc_to_posix(err, pf->hw.aq.asq_last_status); + } + + pf->num_cloud_filters--; + if (!pf->num_cloud_filters) + if ((pf->flags & I40E_FLAG_FD_SB_TO_CLOUD_FILTER) && + !(pf->flags & I40E_FLAG_FD_SB_INACTIVE)) { + pf->flags |= I40E_FLAG_FD_SB_ENABLED; + pf->flags &= ~I40E_FLAG_FD_SB_TO_CLOUD_FILTER; + pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE; + } + return 0; +} + +/** + * i40e_setup_tc_cls_flower - flower classifier offloads + * @netdev: net device to configure + * @type_data: offload data + **/ +static int i40e_setup_tc_cls_flower(struct i40e_netdev_priv *np, + struct tc_cls_flower_offload *cls_flower) +{ + struct i40e_vsi *vsi = np->vsi; + + if (cls_flower->common.chain_index) + return -EOPNOTSUPP; + + switch (cls_flower->command) { + case TC_CLSFLOWER_REPLACE: + return i40e_configure_clsflower(vsi, cls_flower); + case TC_CLSFLOWER_DESTROY: + return i40e_delete_clsflower(vsi, cls_flower); + case TC_CLSFLOWER_STATS: + return -EOPNOTSUPP; + default: + return -EINVAL; + } +} + +static int i40e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct i40e_netdev_priv *np = cb_priv; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return i40e_setup_tc_cls_flower(np, type_data); + + default: + return -EOPNOTSUPP; + } +} + +static int i40e_setup_tc_block(struct net_device *dev, + struct tc_block_offload *f) +{ + struct i40e_netdev_priv *np = netdev_priv(dev); + + if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + return tcf_block_cb_register(f->block, i40e_setup_tc_block_cb, + np, np); + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, i40e_setup_tc_block_cb, np); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int __i40e_setup_tc(struct net_device *netdev, enum tc_setup_type type, + void *type_data) +{ + switch (type) { + case TC_SETUP_QDISC_MQPRIO: + return i40e_setup_tc(netdev, type_data); + case TC_SETUP_BLOCK: + return i40e_setup_tc_block(netdev, type_data); + default: + return -EOPNOTSUPP; + } } /** @@ -5747,7 +7674,7 @@ err_setup_rx: err_setup_tx: i40e_vsi_free_tx_resources(vsi); if (vsi == pf->vsi[pf->lan_vsi]) - i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED), true); + i40e_do_reset(pf, I40E_PF_RESET_FLAG, true); return err; } @@ -5810,6 +7737,33 @@ static void i40e_fdir_filter_exit(struct i40e_pf *pf) } /** + * i40e_cloud_filter_exit - Cleans up the cloud filters + * @pf: Pointer to PF + * + * This function destroys the hlist where all the cloud filters + * were saved. + **/ +static void i40e_cloud_filter_exit(struct i40e_pf *pf) +{ + struct i40e_cloud_filter *cfilter; + struct hlist_node *node; + + hlist_for_each_entry_safe(cfilter, node, + &pf->cloud_filter_list, cloud_node) { + hlist_del(&cfilter->cloud_node); + kfree(cfilter); + } + pf->num_cloud_filters = 0; + + if ((pf->flags & I40E_FLAG_FD_SB_TO_CLOUD_FILTER) && + !(pf->flags & I40E_FLAG_FD_SB_INACTIVE)) { + pf->flags |= I40E_FLAG_FD_SB_ENABLED; + pf->flags &= ~I40E_FLAG_FD_SB_TO_CLOUD_FILTER; + pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE; + } +} + +/** * i40e_close - Disables a network interface * @netdev: network interface device structure * @@ -5875,7 +7829,7 @@ void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired) wr32(&pf->hw, I40E_GLGEN_RTRIG, val); i40e_flush(&pf->hw); - } else if (reset_flags & BIT_ULL(__I40E_PF_RESET_REQUESTED)) { + } else if (reset_flags & I40E_PF_RESET_FLAG) { /* Request a PF Reset * @@ -6226,6 +8180,7 @@ void i40e_fdir_check_and_reenable(struct i40e_pf *pf) hlist_del(&filter->fdir_node); kfree(filter); pf->fdir_pf_active_filters--; + pf->fd_inv = 0; } } } @@ -6429,8 +8384,7 @@ static void i40e_link_event(struct i40e_pf *pf) new_link == netif_carrier_ok(vsi->netdev))) return; - if (!test_bit(__I40E_VSI_DOWN, vsi->state)) - i40e_print_link_message(vsi, new_link); + i40e_print_link_message(vsi, new_link); /* Notify the base of the switch tree connected to * the link. Floating VEBs are not notified. @@ -6553,12 +8507,26 @@ static void i40e_handle_link_event(struct i40e_pf *pf, */ i40e_link_event(pf); - /* check for unqualified module, if link is down */ - if ((status->link_info & I40E_AQ_MEDIA_AVAILABLE) && - (!(status->an_info & I40E_AQ_QUALIFIED_MODULE)) && - (!(status->link_info & I40E_AQ_LINK_UP))) + /* Check if module meets thermal requirements */ + if (status->phy_type == I40E_PHY_TYPE_NOT_SUPPORTED_HIGH_TEMP) { + dev_err(&pf->pdev->dev, + "Rx/Tx is disabled on this device because the module does not meet thermal requirements.\n"); dev_err(&pf->pdev->dev, - "The driver failed to link because an unqualified module was detected.\n"); + "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n"); + } else { + /* check for unqualified module, if link is down, suppress + * the message if link was forced to be down. + */ + if ((status->link_info & I40E_AQ_MEDIA_AVAILABLE) && + (!(status->an_info & I40E_AQ_QUALIFIED_MODULE)) && + (!(status->link_info & I40E_AQ_LINK_UP)) && + (!(pf->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED))) { + dev_err(&pf->pdev->dev, + "Rx/Tx is disabled on this device because an unsupported SFP module type was detected.\n"); + dev_err(&pf->pdev->dev, + "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n"); + } + } } /** @@ -6900,7 +8868,8 @@ end_reconstitute: * i40e_get_capabilities - get info about the HW * @pf: the PF struct **/ -static int i40e_get_capabilities(struct i40e_pf *pf) +static int i40e_get_capabilities(struct i40e_pf *pf, + enum i40e_admin_queue_opc list_type) { struct i40e_aqc_list_capabilities_element_resp *cap_buf; u16 data_size; @@ -6915,9 +8884,8 @@ static int i40e_get_capabilities(struct i40e_pf *pf) /* this loads the data into the hw struct for us */ err = i40e_aq_discover_capabilities(&pf->hw, cap_buf, buf_len, - &data_size, - i40e_aqc_opc_list_func_capabilities, - NULL); + &data_size, list_type, + NULL); /* data loaded, buffer no longer needed */ kfree(cap_buf); @@ -6934,26 +8902,44 @@ static int i40e_get_capabilities(struct i40e_pf *pf) } } while (err); - if (pf->hw.debug_mask & I40E_DEBUG_USER) - dev_info(&pf->pdev->dev, - "pf=%d, num_vfs=%d, msix_pf=%d, msix_vf=%d, fd_g=%d, fd_b=%d, pf_max_q=%d num_vsi=%d\n", - pf->hw.pf_id, pf->hw.func_caps.num_vfs, - pf->hw.func_caps.num_msix_vectors, - pf->hw.func_caps.num_msix_vectors_vf, - pf->hw.func_caps.fd_filters_guaranteed, - pf->hw.func_caps.fd_filters_best_effort, - pf->hw.func_caps.num_tx_qp, - pf->hw.func_caps.num_vsis); - + if (pf->hw.debug_mask & I40E_DEBUG_USER) { + if (list_type == i40e_aqc_opc_list_func_capabilities) { + dev_info(&pf->pdev->dev, + "pf=%d, num_vfs=%d, msix_pf=%d, msix_vf=%d, fd_g=%d, fd_b=%d, pf_max_q=%d num_vsi=%d\n", + pf->hw.pf_id, pf->hw.func_caps.num_vfs, + pf->hw.func_caps.num_msix_vectors, + pf->hw.func_caps.num_msix_vectors_vf, + pf->hw.func_caps.fd_filters_guaranteed, + pf->hw.func_caps.fd_filters_best_effort, + pf->hw.func_caps.num_tx_qp, + pf->hw.func_caps.num_vsis); + } else if (list_type == i40e_aqc_opc_list_dev_capabilities) { + dev_info(&pf->pdev->dev, + "switch_mode=0x%04x, function_valid=0x%08x\n", + pf->hw.dev_caps.switch_mode, + pf->hw.dev_caps.valid_functions); + dev_info(&pf->pdev->dev, + "SR-IOV=%d, num_vfs for all function=%u\n", + pf->hw.dev_caps.sr_iov_1_1, + pf->hw.dev_caps.num_vfs); + dev_info(&pf->pdev->dev, + "num_vsis=%u, num_rx:%u, num_tx=%u\n", + pf->hw.dev_caps.num_vsis, + pf->hw.dev_caps.num_rx_qp, + pf->hw.dev_caps.num_tx_qp); + } + } + if (list_type == i40e_aqc_opc_list_func_capabilities) { #define DEF_NUM_VSI (1 + (pf->hw.func_caps.fcoe ? 1 : 0) \ + pf->hw.func_caps.num_vfs) - if (pf->hw.revision_id == 0 && (DEF_NUM_VSI > pf->hw.func_caps.num_vsis)) { - dev_info(&pf->pdev->dev, - "got num_vsis %d, setting num_vsis to %d\n", - pf->hw.func_caps.num_vsis, DEF_NUM_VSI); - pf->hw.func_caps.num_vsis = DEF_NUM_VSI; + if (pf->hw.revision_id == 0 && + pf->hw.func_caps.num_vsis < DEF_NUM_VSI) { + dev_info(&pf->pdev->dev, + "got num_vsis %d, setting num_vsis to %d\n", + pf->hw.func_caps.num_vsis, DEF_NUM_VSI); + pf->hw.func_caps.num_vsis = DEF_NUM_VSI; + } } - return 0; } @@ -6995,6 +8981,7 @@ static void i40e_fdir_sb_setup(struct i40e_pf *pf) if (!vsi) { dev_info(&pf->pdev->dev, "Couldn't create FDir VSI\n"); pf->flags &= ~I40E_FLAG_FD_SB_ENABLED; + pf->flags |= I40E_FLAG_FD_SB_INACTIVE; return; } } @@ -7017,6 +9004,95 @@ static void i40e_fdir_teardown(struct i40e_pf *pf) } /** + * i40e_rebuild_cloud_filters - Rebuilds cloud filters for VSIs + * @vsi: PF main vsi + * @seid: seid of main or channel VSIs + * + * Rebuilds cloud filters associated with main VSI and channel VSIs if they + * existed before reset + **/ +static int i40e_rebuild_cloud_filters(struct i40e_vsi *vsi, u16 seid) +{ + struct i40e_cloud_filter *cfilter; + struct i40e_pf *pf = vsi->back; + struct hlist_node *node; + i40e_status ret; + + /* Add cloud filters back if they exist */ + hlist_for_each_entry_safe(cfilter, node, &pf->cloud_filter_list, + cloud_node) { + if (cfilter->seid != seid) + continue; + + if (cfilter->dst_port) + ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter, + true); + else + ret = i40e_add_del_cloud_filter(vsi, cfilter, true); + + if (ret) { + dev_dbg(&pf->pdev->dev, + "Failed to rebuild cloud filter, err %s aq_err %s\n", + i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, + pf->hw.aq.asq_last_status)); + return ret; + } + } + return 0; +} + +/** + * i40e_rebuild_channels - Rebuilds channel VSIs if they existed before reset + * @vsi: PF main vsi + * + * Rebuilds channel VSIs if they existed before reset + **/ +static int i40e_rebuild_channels(struct i40e_vsi *vsi) +{ + struct i40e_channel *ch, *ch_tmp; + i40e_status ret; + + if (list_empty(&vsi->ch_list)) + return 0; + + list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) { + if (!ch->initialized) + break; + /* Proceed with creation of channel (VMDq2) VSI */ + ret = i40e_add_channel(vsi->back, vsi->uplink_seid, ch); + if (ret) { + dev_info(&vsi->back->pdev->dev, + "failed to rebuild channels using uplink_seid %u\n", + vsi->uplink_seid); + return ret; + } + if (ch->max_tx_rate) { + u64 credits = ch->max_tx_rate; + + if (i40e_set_bw_limit(vsi, ch->seid, + ch->max_tx_rate)) + return -EINVAL; + + do_div(credits, I40E_BW_CREDIT_DIVISOR); + dev_dbg(&vsi->back->pdev->dev, + "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n", + ch->max_tx_rate, + credits, + ch->seid); + } + ret = i40e_rebuild_cloud_filters(vsi, ch->seid); + if (ret) { + dev_dbg(&vsi->back->pdev->dev, + "Failed to rebuild cloud filters for channel VSI %u\n", + ch->seid); + return ret; + } + } + return 0; +} + +/** * i40e_prep_for_reset - prep for the core to reset * @pf: board private structure * @lock_acquired: indicates whether or not the lock has been acquired @@ -7152,6 +9228,7 @@ static int i40e_reset(struct i40e_pf *pf) **/ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) { + struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; struct i40e_hw *hw = &pf->hw; u8 set_fc_aq_fail = 0; i40e_status ret; @@ -7177,7 +9254,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) i40e_verify_eeprom(pf); i40e_clear_pxe_mode(hw); - ret = i40e_get_capabilities(pf); + ret = i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities); if (ret) goto end_core_reset; @@ -7234,7 +9311,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) * If there were VEBs but the reconstitution failed, we'll try * try to recover minimal use by getting the basic PF VSI working. */ - if (pf->vsi[pf->lan_vsi]->uplink_seid != pf->mac_seid) { + if (vsi->uplink_seid != pf->mac_seid) { dev_dbg(&pf->pdev->dev, "attempting to rebuild switch\n"); /* find the one VEB connected to the MAC, and find orphans */ for (v = 0; v < I40E_MAX_VEB; v++) { @@ -7258,8 +9335,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) dev_info(&pf->pdev->dev, "rebuild of switch failed: %d, will try to set up simple PF connection\n", ret); - pf->vsi[pf->lan_vsi]->uplink_seid - = pf->mac_seid; + vsi->uplink_seid = pf->mac_seid; break; } else if (pf->veb[v]->uplink_seid == 0) { dev_info(&pf->pdev->dev, @@ -7270,10 +9346,10 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) } } - if (pf->vsi[pf->lan_vsi]->uplink_seid == pf->mac_seid) { + if (vsi->uplink_seid == pf->mac_seid) { dev_dbg(&pf->pdev->dev, "attempting to rebuild PF VSI\n"); /* no VEB, so rebuild only the Main VSI */ - ret = i40e_add_vsi(pf->vsi[pf->lan_vsi]); + ret = i40e_add_vsi(vsi); if (ret) { dev_info(&pf->pdev->dev, "rebuild of Main VSI failed: %d\n", ret); @@ -7281,6 +9357,35 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) } } + if (vsi->mqprio_qopt.max_rate[0]) { + u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0]; + u64 credits = 0; + + do_div(max_tx_rate, I40E_BW_MBPS_DIVISOR); + ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate); + if (ret) + goto end_unlock; + + credits = max_tx_rate; + do_div(credits, I40E_BW_CREDIT_DIVISOR); + dev_dbg(&vsi->back->pdev->dev, + "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n", + max_tx_rate, + credits, + vsi->seid); + } + + ret = i40e_rebuild_cloud_filters(vsi, vsi->seid); + if (ret) + goto end_unlock; + + /* PF Main VSI is rebuild by now, go ahead and rebuild channel VSIs + * for this main VSI if they exist + */ + ret = i40e_rebuild_channels(vsi); + if (ret) + goto end_unlock; + /* Reconfigure hardware for allowing smaller MSS in the case * of TSO, so that we avoid the MDD being fired and causing * a reset in the case of small MSS+TSO. @@ -7615,9 +9720,9 @@ static void i40e_service_task(struct work_struct *work) * i40e_service_timer - timer callback * @data: pointer to PF struct **/ -static void i40e_service_timer(unsigned long data) +static void i40e_service_timer(struct timer_list *t) { - struct i40e_pf *pf = (struct i40e_pf *)data; + struct i40e_pf *pf = from_timer(pf, t, service_timer); mod_timer(&pf->service_timer, round_jiffies(jiffies + pf->service_timer_period)); @@ -7674,7 +9779,7 @@ static int i40e_set_num_rings_in_vsi(struct i40e_vsi *vsi) /** * i40e_vsi_alloc_arrays - Allocate queue and vector pointer arrays for the vsi - * @type: VSI pointer + * @vsi: VSI pointer * @alloc_qvectors: a bool to specify if q_vectors need to be allocated. * * On error: returns error code (negative) @@ -8139,7 +10244,7 @@ static int i40e_init_msix(struct i40e_pf *pf) pf->num_lan_qps = 1; pf->num_lan_msix = 1; - } else if (!vectors_left) { + } else if (v_actual != v_budget) { /* If we have limited resources, we will start with no vectors * for the special features and then allocate vectors to some * of these features based on the policy and at the end disable @@ -8148,7 +10253,8 @@ static int i40e_init_msix(struct i40e_pf *pf) int vec; dev_info(&pf->pdev->dev, - "MSI-X vector limit reached, attempting to redistribute vectors\n"); + "MSI-X vector limit reached with %d, wanted %d, attempting to redistribute vectors\n", + v_actual, v_budget); /* reserve the misc vector */ vec = v_actual - 1; @@ -8196,6 +10302,7 @@ static int i40e_init_msix(struct i40e_pf *pf) (pf->num_fdsb_msix == 0)) { dev_info(&pf->pdev->dev, "Sideband Flowdir disabled, not enough MSI-X vectors\n"); pf->flags &= ~I40E_FLAG_FD_SB_ENABLED; + pf->flags |= I40E_FLAG_FD_SB_INACTIVE; } if ((pf->flags & I40E_FLAG_VMDQ_ENABLED) && (pf->num_vmdq_msix == 0)) { @@ -8313,6 +10420,7 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf) I40E_FLAG_FD_SB_ENABLED | I40E_FLAG_FD_ATR_ENABLED | I40E_FLAG_VMDQ_ENABLED); + pf->flags |= I40E_FLAG_FD_SB_INACTIVE; /* rework the queue expectations without MSIX */ i40e_determine_queue_usage(pf); @@ -8351,6 +10459,55 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf) } /** + * i40e_restore_interrupt_scheme - Restore the interrupt scheme + * @pf: private board data structure + * + * Restore the interrupt scheme that was cleared when we suspended the + * device. This should be called during resume to re-allocate the q_vectors + * and reacquire IRQs. + */ +static int i40e_restore_interrupt_scheme(struct i40e_pf *pf) +{ + int err, i; + + /* We cleared the MSI and MSI-X flags when disabling the old interrupt + * scheme. We need to re-enabled them here in order to attempt to + * re-acquire the MSI or MSI-X vectors + */ + pf->flags |= (I40E_FLAG_MSIX_ENABLED | I40E_FLAG_MSI_ENABLED); + + err = i40e_init_interrupt_scheme(pf); + if (err) + return err; + + /* Now that we've re-acquired IRQs, we need to remap the vectors and + * rings together again. + */ + for (i = 0; i < pf->num_alloc_vsi; i++) { + if (pf->vsi[i]) { + err = i40e_vsi_alloc_q_vectors(pf->vsi[i]); + if (err) + goto err_unwind; + i40e_vsi_map_rings_to_vectors(pf->vsi[i]); + } + } + + err = i40e_setup_misc_vector(pf); + if (err) + goto err_unwind; + + return 0; + +err_unwind: + while (i--) { + if (pf->vsi[i]) + i40e_vsi_free_q_vectors(pf->vsi[i]); + } + + return err; +} + +/** * i40e_setup_misc_vector - Setup the misc vector to handle non queue events * @pf: board private structure * @@ -8363,13 +10520,12 @@ static int i40e_setup_misc_vector(struct i40e_pf *pf) struct i40e_hw *hw = &pf->hw; int err = 0; - /* Only request the irq if this is the first time through, and - * not when we're rebuilding after a Reset - */ - if (!test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) { + /* Only request the IRQ once, the first time through. */ + if (!test_and_set_bit(__I40E_MISC_IRQ_REQUESTED, pf->state)) { err = request_irq(pf->msix_entries[0].vector, i40e_intr, 0, pf->int_name, pf); if (err) { + clear_bit(__I40E_MISC_IRQ_REQUESTED, pf->state); dev_info(&pf->pdev->dev, "request_irq for %s failed: %d\n", pf->int_name, err); @@ -8385,51 +10541,12 @@ static int i40e_setup_misc_vector(struct i40e_pf *pf) i40e_flush(hw); - i40e_irq_dynamic_enable_icr0(pf, true); + i40e_irq_dynamic_enable_icr0(pf); return err; } /** - * i40e_config_rss_aq - Prepare for RSS using AQ commands - * @vsi: vsi structure - * @seed: RSS hash seed - **/ -static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed, - u8 *lut, u16 lut_size) -{ - struct i40e_pf *pf = vsi->back; - struct i40e_hw *hw = &pf->hw; - int ret = 0; - - if (seed) { - struct i40e_aqc_get_set_rss_key_data *seed_dw = - (struct i40e_aqc_get_set_rss_key_data *)seed; - ret = i40e_aq_set_rss_key(hw, vsi->id, seed_dw); - if (ret) { - dev_info(&pf->pdev->dev, - "Cannot set RSS key, err %s aq_err %s\n", - i40e_stat_str(hw, ret), - i40e_aq_str(hw, hw->aq.asq_last_status)); - return ret; - } - } - if (lut) { - bool pf_lut = vsi->type == I40E_VSI_MAIN ? true : false; - - ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, lut, lut_size); - if (ret) { - dev_info(&pf->pdev->dev, - "Cannot set RSS lut, err %s aq_err %s\n", - i40e_stat_str(hw, ret), - i40e_aq_str(hw, hw->aq.asq_last_status)); - return ret; - } - } - return ret; -} - -/** * i40e_get_rss_aq - Get RSS keys and lut by using AQ commands * @vsi: Pointer to vsi structure * @seed: Buffter to store the hash keys @@ -8476,46 +10593,6 @@ static int i40e_get_rss_aq(struct i40e_vsi *vsi, const u8 *seed, } /** - * i40e_vsi_config_rss - Prepare for VSI(VMDq) RSS if used - * @vsi: VSI structure - **/ -static int i40e_vsi_config_rss(struct i40e_vsi *vsi) -{ - u8 seed[I40E_HKEY_ARRAY_SIZE]; - struct i40e_pf *pf = vsi->back; - u8 *lut; - int ret; - - if (!(pf->hw_features & I40E_HW_RSS_AQ_CAPABLE)) - return 0; - - if (!vsi->rss_size) - vsi->rss_size = min_t(int, pf->alloc_rss_size, - vsi->num_queue_pairs); - if (!vsi->rss_size) - return -EINVAL; - - lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); - if (!lut) - return -ENOMEM; - /* Use the user configured hash keys and lookup table if there is one, - * otherwise use default - */ - if (vsi->rss_lut_user) - memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size); - else - i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size); - if (vsi->rss_hkey_user) - memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE); - else - netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE); - ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size); - kfree(lut); - - return ret; -} - -/** * i40e_config_rss_reg - Configure RSS keys and lut by writing registers * @vsi: Pointer to vsi structure * @seed: RSS hash seed @@ -8913,8 +10990,8 @@ static int i40e_sw_init(struct i40e_pf *pf) I40E_FLAG_MSIX_ENABLED; /* Set default ITR */ - pf->rx_itr_default = I40E_ITR_DYNAMIC | I40E_ITR_RX_DEF; - pf->tx_itr_default = I40E_ITR_DYNAMIC | I40E_ITR_TX_DEF; + pf->rx_itr_default = I40E_ITR_RX_DEF; + pf->tx_itr_default = I40E_ITR_TX_DEF; /* Depending on PF configurations, it is possible that the RSS * maximum might end up larger than the available queues @@ -9014,6 +11091,11 @@ static int i40e_sw_init(struct i40e_pf *pf) (pf->hw.aq.fw_maj_ver >= 5))) pf->hw_features |= I40E_HW_USE_SET_LLDP_MIB; + /* Enable PTP L4 if FW > v6.0 */ + if (pf->hw.mac.type == I40E_MAC_XL710 && + pf->hw.aq.fw_maj_ver >= 6) + pf->hw_features |= I40E_HW_PTP_L4_CAPABLE; + if (pf->hw.func_caps.vmdq) { pf->num_vmdq_vsis = I40E_DEFAULT_NUM_VMDQ_VSI; pf->flags |= I40E_FLAG_VMDQ_ENABLED; @@ -9079,9 +11161,13 @@ bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features) /* Enable filters and mark for reset */ if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED)) need_reset = true; - /* enable FD_SB only if there is MSI-X vector */ - if (pf->num_fdsb_msix > 0) + /* enable FD_SB only if there is MSI-X vector and no cloud + * filters exist + */ + if (pf->num_fdsb_msix > 0 && !pf->num_cloud_filters) { pf->flags |= I40E_FLAG_FD_SB_ENABLED; + pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE; + } } else { /* turn off filters, mark for reset and clear SW filter list */ if (pf->flags & I40E_FLAG_FD_SB_ENABLED) { @@ -9090,6 +11176,8 @@ bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features) } pf->flags &= ~(I40E_FLAG_FD_SB_ENABLED | I40E_FLAG_FD_SB_AUTO_DISABLED); + pf->flags |= I40E_FLAG_FD_SB_INACTIVE; + /* reset fd counters */ pf->fd_add_err = 0; pf->fd_atr_cnt = 0; @@ -9151,10 +11239,16 @@ static int i40e_set_features(struct net_device *netdev, else i40e_vlan_stripping_disable(vsi); + if (!(features & NETIF_F_HW_TC) && pf->num_cloud_filters) { + dev_err(&pf->pdev->dev, + "Offloaded tc filters active, can't turn hw_tc_offload off"); + return -EINVAL; + } + need_reset = i40e_set_ntuple(pf, features); if (need_reset) - i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED), true); + i40e_do_reset(pf, I40E_PF_RESET_FLAG, true); return 0; } @@ -9406,8 +11500,7 @@ static int i40e_ndo_bridge_setlink(struct net_device *dev, pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; else pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED; - i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED), - true); + i40e_do_reset(pf, I40E_PF_RESET_FLAG, true); break; } } @@ -9555,12 +11648,12 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, } /** - * i40e_xdp - implements ndo_xdp for i40e + * i40e_xdp - implements ndo_bpf for i40e * @dev: netdevice * @xdp: XDP command **/ static int i40e_xdp(struct net_device *dev, - struct netdev_xdp *xdp) + struct netdev_bpf *xdp) { struct i40e_netdev_priv *np = netdev_priv(dev); struct i40e_vsi *vsi = np->vsi; @@ -9612,7 +11705,7 @@ static const struct net_device_ops i40e_netdev_ops = { .ndo_features_check = i40e_features_check, .ndo_bridge_getlink = i40e_ndo_bridge_getlink, .ndo_bridge_setlink = i40e_ndo_bridge_setlink, - .ndo_xdp = i40e_xdp, + .ndo_bpf = i40e_xdp, }; /** @@ -9671,7 +11764,8 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) netdev->vlan_features |= hw_enc_features | NETIF_F_TSO_MANGLEID; if (!(pf->flags & I40E_FLAG_MFP_ENABLED)) - netdev->hw_features |= NETIF_F_NTUPLE; + netdev->hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC; + hw_features = hw_enc_features | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; @@ -9849,6 +11943,31 @@ static int i40e_add_vsi(struct i40e_vsi *vsi) enabled_tc = i40e_pf_get_tc_map(pf); + /* Source pruning is enabled by default, so the flag is + * negative logic - if it's set, we need to fiddle with + * the VSI to disable source pruning. + */ + if (pf->flags & I40E_FLAG_SOURCE_PRUNING_DISABLED) { + memset(&ctxt, 0, sizeof(ctxt)); + ctxt.seid = pf->main_vsi_seid; + ctxt.pf_num = pf->hw.pf_id; + ctxt.vf_num = 0; + ctxt.info.valid_sections |= + cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID); + ctxt.info.switch_id = + cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_LOCAL_LB); + ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); + if (ret) { + dev_info(&pf->pdev->dev, + "update vsi failed, err %s aq_err %s\n", + i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, + pf->hw.aq.asq_last_status)); + ret = -ENOENT; + goto err; + } + } + /* MFP mode setup queue map and update VSI */ if ((pf->flags & I40E_FLAG_MFP_ENABLED) && !(pf->hw.func_caps.iscsi)) { /* NIC type PF */ @@ -10951,14 +13070,16 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit) */ if ((pf->hw.pf_id == 0) && - !(pf->flags & I40E_FLAG_TRUE_PROMISC_SUPPORT)) + !(pf->flags & I40E_FLAG_TRUE_PROMISC_SUPPORT)) { flags = I40E_AQ_SET_SWITCH_CFG_PROMISC; + pf->last_sw_conf_flags = flags; + } if (pf->hw.pf_id == 0) { u16 valid_flags; valid_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC; - ret = i40e_aq_set_switch_config(&pf->hw, flags, valid_flags, + ret = i40e_aq_set_switch_config(&pf->hw, flags, valid_flags, 0, NULL); if (ret && pf->hw.aq.asq_last_status != I40E_AQ_RC_ESRCH) { dev_info(&pf->pdev->dev, @@ -10968,6 +13089,7 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit) pf->hw.aq.asq_last_status)); /* not a fatal problem, just keep going */ } + pf->last_sw_conf_valid_flags = valid_flags; } /* first time setup */ @@ -10988,6 +13110,7 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit) vsi = i40e_vsi_reinit_setup(pf->vsi[pf->lan_vsi]); if (!vsi) { dev_info(&pf->pdev->dev, "setup of MAIN VSI failed\n"); + i40e_cloud_filter_exit(pf); i40e_fdir_teardown(pf); return -EAGAIN; } @@ -11039,6 +13162,7 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit) static void i40e_determine_queue_usage(struct i40e_pf *pf) { int queues_left; + int q_max; pf->num_lan_qps = 0; @@ -11063,6 +13187,7 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf) I40E_FLAG_DCB_ENABLED | I40E_FLAG_SRIOV_ENABLED | I40E_FLAG_VMDQ_ENABLED); + pf->flags |= I40E_FLAG_FD_SB_INACTIVE; } else if (!(pf->flags & (I40E_FLAG_RSS_ENABLED | I40E_FLAG_FD_SB_ENABLED | I40E_FLAG_FD_ATR_ENABLED | @@ -11077,6 +13202,7 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf) I40E_FLAG_FD_ATR_ENABLED | I40E_FLAG_DCB_ENABLED | I40E_FLAG_VMDQ_ENABLED); + pf->flags |= I40E_FLAG_FD_SB_INACTIVE; } else { /* Not enough queues for all TCs */ if ((pf->flags & I40E_FLAG_DCB_CAPABLE) && @@ -11085,10 +13211,12 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf) I40E_FLAG_DCB_ENABLED); dev_info(&pf->pdev->dev, "not enough queues for DCB. DCB is disabled.\n"); } - pf->num_lan_qps = max_t(int, pf->rss_size_max, - num_online_cpus()); - pf->num_lan_qps = min_t(int, pf->num_lan_qps, - pf->hw.func_caps.num_tx_qp); + + /* limit lan qps to the smaller of qps, cpus or msix */ + q_max = max_t(int, pf->rss_size_max, num_online_cpus()); + q_max = min_t(int, q_max, pf->hw.func_caps.num_tx_qp); + q_max = min_t(int, q_max, pf->hw.func_caps.num_msix_vectors); + pf->num_lan_qps = q_max; queues_left -= pf->num_lan_qps; } @@ -11098,6 +13226,7 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf) queues_left -= 1; /* save 1 queue for FD */ } else { pf->flags &= ~I40E_FLAG_FD_SB_ENABLED; + pf->flags |= I40E_FLAG_FD_SB_INACTIVE; dev_info(&pf->pdev->dev, "not enough queues for Flow Director. Flow Director feature is disabled\n"); } } @@ -11304,6 +13433,13 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->bus.bus_id = pdev->bus->number; pf->instance = pfs_found; + /* Select something other than the 802.1ad ethertype for the + * switch to use internally and drop on ingress. + */ + hw->switch_tag = 0xffff; + hw->first_tag = ETH_P_8021AD; + hw->second_tag = ETH_P_8021Q; + INIT_LIST_HEAD(&pf->l3_flex_pit_list); INIT_LIST_HEAD(&pf->l4_flex_pit_list); @@ -11380,11 +13516,10 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) i40e_nvm_version_str(hw)); if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && - hw->aq.api_min_ver > I40E_FW_API_VERSION_MINOR) + hw->aq.api_min_ver > I40E_FW_MINOR_VERSION(hw)) dev_info(&pdev->dev, "The driver for the device detected a newer version of the NVM image than expected. Please install the most recent version of the network driver.\n"); - else if (hw->aq.api_maj_ver < I40E_FW_API_VERSION_MAJOR || - hw->aq.api_min_ver < (I40E_FW_API_VERSION_MINOR - 1)) + else if (hw->aq.api_maj_ver == 1 && hw->aq.api_min_ver < 4) dev_info(&pdev->dev, "The driver for the device detected an older version of the NVM image than expected. Please update the NVM image.\n"); @@ -11395,7 +13530,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) dev_warn(&pdev->dev, "This device is a pre-production adapter/LOM. Please be aware there may be issues with your hardware. If you are experiencing problems please contact your Intel or hardware representative who provided you with this hardware.\n"); i40e_clear_pxe_mode(hw); - err = i40e_get_capabilities(pf); + err = i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities); if (err) goto err_adminq_setup; @@ -11454,7 +13589,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) #endif /* CONFIG_I40E_DCB */ /* set up periodic task facility */ - setup_timer(&pf->service_timer, i40e_service_timer, (unsigned long)pf); + timer_setup(&pf->service_timer, i40e_service_timer, 0); pf->service_timer_period = HZ; INIT_WORK(&pf->service_task, i40e_service_task); @@ -11506,6 +13641,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) dev_info(&pdev->dev, "setup_pf_switch failed: %d\n", err); goto err_vsis; } + INIT_LIST_HEAD(&pf->vsi[pf->lan_vsi]->ch_list); /* Make sure flow control is set according to current settings */ err = i40e_set_fc(hw, &set_fc_aq_fail, true); @@ -11777,7 +13913,7 @@ static void i40e_remove(struct pci_dev *pdev) /* no more scheduling of any task */ set_bit(__I40E_SUSPENDED, pf->state); set_bit(__I40E_DOWN, pf->state); - if (pf->service_timer.data) + if (pf->service_timer.function) del_timer_sync(&pf->service_timer); if (pf->service_task.func) cancel_work_sync(&pf->service_task); @@ -11812,6 +13948,8 @@ static void i40e_remove(struct pci_dev *pdev) if (pf->vsi[pf->lan_vsi]) i40e_vsi_release(pf->vsi[pf->lan_vsi]); + i40e_cloud_filter_exit(pf); + /* remove attached clients */ if (pf->flags & I40E_FLAG_IWARP_ENABLED) { ret_code = i40e_lan_del_device(pf); @@ -11937,6 +14075,28 @@ static pci_ers_result_t i40e_pci_error_slot_reset(struct pci_dev *pdev) } /** + * i40e_pci_error_reset_prepare - prepare device driver for pci reset + * @pdev: PCI device information struct + */ +static void i40e_pci_error_reset_prepare(struct pci_dev *pdev) +{ + struct i40e_pf *pf = pci_get_drvdata(pdev); + + i40e_prep_for_reset(pf, false); +} + +/** + * i40e_pci_error_reset_done - pci reset done, device driver reset can begin + * @pdev: PCI device information struct + */ +static void i40e_pci_error_reset_done(struct pci_dev *pdev) +{ + struct i40e_pf *pf = pci_get_drvdata(pdev); + + i40e_reset_and_rebuild(pf, false, false); +} + +/** * i40e_pci_error_resume - restart operations after PCI error recovery * @pdev: PCI device information struct * @@ -12021,6 +14181,7 @@ static void i40e_shutdown(struct pci_dev *pdev) del_timer_sync(&pf->service_timer); cancel_work_sync(&pf->service_task); + i40e_cloud_filter_exit(pf); i40e_fdir_teardown(pf); /* Client close must be called explicitly here because the timer @@ -12046,20 +14207,26 @@ static void i40e_shutdown(struct pci_dev *pdev) } } -#ifdef CONFIG_PM /** - * i40e_suspend - PCI callback for moving to D3 - * @pdev: PCI device information struct + * i40e_suspend - PM callback for moving to D3 + * @dev: generic device information structure **/ -static int i40e_suspend(struct pci_dev *pdev, pm_message_t state) +static int __maybe_unused i40e_suspend(struct device *dev) { + struct pci_dev *pdev = to_pci_dev(dev); struct i40e_pf *pf = pci_get_drvdata(pdev); struct i40e_hw *hw = &pf->hw; - int retval = 0; - set_bit(__I40E_SUSPENDED, pf->state); + /* If we're already suspended, then there is nothing to do */ + if (test_and_set_bit(__I40E_SUSPENDED, pf->state)) + return 0; + set_bit(__I40E_DOWN, pf->state); + /* Ensure service task will not be running */ + del_timer_sync(&pf->service_timer); + cancel_work_sync(&pf->service_task); + if (pf->wol_en && (pf->hw_features & I40E_HW_WOL_MC_MAGIC_PKT_WAKE)) i40e_enable_mc_magic_wake(pf); @@ -12068,81 +14235,70 @@ static int i40e_suspend(struct pci_dev *pdev, pm_message_t state) wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0)); wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0)); - i40e_stop_misc_vector(pf); - if (pf->msix_entries) { - synchronize_irq(pf->msix_entries[0].vector); - free_irq(pf->msix_entries[0].vector, pf); - } - retval = pci_save_state(pdev); - if (retval) - return retval; - - pci_wake_from_d3(pdev, pf->wol_en); - pci_set_power_state(pdev, PCI_D3hot); + /* Clear the interrupt scheme and release our IRQs so that the system + * can safely hibernate even when there are a large number of CPUs. + * Otherwise hibernation might fail when mapping all the vectors back + * to CPU0. + */ + i40e_clear_interrupt_scheme(pf); - return retval; + return 0; } /** - * i40e_resume - PCI callback for waking up from D3 - * @pdev: PCI device information struct + * i40e_resume - PM callback for waking up from D3 + * @dev: generic device information structure **/ -static int i40e_resume(struct pci_dev *pdev) +static int __maybe_unused i40e_resume(struct device *dev) { + struct pci_dev *pdev = to_pci_dev(dev); struct i40e_pf *pf = pci_get_drvdata(pdev); - u32 err; + int err; - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); - /* pci_restore_state() clears dev->state_saves, so - * call pci_save_state() again to restore it. - */ - pci_save_state(pdev); + /* If we're not suspended, then there is nothing to do */ + if (!test_bit(__I40E_SUSPENDED, pf->state)) + return 0; - err = pci_enable_device_mem(pdev); + /* We cleared the interrupt scheme when we suspended, so we need to + * restore it now to resume device functionality. + */ + err = i40e_restore_interrupt_scheme(pf); if (err) { - dev_err(&pdev->dev, "Cannot enable PCI device from suspend\n"); - return err; + dev_err(&pdev->dev, "Cannot restore interrupt scheme: %d\n", + err); } - pci_set_master(pdev); - /* no wakeup events while running */ - pci_wake_from_d3(pdev, false); - - /* handling the reset will rebuild the device state */ - if (test_and_clear_bit(__I40E_SUSPENDED, pf->state)) { - clear_bit(__I40E_DOWN, pf->state); - if (pf->msix_entries) { - err = request_irq(pf->msix_entries[0].vector, - i40e_intr, 0, pf->int_name, pf); - if (err) { - dev_err(&pf->pdev->dev, - "request_irq for %s failed: %d\n", - pf->int_name, err); - } - } - i40e_reset_and_rebuild(pf, false, false); - } + clear_bit(__I40E_DOWN, pf->state); + i40e_reset_and_rebuild(pf, false, false); + + /* Clear suspended state last after everything is recovered */ + clear_bit(__I40E_SUSPENDED, pf->state); + + /* Restart the service task */ + mod_timer(&pf->service_timer, + round_jiffies(jiffies + pf->service_timer_period)); return 0; } -#endif static const struct pci_error_handlers i40e_err_handler = { .error_detected = i40e_pci_error_detected, .slot_reset = i40e_pci_error_slot_reset, + .reset_prepare = i40e_pci_error_reset_prepare, + .reset_done = i40e_pci_error_reset_done, .resume = i40e_pci_error_resume, }; +static SIMPLE_DEV_PM_OPS(i40e_pm_ops, i40e_suspend, i40e_resume); + static struct pci_driver i40e_driver = { .name = i40e_driver_name, .id_table = i40e_pci_tbl, .probe = i40e_probe, .remove = i40e_remove, -#ifdef CONFIG_PM - .suspend = i40e_suspend, - .resume = i40e_resume, -#endif + .driver = { + .pm = &i40e_pm_ops, + }, .shutdown = i40e_shutdown, .err_handler = &i40e_err_handler, .sriov_configure = i40e_pci_sriov_configure, diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c index d591b3e6bd7c..0ccab0a5d717 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c +++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c @@ -311,13 +311,10 @@ static i40e_status i40e_read_nvm_word_aq(struct i40e_hw *hw, u16 offset, static i40e_status __i40e_read_nvm_word(struct i40e_hw *hw, u16 offset, u16 *data) { - i40e_status ret_code = 0; - if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) - ret_code = i40e_read_nvm_word_aq(hw, offset, data); - else - ret_code = i40e_read_nvm_word_srctl(hw, offset, data); - return ret_code; + return i40e_read_nvm_word_aq(hw, offset, data); + + return i40e_read_nvm_word_srctl(hw, offset, data); } /** @@ -331,7 +328,7 @@ static i40e_status __i40e_read_nvm_word(struct i40e_hw *hw, i40e_status i40e_read_nvm_word(struct i40e_hw *hw, u16 offset, u16 *data) { - i40e_status ret_code = 0; + i40e_status ret_code; ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ); if (ret_code) @@ -446,13 +443,10 @@ static i40e_status __i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset, u16 *words, u16 *data) { - i40e_status ret_code = 0; - if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) - ret_code = i40e_read_nvm_buffer_aq(hw, offset, words, data); - else - ret_code = i40e_read_nvm_buffer_srctl(hw, offset, words, data); - return ret_code; + return i40e_read_nvm_buffer_aq(hw, offset, words, data); + + return i40e_read_nvm_buffer_srctl(hw, offset, words, data); } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index a39b13197891..3bb6659db822 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -190,7 +190,7 @@ i40e_status i40e_aq_get_switch_config(struct i40e_hw *hw, struct i40e_asq_cmd_details *cmd_details); enum i40e_status_code i40e_aq_set_switch_config(struct i40e_hw *hw, u16 flags, - u16 valid_flags, + u16 valid_flags, u8 mode, struct i40e_asq_cmd_details *cmd_details); i40e_status i40e_aq_request_resource(struct i40e_hw *hw, enum i40e_aq_resources_ids resource, @@ -283,6 +283,22 @@ i40e_status i40e_aq_query_switch_comp_bw_config(struct i40e_hw *hw, struct i40e_asq_cmd_details *cmd_details); i40e_status i40e_aq_resume_port_tx(struct i40e_hw *hw, struct i40e_asq_cmd_details *cmd_details); +i40e_status +i40e_aq_add_cloud_filters_bb(struct i40e_hw *hw, u16 seid, + struct i40e_aqc_cloud_filters_element_bb *filters, + u8 filter_count); +enum i40e_status_code +i40e_aq_add_cloud_filters(struct i40e_hw *hw, u16 vsi, + struct i40e_aqc_cloud_filters_element_data *filters, + u8 filter_count); +enum i40e_status_code +i40e_aq_rem_cloud_filters(struct i40e_hw *hw, u16 vsi, + struct i40e_aqc_cloud_filters_element_data *filters, + u8 filter_count); +i40e_status +i40e_aq_rem_cloud_filters_bb(struct i40e_hw *hw, u16 seid, + struct i40e_aqc_cloud_filters_element_bb *filters, + u8 filter_count); i40e_status i40e_read_lldp_cfg(struct i40e_hw *hw, struct i40e_lldp_variables *lldp_cfg); /* i40e_common */ @@ -360,6 +376,15 @@ i40e_status i40e_aq_rx_ctl_write_register(struct i40e_hw *hw, u32 reg_addr, u32 reg_val, struct i40e_asq_cmd_details *cmd_details); void i40e_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val); +i40e_status i40e_aq_set_phy_register(struct i40e_hw *hw, + u8 phy_select, u8 dev_addr, + u32 reg_addr, u32 reg_val, + struct i40e_asq_cmd_details *cmd_details); +i40e_status i40e_aq_get_phy_register(struct i40e_hw *hw, + u8 phy_select, u8 dev_addr, + u32 reg_addr, u32 *reg_val, + struct i40e_asq_cmd_details *cmd_details); + i40e_status i40e_read_phy_register_clause22(struct i40e_hw *hw, u16 reg, u8 phy_addr, u16 *value); i40e_status i40e_write_phy_register_clause22(struct i40e_hw *hw, diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h index 86ca27f72f02..c234758dad15 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_register.h +++ b/drivers/net/ethernet/intel/i40e/i40e_register.h @@ -2794,7 +2794,7 @@ #define I40E_GLV_RUPP_MAX_INDEX 383 #define I40E_GLV_RUPP_RUPP_SHIFT 0 #define I40E_GLV_RUPP_RUPP_MASK I40E_MASK(0xFFFFFFFF, I40E_GLV_RUPP_RUPP_SHIFT) -#define I40E_GLV_TEPC(_VSI) (0x00344000 + ((_VSI) * 4)) /* _i=0...383 */ /* Reset: CORER */ +#define I40E_GLV_TEPC(_i) (0x00344000 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */ #define I40E_GLV_TEPC_MAX_INDEX 383 #define I40E_GLV_TEPC_TEPC_SHIFT 0 #define I40E_GLV_TEPC_TEPC_MASK I40E_MASK(0xFFFFFFFF, I40E_GLV_TEPC_TEPC_SHIFT) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 120c68f78951..d6d352a6e6ea 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -960,14 +960,14 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) { enum i40e_latency_range new_latency_range = rc->latency_range; u32 new_itr = rc->itr; - int bytes_per_int; + int bytes_per_usec; unsigned int usecs, estimated_usecs; if (rc->total_packets == 0 || !rc->itr) return false; usecs = (rc->itr << 1) * ITR_COUNTDOWN_START; - bytes_per_int = rc->total_bytes / usecs; + bytes_per_usec = rc->total_bytes / usecs; /* The calculations in this algorithm depend on interrupts actually * firing at the ITR rate. This may not happen if the packet rate is @@ -993,18 +993,18 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) */ switch (new_latency_range) { case I40E_LOWEST_LATENCY: - if (bytes_per_int > 10) + if (bytes_per_usec > 10) new_latency_range = I40E_LOW_LATENCY; break; case I40E_LOW_LATENCY: - if (bytes_per_int > 20) + if (bytes_per_usec > 20) new_latency_range = I40E_BULK_LATENCY; - else if (bytes_per_int <= 10) + else if (bytes_per_usec <= 10) new_latency_range = I40E_LOWEST_LATENCY; break; case I40E_BULK_LATENCY: default: - if (bytes_per_int <= 20) + if (bytes_per_usec <= 20) new_latency_range = I40E_LOW_LATENCY; break; } @@ -2117,6 +2117,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) if (!skb) { xdp.data = page_address(rx_buffer->page) + rx_buffer->page_offset; + xdp_set_data_meta_invalid(&xdp); xdp.data_hard_start = xdp.data - i40e_rx_offset(rx_ring); xdp.data_end = xdp.data + size; @@ -2211,9 +2212,7 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr) u32 val; val = I40E_PFINT_DYN_CTLN_INTENA_MASK | - /* Don't clear PBA because that can cause lost interrupts that - * came in while we were cleaning/polling - */ + I40E_PFINT_DYN_CTLN_CLEARPBA_MASK | (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) | (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT); @@ -2250,7 +2249,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, /* If we don't have MSIX, then we only need to re-enable icr0 */ if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) { - i40e_irq_dynamic_enable_icr0(vsi->back, false); + i40e_irq_dynamic_enable_icr0(vsi->back); return; } @@ -3176,38 +3175,12 @@ static inline int i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, /* write last descriptor with EOP bit */ td_cmd |= I40E_TX_DESC_CMD_EOP; - /* We can OR these values together as they both are checked against - * 4 below and at this point desc_count will be used as a boolean value - * after this if/else block. + /* We OR these values together to check both against 4 (WB_STRIDE) + * below. This is safe since we don't re-use desc_count afterwards. */ desc_count |= ++tx_ring->packet_stride; - /* Algorithm to optimize tail and RS bit setting: - * if queue is stopped - * mark RS bit - * reset packet counter - * else if xmit_more is supported and is true - * advance packet counter to 4 - * reset desc_count to 0 - * - * if desc_count >= 4 - * mark RS bit - * reset packet counter - * if desc_count > 0 - * update tail - * - * Note: If there are less than 4 descriptors - * pending and interrupts were disabled the service task will - * trigger a force WB. - */ - if (netif_xmit_stopped(txring_txq(tx_ring))) { - goto do_rs; - } else if (skb->xmit_more) { - /* set stride to arm on next packet and reset desc_count */ - tx_ring->packet_stride = WB_STRIDE; - desc_count = 0; - } else if (desc_count >= WB_STRIDE) { -do_rs: + if (desc_count >= WB_STRIDE) { /* write last descriptor with RS bit set */ td_cmd |= I40E_TX_DESC_CMD_RS; tx_ring->packet_stride = 0; @@ -3228,7 +3201,7 @@ do_rs: first->next_to_watch = tx_desc; /* notify HW of packet */ - if (desc_count) { + if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) { writel(i, tx_ring->tail); /* we need this if more than one processor can write to our tail diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 2f848bc5e391..fbae1182e2ea 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -38,8 +38,10 @@ #define I40E_ITR_8K 0x003E #define I40E_ITR_4K 0x007A #define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */ -#define I40E_ITR_RX_DEF I40E_ITR_20K -#define I40E_ITR_TX_DEF I40E_ITR_20K +#define I40E_ITR_RX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \ + I40E_ITR_DYNAMIC) +#define I40E_ITR_TX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \ + I40E_ITR_DYNAMIC) #define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */ #define I40E_MIN_INT_RATE 250 /* ~= 1000000 / (I40E_MAX_ITR * 2) */ #define I40E_MAX_INT_RATE 500000 /* == 1000000 / (I40E_MIN_ITR * 2) */ @@ -206,7 +208,7 @@ static inline bool i40e_test_staterr(union i40e_rx_desc *rx_desc, } /* How many Rx Buffers do we bundle into one write to the hardware ? */ -#define I40E_RX_BUFFER_WRITE 16 /* Must be power of 2 */ +#define I40E_RX_BUFFER_WRITE 32 /* Must be power of 2 */ #define I40E_RX_INCREMENT(r, i) \ do { \ (i)++; \ @@ -342,6 +344,7 @@ struct i40e_rx_queue_stats { enum i40e_ring_state_t { __I40E_TX_FDIR_INIT_DONE, __I40E_TX_XPS_INIT_DONE, + __I40E_RING_STATE_NBITS /* must be last */ }; /* some useful defines for virtchannel interface, which @@ -366,7 +369,7 @@ struct i40e_ring { struct i40e_tx_buffer *tx_bi; struct i40e_rx_buffer *rx_bi; }; - unsigned long state; + DECLARE_BITMAP(state, __I40E_RING_STATE_NBITS); u16 queue_index; /* Queue number of ring */ u8 dcb_tc; /* Traffic class of ring */ u8 __iomem *tail; @@ -423,6 +426,8 @@ struct i40e_ring { * i40e_clean_rx_ring_irq() is called * for this ring. */ + + struct i40e_channel *ch; } ____cacheline_internodealigned_in_smp; static inline bool ring_uses_build_skb(struct i40e_ring *ring) diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index fd4bbdd88b57..00d4833e9925 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -46,6 +46,9 @@ /* Max default timeout in ms, */ #define I40E_MAX_NVM_TIMEOUT 18000 +/* Max timeout in ms for the phy to respond */ +#define I40E_MAX_PHY_TIMEOUT 500 + /* Switch from ms to the 1usec global time (this is the GTIME resolution) */ #define I40E_MS_TO_GTIME(time) ((time) * 1000) @@ -268,6 +271,10 @@ struct i40e_phy_info { I40E_PHY_TYPE_OFFSET) #define I40E_CAP_PHY_TYPE_25GBASE_LR BIT_ULL(I40E_PHY_TYPE_25GBASE_LR + \ I40E_PHY_TYPE_OFFSET) +#define I40E_CAP_PHY_TYPE_25GBASE_AOC BIT_ULL(I40E_PHY_TYPE_25GBASE_AOC + \ + I40E_PHY_TYPE_OFFSET) +#define I40E_CAP_PHY_TYPE_25GBASE_ACC BIT_ULL(I40E_PHY_TYPE_25GBASE_ACC + \ + I40E_PHY_TYPE_OFFSET) #define I40E_HW_CAP_MAX_GPIO 30 /* Capabilities of a PF or a VF or the whole device */ struct i40e_hw_capabilities { @@ -276,6 +283,16 @@ struct i40e_hw_capabilities { #define I40E_NVM_IMAGE_TYPE_CLOUD 0x2 #define I40E_NVM_IMAGE_TYPE_UDP_CLOUD 0x3 + /* Cloud filter modes: + * Mode1: Filter on L4 port only + * Mode2: Filter for non-tunneled traffic + * Mode3: Filter for tunnel traffic + */ +#define I40E_CLOUD_FILTER_MODE1 0x6 +#define I40E_CLOUD_FILTER_MODE2 0x7 +#define I40E_CLOUD_FILTER_MODE3 0x8 +#define I40E_SWITCH_MODE_MASK 0xF + u32 management_mode; u32 mng_protocols_over_mctp; #define I40E_MNG_PROTOCOL_PLDM 0x2 @@ -428,6 +445,18 @@ struct i40e_nvm_access { u8 data[1]; }; +/* (Q)SFP module access definitions */ +#define I40E_I2C_EEPROM_DEV_ADDR 0xA0 +#define I40E_I2C_EEPROM_DEV_ADDR2 0xA2 +#define I40E_MODULE_TYPE_ADDR 0x00 +#define I40E_MODULE_REVISION_ADDR 0x01 +#define I40E_MODULE_SFF_8472_COMP 0x5E +#define I40E_MODULE_SFF_8472_SWAP 0x5C +#define I40E_MODULE_SFF_ADDR_MODE 0x04 +#define I40E_MODULE_TYPE_QSFP_PLUS 0x0D +#define I40E_MODULE_TYPE_QSFP28 0x11 +#define I40E_MODULE_QSFP_MAX_LEN 640 + /* PCI bus types */ enum i40e_bus_type { i40e_bus_type_unknown = 0, @@ -598,8 +627,15 @@ struct i40e_hw { struct i40e_dcbx_config desired_dcbx_config; /* CEE Desired Cfg */ #define I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE BIT_ULL(0) +#define I40E_HW_FLAG_802_1AD_CAPABLE BIT_ULL(1) +#define I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE BIT_ULL(2) u64 flags; + /* Used in set switch config AQ command */ + u16 switch_tag; + u16 first_tag; + u16 second_tag; + /* debug mask */ u32 debug_mask; char err_str[16]; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 4d1e670f490e..f8a794b72462 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -154,15 +154,30 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf) /** * i40e_vc_disable_vf - * @pf: pointer to the PF info * @vf: pointer to the VF info * - * Disable the VF through a SW reset + * Disable the VF through a SW reset. **/ -static inline void i40e_vc_disable_vf(struct i40e_pf *pf, struct i40e_vf *vf) +static inline void i40e_vc_disable_vf(struct i40e_vf *vf) { + int i; + i40e_vc_notify_vf_reset(vf); - i40e_reset_vf(vf, false); + + /* We want to ensure that an actual reset occurs initiated after this + * function was called. However, we do not want to wait forever, so + * we'll give a reasonable time and print a message if we failed to + * ensure a reset. + */ + for (i = 0; i < 20; i++) { + if (i40e_reset_vf(vf, false)) + return; + usleep_range(10000, 20000); + } + + dev_warn(&vf->pf->pdev->dev, + "Failed to initiate reset for VF %d after 200 milliseconds\n", + vf->vf_id); } /** @@ -258,7 +273,7 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id, struct i40e_hw *hw = &pf->hw; u16 vsi_queue_id, pf_queue_id; enum i40e_queue_type qtype; - u16 next_q, vector_id; + u16 next_q, vector_id, size; u32 reg, reg_idx; u16 itr_idx = 0; @@ -288,9 +303,11 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id, vsi_queue_id + 1)); } - next_q = find_first_bit(&linklistmap, - (I40E_MAX_VSI_QP * - I40E_VIRTCHNL_SUPPORTED_QTYPES)); + size = I40E_MAX_VSI_QP * I40E_VIRTCHNL_SUPPORTED_QTYPES; + next_q = find_first_bit(&linklistmap, size); + if (unlikely(next_q == size)) + goto irq_list_done; + vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES; qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES; pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id); @@ -298,7 +315,7 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id, wr32(hw, reg_idx, reg); - while (next_q < (I40E_MAX_VSI_QP * I40E_VIRTCHNL_SUPPORTED_QTYPES)) { + while (next_q < size) { switch (qtype) { case I40E_QUEUE_TYPE_RX: reg_idx = I40E_QINT_RQCTL(pf_queue_id); @@ -312,12 +329,8 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id, break; } - next_q = find_next_bit(&linklistmap, - (I40E_MAX_VSI_QP * - I40E_VIRTCHNL_SUPPORTED_QTYPES), - next_q + 1); - if (next_q < - (I40E_MAX_VSI_QP * I40E_VIRTCHNL_SUPPORTED_QTYPES)) { + next_q = find_next_bit(&linklistmap, size, next_q + 1); + if (next_q < size) { vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES; qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES; pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, @@ -423,6 +436,9 @@ static int i40e_config_iwarp_qvlist(struct i40e_vf *vf, (sizeof(struct virtchnl_iwarp_qv_info) * (qvlist_info->num_vectors - 1)); vf->qvlist_info = kzalloc(size, GFP_KERNEL); + if (!vf->qvlist_info) + return -ENOMEM; + vf->qvlist_info->num_vectors = qvlist_info->num_vectors; msix_vf = pf->hw.func_caps.num_msix_vectors_vf; @@ -621,7 +637,7 @@ static int i40e_config_vsi_rx_queue(struct i40e_vf *vf, u16 vsi_id, rx_ctx.dsize = 1; /* default values */ - rx_ctx.lrxqthresh = 2; + rx_ctx.lrxqthresh = 1; rx_ctx.crcstrip = 1; rx_ctx.prefena = 1; rx_ctx.l2tsel = 1; @@ -815,6 +831,14 @@ static void i40e_free_vf_res(struct i40e_vf *vf) */ clear_bit(I40E_VF_STATE_INIT, &vf->vf_states); + /* It's possible the VF had requeuested more queues than the default so + * do the accounting here when we're about to free them. + */ + if (vf->num_queue_pairs > I40E_DEFAULT_QUEUES_PER_VF) { + pf->queues_left += vf->num_queue_pairs - + I40E_DEFAULT_QUEUES_PER_VF; + } + /* free vsi & disconnect it from the parent uplink */ if (vf->lan_vsi_idx) { i40e_vsi_release(pf->vsi[vf->lan_vsi_idx]); @@ -853,7 +877,8 @@ static void i40e_free_vf_res(struct i40e_vf *vf) } /* reset some of the state variables keeping track of the resources */ vf->num_queue_pairs = 0; - vf->vf_states = 0; + clear_bit(I40E_VF_STATE_MC_PROMISC, &vf->vf_states); + clear_bit(I40E_VF_STATE_UC_PROMISC, &vf->vf_states); } /** @@ -868,12 +893,27 @@ static int i40e_alloc_vf_res(struct i40e_vf *vf) int total_queue_pairs = 0; int ret; + if (vf->num_req_queues && + vf->num_req_queues <= pf->queues_left + I40E_DEFAULT_QUEUES_PER_VF) + pf->num_vf_qps = vf->num_req_queues; + else + pf->num_vf_qps = I40E_DEFAULT_QUEUES_PER_VF; + /* allocate hw vsi context & associated resources */ ret = i40e_alloc_vsi_res(vf, I40E_VSI_SRIOV); if (ret) goto error_alloc; total_queue_pairs += pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs; + /* We account for each VF to get a default number of queue pairs. If + * the VF has now requested more, we need to account for that to make + * certain we never request more queues than we actually have left in + * HW. + */ + if (total_queue_pairs > I40E_DEFAULT_QUEUES_PER_VF) + pf->queues_left -= + total_queue_pairs - I40E_DEFAULT_QUEUES_PER_VF; + if (vf->trusted) set_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps); else @@ -1008,8 +1048,8 @@ static void i40e_cleanup_reset_vf(struct i40e_vf *vf) set_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states); clear_bit(I40E_VF_STATE_DISABLED, &vf->vf_states); /* Do not notify the client during VF init */ - if (test_and_clear_bit(I40E_VF_STATE_PRE_ENABLE, - &vf->vf_states)) + if (!test_and_clear_bit(I40E_VF_STATE_PRE_ENABLE, + &vf->vf_states)) i40e_notify_client_of_vf_reset(pf, abs_vf_id); vf->num_vlan = 0; } @@ -1026,9 +1066,9 @@ static void i40e_cleanup_reset_vf(struct i40e_vf *vf) * @vf: pointer to the VF structure * @flr: VFLR was issued or not * - * reset the VF + * Returns true if the VF is reset, false otherwise. **/ -void i40e_reset_vf(struct i40e_vf *vf, bool flr) +bool i40e_reset_vf(struct i40e_vf *vf, bool flr) { struct i40e_pf *pf = vf->pf; struct i40e_hw *hw = &pf->hw; @@ -1036,9 +1076,11 @@ void i40e_reset_vf(struct i40e_vf *vf, bool flr) u32 reg; int i; - /* If VFs have been disabled, there is no need to reset */ + /* If the VFs have been disabled, this means something else is + * resetting the VF, so we shouldn't continue. + */ if (test_and_set_bit(__I40E_VF_DISABLE, pf->state)) - return; + return false; i40e_trigger_vf_reset(vf, flr); @@ -1075,6 +1117,8 @@ void i40e_reset_vf(struct i40e_vf *vf, bool flr) i40e_flush(hw); clear_bit(__I40E_VF_DISABLE, pf->state); + + return true; } /** @@ -1086,8 +1130,10 @@ void i40e_reset_vf(struct i40e_vf *vf, bool flr) * VF, then do all the waiting in one chunk, and finally finish restoring each * VF after the wait. This is useful during PF routines which need to reset * all VFs, as otherwise it must perform these resets in a serialized fashion. + * + * Returns true if any VFs were reset, and false otherwise. **/ -void i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) +bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) { struct i40e_hw *hw = &pf->hw; struct i40e_vf *vf; @@ -1096,11 +1142,11 @@ void i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) /* If we don't have any VFs, then there is nothing to reset */ if (!pf->num_alloc_vfs) - return; + return false; /* If VFs have been disabled, there is no need to reset */ if (test_and_set_bit(__I40E_VF_DISABLE, pf->state)) - return; + return false; /* Begin reset on all VFs at once */ for (v = 0; v < pf->num_alloc_vfs; v++) @@ -1175,6 +1221,8 @@ void i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) i40e_flush(hw); clear_bit(__I40E_VF_DISABLE, pf->state); + + return true; } /** @@ -1308,7 +1356,7 @@ err_alloc: i40e_free_vfs(pf); err_iov: /* Re-enable interrupt 0. */ - i40e_irq_dynamic_enable_icr0(pf, false); + i40e_irq_dynamic_enable_icr0(pf); return ret; } @@ -1377,8 +1425,7 @@ int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) if (num_vfs) { if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) { pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; - i40e_do_reset_safe(pf, - BIT_ULL(__I40E_PF_RESET_REQUESTED)); + i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG); } return i40e_pci_sriov_enable(pdev, num_vfs); } @@ -1386,7 +1433,7 @@ int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) if (!pci_vfs_assigned(pf->pdev)) { i40e_free_vfs(pf); pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED; - i40e_do_reset_safe(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED)); + i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG); } else { dev_warn(&pdev->dev, "Unable to free VFs because some are assigned to VMs.\n"); return -EINVAL; @@ -1537,6 +1584,8 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_IWARP)) { vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_IWARP; set_bit(I40E_VF_STATE_IWARPENA, &vf->vf_states); + } else { + clear_bit(I40E_VF_STATE_IWARPENA, &vf->vf_states); } if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) { @@ -1579,6 +1628,9 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) VIRTCHNL_VF_OFFLOAD_WB_ON_ITR; } + if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_REQ_QUEUES) + vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_REQ_QUEUES; + vfres->num_vsis = num_vsis; vfres->num_queue_pairs = vf->num_queue_pairs; vfres->max_vectors = pf->hw.func_caps.num_msix_vectors_vf; @@ -1987,6 +2039,57 @@ error_param: } /** + * i40e_vc_request_queues_msg + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * @msglen: msg length + * + * VFs get a default number of queues but can use this message to request a + * different number. If the request is successful, PF will reset the VF and + * return 0. If unsuccessful, PF will send message informing VF of number of + * available queues and return result of sending VF a message. + **/ +static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg, int msglen) +{ + struct virtchnl_vf_res_request *vfres = + (struct virtchnl_vf_res_request *)msg; + int req_pairs = vfres->num_queue_pairs; + int cur_pairs = vf->num_queue_pairs; + struct i40e_pf *pf = vf->pf; + + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) + return -EINVAL; + + if (req_pairs <= 0) { + dev_err(&pf->pdev->dev, + "VF %d tried to request %d queues. Ignoring.\n", + vf->vf_id, req_pairs); + } else if (req_pairs > I40E_MAX_VF_QUEUES) { + dev_err(&pf->pdev->dev, + "VF %d tried to request more than %d queues.\n", + vf->vf_id, + I40E_MAX_VF_QUEUES); + vfres->num_queue_pairs = I40E_MAX_VF_QUEUES; + } else if (req_pairs - cur_pairs > pf->queues_left) { + dev_warn(&pf->pdev->dev, + "VF %d requested %d more queues, but only %d left.\n", + vf->vf_id, + req_pairs - cur_pairs, + pf->queues_left); + vfres->num_queue_pairs = pf->queues_left + cur_pairs; + } else { + /* successful request */ + vf->num_req_queues = req_pairs; + i40e_vc_notify_vf_reset(vf); + i40e_reset_vf(vf, false); + return 0; + } + + return i40e_vc_send_msg_to_vf(vf, VIRTCHNL_OP_REQUEST_QUEUES, 0, + (u8 *)vfres, sizeof(vfres)); +} + +/** * i40e_vc_get_stats_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer @@ -2708,6 +2811,9 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode, case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING: ret = i40e_vc_disable_vlan_stripping(vf, msg, msglen); break; + case VIRTCHNL_OP_REQUEST_QUEUES: + ret = i40e_vc_request_queues_msg(vf, msg, msglen); + break; case VIRTCHNL_OP_UNKNOWN: default: @@ -2779,6 +2885,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) struct i40e_mac_filter *f; struct i40e_vf *vf; int ret = 0; + struct hlist_node *h; int bkt; /* validate the request */ @@ -2817,7 +2924,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) /* Delete all the filters for this VSI - we're going to kill it * anyway. */ - hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) __i40e_del_filter(vsi, f); spin_unlock_bh(&vsi->mac_filter_hash_lock); @@ -2840,7 +2947,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) } /* Force the VF driver stop so it has to reload with new MAC address */ - i40e_vc_disable_vf(pf, vf); + i40e_vc_disable_vf(vf); dev_info(&pf->pdev->dev, "Reload the VF driver to make this change effective.\n"); error_param: @@ -2848,6 +2955,34 @@ error_param: } /** + * i40e_vsi_has_vlans - True if VSI has configured VLANs + * @vsi: pointer to the vsi + * + * Check if a VSI has configured any VLANs. False if we have a port VLAN or if + * we have no configured VLANs. Do not call while holding the + * mac_filter_hash_lock. + */ +static bool i40e_vsi_has_vlans(struct i40e_vsi *vsi) +{ + bool have_vlans; + + /* If we have a port VLAN, then the VSI cannot have any VLANs + * configured, as all MAC/VLAN filters will be assigned to the PVID. + */ + if (vsi->info.pvid) + return false; + + /* Since we don't have a PVID, we know that if the device is in VLAN + * mode it must be because of a VLAN filter configured on this VSI. + */ + spin_lock_bh(&vsi->mac_filter_hash_lock); + have_vlans = i40e_is_vsi_in_vlan(vsi); + spin_unlock_bh(&vsi->mac_filter_hash_lock); + + return have_vlans; +} + +/** * i40e_ndo_set_vf_port_vlan * @netdev: network interface device structure * @vf_id: VF identifier @@ -2899,10 +3034,7 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, /* duplicate request, so just return success */ goto error_pvid; - /* Locked once because multiple functions below iterate list */ - spin_lock_bh(&vsi->mac_filter_hash_lock); - - if (le16_to_cpu(vsi->info.pvid) == 0 && i40e_is_vsi_in_vlan(vsi)) { + if (i40e_vsi_has_vlans(vsi)) { dev_err(&pf->pdev->dev, "VF %d has already configured VLAN filters and the administrator is requesting a port VLAN override.\nPlease unload and reload the VF driver for this change to take effect.\n", vf_id); @@ -2910,11 +3042,14 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, * the right thing by reconfiguring his network correctly * and then reloading the VF driver. */ - i40e_vc_disable_vf(pf, vf); + i40e_vc_disable_vf(vf); /* During reset the VF got a new VSI, so refresh the pointer. */ vsi = pf->vsi[vf->lan_vsi_idx]; } + /* Locked once because multiple functions below iterate list */ + spin_lock_bh(&vsi->mac_filter_hash_lock); + /* Check for condition where there was already a port VLAN ID * filter set and now it is being deleted by setting it to zero. * Additionally check for the condition where there was a port @@ -2987,8 +3122,6 @@ error_pvid: return ret; } -#define I40E_BW_CREDIT_DIVISOR 50 /* 50Mbps per BW credit */ -#define I40E_MAX_BW_INACTIVE_ACCUM 4 /* device can accumulate 4 credits max */ /** * i40e_ndo_set_vf_bw * @netdev: network interface device structure @@ -3004,7 +3137,6 @@ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, struct i40e_pf *pf = np->vsi->back; struct i40e_vsi *vsi; struct i40e_vf *vf; - int speed = 0; int ret = 0; /* validate the request */ @@ -3029,48 +3161,10 @@ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, goto error; } - switch (pf->hw.phy.link_info.link_speed) { - case I40E_LINK_SPEED_40GB: - speed = 40000; - break; - case I40E_LINK_SPEED_25GB: - speed = 25000; - break; - case I40E_LINK_SPEED_20GB: - speed = 20000; - break; - case I40E_LINK_SPEED_10GB: - speed = 10000; - break; - case I40E_LINK_SPEED_1GB: - speed = 1000; - break; - default: - break; - } - - if (max_tx_rate > speed) { - dev_err(&pf->pdev->dev, "Invalid max tx rate %d specified for VF %d.\n", - max_tx_rate, vf->vf_id); - ret = -EINVAL; + ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate); + if (ret) goto error; - } - - if ((max_tx_rate < 50) && (max_tx_rate > 0)) { - dev_warn(&pf->pdev->dev, "Setting max Tx rate to minimum usable value of 50Mbps.\n"); - max_tx_rate = 50; - } - /* Tx rate credits are in values of 50Mbps, 0 is disabled*/ - ret = i40e_aq_config_vsi_bw_limit(&pf->hw, vsi->seid, - max_tx_rate / I40E_BW_CREDIT_DIVISOR, - I40E_MAX_BW_INACTIVE_ACCUM, NULL); - if (ret) { - dev_err(&pf->pdev->dev, "Unable to set max tx rate, error code %d.\n", - ret); - ret = -EIO; - goto error; - } vf->tx_rate = max_tx_rate; error: return ret; @@ -3279,14 +3373,11 @@ int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting) vf = &pf->vf[vf_id]; - if (!vf) - return -EINVAL; if (setting == vf->trusted) goto out; vf->trusted = setting; - i40e_vc_notify_vf_reset(vf); - i40e_reset_vf(vf, false); + i40e_vc_disable_vf(vf); dev_info(&pf->pdev->dev, "VF %u is now %strusted\n", vf_id, setting ? "" : "un"); out: diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index 1f4b0c504368..5efc4f92bb37 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -56,7 +56,6 @@ enum i40e_vf_states { I40E_VF_STATE_INIT = 0, I40E_VF_STATE_ACTIVE, I40E_VF_STATE_IWARPENA, - I40E_VF_STATE_FCOEENA, I40E_VF_STATE_DISABLED, I40E_VF_STATE_MC_PROMISC, I40E_VF_STATE_UC_PROMISC, @@ -97,6 +96,7 @@ struct i40e_vf { u16 lan_vsi_id; /* ID as used by firmware */ u8 num_queue_pairs; /* num of qps assigned to VF vsis */ + u8 num_req_queues; /* num of requested qps */ u64 num_mdd_events; /* num of mdd events detected */ /* num of continuous malformed or invalid msgs detected */ u64 num_invalid_msgs; @@ -121,8 +121,8 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs); int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode, u32 v_retval, u8 *msg, u16 msglen); int i40e_vc_process_vflr_event(struct i40e_pf *pf); -void i40e_reset_vf(struct i40e_vf *vf, bool flr); -void i40e_reset_all_vfs(struct i40e_pf *pf, bool flr); +bool i40e_reset_vf(struct i40e_vf *vf, bool flr); +bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr); void i40e_vc_notify_vf_reset(struct i40e_vf *vf); /* VF configuration related iplink handlers */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h index 83e63e55c4b4..06b04572c518 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h @@ -34,7 +34,15 @@ */ #define I40E_FW_API_VERSION_MAJOR 0x0001 -#define I40E_FW_API_VERSION_MINOR 0x0005 +#define I40E_FW_API_VERSION_MINOR_X722 0x0005 +#define I40E_FW_API_VERSION_MINOR_X710 0x0007 + +#define I40E_FW_MINOR_VERSION(_h) ((_h)->mac.type == I40E_MAC_XL710 ? \ + I40E_FW_API_VERSION_MINOR_X710 : \ + I40E_FW_API_VERSION_MINOR_X722) + +/* API version 1.7 implements additional link and PHY-specific APIs */ +#define I40E_MINOR_VER_GET_LINK_INFO_XL710 0x0007 struct i40e_aq_desc { __le16 flags; @@ -236,6 +244,8 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_set_phy_debug = 0x0622, i40e_aqc_opc_upload_ext_phy_fm = 0x0625, i40e_aqc_opc_run_phy_activity = 0x0626, + i40e_aqc_opc_set_phy_register = 0x0628, + i40e_aqc_opc_get_phy_register = 0x0629, /* NVM commands */ i40e_aqc_opc_nvm_read = 0x0701, @@ -761,7 +771,22 @@ struct i40e_aqc_set_switch_config { #define I40E_AQ_SET_SWITCH_CFG_PROMISC 0x0001 #define I40E_AQ_SET_SWITCH_CFG_L2_FILTER 0x0002 __le16 valid_flags; - u8 reserved[12]; + /* The ethertype in switch_tag is dropped on ingress and used + * internally by the switch. Set this to zero for the default + * of 0x88a8 (802.1ad). Should be zero for firmware API + * versions lower than 1.7. + */ + __le16 switch_tag; + /* The ethertypes in first_tag and second_tag are used to + * match the outer and inner VLAN tags (respectively) when HW + * double VLAN tagging is enabled via the set port parameters + * AQ command. Otherwise these are both ignored. Set them to + * zero for their defaults of 0x8100 (802.1Q). Should be zero + * for firmware API versions lower than 1.7. + */ + __le16 first_tag; + __le16 second_tag; + u8 reserved[6]; }; I40E_CHECK_CMD_LENGTH(i40e_aqc_set_switch_config); @@ -1314,14 +1339,16 @@ struct i40e_aqc_add_remove_cloud_filters { #define I40E_AQC_ADD_CLOUD_CMD_SEID_NUM_SHIFT 0 #define I40E_AQC_ADD_CLOUD_CMD_SEID_NUM_MASK (0x3FF << \ I40E_AQC_ADD_CLOUD_CMD_SEID_NUM_SHIFT) - u8 reserved2[4]; + u8 big_buffer_flag; +#define I40E_AQC_ADD_CLOUD_CMD_BB 1 + u8 reserved2[3]; __le32 addr_high; __le32 addr_low; }; I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_cloud_filters); -struct i40e_aqc_add_remove_cloud_filters_element_data { +struct i40e_aqc_cloud_filters_element_data { u8 outer_mac[6]; u8 inner_mac[6]; __le16 inner_vlan; @@ -1333,6 +1360,9 @@ struct i40e_aqc_add_remove_cloud_filters_element_data { struct { u8 data[16]; } v6; + struct { + __le16 data[8]; + } raw_v6; } ipaddr; __le16 flags; #define I40E_AQC_ADD_CLOUD_FILTER_SHIFT 0 @@ -1351,6 +1381,10 @@ struct i40e_aqc_add_remove_cloud_filters_element_data { #define I40E_AQC_ADD_CLOUD_FILTER_IMAC 0x000A #define I40E_AQC_ADD_CLOUD_FILTER_OMAC_TEN_ID_IMAC 0x000B #define I40E_AQC_ADD_CLOUD_FILTER_IIP 0x000C +/* 0x0010 to 0x0017 is for custom filters */ +#define I40E_AQC_ADD_CLOUD_FILTER_IP_PORT 0x0010 /* Dest IP + L4 Port */ +#define I40E_AQC_ADD_CLOUD_FILTER_MAC_PORT 0x0011 /* Dest MAC + L4 Port */ +#define I40E_AQC_ADD_CLOUD_FILTER_MAC_VLAN_PORT 0x0012 /* Dest MAC + VLAN + L4 Port */ #define I40E_AQC_ADD_CLOUD_FLAGS_TO_QUEUE 0x0080 #define I40E_AQC_ADD_CLOUD_VNK_SHIFT 6 @@ -1385,6 +1419,49 @@ struct i40e_aqc_add_remove_cloud_filters_element_data { u8 response_reserved[7]; }; +I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_cloud_filters_element_data); + +/* i40e_aqc_cloud_filters_element_bb is used when + * I40E_AQC_ADD_CLOUD_CMD_BB flag is set. + */ +struct i40e_aqc_cloud_filters_element_bb { + struct i40e_aqc_cloud_filters_element_data element; + u16 general_fields[32]; +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X10_WORD0 0 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X10_WORD1 1 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X10_WORD2 2 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X11_WORD0 3 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X11_WORD1 4 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X11_WORD2 5 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X12_WORD0 6 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X12_WORD1 7 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X12_WORD2 8 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X13_WORD0 9 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X13_WORD1 10 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X13_WORD2 11 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X14_WORD0 12 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X14_WORD1 13 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X14_WORD2 14 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD0 15 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD1 16 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD2 17 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD3 18 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD4 19 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD5 20 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD6 21 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD7 22 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD0 23 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD1 24 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD2 25 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD3 26 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD4 27 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD5 28 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD6 29 +#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD7 30 +}; + +I40E_CHECK_STRUCT_LEN(0x80, i40e_aqc_cloud_filters_element_bb); + struct i40e_aqc_remove_cloud_filters_completion { __le16 perfect_ovlan_used; __le16 perfect_ovlan_free; @@ -1396,6 +1473,60 @@ struct i40e_aqc_remove_cloud_filters_completion { I40E_CHECK_CMD_LENGTH(i40e_aqc_remove_cloud_filters_completion); +/* Replace filter Command 0x025F + * uses the i40e_aqc_replace_cloud_filters, + * and the generic indirect completion structure + */ +struct i40e_filter_data { + u8 filter_type; + u8 input[3]; +}; + +I40E_CHECK_STRUCT_LEN(4, i40e_filter_data); + +struct i40e_aqc_replace_cloud_filters_cmd { + u8 valid_flags; +#define I40E_AQC_REPLACE_L1_FILTER 0x0 +#define I40E_AQC_REPLACE_CLOUD_FILTER 0x1 +#define I40E_AQC_GET_CLOUD_FILTERS 0x2 +#define I40E_AQC_MIRROR_CLOUD_FILTER 0x4 +#define I40E_AQC_HIGH_PRIORITY_CLOUD_FILTER 0x8 + u8 old_filter_type; + u8 new_filter_type; + u8 tr_bit; + u8 reserved[4]; + __le32 addr_high; + __le32 addr_low; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_replace_cloud_filters_cmd); + +struct i40e_aqc_replace_cloud_filters_cmd_buf { + u8 data[32]; +/* Filter type INPUT codes*/ +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_ENTRIES_MAX 3 +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_VALIDATED BIT(7) + +/* Field Vector offsets */ +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_MAC_DA 0 +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG_ETH 6 +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG 7 +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_VLAN 8 +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG_OVLAN 9 +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_STAG_IVLAN 10 +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_TUNNLE_KEY 11 +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_IMAC 12 +/* big FLU */ +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_IP_DA 14 +/* big FLU */ +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_OIP_DA 15 + +#define I40E_AQC_REPLACE_CLOUD_CMD_INPUT_FV_INNER_VLAN 37 + struct i40e_filter_data filters[8]; +}; + +I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_replace_cloud_filters_cmd_buf); + /* Add Mirror Rule (indirect or direct 0x0260) * Delete Mirror Rule (indirect or direct 0x0261) * note: some rule types (4,5) do not use an external buffer. @@ -1722,6 +1853,8 @@ enum i40e_aq_phy_type { I40E_PHY_TYPE_10GBASE_CR1_CU = 0xB, I40E_PHY_TYPE_10GBASE_AOC = 0xC, I40E_PHY_TYPE_40GBASE_AOC = 0xD, + I40E_PHY_TYPE_UNRECOGNIZED = 0xE, + I40E_PHY_TYPE_UNSUPPORTED = 0xF, I40E_PHY_TYPE_100BASE_TX = 0x11, I40E_PHY_TYPE_1000BASE_T = 0x12, I40E_PHY_TYPE_10GBASE_T = 0x13, @@ -1740,7 +1873,12 @@ enum i40e_aq_phy_type { I40E_PHY_TYPE_25GBASE_CR = 0x20, I40E_PHY_TYPE_25GBASE_SR = 0x21, I40E_PHY_TYPE_25GBASE_LR = 0x22, - I40E_PHY_TYPE_MAX + I40E_PHY_TYPE_25GBASE_AOC = 0x23, + I40E_PHY_TYPE_25GBASE_ACC = 0x24, + I40E_PHY_TYPE_MAX, + I40E_PHY_TYPE_NOT_SUPPORTED_HIGH_TEMP = 0xFD, + I40E_PHY_TYPE_EMPTY = 0xFE, + I40E_PHY_TYPE_DEFAULT = 0xFF, }; #define I40E_LINK_SPEED_100MB_SHIFT 0x1 @@ -1797,6 +1935,8 @@ struct i40e_aq_get_phy_abilities_resp { #define I40E_AQ_PHY_TYPE_EXT_25G_CR 0X02 #define I40E_AQ_PHY_TYPE_EXT_25G_SR 0x04 #define I40E_AQ_PHY_TYPE_EXT_25G_LR 0x08 +#define I40E_AQ_PHY_TYPE_EXT_25G_AOC 0x10 +#define I40E_AQ_PHY_TYPE_EXT_25G_ACC 0x20 u8 fec_cfg_curr_mod_ext_info; #define I40E_AQ_ENABLE_FEC_KR 0x01 #define I40E_AQ_ENABLE_FEC_RS 0x02 @@ -1930,19 +2070,31 @@ struct i40e_aqc_get_link_status { #define I40E_AQ_25G_SERDES_UCODE_ERR 0X04 #define I40E_AQ_25G_NIMB_UCODE_ERR 0X05 u8 loopback; /* use defines from i40e_aqc_set_lb_mode */ +/* Since firmware API 1.7 loopback field keeps power class info as well */ +#define I40E_AQ_LOOPBACK_MASK 0x07 +#define I40E_AQ_PWR_CLASS_SHIFT_LB 6 +#define I40E_AQ_PWR_CLASS_MASK_LB (0x03 << I40E_AQ_PWR_CLASS_SHIFT_LB) __le16 max_frame_size; u8 config; #define I40E_AQ_CONFIG_FEC_KR_ENA 0x01 #define I40E_AQ_CONFIG_FEC_RS_ENA 0x02 #define I40E_AQ_CONFIG_CRC_ENA 0x04 #define I40E_AQ_CONFIG_PACING_MASK 0x78 - u8 power_desc; + union { + struct { + u8 power_desc; #define I40E_AQ_LINK_POWER_CLASS_1 0x00 #define I40E_AQ_LINK_POWER_CLASS_2 0x01 #define I40E_AQ_LINK_POWER_CLASS_3 0x02 #define I40E_AQ_LINK_POWER_CLASS_4 0x03 #define I40E_AQ_PWR_CLASS_MASK 0x03 - u8 reserved[4]; + u8 reserved[4]; + }; + struct { + u8 link_type[4]; + u8 link_type_ext; + }; + }; }; I40E_CHECK_CMD_LENGTH(i40e_aqc_get_link_status); @@ -2022,6 +2174,22 @@ struct i40e_aqc_run_phy_activity { I40E_CHECK_CMD_LENGTH(i40e_aqc_run_phy_activity); +/* Set PHY Register command (0x0628) */ +/* Get PHY Register command (0x0629) */ +struct i40e_aqc_phy_register_access { + u8 phy_interface; +#define I40E_AQ_PHY_REG_ACCESS_INTERNAL 0 +#define I40E_AQ_PHY_REG_ACCESS_EXTERNAL 1 +#define I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE 2 + u8 dev_address; + u8 reserved1[2]; + __le32 reg_address; + __le32 reg_value; + u8 reserved2[4]; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_phy_register_access); + /* NVM Read command (indirect 0x0701) * NVM Erase commands (direct 0x0702) * NVM Update commands (indirect 0x0703) diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c index 8d3a2bfe186a..7d70bf69b249 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_common.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c @@ -1042,6 +1042,75 @@ do_retry: } /** + * i40evf_aq_set_phy_register + * @hw: pointer to the hw struct + * @phy_select: select which phy should be accessed + * @dev_addr: PHY device address + * @reg_addr: PHY register address + * @reg_val: new register value + * @cmd_details: pointer to command details structure or NULL + * + * Reset the external PHY. + **/ +i40e_status i40evf_aq_set_phy_register(struct i40e_hw *hw, + u8 phy_select, u8 dev_addr, + u32 reg_addr, u32 reg_val, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_phy_register_access *cmd = + (struct i40e_aqc_phy_register_access *)&desc.params.raw; + i40e_status status; + + i40evf_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_set_phy_register); + + cmd->phy_interface = phy_select; + cmd->dev_address = dev_addr; + cmd->reg_address = cpu_to_le32(reg_addr); + cmd->reg_value = cpu_to_le32(reg_val); + + status = i40evf_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + return status; +} + +/** + * i40evf_aq_get_phy_register + * @hw: pointer to the hw struct + * @phy_select: select which phy should be accessed + * @dev_addr: PHY device address + * @reg_addr: PHY register address + * @reg_val: read register value + * @cmd_details: pointer to command details structure or NULL + * + * Reset the external PHY. + **/ +i40e_status i40evf_aq_get_phy_register(struct i40e_hw *hw, + u8 phy_select, u8 dev_addr, + u32 reg_addr, u32 *reg_val, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_phy_register_access *cmd = + (struct i40e_aqc_phy_register_access *)&desc.params.raw; + i40e_status status; + + i40evf_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_get_phy_register); + + cmd->phy_interface = phy_select; + cmd->dev_address = dev_addr; + cmd->reg_address = cpu_to_le32(reg_addr); + + status = i40evf_asq_send_command(hw, &desc, NULL, 0, cmd_details); + if (!status) + *reg_val = le32_to_cpu(cmd->reg_value); + + return status; +} + +/** * i40e_aq_send_msg_to_pf * @hw: pointer to the hardware structure * @v_opcode: opcodes for VF-PF communication diff --git a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h index c9836bba487d..b624b5994075 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h @@ -111,6 +111,15 @@ i40e_status i40evf_aq_rx_ctl_write_register(struct i40e_hw *hw, u32 reg_addr, u32 reg_val, struct i40e_asq_cmd_details *cmd_details); void i40evf_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val); +i40e_status i40e_aq_set_phy_register(struct i40e_hw *hw, + u8 phy_select, u8 dev_addr, + u32 reg_addr, u32 reg_val, + struct i40e_asq_cmd_details *cmd_details); +i40e_status i40e_aq_get_phy_register(struct i40e_hw *hw, + u8 phy_select, u8 dev_addr, + u32 reg_addr, u32 *reg_val, + struct i40e_asq_cmd_details *cmd_details); + i40e_status i40e_read_phy_register(struct i40e_hw *hw, u8 page, u16 reg, u8 phy_addr, u16 *value); i40e_status i40e_write_phy_register(struct i40e_hw *hw, u8 page, diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index c32c62462c84..fe817e2b6fef 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -358,14 +358,14 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) { enum i40e_latency_range new_latency_range = rc->latency_range; u32 new_itr = rc->itr; - int bytes_per_int; + int bytes_per_usec; unsigned int usecs, estimated_usecs; if (rc->total_packets == 0 || !rc->itr) return false; usecs = (rc->itr << 1) * ITR_COUNTDOWN_START; - bytes_per_int = rc->total_bytes / usecs; + bytes_per_usec = rc->total_bytes / usecs; /* The calculations in this algorithm depend on interrupts actually * firing at the ITR rate. This may not happen if the packet rate is @@ -391,18 +391,18 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) */ switch (new_latency_range) { case I40E_LOWEST_LATENCY: - if (bytes_per_int > 10) + if (bytes_per_usec > 10) new_latency_range = I40E_LOW_LATENCY; break; case I40E_LOW_LATENCY: - if (bytes_per_int > 20) + if (bytes_per_usec > 20) new_latency_range = I40E_BULK_LATENCY; - else if (bytes_per_int <= 10) + else if (bytes_per_usec <= 10) new_latency_range = I40E_LOWEST_LATENCY; break; case I40E_BULK_LATENCY: default: - if (bytes_per_int <= 20) + if (bytes_per_usec <= 20) new_latency_range = I40E_LOW_LATENCY; break; } @@ -1409,9 +1409,7 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr) u32 val; val = I40E_VFINT_DYN_CTLN1_INTENA_MASK | - /* Don't clear PBA because that can cause lost interrupts that - * came in while we were cleaning/polling - */ + I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK | (type << I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) | (itr << I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT); diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index 0d9f98bc07bd..8d26c85d12e1 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -38,8 +38,10 @@ #define I40E_ITR_8K 0x003E #define I40E_ITR_4K 0x007A #define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */ -#define I40E_ITR_RX_DEF I40E_ITR_20K -#define I40E_ITR_TX_DEF I40E_ITR_20K +#define I40E_ITR_RX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \ + I40E_ITR_DYNAMIC) +#define I40E_ITR_TX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \ + I40E_ITR_DYNAMIC) #define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */ #define I40E_MIN_INT_RATE 250 /* ~= 1000000 / (I40E_MAX_ITR * 2) */ #define I40E_MAX_INT_RATE 500000 /* == 1000000 / (I40E_MIN_ITR * 2) */ @@ -189,7 +191,7 @@ static inline bool i40e_test_staterr(union i40e_rx_desc *rx_desc, } /* How many Rx Buffers do we bundle into one write to the hardware ? */ -#define I40E_RX_BUFFER_WRITE 16 /* Must be power of 2 */ +#define I40E_RX_BUFFER_WRITE 32 /* Must be power of 2 */ #define I40E_RX_INCREMENT(r, i) \ do { \ (i)++; \ @@ -325,6 +327,7 @@ struct i40e_rx_queue_stats { enum i40e_ring_state_t { __I40E_TX_FDIR_INIT_DONE, __I40E_TX_XPS_INIT_DONE, + __I40E_RING_STATE_NBITS /* must be last */ }; /* some useful defines for virtchannel interface, which @@ -348,7 +351,7 @@ struct i40e_ring { struct i40e_tx_buffer *tx_bi; struct i40e_rx_buffer *rx_bi; }; - unsigned long state; + DECLARE_BITMAP(state, __I40E_RING_STATE_NBITS); u16 queue_index; /* Queue number of ring */ u8 dcb_tc; /* Traffic class of ring */ u8 __iomem *tail; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h index 2ea919d9cdcf..213b773dfad6 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_type.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h @@ -46,6 +46,9 @@ /* Max default timeout in ms, */ #define I40E_MAX_NVM_TIMEOUT 18000 +/* Max timeout in ms for the phy to respond */ +#define I40E_MAX_PHY_TIMEOUT 500 + /* Switch from ms to the 1usec global time (this is the GTIME resolution) */ #define I40E_MS_TO_GTIME(time) ((time) * 1000) @@ -401,6 +404,18 @@ struct i40e_nvm_access { u8 data[1]; }; +/* (Q)SFP module access definitions */ +#define I40E_I2C_EEPROM_DEV_ADDR 0xA0 +#define I40E_I2C_EEPROM_DEV_ADDR2 0xA2 +#define I40E_MODULE_TYPE_ADDR 0x00 +#define I40E_MODULE_REVISION_ADDR 0x01 +#define I40E_MODULE_SFF_8472_COMP 0x5E +#define I40E_MODULE_SFF_8472_SWAP 0x5C +#define I40E_MODULE_SFF_ADDR_MODE 0x04 +#define I40E_MODULE_TYPE_QSFP_PLUS 0x0D +#define I40E_MODULE_TYPE_QSFP28 0x11 +#define I40E_MODULE_QSFP_MAX_LEN 640 + /* PCI bus types */ enum i40e_bus_type { i40e_bus_type_unknown = 0, @@ -556,11 +571,19 @@ struct i40e_hw { /* LLDP/DCBX Status */ u16 dcbx_status; +#define I40E_HW_FLAG_802_1AD_CAPABLE BIT_ULL(1) +#define I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE BIT_ULL(2) + /* DCBX info */ struct i40e_dcbx_config local_dcbx_config; /* Oper/Local Cfg */ struct i40e_dcbx_config remote_dcbx_config; /* Peer Cfg */ struct i40e_dcbx_config desired_dcbx_config; /* CEE Desired Cfg */ + /* Used in set switch config AQ command */ + u16 switch_tag; + u16 first_tag; + u16 second_tag; + /* debug mask */ u32 debug_mask; char err_str[16]; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index 82f69031e5cd..de0af521d602 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -102,6 +102,7 @@ struct i40e_vsi { #define I40E_TX_CTXTDESC(R, i) \ (&(((struct i40e_tx_context_desc *)((R)->desc))[i])) #define MAX_QUEUES 16 +#define I40EVF_MAX_REQ_QUEUES 4 #define I40EVF_HKEY_ARRAY_SIZE ((I40E_VFQF_HKEY_MAX_INDEX + 1) * 4) #define I40EVF_HLUT_ARRAY_SIZE ((I40E_VFQF_HLUT_MAX_INDEX + 1) * 4) @@ -200,6 +201,7 @@ struct i40evf_adapter { struct list_head vlan_filter_list; char misc_vector_name[IFNAMSIZ + 9]; int num_active_queues; + int num_req_queues; /* TX */ struct i40e_ring *tx_rings; @@ -220,21 +222,22 @@ struct i40evf_adapter { u32 flags; #define I40EVF_FLAG_RX_CSUM_ENABLED BIT(0) -#define I40EVF_FLAG_IMIR_ENABLED BIT(5) -#define I40EVF_FLAG_MQ_CAPABLE BIT(6) -#define I40EVF_FLAG_PF_COMMS_FAILED BIT(8) -#define I40EVF_FLAG_RESET_PENDING BIT(9) -#define I40EVF_FLAG_RESET_NEEDED BIT(10) -#define I40EVF_FLAG_WB_ON_ITR_CAPABLE BIT(11) -#define I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE BIT(12) -#define I40EVF_FLAG_ADDR_SET_BY_PF BIT(13) -#define I40EVF_FLAG_SERVICE_CLIENT_REQUESTED BIT(14) -#define I40EVF_FLAG_CLIENT_NEEDS_OPEN BIT(15) -#define I40EVF_FLAG_CLIENT_NEEDS_CLOSE BIT(16) -#define I40EVF_FLAG_CLIENT_NEEDS_L2_PARAMS BIT(17) -#define I40EVF_FLAG_PROMISC_ON BIT(18) -#define I40EVF_FLAG_ALLMULTI_ON BIT(19) -#define I40EVF_FLAG_LEGACY_RX BIT(20) +#define I40EVF_FLAG_IMIR_ENABLED BIT(1) +#define I40EVF_FLAG_MQ_CAPABLE BIT(2) +#define I40EVF_FLAG_PF_COMMS_FAILED BIT(3) +#define I40EVF_FLAG_RESET_PENDING BIT(4) +#define I40EVF_FLAG_RESET_NEEDED BIT(5) +#define I40EVF_FLAG_WB_ON_ITR_CAPABLE BIT(6) +#define I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE BIT(7) +#define I40EVF_FLAG_ADDR_SET_BY_PF BIT(8) +#define I40EVF_FLAG_SERVICE_CLIENT_REQUESTED BIT(9) +#define I40EVF_FLAG_CLIENT_NEEDS_OPEN BIT(10) +#define I40EVF_FLAG_CLIENT_NEEDS_CLOSE BIT(11) +#define I40EVF_FLAG_CLIENT_NEEDS_L2_PARAMS BIT(12) +#define I40EVF_FLAG_PROMISC_ON BIT(13) +#define I40EVF_FLAG_ALLMULTI_ON BIT(14) +#define I40EVF_FLAG_LEGACY_RX BIT(15) +#define I40EVF_FLAG_REINIT_ITR_NEEDED BIT(16) /* duplicates for common code */ #define I40E_FLAG_DCB_ENABLED 0 #define I40E_FLAG_RX_CSUM_ENABLED I40EVF_FLAG_RX_CSUM_ENABLED @@ -349,6 +352,7 @@ void i40evf_deconfigure_queues(struct i40evf_adapter *adapter); void i40evf_enable_queues(struct i40evf_adapter *adapter); void i40evf_disable_queues(struct i40evf_adapter *adapter); void i40evf_map_queues(struct i40evf_adapter *adapter); +int i40evf_request_queues(struct i40evf_adapter *adapter, int num); void i40evf_add_ether_addrs(struct i40evf_adapter *adapter); void i40evf_del_ether_addrs(struct i40evf_adapter *adapter); void i40evf_add_vlans(struct i40evf_adapter *adapter); diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c index 65874d6b3ab9..da006fa3fec1 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c @@ -669,7 +669,7 @@ static void i40evf_get_channels(struct net_device *netdev, struct i40evf_adapter *adapter = netdev_priv(netdev); /* Report maximum channels */ - ch->max_combined = adapter->num_active_queues; + ch->max_combined = I40EVF_MAX_REQ_QUEUES; ch->max_other = NONQ_VECS; ch->other_count = NONQ_VECS; @@ -678,6 +678,41 @@ static void i40evf_get_channels(struct net_device *netdev, } /** + * i40evf_set_channels: set the new channel count + * @netdev: network interface device structure + * @ch: channel information structure + * + * Negotiate a new number of channels with the PF then do a reset. During + * reset we'll realloc queues and fix the RSS table. Returns 0 on success, + * negative on failure. + **/ +static int i40evf_set_channels(struct net_device *netdev, + struct ethtool_channels *ch) +{ + struct i40evf_adapter *adapter = netdev_priv(netdev); + int num_req = ch->combined_count; + + if (num_req != adapter->num_active_queues && + !(adapter->vf_res->vf_cap_flags & + VIRTCHNL_VF_OFFLOAD_REQ_QUEUES)) { + dev_info(&adapter->pdev->dev, "PF is not capable of queue negotiation.\n"); + return -EINVAL; + } + + /* All of these should have already been checked by ethtool before this + * even gets to us, but just to be sure. + */ + if (num_req <= 0 || num_req > I40EVF_MAX_REQ_QUEUES) + return -EINVAL; + + if (ch->rx_count || ch->tx_count || ch->other_count != NONQ_VECS) + return -EINVAL; + + adapter->num_req_queues = num_req; + return i40evf_request_queues(adapter, num_req); +} + +/** * i40evf_get_rxfh_key_size - get the RSS hash key size * @netdev: network interface device structure * @@ -785,6 +820,7 @@ static const struct ethtool_ops i40evf_ethtool_ops = { .get_rxfh = i40evf_get_rxfh, .set_rxfh = i40evf_set_rxfh, .get_channels = i40evf_get_channels, + .set_channels = i40evf_set_channels, .get_rxfh_key_size = i40evf_get_rxfh_key_size, .get_link_ksettings = i40evf_get_link_ksettings, }; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 1825d956bb00..ca2ebdbd24d7 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -46,7 +46,7 @@ static const char i40evf_driver_string[] = #define DRV_VERSION_MAJOR 3 #define DRV_VERSION_MINOR 0 -#define DRV_VERSION_BUILD 0 +#define DRV_VERSION_BUILD 1 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) \ @@ -430,57 +430,26 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx) * group the rings as "efficiently" as possible. You would add new * mapping configurations in here. **/ -static int i40evf_map_rings_to_vectors(struct i40evf_adapter *adapter) +static void i40evf_map_rings_to_vectors(struct i40evf_adapter *adapter) { + int rings_remaining = adapter->num_active_queues; + int ridx = 0, vidx = 0; int q_vectors; - int v_start = 0; - int rxr_idx = 0, txr_idx = 0; - int rxr_remaining = adapter->num_active_queues; - int txr_remaining = adapter->num_active_queues; - int i, j; - int rqpv, tqpv; - int err = 0; q_vectors = adapter->num_msix_vectors - NONQ_VECS; - /* The ideal configuration... - * We have enough vectors to map one per queue. - */ - if (q_vectors >= (rxr_remaining * 2)) { - for (; rxr_idx < rxr_remaining; v_start++, rxr_idx++) - i40evf_map_vector_to_rxq(adapter, v_start, rxr_idx); - - for (; txr_idx < txr_remaining; v_start++, txr_idx++) - i40evf_map_vector_to_txq(adapter, v_start, txr_idx); - goto out; - } + for (; ridx < rings_remaining; ridx++) { + i40evf_map_vector_to_rxq(adapter, vidx, ridx); + i40evf_map_vector_to_txq(adapter, vidx, ridx); - /* If we don't have enough vectors for a 1-to-1 - * mapping, we'll have to group them so there are - * multiple queues per vector. - * Re-adjusting *qpv takes care of the remainder. - */ - for (i = v_start; i < q_vectors; i++) { - rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - i); - for (j = 0; j < rqpv; j++) { - i40evf_map_vector_to_rxq(adapter, i, rxr_idx); - rxr_idx++; - rxr_remaining--; - } - } - for (i = v_start; i < q_vectors; i++) { - tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - i); - for (j = 0; j < tqpv; j++) { - i40evf_map_vector_to_txq(adapter, i, txr_idx); - txr_idx++; - txr_remaining--; - } + /* In the case where we have more queues than vectors, continue + * round-robin on vectors until all queues are mapped. + */ + if (++vidx >= q_vectors) + vidx = 0; } -out: adapter->aq_required |= I40EVF_FLAG_AQ_MAP_VECTORS; - - return err; } #ifdef CONFIG_NET_POLL_CONTROLLER @@ -546,6 +515,7 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename) unsigned int vector, q_vectors; unsigned int rx_int_idx = 0, tx_int_idx = 0; int irq_num, err; + int cpu; i40evf_irq_disable(adapter); /* Decrement for Other and TCP Timer vectors */ @@ -584,10 +554,12 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename) q_vector->affinity_notify.release = i40evf_irq_affinity_release; irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify); - /* get_cpu_mask returns a static constant mask with - * a permanent lifetime so it's ok to use here. + /* Spread the IRQ affinity hints across online CPUs. Note that + * get_cpu_mask returns a mask with a permanent lifetime so + * it's safe to use as a hint for irq_set_affinity_hint. */ - irq_set_affinity_hint(irq_num, get_cpu_mask(q_vector->v_idx)); + cpu = cpumask_local_spread(q_vector->v_idx, -1); + irq_set_affinity_hint(irq_num, get_cpu_mask(cpu)); } return 0; @@ -908,6 +880,8 @@ i40evf_mac_filter *i40evf_add_filter(struct i40evf_adapter *adapter, list_add_tail(&f->list, &adapter->mac_filter_list); f->add = true; adapter->aq_required |= I40EVF_FLAG_AQ_ADD_MAC_FILTER; + } else { + f->remove = false; } clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section); @@ -1217,9 +1191,18 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) { int i, num_active_queues; - num_active_queues = min_t(int, - adapter->vsi_res->num_queue_pairs, - (int)(num_online_cpus())); + /* If we're in reset reallocating queues we don't actually know yet for + * certain the PF gave us the number of queues we asked for but we'll + * assume it did. Once basic reset is finished we'll confirm once we + * start negotiating config with PF. + */ + if (adapter->num_req_queues) + num_active_queues = adapter->num_req_queues; + else + num_active_queues = min_t(int, + adapter->vsi_res->num_queue_pairs, + (int)(num_online_cpus())); + adapter->tx_rings = kcalloc(num_active_queues, sizeof(struct i40e_ring), GFP_KERNEL); @@ -1240,7 +1223,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) tx_ring->netdev = adapter->netdev; tx_ring->dev = &adapter->pdev->dev; tx_ring->count = adapter->tx_desc_count; - tx_ring->tx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_TX_DEF); + tx_ring->tx_itr_setting = I40E_ITR_TX_DEF; if (adapter->flags & I40EVF_FLAG_WB_ON_ITR_CAPABLE) tx_ring->flags |= I40E_TXR_FLAGS_WB_ON_ITR; @@ -1249,7 +1232,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) rx_ring->netdev = adapter->netdev; rx_ring->dev = &adapter->pdev->dev; rx_ring->count = adapter->rx_desc_count; - rx_ring->rx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_RX_DEF); + rx_ring->rx_itr_setting = I40E_ITR_RX_DEF; } adapter->num_active_queues = num_active_queues; @@ -1568,12 +1551,53 @@ static void i40evf_free_rss(struct i40evf_adapter *adapter) } /** + * i40evf_reinit_interrupt_scheme - Reallocate queues and vectors + * @adapter: board private structure + * + * Returns 0 on success, negative on failure + **/ +static int i40evf_reinit_interrupt_scheme(struct i40evf_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + int err; + + if (netif_running(netdev)) + i40evf_free_traffic_irqs(adapter); + i40evf_free_misc_irq(adapter); + i40evf_reset_interrupt_capability(adapter); + i40evf_free_q_vectors(adapter); + i40evf_free_queues(adapter); + + err = i40evf_init_interrupt_scheme(adapter); + if (err) + goto err; + + netif_tx_stop_all_queues(netdev); + + err = i40evf_request_misc_irq(adapter); + if (err) + goto err; + + set_bit(__I40E_VSI_DOWN, adapter->vsi.state); + + i40evf_map_rings_to_vectors(adapter); + + if (RSS_AQ(adapter)) + adapter->aq_required |= I40EVF_FLAG_AQ_CONFIGURE_RSS; + else + err = i40evf_init_rss(adapter); +err: + return err; +} + +/** * i40evf_watchdog_timer - Periodic call-back timer * @data: pointer to adapter disguised as unsigned long **/ -static void i40evf_watchdog_timer(unsigned long data) +static void i40evf_watchdog_timer(struct timer_list *t) { - struct i40evf_adapter *adapter = (struct i40evf_adapter *)data; + struct i40evf_adapter *adapter = from_timer(adapter, t, + watchdog_timer); schedule_work(&adapter->watchdog_task); /* timer will be rescheduled in watchdog task */ @@ -1913,8 +1937,15 @@ continue_reset: if (err) dev_info(&adapter->pdev->dev, "Failed to init adminq: %d\n", err); + adapter->aq_required = 0; - adapter->aq_required = I40EVF_FLAG_AQ_GET_CONFIG; + if (adapter->flags & I40EVF_FLAG_REINIT_ITR_NEEDED) { + err = i40evf_reinit_interrupt_scheme(adapter); + if (err) + goto reset_err; + } + + adapter->aq_required |= I40EVF_FLAG_AQ_GET_CONFIG; adapter->aq_required |= I40EVF_FLAG_AQ_MAP_VECTORS; /* re-add all MAC filters */ @@ -1944,6 +1975,15 @@ continue_reset: if (err) goto reset_err; + if (adapter->flags & I40EVF_FLAG_REINIT_ITR_NEEDED) { + err = i40evf_request_traffic_irqs(adapter, + netdev->name); + if (err) + goto reset_err; + + adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED; + } + i40evf_configure(adapter); i40evf_up_complete(adapter); @@ -2386,10 +2426,6 @@ out_err: return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); } -#define I40EVF_VLAN_FEATURES (NETIF_F_HW_VLAN_CTAG_TX |\ - NETIF_F_HW_VLAN_CTAG_RX |\ - NETIF_F_HW_VLAN_CTAG_FILTER) - /** * i40evf_fix_features - fix up the netdev feature bits * @netdev: our net device @@ -2402,9 +2438,11 @@ static netdev_features_t i40evf_fix_features(struct net_device *netdev, { struct i40evf_adapter *adapter = netdev_priv(netdev); - features &= ~I40EVF_VLAN_FEATURES; - if (adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN) - features |= I40EVF_VLAN_FEATURES; + if (!(adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)) + features &= ~(NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_FILTER); + return features; } @@ -2459,9 +2497,9 @@ static int i40evf_check_reset_complete(struct i40e_hw *hw) int i40evf_process_config(struct i40evf_adapter *adapter) { struct virtchnl_vf_resource *vfres = adapter->vf_res; + int i, num_req_queues = adapter->num_req_queues; struct net_device *netdev = adapter->netdev; struct i40e_vsi *vsi = &adapter->vsi; - int i; netdev_features_t hw_enc_features; netdev_features_t hw_features; @@ -2475,6 +2513,23 @@ int i40evf_process_config(struct i40evf_adapter *adapter) return -ENODEV; } + if (num_req_queues && + num_req_queues != adapter->vsi_res->num_queue_pairs) { + /* Problem. The PF gave us fewer queues than what we had + * negotiated in our request. Need a reset to see if we can't + * get back to a working state. + */ + dev_err(&adapter->pdev->dev, + "Requested %d queues, but PF only gave us %d.\n", + num_req_queues, + adapter->vsi_res->num_queue_pairs); + adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED; + adapter->num_req_queues = adapter->vsi_res->num_queue_pairs; + i40evf_schedule_reset(adapter); + return -ENODEV; + } + adapter->num_req_queues = 0; + hw_enc_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | @@ -2518,9 +2573,17 @@ int i40evf_process_config(struct i40evf_adapter *adapter) */ hw_features = hw_enc_features; + /* Enable VLAN features if supported */ + if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN) + hw_features |= (NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX); + netdev->hw_features |= hw_features; - netdev->features |= hw_features | I40EVF_VLAN_FEATURES; + netdev->features |= hw_features; + + if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN) + netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; adapter->vsi.id = adapter->vsi_res->vsi_id; @@ -2686,9 +2749,7 @@ static void i40evf_init_task(struct work_struct *work) ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr); } - init_timer(&adapter->watchdog_timer); - adapter->watchdog_timer.function = &i40evf_watchdog_timer; - adapter->watchdog_timer.data = (unsigned long)adapter; + timer_setup(&adapter->watchdog_timer, i40evf_watchdog_timer, 0); mod_timer(&adapter->watchdog_timer, jiffies + 1); adapter->tx_desc_count = I40EVF_DEFAULT_TXD; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c index 85876f4fb1fb..46c8b8a3907c 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c @@ -52,7 +52,7 @@ static int i40evf_send_pf_msg(struct i40evf_adapter *adapter, err = i40e_aq_send_msg_to_pf(hw, op, 0, msg, len, NULL); if (err) - dev_err(&adapter->pdev->dev, "Unable to send opcode %d to PF, err %s, aq_err %s\n", + dev_dbg(&adapter->pdev->dev, "Unable to send opcode %d to PF, err %s, aq_err %s\n", op, i40evf_stat_str(hw, err), i40evf_aq_str(hw, hw->aq.asq_last_status)); return err; @@ -160,7 +160,8 @@ int i40evf_send_vf_config_msg(struct i40evf_adapter *adapter) VIRTCHNL_VF_OFFLOAD_WB_ON_ITR | VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 | VIRTCHNL_VF_OFFLOAD_ENCAP | - VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM; + VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM | + VIRTCHNL_VF_OFFLOAD_REQ_QUEUES; adapter->current_op = VIRTCHNL_OP_GET_VF_RESOURCES; adapter->aq_required &= ~I40EVF_FLAG_AQ_GET_CONFIG; @@ -385,6 +386,33 @@ void i40evf_map_queues(struct i40evf_adapter *adapter) } /** + * i40evf_request_queues + * @adapter: adapter structure + * @num: number of requested queues + * + * We get a default number of queues from the PF. This enables us to request a + * different number. Returns 0 on success, negative on failure + **/ +int i40evf_request_queues(struct i40evf_adapter *adapter, int num) +{ + struct virtchnl_vf_res_request vfres; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot request queues, command %d pending\n", + adapter->current_op); + return -EBUSY; + } + + vfres.num_queue_pairs = num; + + adapter->current_op = VIRTCHNL_OP_REQUEST_QUEUES; + adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED; + return i40evf_send_pf_msg(adapter, VIRTCHNL_OP_REQUEST_QUEUES, + (u8 *)&vfres, sizeof(vfres)); +} + +/** * i40evf_add_ether_addrs * @adapter: adapter structure * @addrs: the MAC address filters to add (contiguous) @@ -1068,6 +1096,19 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, "Invalid message %d from PF\n", v_opcode); } break; + case VIRTCHNL_OP_REQUEST_QUEUES: { + struct virtchnl_vf_res_request *vfres = + (struct virtchnl_vf_res_request *)msg; + if (vfres->num_queue_pairs != adapter->num_req_queues) { + dev_info(&adapter->pdev->dev, + "Requested %d queues, PF can support %d\n", + adapter->num_req_queues, + vfres->num_queue_pairs); + adapter->num_req_queues = 0; + adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED; + } + } + break; default: if (adapter->current_op && (v_opcode != adapter->current_op)) dev_warn(&adapter->pdev->dev, "Expected response %d from PF, received %d\n", diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h index 1de82f247312..83cabff1e0ab 100644 --- a/drivers/net/ethernet/intel/igb/e1000_defines.h +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -353,7 +353,18 @@ #define E1000_RXPBS_CFG_TS_EN 0x80000000 #define I210_RXPBSIZE_DEFAULT 0x000000A2 /* RXPBSIZE default */ +#define I210_RXPBSIZE_MASK 0x0000003F +#define I210_RXPBSIZE_PB_32KB 0x00000020 #define I210_TXPBSIZE_DEFAULT 0x04000014 /* TXPBSIZE default */ +#define I210_TXPBSIZE_MASK 0xC0FFFFFF +#define I210_TXPBSIZE_PB0_8KB (8 << 0) +#define I210_TXPBSIZE_PB1_8KB (8 << 6) +#define I210_TXPBSIZE_PB2_4KB (4 << 12) +#define I210_TXPBSIZE_PB3_4KB (4 << 18) + +#define I210_DTXMXPKTSZ_DEFAULT 0x00000098 + +#define I210_SR_QUEUES_NUM 2 /* SerDes Control */ #define E1000_SCTL_DISABLE_SERDES_LOOPBACK 0x0400 @@ -1051,4 +1062,16 @@ #define E1000_VLAPQF_P_VALID(_n) (0x1 << (3 + (_n) * 4)) #define E1000_VLAPQF_QUEUE_MASK 0x03 +/* TX Qav Control fields */ +#define E1000_TQAVCTRL_XMIT_MODE BIT(0) +#define E1000_TQAVCTRL_DATAFETCHARB BIT(4) +#define E1000_TQAVCTRL_DATATRANARB BIT(8) + +/* TX Qav Credit Control fields */ +#define E1000_TQAVCC_IDLESLOPE_MASK 0xFFFF +#define E1000_TQAVCC_QUEUEMODE BIT(31) + +/* Transmit Descriptor Control fields */ +#define E1000_TXDCTL_PRIORITY BIT(27) + #endif diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h index 31a3f09df9f7..568c96842f28 100644 --- a/drivers/net/ethernet/intel/igb/e1000_regs.h +++ b/drivers/net/ethernet/intel/igb/e1000_regs.h @@ -421,6 +421,14 @@ do { \ #define E1000_I210_FLA 0x1201C +#define E1000_I210_DTXMXPKTSZ 0x355C + +#define E1000_I210_TXDCTL(_n) (0x0E028 + ((_n) * 0x40)) + +#define E1000_I210_TQAVCTRL 0x3570 +#define E1000_I210_TQAVCC(_n) (0x3004 + ((_n) * 0x40)) +#define E1000_I210_TQAVHC(_n) (0x300C + ((_n) * 0x40)) + #define E1000_INVM_DATA_REG(_n) (0x12120 + 4*(_n)) #define E1000_INVM_SIZE 64 /* Number of INVM Data Registers */ diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 06ffb2bc713e..92845692087a 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -281,6 +281,11 @@ struct igb_ring { u16 count; /* number of desc. in the ring */ u8 queue_index; /* logical index of the ring*/ u8 reg_idx; /* physical index of the ring */ + bool cbs_enable; /* indicates if CBS is enabled */ + s32 idleslope; /* idleSlope in kbps */ + s32 sendslope; /* sendSlope in kbps */ + s32 hicredit; /* hiCredit in bytes */ + s32 locredit; /* loCredit in bytes */ /* everything past this point are written often */ u16 next_to_clean; @@ -621,6 +626,7 @@ struct igb_adapter { #define IGB_FLAG_EEE BIT(14) #define IGB_FLAG_VLAN_PROMISC BIT(15) #define IGB_FLAG_RX_LEGACY BIT(16) +#define IGB_FLAG_FQTSS BIT(17) /* Media Auto Sense */ #define IGB_MAS_ENABLE_0 0X0001 diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 18b6c25d4705..e94d3c256667 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -34,6 +34,7 @@ #include <linux/slab.h> #include <net/checksum.h> #include <net/ip6_checksum.h> +#include <net/pkt_sched.h> #include <linux/net_tstamp.h> #include <linux/mii.h> #include <linux/ethtool.h> @@ -62,6 +63,17 @@ #define BUILD 0 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \ __stringify(BUILD) "-k" + +enum queue_mode { + QUEUE_MODE_STRICT_PRIORITY, + QUEUE_MODE_STREAM_RESERVATION, +}; + +enum tx_queue_prio { + TX_QUEUE_PRIO_HIGH, + TX_QUEUE_PRIO_LOW, +}; + char igb_driver_name[] = "igb"; char igb_driver_version[] = DRV_VERSION; static const char igb_driver_string[] = @@ -133,8 +145,8 @@ static void igb_clean_all_rx_rings(struct igb_adapter *); static void igb_clean_tx_ring(struct igb_ring *); static void igb_clean_rx_ring(struct igb_ring *); static void igb_set_rx_mode(struct net_device *); -static void igb_update_phy_info(unsigned long); -static void igb_watchdog(unsigned long); +static void igb_update_phy_info(struct timer_list *); +static void igb_watchdog(struct timer_list *); static void igb_watchdog_task(struct work_struct *); static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *); static void igb_get_stats64(struct net_device *dev, @@ -1271,6 +1283,12 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter, ring->count = adapter->tx_ring_count; ring->queue_index = txr_idx; + ring->cbs_enable = false; + ring->idleslope = 0; + ring->sendslope = 0; + ring->hicredit = 0; + ring->locredit = 0; + u64_stats_init(&ring->tx_syncp); u64_stats_init(&ring->tx_syncp2); @@ -1598,6 +1616,284 @@ static void igb_get_hw_control(struct igb_adapter *adapter) ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); } +static void enable_fqtss(struct igb_adapter *adapter, bool enable) +{ + struct net_device *netdev = adapter->netdev; + struct e1000_hw *hw = &adapter->hw; + + WARN_ON(hw->mac.type != e1000_i210); + + if (enable) + adapter->flags |= IGB_FLAG_FQTSS; + else + adapter->flags &= ~IGB_FLAG_FQTSS; + + if (netif_running(netdev)) + schedule_work(&adapter->reset_task); +} + +static bool is_fqtss_enabled(struct igb_adapter *adapter) +{ + return (adapter->flags & IGB_FLAG_FQTSS) ? true : false; +} + +static void set_tx_desc_fetch_prio(struct e1000_hw *hw, int queue, + enum tx_queue_prio prio) +{ + u32 val; + + WARN_ON(hw->mac.type != e1000_i210); + WARN_ON(queue < 0 || queue > 4); + + val = rd32(E1000_I210_TXDCTL(queue)); + + if (prio == TX_QUEUE_PRIO_HIGH) + val |= E1000_TXDCTL_PRIORITY; + else + val &= ~E1000_TXDCTL_PRIORITY; + + wr32(E1000_I210_TXDCTL(queue), val); +} + +static void set_queue_mode(struct e1000_hw *hw, int queue, enum queue_mode mode) +{ + u32 val; + + WARN_ON(hw->mac.type != e1000_i210); + WARN_ON(queue < 0 || queue > 1); + + val = rd32(E1000_I210_TQAVCC(queue)); + + if (mode == QUEUE_MODE_STREAM_RESERVATION) + val |= E1000_TQAVCC_QUEUEMODE; + else + val &= ~E1000_TQAVCC_QUEUEMODE; + + wr32(E1000_I210_TQAVCC(queue), val); +} + +/** + * igb_configure_cbs - Configure Credit-Based Shaper (CBS) + * @adapter: pointer to adapter struct + * @queue: queue number + * @enable: true = enable CBS, false = disable CBS + * @idleslope: idleSlope in kbps + * @sendslope: sendSlope in kbps + * @hicredit: hiCredit in bytes + * @locredit: loCredit in bytes + * + * Configure CBS for a given hardware queue. When disabling, idleslope, + * sendslope, hicredit, locredit arguments are ignored. Returns 0 if + * success. Negative otherwise. + **/ +static void igb_configure_cbs(struct igb_adapter *adapter, int queue, + bool enable, int idleslope, int sendslope, + int hicredit, int locredit) +{ + struct net_device *netdev = adapter->netdev; + struct e1000_hw *hw = &adapter->hw; + u32 tqavcc; + u16 value; + + WARN_ON(hw->mac.type != e1000_i210); + WARN_ON(queue < 0 || queue > 1); + + if (enable) { + set_tx_desc_fetch_prio(hw, queue, TX_QUEUE_PRIO_HIGH); + set_queue_mode(hw, queue, QUEUE_MODE_STREAM_RESERVATION); + + /* According to i210 datasheet section 7.2.7.7, we should set + * the 'idleSlope' field from TQAVCC register following the + * equation: + * + * For 100 Mbps link speed: + * + * value = BW * 0x7735 * 0.2 (E1) + * + * For 1000Mbps link speed: + * + * value = BW * 0x7735 * 2 (E2) + * + * E1 and E2 can be merged into one equation as shown below. + * Note that 'link-speed' is in Mbps. + * + * value = BW * 0x7735 * 2 * link-speed + * -------------- (E3) + * 1000 + * + * 'BW' is the percentage bandwidth out of full link speed + * which can be found with the following equation. Note that + * idleSlope here is the parameter from this function which + * is in kbps. + * + * BW = idleSlope + * ----------------- (E4) + * link-speed * 1000 + * + * That said, we can come up with a generic equation to + * calculate the value we should set it TQAVCC register by + * replacing 'BW' in E3 by E4. The resulting equation is: + * + * value = idleSlope * 0x7735 * 2 * link-speed + * ----------------- -------------- (E5) + * link-speed * 1000 1000 + * + * 'link-speed' is present in both sides of the fraction so + * it is canceled out. The final equation is the following: + * + * value = idleSlope * 61034 + * ----------------- (E6) + * 1000000 + */ + value = DIV_ROUND_UP_ULL(idleslope * 61034ULL, 1000000); + + tqavcc = rd32(E1000_I210_TQAVCC(queue)); + tqavcc &= ~E1000_TQAVCC_IDLESLOPE_MASK; + tqavcc |= value; + wr32(E1000_I210_TQAVCC(queue), tqavcc); + + wr32(E1000_I210_TQAVHC(queue), 0x80000000 + hicredit * 0x7735); + } else { + set_tx_desc_fetch_prio(hw, queue, TX_QUEUE_PRIO_LOW); + set_queue_mode(hw, queue, QUEUE_MODE_STRICT_PRIORITY); + + /* Set idleSlope to zero. */ + tqavcc = rd32(E1000_I210_TQAVCC(queue)); + tqavcc &= ~E1000_TQAVCC_IDLESLOPE_MASK; + wr32(E1000_I210_TQAVCC(queue), tqavcc); + + /* Set hiCredit to zero. */ + wr32(E1000_I210_TQAVHC(queue), 0); + } + + /* XXX: In i210 controller the sendSlope and loCredit parameters from + * CBS are not configurable by software so we don't do any 'controller + * configuration' in respect to these parameters. + */ + + netdev_dbg(netdev, "CBS %s: queue %d idleslope %d sendslope %d hiCredit %d locredit %d\n", + (enable) ? "enabled" : "disabled", queue, + idleslope, sendslope, hicredit, locredit); +} + +static int igb_save_cbs_params(struct igb_adapter *adapter, int queue, + bool enable, int idleslope, int sendslope, + int hicredit, int locredit) +{ + struct igb_ring *ring; + + if (queue < 0 || queue > adapter->num_tx_queues) + return -EINVAL; + + ring = adapter->tx_ring[queue]; + + ring->cbs_enable = enable; + ring->idleslope = idleslope; + ring->sendslope = sendslope; + ring->hicredit = hicredit; + ring->locredit = locredit; + + return 0; +} + +static bool is_any_cbs_enabled(struct igb_adapter *adapter) +{ + struct igb_ring *ring; + int i; + + for (i = 0; i < adapter->num_tx_queues; i++) { + ring = adapter->tx_ring[i]; + + if (ring->cbs_enable) + return true; + } + + return false; +} + +static void igb_setup_tx_mode(struct igb_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct e1000_hw *hw = &adapter->hw; + u32 val; + + /* Only i210 controller supports changing the transmission mode. */ + if (hw->mac.type != e1000_i210) + return; + + if (is_fqtss_enabled(adapter)) { + int i, max_queue; + + /* Configure TQAVCTRL register: set transmit mode to 'Qav', + * set data fetch arbitration to 'round robin' and set data + * transfer arbitration to 'credit shaper algorithm. + */ + val = rd32(E1000_I210_TQAVCTRL); + val |= E1000_TQAVCTRL_XMIT_MODE | E1000_TQAVCTRL_DATATRANARB; + val &= ~E1000_TQAVCTRL_DATAFETCHARB; + wr32(E1000_I210_TQAVCTRL, val); + + /* Configure Tx and Rx packet buffers sizes as described in + * i210 datasheet section 7.2.7.7. + */ + val = rd32(E1000_TXPBS); + val &= ~I210_TXPBSIZE_MASK; + val |= I210_TXPBSIZE_PB0_8KB | I210_TXPBSIZE_PB1_8KB | + I210_TXPBSIZE_PB2_4KB | I210_TXPBSIZE_PB3_4KB; + wr32(E1000_TXPBS, val); + + val = rd32(E1000_RXPBS); + val &= ~I210_RXPBSIZE_MASK; + val |= I210_RXPBSIZE_PB_32KB; + wr32(E1000_RXPBS, val); + + /* Section 8.12.9 states that MAX_TPKT_SIZE from DTXMXPKTSZ + * register should not exceed the buffer size programmed in + * TXPBS. The smallest buffer size programmed in TXPBS is 4kB + * so according to the datasheet we should set MAX_TPKT_SIZE to + * 4kB / 64. + * + * However, when we do so, no frame from queue 2 and 3 are + * transmitted. It seems the MAX_TPKT_SIZE should not be great + * or _equal_ to the buffer size programmed in TXPBS. For this + * reason, we set set MAX_ TPKT_SIZE to (4kB - 1) / 64. + */ + val = (4096 - 1) / 64; + wr32(E1000_I210_DTXMXPKTSZ, val); + + /* Since FQTSS mode is enabled, apply any CBS configuration + * previously set. If no previous CBS configuration has been + * done, then the initial configuration is applied, which means + * CBS is disabled. + */ + max_queue = (adapter->num_tx_queues < I210_SR_QUEUES_NUM) ? + adapter->num_tx_queues : I210_SR_QUEUES_NUM; + + for (i = 0; i < max_queue; i++) { + struct igb_ring *ring = adapter->tx_ring[i]; + + igb_configure_cbs(adapter, i, ring->cbs_enable, + ring->idleslope, ring->sendslope, + ring->hicredit, ring->locredit); + } + } else { + wr32(E1000_RXPBS, I210_RXPBSIZE_DEFAULT); + wr32(E1000_TXPBS, I210_TXPBSIZE_DEFAULT); + wr32(E1000_I210_DTXMXPKTSZ, I210_DTXMXPKTSZ_DEFAULT); + + val = rd32(E1000_I210_TQAVCTRL); + /* According to Section 8.12.21, the other flags we've set when + * enabling FQTSS are not relevant when disabling FQTSS so we + * don't set they here. + */ + val &= ~E1000_TQAVCTRL_XMIT_MODE; + wr32(E1000_I210_TQAVCTRL, val); + } + + netdev_dbg(netdev, "FQTSS %s\n", (is_fqtss_enabled(adapter)) ? + "enabled" : "disabled"); +} + /** * igb_configure - configure the hardware for RX and TX * @adapter: private board structure @@ -1609,6 +1905,7 @@ static void igb_configure(struct igb_adapter *adapter) igb_get_hw_control(adapter); igb_set_rx_mode(netdev); + igb_setup_tx_mode(adapter); igb_restore_vlan(adapter); @@ -2150,6 +2447,55 @@ igb_features_check(struct sk_buff *skb, struct net_device *dev, return features; } +static int igb_offload_cbs(struct igb_adapter *adapter, + struct tc_cbs_qopt_offload *qopt) +{ + struct e1000_hw *hw = &adapter->hw; + int err; + + /* CBS offloading is only supported by i210 controller. */ + if (hw->mac.type != e1000_i210) + return -EOPNOTSUPP; + + /* CBS offloading is only supported by queue 0 and queue 1. */ + if (qopt->queue < 0 || qopt->queue > 1) + return -EINVAL; + + err = igb_save_cbs_params(adapter, qopt->queue, qopt->enable, + qopt->idleslope, qopt->sendslope, + qopt->hicredit, qopt->locredit); + if (err) + return err; + + if (is_fqtss_enabled(adapter)) { + igb_configure_cbs(adapter, qopt->queue, qopt->enable, + qopt->idleslope, qopt->sendslope, + qopt->hicredit, qopt->locredit); + + if (!is_any_cbs_enabled(adapter)) + enable_fqtss(adapter, false); + + } else { + enable_fqtss(adapter, true); + } + + return 0; +} + +static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + struct igb_adapter *adapter = netdev_priv(dev); + + switch (type) { + case TC_SETUP_QDISC_CBS: + return igb_offload_cbs(adapter, type_data); + + default: + return -EOPNOTSUPP; + } +} + static const struct net_device_ops igb_netdev_ops = { .ndo_open = igb_open, .ndo_stop = igb_close, @@ -2175,6 +2521,7 @@ static const struct net_device_ops igb_netdev_ops = { .ndo_set_features = igb_set_features, .ndo_fdb_add = igb_ndo_fdb_add, .ndo_features_check = igb_features_check, + .ndo_setup_tc = igb_setup_tc, }; /** @@ -2538,10 +2885,8 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) wr32(E1000_TXPBS, I210_TXPBSIZE_DEFAULT); } - setup_timer(&adapter->watchdog_timer, igb_watchdog, - (unsigned long) adapter); - setup_timer(&adapter->phy_info_timer, igb_update_phy_info, - (unsigned long) adapter); + timer_setup(&adapter->watchdog_timer, igb_watchdog, 0); + timer_setup(&adapter->phy_info_timer, igb_update_phy_info, 0); INIT_WORK(&adapter->reset_task, igb_reset_task); INIT_WORK(&adapter->watchdog_task, igb_watchdog_task); @@ -3162,6 +3507,8 @@ static int igb_sw_init(struct igb_adapter *adapter) /* Setup and initialize a copy of the hw vlan table array */ adapter->shadow_vfta = kcalloc(E1000_VLAN_FILTER_TBL_SIZE, sizeof(u32), GFP_ATOMIC); + if (!adapter->shadow_vfta) + return -ENOMEM; /* This call may decrease the number of queues */ if (igb_init_interrupt_scheme(adapter, true)) { @@ -4423,9 +4770,9 @@ static void igb_spoof_check(struct igb_adapter *adapter) /* Need to wait a few seconds after link up to get diagnostic information from * the phy */ -static void igb_update_phy_info(unsigned long data) +static void igb_update_phy_info(struct timer_list *t) { - struct igb_adapter *adapter = (struct igb_adapter *) data; + struct igb_adapter *adapter = from_timer(adapter, t, phy_info_timer); igb_get_phy_info(&adapter->hw); } @@ -4512,9 +4859,9 @@ static void igb_check_lvmmc(struct igb_adapter *adapter) * igb_watchdog - Timer Call-back * @data: pointer to adapter cast into an unsigned long **/ -static void igb_watchdog(unsigned long data) +static void igb_watchdog(struct timer_list *t) { - struct igb_adapter *adapter = (struct igb_adapter *)data; + struct igb_adapter *adapter = from_timer(adapter, t, watchdog_timer); /* Do the rest outside of interrupt context */ schedule_work(&adapter->watchdog_task); } diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 1ed556911b14..713e8df23744 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -1915,9 +1915,9 @@ static bool igbvf_has_link(struct igbvf_adapter *adapter) * igbvf_watchdog - Timer Call-back * @data: pointer to adapter cast into an unsigned long **/ -static void igbvf_watchdog(unsigned long data) +static void igbvf_watchdog(struct timer_list *t) { - struct igbvf_adapter *adapter = (struct igbvf_adapter *)data; + struct igbvf_adapter *adapter = from_timer(adapter, t, watchdog_timer); /* Do the rest outside of interrupt context */ schedule_work(&adapter->watchdog_task); @@ -2878,8 +2878,7 @@ static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->addr_len); } - setup_timer(&adapter->watchdog_timer, &igbvf_watchdog, - (unsigned long)adapter); + timer_setup(&adapter->watchdog_timer, igbvf_watchdog, 0); INIT_WORK(&adapter->reset_task, igbvf_reset_task); INIT_WORK(&adapter->watchdog_task, igbvf_watchdog_task); diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index 5a713199653c..2353c383f0a7 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -83,7 +83,7 @@ static void ixgb_setup_rctl(struct ixgb_adapter *adapter); static void ixgb_clean_tx_ring(struct ixgb_adapter *adapter); static void ixgb_clean_rx_ring(struct ixgb_adapter *adapter); static void ixgb_set_multi(struct net_device *netdev); -static void ixgb_watchdog(unsigned long data); +static void ixgb_watchdog(struct timer_list *t); static netdev_tx_t ixgb_xmit_frame(struct sk_buff *skb, struct net_device *netdev); static int ixgb_change_mtu(struct net_device *netdev, int new_mtu); @@ -508,9 +508,7 @@ ixgb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->part_num = ixgb_get_ee_pba_number(&adapter->hw); - init_timer(&adapter->watchdog_timer); - adapter->watchdog_timer.function = ixgb_watchdog; - adapter->watchdog_timer.data = (unsigned long)adapter; + timer_setup(&adapter->watchdog_timer, ixgb_watchdog, 0); INIT_WORK(&adapter->tx_timeout_task, ixgb_tx_timeout_task); @@ -1152,9 +1150,9 @@ alloc_failed: **/ static void -ixgb_watchdog(unsigned long data) +ixgb_watchdog(struct timer_list *t) { - struct ixgb_adapter *adapter = (struct ixgb_adapter *)data; + struct ixgb_adapter *adapter = from_timer(adapter, t, watchdog_timer); struct net_device *netdev = adapter->netdev; struct ixgb_desc_ring *txdr = &adapter->tx_ring; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index dd5578756ae0..468c3555a629 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -275,6 +275,7 @@ struct ixgbe_rx_queue_stats { u64 rsc_count; u64 rsc_flush; u64 non_eop_descs; + u64 alloc_rx_page; u64 alloc_rx_page_failed; u64 alloc_rx_buff_failed; u64 csum_err; @@ -434,8 +435,15 @@ static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring) } #define ixgbe_rx_pg_size(_ring) (PAGE_SIZE << ixgbe_rx_pg_order(_ring)) +#define IXGBE_ITR_ADAPTIVE_MIN_INC 2 +#define IXGBE_ITR_ADAPTIVE_MIN_USECS 10 +#define IXGBE_ITR_ADAPTIVE_MAX_USECS 126 +#define IXGBE_ITR_ADAPTIVE_LATENCY 0x80 +#define IXGBE_ITR_ADAPTIVE_BULK 0x00 + struct ixgbe_ring_container { struct ixgbe_ring *ring; /* pointer to linked list of rings */ + unsigned long next_update; /* jiffies value of last update */ unsigned int total_bytes; /* total bytes processed this int */ unsigned int total_packets; /* total packets processed this int */ u16 work_limit; /* total work allowed per interrupt */ @@ -655,6 +663,7 @@ struct ixgbe_adapter { u64 rsc_total_count; u64 rsc_total_flush; u64 non_eop_descs; + u32 alloc_rx_page; u32 alloc_rx_page_failed; u32 alloc_rx_buff_failed; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 6e6ab6f6875e..9bef255f6a18 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -3781,10 +3781,10 @@ s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min, fw_cmd.ver_build = build; fw_cmd.ver_sub = sub; fw_cmd.hdr.checksum = 0; - fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd, - (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len)); fw_cmd.pad = 0; fw_cmd.pad2 = 0; + fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd, + (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len)); for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) { ret_val = ixgbe_host_interface_command(hw, &fw_cmd, @@ -4081,8 +4081,8 @@ bool ixgbe_mng_present(struct ixgbe_hw *hw) return false; fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM(hw)); - fwsm &= IXGBE_FWSM_MODE_MASK; - return fwsm == IXGBE_FWSM_FW_MODE_PT; + + return !!(fwsm & IXGBE_FWSM_FW_MODE_PT); } /** diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index c3e7a8191128..0aad1c2a3667 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -104,6 +104,7 @@ static const struct ixgbe_stats ixgbe_gstrings_stats[] = { {"tx_flow_control_xoff", IXGBE_STAT(stats.lxofftxc)}, {"rx_flow_control_xoff", IXGBE_STAT(stats.lxoffrxc)}, {"rx_csum_offload_errors", IXGBE_STAT(hw_csum_rx_error)}, + {"alloc_rx_page", IXGBE_STAT(alloc_rx_page)}, {"alloc_rx_page_failed", IXGBE_STAT(alloc_rx_page_failed)}, {"alloc_rx_buff_failed", IXGBE_STAT(alloc_rx_buff_failed)}, {"rx_no_dma_resources", IXGBE_STAT(hw_rx_no_dma_resources)}, @@ -1916,8 +1917,6 @@ static u16 ixgbe_clean_test_rings(struct ixgbe_ring *rx_ring, unsigned int size) { union ixgbe_adv_rx_desc *rx_desc; - struct ixgbe_rx_buffer *rx_buffer; - struct ixgbe_tx_buffer *tx_buffer; u16 rx_ntc, tx_ntc, count = 0; /* initialize next to clean and descriptor values */ @@ -1925,7 +1924,38 @@ static u16 ixgbe_clean_test_rings(struct ixgbe_ring *rx_ring, tx_ntc = tx_ring->next_to_clean; rx_desc = IXGBE_RX_DESC(rx_ring, rx_ntc); + while (tx_ntc != tx_ring->next_to_use) { + union ixgbe_adv_tx_desc *tx_desc; + struct ixgbe_tx_buffer *tx_buffer; + + tx_desc = IXGBE_TX_DESC(tx_ring, tx_ntc); + + /* if DD is not set transmit has not completed */ + if (!(tx_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD))) + return count; + + /* unmap buffer on Tx side */ + tx_buffer = &tx_ring->tx_buffer_info[tx_ntc]; + + /* Free all the Tx ring sk_buffs */ + dev_kfree_skb_any(tx_buffer->skb); + + /* unmap skb header data */ + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buffer, len, 0); + + /* increment Tx next to clean counter */ + tx_ntc++; + if (tx_ntc == tx_ring->count) + tx_ntc = 0; + } + while (rx_desc->wb.upper.length) { + struct ixgbe_rx_buffer *rx_buffer; + /* check Rx buffer */ rx_buffer = &rx_ring->rx_buffer_info[rx_ntc]; @@ -1938,6 +1968,8 @@ static u16 ixgbe_clean_test_rings(struct ixgbe_ring *rx_ring, /* verify contents of skb */ if (ixgbe_check_lbtest_frame(rx_buffer, size)) count++; + else + break; /* sync Rx buffer for device write */ dma_sync_single_for_device(rx_ring->dev, @@ -1945,26 +1977,10 @@ static u16 ixgbe_clean_test_rings(struct ixgbe_ring *rx_ring, ixgbe_rx_bufsz(rx_ring), DMA_FROM_DEVICE); - /* unmap buffer on Tx side */ - tx_buffer = &tx_ring->tx_buffer_info[tx_ntc]; - - /* Free all the Tx ring sk_buffs */ - dev_kfree_skb_any(tx_buffer->skb); - - /* unmap skb header data */ - dma_unmap_single(tx_ring->dev, - dma_unmap_addr(tx_buffer, dma), - dma_unmap_len(tx_buffer, len), - DMA_TO_DEVICE); - dma_unmap_len_set(tx_buffer, len, 0); - - /* increment Rx/Tx next to clean counters */ + /* increment Rx next to clean counter */ rx_ntc++; if (rx_ntc == rx_ring->count) rx_ntc = 0; - tx_ntc++; - if (tx_ntc == tx_ring->count) - tx_ntc = 0; /* fetch next descriptor */ rx_desc = IXGBE_RX_DESC(rx_ring, rx_ntc); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index f1bfae0c41d0..8e2a957aca18 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -806,6 +806,7 @@ static void ixgbe_add_ring(struct ixgbe_ring *ring, ring->next = head->ring; head->ring = ring; head->count++; + head->next_update = jiffies + 1; } /** @@ -879,8 +880,11 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter, /* initialize work limits */ q_vector->tx.work_limit = adapter->tx_work_limit; - /* initialize pointer to rings */ - ring = q_vector->ring; + /* Initialize setting for adaptive ITR */ + q_vector->tx.itr = IXGBE_ITR_ADAPTIVE_MAX_USECS | + IXGBE_ITR_ADAPTIVE_LATENCY; + q_vector->rx.itr = IXGBE_ITR_ADAPTIVE_MAX_USECS | + IXGBE_ITR_ADAPTIVE_LATENCY; /* intialize ITR */ if (txr_count && !rxr_count) { @@ -897,6 +901,9 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter, q_vector->itr = adapter->rx_itr_setting; } + /* initialize pointer to rings */ + ring = q_vector->ring; + while (txr_count) { /* assign generic ring traits */ ring->dev = &adapter->pdev->dev; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 935a2f15b0b0..ca06c3cc2ca8 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1620,6 +1620,7 @@ static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring, bi->page = page; bi->page_offset = ixgbe_rx_offset(rx_ring); bi->pagecnt_bias = 1; + rx_ring->rx_stats.alloc_rx_page++; return true; } @@ -2133,6 +2134,21 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring, #if L1_CACHE_BYTES < 128 prefetch(xdp->data + L1_CACHE_BYTES); #endif + /* Note, we get here by enabling legacy-rx via: + * + * ethtool --set-priv-flags <dev> legacy-rx on + * + * In this mode, we currently get 0 extra XDP headroom as + * opposed to having legacy-rx off, where we process XDP + * packets going to stack via ixgbe_build_skb(). The latter + * provides us currently with 192 bytes of headroom. + * + * For ixgbe_construct_skb() mode it means that the + * xdp->data_meta will always point to xdp->data, since + * the helper cannot expand the head. Should this ever + * change in future for legacy-rx mode on, then lets also + * add xdp->data_meta handling here. + */ /* allocate a skb to store the frags */ skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBE_RX_HDR_SIZE); @@ -2165,6 +2181,7 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring, struct xdp_buff *xdp, union ixgbe_adv_rx_desc *rx_desc) { + unsigned int metasize = xdp->data - xdp->data_meta; #if (PAGE_SIZE < 8192) unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2; #else @@ -2174,10 +2191,14 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring, #endif struct sk_buff *skb; - /* prefetch first cache line of first page */ - prefetch(xdp->data); + /* Prefetch first cache line of first page. If xdp->data_meta + * is unused, this points extactly as xdp->data, otherwise we + * likely have a consumer accessing first few bytes of meta + * data, and then actual data. + */ + prefetch(xdp->data_meta); #if L1_CACHE_BYTES < 128 - prefetch(xdp->data + L1_CACHE_BYTES); + prefetch(xdp->data_meta + L1_CACHE_BYTES); #endif /* build an skb to around the page buffer */ @@ -2188,6 +2209,8 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring, /* update pointers within the skb to store the data */ skb_reserve(skb, xdp->data - xdp->data_hard_start); __skb_put(skb, xdp->data_end - xdp->data); + if (metasize) + skb_metadata_set(skb, metasize); /* record DMA address if this is the start of a chain of buffers */ if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)) @@ -2326,6 +2349,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, if (!skb) { xdp.data = page_address(rx_buffer->page) + rx_buffer->page_offset; + xdp.data_meta = xdp.data; xdp.data_hard_start = xdp.data - ixgbe_rx_offset(rx_ring); xdp.data_end = xdp.data + size; @@ -2516,50 +2540,174 @@ enum latency_range { static void ixgbe_update_itr(struct ixgbe_q_vector *q_vector, struct ixgbe_ring_container *ring_container) { - int bytes = ring_container->total_bytes; - int packets = ring_container->total_packets; - u32 timepassed_us; - u64 bytes_perint; - u8 itr_setting = ring_container->itr; + unsigned int itr = IXGBE_ITR_ADAPTIVE_MIN_USECS | + IXGBE_ITR_ADAPTIVE_LATENCY; + unsigned int avg_wire_size, packets, bytes; + unsigned long next_update = jiffies; - if (packets == 0) + /* If we don't have any rings just leave ourselves set for maximum + * possible latency so we take ourselves out of the equation. + */ + if (!ring_container->ring) return; - /* simple throttlerate management - * 0-10MB/s lowest (100000 ints/s) - * 10-20MB/s low (20000 ints/s) - * 20-1249MB/s bulk (12000 ints/s) + /* If we didn't update within up to 1 - 2 jiffies we can assume + * that either packets are coming in so slow there hasn't been + * any work, or that there is so much work that NAPI is dealing + * with interrupt moderation and we don't need to do anything. */ - /* what was last interrupt timeslice? */ - timepassed_us = q_vector->itr >> 2; - if (timepassed_us == 0) - return; + if (time_after(next_update, ring_container->next_update)) + goto clear_counts; - bytes_perint = bytes / timepassed_us; /* bytes/usec */ + packets = ring_container->total_packets; - switch (itr_setting) { - case lowest_latency: - if (bytes_perint > 10) - itr_setting = low_latency; - break; - case low_latency: - if (bytes_perint > 20) - itr_setting = bulk_latency; - else if (bytes_perint <= 10) - itr_setting = lowest_latency; + /* We have no packets to actually measure against. This means + * either one of the other queues on this vector is active or + * we are a Tx queue doing TSO with too high of an interrupt rate. + * + * When this occurs just tick up our delay by the minimum value + * and hope that this extra delay will prevent us from being called + * without any work on our queue. + */ + if (!packets) { + itr = (q_vector->itr >> 2) + IXGBE_ITR_ADAPTIVE_MIN_INC; + if (itr > IXGBE_ITR_ADAPTIVE_MAX_USECS) + itr = IXGBE_ITR_ADAPTIVE_MAX_USECS; + itr += ring_container->itr & IXGBE_ITR_ADAPTIVE_LATENCY; + goto clear_counts; + } + + bytes = ring_container->total_bytes; + + /* If packets are less than 4 or bytes are less than 9000 assume + * insufficient data to use bulk rate limiting approach. We are + * likely latency driven. + */ + if (packets < 4 && bytes < 9000) { + itr = IXGBE_ITR_ADAPTIVE_LATENCY; + goto adjust_by_size; + } + + /* Between 4 and 48 we can assume that our current interrupt delay + * is only slightly too low. As such we should increase it by a small + * fixed amount. + */ + if (packets < 48) { + itr = (q_vector->itr >> 2) + IXGBE_ITR_ADAPTIVE_MIN_INC; + if (itr > IXGBE_ITR_ADAPTIVE_MAX_USECS) + itr = IXGBE_ITR_ADAPTIVE_MAX_USECS; + goto clear_counts; + } + + /* Between 48 and 96 is our "goldilocks" zone where we are working + * out "just right". Just report that our current ITR is good for us. + */ + if (packets < 96) { + itr = q_vector->itr >> 2; + goto clear_counts; + } + + /* If packet count is 96 or greater we are likely looking at a slight + * overrun of the delay we want. Try halving our delay to see if that + * will cut the number of packets in half per interrupt. + */ + if (packets < 256) { + itr = q_vector->itr >> 3; + if (itr < IXGBE_ITR_ADAPTIVE_MIN_USECS) + itr = IXGBE_ITR_ADAPTIVE_MIN_USECS; + goto clear_counts; + } + + /* The paths below assume we are dealing with a bulk ITR since number + * of packets is 256 or greater. We are just going to have to compute + * a value and try to bring the count under control, though for smaller + * packet sizes there isn't much we can do as NAPI polling will likely + * be kicking in sooner rather than later. + */ + itr = IXGBE_ITR_ADAPTIVE_BULK; + +adjust_by_size: + /* If packet counts are 256 or greater we can assume we have a gross + * overestimation of what the rate should be. Instead of trying to fine + * tune it just use the formula below to try and dial in an exact value + * give the current packet size of the frame. + */ + avg_wire_size = bytes / packets; + + /* The following is a crude approximation of: + * wmem_default / (size + overhead) = desired_pkts_per_int + * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate + * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value + * + * Assuming wmem_default is 212992 and overhead is 640 bytes per + * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the + * formula down to + * + * (170 * (size + 24)) / (size + 640) = ITR + * + * We first do some math on the packet size and then finally bitshift + * by 8 after rounding up. We also have to account for PCIe link speed + * difference as ITR scales based on this. + */ + if (avg_wire_size <= 60) { + /* Start at 50k ints/sec */ + avg_wire_size = 5120; + } else if (avg_wire_size <= 316) { + /* 50K ints/sec to 16K ints/sec */ + avg_wire_size *= 40; + avg_wire_size += 2720; + } else if (avg_wire_size <= 1084) { + /* 16K ints/sec to 9.2K ints/sec */ + avg_wire_size *= 15; + avg_wire_size += 11452; + } else if (avg_wire_size <= 1980) { + /* 9.2K ints/sec to 8K ints/sec */ + avg_wire_size *= 5; + avg_wire_size += 22420; + } else { + /* plateau at a limit of 8K ints/sec */ + avg_wire_size = 32256; + } + + /* If we are in low latency mode half our delay which doubles the rate + * to somewhere between 100K to 16K ints/sec + */ + if (itr & IXGBE_ITR_ADAPTIVE_LATENCY) + avg_wire_size >>= 1; + + /* Resultant value is 256 times larger than it needs to be. This + * gives us room to adjust the value as needed to either increase + * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc. + * + * Use addition as we have already recorded the new latency flag + * for the ITR value. + */ + switch (q_vector->adapter->link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + case IXGBE_LINK_SPEED_100_FULL: + default: + itr += DIV_ROUND_UP(avg_wire_size, + IXGBE_ITR_ADAPTIVE_MIN_INC * 256) * + IXGBE_ITR_ADAPTIVE_MIN_INC; break; - case bulk_latency: - if (bytes_perint <= 20) - itr_setting = low_latency; + case IXGBE_LINK_SPEED_2_5GB_FULL: + case IXGBE_LINK_SPEED_1GB_FULL: + case IXGBE_LINK_SPEED_10_FULL: + itr += DIV_ROUND_UP(avg_wire_size, + IXGBE_ITR_ADAPTIVE_MIN_INC * 64) * + IXGBE_ITR_ADAPTIVE_MIN_INC; break; } - /* clear work counters since we have the values we need */ +clear_counts: + /* write back value */ + ring_container->itr = itr; + + /* next update should occur within next jiffy */ + ring_container->next_update = next_update + 1; + ring_container->total_bytes = 0; ring_container->total_packets = 0; - - /* write updated itr to ring container */ - ring_container->itr = itr_setting; } /** @@ -2601,34 +2749,19 @@ void ixgbe_write_eitr(struct ixgbe_q_vector *q_vector) static void ixgbe_set_itr(struct ixgbe_q_vector *q_vector) { - u32 new_itr = q_vector->itr; - u8 current_itr; + u32 new_itr; ixgbe_update_itr(q_vector, &q_vector->tx); ixgbe_update_itr(q_vector, &q_vector->rx); - current_itr = max(q_vector->rx.itr, q_vector->tx.itr); + /* use the smallest value of new ITR delay calculations */ + new_itr = min(q_vector->rx.itr, q_vector->tx.itr); - switch (current_itr) { - /* counts and packets in update_itr are dependent on these numbers */ - case lowest_latency: - new_itr = IXGBE_100K_ITR; - break; - case low_latency: - new_itr = IXGBE_20K_ITR; - break; - case bulk_latency: - new_itr = IXGBE_12K_ITR; - break; - default: - break; - } + /* Clear latency flag if set, shift into correct position */ + new_itr &= ~IXGBE_ITR_ADAPTIVE_LATENCY; + new_itr <<= 2; if (new_itr != q_vector->itr) { - /* do an exponential smoothing */ - new_itr = (10 * new_itr * q_vector->itr) / - ((9 * new_itr) + q_vector->itr); - /* save the algorithm value here */ q_vector->itr = new_itr; @@ -6771,6 +6904,7 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter) u32 i, missed_rx = 0, mpc, bprc, lxon, lxoff, xon_off_tot; u64 non_eop_descs = 0, restart_queue = 0, tx_busy = 0; u64 alloc_rx_page_failed = 0, alloc_rx_buff_failed = 0; + u64 alloc_rx_page = 0; u64 bytes = 0, packets = 0, hw_csum_rx_error = 0; if (test_bit(__IXGBE_DOWN, &adapter->state) || @@ -6791,6 +6925,7 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter) for (i = 0; i < adapter->num_rx_queues; i++) { struct ixgbe_ring *rx_ring = adapter->rx_ring[i]; non_eop_descs += rx_ring->rx_stats.non_eop_descs; + alloc_rx_page += rx_ring->rx_stats.alloc_rx_page; alloc_rx_page_failed += rx_ring->rx_stats.alloc_rx_page_failed; alloc_rx_buff_failed += rx_ring->rx_stats.alloc_rx_buff_failed; hw_csum_rx_error += rx_ring->rx_stats.csum_err; @@ -6798,6 +6933,7 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter) packets += rx_ring->stats.packets; } adapter->non_eop_descs = non_eop_descs; + adapter->alloc_rx_page = alloc_rx_page; adapter->alloc_rx_page_failed = alloc_rx_page_failed; adapter->alloc_rx_buff_failed = alloc_rx_buff_failed; adapter->hw_csum_rx_error = hw_csum_rx_error; @@ -7554,9 +7690,9 @@ static void ixgbe_sfp_link_config_subtask(struct ixgbe_adapter *adapter) * ixgbe_service_timer - Timer Call-back * @data: pointer to adapter cast into an unsigned long **/ -static void ixgbe_service_timer(unsigned long data) +static void ixgbe_service_timer(struct timer_list *t) { - struct ixgbe_adapter *adapter = (struct ixgbe_adapter *)data; + struct ixgbe_adapter *adapter = from_timer(adapter, t, service_timer); unsigned long next_event_offset; /* poll faster when waiting for link */ @@ -9223,13 +9359,10 @@ free_jump: return err; } -static int ixgbe_setup_tc_cls_u32(struct net_device *dev, +static int ixgbe_setup_tc_cls_u32(struct ixgbe_adapter *adapter, struct tc_cls_u32_offload *cls_u32) { - struct ixgbe_adapter *adapter = netdev_priv(dev); - - if (!is_classid_clsact_ingress(cls_u32->common.classid) || - cls_u32->common.chain_index) + if (cls_u32->common.chain_index) return -EOPNOTSUPP; switch (cls_u32->command) { @@ -9248,6 +9381,43 @@ static int ixgbe_setup_tc_cls_u32(struct net_device *dev, } } +static int ixgbe_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct ixgbe_adapter *adapter = cb_priv; + + if (!tc_can_offload(adapter->netdev)) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_CLSU32: + return ixgbe_setup_tc_cls_u32(adapter, type_data); + default: + return -EOPNOTSUPP; + } +} + +static int ixgbe_setup_tc_block(struct net_device *dev, + struct tc_block_offload *f) +{ + struct ixgbe_adapter *adapter = netdev_priv(dev); + + if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + return tcf_block_cb_register(f->block, ixgbe_setup_tc_block_cb, + adapter, adapter); + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, ixgbe_setup_tc_block_cb, + adapter); + return 0; + default: + return -EOPNOTSUPP; + } +} + static int ixgbe_setup_tc_mqprio(struct net_device *dev, struct tc_mqprio_qopt *mqprio) { @@ -9259,9 +9429,9 @@ static int __ixgbe_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { switch (type) { - case TC_SETUP_CLSU32: - return ixgbe_setup_tc_cls_u32(dev, type_data); - case TC_SETUP_MQPRIO: + case TC_SETUP_BLOCK: + return ixgbe_setup_tc_block(dev, type_data); + case TC_SETUP_QDISC_MQPRIO: return ixgbe_setup_tc_mqprio(dev, type_data); default: return -EOPNOTSUPP; @@ -9733,6 +9903,17 @@ static void ixgbe_fwd_del(struct net_device *pdev, void *priv) limit = find_last_bit(&adapter->fwd_bitmask, 32); adapter->ring_feature[RING_F_VMDQ].limit = limit + 1; ixgbe_fwd_ring_down(fwd_adapter->netdev, fwd_adapter); + + /* go back to full RSS if we're done with our VMQs */ + if (adapter->ring_feature[RING_F_VMDQ].limit == 1) { + int rss = min_t(int, ixgbe_max_rss_indices(adapter), + num_online_cpus()); + + adapter->flags &= ~IXGBE_FLAG_VMDQ_ENABLED; + adapter->flags &= ~IXGBE_FLAG_SRIOV_ENABLED; + adapter->ring_feature[RING_F_RSS].limit = rss; + } + ixgbe_setup_tc(pdev, netdev_get_num_tc(pdev)); netdev_dbg(pdev, "pool %i:%i queues %i:%i VSI bitmask %lx\n", fwd_adapter->pool, adapter->num_rx_pools, @@ -9823,7 +10004,7 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog) return 0; } -static int ixgbe_xdp(struct net_device *dev, struct netdev_xdp *xdp) +static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp) { struct ixgbe_adapter *adapter = netdev_priv(dev); @@ -9932,7 +10113,7 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_udp_tunnel_add = ixgbe_add_udp_tunnel_port, .ndo_udp_tunnel_del = ixgbe_del_udp_tunnel_port, .ndo_features_check = ixgbe_features_check, - .ndo_xdp = ixgbe_xdp, + .ndo_bpf = ixgbe_xdp, .ndo_xdp_xmit = ixgbe_xdp_xmit, .ndo_xdp_flush = ixgbe_xdp_flush, }; @@ -10355,8 +10536,7 @@ skip_sriov: ether_addr_copy(hw->mac.addr, hw->mac.perm_addr); ixgbe_mac_set_default_filter(adapter); - setup_timer(&adapter->service_timer, &ixgbe_service_timer, - (unsigned long) adapter); + timer_setup(&adapter->service_timer, ixgbe_service_timer, 0); if (ixgbe_removed(hw->hw_addr)) { err = -EIO; @@ -10712,6 +10892,9 @@ skip_bad_vf_detection: if (!test_bit(__IXGBE_SERVICE_INITED, &adapter->state)) return PCI_ERS_RESULT_DISCONNECT; + if (!netif_device_present(netdev)) + return PCI_ERS_RESULT_DISCONNECT; + rtnl_lock(); netif_device_detach(netdev); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c index 6ea0d6a5fb90..b8c5fd2a2115 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c @@ -619,12 +619,6 @@ s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask) usleep_range(5000, 10000); } - /* Failed to get SW only semaphore */ - if (swmask == IXGBE_GSSR_SW_MNG_SM) { - hw_dbg(hw, "Failed to get SW only semaphore\n"); - return IXGBE_ERR_SWFW_SYNC; - } - /* If the resource is not released by the FW/HW the SW can assume that * the FW/HW malfunctions. In that case the SW should set the SW bit(s) * of the requested resource(s) while ignoring the corresponding FW/HW @@ -647,7 +641,8 @@ s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask) */ if (swfw_sync & swmask) { u32 rmask = IXGBE_GSSR_EEP_SM | IXGBE_GSSR_PHY0_SM | - IXGBE_GSSR_PHY1_SM | IXGBE_GSSR_MAC_CSR_SM; + IXGBE_GSSR_PHY1_SM | IXGBE_GSSR_MAC_CSR_SM | + IXGBE_GSSR_SW_MNG_SM; if (swi2c_mask) rmask |= IXGBE_GSSR_I2C_MASK; @@ -763,6 +758,8 @@ static void ixgbe_release_swfw_sync_semaphore(struct ixgbe_hw *hw) **/ void ixgbe_init_swfw_sync_X540(struct ixgbe_hw *hw) { + u32 rmask; + /* First try to grab the semaphore but we don't need to bother * looking to see whether we got the lock or not since we do * the same thing regardless of whether we got the lock or not. @@ -771,6 +768,14 @@ void ixgbe_init_swfw_sync_X540(struct ixgbe_hw *hw) */ ixgbe_get_swfw_sync_semaphore(hw); ixgbe_release_swfw_sync_semaphore(hw); + + /* Acquire and release all software resources. */ + rmask = IXGBE_GSSR_EEP_SM | IXGBE_GSSR_PHY0_SM | + IXGBE_GSSR_PHY1_SM | IXGBE_GSSR_MAC_CSR_SM | + IXGBE_GSSR_SW_MNG_SM | IXGBE_GSSR_I2C_MASK; + + ixgbe_acquire_swfw_sync_X540(hw, rmask); + ixgbe_release_swfw_sync_X540(hw, rmask); } /** diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index 19fbb2f28ea4..cb7da5f9c4da 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -900,6 +900,8 @@ static s32 ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw, /* convert offset from words to bytes */ buffer.address = cpu_to_be32((offset + current_word) * 2); buffer.length = cpu_to_be16(words_to_read * 2); + buffer.pad2 = 0; + buffer.pad3 = 0; status = ixgbe_hic_unlocked(hw, (u32 *)&buffer, sizeof(buffer), IXGBE_HI_COMMAND_TIMEOUT); @@ -3192,6 +3194,9 @@ static s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw) /* Identify the PHY or SFP module */ ret_val = phy->ops.identify(hw); + if (ret_val == IXGBE_ERR_SFP_NOT_SUPPORTED || + ret_val == IXGBE_ERR_PHY_ADDR_INVALID) + return ret_val; /* Setup function pointers based on detected hardware */ ixgbe_init_mac_link_ops_X550em(hw); @@ -3394,9 +3399,10 @@ static s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw) ixgbe_clear_tx_pending(hw); /* PHY ops must be identified and initialized prior to reset */ - - /* Identify PHY and related function pointers */ status = hw->phy.ops.init(hw); + if (status == IXGBE_ERR_SFP_NOT_SUPPORTED || + status == IXGBE_ERR_PHY_ADDR_INVALID) + return status; /* start the external PHY */ if (hw->phy.type == ixgbe_phy_x550em_ext_t) { @@ -3884,7 +3890,7 @@ static const struct ixgbe_mac_operations mac_ops_X550EM_x_fw = { .write_iosf_sb_reg = ixgbe_write_iosf_sb_reg_x550, }; -static struct ixgbe_mac_operations mac_ops_x550em_a = { +static const struct ixgbe_mac_operations mac_ops_x550em_a = { X550_COMMON_MAC .led_on = ixgbe_led_on_t_x550em, .led_off = ixgbe_led_off_t_x550em, @@ -3905,7 +3911,7 @@ static struct ixgbe_mac_operations mac_ops_x550em_a = { .write_iosf_sb_reg = ixgbe_write_iosf_sb_reg_x550a, }; -static struct ixgbe_mac_operations mac_ops_x550em_a_fw = { +static const struct ixgbe_mac_operations mac_ops_x550em_a_fw = { X550_COMMON_MAC .led_on = ixgbe_led_on_generic, .led_off = ixgbe_led_off_generic, diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index cacb30682434..feed11bc9ddf 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -2747,9 +2747,10 @@ void ixgbevf_update_stats(struct ixgbevf_adapter *adapter) * ixgbevf_service_timer - Timer Call-back * @data: pointer to adapter cast into an unsigned long **/ -static void ixgbevf_service_timer(unsigned long data) +static void ixgbevf_service_timer(struct timer_list *t) { - struct ixgbevf_adapter *adapter = (struct ixgbevf_adapter *)data; + struct ixgbevf_adapter *adapter = from_timer(adapter, t, + service_timer); /* Reset the timer */ mod_timer(&adapter->service_timer, (HZ * 2) + jiffies); @@ -4120,8 +4121,7 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_sw_init; } - setup_timer(&adapter->service_timer, &ixgbevf_service_timer, - (unsigned long)adapter); + timer_setup(&adapter->service_timer, ixgbevf_service_timer, 0); INIT_WORK(&adapter->service_task, ixgbevf_service_task); set_bit(__IXGBEVF_SERVICE_INITED, &adapter->state); diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index 3c0a6451273d..ae195f8adff5 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -4,6 +4,7 @@ * Copyright 2004 IDT Inc. (rischelp@idt.com) * Copyright 2006 Felix Fietkau <nbd@openwrt.org> * Copyright 2008 Florian Fainelli <florian@openwrt.org> + * Copyright 2017 Roman Yeryomin <roman@advem.lv> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -64,9 +65,9 @@ #include <asm/mach-rc32434/eth.h> #include <asm/mach-rc32434/dma_v.h> -#define DRV_NAME "korina" -#define DRV_VERSION "0.10" -#define DRV_RELDATE "04Mar2008" +#define DRV_NAME "korina" +#define DRV_VERSION "0.20" +#define DRV_RELDATE "15Sep2017" #define STATION_ADDRESS_HIGH(dev) (((dev)->dev_addr[0] << 8) | \ ((dev)->dev_addr[1])) @@ -75,7 +76,7 @@ ((dev)->dev_addr[4] << 8) | \ ((dev)->dev_addr[5])) -#define MII_CLOCK 1250000 /* no more than 2.5MHz */ +#define MII_CLOCK 1250000 /* no more than 2.5MHz */ /* the following must be powers of two */ #define KORINA_NUM_RDS 64 /* number of receive descriptors */ @@ -87,15 +88,19 @@ #define KORINA_RBSIZE 1536 /* size of one resource buffer = Ether MTU */ #define KORINA_RDS_MASK (KORINA_NUM_RDS - 1) #define KORINA_TDS_MASK (KORINA_NUM_TDS - 1) -#define RD_RING_SIZE (KORINA_NUM_RDS * sizeof(struct dma_desc)) +#define RD_RING_SIZE (KORINA_NUM_RDS * sizeof(struct dma_desc)) #define TD_RING_SIZE (KORINA_NUM_TDS * sizeof(struct dma_desc)) -#define TX_TIMEOUT (6000 * HZ / 1000) +#define TX_TIMEOUT (6000 * HZ / 1000) -enum chain_status { desc_filled, desc_empty }; -#define IS_DMA_FINISHED(X) (((X) & (DMA_DESC_FINI)) != 0) -#define IS_DMA_DONE(X) (((X) & (DMA_DESC_DONE)) != 0) -#define RCVPKT_LENGTH(X) (((X) & ETH_RX_LEN) >> ETH_RX_LEN_BIT) +enum chain_status { + desc_filled, + desc_empty +}; + +#define IS_DMA_FINISHED(X) (((X) & (DMA_DESC_FINI)) != 0) +#define IS_DMA_DONE(X) (((X) & (DMA_DESC_DONE)) != 0) +#define RCVPKT_LENGTH(X) (((X) & ETH_RX_LEN) >> ETH_RX_LEN_BIT) /* Information that need to be kept for each board. */ struct korina_private { @@ -122,10 +127,8 @@ struct korina_private { int rx_irq; int tx_irq; - int ovr_irq; - int und_irq; - spinlock_t lock; /* NIC xmit lock */ + spinlock_t lock; /* NIC xmit lock */ int dma_halt_cnt; int dma_run_cnt; @@ -148,17 +151,17 @@ static inline void korina_start_dma(struct dma_reg *ch, u32 dma_addr) static inline void korina_abort_dma(struct net_device *dev, struct dma_reg *ch) { - if (readl(&ch->dmac) & DMA_CHAN_RUN_BIT) { - writel(0x10, &ch->dmac); + if (readl(&ch->dmac) & DMA_CHAN_RUN_BIT) { + writel(0x10, &ch->dmac); - while (!(readl(&ch->dmas) & DMA_STAT_HALT)) - netif_trans_update(dev); + while (!(readl(&ch->dmas) & DMA_STAT_HALT)) + netif_trans_update(dev); - writel(0, &ch->dmas); - } + writel(0, &ch->dmas); + } - writel(0, &ch->dmadptr); - writel(0, &ch->dmandptr); + writel(0, &ch->dmadptr); + writel(0, &ch->dmandptr); } static inline void korina_chain_dma(struct dma_reg *ch, u32 dma_addr) @@ -365,59 +368,60 @@ static int korina_rx(struct net_device *dev, int limit) if ((KORINA_RBSIZE - (u32)DMA_COUNT(rd->control)) == 0) break; - /* Update statistics counters */ - if (devcs & ETH_RX_CRC) - dev->stats.rx_crc_errors++; - if (devcs & ETH_RX_LOR) - dev->stats.rx_length_errors++; - if (devcs & ETH_RX_LE) - dev->stats.rx_length_errors++; - if (devcs & ETH_RX_OVR) - dev->stats.rx_fifo_errors++; - if (devcs & ETH_RX_CV) - dev->stats.rx_frame_errors++; - if (devcs & ETH_RX_CES) - dev->stats.rx_length_errors++; - if (devcs & ETH_RX_MP) - dev->stats.multicast++; + /* check that this is a whole packet + * WARNING: DMA_FD bit incorrectly set + * in Rc32434 (errata ref #077) */ + if (!(devcs & ETH_RX_LD)) + goto next; - if ((devcs & ETH_RX_LD) != ETH_RX_LD) { - /* check that this is a whole packet - * WARNING: DMA_FD bit incorrectly set - * in Rc32434 (errata ref #077) */ + if (!(devcs & ETH_RX_ROK)) { + /* Update statistics counters */ dev->stats.rx_errors++; dev->stats.rx_dropped++; - } else if ((devcs & ETH_RX_ROK)) { - pkt_len = RCVPKT_LENGTH(devcs); + if (devcs & ETH_RX_CRC) + dev->stats.rx_crc_errors++; + if (devcs & ETH_RX_LE) + dev->stats.rx_length_errors++; + if (devcs & ETH_RX_OVR) + dev->stats.rx_fifo_errors++; + if (devcs & ETH_RX_CV) + dev->stats.rx_frame_errors++; + if (devcs & ETH_RX_CES) + dev->stats.rx_frame_errors++; + + goto next; + } - /* must be the (first and) last - * descriptor then */ - pkt_buf = (u8 *)lp->rx_skb[lp->rx_next_done]->data; + pkt_len = RCVPKT_LENGTH(devcs); - /* invalidate the cache */ - dma_cache_inv((unsigned long)pkt_buf, pkt_len - 4); + /* must be the (first and) last + * descriptor then */ + pkt_buf = (u8 *)lp->rx_skb[lp->rx_next_done]->data; - /* Malloc up new buffer. */ - skb_new = netdev_alloc_skb_ip_align(dev, KORINA_RBSIZE); + /* invalidate the cache */ + dma_cache_inv((unsigned long)pkt_buf, pkt_len - 4); - if (!skb_new) - break; - /* Do not count the CRC */ - skb_put(skb, pkt_len - 4); - skb->protocol = eth_type_trans(skb, dev); + /* Malloc up new buffer. */ + skb_new = netdev_alloc_skb_ip_align(dev, KORINA_RBSIZE); - /* Pass the packet to upper layers */ - netif_receive_skb(skb); - dev->stats.rx_packets++; - dev->stats.rx_bytes += pkt_len; + if (!skb_new) + break; + /* Do not count the CRC */ + skb_put(skb, pkt_len - 4); + skb->protocol = eth_type_trans(skb, dev); - /* Update the mcast stats */ - if (devcs & ETH_RX_MP) - dev->stats.multicast++; + /* Pass the packet to upper layers */ + napi_gro_receive(&lp->napi, skb); + dev->stats.rx_packets++; + dev->stats.rx_bytes += pkt_len; - lp->rx_skb[lp->rx_next_done] = skb_new; - } + /* Update the mcast stats */ + if (devcs & ETH_RX_MP) + dev->stats.multicast++; + + lp->rx_skb[lp->rx_next_done] = skb_new; +next: rd->devcs = 0; /* Restore descriptor's curr_addr */ @@ -649,10 +653,10 @@ static void korina_check_media(struct net_device *dev, unsigned int init_media) &lp->eth_regs->ethmac2); } -static void korina_poll_media(unsigned long data) +static void korina_poll_media(struct timer_list *t) { - struct net_device *dev = (struct net_device *) data; - struct korina_private *lp = netdev_priv(dev); + struct korina_private *lp = from_timer(lp, t, media_check_timer); + struct net_device *dev = lp->dev; korina_check_media(dev, 0); mod_timer(&lp->media_check_timer, jiffies + HZ); @@ -686,7 +690,7 @@ static int korina_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) /* ethtool helpers */ static void netdev_get_drvinfo(struct net_device *dev, - struct ethtool_drvinfo *info) + struct ethtool_drvinfo *info) { struct korina_private *lp = netdev_priv(dev); @@ -729,10 +733,10 @@ static u32 netdev_get_link(struct net_device *dev) } static const struct ethtool_ops netdev_ethtool_ops = { - .get_drvinfo = netdev_get_drvinfo, - .get_link = netdev_get_link, - .get_link_ksettings = netdev_get_link_ksettings, - .set_link_ksettings = netdev_set_link_ksettings, + .get_drvinfo = netdev_get_drvinfo, + .get_link = netdev_get_link, + .get_link_ksettings = netdev_get_link_ksettings, + .set_link_ksettings = netdev_set_link_ksettings, }; static int korina_alloc_ring(struct net_device *dev) @@ -864,7 +868,7 @@ static int korina_init(struct net_device *dev) /* Management Clock Prescaler Divisor * Clock independent setting */ writel(((idt_cpu_freq) / MII_CLOCK + 1) & ~1, - &lp->eth_regs->ethmcp); + &lp->eth_regs->ethmcp); /* don't transmit until fifo contains 48b */ writel(48, &lp->eth_regs->ethfifott); @@ -891,8 +895,6 @@ static void korina_restart_task(struct work_struct *work) */ disable_irq(lp->rx_irq); disable_irq(lp->tx_irq); - disable_irq(lp->ovr_irq); - disable_irq(lp->und_irq); writel(readl(&lp->tx_dma_regs->dmasm) | DMA_STAT_FINI | DMA_STAT_ERR, @@ -911,40 +913,10 @@ static void korina_restart_task(struct work_struct *work) } korina_multicast_list(dev); - enable_irq(lp->und_irq); - enable_irq(lp->ovr_irq); enable_irq(lp->tx_irq); enable_irq(lp->rx_irq); } -static void korina_clear_and_restart(struct net_device *dev, u32 value) -{ - struct korina_private *lp = netdev_priv(dev); - - netif_stop_queue(dev); - writel(value, &lp->eth_regs->ethintfc); - schedule_work(&lp->restart_task); -} - -/* Ethernet Tx Underflow interrupt */ -static irqreturn_t korina_und_interrupt(int irq, void *dev_id) -{ - struct net_device *dev = dev_id; - struct korina_private *lp = netdev_priv(dev); - unsigned int und; - - spin_lock(&lp->lock); - - und = readl(&lp->eth_regs->ethintfc); - - if (und & ETH_INT_FC_UND) - korina_clear_and_restart(dev, und & ~ETH_INT_FC_UND); - - spin_unlock(&lp->lock); - - return IRQ_HANDLED; -} - static void korina_tx_timeout(struct net_device *dev) { struct korina_private *lp = netdev_priv(dev); @@ -952,25 +924,6 @@ static void korina_tx_timeout(struct net_device *dev) schedule_work(&lp->restart_task); } -/* Ethernet Rx Overflow interrupt */ -static irqreturn_t -korina_ovr_interrupt(int irq, void *dev_id) -{ - struct net_device *dev = dev_id; - struct korina_private *lp = netdev_priv(dev); - unsigned int ovr; - - spin_lock(&lp->lock); - ovr = readl(&lp->eth_regs->ethintfc); - - if (ovr & ETH_INT_FC_OVR) - korina_clear_and_restart(dev, ovr & ~ETH_INT_FC_OVR); - - spin_unlock(&lp->lock); - - return IRQ_HANDLED; -} - #ifdef CONFIG_NET_POLL_CONTROLLER static void korina_poll_controller(struct net_device *dev) { @@ -993,48 +946,26 @@ static int korina_open(struct net_device *dev) } /* Install the interrupt handler - * that handles the Done Finished - * Ovr and Und Events */ + * that handles the Done Finished */ ret = request_irq(lp->rx_irq, korina_rx_dma_interrupt, 0, "Korina ethernet Rx", dev); if (ret < 0) { printk(KERN_ERR "%s: unable to get Rx DMA IRQ %d\n", - dev->name, lp->rx_irq); + dev->name, lp->rx_irq); goto err_release; } ret = request_irq(lp->tx_irq, korina_tx_dma_interrupt, 0, "Korina ethernet Tx", dev); if (ret < 0) { printk(KERN_ERR "%s: unable to get Tx DMA IRQ %d\n", - dev->name, lp->tx_irq); + dev->name, lp->tx_irq); goto err_free_rx_irq; } - /* Install handler for overrun error. */ - ret = request_irq(lp->ovr_irq, korina_ovr_interrupt, - 0, "Ethernet Overflow", dev); - if (ret < 0) { - printk(KERN_ERR "%s: unable to get OVR IRQ %d\n", - dev->name, lp->ovr_irq); - goto err_free_tx_irq; - } - - /* Install handler for underflow error. */ - ret = request_irq(lp->und_irq, korina_und_interrupt, - 0, "Ethernet Underflow", dev); - if (ret < 0) { - printk(KERN_ERR "%s: unable to get UND IRQ %d\n", - dev->name, lp->und_irq); - goto err_free_ovr_irq; - } mod_timer(&lp->media_check_timer, jiffies + 1); out: return ret; -err_free_ovr_irq: - free_irq(lp->ovr_irq, dev); -err_free_tx_irq: - free_irq(lp->tx_irq, dev); err_free_rx_irq: free_irq(lp->rx_irq, dev); err_release: @@ -1052,8 +983,6 @@ static int korina_close(struct net_device *dev) /* Disable interrupts */ disable_irq(lp->rx_irq); disable_irq(lp->tx_irq); - disable_irq(lp->ovr_irq); - disable_irq(lp->und_irq); korina_abort_tx(dev); tmp = readl(&lp->tx_dma_regs->dmasm); @@ -1073,8 +1002,6 @@ static int korina_close(struct net_device *dev) free_irq(lp->rx_irq, dev); free_irq(lp->tx_irq, dev); - free_irq(lp->ovr_irq, dev); - free_irq(lp->und_irq, dev); return 0; } @@ -1113,8 +1040,6 @@ static int korina_probe(struct platform_device *pdev) lp->rx_irq = platform_get_irq_byname(pdev, "korina_rx"); lp->tx_irq = platform_get_irq_byname(pdev, "korina_tx"); - lp->ovr_irq = platform_get_irq_byname(pdev, "korina_ovr"); - lp->und_irq = platform_get_irq_byname(pdev, "korina_und"); r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "korina_regs"); dev->base_addr = r->start; @@ -1162,7 +1087,7 @@ static int korina_probe(struct platform_device *pdev) dev->netdev_ops = &korina_netdev_ops; dev->ethtool_ops = &netdev_ethtool_ops; dev->watchdog_timeo = TX_TIMEOUT; - netif_napi_add(dev, &lp->napi, korina_poll, 64); + netif_napi_add(dev, &lp->napi, korina_poll, NAPI_POLL_WEIGHT); lp->phy_addr = (((lp->rx_irq == 0x2c? 1:0) << 8) | 0x05); lp->mii_if.dev = dev; @@ -1178,7 +1103,7 @@ static int korina_probe(struct platform_device *pdev) ": cannot register net device: %d\n", rc); goto probe_err_register; } - setup_timer(&lp->media_check_timer, korina_poll_media, (unsigned long) dev); + timer_setup(&lp->media_check_timer, korina_poll_media, 0); INIT_WORK(&lp->restart_task, korina_restart_task); @@ -1226,5 +1151,6 @@ module_platform_driver(korina_driver); MODULE_AUTHOR("Philip Rischel <rischelp@idt.com>"); MODULE_AUTHOR("Felix Fietkau <nbd@openwrt.org>"); MODULE_AUTHOR("Florian Fainelli <florian@openwrt.org>"); +MODULE_AUTHOR("Roman Yeryomin <roman@advem.lv>"); MODULE_DESCRIPTION("IDT RC32434 (Korina) Ethernet driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 64a04975bcf8..bc93b69cfd1e 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -816,11 +816,14 @@ static void mvneta_txq_pend_desc_add(struct mvneta_port *pp, { u32 val; - /* Only 255 descriptors can be added at once ; Assume caller - * process TX desriptors in quanta less than 256 - */ - val = pend_desc + txq->pending; - mvreg_write(pp, MVNETA_TXQ_UPDATE_REG(txq->id), val); + pend_desc += txq->pending; + + /* Only 255 Tx descriptors can be added at once */ + do { + val = min(pend_desc, 255); + mvreg_write(pp, MVNETA_TXQ_UPDATE_REG(txq->id), val); + pend_desc -= val; + } while (pend_desc > 0); txq->pending = 0; } diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index fcf9ba5eb8d1..6c20e811f973 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -38,11 +38,12 @@ #include <net/ipv6.h> #include <net/tso.h> -/* RX Fifo Registers */ +/* Fifo Registers */ #define MVPP2_RX_DATA_FIFO_SIZE_REG(port) (0x00 + 4 * (port)) #define MVPP2_RX_ATTR_FIFO_SIZE_REG(port) (0x20 + 4 * (port)) #define MVPP2_RX_MIN_PKT_SIZE_REG 0x60 #define MVPP2_RX_FIFO_INIT_REG 0x64 +#define MVPP22_TX_FIFO_SIZE_REG(port) (0x8860 + 4 * (port)) /* RX DMA Top Registers */ #define MVPP2_RX_CTRL_REG(port) (0x140 + 4 * (port)) @@ -82,6 +83,16 @@ #define MVPP2_PRS_TCAM_CTRL_REG 0x1230 #define MVPP2_PRS_TCAM_EN_MASK BIT(0) +/* RSS Registers */ +#define MVPP22_RSS_INDEX 0x1500 +#define MVPP22_RSS_INDEX_TABLE_ENTRY(idx) ((idx) << 8) +#define MVPP22_RSS_INDEX_TABLE(idx) ((idx) << 8) +#define MVPP22_RSS_INDEX_QUEUE(idx) ((idx) << 16) +#define MVPP22_RSS_TABLE_ENTRY 0x1508 +#define MVPP22_RSS_TABLE 0x1510 +#define MVPP22_RSS_TABLE_POINTER(p) (p) +#define MVPP22_RSS_WIDTH 0x150c + /* Classifier Registers */ #define MVPP2_CLS_MODE_REG 0x1800 #define MVPP2_CLS_MODE_ACTIVE_MASK BIT(0) @@ -482,6 +493,13 @@ /* Maximum number of TXQs used by single port */ #define MVPP2_MAX_TXQ 8 +/* MVPP2_MAX_TSO_SEGS is the maximum number of fragments to allow in the GSO + * skb. As we need a maxium of two descriptors per fragments (1 header, 1 data), + * multiply this value by two to count the maximum number of skb descs needed. + */ +#define MVPP2_MAX_TSO_SEGS 300 +#define MVPP2_MAX_SKB_DESCS (MVPP2_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS) + /* Dfault number of RXQs in use */ #define MVPP2_DEFAULT_RXQ 4 @@ -504,9 +522,17 @@ #define MVPP2_TX_DESC_ALIGN (MVPP2_DESC_ALIGNED_SIZE - 1) /* RX FIFO constants */ -#define MVPP2_RX_FIFO_PORT_DATA_SIZE 0x2000 -#define MVPP2_RX_FIFO_PORT_ATTR_SIZE 0x80 -#define MVPP2_RX_FIFO_PORT_MIN_PKT 0x80 +#define MVPP2_RX_FIFO_PORT_DATA_SIZE_32KB 0x8000 +#define MVPP2_RX_FIFO_PORT_DATA_SIZE_8KB 0x2000 +#define MVPP2_RX_FIFO_PORT_DATA_SIZE_4KB 0x1000 +#define MVPP2_RX_FIFO_PORT_ATTR_SIZE_32KB 0x200 +#define MVPP2_RX_FIFO_PORT_ATTR_SIZE_8KB 0x80 +#define MVPP2_RX_FIFO_PORT_ATTR_SIZE_4KB 0x40 +#define MVPP2_RX_FIFO_PORT_MIN_PKT 0x80 + +/* TX FIFO constants */ +#define MVPP22_TX_FIFO_DATA_SIZE_10KB 0xa +#define MVPP22_TX_FIFO_DATA_SIZE_3KB 0x3 /* RX buffer constants */ #define MVPP2_SKB_SHINFO_SIZE \ @@ -737,6 +763,10 @@ enum mvpp2_prs_l3_cast { #define MVPP2_CLS_FLOWS_TBL_SIZE 512 #define MVPP2_CLS_FLOWS_TBL_DATA_WORDS 3 #define MVPP2_CLS_LKP_TBL_SIZE 64 +#define MVPP2_CLS_RX_QUEUES 256 + +/* RSS constants */ +#define MVPP22_RSS_TABLE_ENTRIES 32 /* BM constants */ #define MVPP2_BM_POOLS_NUM 8 @@ -769,6 +799,42 @@ enum mvpp2_bm_type { MVPP2_BM_SWF_SHORT }; +/* GMAC MIB Counters register definitions */ +#define MVPP21_MIB_COUNTERS_OFFSET 0x1000 +#define MVPP21_MIB_COUNTERS_PORT_SZ 0x400 +#define MVPP22_MIB_COUNTERS_OFFSET 0x0 +#define MVPP22_MIB_COUNTERS_PORT_SZ 0x100 + +#define MVPP2_MIB_GOOD_OCTETS_RCVD 0x0 +#define MVPP2_MIB_BAD_OCTETS_RCVD 0x8 +#define MVPP2_MIB_CRC_ERRORS_SENT 0xc +#define MVPP2_MIB_UNICAST_FRAMES_RCVD 0x10 +#define MVPP2_MIB_BROADCAST_FRAMES_RCVD 0x18 +#define MVPP2_MIB_MULTICAST_FRAMES_RCVD 0x1c +#define MVPP2_MIB_FRAMES_64_OCTETS 0x20 +#define MVPP2_MIB_FRAMES_65_TO_127_OCTETS 0x24 +#define MVPP2_MIB_FRAMES_128_TO_255_OCTETS 0x28 +#define MVPP2_MIB_FRAMES_256_TO_511_OCTETS 0x2c +#define MVPP2_MIB_FRAMES_512_TO_1023_OCTETS 0x30 +#define MVPP2_MIB_FRAMES_1024_TO_MAX_OCTETS 0x34 +#define MVPP2_MIB_GOOD_OCTETS_SENT 0x38 +#define MVPP2_MIB_UNICAST_FRAMES_SENT 0x40 +#define MVPP2_MIB_MULTICAST_FRAMES_SENT 0x48 +#define MVPP2_MIB_BROADCAST_FRAMES_SENT 0x4c +#define MVPP2_MIB_FC_SENT 0x54 +#define MVPP2_MIB_FC_RCVD 0x58 +#define MVPP2_MIB_RX_FIFO_OVERRUN 0x5c +#define MVPP2_MIB_UNDERSIZE_RCVD 0x60 +#define MVPP2_MIB_FRAGMENTS_RCVD 0x64 +#define MVPP2_MIB_OVERSIZE_RCVD 0x68 +#define MVPP2_MIB_JABBER_RCVD 0x6c +#define MVPP2_MIB_MAC_RCV_ERROR 0x70 +#define MVPP2_MIB_BAD_CRC_EVENT 0x74 +#define MVPP2_MIB_COLLISION 0x78 +#define MVPP2_MIB_LATE_COLLISION 0x7c + +#define MVPP2_MIB_COUNTERS_STATS_DELAY (1 * HZ) + /* Definitions */ /* Shared Packet Processor resources */ @@ -796,6 +862,7 @@ struct mvpp2 { struct clk *axi_clk; /* List of pointers to port structures */ + int port_count; struct mvpp2_port **port_list; /* Aggregated TXQs */ @@ -817,6 +884,10 @@ struct mvpp2 { /* Maximum number of RXQs per port */ unsigned int max_port_rxqs; + + /* Workqueue to gather hardware statistics */ + char queue_name[30]; + struct workqueue_struct *stats_queue; }; struct mvpp2_pcpu_stats { @@ -861,6 +932,7 @@ struct mvpp2_port { /* Per-port registers' base address */ void __iomem *base; + void __iomem *stats_base; struct mvpp2_rx_queue **rxqs; unsigned int nrxqs; @@ -879,6 +951,11 @@ struct mvpp2_port { u16 tx_ring_size; u16 rx_ring_size; struct mvpp2_pcpu_stats __percpu *stats; + u64 *ethtool_stats; + + /* Per-port work and its lock to gather hardware statistics */ + struct mutex gather_stats_lock; + struct delayed_work stats_work; phy_interface_t phy_interface; struct device_node *phy_node; @@ -1022,6 +1099,9 @@ struct mvpp2_txq_pcpu { */ int count; + int wake_threshold; + int stop_threshold; + /* Number of Tx DMA descriptors reserved for each CPU */ int reserved_num; @@ -1257,13 +1337,20 @@ static void mvpp2_txdesc_dma_addr_set(struct mvpp2_port *port, struct mvpp2_tx_desc *tx_desc, dma_addr_t dma_addr) { + dma_addr_t addr, offset; + + addr = dma_addr & ~MVPP2_TX_DESC_ALIGN; + offset = dma_addr & MVPP2_TX_DESC_ALIGN; + if (port->priv->hw_version == MVPP21) { - tx_desc->pp21.buf_dma_addr = dma_addr; + tx_desc->pp21.buf_dma_addr = addr; + tx_desc->pp21.packet_offset = offset; } else { - u64 val = (u64)dma_addr; + u64 val = (u64)addr; tx_desc->pp22.buf_dma_addr_ptp &= ~GENMASK_ULL(40, 0); tx_desc->pp22.buf_dma_addr_ptp |= val; + tx_desc->pp22.packet_offset = offset; } } @@ -1306,16 +1393,6 @@ static void mvpp2_txdesc_cmd_set(struct mvpp2_port *port, tx_desc->pp22.command = command; } -static void mvpp2_txdesc_offset_set(struct mvpp2_port *port, - struct mvpp2_tx_desc *tx_desc, - unsigned int offset) -{ - if (port->priv->hw_version == MVPP21) - tx_desc->pp21.packet_offset = offset; - else - tx_desc->pp22.packet_offset = offset; -} - static unsigned int mvpp2_txdesc_offset_get(struct mvpp2_port *port, struct mvpp2_tx_desc *tx_desc) { @@ -4748,9 +4825,131 @@ static void mvpp2_port_loopback_set(struct mvpp2_port *port) writel(val, port->base + MVPP2_GMAC_CTRL_1_REG); } +struct mvpp2_ethtool_counter { + unsigned int offset; + const char string[ETH_GSTRING_LEN]; + bool reg_is_64b; +}; + +static u64 mvpp2_read_count(struct mvpp2_port *port, + const struct mvpp2_ethtool_counter *counter) +{ + u64 val; + + val = readl(port->stats_base + counter->offset); + if (counter->reg_is_64b) + val += (u64)readl(port->stats_base + counter->offset + 4) << 32; + + return val; +} + +/* Due to the fact that software statistics and hardware statistics are, by + * design, incremented at different moments in the chain of packet processing, + * it is very likely that incoming packets could have been dropped after being + * counted by hardware but before reaching software statistics (most probably + * multicast packets), and in the oppposite way, during transmission, FCS bytes + * are added in between as well as TSO skb will be split and header bytes added. + * Hence, statistics gathered from userspace with ifconfig (software) and + * ethtool (hardware) cannot be compared. + */ +static const struct mvpp2_ethtool_counter mvpp2_ethtool_regs[] = { + { MVPP2_MIB_GOOD_OCTETS_RCVD, "good_octets_received", true }, + { MVPP2_MIB_BAD_OCTETS_RCVD, "bad_octets_received" }, + { MVPP2_MIB_CRC_ERRORS_SENT, "crc_errors_sent" }, + { MVPP2_MIB_UNICAST_FRAMES_RCVD, "unicast_frames_received" }, + { MVPP2_MIB_BROADCAST_FRAMES_RCVD, "broadcast_frames_received" }, + { MVPP2_MIB_MULTICAST_FRAMES_RCVD, "multicast_frames_received" }, + { MVPP2_MIB_FRAMES_64_OCTETS, "frames_64_octets" }, + { MVPP2_MIB_FRAMES_65_TO_127_OCTETS, "frames_65_to_127_octet" }, + { MVPP2_MIB_FRAMES_128_TO_255_OCTETS, "frames_128_to_255_octet" }, + { MVPP2_MIB_FRAMES_256_TO_511_OCTETS, "frames_256_to_511_octet" }, + { MVPP2_MIB_FRAMES_512_TO_1023_OCTETS, "frames_512_to_1023_octet" }, + { MVPP2_MIB_FRAMES_1024_TO_MAX_OCTETS, "frames_1024_to_max_octet" }, + { MVPP2_MIB_GOOD_OCTETS_SENT, "good_octets_sent", true }, + { MVPP2_MIB_UNICAST_FRAMES_SENT, "unicast_frames_sent" }, + { MVPP2_MIB_MULTICAST_FRAMES_SENT, "multicast_frames_sent" }, + { MVPP2_MIB_BROADCAST_FRAMES_SENT, "broadcast_frames_sent" }, + { MVPP2_MIB_FC_SENT, "fc_sent" }, + { MVPP2_MIB_FC_RCVD, "fc_received" }, + { MVPP2_MIB_RX_FIFO_OVERRUN, "rx_fifo_overrun" }, + { MVPP2_MIB_UNDERSIZE_RCVD, "undersize_received" }, + { MVPP2_MIB_FRAGMENTS_RCVD, "fragments_received" }, + { MVPP2_MIB_OVERSIZE_RCVD, "oversize_received" }, + { MVPP2_MIB_JABBER_RCVD, "jabber_received" }, + { MVPP2_MIB_MAC_RCV_ERROR, "mac_receive_error" }, + { MVPP2_MIB_BAD_CRC_EVENT, "bad_crc_event" }, + { MVPP2_MIB_COLLISION, "collision" }, + { MVPP2_MIB_LATE_COLLISION, "late_collision" }, +}; + +static void mvpp2_ethtool_get_strings(struct net_device *netdev, u32 sset, + u8 *data) +{ + if (sset == ETH_SS_STATS) { + int i; + + for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_regs); i++) + memcpy(data + i * ETH_GSTRING_LEN, + &mvpp2_ethtool_regs[i].string, ETH_GSTRING_LEN); + } +} + +static void mvpp2_gather_hw_statistics(struct work_struct *work) +{ + struct delayed_work *del_work = to_delayed_work(work); + struct mvpp2_port *port = container_of(del_work, struct mvpp2_port, + stats_work); + u64 *pstats; + int i; + + mutex_lock(&port->gather_stats_lock); + + pstats = port->ethtool_stats; + for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_regs); i++) + *pstats++ += mvpp2_read_count(port, &mvpp2_ethtool_regs[i]); + + /* No need to read again the counters right after this function if it + * was called asynchronously by the user (ie. use of ethtool). + */ + cancel_delayed_work(&port->stats_work); + queue_delayed_work(port->priv->stats_queue, &port->stats_work, + MVPP2_MIB_COUNTERS_STATS_DELAY); + + mutex_unlock(&port->gather_stats_lock); +} + +static void mvpp2_ethtool_get_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct mvpp2_port *port = netdev_priv(dev); + + /* Update statistics for the given port, then take the lock to avoid + * concurrent accesses on the ethtool_stats structure during its copy. + */ + mvpp2_gather_hw_statistics(&port->stats_work.work); + + mutex_lock(&port->gather_stats_lock); + memcpy(data, port->ethtool_stats, + sizeof(u64) * ARRAY_SIZE(mvpp2_ethtool_regs)); + mutex_unlock(&port->gather_stats_lock); +} + +static int mvpp2_ethtool_get_sset_count(struct net_device *dev, int sset) +{ + if (sset == ETH_SS_STATS) + return ARRAY_SIZE(mvpp2_ethtool_regs); + + return -EOPNOTSUPP; +} + static void mvpp2_port_reset(struct mvpp2_port *port) { u32 val; + unsigned int i; + + /* Read the GOP statistics to reset the hardware counters */ + for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_regs); i++) + mvpp2_read_count(port, &mvpp2_ethtool_regs[i]); val = readl(port->base + MVPP2_GMAC_CTRL_2_REG) & ~MVPP2_GMAC_PORT_RESET_MASK; @@ -5022,7 +5221,7 @@ static void mvpp2_aggr_txq_pend_desc_add(struct mvpp2_port *port, int pending) static int mvpp2_aggr_desc_num_check(struct mvpp2 *priv, struct mvpp2_tx_queue *aggr_txq, int num) { - if ((aggr_txq->count + num) > aggr_txq->size) { + if ((aggr_txq->count + num) > MVPP2_AGGR_TXQ_SIZE) { /* Update number of occupied aggregated Tx descriptors */ int cpu = smp_processor_id(); u32 val = mvpp2_read(priv, MVPP2_AGGR_TXQ_STATUS_REG(cpu)); @@ -5030,7 +5229,7 @@ static int mvpp2_aggr_desc_num_check(struct mvpp2 *priv, aggr_txq->count = val & MVPP2_AGGR_TXQ_PENDING_MASK; } - if ((aggr_txq->count + num) > aggr_txq->size) + if ((aggr_txq->count + num) > MVPP2_AGGR_TXQ_SIZE) return -ENOMEM; return 0; @@ -5370,7 +5569,7 @@ static void mvpp2_txq_done(struct mvpp2_port *port, struct mvpp2_tx_queue *txq, txq_pcpu->count -= tx_done; if (netif_tx_queue_stopped(nq)) - if (txq_pcpu->size - txq_pcpu->count >= MAX_SKB_FRAGS + 1) + if (txq_pcpu->count <= txq_pcpu->wake_threshold) netif_tx_wake_queue(nq); } @@ -5414,7 +5613,7 @@ static int mvpp2_aggr_txq_init(struct platform_device *pdev, if (!aggr_txq->descs) return -ENOMEM; - aggr_txq->last_desc = aggr_txq->size - 1; + aggr_txq->last_desc = MVPP2_AGGR_TXQ_SIZE - 1; /* Aggr TXQ no reset WA */ aggr_txq->next_desc_to_proc = mvpp2_read(priv, @@ -5613,6 +5812,9 @@ static int mvpp2_txq_init(struct mvpp2_port *port, txq_pcpu->txq_put_index = 0; txq_pcpu->txq_get_index = 0; + txq_pcpu->stop_threshold = txq->size - MVPP2_MAX_SKB_DESCS; + txq_pcpu->wake_threshold = txq_pcpu->stop_threshold / 2; + txq_pcpu->tso_headers = dma_alloc_coherent(port->dev->dev.parent, txq_pcpu->size * TSO_HEADER_SIZE, @@ -6256,10 +6458,7 @@ static int mvpp2_tx_frag_process(struct mvpp2_port *port, struct sk_buff *skb, goto cleanup; } - mvpp2_txdesc_offset_set(port, tx_desc, - buf_dma_addr & MVPP2_TX_DESC_ALIGN); - mvpp2_txdesc_dma_addr_set(port, tx_desc, - buf_dma_addr & ~MVPP2_TX_DESC_ALIGN); + mvpp2_txdesc_dma_addr_set(port, tx_desc, buf_dma_addr); if (i == (skb_shinfo(skb)->nr_frags - 1)) { /* Last descriptor */ @@ -6302,8 +6501,7 @@ static inline void mvpp2_tso_put_hdr(struct sk_buff *skb, addr = txq_pcpu->tso_headers_dma + txq_pcpu->txq_put_index * TSO_HEADER_SIZE; - mvpp2_txdesc_offset_set(port, tx_desc, addr & MVPP2_TX_DESC_ALIGN); - mvpp2_txdesc_dma_addr_set(port, tx_desc, addr & ~MVPP2_TX_DESC_ALIGN); + mvpp2_txdesc_dma_addr_set(port, tx_desc, addr); mvpp2_txdesc_cmd_set(port, tx_desc, mvpp2_skb_tx_csum(port, skb) | MVPP2_TXD_F_DESC | @@ -6332,10 +6530,7 @@ static inline int mvpp2_tso_put_data(struct sk_buff *skb, return -ENOMEM; } - mvpp2_txdesc_offset_set(port, tx_desc, - buf_dma_addr & MVPP2_TX_DESC_ALIGN); - mvpp2_txdesc_dma_addr_set(port, tx_desc, - buf_dma_addr & ~MVPP2_TX_DESC_ALIGN); + mvpp2_txdesc_dma_addr_set(port, tx_desc, buf_dma_addr); if (!left) { mvpp2_txdesc_cmd_set(port, tx_desc, MVPP2_TXD_L_DESC); @@ -6447,10 +6642,7 @@ static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev) goto out; } - mvpp2_txdesc_offset_set(port, tx_desc, - buf_dma_addr & MVPP2_TX_DESC_ALIGN); - mvpp2_txdesc_dma_addr_set(port, tx_desc, - buf_dma_addr & ~MVPP2_TX_DESC_ALIGN); + mvpp2_txdesc_dma_addr_set(port, tx_desc, buf_dma_addr); tx_cmd = mvpp2_skb_tx_csum(port, skb); @@ -6469,7 +6661,6 @@ static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev) if (mvpp2_tx_frag_process(port, skb, aggr_txq, txq)) { tx_desc_unmap_put(port, txq, tx_desc); frags = 0; - goto out; } } @@ -6486,7 +6677,7 @@ out: wmb(); mvpp2_aggr_txq_pend_desc_add(port, frags); - if (txq_pcpu->size - txq_pcpu->count < MAX_SKB_FRAGS + 1) + if (txq_pcpu->count >= txq_pcpu->stop_threshold) netif_tx_stop_queue(nq); u64_stats_update_begin(&stats->syncp); @@ -6784,6 +6975,39 @@ static void mvpp2_irqs_deinit(struct mvpp2_port *port) } } +static void mvpp22_init_rss(struct mvpp2_port *port) +{ + struct mvpp2 *priv = port->priv; + int i; + + /* Set the table width: replace the whole classifier Rx queue number + * with the ones configured in RSS table entries. + */ + mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_TABLE(0)); + mvpp2_write(priv, MVPP22_RSS_WIDTH, 8); + + /* Loop through the classifier Rx Queues and map them to a RSS table. + * Map them all to the first table (0) by default. + */ + for (i = 0; i < MVPP2_CLS_RX_QUEUES; i++) { + mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_QUEUE(i)); + mvpp2_write(priv, MVPP22_RSS_TABLE, + MVPP22_RSS_TABLE_POINTER(0)); + } + + /* Configure the first table to evenly distribute the packets across + * real Rx Queues. The table entries map a hash to an port Rx Queue. + */ + for (i = 0; i < MVPP22_RSS_TABLE_ENTRIES; i++) { + u32 sel = MVPP22_RSS_INDEX_TABLE(0) | + MVPP22_RSS_INDEX_TABLE_ENTRY(i); + mvpp2_write(priv, MVPP22_RSS_INDEX, sel); + + mvpp2_write(priv, MVPP22_RSS_TABLE_ENTRY, i % port->nrxqs); + } + +} + static int mvpp2_open(struct net_device *dev) { struct mvpp2_port *port = netdev_priv(dev); @@ -6858,6 +7082,13 @@ static int mvpp2_open(struct net_device *dev) mvpp2_start_dev(port); + if (priv->hw_version == MVPP22) + mvpp22_init_rss(port); + + /* Start hardware statistics gathering */ + queue_delayed_work(priv->stats_queue, &port->stats_work, + MVPP2_MIB_COUNTERS_STATS_DELAY); + return 0; err_free_link_irq: @@ -6902,6 +7133,8 @@ static int mvpp2_stop(struct net_device *dev) mvpp2_cleanup_rxqs(port); mvpp2_cleanup_txqs(port); + cancel_delayed_work_sync(&port->stats_work); + return 0; } @@ -7213,6 +7446,9 @@ static const struct ethtool_ops mvpp2_eth_tool_ops = { .get_drvinfo = mvpp2_ethtool_get_drvinfo, .get_ringparam = mvpp2_ethtool_get_ringparam, .set_ringparam = mvpp2_ethtool_set_ringparam, + .get_strings = mvpp2_ethtool_get_strings, + .get_ethtool_stats = mvpp2_ethtool_get_stats, + .get_sset_count = mvpp2_ethtool_get_sset_count, .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, }; @@ -7616,6 +7852,10 @@ static int mvpp2_port_probe(struct platform_device *pdev, err = PTR_ERR(port->base); goto err_free_irq; } + + port->stats_base = port->priv->lms_base + + MVPP21_MIB_COUNTERS_OFFSET + + port->gop_id * MVPP21_MIB_COUNTERS_PORT_SZ; } else { if (of_property_read_u32(port_node, "gop-port-id", &port->gop_id)) { @@ -7625,15 +7865,29 @@ static int mvpp2_port_probe(struct platform_device *pdev, } port->base = priv->iface_base + MVPP22_GMAC_BASE(port->gop_id); + port->stats_base = port->priv->iface_base + + MVPP22_MIB_COUNTERS_OFFSET + + port->gop_id * MVPP22_MIB_COUNTERS_PORT_SZ; } - /* Alloc per-cpu stats */ + /* Alloc per-cpu and ethtool stats */ port->stats = netdev_alloc_pcpu_stats(struct mvpp2_pcpu_stats); if (!port->stats) { err = -ENOMEM; goto err_free_irq; } + port->ethtool_stats = devm_kcalloc(&pdev->dev, + ARRAY_SIZE(mvpp2_ethtool_regs), + sizeof(u64), GFP_KERNEL); + if (!port->ethtool_stats) { + err = -ENOMEM; + goto err_free_stats; + } + + mutex_init(&port->gather_stats_lock); + INIT_DELAYED_WORK(&port->stats_work, mvpp2_gather_hw_statistics); + mvpp2_port_copy_mac_addr(dev, priv, port_node, &mac_from); port->tx_ring_size = MVPP2_MAX_TXD; @@ -7678,6 +7932,7 @@ static int mvpp2_port_probe(struct platform_device *pdev, dev->features = features | NETIF_F_RXCSUM; dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO; dev->vlan_features |= features; + dev->gso_max_segs = MVPP2_MAX_TSO_SEGS; /* MTU range: 68 - 9676 */ dev->min_mtu = ETH_MIN_MTU; @@ -7769,9 +8024,9 @@ static void mvpp2_rx_fifo_init(struct mvpp2 *priv) for (port = 0; port < MVPP2_MAX_PORTS; port++) { mvpp2_write(priv, MVPP2_RX_DATA_FIFO_SIZE_REG(port), - MVPP2_RX_FIFO_PORT_DATA_SIZE); + MVPP2_RX_FIFO_PORT_DATA_SIZE_4KB); mvpp2_write(priv, MVPP2_RX_ATTR_FIFO_SIZE_REG(port), - MVPP2_RX_FIFO_PORT_ATTR_SIZE); + MVPP2_RX_FIFO_PORT_ATTR_SIZE_4KB); } mvpp2_write(priv, MVPP2_RX_MIN_PKT_SIZE_REG, @@ -7779,6 +8034,49 @@ static void mvpp2_rx_fifo_init(struct mvpp2 *priv) mvpp2_write(priv, MVPP2_RX_FIFO_INIT_REG, 0x1); } +static void mvpp22_rx_fifo_init(struct mvpp2 *priv) +{ + int port; + + /* The FIFO size parameters are set depending on the maximum speed a + * given port can handle: + * - Port 0: 10Gbps + * - Port 1: 2.5Gbps + * - Ports 2 and 3: 1Gbps + */ + + mvpp2_write(priv, MVPP2_RX_DATA_FIFO_SIZE_REG(0), + MVPP2_RX_FIFO_PORT_DATA_SIZE_32KB); + mvpp2_write(priv, MVPP2_RX_ATTR_FIFO_SIZE_REG(0), + MVPP2_RX_FIFO_PORT_ATTR_SIZE_32KB); + + mvpp2_write(priv, MVPP2_RX_DATA_FIFO_SIZE_REG(1), + MVPP2_RX_FIFO_PORT_DATA_SIZE_8KB); + mvpp2_write(priv, MVPP2_RX_ATTR_FIFO_SIZE_REG(1), + MVPP2_RX_FIFO_PORT_ATTR_SIZE_8KB); + + for (port = 2; port < MVPP2_MAX_PORTS; port++) { + mvpp2_write(priv, MVPP2_RX_DATA_FIFO_SIZE_REG(port), + MVPP2_RX_FIFO_PORT_DATA_SIZE_4KB); + mvpp2_write(priv, MVPP2_RX_ATTR_FIFO_SIZE_REG(port), + MVPP2_RX_FIFO_PORT_ATTR_SIZE_4KB); + } + + mvpp2_write(priv, MVPP2_RX_MIN_PKT_SIZE_REG, + MVPP2_RX_FIFO_PORT_MIN_PKT); + mvpp2_write(priv, MVPP2_RX_FIFO_INIT_REG, 0x1); +} + +/* Initialize Tx FIFO's */ +static void mvpp22_tx_fifo_init(struct mvpp2 *priv) +{ + int port; + + for (port = 0; port < MVPP2_MAX_PORTS; port++) + mvpp2_write(priv, MVPP22_TX_FIFO_SIZE_REG(port), + MVPP22_TX_FIFO_DATA_SIZE_3KB); +} + static void mvpp2_axi_init(struct mvpp2 *priv) { u32 val, rdval, wrval; @@ -7874,8 +8172,13 @@ static int mvpp2_init(struct platform_device *pdev, struct mvpp2 *priv) return err; } - /* Rx Fifo Init */ - mvpp2_rx_fifo_init(priv); + /* Fifo Init */ + if (priv->hw_version == MVPP21) { + mvpp2_rx_fifo_init(priv); + } else { + mvpp22_rx_fifo_init(priv); + mvpp22_tx_fifo_init(priv); + } if (priv->hw_version == MVPP21) writel(MVPP2_EXT_GLOBAL_CTRL_DEFAULT, @@ -7907,7 +8210,7 @@ static int mvpp2_probe(struct platform_device *pdev) struct mvpp2 *priv; struct resource *res; void __iomem *base; - int port_count, i; + int i; int err; priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); @@ -8022,14 +8325,14 @@ static int mvpp2_probe(struct platform_device *pdev) goto err_mg_clk; } - port_count = of_get_available_child_count(dn); - if (port_count == 0) { + priv->port_count = of_get_available_child_count(dn); + if (priv->port_count == 0) { dev_err(&pdev->dev, "no ports enabled\n"); err = -ENODEV; goto err_mg_clk; } - priv->port_list = devm_kcalloc(&pdev->dev, port_count, + priv->port_list = devm_kcalloc(&pdev->dev, priv->port_count, sizeof(*priv->port_list), GFP_KERNEL); if (!priv->port_list) { @@ -8046,6 +8349,21 @@ static int mvpp2_probe(struct platform_device *pdev) i++; } + /* Statistics must be gathered regularly because some of them (like + * packets counters) are 32-bit registers and could overflow quite + * quickly. For instance, a 10Gb link used at full bandwidth with the + * smallest packets (64B) will overflow a 32-bit counter in less than + * 30 seconds. Then, use a workqueue to fill 64-bit counters. + */ + snprintf(priv->queue_name, sizeof(priv->queue_name), + "stats-wq-%s%s", netdev_name(priv->port_list[0]->dev), + priv->port_count > 1 ? "+" : ""); + priv->stats_queue = create_singlethread_workqueue(priv->queue_name); + if (!priv->stats_queue) { + err = -ENOMEM; + goto err_mg_clk; + } + platform_set_drvdata(pdev, priv); return 0; @@ -8067,9 +8385,14 @@ static int mvpp2_remove(struct platform_device *pdev) struct device_node *port_node; int i = 0; + flush_workqueue(priv->stats_queue); + destroy_workqueue(priv->stats_queue); + for_each_available_child_of_node(dn, port_node) { - if (priv->port_list[i]) + if (priv->port_list[i]) { + mutex_destroy(&priv->port_list[i]->gather_stats_lock); mvpp2_port_remove(priv->port_list[i]); + } i++; } diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index 993724959a7c..91b1c154fd29 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -1496,9 +1496,8 @@ static int pxa168_eth_probe(struct platform_device *pdev) netif_napi_add(dev, &pep->napi, pxa168_rx_poll, pep->rx_ring_size); memset(&pep->timeout, 0, sizeof(struct timer_list)); - init_timer(&pep->timeout); - pep->timeout.function = rxq_refill_timer_wrapper; - pep->timeout.data = (unsigned long)pep; + setup_timer(&pep->timeout, rxq_refill_timer_wrapper, + (unsigned long)pep); pep->smi_bus = mdiobus_alloc(); if (!pep->smi_bus) { diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 5e81a7263654..54adfd967858 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1817,7 +1817,7 @@ static int mtk_open(struct net_device *dev) struct mtk_eth *eth = mac->hw; /* we run 2 netdevs on the same dma ring so we only bring it up once */ - if (!atomic_read(ð->dma_refcnt)) { + if (!refcount_read(ð->dma_refcnt)) { int err = mtk_start_dma(eth); if (err) @@ -1827,8 +1827,10 @@ static int mtk_open(struct net_device *dev) napi_enable(ð->rx_napi); mtk_tx_irq_enable(eth, MTK_TX_DONE_INT); mtk_rx_irq_enable(eth, MTK_RX_DONE_INT); + refcount_set(ð->dma_refcnt, 1); } - atomic_inc(ð->dma_refcnt); + else + refcount_inc(ð->dma_refcnt); phy_start(dev->phydev); netif_start_queue(dev); @@ -1868,7 +1870,7 @@ static int mtk_stop(struct net_device *dev) phy_stop(dev->phydev); /* only shutdown DMA if this is the last user */ - if (!atomic_dec_and_test(ð->dma_refcnt)) + if (!refcount_dec_and_test(ð->dma_refcnt)) return 0; mtk_tx_irq_disable(eth, MTK_TX_DONE_INT); diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 3d3c24a28112..a3af4660de81 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -15,6 +15,8 @@ #ifndef MTK_ETH_H #define MTK_ETH_H +#include <linux/refcount.h> + #define MTK_QDMA_PAGE_SIZE 2048 #define MTK_MAX_RX_LENGTH 1536 #define MTK_TX_DMA_BUF_LEN 0x3fff @@ -632,7 +634,7 @@ struct mtk_eth { struct regmap *pctl; u32 chip_id; bool hwlro; - atomic_t dma_refcnt; + refcount_t dma_refcnt; struct mtk_tx_ring tx_ring; struct mtk_rx_ring rx_ring[MTK_MAX_RX_RING_NUM]; struct mtk_rx_ring rx_ring_qdma; diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig index 22b1cc012bc9..36054e6fb9d3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx4/Kconfig @@ -38,3 +38,11 @@ config MLX4_DEBUG mlx4_core driver. The output can be turned on via the debug_level module parameter (which can also be set after the driver is loaded through sysfs). + +config MLX4_CORE_GEN2 + bool "Support for old gen2 Mellanox PCI IDs" if (MLX4_CORE) + depends on MLX4_CORE + default y + ---help--- + Say Y here if you want to use old gen2 Mellanox devices in the + driver. diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index 53daa6ca5d83..e2b6b0cac1ac 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -231,10 +231,10 @@ static void dump_err_buf(struct mlx4_dev *dev) i, swab32(readl(priv->catas_err.map + i))); } -static void poll_catas(unsigned long dev_ptr) +static void poll_catas(struct timer_list *t) { - struct mlx4_dev *dev = (struct mlx4_dev *) dev_ptr; - struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_priv *priv = from_timer(priv, t, catas_err.timer); + struct mlx4_dev *dev = &priv->dev; u32 slave_read; if (mlx4_is_slave(dev)) { @@ -277,7 +277,7 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev) phys_addr_t addr; INIT_LIST_HEAD(&priv->catas_err.list); - init_timer(&priv->catas_err.timer); + timer_setup(&priv->catas_err.timer, poll_catas, 0); priv->catas_err.map = NULL; if (!mlx4_is_slave(dev)) { @@ -293,8 +293,6 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev) } } - priv->catas_err.timer.data = (unsigned long) dev; - priv->catas_err.timer.function = poll_catas; priv->catas_err.timer.expires = round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL); add_timer(&priv->catas_err.timer); diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index 72eb50cd5ecd..d8e9a323122e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -69,7 +69,7 @@ void mlx4_cq_tasklet_cb(unsigned long data) list_for_each_entry_safe(mcq, temp, &ctx->process_list, tasklet_ctx.list) { list_del_init(&mcq->tasklet_ctx.list); mcq->tasklet_ctx.comp(mcq); - if (atomic_dec_and_test(&mcq->refcount)) + if (refcount_dec_and_test(&mcq->refcount)) complete(&mcq->free); if (time_after(jiffies, end)) break; @@ -92,7 +92,7 @@ static void mlx4_add_cq_to_tasklet(struct mlx4_cq *cq) * still arrive. */ if (list_empty_careful(&cq->tasklet_ctx.list)) { - atomic_inc(&cq->refcount); + refcount_inc(&cq->refcount); kick = list_empty(&tasklet_ctx->list); list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list); if (kick) @@ -344,7 +344,7 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, cq->cons_index = 0; cq->arm_sn = 1; cq->uar = uar; - atomic_set(&cq->refcount, 1); + refcount_set(&cq->refcount, 1); init_completion(&cq->free); cq->comp = mlx4_add_cq_to_tasklet; cq->tasklet_ctx.priv = @@ -386,7 +386,7 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq) priv->eq_table.eq[MLX4_EQ_ASYNC].irq) synchronize_irq(priv->eq_table.eq[MLX4_EQ_ASYNC].irq); - if (atomic_dec_and_test(&cq->refcount)) + if (refcount_dec_and_test(&cq->refcount)) complete(&cq->free); wait_for_completion(&cq->free); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 3d4e4a5d00d1..bf1f04164885 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -1742,13 +1742,18 @@ static int mlx4_en_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) return err; } +static int mlx4_en_get_max_num_rx_rings(struct net_device *dev) +{ + return min_t(int, num_online_cpus(), MAX_RX_RINGS); +} + static void mlx4_en_get_channels(struct net_device *dev, struct ethtool_channels *channel) { struct mlx4_en_priv *priv = netdev_priv(dev); - channel->max_rx = MAX_RX_RINGS; - channel->max_tx = MLX4_EN_MAX_TX_RING_P_UP; + channel->max_rx = mlx4_en_get_max_num_rx_rings(dev); + channel->max_tx = priv->mdev->profile.max_num_tx_rings_p_up; channel->rx_count = priv->rx_ring_num; channel->tx_count = priv->tx_ring_num[TX] / @@ -1777,7 +1782,7 @@ static int mlx4_en_set_channels(struct net_device *dev, mutex_lock(&mdev->state_lock); xdp_count = priv->tx_ring_num[TX_XDP] ? channel->rx_count : 0; if (channel->tx_count * priv->prof->num_up + xdp_count > - MAX_TX_RINGS) { + priv->mdev->profile.max_num_tx_rings_p_up * priv->prof->num_up) { err = -EINVAL; en_err(priv, "Total number of TX and XDP rings (%d) exceeds the maximum supported (%d)\n", diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c index 686e18de9a97..2c2965497ed3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -153,7 +153,7 @@ static void mlx4_en_get_profile(struct mlx4_en_dev *mdev) int i; params->udp_rss = udp_rss; - params->num_tx_rings_p_up = mlx4_low_memory_profile() ? + params->max_num_tx_rings_p_up = mlx4_low_memory_profile() ? MLX4_EN_MIN_TX_RING_P_UP : min_t(int, num_online_cpus(), MLX4_EN_MAX_TX_RING_P_UP); @@ -170,8 +170,8 @@ static void mlx4_en_get_profile(struct mlx4_en_dev *mdev) params->prof[i].tx_ring_size = MLX4_EN_DEF_TX_RING_SIZE; params->prof[i].rx_ring_size = MLX4_EN_DEF_RX_RING_SIZE; params->prof[i].num_up = MLX4_EN_NUM_UP_LOW; - params->prof[i].num_tx_rings_p_up = params->num_tx_rings_p_up; - params->prof[i].tx_ring_num[TX] = params->num_tx_rings_p_up * + params->prof[i].num_tx_rings_p_up = params->max_num_tx_rings_p_up; + params->prof[i].tx_ring_num[TX] = params->max_num_tx_rings_p_up * params->prof[i].num_up; params->prof[i].rss_rings = 0; params->prof[i].inline_thold = inline_thold; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 9c218f1cfc6c..99051a294fa6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -135,7 +135,7 @@ static int __mlx4_en_setup_tc(struct net_device *dev, enum tc_setup_type type, { struct tc_mqprio_qopt *mqprio = type_data; - if (type != TC_SETUP_MQPRIO) + if (type != TC_SETUP_QDISC_MQPRIO) return -EOPNOTSUPP; if (mqprio->num_tc && mqprio->num_tc != MLX4_EN_NUM_UP_HIGH) @@ -1752,6 +1752,7 @@ int mlx4_en_start_port(struct net_device *dev) mlx4_en_arm_cq(priv, cq); } else { + mlx4_en_init_tx_xdp_ring_descs(priv, tx_ring); mlx4_en_init_recycle_ring(priv, i); /* XDP TX CQ should never be armed */ } @@ -2915,7 +2916,7 @@ static u32 mlx4_xdp_query(struct net_device *dev) return prog_id; } -static int mlx4_xdp(struct net_device *dev, struct netdev_xdp *xdp) +static int mlx4_xdp(struct net_device *dev, struct netdev_bpf *xdp) { switch (xdp->command) { case XDP_SETUP_PROG: @@ -2957,7 +2958,7 @@ static const struct net_device_ops mlx4_netdev_ops = { .ndo_udp_tunnel_del = mlx4_en_del_vxlan_port, .ndo_features_check = mlx4_en_features_check, .ndo_set_tx_maxrate = mlx4_en_set_tx_maxrate, - .ndo_xdp = mlx4_xdp, + .ndo_bpf = mlx4_xdp, }; static const struct net_device_ops mlx4_netdev_ops_master = { @@ -2994,7 +2995,7 @@ static const struct net_device_ops mlx4_netdev_ops_master = { .ndo_udp_tunnel_del = mlx4_en_del_vxlan_port, .ndo_features_check = mlx4_en_features_check, .ndo_set_tx_maxrate = mlx4_en_set_tx_maxrate, - .ndo_xdp = mlx4_xdp, + .ndo_bpf = mlx4_xdp, }; struct mlx4_en_bond { @@ -3305,7 +3306,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, priv->pflags = MLX4_EN_PRIV_FLAGS_BLUEFLAME; priv->ctrl_flags = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE | MLX4_WQE_CTRL_SOLICITED); - priv->num_tx_rings_p_up = mdev->profile.num_tx_rings_p_up; + priv->num_tx_rings_p_up = mdev->profile.max_num_tx_rings_p_up; priv->tx_work_limit = MLX4_EN_DEFAULT_TX_WORK; netdev_rss_key_fill(priv->rss_key, sizeof(priv->rss_key)); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c index 5a47f9669621..6883ac75d37f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c @@ -53,7 +53,7 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, if (is_tx) { context->sq_size_stride = ilog2(size) << 3 | (ilog2(stride) - 4); if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP) - context->params2 |= MLX4_QP_BIT_FPP; + context->params2 |= cpu_to_be32(MLX4_QP_BIT_FPP); } else { context->sq_size_stride = ilog2(TXBB_SIZE) - 4; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index b97a55c827eb..85e28efcda33 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -193,7 +193,7 @@ static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv) if (mlx4_en_prepare_rx_desc(priv, ring, ring->actual_size, - GFP_KERNEL | __GFP_COLD)) { + GFP_KERNEL)) { if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) { en_err(priv, "Failed to allocate enough rx buffers\n"); return -ENOMEM; @@ -254,8 +254,7 @@ void mlx4_en_set_num_rx_rings(struct mlx4_en_dev *mdev) DEF_RX_RINGS)); num_rx_rings = mlx4_low_memory_profile() ? MIN_RX_RINGS : - min_t(int, num_of_eqs, - netif_get_num_default_rss_queues()); + min_t(int, num_of_eqs, num_online_cpus()); mdev->profile.prof[i].rx_ring_num = rounddown_pow_of_two(num_rx_rings); } @@ -552,8 +551,7 @@ static void mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv, do { if (mlx4_en_prepare_rx_desc(priv, ring, ring->prod & ring->size_mask, - GFP_ATOMIC | __GFP_COLD | - __GFP_MEMALLOC)) + GFP_ATOMIC | __GFP_MEMALLOC)) break; ring->prod++; } while (likely(--missing)); @@ -762,6 +760,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud xdp.data_hard_start = va - frags[0].page_offset; xdp.data = va; + xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + length; orig_data = xdp.data; @@ -778,7 +777,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud case XDP_PASS: break; case XDP_TX: - if (likely(!mlx4_en_xmit_frame(ring, frags, dev, + if (likely(!mlx4_en_xmit_frame(ring, frags, priv, length, cq_ring, &doorbell_pending))) { frags[0].page = NULL; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 3541a7f9d12e..6b6853773848 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -718,7 +718,7 @@ void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring) #else iowrite32be( #endif - ring->doorbell_qpn, + (__force u32)ring->doorbell_qpn, ring->bf.uar->map + MLX4_SEND_DOORBELL); } @@ -1085,13 +1085,35 @@ tx_drop: #define MLX4_EN_XDP_TX_REAL_SZ (((CTRL_SIZE + MLX4_EN_XDP_TX_NRTXBB * DS_SIZE) \ / 16) & 0x3f) +void mlx4_en_init_tx_xdp_ring_descs(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring) +{ + int i; + + for (i = 0; i < ring->size; i++) { + struct mlx4_en_tx_info *tx_info = &ring->tx_info[i]; + struct mlx4_en_tx_desc *tx_desc = ring->buf + + (i << LOG_TXBB_SIZE); + + tx_info->map0_byte_count = PAGE_SIZE; + tx_info->nr_txbb = MLX4_EN_XDP_TX_NRTXBB; + tx_info->data_offset = offsetof(struct mlx4_en_tx_desc, data); + tx_info->ts_requested = 0; + tx_info->nr_maps = 1; + tx_info->linear = 1; + tx_info->inl = 0; + + tx_desc->data.lkey = ring->mr_key; + tx_desc->ctrl.qpn_vlan.fence_size = MLX4_EN_XDP_TX_REAL_SZ; + tx_desc->ctrl.srcrb_flags = priv->ctrl_flags; + } +} + netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, struct mlx4_en_rx_alloc *frame, - struct net_device *dev, unsigned int length, + struct mlx4_en_priv *priv, unsigned int length, int tx_ind, bool *doorbell_pending) { - struct mlx4_en_priv *priv = netdev_priv(dev); - union mlx4_wqe_qpn_vlan qpn_vlan = {}; struct mlx4_en_tx_desc *tx_desc; struct mlx4_en_tx_info *tx_info; struct mlx4_wqe_data_seg *data; @@ -1123,25 +1145,16 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, tx_info->page = frame->page; frame->page = NULL; tx_info->map0_dma = dma; - tx_info->map0_byte_count = PAGE_SIZE; - tx_info->nr_txbb = MLX4_EN_XDP_TX_NRTXBB; tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN); - tx_info->data_offset = offsetof(struct mlx4_en_tx_desc, data); - tx_info->ts_requested = 0; - tx_info->nr_maps = 1; - tx_info->linear = 1; - tx_info->inl = 0; dma_sync_single_range_for_device(priv->ddev, dma, frame->page_offset, length, PCI_DMA_TODEVICE); data->addr = cpu_to_be64(dma + frame->page_offset); - data->lkey = ring->mr_key; dma_wmb(); data->byte_count = cpu_to_be32(length); /* tx completion can avoid cache line miss for common cases */ - tx_desc->ctrl.srcrb_flags = priv->ctrl_flags; op_own = cpu_to_be32(MLX4_OPCODE_SEND) | ((ring->prod & ring->size) ? @@ -1152,10 +1165,13 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, ring->prod += MLX4_EN_XDP_TX_NRTXBB; - qpn_vlan.fence_size = MLX4_EN_XDP_TX_REAL_SZ; + /* Ensure new descriptor hits memory + * before setting ownership of this descriptor to HW + */ + dma_wmb(); + tx_desc->ctrl.owner_opcode = op_own; + ring->xmit_more++; - mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, TXBB_SIZE, 0, - op_own, false, false); *doorbell_pending = true; return NETDEV_TX_OK; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 16c09949afd5..634f603f941c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -57,12 +57,12 @@ MODULE_PARM_DESC(enable_qos, "Enable Enhanced QoS support (default: off)"); #define MLX4_GET(dest, source, offset) \ do { \ void *__p = (char *) (source) + (offset); \ - u64 val; \ - switch (sizeof(dest)) { \ + __be64 val; \ + switch (sizeof(dest)) { \ case 1: (dest) = *(u8 *) __p; break; \ case 2: (dest) = be16_to_cpup(__p); break; \ case 4: (dest) = be32_to_cpup(__p); break; \ - case 8: val = get_unaligned((u64 *)__p); \ + case 8: val = get_unaligned((__be64 *)__p); \ (dest) = be64_to_cpu(val); break; \ default: __buggy_use_of_MLX4_GET(); \ } \ diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index e61c99ef741d..4d84cab77105 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -4066,6 +4066,7 @@ int mlx4_restart_one(struct pci_dev *pdev) #define MLX_GN(id) { PCI_VDEVICE(MELLANOX, id), 0 } static const struct pci_device_id mlx4_pci_table[] = { +#ifdef CONFIG_MLX4_CORE_GEN2 /* MT25408 "Hermon" */ MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_SDR), /* SDR */ MLX_SP(PCI_DEVICE_ID_MELLANOX_HERMON_DDR), /* DDR */ @@ -4085,6 +4086,7 @@ static const struct pci_device_id mlx4_pci_table[] = { MLX_SP(PCI_DEVICE_ID_MELLANOX_CONNECTX2), /* MT25400 Family [ConnectX-2] */ MLX_VF(0x1002), /* Virtual Function */ +#endif /* CONFIG_MLX4_CORE_GEN2 */ /* MT27500 Family [ConnectX-3] */ MLX_GN(PCI_DEVICE_ID_MELLANOX_CONNECTX3), MLX_VF(0x1004), /* Virtual Function */ diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index fdb3ad0cbe54..1856e279a7e0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -399,7 +399,7 @@ struct mlx4_en_profile { u32 active_ports; u32 small_pkt_int; u8 no_reset; - u8 num_tx_rings_p_up; + u8 max_num_tx_rings_p_up; struct mlx4_en_port_profile prof[MLX4_MAX_PORTS + 1]; }; @@ -693,7 +693,7 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb, netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev); netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, struct mlx4_en_rx_alloc *frame, - struct net_device *dev, unsigned int length, + struct mlx4_en_priv *priv, unsigned int length, int tx_ind, bool *doorbell_pending); void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring); bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring, @@ -705,6 +705,8 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, int node, int queue_index); void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring **pring); +void mlx4_en_init_tx_xdp_ring_descs(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring); int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, int cq, int user_prio); diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 728a2fb1f5c0..769598f7b6c8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -55,7 +55,7 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type) qp = __mlx4_qp_lookup(dev, qpn); if (qp) - atomic_inc(&qp->refcount); + refcount_inc(&qp->refcount); spin_unlock(&qp_table->lock); @@ -66,7 +66,7 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type) qp->event(qp, event_type); - if (atomic_dec_and_test(&qp->refcount)) + if (refcount_dec_and_test(&qp->refcount)) complete(&qp->free); } @@ -420,7 +420,7 @@ int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp) if (err) goto err_icm; - atomic_set(&qp->refcount, 1); + refcount_set(&qp->refcount, 1); init_completion(&qp->free); return 0; @@ -520,7 +520,7 @@ EXPORT_SYMBOL_GPL(mlx4_qp_remove); void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp) { - if (atomic_dec_and_test(&qp->refcount)) + if (refcount_dec_and_test(&qp->refcount)) complete(&qp->free); wait_for_completion(&qp->free); @@ -925,7 +925,7 @@ int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt, context->flags &= cpu_to_be32(~(0xf << 28)); context->flags |= cpu_to_be32(states[i + 1] << 28); if (states[i + 1] != MLX4_QP_STATE_RTR) - context->params2 &= ~MLX4_QP_BIT_FPP; + context->params2 &= ~cpu_to_be32(MLX4_QP_BIT_FPP); err = mlx4_qp_modify(dev, mtt, states[i], states[i + 1], context, 0, 0, qp); if (err) { diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index fabb53379727..04304dd894c6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -3185,7 +3185,7 @@ static int verify_qp_parameters(struct mlx4_dev *dev, optpar = be32_to_cpu(*(__be32 *) inbox->buf); if (slave != mlx4_master_func_num(dev)) { - qp_ctx->params2 &= ~MLX4_QP_BIT_FPP; + qp_ctx->params2 &= ~cpu_to_be32(MLX4_QP_BIT_FPP); /* setting QP rate-limit is disallowed for VFs */ if (qp_ctx->rate_limit_params) return -EPERM; diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c index bedf52126824..cbe4d9746ddf 100644 --- a/drivers/net/ethernet/mellanox/mlx4/srq.c +++ b/drivers/net/ethernet/mellanox/mlx4/srq.c @@ -49,7 +49,7 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type) srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1)); rcu_read_unlock(); if (srq) - atomic_inc(&srq->refcount); + refcount_inc(&srq->refcount); else { mlx4_warn(dev, "Async event for bogus SRQ %08x\n", srqn); return; @@ -57,7 +57,7 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type) srq->event(srq, event_type); - if (atomic_dec_and_test(&srq->refcount)) + if (refcount_dec_and_test(&srq->refcount)) complete(&srq->free); } @@ -203,7 +203,7 @@ int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd, if (err) goto err_radix; - atomic_set(&srq->refcount, 1); + refcount_set(&srq->refcount, 1); init_completion(&srq->free); return 0; @@ -232,7 +232,7 @@ void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq) radix_tree_delete(&srq_table->tree, srq->srqn); spin_unlock_irq(&srq_table->lock); - if (atomic_dec_and_test(&srq->refcount)) + if (refcount_dec_and_test(&srq->refcount)) complete(&srq->free); wait_for_completion(&srq->free); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index fdaef00465d7..25deaa5a534c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -6,6 +6,7 @@ config MLX5_CORE tristate "Mellanox Technologies ConnectX-4 and Connect-IB core driver" depends on MAY_USE_DEVLINK depends on PCI + imply PTP_1588_CLOCK default n ---help--- Core driver for low level functionality of the ConnectX-4 and @@ -29,7 +30,6 @@ config MLX5_CORE_EN bool "Mellanox Technologies ConnectX-4 Ethernet support" depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE depends on IPV6=y || IPV6=n || MLX5_CORE=m - imply PTP_1588_CLOCK default n ---help--- Ethernet support in Mellanox Technologies ConnectX-4 NIC. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 714dd0dc5eef..19b21b40ab07 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -5,7 +5,7 @@ subdir-ccflags-y += -I$(src) mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ - fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o \ + fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o lib/clock.o \ diag/fs_tracepoint.o mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o @@ -14,7 +14,7 @@ mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \ fpga/ipsec.o mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ - en_tx.o en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \ + en_tx.o en_rx.o en_rx_am.o en_txrx.o en_stats.o vxlan.o \ en_arfs.o en_fs_ethtool.o en_selftest.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o @@ -23,7 +23,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o en_rep.o en_tc. mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o -mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o +mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o ipoib/ipoib_vlan.o mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \ en_accel/ipsec_stats.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index 336d4738b807..1016e05c7ec7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -58,7 +58,7 @@ void mlx5_cq_tasklet_cb(unsigned long data) tasklet_ctx.list) { list_del_init(&mcq->tasklet_ctx.list); mcq->tasklet_ctx.comp(mcq); - if (atomic_dec_and_test(&mcq->refcount)) + if (refcount_dec_and_test(&mcq->refcount)) complete(&mcq->free); if (time_after(jiffies, end)) break; @@ -80,7 +80,7 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq) * still arrive. */ if (list_empty_careful(&cq->tasklet_ctx.list)) { - atomic_inc(&cq->refcount); + refcount_inc(&cq->refcount); list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list); } spin_unlock_irqrestore(&tasklet_ctx->lock, flags); @@ -94,7 +94,7 @@ void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn) spin_lock(&table->lock); cq = radix_tree_lookup(&table->tree, cqn); if (likely(cq)) - atomic_inc(&cq->refcount); + refcount_inc(&cq->refcount); spin_unlock(&table->lock); if (!cq) { @@ -106,7 +106,7 @@ void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn) cq->comp(cq); - if (atomic_dec_and_test(&cq->refcount)) + if (refcount_dec_and_test(&cq->refcount)) complete(&cq->free); } @@ -119,7 +119,7 @@ void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type) cq = radix_tree_lookup(&table->tree, cqn); if (cq) - atomic_inc(&cq->refcount); + refcount_inc(&cq->refcount); spin_unlock(&table->lock); @@ -130,7 +130,7 @@ void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type) cq->event(cq, event_type); - if (atomic_dec_and_test(&cq->refcount)) + if (refcount_dec_and_test(&cq->refcount)) complete(&cq->free); } @@ -159,7 +159,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, cq->cqn = MLX5_GET(create_cq_out, out, cqn); cq->cons_index = 0; cq->arm_sn = 0; - atomic_set(&cq->refcount, 1); + refcount_set(&cq->refcount, 1); init_completion(&cq->free); if (!cq->comp) cq->comp = mlx5_add_cq_to_tasklet; @@ -222,7 +222,7 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) synchronize_irq(cq->irqn); mlx5_debug_cq_remove(dev, cq); - if (atomic_dec_and_test(&cq->refcount)) + if (refcount_dec_and_test(&cq->refcount)) complete(&cq->free); wait_for_completion(&cq->free); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 13b5ef9d8703..c0872b3284cb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -57,6 +57,7 @@ #define MLX5E_HW2SW_MTU(priv, hwmtu) ((hwmtu) - ((priv)->hard_mtu)) #define MLX5E_SW2HW_MTU(priv, swmtu) ((swmtu) + ((priv)->hard_mtu)) +#define MLX5E_MAX_DSCP 64 #define MLX5E_MAX_NUM_TC 8 #define MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE 0x6 @@ -105,6 +106,7 @@ #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE 0x3 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS 0x20 #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC 0x10 +#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE 0x10 #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS 0x20 #define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES 0x80 #define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW 0x2 @@ -126,6 +128,16 @@ #define MLX5E_NUM_MAIN_GROUPS 9 +#define MLX5E_MSG_LEVEL NETIF_MSG_LINK + +#define mlx5e_dbg(mlevel, priv, format, ...) \ +do { \ + if (NETIF_MSG_##mlevel & (priv)->msglevel) \ + netdev_warn(priv->netdev, format, \ + ##__VA_ARGS__); \ +} while (0) + + static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size) { switch (wq_type) { @@ -187,12 +199,14 @@ extern const char mlx5e_self_tests[][ETH_GSTRING_LEN]; static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = { "rx_cqe_moder", + "tx_cqe_moder", "rx_cqe_compress", }; enum mlx5e_priv_flag { MLX5E_PFLAG_RX_CQE_BASED_MODER = (1 << 0), - MLX5E_PFLAG_RX_CQE_COMPRESS = (1 << 1), + MLX5E_PFLAG_TX_CQE_BASED_MODER = (1 << 1), + MLX5E_PFLAG_RX_CQE_COMPRESS = (1 << 2), }; #define MLX5E_SET_PFLAG(params, pflag, enable) \ @@ -212,6 +226,7 @@ enum mlx5e_priv_flag { struct mlx5e_cq_moder { u16 usec; u16 pkts; + u8 cq_period_mode; }; struct mlx5e_params { @@ -223,7 +238,6 @@ struct mlx5e_params { u8 log_rq_size; u16 num_channels; u8 num_tc; - u8 rx_cq_period_mode; bool rx_cqe_compress_def; struct mlx5e_cq_moder rx_cq_moderation; struct mlx5e_cq_moder tx_cq_moderation; @@ -260,34 +274,18 @@ enum { struct mlx5e_dcbx { enum mlx5_dcbx_oper_mode mode; struct mlx5e_cee_config cee_cfg; /* pending configuration */ + u8 dscp_app_cnt; /* The only setting that cannot be read from FW */ u8 tc_tsa[IEEE_8021QAZ_MAX_TCS]; u8 cap; }; -#endif -#define MAX_PIN_NUM 8 -struct mlx5e_pps { - u8 pin_caps[MAX_PIN_NUM]; - struct work_struct out_work; - u64 start[MAX_PIN_NUM]; - u8 enabled; -}; - -struct mlx5e_tstamp { - rwlock_t lock; - struct cyclecounter cycles; - struct timecounter clock; - struct hwtstamp_config hwtstamp_config; - u32 nominal_c_mult; - unsigned long overflow_period; - struct delayed_work overflow_work; - struct mlx5_core_dev *mdev; - struct ptp_clock *ptp; - struct ptp_clock_info ptp_info; - struct mlx5e_pps pps_info; +struct mlx5e_dcbx_dp { + u8 dscp2prio[MLX5E_MAX_DSCP]; + u8 trust_state; }; +#endif enum { MLX5E_RQ_STATE_ENABLED, @@ -375,9 +373,10 @@ struct mlx5e_txqsq { u8 min_inline_mode; u16 edge; struct device *pdev; - struct mlx5e_tstamp *tstamp; __be32 mkey_be; unsigned long state; + struct hwtstamp_config *tstamp; + struct mlx5_clock *clock; /* control path */ struct mlx5_wq_ctrl wq_ctrl; @@ -543,10 +542,11 @@ struct mlx5e_rq { struct mlx5e_channel *channel; struct device *pdev; struct net_device *netdev; - struct mlx5e_tstamp *tstamp; struct mlx5e_rq_stats stats; struct mlx5e_cq cq; struct mlx5e_page_cache page_cache; + struct hwtstamp_config *tstamp; + struct mlx5_clock *clock; mlx5e_fp_handle_rx_cqe handle_rx_cqe; mlx5e_fp_post_rx_wqes post_wqes; @@ -588,7 +588,7 @@ struct mlx5e_channel { /* control */ struct mlx5e_priv *priv; struct mlx5_core_dev *mdev; - struct mlx5e_tstamp *tstamp; + struct hwtstamp_config *tstamp; int ix; }; @@ -655,12 +655,14 @@ struct mlx5e_tc_table { struct mlx5e_vlan_table { struct mlx5e_flow_table ft; - unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; - struct mlx5_flow_handle *active_vlans_rule[VLAN_N_VID]; + DECLARE_BITMAP(active_cvlans, VLAN_N_VID); + DECLARE_BITMAP(active_svlans, VLAN_N_VID); + struct mlx5_flow_handle *active_cvlans_rule[VLAN_N_VID]; + struct mlx5_flow_handle *active_svlans_rule[VLAN_N_VID]; struct mlx5_flow_handle *untagged_rule; struct mlx5_flow_handle *any_cvlan_rule; struct mlx5_flow_handle *any_svlan_rule; - bool filter_disabled; + bool cvlan_filter_disabled; }; struct mlx5e_l2_table { @@ -762,8 +764,12 @@ struct mlx5e_priv { /* priv data path fields - start */ struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC]; int channel_tc2txq[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC]; +#ifdef CONFIG_MLX5_CORE_EN_DCB + struct mlx5e_dcbx_dp dcbx_dp; +#endif /* priv data path fields - end */ + u32 msglevel; unsigned long state; struct mutex state_lock; /* Protects Interface state */ struct mlx5e_rq drop_rq; @@ -789,7 +795,7 @@ struct mlx5e_priv { struct mlx5_core_dev *mdev; struct net_device *netdev; struct mlx5e_stats stats; - struct mlx5e_tstamp tstamp; + struct hwtstamp_config tstamp; u16 q_counter; #ifdef CONFIG_MLX5_CORE_EN_DCB struct mlx5e_dcbx dcbx; @@ -820,6 +826,8 @@ struct mlx5e_profile { mlx5e_fp_handle_rx_cqe handle_rx_cqe; mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe; } rx_handlers; + void (*netdev_registered_init)(struct mlx5e_priv *priv); + void (*netdev_registered_remove)(struct mlx5e_priv *priv); int max_tc; }; @@ -873,12 +881,6 @@ void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv); void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv); void mlx5e_set_rx_mode_work(struct work_struct *work); -void mlx5e_fill_hwstamp(struct mlx5e_tstamp *clock, u64 timestamp, - struct skb_shared_hwtstamps *hwts); -void mlx5e_timestamp_init(struct mlx5e_priv *priv); -void mlx5e_timestamp_cleanup(struct mlx5e_priv *priv); -void mlx5e_pps_event_handler(struct mlx5e_priv *priv, - struct ptp_clock_event *event); int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr); int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr); int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val); @@ -887,8 +889,9 @@ int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, u16 vid); int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, u16 vid); -void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv); -void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv); +void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv); +void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv); +void mlx5e_timestamp_set(struct mlx5e_priv *priv); struct mlx5e_redirect_rqt_param { bool is_rss; @@ -928,6 +931,8 @@ void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, int num_channels); int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); +void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, + u8 cq_period_mode); void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, @@ -993,6 +998,8 @@ extern const struct ethtool_ops mlx5e_ethtool_ops; extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops; int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets); void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv); +void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv); +void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv); #endif #ifndef CONFIG_RFS_ACCEL @@ -1045,6 +1052,9 @@ void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); int mlx5e_create_ttc_table(struct mlx5e_priv *priv); void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv); +int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv); +void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv); + int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc, u32 underlay_qpn, u32 *tisn); void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn); @@ -1081,6 +1091,9 @@ int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, struct ethtool_flash *flash); +int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv); + /* mlx5e generic netdev management API */ struct net_device* mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile, @@ -1091,5 +1104,5 @@ void mlx5e_destroy_netdev(struct mlx5e_priv *priv); void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params, u16 max_channels); - +u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev); #endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c index 4614ddfa91eb..6a7c8b04447e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -256,7 +256,7 @@ struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev, goto drop; } mdata = mlx5e_ipsec_add_metadata(skb); - if (unlikely(IS_ERR(mdata))) { + if (IS_ERR(mdata)) { atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_metadata); goto drop; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 12d3ced61114..610d485c4b03 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -92,7 +92,7 @@ static enum mlx5e_traffic_types arfs_get_tt(enum arfs_type type) static int arfs_disable(struct mlx5e_priv *priv) { - struct mlx5_flow_destination dest; + struct mlx5_flow_destination dest = {}; struct mlx5e_tir *tir = priv->indir_tir; int err = 0; int tt; @@ -126,7 +126,7 @@ int mlx5e_arfs_disable(struct mlx5e_priv *priv) int mlx5e_arfs_enable(struct mlx5e_priv *priv) { - struct mlx5_flow_destination dest; + struct mlx5_flow_destination dest = {}; int err = 0; int tt; int i; @@ -175,7 +175,7 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, { struct arfs_table *arfs_t = &priv->fs.arfs.arfs_tables[type]; struct mlx5e_tir *tir = priv->indir_tir; - struct mlx5_flow_destination dest; + struct mlx5_flow_destination dest = {}; MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_spec *spec; enum mlx5e_traffic_types tt; @@ -466,7 +466,7 @@ static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv, struct mlx5e_arfs_tables *arfs = &priv->fs.arfs; struct arfs_tuple *tuple = &arfs_rule->tuple; struct mlx5_flow_handle *rule = NULL; - struct mlx5_flow_destination dest; + struct mlx5_flow_destination dest = {}; MLX5_DECLARE_FLOW_ACT(flow_act); struct arfs_table *arfs_table; struct mlx5_flow_spec *spec; @@ -557,7 +557,7 @@ out: static void arfs_modify_rule_rq(struct mlx5e_priv *priv, struct mlx5_flow_handle *rule, u16 rxq) { - struct mlx5_flow_destination dst; + struct mlx5_flow_destination dst = {}; int err = 0; dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c deleted file mode 100644 index 84dd63e74041..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c +++ /dev/null @@ -1,619 +0,0 @@ -/* - * Copyright (c) 2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <linux/clocksource.h> -#include "en.h" - -enum { - MLX5E_CYCLES_SHIFT = 23 -}; - -enum { - MLX5E_PIN_MODE_IN = 0x0, - MLX5E_PIN_MODE_OUT = 0x1, -}; - -enum { - MLX5E_OUT_PATTERN_PULSE = 0x0, - MLX5E_OUT_PATTERN_PERIODIC = 0x1, -}; - -enum { - MLX5E_EVENT_MODE_DISABLE = 0x0, - MLX5E_EVENT_MODE_REPETETIVE = 0x1, - MLX5E_EVENT_MODE_ONCE_TILL_ARM = 0x2, -}; - -enum { - MLX5E_MTPPS_FS_ENABLE = BIT(0x0), - MLX5E_MTPPS_FS_PATTERN = BIT(0x2), - MLX5E_MTPPS_FS_PIN_MODE = BIT(0x3), - MLX5E_MTPPS_FS_TIME_STAMP = BIT(0x4), - MLX5E_MTPPS_FS_OUT_PULSE_DURATION = BIT(0x5), - MLX5E_MTPPS_FS_ENH_OUT_PER_ADJ = BIT(0x7), -}; - -void mlx5e_fill_hwstamp(struct mlx5e_tstamp *tstamp, u64 timestamp, - struct skb_shared_hwtstamps *hwts) -{ - u64 nsec; - - read_lock(&tstamp->lock); - nsec = timecounter_cyc2time(&tstamp->clock, timestamp); - read_unlock(&tstamp->lock); - - hwts->hwtstamp = ns_to_ktime(nsec); -} - -static u64 mlx5e_read_internal_timer(const struct cyclecounter *cc) -{ - struct mlx5e_tstamp *tstamp = container_of(cc, struct mlx5e_tstamp, - cycles); - - return mlx5_read_internal_timer(tstamp->mdev) & cc->mask; -} - -static void mlx5e_pps_out(struct work_struct *work) -{ - struct mlx5e_pps *pps_info = container_of(work, struct mlx5e_pps, - out_work); - struct mlx5e_tstamp *tstamp = container_of(pps_info, struct mlx5e_tstamp, - pps_info); - u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; - unsigned long flags; - int i; - - for (i = 0; i < tstamp->ptp_info.n_pins; i++) { - u64 tstart; - - write_lock_irqsave(&tstamp->lock, flags); - tstart = tstamp->pps_info.start[i]; - tstamp->pps_info.start[i] = 0; - write_unlock_irqrestore(&tstamp->lock, flags); - if (!tstart) - continue; - - MLX5_SET(mtpps_reg, in, pin, i); - MLX5_SET64(mtpps_reg, in, time_stamp, tstart); - MLX5_SET(mtpps_reg, in, field_select, MLX5E_MTPPS_FS_TIME_STAMP); - mlx5_set_mtpps(tstamp->mdev, in, sizeof(in)); - } -} - -static void mlx5e_timestamp_overflow(struct work_struct *work) -{ - struct delayed_work *dwork = to_delayed_work(work); - struct mlx5e_tstamp *tstamp = container_of(dwork, struct mlx5e_tstamp, - overflow_work); - struct mlx5e_priv *priv = container_of(tstamp, struct mlx5e_priv, tstamp); - unsigned long flags; - - write_lock_irqsave(&tstamp->lock, flags); - timecounter_read(&tstamp->clock); - write_unlock_irqrestore(&tstamp->lock, flags); - queue_delayed_work(priv->wq, &tstamp->overflow_work, - msecs_to_jiffies(tstamp->overflow_period * 1000)); -} - -int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) -{ - struct hwtstamp_config config; - int err; - - if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) - return -EOPNOTSUPP; - - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; - - /* TX HW timestamp */ - switch (config.tx_type) { - case HWTSTAMP_TX_OFF: - case HWTSTAMP_TX_ON: - break; - default: - return -ERANGE; - } - - mutex_lock(&priv->state_lock); - /* RX HW timestamp */ - switch (config.rx_filter) { - case HWTSTAMP_FILTER_NONE: - /* Reset CQE compression to Admin default */ - mlx5e_modify_rx_cqe_compression_locked(priv, priv->channels.params.rx_cqe_compress_def); - break; - case HWTSTAMP_FILTER_ALL: - case HWTSTAMP_FILTER_SOME: - case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: - case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: - case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_EVENT: - case HWTSTAMP_FILTER_PTP_V2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: - case HWTSTAMP_FILTER_NTP_ALL: - /* Disable CQE compression */ - netdev_warn(priv->netdev, "Disabling cqe compression"); - err = mlx5e_modify_rx_cqe_compression_locked(priv, false); - if (err) { - netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err); - mutex_unlock(&priv->state_lock); - return err; - } - config.rx_filter = HWTSTAMP_FILTER_ALL; - break; - default: - mutex_unlock(&priv->state_lock); - return -ERANGE; - } - - memcpy(&priv->tstamp.hwtstamp_config, &config, sizeof(config)); - mutex_unlock(&priv->state_lock); - - return copy_to_user(ifr->ifr_data, &config, - sizeof(config)) ? -EFAULT : 0; -} - -int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr) -{ - struct hwtstamp_config *cfg = &priv->tstamp.hwtstamp_config; - - if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) - return -EOPNOTSUPP; - - return copy_to_user(ifr->ifr_data, cfg, sizeof(*cfg)) ? -EFAULT : 0; -} - -static int mlx5e_ptp_settime(struct ptp_clock_info *ptp, - const struct timespec64 *ts) -{ - struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp, - ptp_info); - u64 ns = timespec64_to_ns(ts); - unsigned long flags; - - write_lock_irqsave(&tstamp->lock, flags); - timecounter_init(&tstamp->clock, &tstamp->cycles, ns); - write_unlock_irqrestore(&tstamp->lock, flags); - - return 0; -} - -static int mlx5e_ptp_gettime(struct ptp_clock_info *ptp, - struct timespec64 *ts) -{ - struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp, - ptp_info); - u64 ns; - unsigned long flags; - - write_lock_irqsave(&tstamp->lock, flags); - ns = timecounter_read(&tstamp->clock); - write_unlock_irqrestore(&tstamp->lock, flags); - - *ts = ns_to_timespec64(ns); - - return 0; -} - -static int mlx5e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) -{ - struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp, - ptp_info); - unsigned long flags; - - write_lock_irqsave(&tstamp->lock, flags); - timecounter_adjtime(&tstamp->clock, delta); - write_unlock_irqrestore(&tstamp->lock, flags); - - return 0; -} - -static int mlx5e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta) -{ - u64 adj; - u32 diff; - unsigned long flags; - int neg_adj = 0; - struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp, - ptp_info); - - if (delta < 0) { - neg_adj = 1; - delta = -delta; - } - - adj = tstamp->nominal_c_mult; - adj *= delta; - diff = div_u64(adj, 1000000000ULL); - - write_lock_irqsave(&tstamp->lock, flags); - timecounter_read(&tstamp->clock); - tstamp->cycles.mult = neg_adj ? tstamp->nominal_c_mult - diff : - tstamp->nominal_c_mult + diff; - write_unlock_irqrestore(&tstamp->lock, flags); - - return 0; -} - -static int mlx5e_extts_configure(struct ptp_clock_info *ptp, - struct ptp_clock_request *rq, - int on) -{ - struct mlx5e_tstamp *tstamp = - container_of(ptp, struct mlx5e_tstamp, ptp_info); - struct mlx5e_priv *priv = - container_of(tstamp, struct mlx5e_priv, tstamp); - u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; - u32 field_select = 0; - u8 pin_mode = 0; - u8 pattern = 0; - int pin = -1; - int err = 0; - - if (!MLX5_PPS_CAP(priv->mdev)) - return -EOPNOTSUPP; - - if (rq->extts.index >= tstamp->ptp_info.n_pins) - return -EINVAL; - - if (on) { - pin = ptp_find_pin(tstamp->ptp, PTP_PF_EXTTS, rq->extts.index); - if (pin < 0) - return -EBUSY; - pin_mode = MLX5E_PIN_MODE_IN; - pattern = !!(rq->extts.flags & PTP_FALLING_EDGE); - field_select = MLX5E_MTPPS_FS_PIN_MODE | - MLX5E_MTPPS_FS_PATTERN | - MLX5E_MTPPS_FS_ENABLE; - } else { - pin = rq->extts.index; - field_select = MLX5E_MTPPS_FS_ENABLE; - } - - MLX5_SET(mtpps_reg, in, pin, pin); - MLX5_SET(mtpps_reg, in, pin_mode, pin_mode); - MLX5_SET(mtpps_reg, in, pattern, pattern); - MLX5_SET(mtpps_reg, in, enable, on); - MLX5_SET(mtpps_reg, in, field_select, field_select); - - err = mlx5_set_mtpps(priv->mdev, in, sizeof(in)); - if (err) - return err; - - return mlx5_set_mtppse(priv->mdev, pin, 0, - MLX5E_EVENT_MODE_REPETETIVE & on); -} - -static int mlx5e_perout_configure(struct ptp_clock_info *ptp, - struct ptp_clock_request *rq, - int on) -{ - struct mlx5e_tstamp *tstamp = - container_of(ptp, struct mlx5e_tstamp, ptp_info); - struct mlx5e_priv *priv = - container_of(tstamp, struct mlx5e_priv, tstamp); - u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; - u64 nsec_now, nsec_delta, time_stamp = 0; - u64 cycles_now, cycles_delta; - struct timespec64 ts; - unsigned long flags; - u32 field_select = 0; - u8 pin_mode = 0; - u8 pattern = 0; - int pin = -1; - int err = 0; - s64 ns; - - if (!MLX5_PPS_CAP(priv->mdev)) - return -EOPNOTSUPP; - - if (rq->perout.index >= tstamp->ptp_info.n_pins) - return -EINVAL; - - if (on) { - pin = ptp_find_pin(tstamp->ptp, PTP_PF_PEROUT, - rq->perout.index); - if (pin < 0) - return -EBUSY; - - pin_mode = MLX5E_PIN_MODE_OUT; - pattern = MLX5E_OUT_PATTERN_PERIODIC; - ts.tv_sec = rq->perout.period.sec; - ts.tv_nsec = rq->perout.period.nsec; - ns = timespec64_to_ns(&ts); - - if ((ns >> 1) != 500000000LL) - return -EINVAL; - - ts.tv_sec = rq->perout.start.sec; - ts.tv_nsec = rq->perout.start.nsec; - ns = timespec64_to_ns(&ts); - cycles_now = mlx5_read_internal_timer(tstamp->mdev); - write_lock_irqsave(&tstamp->lock, flags); - nsec_now = timecounter_cyc2time(&tstamp->clock, cycles_now); - nsec_delta = ns - nsec_now; - cycles_delta = div64_u64(nsec_delta << tstamp->cycles.shift, - tstamp->cycles.mult); - write_unlock_irqrestore(&tstamp->lock, flags); - time_stamp = cycles_now + cycles_delta; - field_select = MLX5E_MTPPS_FS_PIN_MODE | - MLX5E_MTPPS_FS_PATTERN | - MLX5E_MTPPS_FS_ENABLE | - MLX5E_MTPPS_FS_TIME_STAMP; - } else { - pin = rq->perout.index; - field_select = MLX5E_MTPPS_FS_ENABLE; - } - - MLX5_SET(mtpps_reg, in, pin, pin); - MLX5_SET(mtpps_reg, in, pin_mode, pin_mode); - MLX5_SET(mtpps_reg, in, pattern, pattern); - MLX5_SET(mtpps_reg, in, enable, on); - MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp); - MLX5_SET(mtpps_reg, in, field_select, field_select); - - err = mlx5_set_mtpps(priv->mdev, in, sizeof(in)); - if (err) - return err; - - return mlx5_set_mtppse(priv->mdev, pin, 0, - MLX5E_EVENT_MODE_REPETETIVE & on); -} - -static int mlx5e_pps_configure(struct ptp_clock_info *ptp, - struct ptp_clock_request *rq, - int on) -{ - struct mlx5e_tstamp *tstamp = - container_of(ptp, struct mlx5e_tstamp, ptp_info); - - tstamp->pps_info.enabled = !!on; - return 0; -} - -static int mlx5e_ptp_enable(struct ptp_clock_info *ptp, - struct ptp_clock_request *rq, - int on) -{ - switch (rq->type) { - case PTP_CLK_REQ_EXTTS: - return mlx5e_extts_configure(ptp, rq, on); - case PTP_CLK_REQ_PEROUT: - return mlx5e_perout_configure(ptp, rq, on); - case PTP_CLK_REQ_PPS: - return mlx5e_pps_configure(ptp, rq, on); - default: - return -EOPNOTSUPP; - } - return 0; -} - -static int mlx5e_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin, - enum ptp_pin_function func, unsigned int chan) -{ - return (func == PTP_PF_PHYSYNC) ? -EOPNOTSUPP : 0; -} - -static const struct ptp_clock_info mlx5e_ptp_clock_info = { - .owner = THIS_MODULE, - .max_adj = 100000000, - .n_alarm = 0, - .n_ext_ts = 0, - .n_per_out = 0, - .n_pins = 0, - .pps = 0, - .adjfreq = mlx5e_ptp_adjfreq, - .adjtime = mlx5e_ptp_adjtime, - .gettime64 = mlx5e_ptp_gettime, - .settime64 = mlx5e_ptp_settime, - .enable = NULL, - .verify = NULL, -}; - -static void mlx5e_timestamp_init_config(struct mlx5e_tstamp *tstamp) -{ - tstamp->hwtstamp_config.tx_type = HWTSTAMP_TX_OFF; - tstamp->hwtstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; -} - -static int mlx5e_init_pin_config(struct mlx5e_tstamp *tstamp) -{ - int i; - - tstamp->ptp_info.pin_config = - kzalloc(sizeof(*tstamp->ptp_info.pin_config) * - tstamp->ptp_info.n_pins, GFP_KERNEL); - if (!tstamp->ptp_info.pin_config) - return -ENOMEM; - tstamp->ptp_info.enable = mlx5e_ptp_enable; - tstamp->ptp_info.verify = mlx5e_ptp_verify; - tstamp->ptp_info.pps = 1; - - for (i = 0; i < tstamp->ptp_info.n_pins; i++) { - snprintf(tstamp->ptp_info.pin_config[i].name, - sizeof(tstamp->ptp_info.pin_config[i].name), - "mlx5_pps%d", i); - tstamp->ptp_info.pin_config[i].index = i; - tstamp->ptp_info.pin_config[i].func = PTP_PF_NONE; - tstamp->ptp_info.pin_config[i].chan = i; - } - - return 0; -} - -static void mlx5e_get_pps_caps(struct mlx5e_priv *priv, - struct mlx5e_tstamp *tstamp) -{ - u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; - - mlx5_query_mtpps(priv->mdev, out, sizeof(out)); - - tstamp->ptp_info.n_pins = MLX5_GET(mtpps_reg, out, - cap_number_of_pps_pins); - tstamp->ptp_info.n_ext_ts = MLX5_GET(mtpps_reg, out, - cap_max_num_of_pps_in_pins); - tstamp->ptp_info.n_per_out = MLX5_GET(mtpps_reg, out, - cap_max_num_of_pps_out_pins); - - tstamp->pps_info.pin_caps[0] = MLX5_GET(mtpps_reg, out, cap_pin_0_mode); - tstamp->pps_info.pin_caps[1] = MLX5_GET(mtpps_reg, out, cap_pin_1_mode); - tstamp->pps_info.pin_caps[2] = MLX5_GET(mtpps_reg, out, cap_pin_2_mode); - tstamp->pps_info.pin_caps[3] = MLX5_GET(mtpps_reg, out, cap_pin_3_mode); - tstamp->pps_info.pin_caps[4] = MLX5_GET(mtpps_reg, out, cap_pin_4_mode); - tstamp->pps_info.pin_caps[5] = MLX5_GET(mtpps_reg, out, cap_pin_5_mode); - tstamp->pps_info.pin_caps[6] = MLX5_GET(mtpps_reg, out, cap_pin_6_mode); - tstamp->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode); -} - -void mlx5e_pps_event_handler(struct mlx5e_priv *priv, - struct ptp_clock_event *event) -{ - struct net_device *netdev = priv->netdev; - struct mlx5e_tstamp *tstamp = &priv->tstamp; - struct timespec64 ts; - u64 nsec_now, nsec_delta; - u64 cycles_now, cycles_delta; - int pin = event->index; - s64 ns; - unsigned long flags; - - switch (tstamp->ptp_info.pin_config[pin].func) { - case PTP_PF_EXTTS: - if (tstamp->pps_info.enabled) { - event->type = PTP_CLOCK_PPSUSR; - event->pps_times.ts_real = ns_to_timespec64(event->timestamp); - } else { - event->type = PTP_CLOCK_EXTTS; - } - ptp_clock_event(tstamp->ptp, event); - break; - case PTP_PF_PEROUT: - mlx5e_ptp_gettime(&tstamp->ptp_info, &ts); - cycles_now = mlx5_read_internal_timer(tstamp->mdev); - ts.tv_sec += 1; - ts.tv_nsec = 0; - ns = timespec64_to_ns(&ts); - write_lock_irqsave(&tstamp->lock, flags); - nsec_now = timecounter_cyc2time(&tstamp->clock, cycles_now); - nsec_delta = ns - nsec_now; - cycles_delta = div64_u64(nsec_delta << tstamp->cycles.shift, - tstamp->cycles.mult); - tstamp->pps_info.start[pin] = cycles_now + cycles_delta; - queue_work(priv->wq, &tstamp->pps_info.out_work); - write_unlock_irqrestore(&tstamp->lock, flags); - break; - default: - netdev_err(netdev, "%s: Unhandled event\n", __func__); - } -} - -void mlx5e_timestamp_init(struct mlx5e_priv *priv) -{ - struct mlx5e_tstamp *tstamp = &priv->tstamp; - u64 ns; - u64 frac = 0; - u32 dev_freq; - - mlx5e_timestamp_init_config(tstamp); - dev_freq = MLX5_CAP_GEN(priv->mdev, device_frequency_khz); - if (!dev_freq) { - mlx5_core_warn(priv->mdev, "invalid device_frequency_khz, aborting HW clock init\n"); - return; - } - rwlock_init(&tstamp->lock); - tstamp->cycles.read = mlx5e_read_internal_timer; - tstamp->cycles.shift = MLX5E_CYCLES_SHIFT; - tstamp->cycles.mult = clocksource_khz2mult(dev_freq, - tstamp->cycles.shift); - tstamp->nominal_c_mult = tstamp->cycles.mult; - tstamp->cycles.mask = CLOCKSOURCE_MASK(41); - tstamp->mdev = priv->mdev; - - timecounter_init(&tstamp->clock, &tstamp->cycles, - ktime_to_ns(ktime_get_real())); - - /* Calculate period in seconds to call the overflow watchdog - to make - * sure counter is checked at least once every wrap around. - */ - ns = cyclecounter_cyc2ns(&tstamp->cycles, tstamp->cycles.mask, - frac, &frac); - do_div(ns, NSEC_PER_SEC / 2 / HZ); - tstamp->overflow_period = ns; - - INIT_WORK(&tstamp->pps_info.out_work, mlx5e_pps_out); - INIT_DELAYED_WORK(&tstamp->overflow_work, mlx5e_timestamp_overflow); - if (tstamp->overflow_period) - queue_delayed_work(priv->wq, &tstamp->overflow_work, 0); - else - mlx5_core_warn(priv->mdev, "invalid overflow period, overflow_work is not scheduled\n"); - - /* Configure the PHC */ - tstamp->ptp_info = mlx5e_ptp_clock_info; - snprintf(tstamp->ptp_info.name, 16, "mlx5 ptp"); - - /* Initialize 1PPS data structures */ - if (MLX5_PPS_CAP(priv->mdev)) - mlx5e_get_pps_caps(priv, tstamp); - if (tstamp->ptp_info.n_pins) - mlx5e_init_pin_config(tstamp); - - tstamp->ptp = ptp_clock_register(&tstamp->ptp_info, - &priv->mdev->pdev->dev); - if (IS_ERR(tstamp->ptp)) { - mlx5_core_warn(priv->mdev, "ptp_clock_register failed %ld\n", - PTR_ERR(tstamp->ptp)); - tstamp->ptp = NULL; - } -} - -void mlx5e_timestamp_cleanup(struct mlx5e_priv *priv) -{ - struct mlx5e_tstamp *tstamp = &priv->tstamp; - - if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) - return; - - if (priv->tstamp.ptp) { - ptp_clock_unregister(priv->tstamp.ptp); - priv->tstamp.ptp = NULL; - } - - cancel_work_sync(&tstamp->pps_info.out_work); - cancel_delayed_work_sync(&tstamp->overflow_work); - kfree(tstamp->ptp_info.pin_config); -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index ece3fb147e3e..784e282803db 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -134,6 +134,7 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev) mlx5_core_destroy_mkey(mdev, &res->mkey); mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); mlx5_core_dealloc_pd(mdev, res->pdn); + memset(res, 0, sizeof(*res)); } int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb) @@ -170,3 +171,15 @@ out: return err; } + +u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev) +{ + u8 min_inline_mode; + + mlx5_query_min_inline(mdev, &min_inline_mode); + if (min_inline_mode == MLX5_INLINE_MODE_NONE && + !MLX5_CAP_ETH(mdev, wqe_vlan_insert)) + min_inline_mode = MLX5_INLINE_MODE_L2; + + return min_inline_mode; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 51c4cc00a186..c6d90b6dd80e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -46,6 +46,13 @@ enum { MLX5E_LOWEST_PRIO_GROUP = 0, }; +#define MLX5_DSCP_SUPPORTED(mdev) (MLX5_CAP_GEN(mdev, qcam_reg) && \ + MLX5_CAP_QCAM_REG(mdev, qpts) && \ + MLX5_CAP_QCAM_REG(mdev, qpdpm)) + +static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state); +static int mlx5e_set_dscp2prio(struct mlx5e_priv *priv, u8 dscp, u8 prio); + /* If dcbx mode is non-host set the dcbx mode to host. */ static int mlx5e_dcbnl_set_dcbx_mode(struct mlx5e_priv *priv, @@ -234,7 +241,7 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets) u8 tc_tx_bw[IEEE_8021QAZ_MAX_TCS]; u8 tc_group[IEEE_8021QAZ_MAX_TCS]; int max_tc = mlx5_max_tc(mdev); - int err; + int err, i; mlx5e_build_tc_group(ets, tc_group, max_tc); mlx5e_build_tc_tx_bw(ets, tc_tx_bw, tc_group, max_tc); @@ -253,6 +260,14 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets) return err; memcpy(priv->dcbx.tc_tsa, ets->tc_tsa, sizeof(ets->tc_tsa)); + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + mlx5e_dbg(HW, priv, "%s: prio_%d <=> tc_%d\n", + __func__, i, ets->prio_tc[i]); + mlx5e_dbg(HW, priv, "%s: tc_%d <=> tx_bw_%d%%, group_%d\n", + __func__, i, tc_tx_bw[i], tc_group[i]); + } + return err; } @@ -338,6 +353,11 @@ static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev, ret = mlx5_set_port_pfc(mdev, pfc->pfc_en, pfc->pfc_en); mlx5_toggle_port_link(mdev); + if (!ret) { + mlx5e_dbg(HW, priv, + "%s: PFC per priority bit mask: 0x%x\n", + __func__, pfc->pfc_en); + } return ret; } @@ -381,6 +401,113 @@ static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode) return 0; } +static int mlx5e_dcbnl_ieee_setapp(struct net_device *dev, struct dcb_app *app) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct dcb_app temp; + bool is_new; + int err; + + if (app->selector != IEEE_8021QAZ_APP_SEL_DSCP) + return -EINVAL; + + if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager)) + return -EINVAL; + + if (!MLX5_DSCP_SUPPORTED(priv->mdev)) + return -EINVAL; + + if (app->protocol >= MLX5E_MAX_DSCP) + return -EINVAL; + + /* Save the old entry info */ + temp.selector = IEEE_8021QAZ_APP_SEL_DSCP; + temp.protocol = app->protocol; + temp.priority = priv->dcbx_dp.dscp2prio[app->protocol]; + + /* Check if need to switch to dscp trust state */ + if (!priv->dcbx.dscp_app_cnt) { + err = mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_DSCP); + if (err) + return err; + } + + /* Skip the fw command if new and old mapping are the same */ + if (app->priority != priv->dcbx_dp.dscp2prio[app->protocol]) { + err = mlx5e_set_dscp2prio(priv, app->protocol, app->priority); + if (err) + goto fw_err; + } + + /* Delete the old entry if exists */ + is_new = false; + err = dcb_ieee_delapp(dev, &temp); + if (err) + is_new = true; + + /* Add new entry and update counter */ + err = dcb_ieee_setapp(dev, app); + if (err) + return err; + + if (is_new) + priv->dcbx.dscp_app_cnt++; + + return err; + +fw_err: + mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_PCP); + return err; +} + +static int mlx5e_dcbnl_ieee_delapp(struct net_device *dev, struct dcb_app *app) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int err; + + if (app->selector != IEEE_8021QAZ_APP_SEL_DSCP) + return -EINVAL; + + if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager)) + return -EINVAL; + + if (!MLX5_DSCP_SUPPORTED(priv->mdev)) + return -EINVAL; + + if (app->protocol >= MLX5E_MAX_DSCP) + return -EINVAL; + + /* Skip if no dscp app entry */ + if (!priv->dcbx.dscp_app_cnt) + return -ENOENT; + + /* Check if the entry matches fw setting */ + if (app->priority != priv->dcbx_dp.dscp2prio[app->protocol]) + return -ENOENT; + + /* Delete the app entry */ + err = dcb_ieee_delapp(dev, app); + if (err) + return err; + + /* Reset the priority mapping back to zero */ + err = mlx5e_set_dscp2prio(priv, app->protocol, 0); + if (err) + goto fw_err; + + priv->dcbx.dscp_app_cnt--; + + /* Check if need to switch to pcp trust state */ + if (!priv->dcbx.dscp_app_cnt) + err = mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_PCP); + + return err; + +fw_err: + mlx5e_set_trust_state(priv, MLX5_QPTS_TRUST_PCP); + return err; +} + static int mlx5e_dcbnl_ieee_getmaxrate(struct net_device *netdev, struct ieee_maxrate *maxrate) { @@ -446,6 +573,11 @@ static int mlx5e_dcbnl_ieee_setmaxrate(struct net_device *netdev, } } + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + mlx5e_dbg(HW, priv, "%s: tc_%d <=> max_bw %d Gbps\n", + __func__, i, max_bw_value[i]); + } + return mlx5_modify_port_ets_rate_limit(mdev, max_bw_value, max_bw_unit); } @@ -471,6 +603,10 @@ static u8 mlx5e_dcbnl_setall(struct net_device *netdev) ets.tc_rx_bw[i] = cee_cfg->pg_bw_pct[i]; ets.tc_tsa[i] = IEEE_8021QAZ_TSA_ETS; ets.prio_tc[i] = cee_cfg->prio_to_pg_map[i]; + mlx5e_dbg(HW, priv, + "%s: Priority group %d: tx_bw %d, rx_bw %d, prio_tc %d\n", + __func__, i, ets.tc_tx_bw[i], ets.tc_rx_bw[i], + ets.prio_tc[i]); } err = mlx5e_dbcnl_validate_ets(netdev, &ets); @@ -740,6 +876,8 @@ const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops = { .ieee_setmaxrate = mlx5e_dcbnl_ieee_setmaxrate, .ieee_getpfc = mlx5e_dcbnl_ieee_getpfc, .ieee_setpfc = mlx5e_dcbnl_ieee_setpfc, + .ieee_setapp = mlx5e_dcbnl_ieee_setapp, + .ieee_delapp = mlx5e_dcbnl_ieee_delapp, .getdcbx = mlx5e_dcbnl_getdcbx, .setdcbx = mlx5e_dcbnl_setdcbx, @@ -801,10 +939,135 @@ static void mlx5e_ets_init(struct mlx5e_priv *priv) mlx5e_dcbnl_ieee_setets_core(priv, &ets); } +enum { + INIT, + DELETE, +}; + +static void mlx5e_dcbnl_dscp_app(struct mlx5e_priv *priv, int action) +{ + struct dcb_app temp; + int i; + + if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager)) + return; + + if (!MLX5_DSCP_SUPPORTED(priv->mdev)) + return; + + /* No SEL_DSCP entry in non DSCP state */ + if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_DSCP) + return; + + temp.selector = IEEE_8021QAZ_APP_SEL_DSCP; + for (i = 0; i < MLX5E_MAX_DSCP; i++) { + temp.protocol = i; + temp.priority = priv->dcbx_dp.dscp2prio[i]; + if (action == INIT) + dcb_ieee_setapp(priv->netdev, &temp); + else + dcb_ieee_delapp(priv->netdev, &temp); + } + + priv->dcbx.dscp_app_cnt = (action == INIT) ? MLX5E_MAX_DSCP : 0; +} + +void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv) +{ + mlx5e_dcbnl_dscp_app(priv, INIT); +} + +void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv) +{ + mlx5e_dcbnl_dscp_app(priv, DELETE); +} + +static void mlx5e_trust_update_tx_min_inline_mode(struct mlx5e_priv *priv, + struct mlx5e_params *params) +{ + params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(priv->mdev); + if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP && + params->tx_min_inline_mode == MLX5_INLINE_MODE_L2) + params->tx_min_inline_mode = MLX5_INLINE_MODE_IP; +} + +static void mlx5e_trust_update_sq_inline_mode(struct mlx5e_priv *priv) +{ + struct mlx5e_channels new_channels = {}; + + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto out; + + new_channels.params = priv->channels.params; + mlx5e_trust_update_tx_min_inline_mode(priv, &new_channels.params); + + /* Skip if tx_min_inline is the same */ + if (new_channels.params.tx_min_inline_mode == + priv->channels.params.tx_min_inline_mode) + goto out; + + if (mlx5e_open_channels(priv, &new_channels)) + goto out; + mlx5e_switch_priv_channels(priv, &new_channels, NULL); + +out: + mutex_unlock(&priv->state_lock); +} + +static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state) +{ + int err; + + err = mlx5_set_trust_state(priv->mdev, trust_state); + if (err) + return err; + priv->dcbx_dp.trust_state = trust_state; + mlx5e_trust_update_sq_inline_mode(priv); + + return err; +} + +static int mlx5e_set_dscp2prio(struct mlx5e_priv *priv, u8 dscp, u8 prio) +{ + int err; + + err = mlx5_set_dscp2prio(priv->mdev, dscp, prio); + if (err) + return err; + + priv->dcbx_dp.dscp2prio[dscp] = prio; + return err; +} + +static int mlx5e_trust_initialize(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + if (!MLX5_DSCP_SUPPORTED(mdev)) + return 0; + + err = mlx5_query_trust_state(priv->mdev, &priv->dcbx_dp.trust_state); + if (err) + return err; + + mlx5e_trust_update_tx_min_inline_mode(priv, &priv->channels.params); + + err = mlx5_query_dscp2prio(priv->mdev, priv->dcbx_dp.dscp2prio); + if (err) + return err; + + return 0; +} + void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv) { struct mlx5e_dcbx *dcbx = &priv->dcbx; + mlx5e_trust_initialize(priv); + if (!MLX5_CAP_GEN(priv->mdev, qos)) return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index d12e9fc0d76b..23425f028405 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -31,7 +31,6 @@ */ #include "en.h" -#include "en_accel/ipsec.h" void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, struct ethtool_drvinfo *drvinfo) @@ -136,59 +135,15 @@ void mlx5e_build_ptys2ethtool_map(void) ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT); } -static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv) -{ - struct mlx5_core_dev *mdev = priv->mdev; - u8 pfc_en_tx; - u8 pfc_en_rx; - int err; - - if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) - return 0; - - err = mlx5_query_port_pfc(mdev, &pfc_en_tx, &pfc_en_rx); - - return err ? 0 : pfc_en_tx | pfc_en_rx; -} - -static bool mlx5e_query_global_pause_combined(struct mlx5e_priv *priv) -{ - struct mlx5_core_dev *mdev = priv->mdev; - u32 rx_pause; - u32 tx_pause; - int err; - - if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) - return false; - - err = mlx5_query_port_pause(mdev, &rx_pause, &tx_pause); - - return err ? false : rx_pause | tx_pause; -} - -#define MLX5E_NUM_Q_CNTRS(priv) (NUM_Q_COUNTERS * (!!priv->q_counter)) -#define MLX5E_NUM_RQ_STATS(priv) (NUM_RQ_STATS * (priv)->channels.num) -#define MLX5E_NUM_SQ_STATS(priv) \ - (NUM_SQ_STATS * (priv)->channels.num * (priv)->channels.params.num_tc) -#define MLX5E_NUM_PFC_COUNTERS(priv) \ - ((mlx5e_query_global_pause_combined(priv) + hweight8(mlx5e_query_pfc_combined(priv))) * \ - NUM_PPORT_PER_PRIO_PFC_COUNTERS) - int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset) { + int i, num_stats = 0; + switch (sset) { case ETH_SS_STATS: - return NUM_SW_COUNTERS + - MLX5E_NUM_Q_CNTRS(priv) + - NUM_VPORT_COUNTERS + NUM_PPORT_COUNTERS(priv) + - NUM_PCIE_COUNTERS(priv) + - MLX5E_NUM_RQ_STATS(priv) + - MLX5E_NUM_SQ_STATS(priv) + - MLX5E_NUM_PFC_COUNTERS(priv) + - ARRAY_SIZE(mlx5e_pme_status_desc) + - ARRAY_SIZE(mlx5e_pme_error_desc) + - mlx5e_ipsec_get_count(priv); - + for (i = 0; i < mlx5e_num_stats_grps; i++) + num_stats += mlx5e_stats_grps[i].get_num_stats(priv); + return num_stats; case ETH_SS_PRIV_FLAGS: return ARRAY_SIZE(mlx5e_priv_flags); case ETH_SS_TEST: @@ -208,104 +163,10 @@ static int mlx5e_get_sset_count(struct net_device *dev, int sset) static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, u8 *data) { - int i, j, tc, prio, idx = 0; - unsigned long pfc_combined; - - /* SW counters */ - for (i = 0; i < NUM_SW_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, sw_stats_desc[i].format); - - /* Q counters */ - for (i = 0; i < MLX5E_NUM_Q_CNTRS(priv); i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, q_stats_desc[i].format); - - /* VPORT counters */ - for (i = 0; i < NUM_VPORT_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - vport_stats_desc[i].format); - - /* PPORT counters */ - for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pport_802_3_stats_desc[i].format); - - for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pport_2863_stats_desc[i].format); - - for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pport_2819_stats_desc[i].format); - - for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv); i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pport_phy_statistical_stats_desc[i].format); - - for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS(priv); i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pport_eth_ext_stats_desc[i].format); - - for (i = 0; i < NUM_PCIE_PERF_COUNTERS(priv); i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pcie_perf_stats_desc[i].format); - - for (i = 0; i < NUM_PCIE_PERF_COUNTERS64(priv); i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pcie_perf_stats_desc64[i].format); - - for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS(priv); i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pcie_perf_stall_stats_desc[i].format); - - for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { - for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, - pport_per_prio_traffic_stats_desc[i].format, prio); - } - - pfc_combined = mlx5e_query_pfc_combined(priv); - for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) { - for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { - char pfc_string[ETH_GSTRING_LEN]; - - snprintf(pfc_string, sizeof(pfc_string), "prio%d", prio); - sprintf(data + (idx++) * ETH_GSTRING_LEN, - pport_per_prio_pfc_stats_desc[i].format, pfc_string); - } - } - - if (mlx5e_query_global_pause_combined(priv)) { - for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { - sprintf(data + (idx++) * ETH_GSTRING_LEN, - pport_per_prio_pfc_stats_desc[i].format, "global"); - } - } - - /* port module event counters */ - for (i = 0; i < ARRAY_SIZE(mlx5e_pme_status_desc); i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_status_desc[i].format); - - for (i = 0; i < ARRAY_SIZE(mlx5e_pme_error_desc); i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_error_desc[i].format); - - /* IPSec counters */ - idx += mlx5e_ipsec_get_strings(priv, data + idx * ETH_GSTRING_LEN); - - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) - return; - - /* per channel counters */ - for (i = 0; i < priv->channels.num; i++) - for (j = 0; j < NUM_RQ_STATS; j++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, - rq_stats_desc[j].format, i); + int i, idx = 0; - for (tc = 0; tc < priv->channels.params.num_tc; tc++) - for (i = 0; i < priv->channels.num; i++) - for (j = 0; j < NUM_SQ_STATS; j++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, - sq_stats_desc[j].format, - priv->channel_tc2txq[i][tc]); + for (i = 0; i < mlx5e_num_stats_grps; i++) + idx = mlx5e_stats_grps[i].fill_strings(priv, data, idx); } void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, u32 stringset, u8 *data) @@ -340,10 +201,7 @@ static void mlx5e_get_strings(struct net_device *dev, u32 stringset, u8 *data) void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, struct ethtool_stats *stats, u64 *data) { - struct mlx5e_channels *channels; - struct mlx5_priv *mlx5_priv; - int i, j, tc, prio, idx = 0; - unsigned long pfc_combined; + int i, idx = 0; if (!data) return; @@ -351,102 +209,10 @@ void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, mutex_lock(&priv->state_lock); if (test_bit(MLX5E_STATE_OPENED, &priv->state)) mlx5e_update_stats(priv, true); - channels = &priv->channels; mutex_unlock(&priv->state_lock); - for (i = 0; i < NUM_SW_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.sw, - sw_stats_desc, i); - - for (i = 0; i < MLX5E_NUM_Q_CNTRS(priv); i++) - data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt, - q_stats_desc, i); - - for (i = 0; i < NUM_VPORT_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vport.query_vport_out, - vport_stats_desc, i); - - for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.IEEE_802_3_counters, - pport_802_3_stats_desc, i); - - for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2863_counters, - pport_2863_stats_desc, i); - - for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2819_counters, - pport_2819_stats_desc, i); - - for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv); i++) - data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters, - pport_phy_statistical_stats_desc, i); - - for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS(priv); i++) - data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.eth_ext_counters, - pport_eth_ext_stats_desc, i); - - for (i = 0; i < NUM_PCIE_PERF_COUNTERS(priv); i++) - data[idx++] = MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters, - pcie_perf_stats_desc, i); - - for (i = 0; i < NUM_PCIE_PERF_COUNTERS64(priv); i++) - data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pcie.pcie_perf_counters, - pcie_perf_stats_desc64, i); - - for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS(priv); i++) - data[idx++] = MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters, - pcie_perf_stall_stats_desc, i); - - for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { - for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio], - pport_per_prio_traffic_stats_desc, i); - } - - pfc_combined = mlx5e_query_pfc_combined(priv); - for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) { - for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { - data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio], - pport_per_prio_pfc_stats_desc, i); - } - } - - if (mlx5e_query_global_pause_combined(priv)) { - for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { - data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0], - pport_per_prio_pfc_stats_desc, i); - } - } - - /* port module event counters */ - mlx5_priv = &priv->mdev->priv; - for (i = 0; i < ARRAY_SIZE(mlx5e_pme_status_desc); i++) - data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.status_counters, - mlx5e_pme_status_desc, i); - - for (i = 0; i < ARRAY_SIZE(mlx5e_pme_error_desc); i++) - data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.error_counters, - mlx5e_pme_error_desc, i); - - /* IPSec counters */ - idx += mlx5e_ipsec_get_stats(priv, data + idx); - - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) - return; - - /* per channel counters */ - for (i = 0; i < channels->num; i++) - for (j = 0; j < NUM_RQ_STATS; j++) - data[idx++] = - MLX5E_READ_CTR64_CPU(&channels->c[i]->rq.stats, - rq_stats_desc, j); - - for (tc = 0; tc < priv->channels.params.num_tc; tc++) - for (i = 0; i < channels->num; i++) - for (j = 0; j < NUM_SQ_STATS; j++) - data[idx++] = MLX5E_READ_CTR64_CPU(&channels->c[i]->sq[tc].stats, - sq_stats_desc, j); + for (i = 0; i < mlx5e_num_stats_grps; i++) + idx = mlx5e_stats_grps[i].fill_stats(priv, data, idx); } static void mlx5e_get_ethtool_stats(struct net_device *dev, @@ -1417,14 +1183,15 @@ static int mlx5e_set_pauseparam(struct net_device *netdev, int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, struct ethtool_ts_info *info) { + struct mlx5_core_dev *mdev = priv->mdev; int ret; ret = ethtool_op_get_ts_info(priv->netdev, info); if (ret) return ret; - info->phc_index = priv->tstamp.ptp ? - ptp_clock_index(priv->tstamp.ptp) : -1; + info->phc_index = mdev->clock.ptp ? + ptp_clock_index(mdev->clock.ptp) : -1; if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) return 0; @@ -1573,6 +1340,16 @@ static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) return mlx5_set_port_wol(mdev, mlx5_wol_mode); } +static u32 mlx5e_get_msglevel(struct net_device *dev) +{ + return ((struct mlx5e_priv *)netdev_priv(dev))->msglevel; +} + +static void mlx5e_set_msglevel(struct net_device *dev, u32 val) +{ + ((struct mlx5e_priv *)netdev_priv(dev))->msglevel = val; +} + static int mlx5e_set_phys_id(struct net_device *dev, enum ethtool_phys_id_state state) { @@ -1677,29 +1454,36 @@ static int mlx5e_get_module_eeprom(struct net_device *netdev, typedef int (*mlx5e_pflag_handler)(struct net_device *netdev, bool enable); -static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable) +static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable, + bool is_rx_cq) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_channels new_channels = {}; - bool rx_mode_changed; - u8 rx_cq_period_mode; + bool mode_changed; + u8 cq_period_mode, current_cq_period_mode; int err = 0; - rx_cq_period_mode = enable ? + cq_period_mode = enable ? MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE; - rx_mode_changed = rx_cq_period_mode != priv->channels.params.rx_cq_period_mode; + current_cq_period_mode = is_rx_cq ? + priv->channels.params.rx_cq_moderation.cq_period_mode : + priv->channels.params.tx_cq_moderation.cq_period_mode; + mode_changed = cq_period_mode != current_cq_period_mode; - if (rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE && + if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE && !MLX5_CAP_GEN(mdev, cq_period_start_from_cqe)) return -EOPNOTSUPP; - if (!rx_mode_changed) + if (!mode_changed) return 0; new_channels.params = priv->channels.params; - mlx5e_set_rx_cq_mode_params(&new_channels.params, rx_cq_period_mode); + if (is_rx_cq) + mlx5e_set_rx_cq_mode_params(&new_channels.params, cq_period_mode); + else + mlx5e_set_tx_cq_mode_params(&new_channels.params, cq_period_mode); if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { priv->channels.params = new_channels.params; @@ -1714,6 +1498,16 @@ static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable) return 0; } +static int set_pflag_tx_cqe_based_moder(struct net_device *netdev, bool enable) +{ + return set_pflag_cqe_based_moder(netdev, enable, false); +} + +static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable) +{ + return set_pflag_cqe_based_moder(netdev, enable, true); +} + int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val) { bool curr_val = MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS); @@ -1754,7 +1548,7 @@ static int set_pflag_rx_cqe_compress(struct net_device *netdev, if (!MLX5_CAP_GEN(mdev, cqe_compression)) return -EOPNOTSUPP; - if (enable && priv->tstamp.hwtstamp_config.rx_filter != HWTSTAMP_FILTER_NONE) { + if (enable && priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE) { netdev_err(netdev, "Can't enable cqe compression while timestamping is enabled.\n"); return -EINVAL; } @@ -1802,6 +1596,12 @@ static int mlx5e_set_priv_flags(struct net_device *netdev, u32 pflags) goto out; err = mlx5e_handle_pflag(netdev, pflags, + MLX5E_PFLAG_TX_CQE_BASED_MODER, + set_pflag_tx_cqe_based_moder); + if (err) + goto out; + + err = mlx5e_handle_pflag(netdev, pflags, MLX5E_PFLAG_RX_CQE_COMPRESS, set_pflag_rx_cqe_compress); @@ -1905,4 +1705,7 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .get_priv_flags = mlx5e_get_priv_flags, .set_priv_flags = mlx5e_set_priv_flags, .self_test = mlx5e_self_test, + .get_msglevel = mlx5e_get_msglevel, + .set_msglevel = mlx5e_set_msglevel, + }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 4837045ffba3..def513484845 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -118,7 +118,7 @@ static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv) int i; list_size = 0; - for_each_set_bit(vlan, priv->fs.vlan.active_vlans, VLAN_N_VID) + for_each_set_bit(vlan, priv->fs.vlan.active_cvlans, VLAN_N_VID) list_size++; max_list_size = 1 << MLX5_CAP_GEN(priv->mdev, log_max_vlan_list); @@ -135,7 +135,7 @@ static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv) return -ENOMEM; i = 0; - for_each_set_bit(vlan, priv->fs.vlan.active_vlans, VLAN_N_VID) { + for_each_set_bit(vlan, priv->fs.vlan.active_cvlans, VLAN_N_VID) { if (i >= list_size) break; vlans[i++] = vlan; @@ -154,7 +154,8 @@ enum mlx5e_vlan_rule_type { MLX5E_VLAN_RULE_TYPE_UNTAGGED, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, - MLX5E_VLAN_RULE_TYPE_MATCH_VID, + MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, + MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, }; static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, @@ -162,7 +163,7 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, u16 vid, struct mlx5_flow_spec *spec) { struct mlx5_flow_table *ft = priv->fs.vlan.ft.t; - struct mlx5_flow_destination dest; + struct mlx5_flow_destination dest = {}; struct mlx5_flow_handle **rule_p; MLX5_DECLARE_FLOW_ACT(flow_act); int err = 0; @@ -174,6 +175,10 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, switch (rule_type) { case MLX5E_VLAN_RULE_TYPE_UNTAGGED: + /* cvlan_tag enabled in match criteria and + * disabled in match value means both S & C tags + * don't exist (untagged of both) + */ rule_p = &priv->fs.vlan.untagged_rule; MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); @@ -190,8 +195,18 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, outer_headers.svlan_tag); MLX5_SET(fte_match_param, spec->match_value, outer_headers.svlan_tag, 1); break; - default: /* MLX5E_VLAN_RULE_TYPE_MATCH_VID */ - rule_p = &priv->fs.vlan.active_vlans_rule[vid]; + case MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID: + rule_p = &priv->fs.vlan.active_svlans_rule[vid]; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.svlan_tag); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.svlan_tag, 1); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.first_vid); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, + vid); + break; + default: /* MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID */ + rule_p = &priv->fs.vlan.active_cvlans_rule[vid]; MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 1); @@ -223,7 +238,7 @@ static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv, if (!spec) return -ENOMEM; - if (rule_type == MLX5E_VLAN_RULE_TYPE_MATCH_VID) + if (rule_type == MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID) mlx5e_vport_context_update_vlans(priv); err = __mlx5e_add_vlan_rule(priv, rule_type, vid, spec); @@ -255,11 +270,17 @@ static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv, priv->fs.vlan.any_svlan_rule = NULL; } break; - case MLX5E_VLAN_RULE_TYPE_MATCH_VID: + case MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID: + if (priv->fs.vlan.active_svlans_rule[vid]) { + mlx5_del_flow_rules(priv->fs.vlan.active_svlans_rule[vid]); + priv->fs.vlan.active_svlans_rule[vid] = NULL; + } + break; + case MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID: mlx5e_vport_context_update_vlans(priv); - if (priv->fs.vlan.active_vlans_rule[vid]) { - mlx5_del_flow_rules(priv->fs.vlan.active_vlans_rule[vid]); - priv->fs.vlan.active_vlans_rule[vid] = NULL; + if (priv->fs.vlan.active_cvlans_rule[vid]) { + mlx5_del_flow_rules(priv->fs.vlan.active_cvlans_rule[vid]); + priv->fs.vlan.active_cvlans_rule[vid] = NULL; } mlx5e_vport_context_update_vlans(priv); break; @@ -283,46 +304,83 @@ static int mlx5e_add_any_vid_rules(struct mlx5e_priv *priv) return mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0); } -void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv) +void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv) { - if (!priv->fs.vlan.filter_disabled) + if (!priv->fs.vlan.cvlan_filter_disabled) return; - priv->fs.vlan.filter_disabled = false; + priv->fs.vlan.cvlan_filter_disabled = false; if (priv->netdev->flags & IFF_PROMISC) return; mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); } -void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv) +void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv) { - if (priv->fs.vlan.filter_disabled) + if (priv->fs.vlan.cvlan_filter_disabled) return; - priv->fs.vlan.filter_disabled = true; + priv->fs.vlan.cvlan_filter_disabled = true; if (priv->netdev->flags & IFF_PROMISC) return; mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_CTAG_VID, 0); } -int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, - u16 vid) +static int mlx5e_vlan_rx_add_cvid(struct mlx5e_priv *priv, u16 vid) { - struct mlx5e_priv *priv = netdev_priv(dev); + int err; + + set_bit(vid, priv->fs.vlan.active_cvlans); - set_bit(vid, priv->fs.vlan.active_vlans); + err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid); + if (err) + clear_bit(vid, priv->fs.vlan.active_cvlans); + + return err; +} + +static int mlx5e_vlan_rx_add_svid(struct mlx5e_priv *priv, u16 vid) +{ + struct net_device *netdev = priv->netdev; + int err; + + set_bit(vid, priv->fs.vlan.active_svlans); + + err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid); + if (err) { + clear_bit(vid, priv->fs.vlan.active_svlans); + return err; + } - return mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); + /* Need to fix some features.. */ + netdev_update_features(netdev); + return err; } -int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, - u16 vid) +int mlx5e_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { struct mlx5e_priv *priv = netdev_priv(dev); - clear_bit(vid, priv->fs.vlan.active_vlans); + if (be16_to_cpu(proto) == ETH_P_8021Q) + return mlx5e_vlan_rx_add_cvid(priv, vid); + else if (be16_to_cpu(proto) == ETH_P_8021AD) + return mlx5e_vlan_rx_add_svid(priv, vid); - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); + return -EOPNOTSUPP; +} + +int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + if (be16_to_cpu(proto) == ETH_P_8021Q) { + clear_bit(vid, priv->fs.vlan.active_cvlans); + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, vid); + } else if (be16_to_cpu(proto) == ETH_P_8021AD) { + clear_bit(vid, priv->fs.vlan.active_svlans); + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid); + netdev_update_features(dev); + } return 0; } @@ -333,11 +391,14 @@ static void mlx5e_add_vlan_rules(struct mlx5e_priv *priv) mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); - for_each_set_bit(i, priv->fs.vlan.active_vlans, VLAN_N_VID) { - mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, i); + for_each_set_bit(i, priv->fs.vlan.active_cvlans, VLAN_N_VID) { + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i); } - if (priv->fs.vlan.filter_disabled && + for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID) + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i); + + if (priv->fs.vlan.cvlan_filter_disabled && !(priv->netdev->flags & IFF_PROMISC)) mlx5e_add_any_vid_rules(priv); } @@ -348,11 +409,14 @@ static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv) mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); - for_each_set_bit(i, priv->fs.vlan.active_vlans, VLAN_N_VID) { - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, i); + for_each_set_bit(i, priv->fs.vlan.active_cvlans, VLAN_N_VID) { + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_CTAG_VID, i); } - if (priv->fs.vlan.filter_disabled && + for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID) + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i); + + if (priv->fs.vlan.cvlan_filter_disabled && !(priv->netdev->flags & IFF_PROMISC)) mlx5e_del_any_vid_rules(priv); } @@ -548,8 +612,11 @@ void mlx5e_set_rx_mode_work(struct work_struct *work) bool disable_broadcast = ea->broadcast_enabled && !broadcast_enabled; if (enable_promisc) { + if (!priv->channels.params.vlan_strip_disable) + netdev_warn_once(ndev, + "S-tagged traffic will be dropped while C-tag vlan stripping is enabled\n"); mlx5e_add_l2_flow_rule(priv, &ea->promisc, MLX5E_PROMISC); - if (!priv->fs.vlan.filter_disabled) + if (!priv->fs.vlan.cvlan_filter_disabled) mlx5e_add_any_vid_rules(priv); } if (enable_allmulti) @@ -564,7 +631,7 @@ void mlx5e_set_rx_mode_work(struct work_struct *work) if (disable_allmulti) mlx5e_del_l2_flow_rule(priv, &ea->allmulti); if (disable_promisc) { - if (!priv->fs.vlan.filter_disabled) + if (!priv->fs.vlan.cvlan_filter_disabled) mlx5e_del_any_vid_rules(priv); mlx5e_del_l2_flow_rule(priv, &ea->promisc); } @@ -741,7 +808,7 @@ mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv) { - struct mlx5_flow_destination dest; + struct mlx5_flow_destination dest = {}; struct mlx5e_ttc_table *ttc; struct mlx5_flow_handle **rules; struct mlx5_flow_table *ft; @@ -912,7 +979,7 @@ mlx5e_generate_inner_ttc_rule(struct mlx5e_priv *priv, static int mlx5e_generate_inner_ttc_table_rules(struct mlx5e_priv *priv) { - struct mlx5_flow_destination dest; + struct mlx5_flow_destination dest = {}; struct mlx5_flow_handle **rules; struct mlx5e_ttc_table *ttc; struct mlx5_flow_table *ft; @@ -1008,7 +1075,7 @@ err: return err; } -static int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv) +int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv) { struct mlx5e_ttc_table *ttc = &priv->fs.inner_ttc; struct mlx5_flow_table_attr ft_attr = {}; @@ -1044,7 +1111,7 @@ err: return err; } -static void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv) +void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv) { struct mlx5e_ttc_table *ttc = &priv->fs.inner_ttc; @@ -1109,7 +1176,7 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, struct mlx5e_l2_rule *ai, int type) { struct mlx5_flow_table *ft = priv->fs.l2.ft.t; - struct mlx5_flow_destination dest; + struct mlx5_flow_destination dest = {}; MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_spec *spec; int err = 0; @@ -1268,13 +1335,15 @@ err_destroy_flow_table: return err; } -#define MLX5E_NUM_VLAN_GROUPS 3 +#define MLX5E_NUM_VLAN_GROUPS 4 #define MLX5E_VLAN_GROUP0_SIZE BIT(12) -#define MLX5E_VLAN_GROUP1_SIZE BIT(1) -#define MLX5E_VLAN_GROUP2_SIZE BIT(0) +#define MLX5E_VLAN_GROUP1_SIZE BIT(12) +#define MLX5E_VLAN_GROUP2_SIZE BIT(1) +#define MLX5E_VLAN_GROUP3_SIZE BIT(0) #define MLX5E_VLAN_TABLE_SIZE (MLX5E_VLAN_GROUP0_SIZE +\ MLX5E_VLAN_GROUP1_SIZE +\ - MLX5E_VLAN_GROUP2_SIZE) + MLX5E_VLAN_GROUP2_SIZE +\ + MLX5E_VLAN_GROUP3_SIZE) static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in, int inlen) @@ -1297,7 +1366,8 @@ static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in memset(in, 0, inlen); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.svlan_tag); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_VLAN_GROUP1_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); @@ -1308,7 +1378,7 @@ static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in memset(in, 0, inlen); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.svlan_tag); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.cvlan_tag); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_VLAN_GROUP2_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); @@ -1317,6 +1387,17 @@ static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in goto err_destroy_groups; ft->num_groups++; + memset(in, 0, inlen); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.svlan_tag); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_VLAN_GROUP3_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + return 0; err_destroy_groups: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index cc11bbbd0309..d2b057a3e512 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -196,6 +196,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->rx_bytes += rq_stats->bytes; s->rx_lro_packets += rq_stats->lro_packets; s->rx_lro_bytes += rq_stats->lro_bytes; + s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets; s->rx_csum_none += rq_stats->csum_none; s->rx_csum_complete += rq_stats->csum_complete; s->rx_csum_unnecessary += rq_stats->csum_unnecessary; @@ -224,6 +225,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->tx_tso_bytes += sq_stats->tso_bytes; s->tx_tso_inner_packets += sq_stats->tso_inner_packets; s->tx_tso_inner_bytes += sq_stats->tso_inner_bytes; + s->tx_added_vlan_packets += sq_stats->added_vlan_packets; s->tx_queue_stopped += sq_stats->stopped; s->tx_queue_wake += sq_stats->wake; s->tx_queue_dropped += sq_stats->dropped; @@ -373,8 +375,6 @@ static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, enum mlx5_dev_event event, unsigned long param) { struct mlx5e_priv *priv = vpriv; - struct ptp_clock_event ptp_event; - struct mlx5_eqe *eqe = NULL; if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state)) return; @@ -384,14 +384,6 @@ static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, case MLX5_DEV_EVENT_PORT_DOWN: queue_work(priv->wq, &priv->update_carrier_work); break; - case MLX5_DEV_EVENT_PPS: - eqe = (struct mlx5_eqe *)param; - ptp_event.index = eqe->data.pps.pin; - ptp_event.timestamp = - timecounter_cyc2time(&priv->tstamp.clock, - be64_to_cpu(eqe->data.pps.time_stamp)); - mlx5e_pps_event_handler(vpriv, &ptp_event); - break; default: break; } @@ -585,6 +577,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, rq->pdev = c->pdev; rq->netdev = c->netdev; rq->tstamp = c->tstamp; + rq->clock = &mdev->clock; rq->channel = c; rq->ix = c->ix; rq->mdev = mdev; @@ -690,7 +683,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, } INIT_WORK(&rq->am.work, mlx5e_rx_am_work); - rq->am.mode = params->rx_cq_period_mode; + rq->am.mode = params->rx_cq_moderation.cq_period_mode; rq->page_cache.head = 0; rq->page_cache.tail = 0; @@ -1123,6 +1116,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->pdev = c->pdev; sq->tstamp = c->tstamp; + sq->clock = &mdev->clock; sq->mkey_be = c->mkey_be; sq->channel = c; sq->txq_ix = txq_ix; @@ -1982,7 +1976,7 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, } mlx5e_build_common_cq_param(priv, param); - param->cq_period_mode = params->rx_cq_period_mode; + param->cq_period_mode = params->rx_cq_moderation.cq_period_mode; } static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, @@ -1994,8 +1988,7 @@ static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, MLX5_SET(cqc, cqc, log_cq_size, params->log_sq_size); mlx5e_build_common_cq_param(priv, param); - - param->cq_period_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE; + param->cq_period_mode = params->tx_cq_moderation.cq_period_mode; } static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, @@ -2678,6 +2671,12 @@ void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, netif_carrier_on(netdev); } +void mlx5e_timestamp_set(struct mlx5e_priv *priv) +{ + priv->tstamp.tx_type = HWTSTAMP_TX_OFF; + priv->tstamp.rx_filter = HWTSTAMP_FILTER_NONE; +} + int mlx5e_open_locked(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -2693,7 +2692,7 @@ int mlx5e_open_locked(struct net_device *netdev) mlx5e_activate_priv_channels(priv); if (priv->profile->update_carrier) priv->profile->update_carrier(priv); - mlx5e_timestamp_init(priv); + mlx5e_timestamp_set(priv); if (priv->profile->update_stats) queue_delayed_work(priv->wq, &priv->update_stats_work, 0); @@ -2731,7 +2730,6 @@ int mlx5e_close_locked(struct net_device *netdev) clear_bit(MLX5E_STATE_OPENED, &priv->state); - mlx5e_timestamp_cleanup(priv); netif_carrier_off(priv->netdev); mlx5e_deactivate_priv_channels(priv); mlx5e_close_channels(&priv->channels); @@ -3086,13 +3084,10 @@ out: } #ifdef CONFIG_MLX5_ESWITCH -static int mlx5e_setup_tc_cls_flower(struct net_device *dev, +static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv, struct tc_cls_flower_offload *cls_flower) { - struct mlx5e_priv *priv = netdev_priv(dev); - - if (!is_classid_clsact_ingress(cls_flower->common.classid) || - cls_flower->common.chain_index) + if (cls_flower->common.chain_index) return -EOPNOTSUPP; switch (cls_flower->command) { @@ -3106,17 +3101,54 @@ static int mlx5e_setup_tc_cls_flower(struct net_device *dev, return -EOPNOTSUPP; } } + +int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct mlx5e_priv *priv = cb_priv; + + if (!tc_can_offload(priv->netdev)) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return mlx5e_setup_tc_cls_flower(priv, type_data); + default: + return -EOPNOTSUPP; + } +} + +static int mlx5e_setup_tc_block(struct net_device *dev, + struct tc_block_offload *f) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + return tcf_block_cb_register(f->block, mlx5e_setup_tc_block_cb, + priv, priv); + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, mlx5e_setup_tc_block_cb, + priv); + return 0; + default: + return -EOPNOTSUPP; + } +} #endif -static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, - void *type_data) +int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) { switch (type) { #ifdef CONFIG_MLX5_ESWITCH - case TC_SETUP_CLSFLOWER: - return mlx5e_setup_tc_cls_flower(dev, type_data); + case TC_SETUP_BLOCK: + return mlx5e_setup_tc_block(dev, type_data); #endif - case TC_SETUP_MQPRIO: + case TC_SETUP_QDISC_MQPRIO: return mlx5e_setup_tc_mqprio(dev, type_data); default: return -EOPNOTSUPP; @@ -3230,14 +3262,14 @@ out: return err; } -static int set_feature_vlan_filter(struct net_device *netdev, bool enable) +static int set_feature_cvlan_filter(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); if (enable) - mlx5e_enable_vlan_filter(priv); + mlx5e_enable_cvlan_filter(priv); else - mlx5e_disable_vlan_filter(priv); + mlx5e_disable_cvlan_filter(priv); return 0; } @@ -3348,7 +3380,7 @@ static int mlx5e_set_features(struct net_device *netdev, set_feature_lro); err |= mlx5e_handle_feature(netdev, features, NETIF_F_HW_VLAN_CTAG_FILTER, - set_feature_vlan_filter); + set_feature_cvlan_filter); err |= mlx5e_handle_feature(netdev, features, NETIF_F_HW_TC, set_feature_tc_num_filters); err |= mlx5e_handle_feature(netdev, features, NETIF_F_RXALL, @@ -3365,6 +3397,25 @@ static int mlx5e_set_features(struct net_device *netdev, return err ? -EINVAL : 0; } +static netdev_features_t mlx5e_fix_features(struct net_device *netdev, + netdev_features_t features) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mutex_lock(&priv->state_lock); + if (!bitmap_empty(priv->fs.vlan.active_svlans, VLAN_N_VID)) { + /* HW strips the outer C-tag header, this is a problem + * for S-tag traffic. + */ + features &= ~NETIF_F_HW_VLAN_CTAG_RX; + if (!priv->channels.params.vlan_strip_disable) + netdev_warn(netdev, "Dropping C-tag vlan stripping offload due to S-tag vlan\n"); + } + mutex_unlock(&priv->state_lock); + + return features; +} + static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -3403,6 +3454,80 @@ out: return err; } +int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) +{ + struct hwtstamp_config config; + int err; + + if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) + return -EOPNOTSUPP; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + /* TX HW timestamp */ + switch (config.tx_type) { + case HWTSTAMP_TX_OFF: + case HWTSTAMP_TX_ON: + break; + default: + return -ERANGE; + } + + mutex_lock(&priv->state_lock); + /* RX HW timestamp */ + switch (config.rx_filter) { + case HWTSTAMP_FILTER_NONE: + /* Reset CQE compression to Admin default */ + mlx5e_modify_rx_cqe_compression_locked(priv, priv->channels.params.rx_cqe_compress_def); + break; + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_SOME: + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + case HWTSTAMP_FILTER_NTP_ALL: + /* Disable CQE compression */ + netdev_warn(priv->netdev, "Disabling cqe compression"); + err = mlx5e_modify_rx_cqe_compression_locked(priv, false); + if (err) { + netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err); + mutex_unlock(&priv->state_lock); + return err; + } + config.rx_filter = HWTSTAMP_FILTER_ALL; + break; + default: + mutex_unlock(&priv->state_lock); + return -ERANGE; + } + + memcpy(&priv->tstamp, &config, sizeof(config)); + mutex_unlock(&priv->state_lock); + + return copy_to_user(ifr->ifr_data, &config, + sizeof(config)) ? -EFAULT : 0; +} + +int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr) +{ + struct hwtstamp_config *cfg = &priv->tstamp; + + if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) + return -EOPNOTSUPP; + + return copy_to_user(ifr->ifr_data, cfg, sizeof(*cfg)) ? -EFAULT : 0; +} + static int mlx5e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -3726,7 +3851,7 @@ static u32 mlx5e_xdp_query(struct net_device *dev) return prog_id; } -static int mlx5e_xdp(struct net_device *dev, struct netdev_xdp *xdp) +static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp) { switch (xdp->command) { case XDP_SETUP_PROG: @@ -3768,6 +3893,7 @@ static const struct net_device_ops mlx5e_netdev_ops = { .ndo_vlan_rx_add_vid = mlx5e_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = mlx5e_vlan_rx_kill_vid, .ndo_set_features = mlx5e_set_features, + .ndo_fix_features = mlx5e_fix_features, .ndo_change_mtu = mlx5e_change_mtu, .ndo_do_ioctl = mlx5e_ioctl, .ndo_set_tx_maxrate = mlx5e_set_tx_maxrate, @@ -3778,7 +3904,7 @@ static const struct net_device_ops mlx5e_netdev_ops = { .ndo_rx_flow_steer = mlx5e_rx_flow_steer, #endif .ndo_tx_timeout = mlx5e_tx_timeout, - .ndo_xdp = mlx5e_xdp, + .ndo_bpf = mlx5e_xdp, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = mlx5e_netpoll, #endif @@ -3882,14 +4008,32 @@ static bool hw_lro_heuristic(u32 link_speed, u32 pci_bw) (pci_bw <= 16000) && (pci_bw < link_speed)); } +void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) +{ + params->tx_cq_moderation.cq_period_mode = cq_period_mode; + + params->tx_cq_moderation.pkts = + MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; + params->tx_cq_moderation.usec = + MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; + + if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) + params->tx_cq_moderation.usec = + MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE; + + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_TX_CQE_BASED_MODER, + params->tx_cq_moderation.cq_period_mode == + MLX5_CQ_PERIOD_MODE_START_FROM_CQE); +} + void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) { - params->rx_cq_period_mode = cq_period_mode; + params->rx_cq_moderation.cq_period_mode = cq_period_mode; params->rx_cq_moderation.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; params->rx_cq_moderation.usec = - MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; + MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) params->rx_cq_moderation.usec = @@ -3897,10 +4041,11 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) if (params->rx_am_enabled) params->rx_cq_moderation = - mlx5e_am_get_def_profile(params->rx_cq_period_mode); + mlx5e_am_get_def_profile(cq_period_mode); MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER, - params->rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE); + params->rx_cq_moderation.cq_period_mode == + MLX5_CQ_PERIOD_MODE_START_FROM_CQE); } u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) @@ -3960,16 +4105,11 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, MLX5_CQ_PERIOD_MODE_START_FROM_EQE; params->rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation); mlx5e_set_rx_cq_mode_params(params, cq_period_mode); - - params->tx_cq_moderation.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; - params->tx_cq_moderation.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; + mlx5e_set_tx_cq_mode_params(params, cq_period_mode); /* TX inline */ params->tx_max_inline = mlx5e_get_max_inline_cap(mdev); - mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); - if (params->tx_min_inline_mode == MLX5_INLINE_MODE_NONE && - !MLX5_CAP_ETH(mdev, wqe_vlan_insert)) - params->tx_min_inline_mode = MLX5_INLINE_MODE_L2; + params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(mdev); /* RSS */ params->rss_hfunc = ETH_RSS_HASH_XOR; @@ -3989,6 +4129,7 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, priv->netdev = netdev; priv->profile = profile; priv->ppriv = ppriv; + priv->msglevel = MLX5E_MSG_LEVEL; priv->hard_mtu = MLX5E_ETH_HARD_MTU; mlx5e_build_nic_params(mdev, &priv->channels.params, profile->max_nch(mdev)); @@ -4055,6 +4196,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; + netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; if (mlx5e_vxlan_allowed(mdev) || MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) { netdev->hw_features |= NETIF_F_GSO_PARTIAL; @@ -4112,6 +4254,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) } netdev->features |= NETIF_F_HIGHDMA; + netdev->features |= NETIF_F_HW_VLAN_STAG_FILTER; netdev->priv_flags |= IFF_UNICAST_FLT; @@ -4269,7 +4412,9 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) if (netdev->reg_state != NETREG_REGISTERED) return; - +#ifdef CONFIG_MLX5_CORE_EN_DCB + mlx5e_dcbnl_init_app(priv); +#endif /* Device already registered: sync netdev system state */ if (mlx5e_vxlan_allowed(mdev)) { rtnl_lock(); @@ -4290,6 +4435,11 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; +#ifdef CONFIG_MLX5_CORE_EN_DCB + if (priv->netdev->reg_state == NETREG_REGISTERED) + mlx5e_dcbnl_delete_app(priv); +#endif + rtnl_lock(); if (netif_running(priv->netdev)) mlx5e_close(priv->netdev); @@ -4510,6 +4660,9 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) goto err_detach; } +#ifdef CONFIG_MLX5_CORE_EN_DCB + mlx5e_dcbnl_init_app(priv); +#endif return priv; err_detach: @@ -4526,6 +4679,9 @@ static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) struct mlx5e_priv *priv = vpriv; void *ppriv = priv->ppriv; +#ifdef CONFIG_MLX5_CORE_EN_DCB + mlx5e_dcbnl_delete_app(priv); +#endif unregister_netdev(priv->netdev); mlx5e_detach(mdev, vpriv); mlx5e_destroy_netdev(priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 45e03c427faf..2c43606c26b5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -34,6 +34,7 @@ #include <linux/mlx5/fs.h> #include <net/switchdev.h> #include <net/pkt_cls.h> +#include <net/act_api.h> #include <net/netevent.h> #include <net/arp.h> @@ -658,23 +659,12 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev, } static int -mlx5e_rep_setup_tc_cls_flower(struct net_device *dev, +mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv, struct tc_cls_flower_offload *cls_flower) { - struct mlx5e_priv *priv = netdev_priv(dev); - - if (!is_classid_clsact_ingress(cls_flower->common.classid) || - cls_flower->common.chain_index) + if (cls_flower->common.chain_index) return -EOPNOTSUPP; - if (cls_flower->egress_dev) { - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - - dev = mlx5_eswitch_get_uplink_netdev(esw); - return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, - cls_flower); - } - switch (cls_flower->command) { case TC_CLSFLOWER_REPLACE: return mlx5e_configure_flower(priv, cls_flower); @@ -687,12 +677,48 @@ mlx5e_rep_setup_tc_cls_flower(struct net_device *dev, } } +static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct mlx5e_priv *priv = cb_priv; + + if (!tc_can_offload(priv->netdev)) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return mlx5e_rep_setup_tc_cls_flower(priv, type_data); + default: + return -EOPNOTSUPP; + } +} + +static int mlx5e_rep_setup_tc_block(struct net_device *dev, + struct tc_block_offload *f) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + return tcf_block_cb_register(f->block, mlx5e_rep_setup_tc_cb, + priv, priv); + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, mlx5e_rep_setup_tc_cb, priv); + return 0; + default: + return -EOPNOTSUPP; + } +} + static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { switch (type) { - case TC_SETUP_CLSFLOWER: - return mlx5e_rep_setup_tc_cls_flower(dev, type_data); + case TC_SETUP_BLOCK: + return mlx5e_rep_setup_tc_block(dev, type_data); default: return -EOPNOTSUPP; } @@ -986,6 +1012,7 @@ mlx5e_vport_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) { struct mlx5e_rep_priv *rpriv; struct net_device *netdev; + struct mlx5e_priv *upriv; int err; rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); @@ -1017,15 +1044,25 @@ mlx5e_vport_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) goto err_detach_netdev; } + upriv = netdev_priv(mlx5_eswitch_get_uplink_netdev(esw)); + err = tc_setup_cb_egdev_register(netdev, mlx5e_setup_tc_block_cb, + upriv); + if (err) + goto err_neigh_cleanup; + err = register_netdev(netdev); if (err) { pr_warn("Failed to register representor netdev for vport %d\n", rep->vport); - goto err_neigh_cleanup; + goto err_egdev_cleanup; } return 0; +err_egdev_cleanup: + tc_setup_cb_egdev_unregister(netdev, mlx5e_setup_tc_block_cb, + upriv); + err_neigh_cleanup: mlx5e_rep_neigh_cleanup(rpriv); @@ -1045,9 +1082,12 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_rep_priv *rpriv = priv->ppriv; void *ppriv = priv->ppriv; + struct mlx5e_priv *upriv; unregister_netdev(rep->netdev); - + upriv = netdev_priv(mlx5_eswitch_get_uplink_netdev(esw)); + tc_setup_cb_egdev_unregister(netdev, mlx5e_setup_tc_block_cb, + upriv); mlx5e_rep_neigh_cleanup(rpriv); mlx5e_detach_netdev(priv); mlx5e_destroy_netdev(priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 91b1b0938931..5b499c7a698f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -42,10 +42,11 @@ #include "en_rep.h" #include "ipoib/ipoib.h" #include "en_accel/ipsec_rxtx.h" +#include "lib/clock.h" -static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp) +static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config) { - return tstamp->hwtstamp_config.rx_filter == HWTSTAMP_FILTER_ALL; + return config->rx_filter == HWTSTAMP_FILTER_ALL; } static inline void mlx5e_read_cqe_slot(struct mlx5e_cq *cq, u32 cqcc, @@ -560,7 +561,6 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, u8 tcp_ack = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) || (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA); - skb->mac_len = ETH_HLEN; proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth); tot_len = cqe_bcnt - network_depth; @@ -607,10 +607,11 @@ static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe, skb_set_hash(skb, be32_to_cpu(cqe->rss_hash_result), ht); } -static inline bool is_first_ethertype_ip(struct sk_buff *skb) +static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth) { __be16 ethertype = ((struct ethhdr *)skb->data)->h_proto; + ethertype = __vlan_get_protocol(skb, ethertype, network_depth); return (ethertype == htons(ETH_P_IP) || ethertype == htons(ETH_P_IPV6)); } @@ -620,6 +621,8 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, struct sk_buff *skb, bool lro) { + int network_depth = 0; + if (unlikely(!(netdev->features & NETIF_F_RXCSUM))) goto csum_none; @@ -629,9 +632,17 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, return; } - if (is_first_ethertype_ip(skb)) { + if (is_last_ethertype_ip(skb, &network_depth)) { skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = csum_unfold((__force __sum16)cqe->check_sum); + if (network_depth > ETH_HLEN) + /* CQE csum is calculated from the IP header and does + * not cover VLAN headers (if present). This will add + * the checksum manually. + */ + skb->csum = csum_partial(skb->data + ETH_HLEN, + network_depth - ETH_HLEN, + skb->csum); rq->stats.csum_complete++; return; } @@ -659,9 +670,9 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb) { struct net_device *netdev = rq->netdev; - struct mlx5e_tstamp *tstamp = rq->tstamp; int lro_num_seg; + skb->mac_len = ETH_HLEN; lro_num_seg = be32_to_cpu(cqe->srqn) >> 24; if (lro_num_seg > 1) { mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt); @@ -674,17 +685,20 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, rq->stats.lro_bytes += cqe_bcnt; } - if (unlikely(mlx5e_rx_hw_stamp(tstamp))) - mlx5e_fill_hwstamp(tstamp, get_cqe_ts(cqe), skb_hwtstamps(skb)); + if (unlikely(mlx5e_rx_hw_stamp(rq->tstamp))) + skb_hwtstamps(skb)->hwtstamp = + mlx5_timecounter_cyc2time(rq->clock, get_cqe_ts(cqe)); skb_record_rx_queue(skb, rq->ix); if (likely(netdev->features & NETIF_F_RXHASH)) mlx5e_skb_set_hash(cqe, skb); - if (cqe_has_vlan(cqe)) + if (cqe_has_vlan(cqe)) { __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), be16_to_cpu(cqe->vlan_info)); + rq->stats.removed_vlan_packets++; + } skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK; @@ -795,6 +809,7 @@ static inline int mlx5e_xdp_handle(struct mlx5e_rq *rq, return false; xdp.data = va + *rx_headroom; + xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + *len; xdp.data_hard_start = va; @@ -1160,12 +1175,25 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, u32 cqe_bcnt, struct sk_buff *skb) { - struct net_device *netdev = rq->netdev; - struct mlx5e_tstamp *tstamp = rq->tstamp; + struct net_device *netdev; char *pseudo_header; + u32 qpn; u8 *dgid; u8 g; + qpn = be32_to_cpu(cqe->sop_drop_qpn) & 0xffffff; + netdev = mlx5i_pkey_get_netdev(rq->netdev, qpn); + + /* No mapping present, cannot process SKB. This might happen if a child + * interface is going down while having unprocessed CQEs on parent RQ + */ + if (unlikely(!netdev)) { + /* TODO: add drop counters support */ + skb->dev = NULL; + pr_warn_once("Unable to map QPN %u to dev - dropping skb\n", qpn); + return; + } + g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3; dgid = skb->data + MLX5_IB_GRH_DGID_OFFSET; if ((!g) || dgid[0] != 0xff) @@ -1186,8 +1214,9 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = csum_unfold((__force __sum16)cqe->check_sum); - if (unlikely(mlx5e_rx_hw_stamp(tstamp))) - mlx5e_fill_hwstamp(tstamp, get_cqe_ts(cqe), skb_hwtstamps(skb)); + if (unlikely(mlx5e_rx_hw_stamp(rq->tstamp))) + skb_hwtstamps(skb)->hwtstamp = + mlx5_timecounter_cyc2time(rq->clock, get_cqe_ts(cqe)); skb_record_rx_queue(skb, rq->ix); @@ -1227,6 +1256,10 @@ void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) goto wq_free_wqe; mlx5i_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + if (unlikely(!skb->dev)) { + dev_kfree_skb_any(skb); + goto wq_free_wqe; + } napi_gro_receive(rq->cq.napi, skb); wq_free_wqe: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c index acf32fe952cd..e401d9d245f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c @@ -63,7 +63,11 @@ profile[MLX5_CQ_PERIOD_NUM_MODES][MLX5E_PARAMS_AM_NUM_PROFILES] = { static inline struct mlx5e_cq_moder mlx5e_am_get_profile(u8 cq_period_mode, int ix) { - return profile[cq_period_mode][ix]; + struct mlx5e_cq_moder cq_moder; + + cq_moder = profile[cq_period_mode][ix]; + cq_moder.cq_period_mode = cq_period_mode; + return cq_moder; } struct mlx5e_cq_moder mlx5e_am_get_def_profile(u8 rx_cq_period_mode) @@ -75,7 +79,7 @@ struct mlx5e_cq_moder mlx5e_am_get_def_profile(u8 rx_cq_period_mode) else /* MLX5_CQ_PERIOD_MODE_START_FROM_EQE */ default_profile_ix = MLX5E_RX_AM_DEF_PROFILE_EQE; - return profile[rx_cq_period_mode][default_profile_ix]; + return mlx5e_am_get_profile(rx_cq_period_mode, default_profile_ix); } /* Adaptive moderation logic */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c new file mode 100644 index 000000000000..b74ddc7984bc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -0,0 +1,899 @@ +/* + * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "en.h" +#include "en_accel/ipsec.h" + +static const struct counter_desc sw_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_added_vlan_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_removed_vlan_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_none) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xmit_more) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_page_reuse) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_reuse) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events_phy) }, +}; + +#define NUM_SW_COUNTERS ARRAY_SIZE(sw_stats_desc) + +static int mlx5e_grp_sw_get_num_stats(struct mlx5e_priv *priv) +{ + return NUM_SW_COUNTERS; +} + +static int mlx5e_grp_sw_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx) +{ + int i; + + for (i = 0; i < NUM_SW_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, sw_stats_desc[i].format); + return idx; +} + +static int mlx5e_grp_sw_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) +{ + int i; + + for (i = 0; i < NUM_SW_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.sw, sw_stats_desc, i); + return idx; +} + +static const struct counter_desc q_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_qcounter_stats, rx_out_of_buffer) }, +}; + +#define NUM_Q_COUNTERS ARRAY_SIZE(q_stats_desc) + +static int mlx5e_grp_q_get_num_stats(struct mlx5e_priv *priv) +{ + return priv->q_counter ? NUM_Q_COUNTERS : 0; +} + +static int mlx5e_grp_q_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx) +{ + int i; + + for (i = 0; i < NUM_Q_COUNTERS && priv->q_counter; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, q_stats_desc[i].format); + return idx; +} + +static int mlx5e_grp_q_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) +{ + int i; + + for (i = 0; i < NUM_Q_COUNTERS && priv->q_counter; i++) + data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt, q_stats_desc, i); + return idx; +} + +#define VPORT_COUNTER_OFF(c) MLX5_BYTE_OFF(query_vport_counter_out, c) +static const struct counter_desc vport_stats_desc[] = { + { "rx_vport_unicast_packets", + VPORT_COUNTER_OFF(received_eth_unicast.packets) }, + { "rx_vport_unicast_bytes", + VPORT_COUNTER_OFF(received_eth_unicast.octets) }, + { "tx_vport_unicast_packets", + VPORT_COUNTER_OFF(transmitted_eth_unicast.packets) }, + { "tx_vport_unicast_bytes", + VPORT_COUNTER_OFF(transmitted_eth_unicast.octets) }, + { "rx_vport_multicast_packets", + VPORT_COUNTER_OFF(received_eth_multicast.packets) }, + { "rx_vport_multicast_bytes", + VPORT_COUNTER_OFF(received_eth_multicast.octets) }, + { "tx_vport_multicast_packets", + VPORT_COUNTER_OFF(transmitted_eth_multicast.packets) }, + { "tx_vport_multicast_bytes", + VPORT_COUNTER_OFF(transmitted_eth_multicast.octets) }, + { "rx_vport_broadcast_packets", + VPORT_COUNTER_OFF(received_eth_broadcast.packets) }, + { "rx_vport_broadcast_bytes", + VPORT_COUNTER_OFF(received_eth_broadcast.octets) }, + { "tx_vport_broadcast_packets", + VPORT_COUNTER_OFF(transmitted_eth_broadcast.packets) }, + { "tx_vport_broadcast_bytes", + VPORT_COUNTER_OFF(transmitted_eth_broadcast.octets) }, + { "rx_vport_rdma_unicast_packets", + VPORT_COUNTER_OFF(received_ib_unicast.packets) }, + { "rx_vport_rdma_unicast_bytes", + VPORT_COUNTER_OFF(received_ib_unicast.octets) }, + { "tx_vport_rdma_unicast_packets", + VPORT_COUNTER_OFF(transmitted_ib_unicast.packets) }, + { "tx_vport_rdma_unicast_bytes", + VPORT_COUNTER_OFF(transmitted_ib_unicast.octets) }, + { "rx_vport_rdma_multicast_packets", + VPORT_COUNTER_OFF(received_ib_multicast.packets) }, + { "rx_vport_rdma_multicast_bytes", + VPORT_COUNTER_OFF(received_ib_multicast.octets) }, + { "tx_vport_rdma_multicast_packets", + VPORT_COUNTER_OFF(transmitted_ib_multicast.packets) }, + { "tx_vport_rdma_multicast_bytes", + VPORT_COUNTER_OFF(transmitted_ib_multicast.octets) }, +}; + +#define NUM_VPORT_COUNTERS ARRAY_SIZE(vport_stats_desc) + +static int mlx5e_grp_vport_get_num_stats(struct mlx5e_priv *priv) +{ + return NUM_VPORT_COUNTERS; +} + +static int mlx5e_grp_vport_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + int i; + + for (i = 0; i < NUM_VPORT_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, vport_stats_desc[i].format); + return idx; +} + +static int mlx5e_grp_vport_fill_stats(struct mlx5e_priv *priv, u64 *data, + int idx) +{ + int i; + + for (i = 0; i < NUM_VPORT_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vport.query_vport_out, + vport_stats_desc, i); + return idx; +} + +#define PPORT_802_3_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_802_3_cntrs_grp_data_layout.c##_high) +static const struct counter_desc pport_802_3_stats_desc[] = { + { "tx_packets_phy", PPORT_802_3_OFF(a_frames_transmitted_ok) }, + { "rx_packets_phy", PPORT_802_3_OFF(a_frames_received_ok) }, + { "rx_crc_errors_phy", PPORT_802_3_OFF(a_frame_check_sequence_errors) }, + { "tx_bytes_phy", PPORT_802_3_OFF(a_octets_transmitted_ok) }, + { "rx_bytes_phy", PPORT_802_3_OFF(a_octets_received_ok) }, + { "tx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_xmitted_ok) }, + { "tx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_xmitted_ok) }, + { "rx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_received_ok) }, + { "rx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_received_ok) }, + { "rx_in_range_len_errors_phy", PPORT_802_3_OFF(a_in_range_length_errors) }, + { "rx_out_of_range_len_phy", PPORT_802_3_OFF(a_out_of_range_length_field) }, + { "rx_oversize_pkts_phy", PPORT_802_3_OFF(a_frame_too_long_errors) }, + { "rx_symbol_err_phy", PPORT_802_3_OFF(a_symbol_error_during_carrier) }, + { "tx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_transmitted) }, + { "rx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_received) }, + { "rx_unsupported_op_phy", PPORT_802_3_OFF(a_unsupported_opcodes_received) }, + { "rx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_received) }, + { "tx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_transmitted) }, +}; + +#define NUM_PPORT_802_3_COUNTERS ARRAY_SIZE(pport_802_3_stats_desc) + +static int mlx5e_grp_802_3_get_num_stats(struct mlx5e_priv *priv) +{ + return NUM_PPORT_802_3_COUNTERS; +} + +static int mlx5e_grp_802_3_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + int i; + + for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_802_3_stats_desc[i].format); + return idx; +} + +static int mlx5e_grp_802_3_fill_stats(struct mlx5e_priv *priv, u64 *data, + int idx) +{ + int i; + + for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.IEEE_802_3_counters, + pport_802_3_stats_desc, i); + return idx; +} + +#define PPORT_2863_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_2863_cntrs_grp_data_layout.c##_high) +static const struct counter_desc pport_2863_stats_desc[] = { + { "rx_discards_phy", PPORT_2863_OFF(if_in_discards) }, + { "tx_discards_phy", PPORT_2863_OFF(if_out_discards) }, + { "tx_errors_phy", PPORT_2863_OFF(if_out_errors) }, +}; + +#define NUM_PPORT_2863_COUNTERS ARRAY_SIZE(pport_2863_stats_desc) + +static int mlx5e_grp_2863_get_num_stats(struct mlx5e_priv *priv) +{ + return NUM_PPORT_2863_COUNTERS; +} + +static int mlx5e_grp_2863_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + int i; + + for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_2863_stats_desc[i].format); + return idx; +} + +static int mlx5e_grp_2863_fill_stats(struct mlx5e_priv *priv, u64 *data, + int idx) +{ + int i; + + for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2863_counters, + pport_2863_stats_desc, i); + return idx; +} + +#define PPORT_2819_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_2819_cntrs_grp_data_layout.c##_high) +static const struct counter_desc pport_2819_stats_desc[] = { + { "rx_undersize_pkts_phy", PPORT_2819_OFF(ether_stats_undersize_pkts) }, + { "rx_fragments_phy", PPORT_2819_OFF(ether_stats_fragments) }, + { "rx_jabbers_phy", PPORT_2819_OFF(ether_stats_jabbers) }, + { "rx_64_bytes_phy", PPORT_2819_OFF(ether_stats_pkts64octets) }, + { "rx_65_to_127_bytes_phy", PPORT_2819_OFF(ether_stats_pkts65to127octets) }, + { "rx_128_to_255_bytes_phy", PPORT_2819_OFF(ether_stats_pkts128to255octets) }, + { "rx_256_to_511_bytes_phy", PPORT_2819_OFF(ether_stats_pkts256to511octets) }, + { "rx_512_to_1023_bytes_phy", PPORT_2819_OFF(ether_stats_pkts512to1023octets) }, + { "rx_1024_to_1518_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1024to1518octets) }, + { "rx_1519_to_2047_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1519to2047octets) }, + { "rx_2048_to_4095_bytes_phy", PPORT_2819_OFF(ether_stats_pkts2048to4095octets) }, + { "rx_4096_to_8191_bytes_phy", PPORT_2819_OFF(ether_stats_pkts4096to8191octets) }, + { "rx_8192_to_10239_bytes_phy", PPORT_2819_OFF(ether_stats_pkts8192to10239octets) }, +}; + +#define NUM_PPORT_2819_COUNTERS ARRAY_SIZE(pport_2819_stats_desc) + +static int mlx5e_grp_2819_get_num_stats(struct mlx5e_priv *priv) +{ + return NUM_PPORT_2819_COUNTERS; +} + +static int mlx5e_grp_2819_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + int i; + + for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_2819_stats_desc[i].format); + return idx; +} + +static int mlx5e_grp_2819_fill_stats(struct mlx5e_priv *priv, u64 *data, + int idx) +{ + int i; + + for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2819_counters, + pport_2819_stats_desc, i); + return idx; +} + +#define PPORT_PHY_STATISTICAL_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.phys_layer_statistical_cntrs.c##_high) +static const struct counter_desc pport_phy_statistical_stats_desc[] = { + { "rx_pcs_symbol_err_phy", PPORT_PHY_STATISTICAL_OFF(phy_symbol_errors) }, + { "rx_corrected_bits_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits) }, +}; + +#define NUM_PPORT_PHY_COUNTERS ARRAY_SIZE(pport_phy_statistical_stats_desc) + +static int mlx5e_grp_phy_get_num_stats(struct mlx5e_priv *priv) +{ + return MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group) ? + NUM_PPORT_PHY_COUNTERS : 0; +} + +static int mlx5e_grp_phy_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + int i; + + if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group)) + for (i = 0; i < NUM_PPORT_PHY_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_phy_statistical_stats_desc[i].format); + return idx; +} + +static int mlx5e_grp_phy_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) +{ + int i; + + if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group)) + for (i = 0; i < NUM_PPORT_PHY_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters, + pport_phy_statistical_stats_desc, i); + return idx; +} + +#define PPORT_ETH_EXT_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_extended_cntrs_grp_data_layout.c##_high) +static const struct counter_desc pport_eth_ext_stats_desc[] = { + { "rx_buffer_passed_thres_phy", PPORT_ETH_EXT_OFF(rx_buffer_almost_full) }, +}; + +#define NUM_PPORT_ETH_EXT_COUNTERS ARRAY_SIZE(pport_eth_ext_stats_desc) + +static int mlx5e_grp_eth_ext_get_num_stats(struct mlx5e_priv *priv) +{ + if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters)) + return NUM_PPORT_ETH_EXT_COUNTERS; + + return 0; +} + +static int mlx5e_grp_eth_ext_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + int i; + + if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters)) + for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_eth_ext_stats_desc[i].format); + return idx; +} + +static int mlx5e_grp_eth_ext_fill_stats(struct mlx5e_priv *priv, u64 *data, + int idx) +{ + int i; + + if (MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters)) + for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pport.eth_ext_counters, + pport_eth_ext_stats_desc, i); + return idx; +} + +#define PCIE_PERF_OFF(c) \ + MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c) +static const struct counter_desc pcie_perf_stats_desc[] = { + { "rx_pci_signal_integrity", PCIE_PERF_OFF(rx_errors) }, + { "tx_pci_signal_integrity", PCIE_PERF_OFF(tx_errors) }, +}; + +#define PCIE_PERF_OFF64(c) \ + MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c##_high) +static const struct counter_desc pcie_perf_stats_desc64[] = { + { "outbound_pci_buffer_overflow", PCIE_PERF_OFF64(tx_overflow_buffer_pkt) }, +}; + +static const struct counter_desc pcie_perf_stall_stats_desc[] = { + { "outbound_pci_stalled_rd", PCIE_PERF_OFF(outbound_stalled_reads) }, + { "outbound_pci_stalled_wr", PCIE_PERF_OFF(outbound_stalled_writes) }, + { "outbound_pci_stalled_rd_events", PCIE_PERF_OFF(outbound_stalled_reads_events) }, + { "outbound_pci_stalled_wr_events", PCIE_PERF_OFF(outbound_stalled_writes_events) }, +}; + +#define NUM_PCIE_PERF_COUNTERS ARRAY_SIZE(pcie_perf_stats_desc) +#define NUM_PCIE_PERF_COUNTERS64 ARRAY_SIZE(pcie_perf_stats_desc64) +#define NUM_PCIE_PERF_STALL_COUNTERS ARRAY_SIZE(pcie_perf_stall_stats_desc) + +static int mlx5e_grp_pcie_get_num_stats(struct mlx5e_priv *priv) +{ + int num_stats = 0; + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group)) + num_stats += NUM_PCIE_PERF_COUNTERS; + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt)) + num_stats += NUM_PCIE_PERF_COUNTERS64; + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled)) + num_stats += NUM_PCIE_PERF_STALL_COUNTERS; + + return num_stats; +} + +static int mlx5e_grp_pcie_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + int i; + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group)) + for (i = 0; i < NUM_PCIE_PERF_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pcie_perf_stats_desc[i].format); + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt)) + for (i = 0; i < NUM_PCIE_PERF_COUNTERS64; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pcie_perf_stats_desc64[i].format); + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled)) + for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pcie_perf_stall_stats_desc[i].format); + return idx; +} + +static int mlx5e_grp_pcie_fill_stats(struct mlx5e_priv *priv, u64 *data, + int idx) +{ + int i; + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group)) + for (i = 0; i < NUM_PCIE_PERF_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters, + pcie_perf_stats_desc, i); + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt)) + for (i = 0; i < NUM_PCIE_PERF_COUNTERS64; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pcie.pcie_perf_counters, + pcie_perf_stats_desc64, i); + + if (MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled)) + for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters, + pcie_perf_stall_stats_desc, i); + return idx; +} + +#define PPORT_PER_PRIO_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_per_prio_grp_data_layout.c##_high) +static const struct counter_desc pport_per_prio_traffic_stats_desc[] = { + { "rx_prio%d_bytes", PPORT_PER_PRIO_OFF(rx_octets) }, + { "rx_prio%d_packets", PPORT_PER_PRIO_OFF(rx_frames) }, + { "tx_prio%d_bytes", PPORT_PER_PRIO_OFF(tx_octets) }, + { "tx_prio%d_packets", PPORT_PER_PRIO_OFF(tx_frames) }, +}; + +#define NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS ARRAY_SIZE(pport_per_prio_traffic_stats_desc) + +static int mlx5e_grp_per_prio_traffic_get_num_stats(struct mlx5e_priv *priv) +{ + return NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * NUM_PPORT_PRIO; +} + +static int mlx5e_grp_per_prio_traffic_fill_strings(struct mlx5e_priv *priv, + u8 *data, + int idx) +{ + int i, prio; + + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_prio_traffic_stats_desc[i].format, prio); + } + + return idx; +} + +static int mlx5e_grp_per_prio_traffic_fill_stats(struct mlx5e_priv *priv, + u64 *data, + int idx) +{ + int i, prio; + + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio], + pport_per_prio_traffic_stats_desc, i); + } + + return idx; +} + +static const struct counter_desc pport_per_prio_pfc_stats_desc[] = { + /* %s is "global" or "prio{i}" */ + { "rx_%s_pause", PPORT_PER_PRIO_OFF(rx_pause) }, + { "rx_%s_pause_duration", PPORT_PER_PRIO_OFF(rx_pause_duration) }, + { "tx_%s_pause", PPORT_PER_PRIO_OFF(tx_pause) }, + { "tx_%s_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) }, + { "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) }, +}; + +#define NUM_PPORT_PER_PRIO_PFC_COUNTERS ARRAY_SIZE(pport_per_prio_pfc_stats_desc) + +static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u8 pfc_en_tx; + u8 pfc_en_rx; + int err; + + if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return 0; + + err = mlx5_query_port_pfc(mdev, &pfc_en_tx, &pfc_en_rx); + + return err ? 0 : pfc_en_tx | pfc_en_rx; +} + +static bool mlx5e_query_global_pause_combined(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 rx_pause; + u32 tx_pause; + int err; + + if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return false; + + err = mlx5_query_port_pause(mdev, &rx_pause, &tx_pause); + + return err ? false : rx_pause | tx_pause; +} + +static int mlx5e_grp_per_prio_pfc_get_num_stats(struct mlx5e_priv *priv) +{ + return (mlx5e_query_global_pause_combined(priv) + + hweight8(mlx5e_query_pfc_combined(priv))) * + NUM_PPORT_PER_PRIO_PFC_COUNTERS; +} + +static int mlx5e_grp_per_prio_pfc_fill_strings(struct mlx5e_priv *priv, + u8 *data, + int idx) +{ + unsigned long pfc_combined; + int i, prio; + + pfc_combined = mlx5e_query_pfc_combined(priv); + for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + char pfc_string[ETH_GSTRING_LEN]; + + snprintf(pfc_string, sizeof(pfc_string), "prio%d", prio); + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_prio_pfc_stats_desc[i].format, pfc_string); + } + } + + if (mlx5e_query_global_pause_combined(priv)) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_prio_pfc_stats_desc[i].format, "global"); + } + } + + return idx; +} + +static int mlx5e_grp_per_prio_pfc_fill_stats(struct mlx5e_priv *priv, + u64 *data, + int idx) +{ + unsigned long pfc_combined; + int i, prio; + + pfc_combined = mlx5e_query_pfc_combined(priv); + for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio], + pport_per_prio_pfc_stats_desc, i); + } + } + + if (mlx5e_query_global_pause_combined(priv)) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0], + pport_per_prio_pfc_stats_desc, i); + } + } + + return idx; +} + +static const struct counter_desc mlx5e_pme_status_desc[] = { + { "module_unplug", 8 }, +}; + +static const struct counter_desc mlx5e_pme_error_desc[] = { + { "module_bus_stuck", 16 }, /* bus stuck (I2C or data shorted) */ + { "module_high_temp", 48 }, /* high temperature */ + { "module_bad_shorted", 56 }, /* bad or shorted cable/module */ +}; + +#define NUM_PME_STATUS_STATS ARRAY_SIZE(mlx5e_pme_status_desc) +#define NUM_PME_ERR_STATS ARRAY_SIZE(mlx5e_pme_error_desc) + +static int mlx5e_grp_pme_get_num_stats(struct mlx5e_priv *priv) +{ + return NUM_PME_STATUS_STATS + NUM_PME_ERR_STATS; +} + +static int mlx5e_grp_pme_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + int i; + + for (i = 0; i < NUM_PME_STATUS_STATS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_status_desc[i].format); + + for (i = 0; i < NUM_PME_ERR_STATS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_error_desc[i].format); + + return idx; +} + +static int mlx5e_grp_pme_fill_stats(struct mlx5e_priv *priv, u64 *data, + int idx) +{ + struct mlx5_priv *mlx5_priv = &priv->mdev->priv; + int i; + + for (i = 0; i < NUM_PME_STATUS_STATS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.status_counters, + mlx5e_pme_status_desc, i); + + for (i = 0; i < NUM_PME_ERR_STATS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.error_counters, + mlx5e_pme_error_desc, i); + + return idx; +} + +static int mlx5e_grp_ipsec_get_num_stats(struct mlx5e_priv *priv) +{ + return mlx5e_ipsec_get_count(priv); +} + +static int mlx5e_grp_ipsec_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + return idx + mlx5e_ipsec_get_strings(priv, + data + idx * ETH_GSTRING_LEN); +} + +static int mlx5e_grp_ipsec_fill_stats(struct mlx5e_priv *priv, u64 *data, + int idx) +{ + return idx + mlx5e_ipsec_get_stats(priv, data + idx); +} + +static const struct counter_desc rq_stats_desc[] = { + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_drop) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx_full) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, removed_vlan_packets) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, page_reuse) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_reuse) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) }, +}; + +static const struct counter_desc sq_stats_desc[] = { + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, wake) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, +}; + +#define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc) +#define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) + +static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv) +{ + return (NUM_RQ_STATS * priv->channels.num) + + (NUM_SQ_STATS * priv->channels.num * priv->channels.params.num_tc); +} + +static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data, + int idx) +{ + int i, j, tc; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return idx; + + for (i = 0; i < priv->channels.num; i++) + for (j = 0; j < NUM_RQ_STATS; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, rq_stats_desc[j].format, i); + + for (tc = 0; tc < priv->channels.params.num_tc; tc++) + for (i = 0; i < priv->channels.num; i++) + for (j = 0; j < NUM_SQ_STATS; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + sq_stats_desc[j].format, + priv->channel_tc2txq[i][tc]); + + return idx; +} + +static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data, + int idx) +{ + struct mlx5e_channels *channels = &priv->channels; + int i, j, tc; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return idx; + + for (i = 0; i < channels->num; i++) + for (j = 0; j < NUM_RQ_STATS; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&channels->c[i]->rq.stats, + rq_stats_desc, j); + + for (tc = 0; tc < priv->channels.params.num_tc; tc++) + for (i = 0; i < channels->num; i++) + for (j = 0; j < NUM_SQ_STATS; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&channels->c[i]->sq[tc].stats, + sq_stats_desc, j); + + return idx; +} + +const struct mlx5e_stats_grp mlx5e_stats_grps[] = { + { + .get_num_stats = mlx5e_grp_sw_get_num_stats, + .fill_strings = mlx5e_grp_sw_fill_strings, + .fill_stats = mlx5e_grp_sw_fill_stats, + }, + { + .get_num_stats = mlx5e_grp_q_get_num_stats, + .fill_strings = mlx5e_grp_q_fill_strings, + .fill_stats = mlx5e_grp_q_fill_stats, + }, + { + .get_num_stats = mlx5e_grp_vport_get_num_stats, + .fill_strings = mlx5e_grp_vport_fill_strings, + .fill_stats = mlx5e_grp_vport_fill_stats, + }, + { + .get_num_stats = mlx5e_grp_802_3_get_num_stats, + .fill_strings = mlx5e_grp_802_3_fill_strings, + .fill_stats = mlx5e_grp_802_3_fill_stats, + }, + { + .get_num_stats = mlx5e_grp_2863_get_num_stats, + .fill_strings = mlx5e_grp_2863_fill_strings, + .fill_stats = mlx5e_grp_2863_fill_stats, + }, + { + .get_num_stats = mlx5e_grp_2819_get_num_stats, + .fill_strings = mlx5e_grp_2819_fill_strings, + .fill_stats = mlx5e_grp_2819_fill_stats, + }, + { + .get_num_stats = mlx5e_grp_phy_get_num_stats, + .fill_strings = mlx5e_grp_phy_fill_strings, + .fill_stats = mlx5e_grp_phy_fill_stats, + }, + { + .get_num_stats = mlx5e_grp_eth_ext_get_num_stats, + .fill_strings = mlx5e_grp_eth_ext_fill_strings, + .fill_stats = mlx5e_grp_eth_ext_fill_stats, + }, + { + .get_num_stats = mlx5e_grp_pcie_get_num_stats, + .fill_strings = mlx5e_grp_pcie_fill_strings, + .fill_stats = mlx5e_grp_pcie_fill_stats, + }, + { + .get_num_stats = mlx5e_grp_per_prio_traffic_get_num_stats, + .fill_strings = mlx5e_grp_per_prio_traffic_fill_strings, + .fill_stats = mlx5e_grp_per_prio_traffic_fill_stats, + }, + { + .get_num_stats = mlx5e_grp_per_prio_pfc_get_num_stats, + .fill_strings = mlx5e_grp_per_prio_pfc_fill_strings, + .fill_stats = mlx5e_grp_per_prio_pfc_fill_stats, + }, + { + .get_num_stats = mlx5e_grp_pme_get_num_stats, + .fill_strings = mlx5e_grp_pme_fill_strings, + .fill_stats = mlx5e_grp_pme_fill_stats, + }, + { + .get_num_stats = mlx5e_grp_ipsec_get_num_stats, + .fill_strings = mlx5e_grp_ipsec_fill_strings, + .fill_stats = mlx5e_grp_ipsec_fill_stats, + }, + { + .get_num_stats = mlx5e_grp_channels_get_num_stats, + .fill_strings = mlx5e_grp_channels_fill_strings, + .fill_stats = mlx5e_grp_channels_fill_stats, + } +}; + +const int mlx5e_num_stats_grps = ARRAY_SIZE(mlx5e_stats_grps); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index f8637213afc0..d679e21f686e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -59,8 +59,10 @@ struct mlx5e_sw_stats { u64 tx_tso_bytes; u64 tx_tso_inner_packets; u64 tx_tso_inner_bytes; + u64 tx_added_vlan_packets; u64 rx_lro_packets; u64 rx_lro_bytes; + u64 rx_removed_vlan_packets; u64 rx_csum_unnecessary; u64 rx_csum_none; u64 rx_csum_complete; @@ -91,54 +93,10 @@ struct mlx5e_sw_stats { u64 link_down_events_phy; }; -static const struct counter_desc sw_stats_desc[] = { - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_none) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xmit_more) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_page_reuse) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_reuse) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events_phy) }, -}; - struct mlx5e_qcounter_stats { u32 rx_out_of_buffer; }; -static const struct counter_desc q_stats_desc[] = { - { MLX5E_DECLARE_STAT(struct mlx5e_qcounter_stats, rx_out_of_buffer) }, -}; - -#define VPORT_COUNTER_OFF(c) MLX5_BYTE_OFF(query_vport_counter_out, c) #define VPORT_COUNTER_GET(vstats, c) MLX5_GET64(query_vport_counter_out, \ vstats->query_vport_out, c) @@ -146,83 +104,22 @@ struct mlx5e_vport_stats { __be64 query_vport_out[MLX5_ST_SZ_QW(query_vport_counter_out)]; }; -static const struct counter_desc vport_stats_desc[] = { - { "rx_vport_unicast_packets", - VPORT_COUNTER_OFF(received_eth_unicast.packets) }, - { "rx_vport_unicast_bytes", - VPORT_COUNTER_OFF(received_eth_unicast.octets) }, - { "tx_vport_unicast_packets", - VPORT_COUNTER_OFF(transmitted_eth_unicast.packets) }, - { "tx_vport_unicast_bytes", - VPORT_COUNTER_OFF(transmitted_eth_unicast.octets) }, - { "rx_vport_multicast_packets", - VPORT_COUNTER_OFF(received_eth_multicast.packets) }, - { "rx_vport_multicast_bytes", - VPORT_COUNTER_OFF(received_eth_multicast.octets) }, - { "tx_vport_multicast_packets", - VPORT_COUNTER_OFF(transmitted_eth_multicast.packets) }, - { "tx_vport_multicast_bytes", - VPORT_COUNTER_OFF(transmitted_eth_multicast.octets) }, - { "rx_vport_broadcast_packets", - VPORT_COUNTER_OFF(received_eth_broadcast.packets) }, - { "rx_vport_broadcast_bytes", - VPORT_COUNTER_OFF(received_eth_broadcast.octets) }, - { "tx_vport_broadcast_packets", - VPORT_COUNTER_OFF(transmitted_eth_broadcast.packets) }, - { "tx_vport_broadcast_bytes", - VPORT_COUNTER_OFF(transmitted_eth_broadcast.octets) }, - { "rx_vport_rdma_unicast_packets", - VPORT_COUNTER_OFF(received_ib_unicast.packets) }, - { "rx_vport_rdma_unicast_bytes", - VPORT_COUNTER_OFF(received_ib_unicast.octets) }, - { "tx_vport_rdma_unicast_packets", - VPORT_COUNTER_OFF(transmitted_ib_unicast.packets) }, - { "tx_vport_rdma_unicast_bytes", - VPORT_COUNTER_OFF(transmitted_ib_unicast.octets) }, - { "rx_vport_rdma_multicast_packets", - VPORT_COUNTER_OFF(received_ib_multicast.packets) }, - { "rx_vport_rdma_multicast_bytes", - VPORT_COUNTER_OFF(received_ib_multicast.octets) }, - { "tx_vport_rdma_multicast_packets", - VPORT_COUNTER_OFF(transmitted_ib_multicast.packets) }, - { "tx_vport_rdma_multicast_bytes", - VPORT_COUNTER_OFF(transmitted_ib_multicast.octets) }, -}; - -#define PPORT_802_3_OFF(c) \ - MLX5_BYTE_OFF(ppcnt_reg, \ - counter_set.eth_802_3_cntrs_grp_data_layout.c##_high) #define PPORT_802_3_GET(pstats, c) \ MLX5_GET64(ppcnt_reg, pstats->IEEE_802_3_counters, \ counter_set.eth_802_3_cntrs_grp_data_layout.c##_high) -#define PPORT_2863_OFF(c) \ - MLX5_BYTE_OFF(ppcnt_reg, \ - counter_set.eth_2863_cntrs_grp_data_layout.c##_high) #define PPORT_2863_GET(pstats, c) \ MLX5_GET64(ppcnt_reg, pstats->RFC_2863_counters, \ counter_set.eth_2863_cntrs_grp_data_layout.c##_high) -#define PPORT_2819_OFF(c) \ - MLX5_BYTE_OFF(ppcnt_reg, \ - counter_set.eth_2819_cntrs_grp_data_layout.c##_high) #define PPORT_2819_GET(pstats, c) \ MLX5_GET64(ppcnt_reg, pstats->RFC_2819_counters, \ counter_set.eth_2819_cntrs_grp_data_layout.c##_high) -#define PPORT_PHY_STATISTICAL_OFF(c) \ - MLX5_BYTE_OFF(ppcnt_reg, \ - counter_set.phys_layer_statistical_cntrs.c##_high) #define PPORT_PHY_STATISTICAL_GET(pstats, c) \ MLX5_GET64(ppcnt_reg, (pstats)->phy_statistical_counters, \ counter_set.phys_layer_statistical_cntrs.c##_high) -#define PPORT_PER_PRIO_OFF(c) \ - MLX5_BYTE_OFF(ppcnt_reg, \ - counter_set.eth_per_prio_grp_data_layout.c##_high) #define PPORT_PER_PRIO_GET(pstats, prio, c) \ MLX5_GET64(ppcnt_reg, pstats->per_prio_counters[prio], \ counter_set.eth_per_prio_grp_data_layout.c##_high) #define NUM_PPORT_PRIO 8 -#define PPORT_ETH_EXT_OFF(c) \ - MLX5_BYTE_OFF(ppcnt_reg, \ - counter_set.eth_extended_cntrs_grp_data_layout.c##_high) #define PPORT_ETH_EXT_GET(pstats, c) \ MLX5_GET64(ppcnt_reg, (pstats)->eth_ext_counters, \ counter_set.eth_extended_cntrs_grp_data_layout.c##_high) @@ -237,82 +134,10 @@ struct mlx5e_pport_stats { __be64 eth_ext_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; }; -static const struct counter_desc pport_802_3_stats_desc[] = { - { "tx_packets_phy", PPORT_802_3_OFF(a_frames_transmitted_ok) }, - { "rx_packets_phy", PPORT_802_3_OFF(a_frames_received_ok) }, - { "rx_crc_errors_phy", PPORT_802_3_OFF(a_frame_check_sequence_errors) }, - { "tx_bytes_phy", PPORT_802_3_OFF(a_octets_transmitted_ok) }, - { "rx_bytes_phy", PPORT_802_3_OFF(a_octets_received_ok) }, - { "tx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_xmitted_ok) }, - { "tx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_xmitted_ok) }, - { "rx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_received_ok) }, - { "rx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_received_ok) }, - { "rx_in_range_len_errors_phy", PPORT_802_3_OFF(a_in_range_length_errors) }, - { "rx_out_of_range_len_phy", PPORT_802_3_OFF(a_out_of_range_length_field) }, - { "rx_oversize_pkts_phy", PPORT_802_3_OFF(a_frame_too_long_errors) }, - { "rx_symbol_err_phy", PPORT_802_3_OFF(a_symbol_error_during_carrier) }, - { "tx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_transmitted) }, - { "rx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_received) }, - { "rx_unsupported_op_phy", PPORT_802_3_OFF(a_unsupported_opcodes_received) }, - { "rx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_received) }, - { "tx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_transmitted) }, -}; - -static const struct counter_desc pport_2863_stats_desc[] = { - { "rx_discards_phy", PPORT_2863_OFF(if_in_discards) }, - { "tx_discards_phy", PPORT_2863_OFF(if_out_discards) }, - { "tx_errors_phy", PPORT_2863_OFF(if_out_errors) }, -}; - -static const struct counter_desc pport_2819_stats_desc[] = { - { "rx_undersize_pkts_phy", PPORT_2819_OFF(ether_stats_undersize_pkts) }, - { "rx_fragments_phy", PPORT_2819_OFF(ether_stats_fragments) }, - { "rx_jabbers_phy", PPORT_2819_OFF(ether_stats_jabbers) }, - { "rx_64_bytes_phy", PPORT_2819_OFF(ether_stats_pkts64octets) }, - { "rx_65_to_127_bytes_phy", PPORT_2819_OFF(ether_stats_pkts65to127octets) }, - { "rx_128_to_255_bytes_phy", PPORT_2819_OFF(ether_stats_pkts128to255octets) }, - { "rx_256_to_511_bytes_phy", PPORT_2819_OFF(ether_stats_pkts256to511octets) }, - { "rx_512_to_1023_bytes_phy", PPORT_2819_OFF(ether_stats_pkts512to1023octets) }, - { "rx_1024_to_1518_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1024to1518octets) }, - { "rx_1519_to_2047_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1519to2047octets) }, - { "rx_2048_to_4095_bytes_phy", PPORT_2819_OFF(ether_stats_pkts2048to4095octets) }, - { "rx_4096_to_8191_bytes_phy", PPORT_2819_OFF(ether_stats_pkts4096to8191octets) }, - { "rx_8192_to_10239_bytes_phy", PPORT_2819_OFF(ether_stats_pkts8192to10239octets) }, -}; - -static const struct counter_desc pport_phy_statistical_stats_desc[] = { - { "rx_pcs_symbol_err_phy", PPORT_PHY_STATISTICAL_OFF(phy_symbol_errors) }, - { "rx_corrected_bits_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits) }, -}; - -static const struct counter_desc pport_per_prio_traffic_stats_desc[] = { - { "rx_prio%d_bytes", PPORT_PER_PRIO_OFF(rx_octets) }, - { "rx_prio%d_packets", PPORT_PER_PRIO_OFF(rx_frames) }, - { "tx_prio%d_bytes", PPORT_PER_PRIO_OFF(tx_octets) }, - { "tx_prio%d_packets", PPORT_PER_PRIO_OFF(tx_frames) }, -}; - -static const struct counter_desc pport_per_prio_pfc_stats_desc[] = { - /* %s is "global" or "prio{i}" */ - { "rx_%s_pause", PPORT_PER_PRIO_OFF(rx_pause) }, - { "rx_%s_pause_duration", PPORT_PER_PRIO_OFF(rx_pause_duration) }, - { "tx_%s_pause", PPORT_PER_PRIO_OFF(tx_pause) }, - { "tx_%s_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) }, - { "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) }, -}; - -static const struct counter_desc pport_eth_ext_stats_desc[] = { - { "rx_buffer_passed_thres_phy", PPORT_ETH_EXT_OFF(rx_buffer_almost_full) }, -}; - -#define PCIE_PERF_OFF(c) \ - MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c) #define PCIE_PERF_GET(pcie_stats, c) \ MLX5_GET(mpcnt_reg, (pcie_stats)->pcie_perf_counters, \ counter_set.pcie_perf_cntrs_grp_data_layout.c) -#define PCIE_PERF_OFF64(c) \ - MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c##_high) #define PCIE_PERF_GET64(pcie_stats, c) \ MLX5_GET64(mpcnt_reg, (pcie_stats)->pcie_perf_counters, \ counter_set.pcie_perf_cntrs_grp_data_layout.c##_high) @@ -321,22 +146,6 @@ struct mlx5e_pcie_stats { __be64 pcie_perf_counters[MLX5_ST_SZ_QW(mpcnt_reg)]; }; -static const struct counter_desc pcie_perf_stats_desc[] = { - { "rx_pci_signal_integrity", PCIE_PERF_OFF(rx_errors) }, - { "tx_pci_signal_integrity", PCIE_PERF_OFF(tx_errors) }, -}; - -static const struct counter_desc pcie_perf_stats_desc64[] = { - { "outbound_pci_buffer_overflow", PCIE_PERF_OFF64(tx_overflow_buffer_pkt) }, -}; - -static const struct counter_desc pcie_perf_stall_stats_desc[] = { - { "outbound_pci_stalled_rd", PCIE_PERF_OFF(outbound_stalled_reads) }, - { "outbound_pci_stalled_wr", PCIE_PERF_OFF(outbound_stalled_writes) }, - { "outbound_pci_stalled_rd_events", PCIE_PERF_OFF(outbound_stalled_reads_events) }, - { "outbound_pci_stalled_wr_events", PCIE_PERF_OFF(outbound_stalled_writes_events) }, -}; - struct mlx5e_rq_stats { u64 packets; u64 bytes; @@ -346,6 +155,7 @@ struct mlx5e_rq_stats { u64 csum_none; u64 lro_packets; u64 lro_bytes; + u64 removed_vlan_packets; u64 xdp_drop; u64 xdp_tx; u64 xdp_tx_full; @@ -362,31 +172,6 @@ struct mlx5e_rq_stats { u64 cache_waive; }; -static const struct counter_desc rq_stats_desc[] = { - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_drop) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx_full) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, page_reuse) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_reuse) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) }, -}; - struct mlx5e_sq_stats { /* commonly accessed in data path */ u64 packets; @@ -398,6 +183,7 @@ struct mlx5e_sq_stats { u64 tso_inner_bytes; u64 csum_partial; u64 csum_partial_inner; + u64 added_vlan_packets; u64 nop; /* less likely accessed in data path */ u64 csum_none; @@ -406,61 +192,6 @@ struct mlx5e_sq_stats { u64 dropped; }; -static const struct counter_desc sq_stats_desc[] = { - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, packets) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, bytes) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_packets) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_bytes) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_packets) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, wake) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) }, - { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, -}; - -#define NUM_SW_COUNTERS ARRAY_SIZE(sw_stats_desc) -#define NUM_Q_COUNTERS ARRAY_SIZE(q_stats_desc) -#define NUM_VPORT_COUNTERS ARRAY_SIZE(vport_stats_desc) -#define NUM_PPORT_802_3_COUNTERS ARRAY_SIZE(pport_802_3_stats_desc) -#define NUM_PPORT_2863_COUNTERS ARRAY_SIZE(pport_2863_stats_desc) -#define NUM_PPORT_2819_COUNTERS ARRAY_SIZE(pport_2819_stats_desc) -#define NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv) \ - (ARRAY_SIZE(pport_phy_statistical_stats_desc) * \ - MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group)) -#define NUM_PCIE_PERF_COUNTERS(priv) \ - (ARRAY_SIZE(pcie_perf_stats_desc) * \ - MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group)) -#define NUM_PCIE_PERF_COUNTERS64(priv) \ - (ARRAY_SIZE(pcie_perf_stats_desc64) * \ - MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt)) -#define NUM_PCIE_PERF_STALL_COUNTERS(priv) \ - (ARRAY_SIZE(pcie_perf_stall_stats_desc) * \ - MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled)) -#define NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS \ - ARRAY_SIZE(pport_per_prio_traffic_stats_desc) -#define NUM_PPORT_PER_PRIO_PFC_COUNTERS \ - ARRAY_SIZE(pport_per_prio_pfc_stats_desc) -#define NUM_PPORT_ETH_EXT_COUNTERS(priv) \ - (ARRAY_SIZE(pport_eth_ext_stats_desc) * \ - MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters)) -#define NUM_PPORT_COUNTERS(priv) (NUM_PPORT_802_3_COUNTERS + \ - NUM_PPORT_2863_COUNTERS + \ - NUM_PPORT_2819_COUNTERS + \ - NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv) + \ - NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * \ - NUM_PPORT_PRIO + \ - NUM_PPORT_ETH_EXT_COUNTERS(priv)) -#define NUM_PCIE_COUNTERS(priv) (NUM_PCIE_PERF_COUNTERS(priv) + \ - NUM_PCIE_PERF_COUNTERS64(priv) +\ - NUM_PCIE_PERF_STALL_COUNTERS(priv)) -#define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc) -#define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) - struct mlx5e_stats { struct mlx5e_sw_stats sw; struct mlx5e_qcounter_stats qcnt; @@ -470,14 +201,14 @@ struct mlx5e_stats { struct mlx5e_pcie_stats pcie; }; -static const struct counter_desc mlx5e_pme_status_desc[] = { - { "module_unplug", 8 }, +struct mlx5e_priv; +struct mlx5e_stats_grp { + int (*get_num_stats)(struct mlx5e_priv *priv); + int (*fill_strings)(struct mlx5e_priv *priv, u8 *data, int idx); + int (*fill_stats)(struct mlx5e_priv *priv, u64 *data, int idx); }; -static const struct counter_desc mlx5e_pme_error_desc[] = { - { "module_bus_stuck", 16 }, /* bus stuck (I2C or data shorted) */ - { "module_high_temp", 48 }, /* high temperature */ - { "module_bad_shorted", 56 }, /* bad or shorted cable/module */ -}; +extern const struct mlx5e_stats_grp mlx5e_stats_grps[]; +extern const int mlx5e_num_stats_grps; #endif /* __MLX5_EN_STATS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 9ba1f72060aa..55979ec2e88a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -90,8 +90,8 @@ enum { MLX5_HEADER_TYPE_NVGRE = 0x1, }; -#define MLX5E_TC_TABLE_NUM_ENTRIES 1024 #define MLX5E_TC_TABLE_NUM_GROUPS 4 +#define MLX5E_TC_TABLE_MAX_GROUP_SIZE (1 << 16) struct mod_hdr_key { int num_actions; @@ -263,10 +263,21 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, } if (IS_ERR_OR_NULL(priv->fs.tc.t)) { + int tc_grp_size, tc_tbl_size; + u32 max_flow_counter; + + max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | + MLX5_CAP_GEN(dev, max_flow_counter_15_0); + + tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE); + + tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS, + BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size))); + priv->fs.tc.t = mlx5_create_auto_grouped_flow_table(priv->fs.ns, MLX5E_TC_PRIO, - MLX5E_TC_TABLE_NUM_ENTRIES, + tc_tbl_size, MLX5E_TC_TABLE_NUM_GROUPS, 0, 0); if (IS_ERR(priv->fs.tc.t)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 1d6925d4369a..569b42a01026 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -32,9 +32,11 @@ #include <linux/tcp.h> #include <linux/if_vlan.h> +#include <net/dsfield.h> #include "en.h" #include "ipoib/ipoib.h" #include "en_accel/ipsec_rxtx.h" +#include "lib/clock.h" #define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS #define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\ @@ -85,6 +87,20 @@ static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma) } } +#ifdef CONFIG_MLX5_CORE_EN_DCB +static inline int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb) +{ + int dscp_cp = 0; + + if (skb->protocol == htons(ETH_P_IP)) + dscp_cp = ipv4_get_dsfield(ip_hdr(skb)) >> 2; + else if (skb->protocol == htons(ETH_P_IPV6)) + dscp_cp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2; + + return priv->dcbx_dp.dscp2prio[dscp_cp]; +} +#endif + u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, void *accel_priv, select_queue_fallback_t fallback) { @@ -96,8 +112,13 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, if (!netdev_get_num_tc(dev)) return channel_ix; - if (skb_vlan_tag_present(skb)) - up = skb->vlan_tci >> VLAN_PRIO_SHIFT; +#ifdef CONFIG_MLX5_CORE_EN_DCB + if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP) + up = mlx5e_get_dscp_up(priv, skb); + else +#endif + if (skb_vlan_tag_present(skb)) + up = skb->vlan_tci >> VLAN_PRIO_SHIFT; /* channel_ix can be larger than num_channels since * dev->num_real_tx_queues = num_channels * num_tc @@ -340,6 +361,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, if (skb_vlan_tag_present(skb)) { mlx5e_insert_vlan(eseg->inline_hdr.start, skb, ihs, &skb_data, &skb_len); ihs += VLAN_HLEN; + sq->stats.added_vlan_packets++; } else { memcpy(eseg->inline_hdr.start, skb_data, ihs); mlx5e_tx_skb_pull_inline(&skb_data, &skb_len, ihs); @@ -348,7 +370,10 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, ds_cnt += DIV_ROUND_UP(ihs - sizeof(eseg->inline_hdr.start), MLX5_SEND_WQE_DS); } else if (skb_vlan_tag_present(skb)) { eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN); + if (skb->vlan_proto == cpu_to_be16(ETH_P_8021AD)) + eseg->insert.type |= cpu_to_be16(MLX5_ETH_WQE_SVLAN); eseg->insert.vlan_tci = cpu_to_be16(skb_vlan_tag_get(skb)); + sq->stats.added_vlan_packets++; } headlen = skb_len - skb->data_len; @@ -452,8 +477,9 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) SKBTX_HW_TSTAMP)) { struct skb_shared_hwtstamps hwts = {}; - mlx5e_fill_hwstamp(sq->tstamp, - get_cqe_ts(cqe), &hwts); + hwts.hwtstamp = + mlx5_timecounter_cyc2time(sq->clock, + get_cqe_ts(cqe)); skb_tstamp_tx(skb, &hwts); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index fc606bfd1d6e..60771865c99c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -491,8 +491,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) break; case MLX5_EVENT_TYPE_PPS_EVENT: - if (dev->event) - dev->event(dev, MLX5_DEV_EVENT_PPS, (unsigned long)eqe); + mlx5_pps_event(dev, eqe); break; case MLX5_EVENT_TYPE_FPGA_ERROR: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index c77f4c0c7769..bbb140f517c4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -157,7 +157,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, MLX5_MATCH_OUTER_HEADERS); struct mlx5_flow_handle *flow_rule = NULL; struct mlx5_flow_act flow_act = {0}; - struct mlx5_flow_destination dest; + struct mlx5_flow_destination dest = {}; struct mlx5_flow_spec *spec; void *mv_misc = NULL; void *mc_misc = NULL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index d9fd8570b07c..1143d80119bd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -306,7 +306,7 @@ static struct mlx5_flow_handle * mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn) { struct mlx5_flow_act flow_act = {0}; - struct mlx5_flow_destination dest; + struct mlx5_flow_destination dest = {}; struct mlx5_flow_handle *flow_rule; struct mlx5_flow_spec *spec; void *misc; @@ -395,7 +395,7 @@ out_err: static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) { struct mlx5_flow_act flow_act = {0}; - struct mlx5_flow_destination dest; + struct mlx5_flow_destination dest = {}; struct mlx5_flow_handle *flow_rule = NULL; struct mlx5_flow_spec *spec; int err = 0; @@ -670,7 +670,7 @@ struct mlx5_flow_handle * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn) { struct mlx5_flow_act flow_act = {0}; - struct mlx5_flow_destination dest; + struct mlx5_flow_destination dest = {}; struct mlx5_flow_handle *flow_rule; struct mlx5_flow_spec *spec; void *misc; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 36ecc2b2e187..881e2e55840c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -40,7 +40,8 @@ #include "eswitch.h" int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, - struct mlx5_flow_table *ft, u32 underlay_qpn) + struct mlx5_flow_table *ft, u32 underlay_qpn, + bool disconnect) { u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {0}; u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {0}; @@ -52,7 +53,15 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, MLX5_SET(set_flow_table_root_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); MLX5_SET(set_flow_table_root_in, in, table_type, ft->type); - MLX5_SET(set_flow_table_root_in, in, table_id, ft->id); + + if (disconnect) { + MLX5_SET(set_flow_table_root_in, in, op_mod, 1); + MLX5_SET(set_flow_table_root_in, in, table_id, 0); + } else { + MLX5_SET(set_flow_table_root_in, in, op_mod, 0); + MLX5_SET(set_flow_table_root_in, in, table_id, ft->id); + } + MLX5_SET(set_flow_table_root_in, in, underlay_qpn, underlay_qpn); if (ft->vport) { MLX5_SET(set_flow_table_root_in, in, vport_number, ft->vport); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index c6d7bdf255b6..71e2d0f37ad9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -71,8 +71,8 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, unsigned int index); int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, - struct mlx5_flow_table *ft, - u32 underlay_qpn); + struct mlx5_flow_table *ft, u32 underlay_qpn, + bool disconnect); int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id); int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 5a7bea688ec8..c70fd663a633 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -145,10 +145,10 @@ static struct init_tree_node { } }; -enum fs_i_mutex_lock_class { - FS_MUTEX_GRANDPARENT, - FS_MUTEX_PARENT, - FS_MUTEX_CHILD +enum fs_i_lock_class { + FS_LOCK_GRANDPARENT, + FS_LOCK_PARENT, + FS_LOCK_CHILD }; static const struct rhashtable_params rhash_fte = { @@ -168,10 +168,16 @@ static const struct rhashtable_params rhash_fg = { }; -static void del_rule(struct fs_node *node); -static void del_flow_table(struct fs_node *node); -static void del_flow_group(struct fs_node *node); -static void del_fte(struct fs_node *node); +static void del_hw_flow_table(struct fs_node *node); +static void del_hw_flow_group(struct fs_node *node); +static void del_hw_fte(struct fs_node *node); +static void del_sw_flow_table(struct fs_node *node); +static void del_sw_flow_group(struct fs_node *node); +static void del_sw_fte(struct fs_node *node); +/* Delete rule (destination) is special case that + * requires to lock the FTE for all the deletion process. + */ +static void del_sw_hw_rule(struct fs_node *node); static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, struct mlx5_flow_destination *d2); static struct mlx5_flow_rule * @@ -179,20 +185,22 @@ find_flow_rule(struct fs_fte *fte, struct mlx5_flow_destination *dest); static void tree_init_node(struct fs_node *node, - unsigned int refcount, - void (*remove_func)(struct fs_node *)) + void (*del_hw_func)(struct fs_node *), + void (*del_sw_func)(struct fs_node *)) { - atomic_set(&node->refcount, refcount); + refcount_set(&node->refcount, 1); INIT_LIST_HEAD(&node->list); INIT_LIST_HEAD(&node->children); - mutex_init(&node->lock); - node->remove_func = remove_func; + init_rwsem(&node->lock); + node->del_hw_func = del_hw_func; + node->del_sw_func = del_sw_func; + node->active = false; } static void tree_add_node(struct fs_node *node, struct fs_node *parent) { if (parent) - atomic_inc(&parent->refcount); + refcount_inc(&parent->refcount); node->parent = parent; /* Parent is the root */ @@ -202,58 +210,78 @@ static void tree_add_node(struct fs_node *node, struct fs_node *parent) node->root = parent->root; } -static void tree_get_node(struct fs_node *node) +static int tree_get_node(struct fs_node *node) { - atomic_inc(&node->refcount); + return refcount_inc_not_zero(&node->refcount); } -static void nested_lock_ref_node(struct fs_node *node, - enum fs_i_mutex_lock_class class) +static void nested_down_read_ref_node(struct fs_node *node, + enum fs_i_lock_class class) { if (node) { - mutex_lock_nested(&node->lock, class); - atomic_inc(&node->refcount); + down_read_nested(&node->lock, class); + refcount_inc(&node->refcount); } } -static void lock_ref_node(struct fs_node *node) +static void nested_down_write_ref_node(struct fs_node *node, + enum fs_i_lock_class class) { if (node) { - mutex_lock(&node->lock); - atomic_inc(&node->refcount); + down_write_nested(&node->lock, class); + refcount_inc(&node->refcount); } } -static void unlock_ref_node(struct fs_node *node) +static void down_write_ref_node(struct fs_node *node) { if (node) { - atomic_dec(&node->refcount); - mutex_unlock(&node->lock); + down_write(&node->lock); + refcount_inc(&node->refcount); } } +static void up_read_ref_node(struct fs_node *node) +{ + refcount_dec(&node->refcount); + up_read(&node->lock); +} + +static void up_write_ref_node(struct fs_node *node) +{ + refcount_dec(&node->refcount); + up_write(&node->lock); +} + static void tree_put_node(struct fs_node *node) { struct fs_node *parent_node = node->parent; - lock_ref_node(parent_node); - if (atomic_dec_and_test(&node->refcount)) { - if (parent_node) + if (refcount_dec_and_test(&node->refcount)) { + if (node->del_hw_func) + node->del_hw_func(node); + if (parent_node) { + /* Only root namespace doesn't have parent and we just + * need to free its node. + */ + down_write_ref_node(parent_node); list_del_init(&node->list); - if (node->remove_func) - node->remove_func(node); - kfree(node); + if (node->del_sw_func) + node->del_sw_func(node); + up_write_ref_node(parent_node); + } else { + kfree(node); + } node = NULL; } - unlock_ref_node(parent_node); if (!node && parent_node) tree_put_node(parent_node); } static int tree_remove_node(struct fs_node *node) { - if (atomic_read(&node->refcount) > 1) { - atomic_dec(&node->refcount); + if (refcount_read(&node->refcount) > 1) { + refcount_dec(&node->refcount); return -EEXIST; } tree_put_node(node); @@ -362,6 +390,15 @@ static struct mlx5_flow_root_namespace *find_root(struct fs_node *node) return container_of(ns, struct mlx5_flow_root_namespace, ns); } +static inline struct mlx5_flow_steering *get_steering(struct fs_node *node) +{ + struct mlx5_flow_root_namespace *root = find_root(node); + + if (root) + return root->dev->priv.steering; + return NULL; +} + static inline struct mlx5_core_dev *get_dev(struct fs_node *node) { struct mlx5_flow_root_namespace *root = find_root(node); @@ -371,26 +408,36 @@ static inline struct mlx5_core_dev *get_dev(struct fs_node *node) return NULL; } -static void del_flow_table(struct fs_node *node) +static void del_hw_flow_table(struct fs_node *node) { struct mlx5_flow_table *ft; struct mlx5_core_dev *dev; - struct fs_prio *prio; int err; fs_get_obj(ft, node); dev = get_dev(&ft->node); - err = mlx5_cmd_destroy_flow_table(dev, ft); - if (err) - mlx5_core_warn(dev, "flow steering can't destroy ft\n"); - ida_destroy(&ft->fte_allocator); + if (node->active) { + err = mlx5_cmd_destroy_flow_table(dev, ft); + if (err) + mlx5_core_warn(dev, "flow steering can't destroy ft\n"); + } +} + +static void del_sw_flow_table(struct fs_node *node) +{ + struct mlx5_flow_table *ft; + struct fs_prio *prio; + + fs_get_obj(ft, node); + rhltable_destroy(&ft->fgs_hash); fs_get_obj(prio, ft->node.parent); prio->num_ft--; + kfree(ft); } -static void del_rule(struct fs_node *node) +static void del_sw_hw_rule(struct fs_node *node) { struct mlx5_flow_rule *rule; struct mlx5_flow_table *ft; @@ -406,7 +453,6 @@ static void del_rule(struct fs_node *node) fs_get_obj(fg, fte->node.parent); fs_get_obj(ft, fg->node.parent); trace_mlx5_fs_del_rule(rule); - list_del(&rule->node.list); if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { mutex_lock(&rule->dest_attr.ft->lock); list_del(&rule->next_ft); @@ -434,117 +480,203 @@ out: "%s can't del rule fg id=%d fte_index=%d\n", __func__, fg->id, fte->index); } + kfree(rule); } -static void destroy_fte(struct fs_fte *fte, struct mlx5_flow_group *fg) +static void del_hw_fte(struct fs_node *node) { struct mlx5_flow_table *ft; - int ret; + struct mlx5_flow_group *fg; + struct mlx5_core_dev *dev; + struct fs_fte *fte; + int err; - ret = rhashtable_remove_fast(&fg->ftes_hash, &fte->hash, rhash_fte); - WARN_ON(ret); - fte->status = 0; + fs_get_obj(fte, node); + fs_get_obj(fg, fte->node.parent); fs_get_obj(ft, fg->node.parent); - ida_simple_remove(&ft->fte_allocator, fte->index); + + trace_mlx5_fs_del_fte(fte); + dev = get_dev(&ft->node); + if (node->active) { + err = mlx5_cmd_delete_fte(dev, ft, + fte->index); + if (err) + mlx5_core_warn(dev, + "flow steering can't delete fte in index %d of flow group id %d\n", + fte->index, fg->id); + } } -static void del_fte(struct fs_node *node) +static void del_sw_fte(struct fs_node *node) { - struct mlx5_flow_table *ft; + struct mlx5_flow_steering *steering = get_steering(node); struct mlx5_flow_group *fg; - struct mlx5_core_dev *dev; struct fs_fte *fte; int err; fs_get_obj(fte, node); fs_get_obj(fg, fte->node.parent); - fs_get_obj(ft, fg->node.parent); - trace_mlx5_fs_del_fte(fte); - dev = get_dev(&ft->node); - err = mlx5_cmd_delete_fte(dev, ft, - fte->index); - if (err) - mlx5_core_warn(dev, - "flow steering can't delete fte in index %d of flow group id %d\n", - fte->index, fg->id); - - destroy_fte(fte, fg); + err = rhashtable_remove_fast(&fg->ftes_hash, + &fte->hash, + rhash_fte); + WARN_ON(err); + ida_simple_remove(&fg->fte_allocator, fte->index - fg->start_index); + kmem_cache_free(steering->ftes_cache, fte); } -static void del_flow_group(struct fs_node *node) +static void del_hw_flow_group(struct fs_node *node) { struct mlx5_flow_group *fg; struct mlx5_flow_table *ft; struct mlx5_core_dev *dev; - int err; fs_get_obj(fg, node); fs_get_obj(ft, fg->node.parent); dev = get_dev(&ft->node); trace_mlx5_fs_del_fg(fg); - if (ft->autogroup.active) - ft->autogroup.num_groups--; + if (fg->node.active && mlx5_cmd_destroy_flow_group(dev, ft, fg->id)) + mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n", + fg->id, ft->id); +} + +static void del_sw_flow_group(struct fs_node *node) +{ + struct mlx5_flow_steering *steering = get_steering(node); + struct mlx5_flow_group *fg; + struct mlx5_flow_table *ft; + int err; + + fs_get_obj(fg, node); + fs_get_obj(ft, fg->node.parent); rhashtable_destroy(&fg->ftes_hash); + ida_destroy(&fg->fte_allocator); + if (ft->autogroup.active) + ft->autogroup.num_groups--; err = rhltable_remove(&ft->fgs_hash, &fg->hash, rhash_fg); WARN_ON(err); - if (mlx5_cmd_destroy_flow_group(dev, ft, fg->id)) - mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n", - fg->id, ft->id); + kmem_cache_free(steering->fgs_cache, fg); +} + +static int insert_fte(struct mlx5_flow_group *fg, struct fs_fte *fte) +{ + int index; + int ret; + + index = ida_simple_get(&fg->fte_allocator, 0, fg->max_ftes, GFP_KERNEL); + if (index < 0) + return index; + + fte->index = index + fg->start_index; + ret = rhashtable_insert_fast(&fg->ftes_hash, + &fte->hash, + rhash_fte); + if (ret) + goto err_ida_remove; + + tree_add_node(&fte->node, &fg->node); + list_add_tail(&fte->node.list, &fg->node.children); + return 0; + +err_ida_remove: + ida_simple_remove(&fg->fte_allocator, index); + return ret; } -static struct fs_fte *alloc_fte(struct mlx5_flow_act *flow_act, +static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft, u32 *match_value, - unsigned int index) + struct mlx5_flow_act *flow_act) { + struct mlx5_flow_steering *steering = get_steering(&ft->node); struct fs_fte *fte; - fte = kzalloc(sizeof(*fte), GFP_KERNEL); + fte = kmem_cache_zalloc(steering->ftes_cache, GFP_KERNEL); if (!fte) return ERR_PTR(-ENOMEM); memcpy(fte->val, match_value, sizeof(fte->val)); fte->node.type = FS_TYPE_FLOW_ENTRY; fte->flow_tag = flow_act->flow_tag; - fte->index = index; fte->action = flow_act->action; fte->encap_id = flow_act->encap_id; fte->modify_id = flow_act->modify_id; + tree_init_node(&fte->node, del_hw_fte, del_sw_fte); + return fte; } -static struct mlx5_flow_group *alloc_flow_group(u32 *create_fg_in) +static void dealloc_flow_group(struct mlx5_flow_steering *steering, + struct mlx5_flow_group *fg) +{ + rhashtable_destroy(&fg->ftes_hash); + kmem_cache_free(steering->fgs_cache, fg); +} + +static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steering, + u8 match_criteria_enable, + void *match_criteria, + int start_index, + int end_index) { struct mlx5_flow_group *fg; - void *match_criteria = MLX5_ADDR_OF(create_flow_group_in, - create_fg_in, match_criteria); - u8 match_criteria_enable = MLX5_GET(create_flow_group_in, - create_fg_in, - match_criteria_enable); int ret; - fg = kzalloc(sizeof(*fg), GFP_KERNEL); + fg = kmem_cache_zalloc(steering->fgs_cache, GFP_KERNEL); if (!fg) return ERR_PTR(-ENOMEM); ret = rhashtable_init(&fg->ftes_hash, &rhash_fte); if (ret) { - kfree(fg); + kmem_cache_free(steering->fgs_cache, fg); return ERR_PTR(ret); - } +} + ida_init(&fg->fte_allocator); fg->mask.match_criteria_enable = match_criteria_enable; memcpy(&fg->mask.match_criteria, match_criteria, sizeof(fg->mask.match_criteria)); fg->node.type = FS_TYPE_FLOW_GROUP; - fg->start_index = MLX5_GET(create_flow_group_in, create_fg_in, - start_flow_index); - fg->max_ftes = MLX5_GET(create_flow_group_in, create_fg_in, - end_flow_index) - fg->start_index + 1; + fg->start_index = start_index; + fg->max_ftes = end_index - start_index + 1; + + return fg; +} + +static struct mlx5_flow_group *alloc_insert_flow_group(struct mlx5_flow_table *ft, + u8 match_criteria_enable, + void *match_criteria, + int start_index, + int end_index, + struct list_head *prev) +{ + struct mlx5_flow_steering *steering = get_steering(&ft->node); + struct mlx5_flow_group *fg; + int ret; + + fg = alloc_flow_group(steering, match_criteria_enable, match_criteria, + start_index, end_index); + if (IS_ERR(fg)) + return fg; + + /* initialize refcnt, add to parent list */ + ret = rhltable_insert(&ft->fgs_hash, + &fg->hash, + rhash_fg); + if (ret) { + dealloc_flow_group(steering, fg); + return ERR_PTR(ret); + } + + tree_init_node(&fg->node, del_hw_flow_group, del_sw_flow_group); + tree_add_node(&fg->node, &ft->node); + /* Add node to group list */ + list_add(&fg->node.list, prev); + atomic_inc(&ft->node.version); + return fg; } @@ -575,7 +707,6 @@ static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_ft ft->flags = flags; INIT_LIST_HEAD(&ft->fwd_rules); mutex_init(&ft->lock); - ida_init(&ft->fte_allocator); return ft; } @@ -693,8 +824,10 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio *prio) { struct mlx5_flow_root_namespace *root = find_root(&prio->node); + struct mlx5_ft_underlay_qp *uqp; int min_level = INT_MAX; int err; + u32 qpn; if (root->root_ft) min_level = root->root_ft->level; @@ -702,10 +835,24 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio if (ft->level >= min_level) return 0; - err = mlx5_cmd_update_root_ft(root->dev, ft, root->underlay_qpn); + if (list_empty(&root->underlay_qpns)) { + /* Don't set any QPN (zero) in case QPN list is empty */ + qpn = 0; + err = mlx5_cmd_update_root_ft(root->dev, ft, qpn, false); + } else { + list_for_each_entry(uqp, &root->underlay_qpns, list) { + qpn = uqp->qpn; + err = mlx5_cmd_update_root_ft(root->dev, ft, qpn, + false); + if (err) + break; + } + } + if (err) - mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n", - ft->id); + mlx5_core_warn(root->dev, + "Update root flow table of id(%u) qpn(%d) failed\n", + ft->id, qpn); else root->root_ft = ft; @@ -724,7 +871,7 @@ static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, fs_get_obj(fte, rule->node.parent); if (!(fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) return -EINVAL; - lock_ref_node(&fte->node); + down_write_ref_node(&fte->node); fs_get_obj(fg, fte->node.parent); fs_get_obj(ft, fg->node.parent); @@ -733,7 +880,7 @@ static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, ft, fg->id, modify_mask, fte); - unlock_ref_node(&fte->node); + up_write_ref_node(&fte->node); return err; } @@ -765,7 +912,7 @@ static int connect_fwd_rules(struct mlx5_core_dev *dev, struct mlx5_flow_table *new_next_ft, struct mlx5_flow_table *old_next_ft) { - struct mlx5_flow_destination dest; + struct mlx5_flow_destination dest = {}; struct mlx5_flow_rule *iter; int err = 0; @@ -870,7 +1017,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa goto unlock_root; } - tree_init_node(&ft->node, 1, del_flow_table); + tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table); log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0; next_ft = find_next_chained_ft(fs_prio); err = mlx5_cmd_create_flow_table(root->dev, ft->vport, ft->op_mod, ft->type, @@ -882,17 +1029,17 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa err = connect_flow_table(root->dev, ft, fs_prio); if (err) goto destroy_ft; - lock_ref_node(&fs_prio->node); + ft->node.active = true; + down_write_ref_node(&fs_prio->node); tree_add_node(&ft->node, &fs_prio->node); list_add_flow_table(ft, fs_prio); fs_prio->num_ft++; - unlock_ref_node(&fs_prio->node); + up_write_ref_node(&fs_prio->node); mutex_unlock(&root->chain_lock); return ft; destroy_ft: mlx5_cmd_destroy_flow_table(root->dev, ft); free_ft: - ida_destroy(&ft->fte_allocator); kfree(ft); unlock_root: mutex_unlock(&root->chain_lock); @@ -960,54 +1107,6 @@ mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, } EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table); -/* Flow table should be locked */ -static struct mlx5_flow_group *create_flow_group_common(struct mlx5_flow_table *ft, - u32 *fg_in, - struct list_head - *prev_fg, - bool is_auto_fg) -{ - struct mlx5_flow_group *fg; - struct mlx5_core_dev *dev = get_dev(&ft->node); - int err; - - if (!dev) - return ERR_PTR(-ENODEV); - - fg = alloc_flow_group(fg_in); - if (IS_ERR(fg)) - return fg; - - err = rhltable_insert(&ft->fgs_hash, &fg->hash, rhash_fg); - if (err) - goto err_free_fg; - - err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id); - if (err) - goto err_remove_fg; - - if (ft->autogroup.active) - ft->autogroup.num_groups++; - /* Add node to tree */ - tree_init_node(&fg->node, !is_auto_fg, del_flow_group); - tree_add_node(&fg->node, &ft->node); - /* Add node to group list */ - list_add(&fg->node.list, prev_fg); - - trace_mlx5_fs_add_fg(fg); - return fg; - -err_remove_fg: - WARN_ON(rhltable_remove(&ft->fgs_hash, - &fg->hash, - rhash_fg)); -err_free_fg: - rhashtable_destroy(&fg->ftes_hash); - kfree(fg); - - return ERR_PTR(err); -} - struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, u32 *fg_in) { @@ -1016,7 +1115,13 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, u8 match_criteria_enable = MLX5_GET(create_flow_group_in, fg_in, match_criteria_enable); + int start_index = MLX5_GET(create_flow_group_in, fg_in, + start_flow_index); + int end_index = MLX5_GET(create_flow_group_in, fg_in, + end_flow_index); + struct mlx5_core_dev *dev = get_dev(&ft->node); struct mlx5_flow_group *fg; + int err; if (!check_valid_mask(match_criteria_enable, match_criteria)) return ERR_PTR(-EINVAL); @@ -1024,9 +1129,21 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, if (ft->autogroup.active) return ERR_PTR(-EPERM); - lock_ref_node(&ft->node); - fg = create_flow_group_common(ft, fg_in, ft->node.children.prev, false); - unlock_ref_node(&ft->node); + down_write_ref_node(&ft->node); + fg = alloc_insert_flow_group(ft, match_criteria_enable, match_criteria, + start_index, end_index, + ft->node.children.prev); + up_write_ref_node(&ft->node); + if (IS_ERR(fg)) + return fg; + + err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id); + if (err) { + tree_put_node(&fg->node); + return ERR_PTR(err); + } + trace_mlx5_fs_add_fg(fg); + fg->node.active = true; return fg; } @@ -1067,7 +1184,7 @@ static void destroy_flow_handle(struct fs_fte *fte, int i) { for (; --i >= 0;) { - if (atomic_dec_and_test(&handle->rule[i]->node.refcount)) { + if (refcount_dec_and_test(&handle->rule[i]->node.refcount)) { fte->dests_size--; list_del(&handle->rule[i]->node.list); kfree(handle->rule[i]); @@ -1098,7 +1215,7 @@ create_flow_handle(struct fs_fte *fte, if (dest) { rule = find_flow_rule(fte, dest + i); if (rule) { - atomic_inc(&rule->node.refcount); + refcount_inc(&rule->node.refcount); goto rule_found; } } @@ -1111,7 +1228,7 @@ create_flow_handle(struct fs_fte *fte, /* Add dest to dests list- we need flow tables to be in the * end of the list for forward to next prio rules. */ - tree_init_node(&rule->node, 1, del_rule); + tree_init_node(&rule->node, NULL, del_sw_hw_rule); if (dest && dest[i].type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) list_add(&rule->node.list, &fte->node.children); @@ -1167,7 +1284,9 @@ add_rule_fte(struct fs_fte *fte, if (err) goto free_handle; + fte->node.active = true; fte->status |= FS_FTE_STATUS_EXISTING; + atomic_inc(&fte->node.version); out: return handle; @@ -1177,59 +1296,17 @@ free_handle: return ERR_PTR(err); } -static struct fs_fte *create_fte(struct mlx5_flow_group *fg, - u32 *match_value, - struct mlx5_flow_act *flow_act) -{ - struct mlx5_flow_table *ft; - struct fs_fte *fte; - int index; - int ret; - - fs_get_obj(ft, fg->node.parent); - index = ida_simple_get(&ft->fte_allocator, fg->start_index, - fg->start_index + fg->max_ftes, - GFP_KERNEL); - if (index < 0) - return ERR_PTR(index); - - fte = alloc_fte(flow_act, match_value, index); - if (IS_ERR(fte)) { - ret = PTR_ERR(fte); - goto err_alloc; - } - ret = rhashtable_insert_fast(&fg->ftes_hash, &fte->hash, rhash_fte); - if (ret) - goto err_hash; - - return fte; - -err_hash: - kfree(fte); -err_alloc: - ida_simple_remove(&ft->fte_allocator, index); - return ERR_PTR(ret); -} - -static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft, - u8 match_criteria_enable, - u32 *match_criteria) +static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft, + struct mlx5_flow_spec *spec) { - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct list_head *prev = &ft->node.children; - unsigned int candidate_index = 0; struct mlx5_flow_group *fg; - void *match_criteria_addr; + unsigned int candidate_index = 0; unsigned int group_size = 0; - u32 *in; if (!ft->autogroup.active) return ERR_PTR(-ENOENT); - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) - return ERR_PTR(-ENOMEM); - if (ft->autogroup.num_groups < ft->autogroup.required_groups) /* We save place for flow groups in addition to max types */ group_size = ft->max_fte / (ft->autogroup.required_groups + 1); @@ -1247,25 +1324,55 @@ static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft, prev = &fg->node.list; } - if (candidate_index + group_size > ft->max_fte) { - fg = ERR_PTR(-ENOSPC); + if (candidate_index + group_size > ft->max_fte) + return ERR_PTR(-ENOSPC); + + fg = alloc_insert_flow_group(ft, + spec->match_criteria_enable, + spec->match_criteria, + candidate_index, + candidate_index + group_size - 1, + prev); + if (IS_ERR(fg)) goto out; - } + + ft->autogroup.num_groups++; + +out: + return fg; +} + +static int create_auto_flow_group(struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg) +{ + struct mlx5_core_dev *dev = get_dev(&ft->node); + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + void *match_criteria_addr; + int err; + u32 *in; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; MLX5_SET(create_flow_group_in, in, match_criteria_enable, - match_criteria_enable); - MLX5_SET(create_flow_group_in, in, start_flow_index, candidate_index); - MLX5_SET(create_flow_group_in, in, end_flow_index, candidate_index + - group_size - 1); + fg->mask.match_criteria_enable); + MLX5_SET(create_flow_group_in, in, start_flow_index, fg->start_index); + MLX5_SET(create_flow_group_in, in, end_flow_index, fg->start_index + + fg->max_ftes - 1); match_criteria_addr = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); - memcpy(match_criteria_addr, match_criteria, - MLX5_ST_SZ_BYTES(fte_match_param)); + memcpy(match_criteria_addr, fg->mask.match_criteria, + sizeof(fg->mask.match_criteria)); + + err = mlx5_cmd_create_flow_group(dev, ft, in, &fg->id); + if (!err) { + fg->node.active = true; + trace_mlx5_fs_add_fg(fg); + } - fg = create_flow_group_common(ft, in, prev, true); -out: kvfree(in); - return fg; + return err; } static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, @@ -1340,60 +1447,30 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, struct fs_fte *fte) { struct mlx5_flow_handle *handle; - struct mlx5_flow_table *ft; + int old_action; int i; + int ret; - if (fte) { - int old_action; - int ret; - - nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); - ret = check_conflicting_ftes(fte, flow_act); - if (ret) { - handle = ERR_PTR(ret); - goto unlock_fte; - } - - old_action = fte->action; - fte->action |= flow_act->action; - handle = add_rule_fte(fte, fg, dest, dest_num, - old_action != flow_act->action); - if (IS_ERR(handle)) { - fte->action = old_action; - goto unlock_fte; - } else { - trace_mlx5_fs_set_fte(fte, false); - goto add_rules; - } - } - fs_get_obj(ft, fg->node.parent); + ret = check_conflicting_ftes(fte, flow_act); + if (ret) + return ERR_PTR(ret); - fte = create_fte(fg, match_value, flow_act); - if (IS_ERR(fte)) - return (void *)fte; - tree_init_node(&fte->node, 0, del_fte); - nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); - handle = add_rule_fte(fte, fg, dest, dest_num, false); + old_action = fte->action; + fte->action |= flow_act->action; + handle = add_rule_fte(fte, fg, dest, dest_num, + old_action != flow_act->action); if (IS_ERR(handle)) { - unlock_ref_node(&fte->node); - destroy_fte(fte, fg); - kfree(fte); + fte->action = old_action; return handle; } + trace_mlx5_fs_set_fte(fte, false); - tree_add_node(&fte->node, &fg->node); - /* fte list isn't sorted */ - list_add_tail(&fte->node.list, &fg->node.children); - trace_mlx5_fs_set_fte(fte, true); -add_rules: for (i = 0; i < handle->num_rules; i++) { - if (atomic_read(&handle->rule[i]->node.refcount) == 1) { + if (refcount_read(&handle->rule[i]->node.refcount) == 1) { tree_add_node(&handle->rule[i]->node, &fte->node); trace_mlx5_fs_add_rule(handle->rule[i]); } } -unlock_fte: - unlock_ref_node(&fte->node); return handle; } @@ -1441,93 +1518,197 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest, return true; } -static struct mlx5_flow_handle * -try_add_to_existing_fg(struct mlx5_flow_table *ft, - struct mlx5_flow_spec *spec, - struct mlx5_flow_act *flow_act, - struct mlx5_flow_destination *dest, - int dest_num) -{ +struct match_list { + struct list_head list; struct mlx5_flow_group *g; - struct mlx5_flow_handle *rule = ERR_PTR(-ENOENT); +}; + +struct match_list_head { + struct list_head list; + struct match_list first; +}; + +static void free_match_list(struct match_list_head *head) +{ + if (!list_empty(&head->list)) { + struct match_list *iter, *match_tmp; + + list_del(&head->first.list); + tree_put_node(&head->first.g->node); + list_for_each_entry_safe(iter, match_tmp, &head->list, + list) { + tree_put_node(&iter->g->node); + list_del(&iter->list); + kfree(iter); + } + } +} + +static int build_match_list(struct match_list_head *match_head, + struct mlx5_flow_table *ft, + struct mlx5_flow_spec *spec) +{ struct rhlist_head *tmp, *list; - struct match_list { - struct list_head list; - struct mlx5_flow_group *g; - } match_list, *iter; - LIST_HEAD(match_head); + struct mlx5_flow_group *g; + int err = 0; rcu_read_lock(); + INIT_LIST_HEAD(&match_head->list); /* Collect all fgs which has a matching match_criteria */ list = rhltable_lookup(&ft->fgs_hash, spec, rhash_fg); + /* RCU is atomic, we can't execute FW commands here */ rhl_for_each_entry_rcu(g, tmp, list, hash) { struct match_list *curr_match; - if (likely(list_empty(&match_head))) { - match_list.g = g; - list_add_tail(&match_list.list, &match_head); + if (likely(list_empty(&match_head->list))) { + if (!tree_get_node(&g->node)) + continue; + match_head->first.g = g; + list_add_tail(&match_head->first.list, + &match_head->list); continue; } - curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC); + curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC); if (!curr_match) { - rcu_read_unlock(); - rule = ERR_PTR(-ENOMEM); - goto free_list; + free_match_list(match_head); + err = -ENOMEM; + goto out; + } + if (!tree_get_node(&g->node)) { + kfree(curr_match); + continue; } curr_match->g = g; - list_add_tail(&curr_match->list, &match_head); + list_add_tail(&curr_match->list, &match_head->list); } +out: rcu_read_unlock(); + return err; +} + +static u64 matched_fgs_get_version(struct list_head *match_head) +{ + struct match_list *iter; + u64 version = 0; + + list_for_each_entry(iter, match_head, list) + version += (u64)atomic_read(&iter->g->node.version); + return version; +} +static struct mlx5_flow_handle * +try_add_to_existing_fg(struct mlx5_flow_table *ft, + struct list_head *match_head, + struct mlx5_flow_spec *spec, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int dest_num, + int ft_version) +{ + struct mlx5_flow_steering *steering = get_steering(&ft->node); + struct mlx5_flow_group *g; + struct mlx5_flow_handle *rule; + struct match_list *iter; + bool take_write = false; + struct fs_fte *fte; + u64 version; + int err; + + fte = alloc_fte(ft, spec->match_value, flow_act); + if (IS_ERR(fte)) + return ERR_PTR(-ENOMEM); + + list_for_each_entry(iter, match_head, list) { + nested_down_read_ref_node(&iter->g->node, FS_LOCK_PARENT); + ida_pre_get(&iter->g->fte_allocator, GFP_KERNEL); + } + +search_again_locked: + version = matched_fgs_get_version(match_head); /* Try to find a fg that already contains a matching fte */ - list_for_each_entry(iter, &match_head, list) { - struct fs_fte *fte; + list_for_each_entry(iter, match_head, list) { + struct fs_fte *fte_tmp; g = iter->g; - nested_lock_ref_node(&g->node, FS_MUTEX_PARENT); - fte = rhashtable_lookup_fast(&g->ftes_hash, spec->match_value, - rhash_fte); - if (fte) { - rule = add_rule_fg(g, spec->match_value, - flow_act, dest, dest_num, fte); - unlock_ref_node(&g->node); - goto free_list; + fte_tmp = rhashtable_lookup_fast(&g->ftes_hash, spec->match_value, + rhash_fte); + if (!fte_tmp || !tree_get_node(&fte_tmp->node)) + continue; + + nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD); + if (!take_write) { + list_for_each_entry(iter, match_head, list) + up_read_ref_node(&iter->g->node); + } else { + list_for_each_entry(iter, match_head, list) + up_write_ref_node(&iter->g->node); } - unlock_ref_node(&g->node); + + rule = add_rule_fg(g, spec->match_value, + flow_act, dest, dest_num, fte_tmp); + up_write_ref_node(&fte_tmp->node); + tree_put_node(&fte_tmp->node); + kmem_cache_free(steering->ftes_cache, fte); + return rule; } /* No group with matching fte found. Try to add a new fte to any * matching fg. */ - list_for_each_entry(iter, &match_head, list) { - g = iter->g; - nested_lock_ref_node(&g->node, FS_MUTEX_PARENT); - rule = add_rule_fg(g, spec->match_value, - flow_act, dest, dest_num, NULL); - if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC) { - unlock_ref_node(&g->node); - goto free_list; - } - unlock_ref_node(&g->node); + if (!take_write) { + list_for_each_entry(iter, match_head, list) + up_read_ref_node(&iter->g->node); + list_for_each_entry(iter, match_head, list) + nested_down_write_ref_node(&iter->g->node, + FS_LOCK_PARENT); + take_write = true; } -free_list: - if (!list_empty(&match_head)) { - struct match_list *match_tmp; + /* Check the ft version, for case that new flow group + * was added while the fgs weren't locked + */ + if (atomic_read(&ft->node.version) != ft_version) { + rule = ERR_PTR(-EAGAIN); + goto out; + } - /* The most common case is having one FG. Since we want to - * optimize this case, we save the first on the stack. - * Therefore, no need to free it. - */ - list_del(&list_first_entry(&match_head, typeof(*iter), list)->list); - list_for_each_entry_safe(iter, match_tmp, &match_head, list) { - list_del(&iter->list); - kfree(iter); + /* Check the fgs version, for case the new FTE with the + * same values was added while the fgs weren't locked + */ + if (version != matched_fgs_get_version(match_head)) + goto search_again_locked; + + list_for_each_entry(iter, match_head, list) { + g = iter->g; + + if (!g->node.active) + continue; + err = insert_fte(g, fte); + if (err) { + if (err == -ENOSPC) + continue; + list_for_each_entry(iter, match_head, list) + up_write_ref_node(&iter->g->node); + kmem_cache_free(steering->ftes_cache, fte); + return ERR_PTR(err); } - } + nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); + list_for_each_entry(iter, match_head, list) + up_write_ref_node(&iter->g->node); + rule = add_rule_fg(g, spec->match_value, + flow_act, dest, dest_num, fte); + up_write_ref_node(&fte->node); + tree_put_node(&fte->node); + return rule; + } + rule = ERR_PTR(-ENOENT); +out: + list_for_each_entry(iter, match_head, list) + up_write_ref_node(&iter->g->node); + kmem_cache_free(steering->ftes_cache, fte); return rule; } @@ -1539,8 +1720,14 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, int dest_num) { + struct mlx5_flow_steering *steering = get_steering(&ft->node); struct mlx5_flow_group *g; struct mlx5_flow_handle *rule; + struct match_list_head match_head; + bool take_write = false; + struct fs_fte *fte; + int version; + int err; int i; if (!check_valid_spec(spec)) @@ -1550,33 +1737,73 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, if (!dest_is_valid(&dest[i], flow_act->action, ft)) return ERR_PTR(-EINVAL); } + nested_down_read_ref_node(&ft->node, FS_LOCK_GRANDPARENT); +search_again_locked: + version = atomic_read(&ft->node.version); + + /* Collect all fgs which has a matching match_criteria */ + err = build_match_list(&match_head, ft, spec); + if (err) + return ERR_PTR(err); - nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT); - rule = try_add_to_existing_fg(ft, spec, flow_act, dest, dest_num); - if (!IS_ERR(rule)) - goto unlock; + if (!take_write) + up_read_ref_node(&ft->node); - g = create_autogroup(ft, spec->match_criteria_enable, - spec->match_criteria); + rule = try_add_to_existing_fg(ft, &match_head.list, spec, flow_act, dest, + dest_num, version); + free_match_list(&match_head); + if (!IS_ERR(rule) || + (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN)) + return rule; + + if (!take_write) { + nested_down_write_ref_node(&ft->node, FS_LOCK_GRANDPARENT); + take_write = true; + } + + if (PTR_ERR(rule) == -EAGAIN || + version != atomic_read(&ft->node.version)) + goto search_again_locked; + + g = alloc_auto_flow_group(ft, spec); if (IS_ERR(g)) { rule = (void *)g; - goto unlock; + up_write_ref_node(&ft->node); + return rule; } - rule = add_rule_fg(g, spec->match_value, flow_act, dest, - dest_num, NULL); - if (IS_ERR(rule)) { - /* Remove assumes refcount > 0 and autogroup creates a group - * with a refcount = 0. - */ - unlock_ref_node(&ft->node); - tree_get_node(&g->node); - tree_remove_node(&g->node); - return rule; + nested_down_write_ref_node(&g->node, FS_LOCK_PARENT); + up_write_ref_node(&ft->node); + + err = create_auto_flow_group(ft, g); + if (err) + goto err_release_fg; + + fte = alloc_fte(ft, spec->match_value, flow_act); + if (IS_ERR(fte)) { + err = PTR_ERR(fte); + goto err_release_fg; } -unlock: - unlock_ref_node(&ft->node); + + err = insert_fte(g, fte); + if (err) { + kmem_cache_free(steering->ftes_cache, fte); + goto err_release_fg; + } + + nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); + up_write_ref_node(&g->node); + rule = add_rule_fg(g, spec->match_value, flow_act, dest, + dest_num, fte); + up_write_ref_node(&fte->node); + tree_put_node(&fte->node); + tree_put_node(&g->node); return rule; + +err_release_fg: + up_write_ref_node(&g->node); + tree_put_node(&g->node); + return ERR_PTR(err); } static bool fwd_next_prio_supported(struct mlx5_flow_table *ft) @@ -1593,7 +1820,7 @@ mlx5_add_flow_rules(struct mlx5_flow_table *ft, int dest_num) { struct mlx5_flow_root_namespace *root = find_root(&ft->node); - struct mlx5_flow_destination gen_dest; + struct mlx5_flow_destination gen_dest = {}; struct mlx5_flow_table *next_ft = NULL; struct mlx5_flow_handle *handle = NULL; u32 sw_action = flow_act->action; @@ -1661,23 +1888,43 @@ static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft) static int update_root_ft_destroy(struct mlx5_flow_table *ft) { struct mlx5_flow_root_namespace *root = find_root(&ft->node); + struct mlx5_ft_underlay_qp *uqp; struct mlx5_flow_table *new_root_ft = NULL; + int err = 0; + u32 qpn; if (root->root_ft != ft) return 0; new_root_ft = find_next_ft(ft); - if (new_root_ft) { - int err = mlx5_cmd_update_root_ft(root->dev, new_root_ft, - root->underlay_qpn); - if (err) { - mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n", - ft->id); - return err; + if (!new_root_ft) { + root->root_ft = NULL; + return 0; + } + + if (list_empty(&root->underlay_qpns)) { + /* Don't set any QPN (zero) in case QPN list is empty */ + qpn = 0; + err = mlx5_cmd_update_root_ft(root->dev, new_root_ft, qpn, + false); + } else { + list_for_each_entry(uqp, &root->underlay_qpns, list) { + qpn = uqp->qpn; + err = mlx5_cmd_update_root_ft(root->dev, new_root_ft, + qpn, false); + if (err) + break; } } - root->root_ft = new_root_ft; + + if (err) + mlx5_core_warn(root->dev, + "Update root flow table of id(%u) qpn(%d) failed\n", + ft->id, qpn); + else + root->root_ft = new_root_ft; + return 0; } @@ -1817,7 +2064,7 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, return ERR_PTR(-ENOMEM); fs_prio->node.type = FS_TYPE_PRIO; - tree_init_node(&fs_prio->node, 1, NULL); + tree_init_node(&fs_prio->node, NULL, NULL); tree_add_node(&fs_prio->node, &ns->node); fs_prio->num_levels = num_levels; fs_prio->prio = prio; @@ -1843,7 +2090,7 @@ static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio) return ERR_PTR(-ENOMEM); fs_init_namespace(ns); - tree_init_node(&ns->node, 1, NULL); + tree_init_node(&ns->node, NULL, NULL); tree_add_node(&ns->node, &prio->node); list_add_tail(&ns->node.list, &prio->node.children); @@ -1965,10 +2212,12 @@ static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_flow_steering root_ns->dev = steering->dev; root_ns->table_type = table_type; + INIT_LIST_HEAD(&root_ns->underlay_qpns); + ns = &root_ns->ns; fs_init_namespace(ns); mutex_init(&root_ns->chain_lock); - tree_init_node(&ns->node, 1, NULL); + tree_init_node(&ns->node, NULL, NULL); tree_add_node(&ns->node, NULL); return root_ns; @@ -2066,8 +2315,10 @@ static void clean_tree(struct fs_node *node) struct fs_node *iter; struct fs_node *temp; + tree_get_node(node); list_for_each_entry_safe(iter, temp, &node->children, list) clean_tree(iter); + tree_put_node(node); tree_remove_node(node); } } @@ -2091,6 +2342,8 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev) cleanup_root_ns(steering->sniffer_rx_root_ns); cleanup_root_ns(steering->sniffer_tx_root_ns); mlx5_cleanup_fc_stats(dev); + kmem_cache_destroy(steering->ftes_cache); + kmem_cache_destroy(steering->fgs_cache); kfree(steering); } @@ -2196,6 +2449,16 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) steering->dev = dev; dev->priv.steering = steering; + steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs", + sizeof(struct mlx5_flow_group), 0, + 0, NULL); + steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0, + 0, NULL); + if (!steering->ftes_cache || !steering->fgs_cache) { + err = -ENOMEM; + goto err; + } + if ((((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && (MLX5_CAP_GEN(dev, nic_flow_table))) || ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) && @@ -2245,17 +2508,76 @@ err: int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn) { struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns; + struct mlx5_ft_underlay_qp *new_uqp; + int err = 0; + + new_uqp = kzalloc(sizeof(*new_uqp), GFP_KERNEL); + if (!new_uqp) + return -ENOMEM; + + mutex_lock(&root->chain_lock); + + if (!root->root_ft) { + err = -EINVAL; + goto update_ft_fail; + } + + err = mlx5_cmd_update_root_ft(dev, root->root_ft, underlay_qpn, false); + if (err) { + mlx5_core_warn(dev, "Failed adding underlay QPN (%u) to root FT err(%d)\n", + underlay_qpn, err); + goto update_ft_fail; + } + + new_uqp->qpn = underlay_qpn; + list_add_tail(&new_uqp->list, &root->underlay_qpns); + + mutex_unlock(&root->chain_lock); - root->underlay_qpn = underlay_qpn; return 0; + +update_ft_fail: + mutex_unlock(&root->chain_lock); + kfree(new_uqp); + return err; } EXPORT_SYMBOL(mlx5_fs_add_rx_underlay_qpn); int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn) { struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns; + struct mlx5_ft_underlay_qp *uqp; + bool found = false; + int err = 0; + + mutex_lock(&root->chain_lock); + list_for_each_entry(uqp, &root->underlay_qpns, list) { + if (uqp->qpn == underlay_qpn) { + found = true; + break; + } + } + + if (!found) { + mlx5_core_warn(dev, "Failed finding underlay qp (%u) in qpn list\n", + underlay_qpn); + err = -EINVAL; + goto out; + } + + err = mlx5_cmd_update_root_ft(dev, root->root_ft, underlay_qpn, true); + if (err) + mlx5_core_warn(dev, "Failed removing underlay QPN (%u) from root FT err(%d)\n", + underlay_qpn, err); + + list_del(&uqp->list); + mutex_unlock(&root->chain_lock); + kfree(uqp); - root->underlay_qpn = 0; return 0; + +out: + mutex_unlock(&root->chain_lock); + return err; } EXPORT_SYMBOL(mlx5_fs_remove_rx_underlay_qpn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 48dd78975062..397d24a621a4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -33,6 +33,7 @@ #ifndef _MLX5_FS_CORE_ #define _MLX5_FS_CORE_ +#include <linux/refcount.h> #include <linux/mlx5/fs.h> #include <linux/rhashtable.h> @@ -66,6 +67,8 @@ enum fs_fte_status { struct mlx5_flow_steering { struct mlx5_core_dev *dev; + struct kmem_cache *fgs_cache; + struct kmem_cache *ftes_cache; struct mlx5_flow_root_namespace *root_ns; struct mlx5_flow_root_namespace *fdb_root_ns; struct mlx5_flow_root_namespace *esw_egress_root_ns; @@ -81,9 +84,12 @@ struct fs_node { struct fs_node *parent; struct fs_node *root; /* lock the node for writing and traversing */ - struct mutex lock; - atomic_t refcount; - void (*remove_func)(struct fs_node *); + struct rw_semaphore lock; + refcount_t refcount; + bool active; + void (*del_hw_func)(struct fs_node *); + void (*del_sw_func)(struct fs_node *); + atomic_t version; }; struct mlx5_flow_rule { @@ -120,7 +126,6 @@ struct mlx5_flow_table { /* FWD rules that point on this flow table */ struct list_head fwd_rules; u32 flags; - struct ida fte_allocator; struct rhltable fgs_hash; }; @@ -147,6 +152,11 @@ struct mlx5_fc { struct mlx5_fc_cache cache ____cacheline_aligned_in_smp; }; +struct mlx5_ft_underlay_qp { + struct list_head list; + u32 qpn; +}; + #define MLX5_FTE_MATCH_PARAM_RESERVED reserved_at_600 /* Calculate the fte_match_param length and without the reserved length. * Make sure the reserved field is the last. @@ -200,6 +210,7 @@ struct mlx5_flow_group { struct mlx5_flow_group_mask mask; u32 start_index; u32 max_ftes; + struct ida fte_allocator; u32 id; struct rhashtable ftes_hash; struct rhlist_head hash; @@ -212,7 +223,7 @@ struct mlx5_flow_root_namespace { struct mlx5_flow_table *root_ft; /* Should be held when chaining flow tables */ struct mutex chain_lock; - u32 underlay_qpn; + struct list_head underlay_qpns; }; int mlx5_init_fc_stats(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 2c71557d1cee..5ef1b56b6a96 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -106,6 +106,13 @@ static int mlx5_get_mcam_reg(struct mlx5_core_dev *dev) MLX5_MCAM_REGS_FIRST_128); } +static int mlx5_get_qcam_reg(struct mlx5_core_dev *dev) +{ + return mlx5_query_qcam_reg(dev, dev->caps.qcam, + MLX5_QCAM_FEATURE_ENHANCED_FEATURES, + MLX5_QCAM_REGS_FIRST_128); +} + int mlx5_query_hca_caps(struct mlx5_core_dev *dev) { int err; @@ -182,6 +189,9 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) if (MLX5_CAP_GEN(dev, mcam_reg)) mlx5_get_mcam_reg(dev); + if (MLX5_CAP_GEN(dev, qcam_reg)) + mlx5_get_qcam_reg(dev); + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index db86e1506c8b..1a0e797ad001 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -285,9 +285,9 @@ void mlx5_trigger_health_work(struct mlx5_core_dev *dev) spin_unlock_irqrestore(&health->wq_lock, flags); } -static void poll_health(unsigned long data) +static void poll_health(struct timer_list *t) { - struct mlx5_core_dev *dev = (struct mlx5_core_dev *)data; + struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer); struct mlx5_core_health *health = &dev->priv.health; u32 count; @@ -320,15 +320,13 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; - init_timer(&health->timer); + timer_setup(&health->timer, poll_health, 0); health->sick = 0; clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); health->health = &dev->iseg->health; health->health_counter = &dev->iseg->health_counter; - health->timer.data = (unsigned long)dev; - health->timer.function = poll_health; health->timer.expires = round_jiffies(jiffies + MLX5_HEALTH_POLL_INTERVAL); add_timer(&health->timer); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c index 43c126c63955..6f338a9219c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -250,3 +250,8 @@ const struct ethtool_ops mlx5i_ethtool_ops = { .get_link_ksettings = mlx5i_get_link_ksettings, .get_link = ethtool_op_get_link, }; + +const struct ethtool_ops mlx5i_pkey_ethtool_ops = { + .get_drvinfo = mlx5i_get_drvinfo, + .get_link = ethtool_op_get_link, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 145e392ab849..d2a66dc4adc6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -40,8 +40,6 @@ static int mlx5i_open(struct net_device *netdev); static int mlx5i_close(struct net_device *netdev); -static int mlx5i_dev_init(struct net_device *dev); -static void mlx5i_dev_cleanup(struct net_device *dev); static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu); static int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); @@ -70,10 +68,10 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev, } /* Called directly after IPoIB netdevice was created to initialize SW structs */ -static void mlx5i_init(struct mlx5_core_dev *mdev, - struct net_device *netdev, - const struct mlx5e_profile *profile, - void *ppriv) +void mlx5i_init(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) { struct mlx5e_priv *priv = mlx5i_epriv(netdev); @@ -108,11 +106,69 @@ static void mlx5i_cleanup(struct mlx5e_priv *priv) /* Do nothing .. */ } +int mlx5i_init_underlay_qp(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5i_priv *ipriv = priv->ppriv; + struct mlx5_core_qp *qp = &ipriv->qp; + struct mlx5_qp_context *context; + int ret; + + /* QP states */ + context = kzalloc(sizeof(*context), GFP_KERNEL); + if (!context) + return -ENOMEM; + + context->flags = cpu_to_be32(MLX5_QP_PM_MIGRATED << 11); + context->pri_path.port = 1; + context->pri_path.pkey_index = cpu_to_be16(ipriv->pkey_index); + context->qkey = cpu_to_be32(IB_DEFAULT_Q_KEY); + + ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, context, qp); + if (ret) { + mlx5_core_err(mdev, "Failed to modify qp RST2INIT, err: %d\n", ret); + goto err_qp_modify_to_err; + } + memset(context, 0, sizeof(*context)); + + ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, context, qp); + if (ret) { + mlx5_core_err(mdev, "Failed to modify qp INIT2RTR, err: %d\n", ret); + goto err_qp_modify_to_err; + } + + ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, context, qp); + if (ret) { + mlx5_core_err(mdev, "Failed to modify qp RTR2RTS, err: %d\n", ret); + goto err_qp_modify_to_err; + } + + kfree(context); + return 0; + +err_qp_modify_to_err: + mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2ERR_QP, 0, &context, qp); + kfree(context); + return ret; +} + +void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv) +{ + struct mlx5i_priv *ipriv = priv->ppriv; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_qp_context context; + int err; + + err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2RST_QP, 0, &context, + &ipriv->qp); + if (err) + mlx5_core_err(mdev, "Failed to modify qp 2RST, err: %d\n", err); +} + #define MLX5_QP_ENHANCED_ULP_STATELESS_MODE 2 -static int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp) +int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp) { - struct mlx5_qp_context *context = NULL; u32 *in = NULL; void *addr_path; int ret = 0; @@ -140,43 +196,12 @@ static int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core goto out; } - /* QP states */ - context = kzalloc(sizeof(*context), GFP_KERNEL); - if (!context) { - ret = -ENOMEM; - goto out; - } - - context->flags = cpu_to_be32(MLX5_QP_PM_MIGRATED << 11); - context->pri_path.port = 1; - context->qkey = cpu_to_be32(IB_DEFAULT_Q_KEY); - - ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, context, qp); - if (ret) { - mlx5_core_err(mdev, "Failed to modify qp RST2INIT, err: %d\n", ret); - goto out; - } - memset(context, 0, sizeof(*context)); - - ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, context, qp); - if (ret) { - mlx5_core_err(mdev, "Failed to modify qp INIT2RTR, err: %d\n", ret); - goto out; - } - - ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, context, qp); - if (ret) { - mlx5_core_err(mdev, "Failed to modify qp RTR2RTS, err: %d\n", ret); - goto out; - } - out: - kfree(context); kvfree(in); return ret; } -static void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp) +void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp) { mlx5_core_destroy_qp(mdev, qp); } @@ -195,10 +220,14 @@ static int mlx5i_init_tx(struct mlx5e_priv *priv) err = mlx5e_create_tis(priv->mdev, 0 /* tc */, ipriv->qp.qpn, &priv->tisn[0]); if (err) { mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err); - return err; + goto err_destroy_underlay_qp; } return 0; + +err_destroy_underlay_qp: + mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp); + return err; } static void mlx5i_cleanup_tx(struct mlx5e_priv *priv) @@ -226,15 +255,24 @@ static int mlx5i_create_flow_steering(struct mlx5e_priv *priv) priv->netdev->hw_features &= ~NETIF_F_NTUPLE; } + err = mlx5e_create_inner_ttc_table(priv); + if (err) { + netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n", + err); + goto err_destroy_arfs_tables; + } + err = mlx5e_create_ttc_table(priv); if (err) { netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", err); - goto err_destroy_arfs_tables; + goto err_destroy_inner_ttc_table; } return 0; +err_destroy_inner_ttc_table: + mlx5e_destroy_inner_ttc_table(priv); err_destroy_arfs_tables: mlx5e_arfs_destroy_tables(priv); @@ -244,12 +282,12 @@ err_destroy_arfs_tables: static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv) { mlx5e_destroy_ttc_table(priv); + mlx5e_destroy_inner_ttc_table(priv); mlx5e_arfs_destroy_tables(priv); } static int mlx5i_init_rx(struct mlx5e_priv *priv) { - struct mlx5i_priv *ipriv = priv->ppriv; int err; err = mlx5e_create_indirect_rqt(priv); @@ -268,18 +306,12 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) if (err) goto err_destroy_indirect_tirs; - err = mlx5_fs_add_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn); - if (err) - goto err_destroy_direct_tirs; - err = mlx5i_create_flow_steering(priv); if (err) - goto err_remove_rx_underlay_qpn; + goto err_destroy_direct_tirs; return 0; -err_remove_rx_underlay_qpn: - mlx5_fs_remove_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn); err_destroy_direct_tirs: mlx5e_destroy_direct_tirs(priv); err_destroy_indirect_tirs: @@ -293,9 +325,6 @@ err_destroy_indirect_rqts: static void mlx5i_cleanup_rx(struct mlx5e_priv *priv) { - struct mlx5i_priv *ipriv = priv->ppriv; - - mlx5_fs_remove_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn); mlx5i_destroy_flow_steering(priv); mlx5e_destroy_direct_tirs(priv); mlx5e_destroy_indirect_tirs(priv); @@ -351,7 +380,7 @@ out: return err; } -static int mlx5i_dev_init(struct net_device *dev) +int mlx5i_dev_init(struct net_device *dev) { struct mlx5e_priv *priv = mlx5i_epriv(dev); struct mlx5i_priv *ipriv = priv->ppriv; @@ -361,6 +390,9 @@ static int mlx5i_dev_init(struct net_device *dev) dev->dev_addr[2] = (ipriv->qp.qpn >> 8) & 0xff; dev->dev_addr[3] = (ipriv->qp.qpn) & 0xff; + /* Add QPN to net-device mapping to HT */ + mlx5i_pkey_add_qpn(dev ,ipriv->qp.qpn); + return 0; } @@ -378,63 +410,84 @@ static int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } } -static void mlx5i_dev_cleanup(struct net_device *dev) +void mlx5i_dev_cleanup(struct net_device *dev) { struct mlx5e_priv *priv = mlx5i_epriv(dev); - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5i_priv *ipriv = priv->ppriv; - struct mlx5_qp_context context; + struct mlx5i_priv *ipriv = priv->ppriv; + + mlx5i_uninit_underlay_qp(priv); - /* detach qp from flow-steering by reset it */ - mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2RST_QP, 0, &context, &ipriv->qp); + /* Delete QPN to net-device mapping from HT */ + mlx5i_pkey_del_qpn(dev, ipriv->qp.qpn); } static int mlx5i_open(struct net_device *netdev) { - struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = epriv->ppriv; + struct mlx5_core_dev *mdev = epriv->mdev; int err; - mutex_lock(&priv->state_lock); + mutex_lock(&epriv->state_lock); - set_bit(MLX5E_STATE_OPENED, &priv->state); + set_bit(MLX5E_STATE_OPENED, &epriv->state); - err = mlx5e_open_channels(priv, &priv->channels); - if (err) + err = mlx5i_init_underlay_qp(epriv); + if (err) { + mlx5_core_warn(mdev, "prepare underlay qp state failed, %d\n", err); goto err_clear_state_opened_flag; + } + + err = mlx5_fs_add_rx_underlay_qpn(mdev, ipriv->qp.qpn); + if (err) { + mlx5_core_warn(mdev, "attach underlay qp to ft failed, %d\n", err); + goto err_reset_qp; + } - mlx5e_refresh_tirs(priv, false); - mlx5e_activate_priv_channels(priv); - mlx5e_timestamp_init(priv); + err = mlx5e_open_channels(epriv, &epriv->channels); + if (err) + goto err_remove_fs_underlay_qp; - mutex_unlock(&priv->state_lock); + mlx5e_refresh_tirs(epriv, false); + mlx5e_activate_priv_channels(epriv); + mlx5e_timestamp_set(epriv); + + mutex_unlock(&epriv->state_lock); return 0; +err_remove_fs_underlay_qp: + mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn); +err_reset_qp: + mlx5i_uninit_underlay_qp(epriv); err_clear_state_opened_flag: - clear_bit(MLX5E_STATE_OPENED, &priv->state); - mutex_unlock(&priv->state_lock); + clear_bit(MLX5E_STATE_OPENED, &epriv->state); + mutex_unlock(&epriv->state_lock); return err; } static int mlx5i_close(struct net_device *netdev) { - struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = epriv->ppriv; + struct mlx5_core_dev *mdev = epriv->mdev; /* May already be CLOSED in case a previous configuration operation * (e.g RX/TX queue size change) that involves close&open failed. */ - mutex_lock(&priv->state_lock); + mutex_lock(&epriv->state_lock); - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + if (!test_bit(MLX5E_STATE_OPENED, &epriv->state)) goto unlock; - clear_bit(MLX5E_STATE_OPENED, &priv->state); + clear_bit(MLX5E_STATE_OPENED, &epriv->state); - mlx5e_timestamp_cleanup(priv); - netif_carrier_off(priv->netdev); - mlx5e_deactivate_priv_channels(priv); - mlx5e_close_channels(&priv->channels); + netif_carrier_off(epriv->netdev); + mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn); + mlx5i_uninit_underlay_qp(epriv); + mlx5e_deactivate_priv_channels(epriv); + mlx5e_close_channels(&epriv->channels);; unlock: - mutex_unlock(&priv->state_lock); + mutex_unlock(&epriv->state_lock); return 0; } @@ -492,6 +545,13 @@ static int mlx5i_xmit(struct net_device *dev, struct sk_buff *skb, return mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, ipriv->qkey); } +static void mlx5i_set_pkey_index(struct net_device *netdev, int id) +{ + struct mlx5i_priv *ipriv = netdev_priv(netdev); + + ipriv->pkey_index = (u16)id; +} + static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev) { if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_IB) @@ -510,12 +570,13 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, const char *name, void (*setup)(struct net_device *)) { - const struct mlx5e_profile *profile = &mlx5i_nic_profile; - int nch = profile->max_nch(mdev); + const struct mlx5e_profile *profile; struct net_device *netdev; struct mlx5i_priv *ipriv; struct mlx5e_priv *epriv; struct rdma_netdev *rn; + bool sub_interface; + int nch; int err; if (mlx5i_check_required_hca_cap(mdev)) { @@ -523,10 +584,15 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, return ERR_PTR(-EOPNOTSUPP); } - /* This function should only be called once per mdev */ - err = mlx5e_create_mdev_resources(mdev); - if (err) - return NULL; + /* TODO: Need to find a better way to check if child device*/ + sub_interface = (mdev->mlx5e_res.pdn != 0); + + if (sub_interface) + profile = mlx5i_pkey_get_profile(); + else + profile = &mlx5i_nic_profile; + + nch = profile->max_nch(mdev); netdev = alloc_netdev_mqs(sizeof(struct mlx5i_priv) + sizeof(struct mlx5e_priv), name, NET_NAME_UNKNOWN, @@ -535,7 +601,7 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, nch); if (!netdev) { mlx5_core_warn(mdev, "alloc_netdev_mqs failed\n"); - goto free_mdev_resources; + return NULL; } ipriv = netdev_priv(netdev); @@ -545,6 +611,20 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, if (!epriv->wq) goto err_free_netdev; + ipriv->sub_interface = sub_interface; + if (!ipriv->sub_interface) { + err = mlx5i_pkey_qpn_ht_init(netdev); + if (err) { + mlx5_core_warn(mdev, "allocate qpn_to_netdev ht failed\n"); + goto destroy_wq; + } + + /* This should only be called once per mdev */ + err = mlx5e_create_mdev_resources(mdev); + if (err) + goto destroy_ht; + } + profile->init(mdev, netdev, profile, ipriv); mlx5e_attach_netdev(epriv); @@ -556,13 +636,16 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, rn->send = mlx5i_xmit; rn->attach_mcast = mlx5i_attach_mcast; rn->detach_mcast = mlx5i_detach_mcast; + rn->set_id = mlx5i_set_pkey_index; return netdev; +destroy_ht: + mlx5i_pkey_qpn_ht_cleanup(netdev); +destroy_wq: + destroy_workqueue(epriv->wq); err_free_netdev: free_netdev(netdev); -free_mdev_resources: - mlx5e_destroy_mdev_resources(mdev); return NULL; } @@ -570,15 +653,18 @@ EXPORT_SYMBOL(mlx5_rdma_netdev_alloc); void mlx5_rdma_netdev_free(struct net_device *netdev) { - struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = priv->ppriv; const struct mlx5e_profile *profile = priv->profile; - struct mlx5_core_dev *mdev = priv->mdev; mlx5e_detach_netdev(priv); profile->cleanup(priv); destroy_workqueue(priv->wq); - free_netdev(netdev); - mlx5e_destroy_mdev_resources(mdev); + if (!ipriv->sub_interface) { + mlx5i_pkey_qpn_ht_cleanup(netdev); + mlx5e_destroy_mdev_resources(priv->mdev); + } + free_netdev(netdev); } EXPORT_SYMBOL(mlx5_rdma_netdev_free); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h index a0f405f520f7..49008022c306 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -39,6 +39,7 @@ #define MLX5I_MAX_NUM_TC 1 extern const struct ethtool_ops mlx5i_ethtool_ops; +extern const struct ethtool_ops mlx5i_pkey_ethtool_ops; #define MLX5_IB_GRH_BYTES 40 #define MLX5_IPOIB_ENCAP_LEN 4 @@ -49,10 +50,45 @@ extern const struct ethtool_ops mlx5i_ethtool_ops; struct mlx5i_priv { struct rdma_netdev rn; /* keep this first */ struct mlx5_core_qp qp; + bool sub_interface; u32 qkey; + u16 pkey_index; + struct mlx5i_pkey_qpn_ht *qpn_htbl; char *mlx5e_priv[0]; }; +/* Underlay QP create/destroy functions */ +int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp); +void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp); + +/* Underlay QP state modification init/uninit functions */ +int mlx5i_init_underlay_qp(struct mlx5e_priv *priv); +void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv); + +/* Allocate/Free underlay QPN to net-device hash table */ +int mlx5i_pkey_qpn_ht_init(struct net_device *netdev); +void mlx5i_pkey_qpn_ht_cleanup(struct net_device *netdev); + +/* Add/Remove an underlay QPN to net-device mapping to/from the hash table */ +int mlx5i_pkey_add_qpn(struct net_device *netdev, u32 qpn); +int mlx5i_pkey_del_qpn(struct net_device *netdev, u32 qpn); + +/* Get the net-device corresponding to the given underlay QPN */ +struct net_device *mlx5i_pkey_get_netdev(struct net_device *netdev, u32 qpn); + +/* Shared ndo functionts */ +int mlx5i_dev_init(struct net_device *dev); +void mlx5i_dev_cleanup(struct net_device *dev); + +/* Parent profile functions */ +void mlx5i_init(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv); + +/* Get child interface nic profile */ +const struct mlx5e_profile *mlx5i_pkey_get_profile(void); + /* Extract mlx5e_priv from IPoIB netdev */ #define mlx5i_epriv(netdev) ((void *)(((struct mlx5i_priv *)netdev_priv(netdev))->mlx5e_priv)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c new file mode 100644 index 000000000000..531b02cc979b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -0,0 +1,350 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/hash.h> +#include "ipoib.h" + +#define MLX5I_MAX_LOG_PKEY_SUP 7 + +struct qpn_to_netdev { + struct net_device *netdev; + struct hlist_node hlist; + u32 underlay_qpn; +}; + +struct mlx5i_pkey_qpn_ht { + struct hlist_head buckets[1 << MLX5I_MAX_LOG_PKEY_SUP]; + spinlock_t ht_lock; /* Synchronise with NAPI */ +}; + +int mlx5i_pkey_qpn_ht_init(struct net_device *netdev) +{ + struct mlx5i_priv *ipriv = netdev_priv(netdev); + struct mlx5i_pkey_qpn_ht *qpn_htbl; + + qpn_htbl = kzalloc(sizeof(*qpn_htbl), GFP_KERNEL); + if (!qpn_htbl) + return -ENOMEM; + + ipriv->qpn_htbl = qpn_htbl; + spin_lock_init(&qpn_htbl->ht_lock); + + return 0; +} + +void mlx5i_pkey_qpn_ht_cleanup(struct net_device *netdev) +{ + struct mlx5i_priv *ipriv = netdev_priv(netdev); + + kfree(ipriv->qpn_htbl); +} + +static struct qpn_to_netdev *mlx5i_find_qpn_to_netdev_node(struct hlist_head *buckets, + u32 qpn) +{ + struct hlist_head *h = &buckets[hash_32(qpn, MLX5I_MAX_LOG_PKEY_SUP)]; + struct qpn_to_netdev *node; + + hlist_for_each_entry(node, h, hlist) { + if (node->underlay_qpn == qpn) + return node; + } + + return NULL; +} + +int mlx5i_pkey_add_qpn(struct net_device *netdev, u32 qpn) +{ + struct mlx5i_priv *ipriv = netdev_priv(netdev); + struct mlx5i_pkey_qpn_ht *ht = ipriv->qpn_htbl; + u8 key = hash_32(qpn, MLX5I_MAX_LOG_PKEY_SUP); + struct qpn_to_netdev *new_node; + + new_node = kzalloc(sizeof(*new_node), GFP_KERNEL); + if (!new_node) + return -ENOMEM; + + new_node->netdev = netdev; + new_node->underlay_qpn = qpn; + spin_lock_bh(&ht->ht_lock); + hlist_add_head(&new_node->hlist, &ht->buckets[key]); + spin_unlock_bh(&ht->ht_lock); + + return 0; +} + +int mlx5i_pkey_del_qpn(struct net_device *netdev, u32 qpn) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = epriv->ppriv; + struct mlx5i_pkey_qpn_ht *ht = ipriv->qpn_htbl; + struct qpn_to_netdev *node; + + node = mlx5i_find_qpn_to_netdev_node(ht->buckets, qpn); + if (!node) { + mlx5_core_warn(epriv->mdev, "QPN to netdev delete from HT failed\n"); + return -EINVAL; + } + + spin_lock_bh(&ht->ht_lock); + hlist_del_init(&node->hlist); + spin_unlock_bh(&ht->ht_lock); + kfree(node); + + return 0; +} + +struct net_device *mlx5i_pkey_get_netdev(struct net_device *netdev, u32 qpn) +{ + struct mlx5i_priv *ipriv = netdev_priv(netdev); + struct qpn_to_netdev *node; + + node = mlx5i_find_qpn_to_netdev_node(ipriv->qpn_htbl->buckets, qpn); + if (!node) + return NULL; + + return node->netdev; +} + +static int mlx5i_pkey_open(struct net_device *netdev); +static int mlx5i_pkey_close(struct net_device *netdev); +static int mlx5i_pkey_dev_init(struct net_device *dev); +static void mlx5i_pkey_dev_cleanup(struct net_device *netdev); +static int mlx5i_pkey_change_mtu(struct net_device *netdev, int new_mtu); + +static const struct net_device_ops mlx5i_pkey_netdev_ops = { + .ndo_open = mlx5i_pkey_open, + .ndo_stop = mlx5i_pkey_close, + .ndo_init = mlx5i_pkey_dev_init, + .ndo_uninit = mlx5i_pkey_dev_cleanup, + .ndo_change_mtu = mlx5i_pkey_change_mtu, +}; + +/* Child NDOs */ +static int mlx5i_pkey_dev_init(struct net_device *dev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + struct mlx5i_priv *ipriv, *parent_ipriv; + struct net_device *parent_dev; + int parent_ifindex; + + ipriv = priv->ppriv; + + /* Get QPN to netdevice hash table from parent */ + parent_ifindex = dev->netdev_ops->ndo_get_iflink(dev); + parent_dev = dev_get_by_index(dev_net(dev), parent_ifindex); + if (!parent_dev) { + mlx5_core_warn(priv->mdev, "failed to get parent device\n"); + return -EINVAL; + } + + parent_ipriv = netdev_priv(parent_dev); + ipriv->qpn_htbl = parent_ipriv->qpn_htbl; + dev_put(parent_dev); + + return mlx5i_dev_init(dev); +} + +static void mlx5i_pkey_dev_cleanup(struct net_device *netdev) +{ + return mlx5i_dev_cleanup(netdev); +} + +static int mlx5i_pkey_open(struct net_device *netdev) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = epriv->ppriv; + struct mlx5_core_dev *mdev = epriv->mdev; + int err; + + mutex_lock(&epriv->state_lock); + + set_bit(MLX5E_STATE_OPENED, &epriv->state); + + err = mlx5i_init_underlay_qp(epriv); + if (err) { + mlx5_core_warn(mdev, "prepare child underlay qp state failed, %d\n", err); + goto err_release_lock; + } + + err = mlx5_fs_add_rx_underlay_qpn(mdev, ipriv->qp.qpn); + if (err) { + mlx5_core_warn(mdev, "attach child underlay qp to ft failed, %d\n", err); + goto err_unint_underlay_qp; + } + + err = mlx5e_create_tis(mdev, 0 /* tc */, ipriv->qp.qpn, &epriv->tisn[0]); + if (err) { + mlx5_core_warn(mdev, "create child tis failed, %d\n", err); + goto err_remove_rx_uderlay_qp; + } + + err = mlx5e_open_channels(epriv, &epriv->channels); + if (err) { + mlx5_core_warn(mdev, "opening child channels failed, %d\n", err); + goto err_clear_state_opened_flag; + } + mlx5e_refresh_tirs(epriv, false); + mlx5e_activate_priv_channels(epriv); + mutex_unlock(&epriv->state_lock); + + return 0; + +err_clear_state_opened_flag: + mlx5e_destroy_tis(mdev, epriv->tisn[0]); +err_remove_rx_uderlay_qp: + mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn); +err_unint_underlay_qp: + mlx5i_uninit_underlay_qp(epriv); +err_release_lock: + clear_bit(MLX5E_STATE_OPENED, &epriv->state); + mutex_unlock(&epriv->state_lock); + return err; +} + +static int mlx5i_pkey_close(struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv = priv->ppriv; + struct mlx5_core_dev *mdev = priv->mdev; + + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + + clear_bit(MLX5E_STATE_OPENED, &priv->state); + + netif_carrier_off(priv->netdev); + mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn); + mlx5i_uninit_underlay_qp(priv); + mlx5e_deactivate_priv_channels(priv); + mlx5e_close_channels(&priv->channels); + mlx5e_destroy_tis(mdev, priv->tisn[0]); +unlock: + mutex_unlock(&priv->state_lock); + return 0; +} + +static int mlx5i_pkey_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + mutex_lock(&priv->state_lock); + netdev->mtu = new_mtu; + mutex_unlock(&priv->state_lock); + + return 0; +} + +/* Called directly after IPoIB netdevice was created to initialize SW structs */ +static void mlx5i_pkey_init(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + mlx5i_init(mdev, netdev, profile, ppriv); + + /* Override parent ndo */ + netdev->netdev_ops = &mlx5i_pkey_netdev_ops; + + /* Set child limited ethtool support */ + netdev->ethtool_ops = &mlx5i_pkey_ethtool_ops; + + /* Use dummy rqs */ + priv->channels.params.log_rq_size = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE; +} + +/* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */ +static void mlx5i_pkey_cleanup(struct mlx5e_priv *priv) +{ + /* Do nothing .. */ +} + +static int mlx5i_pkey_init_tx(struct mlx5e_priv *priv) +{ + struct mlx5i_priv *ipriv = priv->ppriv; + int err; + + err = mlx5i_create_underlay_qp(priv->mdev, &ipriv->qp); + if (err) { + mlx5_core_warn(priv->mdev, "create child underlay QP failed, %d\n", err); + return err; + } + + return 0; +} + +static void mlx5i_pkey_cleanup_tx(struct mlx5e_priv *priv) +{ + struct mlx5i_priv *ipriv = priv->ppriv; + + mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp); +} + +static int mlx5i_pkey_init_rx(struct mlx5e_priv *priv) +{ + /* Since the rx resources are shared between child and parent, the + * parent interface is taking care of rx resource allocation and init + */ + return 0; +} + +static void mlx5i_pkey_cleanup_rx(struct mlx5e_priv *priv) +{ + /* Since the rx resources are shared between child and parent, the + * parent interface is taking care of rx resource free and de-init + */ +} + +static const struct mlx5e_profile mlx5i_pkey_nic_profile = { + .init = mlx5i_pkey_init, + .cleanup = mlx5i_pkey_cleanup, + .init_tx = mlx5i_pkey_init_tx, + .cleanup_tx = mlx5i_pkey_cleanup_tx, + .init_rx = mlx5i_pkey_init_rx, + .cleanup_rx = mlx5i_pkey_cleanup_rx, + .enable = NULL, + .disable = NULL, + .update_stats = NULL, + .max_nch = mlx5e_get_max_num_channels, + .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe, + .rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */ + .max_tc = MLX5I_MAX_NUM_TC, +}; + +const struct mlx5e_profile *mlx5i_pkey_get_profile(void) +{ + return &mlx5i_pkey_nic_profile; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c new file mode 100644 index 000000000000..fa8aed62b231 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -0,0 +1,525 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/clocksource.h> +#include "en.h" + +enum { + MLX5_CYCLES_SHIFT = 23 +}; + +enum { + MLX5_PIN_MODE_IN = 0x0, + MLX5_PIN_MODE_OUT = 0x1, +}; + +enum { + MLX5_OUT_PATTERN_PULSE = 0x0, + MLX5_OUT_PATTERN_PERIODIC = 0x1, +}; + +enum { + MLX5_EVENT_MODE_DISABLE = 0x0, + MLX5_EVENT_MODE_REPETETIVE = 0x1, + MLX5_EVENT_MODE_ONCE_TILL_ARM = 0x2, +}; + +enum { + MLX5_MTPPS_FS_ENABLE = BIT(0x0), + MLX5_MTPPS_FS_PATTERN = BIT(0x2), + MLX5_MTPPS_FS_PIN_MODE = BIT(0x3), + MLX5_MTPPS_FS_TIME_STAMP = BIT(0x4), + MLX5_MTPPS_FS_OUT_PULSE_DURATION = BIT(0x5), + MLX5_MTPPS_FS_ENH_OUT_PER_ADJ = BIT(0x7), +}; + +static u64 read_internal_timer(const struct cyclecounter *cc) +{ + struct mlx5_clock *clock = container_of(cc, struct mlx5_clock, cycles); + struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, + clock); + + return mlx5_read_internal_timer(mdev) & cc->mask; +} + +static void mlx5_pps_out(struct work_struct *work) +{ + struct mlx5_pps *pps_info = container_of(work, struct mlx5_pps, + out_work); + struct mlx5_clock *clock = container_of(pps_info, struct mlx5_clock, + pps_info); + struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, + clock); + u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + unsigned long flags; + int i; + + for (i = 0; i < clock->ptp_info.n_pins; i++) { + u64 tstart; + + write_lock_irqsave(&clock->lock, flags); + tstart = clock->pps_info.start[i]; + clock->pps_info.start[i] = 0; + write_unlock_irqrestore(&clock->lock, flags); + if (!tstart) + continue; + + MLX5_SET(mtpps_reg, in, pin, i); + MLX5_SET64(mtpps_reg, in, time_stamp, tstart); + MLX5_SET(mtpps_reg, in, field_select, MLX5_MTPPS_FS_TIME_STAMP); + mlx5_set_mtpps(mdev, in, sizeof(in)); + } +} + +static void mlx5_timestamp_overflow(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct mlx5_clock *clock = container_of(dwork, struct mlx5_clock, + overflow_work); + unsigned long flags; + + write_lock_irqsave(&clock->lock, flags); + timecounter_read(&clock->tc); + write_unlock_irqrestore(&clock->lock, flags); + schedule_delayed_work(&clock->overflow_work, clock->overflow_period); +} + +static int mlx5_ptp_settime(struct ptp_clock_info *ptp, + const struct timespec64 *ts) +{ + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, + ptp_info); + u64 ns = timespec64_to_ns(ts); + unsigned long flags; + + write_lock_irqsave(&clock->lock, flags); + timecounter_init(&clock->tc, &clock->cycles, ns); + write_unlock_irqrestore(&clock->lock, flags); + + return 0; +} + +static int mlx5_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) +{ + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, + ptp_info); + u64 ns; + unsigned long flags; + + write_lock_irqsave(&clock->lock, flags); + ns = timecounter_read(&clock->tc); + write_unlock_irqrestore(&clock->lock, flags); + + *ts = ns_to_timespec64(ns); + + return 0; +} + +static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, + ptp_info); + unsigned long flags; + + write_lock_irqsave(&clock->lock, flags); + timecounter_adjtime(&clock->tc, delta); + write_unlock_irqrestore(&clock->lock, flags); + + return 0; +} + +static int mlx5_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta) +{ + u64 adj; + u32 diff; + unsigned long flags; + int neg_adj = 0; + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, + ptp_info); + + if (delta < 0) { + neg_adj = 1; + delta = -delta; + } + + adj = clock->nominal_c_mult; + adj *= delta; + diff = div_u64(adj, 1000000000ULL); + + write_lock_irqsave(&clock->lock, flags); + timecounter_read(&clock->tc); + clock->cycles.mult = neg_adj ? clock->nominal_c_mult - diff : + clock->nominal_c_mult + diff; + write_unlock_irqrestore(&clock->lock, flags); + + return 0; +} + +static int mlx5_extts_configure(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) +{ + struct mlx5_clock *clock = + container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev = + container_of(clock, struct mlx5_core_dev, clock); + u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + u32 field_select = 0; + u8 pin_mode = 0; + u8 pattern = 0; + int pin = -1; + int err = 0; + + if (!MLX5_PPS_CAP(mdev)) + return -EOPNOTSUPP; + + if (rq->extts.index >= clock->ptp_info.n_pins) + return -EINVAL; + + if (on) { + pin = ptp_find_pin(clock->ptp, PTP_PF_EXTTS, rq->extts.index); + if (pin < 0) + return -EBUSY; + pin_mode = MLX5_PIN_MODE_IN; + pattern = !!(rq->extts.flags & PTP_FALLING_EDGE); + field_select = MLX5_MTPPS_FS_PIN_MODE | + MLX5_MTPPS_FS_PATTERN | + MLX5_MTPPS_FS_ENABLE; + } else { + pin = rq->extts.index; + field_select = MLX5_MTPPS_FS_ENABLE; + } + + MLX5_SET(mtpps_reg, in, pin, pin); + MLX5_SET(mtpps_reg, in, pin_mode, pin_mode); + MLX5_SET(mtpps_reg, in, pattern, pattern); + MLX5_SET(mtpps_reg, in, enable, on); + MLX5_SET(mtpps_reg, in, field_select, field_select); + + err = mlx5_set_mtpps(mdev, in, sizeof(in)); + if (err) + return err; + + return mlx5_set_mtppse(mdev, pin, 0, + MLX5_EVENT_MODE_REPETETIVE & on); +} + +static int mlx5_perout_configure(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) +{ + struct mlx5_clock *clock = + container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev = + container_of(clock, struct mlx5_core_dev, clock); + u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + u64 nsec_now, nsec_delta, time_stamp = 0; + u64 cycles_now, cycles_delta; + struct timespec64 ts; + unsigned long flags; + u32 field_select = 0; + u8 pin_mode = 0; + u8 pattern = 0; + int pin = -1; + int err = 0; + s64 ns; + + if (!MLX5_PPS_CAP(mdev)) + return -EOPNOTSUPP; + + if (rq->perout.index >= clock->ptp_info.n_pins) + return -EINVAL; + + if (on) { + pin = ptp_find_pin(clock->ptp, PTP_PF_PEROUT, + rq->perout.index); + if (pin < 0) + return -EBUSY; + + pin_mode = MLX5_PIN_MODE_OUT; + pattern = MLX5_OUT_PATTERN_PERIODIC; + ts.tv_sec = rq->perout.period.sec; + ts.tv_nsec = rq->perout.period.nsec; + ns = timespec64_to_ns(&ts); + + if ((ns >> 1) != 500000000LL) + return -EINVAL; + + ts.tv_sec = rq->perout.start.sec; + ts.tv_nsec = rq->perout.start.nsec; + ns = timespec64_to_ns(&ts); + cycles_now = mlx5_read_internal_timer(mdev); + write_lock_irqsave(&clock->lock, flags); + nsec_now = timecounter_cyc2time(&clock->tc, cycles_now); + nsec_delta = ns - nsec_now; + cycles_delta = div64_u64(nsec_delta << clock->cycles.shift, + clock->cycles.mult); + write_unlock_irqrestore(&clock->lock, flags); + time_stamp = cycles_now + cycles_delta; + field_select = MLX5_MTPPS_FS_PIN_MODE | + MLX5_MTPPS_FS_PATTERN | + MLX5_MTPPS_FS_ENABLE | + MLX5_MTPPS_FS_TIME_STAMP; + } else { + pin = rq->perout.index; + field_select = MLX5_MTPPS_FS_ENABLE; + } + + MLX5_SET(mtpps_reg, in, pin, pin); + MLX5_SET(mtpps_reg, in, pin_mode, pin_mode); + MLX5_SET(mtpps_reg, in, pattern, pattern); + MLX5_SET(mtpps_reg, in, enable, on); + MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp); + MLX5_SET(mtpps_reg, in, field_select, field_select); + + err = mlx5_set_mtpps(mdev, in, sizeof(in)); + if (err) + return err; + + return mlx5_set_mtppse(mdev, pin, 0, + MLX5_EVENT_MODE_REPETETIVE & on); +} + +static int mlx5_pps_configure(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) +{ + struct mlx5_clock *clock = + container_of(ptp, struct mlx5_clock, ptp_info); + + clock->pps_info.enabled = !!on; + return 0; +} + +static int mlx5_ptp_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, + int on) +{ + switch (rq->type) { + case PTP_CLK_REQ_EXTTS: + return mlx5_extts_configure(ptp, rq, on); + case PTP_CLK_REQ_PEROUT: + return mlx5_perout_configure(ptp, rq, on); + case PTP_CLK_REQ_PPS: + return mlx5_pps_configure(ptp, rq, on); + default: + return -EOPNOTSUPP; + } + return 0; +} + +static int mlx5_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin, + enum ptp_pin_function func, unsigned int chan) +{ + return (func == PTP_PF_PHYSYNC) ? -EOPNOTSUPP : 0; +} + +static const struct ptp_clock_info mlx5_ptp_clock_info = { + .owner = THIS_MODULE, + .name = "mlx5_p2p", + .max_adj = 100000000, + .n_alarm = 0, + .n_ext_ts = 0, + .n_per_out = 0, + .n_pins = 0, + .pps = 0, + .adjfreq = mlx5_ptp_adjfreq, + .adjtime = mlx5_ptp_adjtime, + .gettime64 = mlx5_ptp_gettime, + .settime64 = mlx5_ptp_settime, + .enable = NULL, + .verify = NULL, +}; + +static int mlx5_init_pin_config(struct mlx5_clock *clock) +{ + int i; + + clock->ptp_info.pin_config = + kzalloc(sizeof(*clock->ptp_info.pin_config) * + clock->ptp_info.n_pins, GFP_KERNEL); + if (!clock->ptp_info.pin_config) + return -ENOMEM; + clock->ptp_info.enable = mlx5_ptp_enable; + clock->ptp_info.verify = mlx5_ptp_verify; + clock->ptp_info.pps = 1; + + for (i = 0; i < clock->ptp_info.n_pins; i++) { + snprintf(clock->ptp_info.pin_config[i].name, + sizeof(clock->ptp_info.pin_config[i].name), + "mlx5_pps%d", i); + clock->ptp_info.pin_config[i].index = i; + clock->ptp_info.pin_config[i].func = PTP_PF_NONE; + clock->ptp_info.pin_config[i].chan = i; + } + + return 0; +} + +static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock *clock = &mdev->clock; + u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + + mlx5_query_mtpps(mdev, out, sizeof(out)); + + clock->ptp_info.n_pins = MLX5_GET(mtpps_reg, out, + cap_number_of_pps_pins); + clock->ptp_info.n_ext_ts = MLX5_GET(mtpps_reg, out, + cap_max_num_of_pps_in_pins); + clock->ptp_info.n_per_out = MLX5_GET(mtpps_reg, out, + cap_max_num_of_pps_out_pins); + + clock->pps_info.pin_caps[0] = MLX5_GET(mtpps_reg, out, cap_pin_0_mode); + clock->pps_info.pin_caps[1] = MLX5_GET(mtpps_reg, out, cap_pin_1_mode); + clock->pps_info.pin_caps[2] = MLX5_GET(mtpps_reg, out, cap_pin_2_mode); + clock->pps_info.pin_caps[3] = MLX5_GET(mtpps_reg, out, cap_pin_3_mode); + clock->pps_info.pin_caps[4] = MLX5_GET(mtpps_reg, out, cap_pin_4_mode); + clock->pps_info.pin_caps[5] = MLX5_GET(mtpps_reg, out, cap_pin_5_mode); + clock->pps_info.pin_caps[6] = MLX5_GET(mtpps_reg, out, cap_pin_6_mode); + clock->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode); +} + +void mlx5_pps_event(struct mlx5_core_dev *mdev, + struct mlx5_eqe *eqe) +{ + struct mlx5_clock *clock = &mdev->clock; + struct ptp_clock_event ptp_event; + struct timespec64 ts; + u64 nsec_now, nsec_delta; + u64 cycles_now, cycles_delta; + int pin = eqe->data.pps.pin; + s64 ns; + unsigned long flags; + + switch (clock->ptp_info.pin_config[pin].func) { + case PTP_PF_EXTTS: + if (clock->pps_info.enabled) { + ptp_event.type = PTP_CLOCK_PPSUSR; + ptp_event.pps_times.ts_real = ns_to_timespec64(eqe->data.pps.time_stamp); + } else { + ptp_event.type = PTP_CLOCK_EXTTS; + } + ptp_clock_event(clock->ptp, &ptp_event); + break; + case PTP_PF_PEROUT: + mlx5_ptp_gettime(&clock->ptp_info, &ts); + cycles_now = mlx5_read_internal_timer(mdev); + ts.tv_sec += 1; + ts.tv_nsec = 0; + ns = timespec64_to_ns(&ts); + write_lock_irqsave(&clock->lock, flags); + nsec_now = timecounter_cyc2time(&clock->tc, cycles_now); + nsec_delta = ns - nsec_now; + cycles_delta = div64_u64(nsec_delta << clock->cycles.shift, + clock->cycles.mult); + clock->pps_info.start[pin] = cycles_now + cycles_delta; + schedule_work(&clock->pps_info.out_work); + write_unlock_irqrestore(&clock->lock, flags); + break; + default: + mlx5_core_err(mdev, " Unhandled event\n"); + } +} + +void mlx5_init_clock(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock *clock = &mdev->clock; + u64 ns; + u64 frac = 0; + u32 dev_freq; + + dev_freq = MLX5_CAP_GEN(mdev, device_frequency_khz); + if (!dev_freq) { + mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n"); + return; + } + rwlock_init(&clock->lock); + clock->cycles.read = read_internal_timer; + clock->cycles.shift = MLX5_CYCLES_SHIFT; + clock->cycles.mult = clocksource_khz2mult(dev_freq, + clock->cycles.shift); + clock->nominal_c_mult = clock->cycles.mult; + clock->cycles.mask = CLOCKSOURCE_MASK(41); + + timecounter_init(&clock->tc, &clock->cycles, + ktime_to_ns(ktime_get_real())); + + /* Calculate period in seconds to call the overflow watchdog - to make + * sure counter is checked at least once every wrap around. + */ + ns = cyclecounter_cyc2ns(&clock->cycles, clock->cycles.mask, + frac, &frac); + do_div(ns, NSEC_PER_SEC / 2 / HZ); + clock->overflow_period = ns; + + INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out); + INIT_DELAYED_WORK(&clock->overflow_work, mlx5_timestamp_overflow); + if (clock->overflow_period) + schedule_delayed_work(&clock->overflow_work, 0); + else + mlx5_core_warn(mdev, "invalid overflow period, overflow_work is not scheduled\n"); + + /* Configure the PHC */ + clock->ptp_info = mlx5_ptp_clock_info; + + /* Initialize 1PPS data structures */ + if (MLX5_PPS_CAP(mdev)) + mlx5_get_pps_caps(mdev); + if (clock->ptp_info.n_pins) + mlx5_init_pin_config(clock); + + clock->ptp = ptp_clock_register(&clock->ptp_info, + &mdev->pdev->dev); + if (IS_ERR(clock->ptp)) { + mlx5_core_warn(mdev, "ptp_clock_register failed %ld\n", + PTR_ERR(clock->ptp)); + clock->ptp = NULL; + } +} + +void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock *clock = &mdev->clock; + + if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) + return; + + if (clock->ptp) { + ptp_clock_unregister(clock->ptp); + clock->ptp = NULL; + } + + cancel_work_sync(&clock->pps_info.out_work); + cancel_delayed_work_sync(&clock->overflow_work); + kfree(clock->ptp_info.pin_config); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h new file mode 100644 index 000000000000..a8eecedd46c2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __LIB_CLOCK_H__ +#define __LIB_CLOCK_H__ + +void mlx5_init_clock(struct mlx5_core_dev *mdev); +void mlx5_cleanup_clock(struct mlx5_core_dev *mdev); + +static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock, + u64 timestamp) +{ + u64 nsec; + + read_lock(&clock->lock); + nsec = timecounter_cyc2time(&clock->tc, timestamp); + read_unlock(&clock->lock); + + return ns_to_ktime(nsec); +} + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 06562c9a6b9c..5f323442cc5a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -59,6 +59,7 @@ #include "lib/mlx5.h" #include "fpga/core.h" #include "accel/ipsec.h" +#include "lib/clock.h" MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver"); @@ -889,6 +890,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_init_reserved_gids(dev); + mlx5_init_clock(dev); + err = mlx5_init_rl_table(dev); if (err) { dev_err(&pdev->dev, "Failed to init rate limiting\n"); @@ -949,6 +952,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_eswitch_cleanup(dev->priv.eswitch); mlx5_mpfs_cleanup(dev); mlx5_cleanup_rl_table(dev); + mlx5_cleanup_clock(dev); mlx5_cleanup_reserved_gids(dev); mlx5_cleanup_mkey_table(dev); mlx5_cleanup_srq_table(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index b7c2900b75f9..ff4a0b889a6f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -93,6 +93,7 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); void mlx5_core_page_fault(struct mlx5_core_dev *dev, struct mlx5_pagefault *pfault); +void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force); void mlx5_disable_device(struct mlx5_core_dev *dev); @@ -121,6 +122,8 @@ int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group, u8 access_reg_group); int mlx5_query_mcam_reg(struct mlx5_core_dev *dev, u32 *mcap, u8 feature_group, u8 access_reg_group); +int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam, + u8 feature_group, u8 access_reg_group); void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev); void mlx5_lag_remove(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index e07061f565d6..c37d00cd472a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -98,6 +98,18 @@ int mlx5_query_mcam_reg(struct mlx5_core_dev *dev, u32 *mcam, u8 feature_group, return mlx5_core_access_reg(dev, in, sz, mcam, sz, MLX5_REG_MCAM, 0, 0); } +int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam, + u8 feature_group, u8 access_reg_group) +{ + u32 in[MLX5_ST_SZ_DW(qcam_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(qcam_reg); + + MLX5_SET(qcam_reg, in, feature_group, feature_group); + MLX5_SET(qcam_reg, in, access_reg_group, access_reg_group); + + return mlx5_core_access_reg(mdev, in, sz, qcam, sz, MLX5_REG_QCAM, 0, 0); +} + struct mlx5_reg_pcap { u8 rsvd0; u8 port_num; @@ -959,3 +971,102 @@ int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode) return mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out), MLX5_REG_MTPPSE, 0, 1); } + +int mlx5_set_trust_state(struct mlx5_core_dev *mdev, u8 trust_state) +{ + u32 out[MLX5_ST_SZ_DW(qpts_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(qpts_reg)] = {}; + int err; + + MLX5_SET(qpts_reg, in, local_port, 1); + MLX5_SET(qpts_reg, in, trust_state, trust_state); + + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(out), MLX5_REG_QPTS, 0, 1); + return err; +} + +int mlx5_query_trust_state(struct mlx5_core_dev *mdev, u8 *trust_state) +{ + u32 out[MLX5_ST_SZ_DW(qpts_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(qpts_reg)] = {}; + int err; + + MLX5_SET(qpts_reg, in, local_port, 1); + + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, + sizeof(out), MLX5_REG_QPTS, 0, 0); + if (!err) + *trust_state = MLX5_GET(qpts_reg, out, trust_state); + + return err; +} + +int mlx5_set_dscp2prio(struct mlx5_core_dev *mdev, u8 dscp, u8 prio) +{ + int sz = MLX5_ST_SZ_BYTES(qpdpm_reg); + void *qpdpm_dscp; + void *out; + void *in; + int err; + + in = kzalloc(sz, GFP_KERNEL); + out = kzalloc(sz, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(qpdpm_reg, in, local_port, 1); + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_QPDPM, 0, 0); + if (err) + goto out; + + memcpy(in, out, sz); + MLX5_SET(qpdpm_reg, in, local_port, 1); + + /* Update the corresponding dscp entry */ + qpdpm_dscp = MLX5_ADDR_OF(qpdpm_reg, in, dscp[dscp]); + MLX5_SET16(qpdpm_dscp_reg, qpdpm_dscp, prio, prio); + MLX5_SET16(qpdpm_dscp_reg, qpdpm_dscp, e, 1); + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_QPDPM, 0, 1); + +out: + kfree(in); + kfree(out); + return err; +} + +/* dscp2prio[i]: priority that dscp i mapped to */ +#define MLX5E_SUPPORTED_DSCP 64 +int mlx5_query_dscp2prio(struct mlx5_core_dev *mdev, u8 *dscp2prio) +{ + int sz = MLX5_ST_SZ_BYTES(qpdpm_reg); + void *qpdpm_dscp; + void *out; + void *in; + int err; + int i; + + in = kzalloc(sz, GFP_KERNEL); + out = kzalloc(sz, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(qpdpm_reg, in, local_port, 1); + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_QPDPM, 0, 0); + if (err) + goto out; + + for (i = 0; i < (MLX5E_SUPPORTED_DSCP); i++) { + qpdpm_dscp = MLX5_ADDR_OF(qpdpm_reg, out, dscp[i]); + dscp2prio[i] = MLX5_GET16(qpdpm_dscp_reg, qpdpm_dscp, prio); + } + +out: + kfree(in); + kfree(out); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 9a5a1cc877b6..9463c3fa254f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -18,7 +18,9 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_kvdl.o spectrum_acl_tcam.o \ spectrum_acl.o spectrum_flower.o \ spectrum_cnt.o spectrum_fid.o \ - spectrum_ipip.o + spectrum_ipip.o spectrum_acl_flex_actions.o \ + spectrum_mr.o spectrum_mr_tcam.o \ + spectrum_qdisc.o mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index 5ae110172c22..6a979a09ab72 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -399,23 +399,25 @@ u32 mlxsw_afa_block_first_set_kvdl_index(struct mlxsw_afa_block *block) } EXPORT_SYMBOL(mlxsw_afa_block_first_set_kvdl_index); -void mlxsw_afa_block_continue(struct mlxsw_afa_block *block) +int mlxsw_afa_block_continue(struct mlxsw_afa_block *block) { - if (WARN_ON(block->finished)) - return; + if (block->finished) + return -EINVAL; mlxsw_afa_set_goto_set(block->cur_set, MLXSW_AFA_SET_GOTO_BINDING_CMD_NONE, 0); block->finished = true; + return 0; } EXPORT_SYMBOL(mlxsw_afa_block_continue); -void mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id) +int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id) { - if (WARN_ON(block->finished)) - return; + if (block->finished) + return -EINVAL; mlxsw_afa_set_goto_set(block->cur_set, MLXSW_AFA_SET_GOTO_BINDING_CMD_JUMP, group_id); block->finished = true; + return 0; } EXPORT_SYMBOL(mlxsw_afa_block_jump); @@ -674,6 +676,7 @@ enum mlxsw_afa_trapdisc_trap_action { MLXSW_ITEM32(afa, trapdisc, trap_action, 0x00, 24, 4); enum mlxsw_afa_trapdisc_forward_action { + MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD = 1, MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD = 3, }; @@ -712,7 +715,7 @@ int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block) } EXPORT_SYMBOL(mlxsw_afa_block_append_drop); -int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block) +int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id) { char *act = mlxsw_afa_block_append_action(block, MLXSW_AFA_TRAPDISC_CODE, @@ -722,11 +725,27 @@ int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block) return -ENOBUFS; mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP, MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD, - MLXSW_TRAP_ID_ACL0); + trap_id); return 0; } EXPORT_SYMBOL(mlxsw_afa_block_append_trap); +int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block, + u16 trap_id) +{ + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_TRAPDISC_CODE, + MLXSW_AFA_TRAPDISC_SIZE); + + if (!act) + return -ENOBUFS; + mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP, + MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD, + trap_id); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_trap_and_forward); + /* Forwarding Action * ----------------- * Forwarding Action can be used to implement Policy Based Switching (PBS) @@ -891,3 +910,74 @@ int mlxsw_afa_block_append_fid_set(struct mlxsw_afa_block *block, u16 fid) return 0; } EXPORT_SYMBOL(mlxsw_afa_block_append_fid_set); + +/* MC Routing Action + * ----------------- + * The Multicast router action. Can be used by RMFT_V2 - Router Multicast + * Forwarding Table Version 2 Register. + */ + +#define MLXSW_AFA_MCROUTER_CODE 0x10 +#define MLXSW_AFA_MCROUTER_SIZE 2 + +enum mlxsw_afa_mcrouter_rpf_action { + MLXSW_AFA_MCROUTER_RPF_ACTION_NOP, + MLXSW_AFA_MCROUTER_RPF_ACTION_TRAP, + MLXSW_AFA_MCROUTER_RPF_ACTION_DISCARD_ERROR, +}; + +/* afa_mcrouter_rpf_action */ +MLXSW_ITEM32(afa, mcrouter, rpf_action, 0x00, 28, 3); + +/* afa_mcrouter_expected_irif */ +MLXSW_ITEM32(afa, mcrouter, expected_irif, 0x00, 0, 16); + +/* afa_mcrouter_min_mtu */ +MLXSW_ITEM32(afa, mcrouter, min_mtu, 0x08, 0, 16); + +enum mlxsw_afa_mrouter_vrmid { + MLXSW_AFA_MCROUTER_VRMID_INVALID, + MLXSW_AFA_MCROUTER_VRMID_VALID +}; + +/* afa_mcrouter_vrmid + * Valid RMID: rigr_rmid_index is used as RMID + */ +MLXSW_ITEM32(afa, mcrouter, vrmid, 0x0C, 31, 1); + +/* afa_mcrouter_rigr_rmid_index + * When the vrmid field is set to invalid, the field is used as pointer to + * Router Interface Group (RIGR) Table in the KVD linear. + * When the vrmid is set to valid, the field is used as RMID index, ranged + * from 0 to max_mid - 1. The index is to the Port Group Table. + */ +MLXSW_ITEM32(afa, mcrouter, rigr_rmid_index, 0x0C, 0, 24); + +static inline void +mlxsw_afa_mcrouter_pack(char *payload, + enum mlxsw_afa_mcrouter_rpf_action rpf_action, + u16 expected_irif, u16 min_mtu, + enum mlxsw_afa_mrouter_vrmid vrmid, u32 rigr_rmid_index) + +{ + mlxsw_afa_mcrouter_rpf_action_set(payload, rpf_action); + mlxsw_afa_mcrouter_expected_irif_set(payload, expected_irif); + mlxsw_afa_mcrouter_min_mtu_set(payload, min_mtu); + mlxsw_afa_mcrouter_vrmid_set(payload, vrmid); + mlxsw_afa_mcrouter_rigr_rmid_index_set(payload, rigr_rmid_index); +} + +int mlxsw_afa_block_append_mcrouter(struct mlxsw_afa_block *block, + u16 expected_irif, u16 min_mtu, + bool rmid_valid, u32 kvdl_index) +{ + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_MCROUTER_CODE, + MLXSW_AFA_MCROUTER_SIZE); + if (!act) + return -ENOBUFS; + mlxsw_afa_mcrouter_pack(act, MLXSW_AFA_MCROUTER_RPF_ACTION_TRAP, + expected_irif, min_mtu, rmid_valid, kvdl_index); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_mcrouter); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h index f99c341b2497..a8d3314c3a24 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h @@ -57,10 +57,12 @@ void mlxsw_afa_block_destroy(struct mlxsw_afa_block *block); int mlxsw_afa_block_commit(struct mlxsw_afa_block *block); char *mlxsw_afa_block_first_set(struct mlxsw_afa_block *block); u32 mlxsw_afa_block_first_set_kvdl_index(struct mlxsw_afa_block *block); -void mlxsw_afa_block_continue(struct mlxsw_afa_block *block); -void mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id); +int mlxsw_afa_block_continue(struct mlxsw_afa_block *block); +int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id); int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block); -int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block); +int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id); +int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block, + u16 trap_id); int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block, u8 local_port, bool in_port); int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block, @@ -68,5 +70,8 @@ int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block, int mlxsw_afa_block_append_counter(struct mlxsw_afa_block *block, u32 counter_index); int mlxsw_afa_block_append_fid_set(struct mlxsw_afa_block *block, u16 fid); +int mlxsw_afa_block_append_mcrouter(struct mlxsw_afa_block *block, + u16 expected_irif, u16 min_mtu, + bool rmid_valid, u32 kvdl_index); #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 5acfbe5b8b9d..6c4e08b8058a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -1758,6 +1758,191 @@ static inline void mlxsw_reg_spvmlr_pack(char *payload, u8 local_port, } } +/* CWTP - Congetion WRED ECN TClass Profile + * ---------------------------------------- + * Configures the profiles for queues of egress port and traffic class + */ +#define MLXSW_REG_CWTP_ID 0x2802 +#define MLXSW_REG_CWTP_BASE_LEN 0x28 +#define MLXSW_REG_CWTP_PROFILE_DATA_REC_LEN 0x08 +#define MLXSW_REG_CWTP_LEN 0x40 + +MLXSW_REG_DEFINE(cwtp, MLXSW_REG_CWTP_ID, MLXSW_REG_CWTP_LEN); + +/* reg_cwtp_local_port + * Local port number + * Not supported for CPU port + * Access: Index + */ +MLXSW_ITEM32(reg, cwtp, local_port, 0, 16, 8); + +/* reg_cwtp_traffic_class + * Traffic Class to configure + * Access: Index + */ +MLXSW_ITEM32(reg, cwtp, traffic_class, 32, 0, 8); + +/* reg_cwtp_profile_min + * Minimum Average Queue Size of the profile in cells. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, cwtp, profile_min, MLXSW_REG_CWTP_BASE_LEN, + 0, 20, MLXSW_REG_CWTP_PROFILE_DATA_REC_LEN, 0, false); + +/* reg_cwtp_profile_percent + * Percentage of WRED and ECN marking for maximum Average Queue size + * Range is 0 to 100, units of integer percentage + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, cwtp, profile_percent, MLXSW_REG_CWTP_BASE_LEN, + 24, 7, MLXSW_REG_CWTP_PROFILE_DATA_REC_LEN, 4, false); + +/* reg_cwtp_profile_max + * Maximum Average Queue size of the profile in cells + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, cwtp, profile_max, MLXSW_REG_CWTP_BASE_LEN, + 0, 20, MLXSW_REG_CWTP_PROFILE_DATA_REC_LEN, 4, false); + +#define MLXSW_REG_CWTP_MIN_VALUE 64 +#define MLXSW_REG_CWTP_MAX_PROFILE 2 +#define MLXSW_REG_CWTP_DEFAULT_PROFILE 1 + +static inline void mlxsw_reg_cwtp_pack(char *payload, u8 local_port, + u8 traffic_class) +{ + int i; + + MLXSW_REG_ZERO(cwtp, payload); + mlxsw_reg_cwtp_local_port_set(payload, local_port); + mlxsw_reg_cwtp_traffic_class_set(payload, traffic_class); + + for (i = 0; i <= MLXSW_REG_CWTP_MAX_PROFILE; i++) { + mlxsw_reg_cwtp_profile_min_set(payload, i, + MLXSW_REG_CWTP_MIN_VALUE); + mlxsw_reg_cwtp_profile_max_set(payload, i, + MLXSW_REG_CWTP_MIN_VALUE); + } +} + +#define MLXSW_REG_CWTP_PROFILE_TO_INDEX(profile) (profile - 1) + +static inline void +mlxsw_reg_cwtp_profile_pack(char *payload, u8 profile, u32 min, u32 max, + u32 probability) +{ + u8 index = MLXSW_REG_CWTP_PROFILE_TO_INDEX(profile); + + mlxsw_reg_cwtp_profile_min_set(payload, index, min); + mlxsw_reg_cwtp_profile_max_set(payload, index, max); + mlxsw_reg_cwtp_profile_percent_set(payload, index, probability); +} + +/* CWTPM - Congestion WRED ECN TClass and Pool Mapping + * --------------------------------------------------- + * The CWTPM register maps each egress port and traffic class to profile num. + */ +#define MLXSW_REG_CWTPM_ID 0x2803 +#define MLXSW_REG_CWTPM_LEN 0x44 + +MLXSW_REG_DEFINE(cwtpm, MLXSW_REG_CWTPM_ID, MLXSW_REG_CWTPM_LEN); + +/* reg_cwtpm_local_port + * Local port number + * Not supported for CPU port + * Access: Index + */ +MLXSW_ITEM32(reg, cwtpm, local_port, 0, 16, 8); + +/* reg_cwtpm_traffic_class + * Traffic Class to configure + * Access: Index + */ +MLXSW_ITEM32(reg, cwtpm, traffic_class, 32, 0, 8); + +/* reg_cwtpm_ew + * Control enablement of WRED for traffic class: + * 0 - Disable + * 1 - Enable + * Access: RW + */ +MLXSW_ITEM32(reg, cwtpm, ew, 36, 1, 1); + +/* reg_cwtpm_ee + * Control enablement of ECN for traffic class: + * 0 - Disable + * 1 - Enable + * Access: RW + */ +MLXSW_ITEM32(reg, cwtpm, ee, 36, 0, 1); + +/* reg_cwtpm_tcp_g + * TCP Green Profile. + * Index of the profile within {port, traffic class} to use. + * 0 for disabling both WRED and ECN for this type of traffic. + * Access: RW + */ +MLXSW_ITEM32(reg, cwtpm, tcp_g, 52, 0, 2); + +/* reg_cwtpm_tcp_y + * TCP Yellow Profile. + * Index of the profile within {port, traffic class} to use. + * 0 for disabling both WRED and ECN for this type of traffic. + * Access: RW + */ +MLXSW_ITEM32(reg, cwtpm, tcp_y, 56, 16, 2); + +/* reg_cwtpm_tcp_r + * TCP Red Profile. + * Index of the profile within {port, traffic class} to use. + * 0 for disabling both WRED and ECN for this type of traffic. + * Access: RW + */ +MLXSW_ITEM32(reg, cwtpm, tcp_r, 56, 0, 2); + +/* reg_cwtpm_ntcp_g + * Non-TCP Green Profile. + * Index of the profile within {port, traffic class} to use. + * 0 for disabling both WRED and ECN for this type of traffic. + * Access: RW + */ +MLXSW_ITEM32(reg, cwtpm, ntcp_g, 60, 0, 2); + +/* reg_cwtpm_ntcp_y + * Non-TCP Yellow Profile. + * Index of the profile within {port, traffic class} to use. + * 0 for disabling both WRED and ECN for this type of traffic. + * Access: RW + */ +MLXSW_ITEM32(reg, cwtpm, ntcp_y, 64, 16, 2); + +/* reg_cwtpm_ntcp_r + * Non-TCP Red Profile. + * Index of the profile within {port, traffic class} to use. + * 0 for disabling both WRED and ECN for this type of traffic. + * Access: RW + */ +MLXSW_ITEM32(reg, cwtpm, ntcp_r, 64, 0, 2); + +#define MLXSW_REG_CWTPM_RESET_PROFILE 0 + +static inline void mlxsw_reg_cwtpm_pack(char *payload, u8 local_port, + u8 traffic_class, u8 profile, + bool wred, bool ecn) +{ + MLXSW_REG_ZERO(cwtpm, payload); + mlxsw_reg_cwtpm_local_port_set(payload, local_port); + mlxsw_reg_cwtpm_traffic_class_set(payload, traffic_class); + mlxsw_reg_cwtpm_ew_set(payload, wred); + mlxsw_reg_cwtpm_ee_set(payload, ecn); + mlxsw_reg_cwtpm_tcp_g_set(payload, profile); + mlxsw_reg_cwtpm_tcp_y_set(payload, profile); + mlxsw_reg_cwtpm_tcp_r_set(payload, profile); + mlxsw_reg_cwtpm_ntcp_g_set(payload, profile); + mlxsw_reg_cwtpm_ntcp_y_set(payload, profile); + mlxsw_reg_cwtpm_ntcp_r_set(payload, profile); +} + /* PPBT - Policy-Engine Port Binding Table * --------------------------------------- * This register is used for configuration of the Port Binding Table. @@ -2142,15 +2327,14 @@ MLXSW_REG_DEFINE(pefa, MLXSW_REG_PEFA_ID, MLXSW_REG_PEFA_LEN); */ MLXSW_ITEM32(reg, pefa, index, 0x00, 0, 24); -#define MLXSW_REG_PXXX_FLEX_ACTION_SET_LEN 0xA8 +#define MLXSW_REG_FLEX_ACTION_SET_LEN 0xA8 /* reg_pefa_flex_action_set * Action-set to perform when rule is matched. * Must be zero padded if action set is shorter. * Access: RW */ -MLXSW_ITEM_BUF(reg, pefa, flex_action_set, 0x08, - MLXSW_REG_PXXX_FLEX_ACTION_SET_LEN); +MLXSW_ITEM_BUF(reg, pefa, flex_action_set, 0x08, MLXSW_REG_FLEX_ACTION_SET_LEN); static inline void mlxsw_reg_pefa_pack(char *payload, u32 index, const char *flex_action_set) @@ -2243,7 +2427,7 @@ MLXSW_ITEM_BUF(reg, ptce2, mask, 0x80, * Access: RW */ MLXSW_ITEM_BUF(reg, ptce2, flex_action_set, 0xE0, - MLXSW_REG_PXXX_FLEX_ACTION_SET_LEN); + MLXSW_REG_FLEX_ACTION_SET_LEN); static inline void mlxsw_reg_ptce2_pack(char *payload, bool valid, enum mlxsw_reg_ptce2_op op, @@ -3124,6 +3308,7 @@ static inline void mlxsw_reg_pfcc_pack(char *payload, u8 local_port) */ #define MLXSW_REG_PPCNT_ID 0x5008 #define MLXSW_REG_PPCNT_LEN 0x100 +#define MLXSW_REG_PPCNT_COUNTERS_OFFSET 0x08 MLXSW_REG_DEFINE(ppcnt, MLXSW_REG_PPCNT_ID, MLXSW_REG_PPCNT_LEN); @@ -3156,8 +3341,10 @@ MLXSW_ITEM32(reg, ppcnt, pnat, 0x00, 14, 2); enum mlxsw_reg_ppcnt_grp { MLXSW_REG_PPCNT_IEEE_8023_CNT = 0x0, + MLXSW_REG_PPCNT_EXT_CNT = 0x5, MLXSW_REG_PPCNT_PRIO_CNT = 0x10, MLXSW_REG_PPCNT_TC_CNT = 0x11, + MLXSW_REG_PPCNT_TC_CONG_TC = 0x13, }; /* reg_ppcnt_grp @@ -3173,6 +3360,7 @@ enum mlxsw_reg_ppcnt_grp { * 0x10: Per Priority Counters * 0x11: Per Traffic Class Counters * 0x12: Physical Layer Counters + * 0x13: Per Traffic Class Congestion Counters * Access: Index */ MLXSW_ITEM32(reg, ppcnt, grp, 0x00, 0, 6); @@ -3201,162 +3389,179 @@ MLXSW_ITEM32(reg, ppcnt, prio_tc, 0x04, 0, 5); * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_frames_transmitted_ok, - 0x08 + 0x00, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x00, 0, 64); /* reg_ppcnt_a_frames_received_ok * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_frames_received_ok, - 0x08 + 0x08, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x08, 0, 64); /* reg_ppcnt_a_frame_check_sequence_errors * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_frame_check_sequence_errors, - 0x08 + 0x10, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x10, 0, 64); /* reg_ppcnt_a_alignment_errors * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_alignment_errors, - 0x08 + 0x18, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x18, 0, 64); /* reg_ppcnt_a_octets_transmitted_ok * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_octets_transmitted_ok, - 0x08 + 0x20, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x20, 0, 64); /* reg_ppcnt_a_octets_received_ok * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_octets_received_ok, - 0x08 + 0x28, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x28, 0, 64); /* reg_ppcnt_a_multicast_frames_xmitted_ok * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_multicast_frames_xmitted_ok, - 0x08 + 0x30, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x30, 0, 64); /* reg_ppcnt_a_broadcast_frames_xmitted_ok * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_broadcast_frames_xmitted_ok, - 0x08 + 0x38, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x38, 0, 64); /* reg_ppcnt_a_multicast_frames_received_ok * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_multicast_frames_received_ok, - 0x08 + 0x40, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x40, 0, 64); /* reg_ppcnt_a_broadcast_frames_received_ok * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_broadcast_frames_received_ok, - 0x08 + 0x48, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x48, 0, 64); /* reg_ppcnt_a_in_range_length_errors * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_in_range_length_errors, - 0x08 + 0x50, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x50, 0, 64); /* reg_ppcnt_a_out_of_range_length_field * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_out_of_range_length_field, - 0x08 + 0x58, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x58, 0, 64); /* reg_ppcnt_a_frame_too_long_errors * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_frame_too_long_errors, - 0x08 + 0x60, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x60, 0, 64); /* reg_ppcnt_a_symbol_error_during_carrier * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_symbol_error_during_carrier, - 0x08 + 0x68, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x68, 0, 64); /* reg_ppcnt_a_mac_control_frames_transmitted * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_mac_control_frames_transmitted, - 0x08 + 0x70, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x70, 0, 64); /* reg_ppcnt_a_mac_control_frames_received * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_mac_control_frames_received, - 0x08 + 0x78, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x78, 0, 64); /* reg_ppcnt_a_unsupported_opcodes_received * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_unsupported_opcodes_received, - 0x08 + 0x80, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x80, 0, 64); /* reg_ppcnt_a_pause_mac_ctrl_frames_received * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_pause_mac_ctrl_frames_received, - 0x08 + 0x88, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x88, 0, 64); /* reg_ppcnt_a_pause_mac_ctrl_frames_transmitted * Access: RO */ MLXSW_ITEM64(reg, ppcnt, a_pause_mac_ctrl_frames_transmitted, - 0x08 + 0x90, 0, 64); + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x90, 0, 64); + +/* Ethernet Extended Counter Group Counters */ + +/* reg_ppcnt_ecn_marked + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, ecn_marked, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x08, 0, 64); /* Ethernet Per Priority Group Counters */ /* reg_ppcnt_rx_octets * Access: RO */ -MLXSW_ITEM64(reg, ppcnt, rx_octets, 0x08 + 0x00, 0, 64); +MLXSW_ITEM64(reg, ppcnt, rx_octets, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x00, 0, 64); /* reg_ppcnt_rx_frames * Access: RO */ -MLXSW_ITEM64(reg, ppcnt, rx_frames, 0x08 + 0x20, 0, 64); +MLXSW_ITEM64(reg, ppcnt, rx_frames, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x20, 0, 64); /* reg_ppcnt_tx_octets * Access: RO */ -MLXSW_ITEM64(reg, ppcnt, tx_octets, 0x08 + 0x28, 0, 64); +MLXSW_ITEM64(reg, ppcnt, tx_octets, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x28, 0, 64); /* reg_ppcnt_tx_frames * Access: RO */ -MLXSW_ITEM64(reg, ppcnt, tx_frames, 0x08 + 0x48, 0, 64); +MLXSW_ITEM64(reg, ppcnt, tx_frames, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x48, 0, 64); /* reg_ppcnt_rx_pause * Access: RO */ -MLXSW_ITEM64(reg, ppcnt, rx_pause, 0x08 + 0x50, 0, 64); +MLXSW_ITEM64(reg, ppcnt, rx_pause, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x50, 0, 64); /* reg_ppcnt_rx_pause_duration * Access: RO */ -MLXSW_ITEM64(reg, ppcnt, rx_pause_duration, 0x08 + 0x58, 0, 64); +MLXSW_ITEM64(reg, ppcnt, rx_pause_duration, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x58, 0, 64); /* reg_ppcnt_tx_pause * Access: RO */ -MLXSW_ITEM64(reg, ppcnt, tx_pause, 0x08 + 0x60, 0, 64); +MLXSW_ITEM64(reg, ppcnt, tx_pause, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x60, 0, 64); /* reg_ppcnt_tx_pause_duration * Access: RO */ -MLXSW_ITEM64(reg, ppcnt, tx_pause_duration, 0x08 + 0x68, 0, 64); +MLXSW_ITEM64(reg, ppcnt, tx_pause_duration, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x68, 0, 64); /* reg_ppcnt_rx_pause_transition * Access: RO */ -MLXSW_ITEM64(reg, ppcnt, tx_pause_transition, 0x08 + 0x70, 0, 64); +MLXSW_ITEM64(reg, ppcnt, tx_pause_transition, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x70, 0, 64); /* Ethernet Per Traffic Group Counters */ @@ -3366,14 +3571,24 @@ MLXSW_ITEM64(reg, ppcnt, tx_pause_transition, 0x08 + 0x70, 0, 64); * The field cannot be cleared. * Access: RO */ -MLXSW_ITEM64(reg, ppcnt, tc_transmit_queue, 0x08 + 0x00, 0, 64); +MLXSW_ITEM64(reg, ppcnt, tc_transmit_queue, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x00, 0, 64); /* reg_ppcnt_tc_no_buffer_discard_uc * The number of unicast packets dropped due to lack of shared * buffer resources. * Access: RO */ -MLXSW_ITEM64(reg, ppcnt, tc_no_buffer_discard_uc, 0x08 + 0x08, 0, 64); +MLXSW_ITEM64(reg, ppcnt, tc_no_buffer_discard_uc, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x08, 0, 64); + +/* Ethernet Per Traffic Class Congestion Group Counters */ + +/* reg_ppcnt_wred_discard + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, wred_discard, + MLXSW_REG_PPCNT_COUNTERS_OFFSET + 0x00, 0, 64); static inline void mlxsw_reg_ppcnt_pack(char *payload, u8 local_port, enum mlxsw_reg_ppcnt_grp grp, @@ -3682,12 +3897,15 @@ enum mlxsw_reg_htgt_trap_group { MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP, MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP, MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF, + MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM, + MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST, MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP, MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS, MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP, MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE, MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME, MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP, + MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF, MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT, MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD, MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND, @@ -3992,6 +4210,12 @@ MLXSW_ITEM32(reg, ritr, ipv4, 0x00, 29, 1); */ MLXSW_ITEM32(reg, ritr, ipv6, 0x00, 28, 1); +/* reg_ritr_ipv4_mc + * IPv4 multicast routing enable. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, ipv4_mc, 0x00, 27, 1); + enum mlxsw_reg_ritr_if_type { /* VLAN interface. */ MLXSW_REG_RITR_VLAN_IF, @@ -4049,6 +4273,14 @@ MLXSW_ITEM32(reg, ritr, ipv4_fe, 0x04, 29, 1); */ MLXSW_ITEM32(reg, ritr, ipv6_fe, 0x04, 28, 1); +/* reg_ritr_ipv4_mc_fe + * IPv4 Multicast Forwarding Enable. + * When disabled, forwarding is blocked but local traffic (traps and IP to me) + * will be enabled. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, ipv4_mc_fe, 0x04, 27, 1); + /* reg_ritr_lb_en * Loop-back filter enable for unicast packets. * If the flag is set then loop-back filter for unicast packets is @@ -4271,11 +4503,13 @@ static inline void mlxsw_reg_ritr_pack(char *payload, bool enable, mlxsw_reg_ritr_enable_set(payload, enable); mlxsw_reg_ritr_ipv4_set(payload, 1); mlxsw_reg_ritr_ipv6_set(payload, 1); + mlxsw_reg_ritr_ipv4_mc_set(payload, 1); mlxsw_reg_ritr_type_set(payload, type); mlxsw_reg_ritr_op_set(payload, op); mlxsw_reg_ritr_rif_set(payload, rif); mlxsw_reg_ritr_ipv4_fe_set(payload, 1); mlxsw_reg_ritr_ipv6_fe_set(payload, 1); + mlxsw_reg_ritr_ipv4_mc_fe_set(payload, 1); mlxsw_reg_ritr_lb_en_set(payload, 1); mlxsw_reg_ritr_virtual_router_set(payload, vr_id); mlxsw_reg_ritr_mtu_set(payload, mtu); @@ -4311,6 +4545,57 @@ mlxsw_reg_ritr_loopback_ipip4_pack(char *payload, mlxsw_reg_ritr_loopback_ipip_usip4_set(payload, usip); } +/* RTAR - Router TCAM Allocation Register + * -------------------------------------- + * This register is used for allocation of regions in the TCAM table. + */ +#define MLXSW_REG_RTAR_ID 0x8004 +#define MLXSW_REG_RTAR_LEN 0x20 + +MLXSW_REG_DEFINE(rtar, MLXSW_REG_RTAR_ID, MLXSW_REG_RTAR_LEN); + +enum mlxsw_reg_rtar_op { + MLXSW_REG_RTAR_OP_ALLOCATE, + MLXSW_REG_RTAR_OP_RESIZE, + MLXSW_REG_RTAR_OP_DEALLOCATE, +}; + +/* reg_rtar_op + * Access: WO + */ +MLXSW_ITEM32(reg, rtar, op, 0x00, 28, 4); + +enum mlxsw_reg_rtar_key_type { + MLXSW_REG_RTAR_KEY_TYPE_IPV4_MULTICAST = 1, + MLXSW_REG_RTAR_KEY_TYPE_IPV6_MULTICAST = 3 +}; + +/* reg_rtar_key_type + * TCAM key type for the region. + * Access: WO + */ +MLXSW_ITEM32(reg, rtar, key_type, 0x00, 0, 8); + +/* reg_rtar_region_size + * TCAM region size. When allocating/resizing this is the requested + * size, the response is the actual size. + * Note: Actual size may be larger than requested. + * Reserved for op = Deallocate + * Access: WO + */ +MLXSW_ITEM32(reg, rtar, region_size, 0x04, 0, 16); + +static inline void mlxsw_reg_rtar_pack(char *payload, + enum mlxsw_reg_rtar_op op, + enum mlxsw_reg_rtar_key_type key_type, + u16 region_size) +{ + MLXSW_REG_ZERO(rtar, payload); + mlxsw_reg_rtar_op_set(payload, op); + mlxsw_reg_rtar_key_type_set(payload, key_type); + mlxsw_reg_rtar_region_size_set(payload, region_size); +} + /* RATR - Router Adjacency Table Register * -------------------------------------- * The RATR register is used to configure the Router Adjacency (next-hop) @@ -4480,6 +4765,27 @@ MLXSW_ITEM32(reg, ratr, ipip_ipv4_udip, 0x18, 0, 32); */ MLXSW_ITEM32(reg, ratr, ipip_ipv6_ptr, 0x1C, 0, 24); +enum mlxsw_reg_flow_counter_set_type { + /* No count */ + MLXSW_REG_FLOW_COUNTER_SET_TYPE_NO_COUNT = 0x00, + /* Count packets and bytes */ + MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES = 0x03, + /* Count only packets */ + MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS = 0x05, +}; + +/* reg_ratr_counter_set_type + * Counter set type for flow counters + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, counter_set_type, 0x28, 24, 8); + +/* reg_ratr_counter_index + * Counter index for flow counters + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, counter_index, 0x28, 0, 24); + static inline void mlxsw_reg_ratr_pack(char *payload, enum mlxsw_reg_ratr_op op, bool valid, @@ -4507,6 +4813,20 @@ static inline void mlxsw_reg_ratr_ipip4_entry_pack(char *payload, u32 ipv4_udip) mlxsw_reg_ratr_ipip_ipv4_udip_set(payload, ipv4_udip); } +static inline void mlxsw_reg_ratr_counter_pack(char *payload, u64 counter_index, + bool counter_enable) +{ + enum mlxsw_reg_flow_counter_set_type set_type; + + if (counter_enable) + set_type = MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES; + else + set_type = MLXSW_REG_FLOW_COUNTER_SET_TYPE_NO_COUNT; + + mlxsw_reg_ratr_counter_index_set(payload, counter_index); + mlxsw_reg_ratr_counter_set_type_set(payload, set_type); +} + /* RICNT - Router Interface Counter Register * ----------------------------------------- * The RICNT register retrieves per port performance counters @@ -4630,6 +4950,65 @@ static inline void mlxsw_reg_ricnt_pack(char *payload, u32 index, MLXSW_REG_RICNT_COUNTER_SET_TYPE_BASIC); } +/* RRCR - Router Rules Copy Register Layout + * ---------------------------------------- + * This register is used for moving and copying route entry rules. + */ +#define MLXSW_REG_RRCR_ID 0x800F +#define MLXSW_REG_RRCR_LEN 0x24 + +MLXSW_REG_DEFINE(rrcr, MLXSW_REG_RRCR_ID, MLXSW_REG_RRCR_LEN); + +enum mlxsw_reg_rrcr_op { + /* Move rules */ + MLXSW_REG_RRCR_OP_MOVE, + /* Copy rules */ + MLXSW_REG_RRCR_OP_COPY, +}; + +/* reg_rrcr_op + * Access: WO + */ +MLXSW_ITEM32(reg, rrcr, op, 0x00, 28, 4); + +/* reg_rrcr_offset + * Offset within the region from which to copy/move. + * Access: Index + */ +MLXSW_ITEM32(reg, rrcr, offset, 0x00, 0, 16); + +/* reg_rrcr_size + * The number of rules to copy/move. + * Access: WO + */ +MLXSW_ITEM32(reg, rrcr, size, 0x04, 0, 16); + +/* reg_rrcr_table_id + * Identifier of the table on which to perform the operation. Encoding is the + * same as in RTAR.key_type + * Access: Index + */ +MLXSW_ITEM32(reg, rrcr, table_id, 0x10, 0, 4); + +/* reg_rrcr_dest_offset + * Offset within the region to which to copy/move + * Access: Index + */ +MLXSW_ITEM32(reg, rrcr, dest_offset, 0x20, 0, 16); + +static inline void mlxsw_reg_rrcr_pack(char *payload, enum mlxsw_reg_rrcr_op op, + u16 offset, u16 size, + enum mlxsw_reg_rtar_key_type table_id, + u16 dest_offset) +{ + MLXSW_REG_ZERO(rrcr, payload); + mlxsw_reg_rrcr_op_set(payload, op); + mlxsw_reg_rrcr_offset_set(payload, offset); + mlxsw_reg_rrcr_size_set(payload, size); + mlxsw_reg_rrcr_table_id_set(payload, table_id); + mlxsw_reg_rrcr_dest_offset_set(payload, dest_offset); +} + /* RALTA - Router Algorithmic LPM Tree Allocation Register * ------------------------------------------------------- * RALTA is used to allocate the LPM trees of the SHSPM method. @@ -5169,15 +5548,6 @@ enum mlxsw_reg_rauht_trap_id { */ MLXSW_ITEM32(reg, rauht, trap_id, 0x60, 0, 9); -enum mlxsw_reg_flow_counter_set_type { - /* No count */ - MLXSW_REG_FLOW_COUNTER_SET_TYPE_NO_COUNT = 0x00, - /* Count packets and bytes */ - MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES = 0x03, - /* Count only packets */ - MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS = 0x05, -}; - /* reg_rauht_counter_set_type * Counter set type for flow counters * Access: RW @@ -5596,6 +5966,360 @@ mlxsw_reg_rtdp_ipip4_pack(char *payload, u16 irif, mlxsw_reg_rtdp_ipip_expected_gre_key_set(payload, expected_gre_key); } +/* RIGR-V2 - Router Interface Group Register Version 2 + * --------------------------------------------------- + * The RIGR_V2 register is used to add, remove and query egress interface list + * of a multicast forwarding entry. + */ +#define MLXSW_REG_RIGR2_ID 0x8023 +#define MLXSW_REG_RIGR2_LEN 0xB0 + +#define MLXSW_REG_RIGR2_MAX_ERIFS 32 + +MLXSW_REG_DEFINE(rigr2, MLXSW_REG_RIGR2_ID, MLXSW_REG_RIGR2_LEN); + +/* reg_rigr2_rigr_index + * KVD Linear index. + * Access: Index + */ +MLXSW_ITEM32(reg, rigr2, rigr_index, 0x04, 0, 24); + +/* reg_rigr2_vnext + * Next RIGR Index is valid. + * Access: RW + */ +MLXSW_ITEM32(reg, rigr2, vnext, 0x08, 31, 1); + +/* reg_rigr2_next_rigr_index + * Next RIGR Index. The index is to the KVD linear. + * Reserved when vnxet = '0'. + * Access: RW + */ +MLXSW_ITEM32(reg, rigr2, next_rigr_index, 0x08, 0, 24); + +/* reg_rigr2_vrmid + * RMID Index is valid. + * Access: RW + */ +MLXSW_ITEM32(reg, rigr2, vrmid, 0x20, 31, 1); + +/* reg_rigr2_rmid_index + * RMID Index. + * Range 0 .. max_mid - 1 + * Reserved when vrmid = '0'. + * The index is to the Port Group Table (PGT) + * Access: RW + */ +MLXSW_ITEM32(reg, rigr2, rmid_index, 0x20, 0, 16); + +/* reg_rigr2_erif_entry_v + * Egress Router Interface is valid. + * Note that low-entries must be set if high-entries are set. For + * example: if erif_entry[2].v is set then erif_entry[1].v and + * erif_entry[0].v must be set. + * Index can be from 0 to cap_mc_erif_list_entries-1 + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, rigr2, erif_entry_v, 0x24, 31, 1, 4, 0, false); + +/* reg_rigr2_erif_entry_erif + * Egress Router Interface. + * Valid range is from 0 to cap_max_router_interfaces - 1 + * Index can be from 0 to MLXSW_REG_RIGR2_MAX_ERIFS - 1 + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, rigr2, erif_entry_erif, 0x24, 0, 16, 4, 0, false); + +static inline void mlxsw_reg_rigr2_pack(char *payload, u32 rigr_index, + bool vnext, u32 next_rigr_index) +{ + MLXSW_REG_ZERO(rigr2, payload); + mlxsw_reg_rigr2_rigr_index_set(payload, rigr_index); + mlxsw_reg_rigr2_vnext_set(payload, vnext); + mlxsw_reg_rigr2_next_rigr_index_set(payload, next_rigr_index); + mlxsw_reg_rigr2_vrmid_set(payload, 0); + mlxsw_reg_rigr2_rmid_index_set(payload, 0); +} + +static inline void mlxsw_reg_rigr2_erif_entry_pack(char *payload, int index, + bool v, u16 erif) +{ + mlxsw_reg_rigr2_erif_entry_v_set(payload, index, v); + mlxsw_reg_rigr2_erif_entry_erif_set(payload, index, erif); +} + +/* RECR-V2 - Router ECMP Configuration Version 2 Register + * ------------------------------------------------------ + */ +#define MLXSW_REG_RECR2_ID 0x8025 +#define MLXSW_REG_RECR2_LEN 0x38 + +MLXSW_REG_DEFINE(recr2, MLXSW_REG_RECR2_ID, MLXSW_REG_RECR2_LEN); + +/* reg_recr2_pp + * Per-port configuration + * Access: Index + */ +MLXSW_ITEM32(reg, recr2, pp, 0x00, 24, 1); + +/* reg_recr2_sh + * Symmetric hash + * Access: RW + */ +MLXSW_ITEM32(reg, recr2, sh, 0x00, 8, 1); + +/* reg_recr2_seed + * Seed + * Access: RW + */ +MLXSW_ITEM32(reg, recr2, seed, 0x08, 0, 32); + +enum { + /* Enable IPv4 fields if packet is not TCP and not UDP */ + MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP = 3, + /* Enable IPv4 fields if packet is TCP or UDP */ + MLXSW_REG_RECR2_IPV4_EN_TCP_UDP = 4, + /* Enable IPv6 fields if packet is not TCP and not UDP */ + MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP = 5, + /* Enable IPv6 fields if packet is TCP or UDP */ + MLXSW_REG_RECR2_IPV6_EN_TCP_UDP = 6, + /* Enable TCP/UDP header fields if packet is IPv4 */ + MLXSW_REG_RECR2_TCP_UDP_EN_IPV4 = 7, + /* Enable TCP/UDP header fields if packet is IPv6 */ + MLXSW_REG_RECR2_TCP_UDP_EN_IPV6 = 8, +}; + +/* reg_recr2_outer_header_enables + * Bit mask where each bit enables a specific layer to be included in + * the hash calculation. + * Access: RW + */ +MLXSW_ITEM_BIT_ARRAY(reg, recr2, outer_header_enables, 0x10, 0x04, 1); + +enum { + /* IPv4 Source IP */ + MLXSW_REG_RECR2_IPV4_SIP0 = 9, + MLXSW_REG_RECR2_IPV4_SIP3 = 12, + /* IPv4 Destination IP */ + MLXSW_REG_RECR2_IPV4_DIP0 = 13, + MLXSW_REG_RECR2_IPV4_DIP3 = 16, + /* IP Protocol */ + MLXSW_REG_RECR2_IPV4_PROTOCOL = 17, + /* IPv6 Source IP */ + MLXSW_REG_RECR2_IPV6_SIP0_7 = 21, + MLXSW_REG_RECR2_IPV6_SIP8 = 29, + MLXSW_REG_RECR2_IPV6_SIP15 = 36, + /* IPv6 Destination IP */ + MLXSW_REG_RECR2_IPV6_DIP0_7 = 37, + MLXSW_REG_RECR2_IPV6_DIP8 = 45, + MLXSW_REG_RECR2_IPV6_DIP15 = 52, + /* IPv6 Next Header */ + MLXSW_REG_RECR2_IPV6_NEXT_HEADER = 53, + /* IPv6 Flow Label */ + MLXSW_REG_RECR2_IPV6_FLOW_LABEL = 57, + /* TCP/UDP Source Port */ + MLXSW_REG_RECR2_TCP_UDP_SPORT = 74, + /* TCP/UDP Destination Port */ + MLXSW_REG_RECR2_TCP_UDP_DPORT = 75, +}; + +/* reg_recr2_outer_header_fields_enable + * Packet fields to enable for ECMP hash subject to outer_header_enable. + * Access: RW + */ +MLXSW_ITEM_BIT_ARRAY(reg, recr2, outer_header_fields_enable, 0x14, 0x14, 1); + +static inline void mlxsw_reg_recr2_ipv4_sip_enable(char *payload) +{ + int i; + + for (i = MLXSW_REG_RECR2_IPV4_SIP0; i <= MLXSW_REG_RECR2_IPV4_SIP3; i++) + mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, + true); +} + +static inline void mlxsw_reg_recr2_ipv4_dip_enable(char *payload) +{ + int i; + + for (i = MLXSW_REG_RECR2_IPV4_DIP0; i <= MLXSW_REG_RECR2_IPV4_DIP3; i++) + mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, + true); +} + +static inline void mlxsw_reg_recr2_ipv6_sip_enable(char *payload) +{ + int i = MLXSW_REG_RECR2_IPV6_SIP0_7; + + mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, true); + + i = MLXSW_REG_RECR2_IPV6_SIP8; + for (; i <= MLXSW_REG_RECR2_IPV6_SIP15; i++) + mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, + true); +} + +static inline void mlxsw_reg_recr2_ipv6_dip_enable(char *payload) +{ + int i = MLXSW_REG_RECR2_IPV6_DIP0_7; + + mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, true); + + i = MLXSW_REG_RECR2_IPV6_DIP8; + for (; i <= MLXSW_REG_RECR2_IPV6_DIP15; i++) + mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, + true); +} + +static inline void mlxsw_reg_recr2_pack(char *payload, u32 seed) +{ + MLXSW_REG_ZERO(recr2, payload); + mlxsw_reg_recr2_pp_set(payload, false); + mlxsw_reg_recr2_sh_set(payload, true); + mlxsw_reg_recr2_seed_set(payload, seed); +} + +/* RMFT-V2 - Router Multicast Forwarding Table Version 2 Register + * -------------------------------------------------------------- + * The RMFT_V2 register is used to configure and query the multicast table. + */ +#define MLXSW_REG_RMFT2_ID 0x8027 +#define MLXSW_REG_RMFT2_LEN 0x174 + +MLXSW_REG_DEFINE(rmft2, MLXSW_REG_RMFT2_ID, MLXSW_REG_RMFT2_LEN); + +/* reg_rmft2_v + * Valid + * Access: RW + */ +MLXSW_ITEM32(reg, rmft2, v, 0x00, 31, 1); + +enum mlxsw_reg_rmft2_type { + MLXSW_REG_RMFT2_TYPE_IPV4, + MLXSW_REG_RMFT2_TYPE_IPV6 +}; + +/* reg_rmft2_type + * Access: Index + */ +MLXSW_ITEM32(reg, rmft2, type, 0x00, 28, 2); + +enum mlxsw_sp_reg_rmft2_op { + /* For Write: + * Write operation. Used to write a new entry to the table. All RW + * fields are relevant for new entry. Activity bit is set for new + * entries - Note write with v (Valid) 0 will delete the entry. + * For Query: + * Read operation + */ + MLXSW_REG_RMFT2_OP_READ_WRITE, +}; + +/* reg_rmft2_op + * Operation. + * Access: OP + */ +MLXSW_ITEM32(reg, rmft2, op, 0x00, 20, 2); + +/* reg_rmft2_a + * Activity. Set for new entries. Set if a packet lookup has hit on the specific + * entry. + * Access: RO + */ +MLXSW_ITEM32(reg, rmft2, a, 0x00, 16, 1); + +/* reg_rmft2_offset + * Offset within the multicast forwarding table to write to. + * Access: Index + */ +MLXSW_ITEM32(reg, rmft2, offset, 0x00, 0, 16); + +/* reg_rmft2_virtual_router + * Virtual Router ID. Range from 0..cap_max_virtual_routers-1 + * Access: RW + */ +MLXSW_ITEM32(reg, rmft2, virtual_router, 0x04, 0, 16); + +enum mlxsw_reg_rmft2_irif_mask { + MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, + MLXSW_REG_RMFT2_IRIF_MASK_COMPARE +}; + +/* reg_rmft2_irif_mask + * Ingress RIF mask. + * Access: RW + */ +MLXSW_ITEM32(reg, rmft2, irif_mask, 0x08, 24, 1); + +/* reg_rmft2_irif + * Ingress RIF index. + * Access: RW + */ +MLXSW_ITEM32(reg, rmft2, irif, 0x08, 0, 16); + +/* reg_rmft2_dip4 + * Destination IPv4 address + * Access: RW + */ +MLXSW_ITEM32(reg, rmft2, dip4, 0x1C, 0, 32); + +/* reg_rmft2_dip4_mask + * A bit that is set directs the TCAM to compare the corresponding bit in key. A + * bit that is clear directs the TCAM to ignore the corresponding bit in key. + * Access: RW + */ +MLXSW_ITEM32(reg, rmft2, dip4_mask, 0x2C, 0, 32); + +/* reg_rmft2_sip4 + * Source IPv4 address + * Access: RW + */ +MLXSW_ITEM32(reg, rmft2, sip4, 0x3C, 0, 32); + +/* reg_rmft2_sip4_mask + * A bit that is set directs the TCAM to compare the corresponding bit in key. A + * bit that is clear directs the TCAM to ignore the corresponding bit in key. + * Access: RW + */ +MLXSW_ITEM32(reg, rmft2, sip4_mask, 0x4C, 0, 32); + +/* reg_rmft2_flexible_action_set + * ACL action set. The only supported action types in this field and in any + * action-set pointed from here are as follows: + * 00h: ACTION_NULL + * 01h: ACTION_MAC_TTL, only TTL configuration is supported. + * 03h: ACTION_TRAP + * 06h: ACTION_QOS + * 08h: ACTION_POLICING_MONITORING + * 10h: ACTION_ROUTER_MC + * Access: RW + */ +MLXSW_ITEM_BUF(reg, rmft2, flexible_action_set, 0x80, + MLXSW_REG_FLEX_ACTION_SET_LEN); + +static inline void +mlxsw_reg_rmft2_ipv4_pack(char *payload, bool v, u16 offset, u16 virtual_router, + enum mlxsw_reg_rmft2_irif_mask irif_mask, u16 irif, + u32 dip4, u32 dip4_mask, u32 sip4, u32 sip4_mask, + const char *flexible_action_set) +{ + MLXSW_REG_ZERO(rmft2, payload); + mlxsw_reg_rmft2_v_set(payload, v); + mlxsw_reg_rmft2_type_set(payload, MLXSW_REG_RMFT2_TYPE_IPV4); + mlxsw_reg_rmft2_op_set(payload, MLXSW_REG_RMFT2_OP_READ_WRITE); + mlxsw_reg_rmft2_offset_set(payload, offset); + mlxsw_reg_rmft2_virtual_router_set(payload, virtual_router); + mlxsw_reg_rmft2_irif_mask_set(payload, irif_mask); + mlxsw_reg_rmft2_irif_set(payload, irif); + mlxsw_reg_rmft2_dip4_set(payload, dip4); + mlxsw_reg_rmft2_dip4_mask_set(payload, dip4_mask); + mlxsw_reg_rmft2_sip4_set(payload, sip4); + mlxsw_reg_rmft2_sip4_mask_set(payload, sip4_mask); + if (flexible_action_set) + mlxsw_reg_rmft2_flexible_action_set_memcpy_to(payload, + flexible_action_set); +} + /* MFCR - Management Fan Control Register * -------------------------------------- * This register controls the settings of the Fan Speed PWM mechanism. @@ -6885,6 +7609,8 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(svpe), MLXSW_REG(sfmr), MLXSW_REG(spvmlr), + MLXSW_REG(cwtp), + MLXSW_REG(cwtpm), MLXSW_REG(ppbt), MLXSW_REG(pacl), MLXSW_REG(pagt), @@ -6911,9 +7637,11 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(hpkt), MLXSW_REG(rgcr), MLXSW_REG(ritr), + MLXSW_REG(rtar), MLXSW_REG(ratr), MLXSW_REG(rtdp), MLXSW_REG(ricnt), + MLXSW_REG(rrcr), MLXSW_REG(ralta), MLXSW_REG(ralst), MLXSW_REG(raltb), @@ -6921,6 +7649,9 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(rauht), MLXSW_REG(raleu), MLXSW_REG(rauhtd), + MLXSW_REG(rigr2), + MLXSW_REG(recr2), + MLXSW_REG(rmft2), MLXSW_REG(mfcr), MLXSW_REG(mfsc), MLXSW_REG(mfsm), diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h index 9556d934714b..087aad52c195 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/resources.h +++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h @@ -63,6 +63,7 @@ enum mlxsw_res_id { MLXSW_RES_ID_MAX_CPU_POLICERS, MLXSW_RES_ID_MAX_VRS, MLXSW_RES_ID_MAX_RIFS, + MLXSW_RES_ID_MC_ERIF_LIST_ENTRIES, MLXSW_RES_ID_MAX_LPM_TREES, /* Internal resources. @@ -100,6 +101,7 @@ static u16 mlxsw_res_ids[] = { [MLXSW_RES_ID_MAX_CPU_POLICERS] = 0x2A13, [MLXSW_RES_ID_MAX_VRS] = 0x2C01, [MLXSW_RES_ID_MAX_RIFS] = 0x2C02, + [MLXSW_RES_ID_MC_ERIF_LIST_ENTRIES] = 0x2C10, [MLXSW_RES_ID_MAX_LPM_TREES] = 0x2C30, }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 696b99e65a5a..2d46ec84ebdf 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -53,6 +53,7 @@ #include <linux/notifier.h> #include <linux/dcbnl.h> #include <linux/inetdevice.h> +#include <linux/netlink.h> #include <net/switchdev.h> #include <net/pkt_cls.h> #include <net/tc_act/tc_mirred.h> @@ -69,11 +70,12 @@ #include "txheader.h" #include "spectrum_cnt.h" #include "spectrum_dpipe.h" +#include "spectrum_acl_flex_actions.h" #include "../mlxfw/mlxfw.h" #define MLXSW_FWREV_MAJOR 13 -#define MLXSW_FWREV_MINOR 1420 -#define MLXSW_FWREV_SUBMINOR 122 +#define MLXSW_FWREV_MINOR 1530 +#define MLXSW_FWREV_SUBMINOR 152 static const struct mlxsw_fw_rev mlxsw_sp_supported_fw_rev = { .major = MLXSW_FWREV_MAJOR, @@ -1322,20 +1324,54 @@ out: return err; } +static void +mlxsw_sp_port_get_hw_xstats(struct net_device *dev, + struct mlxsw_sp_port_xstats *xstats) +{ + char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; + int err, i; + + err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_EXT_CNT, 0, + ppcnt_pl); + if (!err) + xstats->ecn = mlxsw_reg_ppcnt_ecn_marked_get(ppcnt_pl); + + for (i = 0; i < TC_MAX_QUEUE; i++) { + err = mlxsw_sp_port_get_stats_raw(dev, + MLXSW_REG_PPCNT_TC_CONG_TC, + i, ppcnt_pl); + if (!err) + xstats->wred_drop[i] = + mlxsw_reg_ppcnt_wred_discard_get(ppcnt_pl); + + err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_TC_CNT, + i, ppcnt_pl); + if (err) + continue; + + xstats->backlog[i] = + mlxsw_reg_ppcnt_tc_transmit_queue_get(ppcnt_pl); + xstats->tail_drop[i] = + mlxsw_reg_ppcnt_tc_no_buffer_discard_uc_get(ppcnt_pl); + } +} + static void update_stats_cache(struct work_struct *work) { struct mlxsw_sp_port *mlxsw_sp_port = container_of(work, struct mlxsw_sp_port, - hw_stats.update_dw.work); + periodic_hw_stats.update_dw.work); if (!netif_carrier_ok(mlxsw_sp_port->dev)) goto out; mlxsw_sp_port_get_hw_stats(mlxsw_sp_port->dev, - mlxsw_sp_port->hw_stats.cache); + &mlxsw_sp_port->periodic_hw_stats.stats); + mlxsw_sp_port_get_hw_xstats(mlxsw_sp_port->dev, + &mlxsw_sp_port->periodic_hw_stats.xstats); out: - mlxsw_core_schedule_dw(&mlxsw_sp_port->hw_stats.update_dw, + mlxsw_core_schedule_dw(&mlxsw_sp_port->periodic_hw_stats.update_dw, MLXSW_HW_STATS_UPDATE_TIME); } @@ -1348,7 +1384,7 @@ mlxsw_sp_port_get_stats64(struct net_device *dev, { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - memcpy(stats, mlxsw_sp_port->hw_stats.cache, sizeof(*stats)); + memcpy(stats, &mlxsw_sp_port->periodic_hw_stats.stats, sizeof(*stats)); } static int __mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, @@ -1695,17 +1731,9 @@ static void mlxsw_sp_port_del_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, } static int mlxsw_sp_setup_tc_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, - struct tc_cls_matchall_offload *f) + struct tc_cls_matchall_offload *f, + bool ingress) { - bool ingress; - - if (is_classid_clsact_ingress(f->common.classid)) - ingress = true; - else if (is_classid_clsact_egress(f->common.classid)) - ingress = false; - else - return -EOPNOTSUPP; - if (f->common.chain_index) return -EOPNOTSUPP; @@ -1723,17 +1751,9 @@ static int mlxsw_sp_setup_tc_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, static int mlxsw_sp_setup_tc_cls_flower(struct mlxsw_sp_port *mlxsw_sp_port, - struct tc_cls_flower_offload *f) + struct tc_cls_flower_offload *f, + bool ingress) { - bool ingress; - - if (is_classid_clsact_ingress(f->common.classid)) - ingress = true; - else if (is_classid_clsact_egress(f->common.classid)) - ingress = false; - else - return -EOPNOTSUPP; - switch (f->command) { case TC_CLSFLOWER_REPLACE: return mlxsw_sp_flower_replace(mlxsw_sp_port, ingress, f); @@ -1747,16 +1767,72 @@ mlxsw_sp_setup_tc_cls_flower(struct mlxsw_sp_port *mlxsw_sp_port, } } +static int mlxsw_sp_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv, bool ingress) +{ + struct mlxsw_sp_port *mlxsw_sp_port = cb_priv; + + if (!tc_can_offload(mlxsw_sp_port->dev)) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_CLSMATCHALL: + return mlxsw_sp_setup_tc_cls_matchall(mlxsw_sp_port, type_data, + ingress); + case TC_SETUP_CLSFLOWER: + return mlxsw_sp_setup_tc_cls_flower(mlxsw_sp_port, type_data, + ingress); + default: + return -EOPNOTSUPP; + } +} + +static int mlxsw_sp_setup_tc_block_cb_ig(enum tc_setup_type type, + void *type_data, void *cb_priv) +{ + return mlxsw_sp_setup_tc_block_cb(type, type_data, cb_priv, true); +} + +static int mlxsw_sp_setup_tc_block_cb_eg(enum tc_setup_type type, + void *type_data, void *cb_priv) +{ + return mlxsw_sp_setup_tc_block_cb(type, type_data, cb_priv, false); +} + +static int mlxsw_sp_setup_tc_block(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_block_offload *f) +{ + tc_setup_cb_t *cb; + + if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + cb = mlxsw_sp_setup_tc_block_cb_ig; + else if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS) + cb = mlxsw_sp_setup_tc_block_cb_eg; + else + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + return tcf_block_cb_register(f->block, cb, mlxsw_sp_port, + mlxsw_sp_port); + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, cb, mlxsw_sp_port); + return 0; + default: + return -EOPNOTSUPP; + } +} + static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); switch (type) { - case TC_SETUP_CLSMATCHALL: - return mlxsw_sp_setup_tc_cls_matchall(mlxsw_sp_port, type_data); - case TC_SETUP_CLSFLOWER: - return mlxsw_sp_setup_tc_cls_flower(mlxsw_sp_port, type_data); + case TC_SETUP_BLOCK: + return mlxsw_sp_setup_tc_block(mlxsw_sp_port, type_data); + case TC_SETUP_QDISC_RED: + return mlxsw_sp_setup_tc_red(mlxsw_sp_port, type_data); default: return -EOPNOTSUPP; } @@ -2868,14 +2944,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_alloc_sample; } - mlxsw_sp_port->hw_stats.cache = - kzalloc(sizeof(*mlxsw_sp_port->hw_stats.cache), GFP_KERNEL); - - if (!mlxsw_sp_port->hw_stats.cache) { - err = -ENOMEM; - goto err_alloc_hw_stats; - } - INIT_DELAYED_WORK(&mlxsw_sp_port->hw_stats.update_dw, + INIT_DELAYED_WORK(&mlxsw_sp_port->periodic_hw_stats.update_dw, &update_stats_cache); dev->netdev_ops = &mlxsw_sp_port_netdev_ops; @@ -2974,6 +3043,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, if (IS_ERR(mlxsw_sp_port_vlan)) { dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to create VID 1\n", mlxsw_sp_port->local_port); + err = PTR_ERR(mlxsw_sp_port_vlan); goto err_port_vlan_get; } @@ -2989,7 +3059,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, mlxsw_core_port_eth_set(mlxsw_sp->core, mlxsw_sp_port->local_port, mlxsw_sp_port, dev, mlxsw_sp_port->split, module); - mlxsw_core_schedule_dw(&mlxsw_sp_port->hw_stats.update_dw, 0); + mlxsw_core_schedule_dw(&mlxsw_sp_port->periodic_hw_stats.update_dw, 0); return 0; err_register_netdev: @@ -3012,8 +3082,6 @@ err_dev_addr_init: err_port_swid_set: mlxsw_sp_port_module_unmap(mlxsw_sp_port); err_port_module_map: - kfree(mlxsw_sp_port->hw_stats.cache); -err_alloc_hw_stats: kfree(mlxsw_sp_port->sample); err_alloc_sample: free_percpu(mlxsw_sp_port->pcpu_stats); @@ -3028,7 +3096,7 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) { struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port]; - cancel_delayed_work_sync(&mlxsw_sp_port->hw_stats.update_dw); + cancel_delayed_work_sync(&mlxsw_sp_port->periodic_hw_stats.update_dw); mlxsw_core_port_clear(mlxsw_sp->core, local_port, mlxsw_sp); unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */ mlxsw_sp->ports[local_port] = NULL; @@ -3038,7 +3106,6 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) mlxsw_sp_port_dcb_fini(mlxsw_sp_port); mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT); mlxsw_sp_port_module_unmap(mlxsw_sp_port); - kfree(mlxsw_sp_port->hw_stats.cache); kfree(mlxsw_sp_port->sample); free_percpu(mlxsw_sp_port->pcpu_stats); WARN_ON_ONCE(!list_empty(&mlxsw_sp_port->vlans_list)); @@ -3311,6 +3378,14 @@ static void mlxsw_sp_rx_listener_mark_func(struct sk_buff *skb, u8 local_port, return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv); } +static void mlxsw_sp_rx_listener_mr_mark_func(struct sk_buff *skb, + u8 local_port, void *priv) +{ + skb->offload_mr_fwd_mark = 1; + skb->offload_fwd_mark = 1; + return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv); +} + static void mlxsw_sp_rx_listener_sample_func(struct sk_buff *skb, u8 local_port, void *priv) { @@ -3354,6 +3429,10 @@ out: MLXSW_RXL(mlxsw_sp_rx_listener_mark_func, _trap_id, _action, \ _is_ctrl, SP_##_trap_group, DISCARD) +#define MLXSW_SP_RXL_MR_MARK(_trap_id, _action, _trap_group, _is_ctrl) \ + MLXSW_RXL(mlxsw_sp_rx_listener_mr_mark_func, _trap_id, _action, \ + _is_ctrl, SP_##_trap_group, DISCARD) + #define MLXSW_SP_EVENTL(_func, _trap_id) \ MLXSW_EVENTL(_func, _trap_id, SP_EVENT) @@ -3420,6 +3499,11 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { false, SP_IP2ME, DISCARD), /* ACL trap */ MLXSW_SP_RXL_NO_MARK(ACL0, TRAP_TO_CPU, IP2ME, false), + /* Multicast Router Traps */ + MLXSW_SP_RXL_MARK(IPV4_PIM, TRAP_TO_CPU, PIM, false), + MLXSW_SP_RXL_MARK(RPF, TRAP_TO_CPU, RPF, false), + MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false), + MLXSW_SP_RXL_MR_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false), }; static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) @@ -3445,6 +3529,8 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) case MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF: rate = 128; burst_size = 7; break; @@ -3460,6 +3546,7 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE: case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST: rate = 1024; burst_size = 7; break; @@ -3505,6 +3592,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core) case MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM: priority = 5; tc = 5; break; @@ -3521,12 +3609,14 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core) break; case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF: priority = 2; tc = 2; break; case MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS: case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST: priority = 1; tc = 1; break; @@ -3642,6 +3732,9 @@ static int mlxsw_sp_basic_trap_groups_set(struct mlxsw_core *mlxsw_core) return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); } +static int mlxsw_sp_netdevice_event(struct notifier_block *unused, + unsigned long event, void *ptr); + static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, const struct mlxsw_bus_info *mlxsw_bus_info) { @@ -3663,10 +3756,16 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, return err; } + err = mlxsw_sp_kvdl_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize KVDL\n"); + return err; + } + err = mlxsw_sp_fids_init(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize FIDs\n"); - return err; + goto err_fids_init; } err = mlxsw_sp_traps_init(mlxsw_sp); @@ -3693,12 +3792,34 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_switchdev_init; } + err = mlxsw_sp_counter_pool_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to init counter pool\n"); + goto err_counter_pool_init; + } + + err = mlxsw_sp_afa_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize ACL actions\n"); + goto err_afa_init; + } + err = mlxsw_sp_router_init(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n"); goto err_router_init; } + /* Initialize netdevice notifier after router is initialized, so that + * the event handler can use router structures. + */ + mlxsw_sp->netdevice_nb.notifier_call = mlxsw_sp_netdevice_event; + err = register_netdevice_notifier(&mlxsw_sp->netdevice_nb); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to register netdev notifier\n"); + goto err_netdev_notifier; + } + err = mlxsw_sp_span_init(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n"); @@ -3711,12 +3832,6 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_acl_init; } - err = mlxsw_sp_counter_pool_init(mlxsw_sp); - if (err) { - dev_err(mlxsw_sp->bus_info->dev, "Failed to init counter pool\n"); - goto err_counter_pool_init; - } - err = mlxsw_sp_dpipe_init(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to init pipeline debug\n"); @@ -3734,14 +3849,18 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, err_ports_create: mlxsw_sp_dpipe_fini(mlxsw_sp); err_dpipe_init: - mlxsw_sp_counter_pool_fini(mlxsw_sp); -err_counter_pool_init: mlxsw_sp_acl_fini(mlxsw_sp); err_acl_init: mlxsw_sp_span_fini(mlxsw_sp); err_span_init: + unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb); +err_netdev_notifier: mlxsw_sp_router_fini(mlxsw_sp); err_router_init: + mlxsw_sp_afa_fini(mlxsw_sp); +err_afa_init: + mlxsw_sp_counter_pool_fini(mlxsw_sp); +err_counter_pool_init: mlxsw_sp_switchdev_fini(mlxsw_sp); err_switchdev_init: mlxsw_sp_lag_fini(mlxsw_sp); @@ -3751,6 +3870,8 @@ err_buffers_init: mlxsw_sp_traps_fini(mlxsw_sp); err_traps_init: mlxsw_sp_fids_fini(mlxsw_sp); +err_fids_init: + mlxsw_sp_kvdl_fini(mlxsw_sp); return err; } @@ -3760,15 +3881,18 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_ports_remove(mlxsw_sp); mlxsw_sp_dpipe_fini(mlxsw_sp); - mlxsw_sp_counter_pool_fini(mlxsw_sp); mlxsw_sp_acl_fini(mlxsw_sp); mlxsw_sp_span_fini(mlxsw_sp); + unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb); mlxsw_sp_router_fini(mlxsw_sp); + mlxsw_sp_afa_fini(mlxsw_sp); + mlxsw_sp_counter_pool_fini(mlxsw_sp); mlxsw_sp_switchdev_fini(mlxsw_sp); mlxsw_sp_lag_fini(mlxsw_sp); mlxsw_sp_buffers_fini(mlxsw_sp); mlxsw_sp_traps_fini(mlxsw_sp); mlxsw_sp_fids_fini(mlxsw_sp); + mlxsw_sp_kvdl_fini(mlxsw_sp); } static const struct mlxsw_config_profile mlxsw_sp_config_profile = { @@ -3791,8 +3915,8 @@ static const struct mlxsw_config_profile mlxsw_sp_config_profile = { .max_pkey = 0, .used_kvd_split_data = 1, .kvd_hash_granularity = MLXSW_SP_KVD_GRANULARITY, - .kvd_hash_single_parts = 2, - .kvd_hash_double_parts = 1, + .kvd_hash_single_parts = 59, + .kvd_hash_double_parts = 41, .kvd_linear_size = MLXSW_SP_KVD_LINEAR_SIZE, .swid_config = { { @@ -3986,14 +4110,21 @@ static int mlxsw_sp_lag_index_get(struct mlxsw_sp *mlxsw_sp, static bool mlxsw_sp_master_lag_check(struct mlxsw_sp *mlxsw_sp, struct net_device *lag_dev, - struct netdev_lag_upper_info *lag_upper_info) + struct netdev_lag_upper_info *lag_upper_info, + struct netlink_ext_ack *extack) { u16 lag_id; - if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0) + if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0) { + NL_SET_ERR_MSG(extack, + "spectrum: Exceeded number of supported LAG devices"); return false; - if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) + } + if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) { + NL_SET_ERR_MSG(extack, + "spectrum: LAG device using unsupported Tx type"); return false; + } return true; } @@ -4198,6 +4329,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, { struct netdev_notifier_changeupper_info *info; struct mlxsw_sp_port *mlxsw_sp_port; + struct netlink_ext_ack *extack; struct net_device *upper_dev; struct mlxsw_sp *mlxsw_sp; int err = 0; @@ -4205,6 +4337,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, mlxsw_sp_port = netdev_priv(dev); mlxsw_sp = mlxsw_sp_port->mlxsw_sp; info = ptr; + extack = netdev_notifier_info_to_extack(&info->info); switch (event) { case NETDEV_PRECHANGEUPPER: @@ -4212,25 +4345,43 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, if (!is_vlan_dev(upper_dev) && !netif_is_lag_master(upper_dev) && !netif_is_bridge_master(upper_dev) && - !netif_is_ovs_master(upper_dev)) + !netif_is_ovs_master(upper_dev)) { + NL_SET_ERR_MSG(extack, + "spectrum: Unknown upper device type"); return -EINVAL; + } if (!info->linking) break; - if (netdev_has_any_upper_dev(upper_dev)) + if (netdev_has_any_upper_dev(upper_dev)) { + NL_SET_ERR_MSG(extack, + "spectrum: Enslaving a port to a device that already has an upper device is not supported"); return -EINVAL; + } if (netif_is_lag_master(upper_dev) && !mlxsw_sp_master_lag_check(mlxsw_sp, upper_dev, - info->upper_info)) + info->upper_info, extack)) return -EINVAL; - if (netif_is_lag_master(upper_dev) && vlan_uses_dev(dev)) + if (netif_is_lag_master(upper_dev) && vlan_uses_dev(dev)) { + NL_SET_ERR_MSG(extack, + "spectrum: Master device is a LAG master and this device has a VLAN"); return -EINVAL; + } if (netif_is_lag_port(dev) && is_vlan_dev(upper_dev) && - !netif_is_lag_master(vlan_dev_real_dev(upper_dev))) + !netif_is_lag_master(vlan_dev_real_dev(upper_dev))) { + NL_SET_ERR_MSG(extack, + "spectrum: Can not put a VLAN on a LAG port"); return -EINVAL; - if (netif_is_ovs_master(upper_dev) && vlan_uses_dev(dev)) + } + if (netif_is_ovs_master(upper_dev) && vlan_uses_dev(dev)) { + NL_SET_ERR_MSG(extack, + "spectrum: Master device is an OVS master and this device has a VLAN"); return -EINVAL; - if (netif_is_ovs_port(dev) && is_vlan_dev(upper_dev)) + } + if (netif_is_ovs_port(dev) && is_vlan_dev(upper_dev)) { + NL_SET_ERR_MSG(extack, + "spectrum: Can not put a VLAN on an OVS port"); return -EINVAL; + } break; case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; @@ -4238,7 +4389,8 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, if (info->linking) err = mlxsw_sp_port_bridge_join(mlxsw_sp_port, lower_dev, - upper_dev); + upper_dev, + extack); else mlxsw_sp_port_bridge_leave(mlxsw_sp_port, lower_dev, @@ -4329,18 +4481,25 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev, { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct netdev_notifier_changeupper_info *info = ptr; + struct netlink_ext_ack *extack; struct net_device *upper_dev; int err = 0; + extack = netdev_notifier_info_to_extack(&info->info); + switch (event) { case NETDEV_PRECHANGEUPPER: upper_dev = info->upper_dev; - if (!netif_is_bridge_master(upper_dev)) + if (!netif_is_bridge_master(upper_dev)) { + NL_SET_ERR_MSG(extack, "spectrum: VLAN devices only support bridge and VRF uppers"); return -EINVAL; + } if (!info->linking) break; - if (netdev_has_any_upper_dev(upper_dev)) + if (netdev_has_any_upper_dev(upper_dev)) { + NL_SET_ERR_MSG(extack, "spectrum: Enslaving a port to a device that already has an upper device is not supported"); return -EINVAL; + } break; case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; @@ -4348,7 +4507,8 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev, if (info->linking) err = mlxsw_sp_port_bridge_join(mlxsw_sp_port, vlan_dev, - upper_dev); + upper_dev, + extack); else mlxsw_sp_port_bridge_leave(mlxsw_sp_port, vlan_dev, @@ -4411,13 +4571,21 @@ static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr) return netif_is_l3_master(info->upper_dev); } -static int mlxsw_sp_netdevice_event(struct notifier_block *unused, +static int mlxsw_sp_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct mlxsw_sp *mlxsw_sp; int err = 0; - if (event == NETDEV_CHANGEADDR || event == NETDEV_CHANGEMTU) + mlxsw_sp = container_of(nb, struct mlxsw_sp, netdevice_nb); + if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev)) + err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev, + event, ptr); + else if (mlxsw_sp_netdev_is_ipip_ul(mlxsw_sp, dev)) + err = mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, dev, + event, ptr); + else if (event == NETDEV_CHANGEADDR || event == NETDEV_CHANGEMTU) err = mlxsw_sp_netdevice_router_port_event(dev); else if (mlxsw_sp_is_vrf_event(event, ptr)) err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr); @@ -4431,21 +4599,20 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused, return notifier_from_errno(err); } -static struct notifier_block mlxsw_sp_netdevice_nb __read_mostly = { - .notifier_call = mlxsw_sp_netdevice_event, +static struct notifier_block mlxsw_sp_inetaddr_valid_nb __read_mostly = { + .notifier_call = mlxsw_sp_inetaddr_valid_event, }; static struct notifier_block mlxsw_sp_inetaddr_nb __read_mostly = { .notifier_call = mlxsw_sp_inetaddr_event, - .priority = 10, /* Must be called before FIB notifier block */ }; -static struct notifier_block mlxsw_sp_inet6addr_nb __read_mostly = { - .notifier_call = mlxsw_sp_inet6addr_event, +static struct notifier_block mlxsw_sp_inet6addr_valid_nb __read_mostly = { + .notifier_call = mlxsw_sp_inet6addr_valid_event, }; -static struct notifier_block mlxsw_sp_router_netevent_nb __read_mostly = { - .notifier_call = mlxsw_sp_router_netevent_event, +static struct notifier_block mlxsw_sp_inet6addr_nb __read_mostly = { + .notifier_call = mlxsw_sp_inet6addr_event, }; static const struct pci_device_id mlxsw_sp_pci_id_table[] = { @@ -4462,10 +4629,10 @@ static int __init mlxsw_sp_module_init(void) { int err; - register_netdevice_notifier(&mlxsw_sp_netdevice_nb); + register_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb); register_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); + register_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb); register_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); - register_netevent_notifier(&mlxsw_sp_router_netevent_nb); err = mlxsw_core_driver_register(&mlxsw_sp_driver); if (err) @@ -4480,10 +4647,10 @@ static int __init mlxsw_sp_module_init(void) err_pci_driver_register: mlxsw_core_driver_unregister(&mlxsw_sp_driver); err_core_driver_register: - unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); + unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb); unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); - unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb); + unregister_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb); return err; } @@ -4491,10 +4658,10 @@ static void __exit mlxsw_sp_module_exit(void) { mlxsw_pci_driver_unregister(&mlxsw_sp_pci_driver); mlxsw_core_driver_unregister(&mlxsw_sp_driver); - unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); + unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb); unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); - unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb); + unregister_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb); } module_init(mlxsw_sp_module_init); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 84ce83acdc19..58cf222fb985 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -48,6 +48,7 @@ #include <linux/notifier.h> #include <net/psample.h> #include <net/pkt_cls.h> +#include <net/red.h> #include "port.h" #include "core.h" @@ -62,7 +63,7 @@ #define MLXSW_SP_PORT_BASE_SPEED 25000 /* Mb/s */ -#define MLXSW_SP_KVD_LINEAR_SIZE 65536 /* entries */ +#define MLXSW_SP_KVD_LINEAR_SIZE 98304 /* entries */ #define MLXSW_SP_KVD_GRANULARITY 128 struct mlxsw_sp_port; @@ -94,7 +95,8 @@ struct mlxsw_sp_mid { unsigned char addr[ETH_ALEN]; u16 fid; u16 mid; - unsigned int ref_count; + bool in_hw; + unsigned long *ports_in_mid; /* bits array */ }; enum mlxsw_sp_span_type { @@ -138,9 +140,11 @@ struct mlxsw_sp_port_mall_tc_entry { struct mlxsw_sp_sb; struct mlxsw_sp_bridge; struct mlxsw_sp_router; +struct mlxsw_sp_mr; struct mlxsw_sp_acl; struct mlxsw_sp_counter_pool; struct mlxsw_sp_fid_core; +struct mlxsw_sp_kvdl; struct mlxsw_sp { struct mlxsw_sp_port **ports; @@ -152,11 +156,12 @@ struct mlxsw_sp { struct mlxsw_sp_sb *sb; struct mlxsw_sp_bridge *bridge; struct mlxsw_sp_router *router; + struct mlxsw_sp_mr *mr; + struct mlxsw_afa *afa; struct mlxsw_sp_acl *acl; struct mlxsw_sp_fid_core *fid_core; - struct { - DECLARE_BITMAP(usage, MLXSW_SP_KVD_LINEAR_SIZE); - } kvdl; + struct mlxsw_sp_kvdl *kvdl; + struct notifier_block netdevice_nb; struct mlxsw_sp_counter_pool *counter_pool; struct { @@ -199,6 +204,37 @@ struct mlxsw_sp_port_vlan { struct list_head bridge_vlan_node; }; +enum mlxsw_sp_qdisc_type { + MLXSW_SP_QDISC_NO_QDISC, + MLXSW_SP_QDISC_RED, +}; + +struct mlxsw_sp_qdisc { + u32 handle; + enum mlxsw_sp_qdisc_type type; + struct red_stats xstats_base; + union { + struct { + u64 tail_drop_base; + u64 ecn_base; + u64 wred_drop_base; + } red; + } xstats; + + u64 tx_bytes; + u64 tx_packets; + u64 drops; + u64 overlimits; +}; + +/* No need an internal lock; At worse - miss a single periodic iteration */ +struct mlxsw_sp_port_xstats { + u64 ecn; + u64 wred_drop[TC_MAX_QUEUE]; + u64 tail_drop[TC_MAX_QUEUE]; + u64 backlog[TC_MAX_QUEUE]; +}; + struct mlxsw_sp_port { struct net_device *dev; struct mlxsw_sp_port_pcpu_stats __percpu *pcpu_stats; @@ -227,11 +263,13 @@ struct mlxsw_sp_port { struct list_head mall_tc_list; struct { #define MLXSW_HW_STATS_UPDATE_TIME HZ - struct rtnl_link_stats64 *cache; + struct rtnl_link_stats64 stats; + struct mlxsw_sp_port_xstats xstats; struct delayed_work update_dw; - } hw_stats; + } periodic_hw_stats; struct mlxsw_sp_port_sample *sample; struct list_head vlans_list; + struct mlxsw_sp_qdisc root_qdisc; }; static inline bool @@ -322,7 +360,8 @@ void mlxsw_sp_port_vlan_bridge_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *brport_dev, - struct net_device *br_dev); + struct net_device *br_dev, + struct netlink_ext_ack *extack); void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *brport_dev, struct net_device *br_dev); @@ -380,23 +419,43 @@ static inline void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port) /* spectrum_router.c */ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp); -int mlxsw_sp_router_netevent_event(struct notifier_block *unused, - unsigned long event, void *ptr); int mlxsw_sp_netdevice_router_port_event(struct net_device *dev); int mlxsw_sp_inetaddr_event(struct notifier_block *unused, unsigned long event, void *ptr); +int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused, + unsigned long event, void *ptr); int mlxsw_sp_inet6addr_event(struct notifier_block *unused, unsigned long event, void *ptr); +int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused, + unsigned long event, void *ptr); int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, struct netdev_notifier_changeupper_info *info); +bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev); +bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev); +int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev, + unsigned long event, + struct netdev_notifier_info *info); +int +mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev, + unsigned long event, + struct netdev_notifier_info *info); void mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif); /* spectrum_kvdl.c */ +int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_kvdl_fini(struct mlxsw_sp *mlxsw_sp); int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count, u32 *p_entry_index); void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index); +int mlxsw_sp_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp, + unsigned int entry_count, + unsigned int *p_alloc_size); struct mlxsw_sp_acl_rule_info { unsigned int priority; @@ -466,9 +525,9 @@ void mlxsw_sp_acl_rulei_keymask_buf(struct mlxsw_sp_acl_rule_info *rulei, enum mlxsw_afk_element element, const char *key_value, const char *mask_value, unsigned int len); -void mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei); -void mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei, - u16 group_id); +int mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei); +int mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei, + u16 group_id); int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei); int mlxsw_sp_acl_rulei_act_trap(struct mlxsw_sp_acl_rule_info *rulei); int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp, @@ -521,6 +580,10 @@ void mlxsw_sp_flower_destroy(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, int mlxsw_sp_flower_stats(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, struct tc_cls_flower_offload *f); +/* spectrum_qdisc.c */ +int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_red_qopt_offload *p); + /* spectrum_fid.c */ int mlxsw_sp_fid_flood_set(struct mlxsw_sp_fid *fid, enum mlxsw_sp_flood_type packet_type, u8 local_port, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 4b2455e3e079..93dcd315f7d6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -52,7 +52,6 @@ struct mlxsw_sp_acl { struct mlxsw_sp *mlxsw_sp; struct mlxsw_afk *afk; - struct mlxsw_afa *afa; struct mlxsw_sp_fid *dummy_fid; const struct mlxsw_sp_acl_ops *ops; struct rhashtable ruleset_ht; @@ -333,7 +332,7 @@ mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl) rulei = kzalloc(sizeof(*rulei), GFP_KERNEL); if (!rulei) return NULL; - rulei->act_block = mlxsw_afa_block_create(acl->afa); + rulei->act_block = mlxsw_afa_block_create(acl->mlxsw_sp->afa); if (IS_ERR(rulei->act_block)) { err = PTR_ERR(rulei->act_block); goto err_afa_block_create; @@ -379,15 +378,15 @@ void mlxsw_sp_acl_rulei_keymask_buf(struct mlxsw_sp_acl_rule_info *rulei, key_value, mask_value, len); } -void mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei) +int mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei) { - mlxsw_afa_block_continue(rulei->act_block); + return mlxsw_afa_block_continue(rulei->act_block); } -void mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei, - u16 group_id) +int mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei, + u16 group_id) { - mlxsw_afa_block_jump(rulei->act_block, group_id); + return mlxsw_afa_block_jump(rulei->act_block, group_id); } int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei) @@ -397,7 +396,8 @@ int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei) int mlxsw_sp_acl_rulei_act_trap(struct mlxsw_sp_acl_rule_info *rulei) { - return mlxsw_afa_block_append_trap(rulei->act_block); + return mlxsw_afa_block_append_trap(rulei->act_block, + MLXSW_TRAP_ID_ACL0); } int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp, @@ -653,85 +653,6 @@ int mlxsw_sp_acl_rule_get_stats(struct mlxsw_sp *mlxsw_sp, return 0; } -#define MLXSW_SP_KDVL_ACT_EXT_SIZE 1 - -static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index, - char *enc_actions, bool is_first) -{ - struct mlxsw_sp *mlxsw_sp = priv; - char pefa_pl[MLXSW_REG_PEFA_LEN]; - u32 kvdl_index; - int err; - - /* The first action set of a TCAM entry is stored directly in TCAM, - * not KVD linear area. - */ - if (is_first) - return 0; - - err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KDVL_ACT_EXT_SIZE, - &kvdl_index); - if (err) - return err; - mlxsw_reg_pefa_pack(pefa_pl, kvdl_index, enc_actions); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pefa), pefa_pl); - if (err) - goto err_pefa_write; - *p_kvdl_index = kvdl_index; - return 0; - -err_pefa_write: - mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); - return err; -} - -static void mlxsw_sp_act_kvdl_set_del(void *priv, u32 kvdl_index, - bool is_first) -{ - struct mlxsw_sp *mlxsw_sp = priv; - - if (is_first) - return; - mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); -} - -static int mlxsw_sp_act_kvdl_fwd_entry_add(void *priv, u32 *p_kvdl_index, - u8 local_port) -{ - struct mlxsw_sp *mlxsw_sp = priv; - char ppbs_pl[MLXSW_REG_PPBS_LEN]; - u32 kvdl_index; - int err; - - err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &kvdl_index); - if (err) - return err; - mlxsw_reg_ppbs_pack(ppbs_pl, kvdl_index, local_port); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ppbs), ppbs_pl); - if (err) - goto err_ppbs_write; - *p_kvdl_index = kvdl_index; - return 0; - -err_ppbs_write: - mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); - return err; -} - -static void mlxsw_sp_act_kvdl_fwd_entry_del(void *priv, u32 kvdl_index) -{ - struct mlxsw_sp *mlxsw_sp = priv; - - mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); -} - -static const struct mlxsw_afa_ops mlxsw_sp_act_afa_ops = { - .kvdl_set_add = mlxsw_sp_act_kvdl_set_add, - .kvdl_set_del = mlxsw_sp_act_kvdl_set_del, - .kvdl_fwd_entry_add = mlxsw_sp_act_kvdl_fwd_entry_add, - .kvdl_fwd_entry_del = mlxsw_sp_act_kvdl_fwd_entry_del, -}; - int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp) { const struct mlxsw_sp_acl_ops *acl_ops = &mlxsw_sp_acl_tcam_ops; @@ -753,14 +674,6 @@ int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp) goto err_afk_create; } - acl->afa = mlxsw_afa_create(MLXSW_CORE_RES_GET(mlxsw_sp->core, - ACL_ACTIONS_PER_SET), - &mlxsw_sp_act_afa_ops, mlxsw_sp); - if (IS_ERR(acl->afa)) { - err = PTR_ERR(acl->afa); - goto err_afa_create; - } - err = rhashtable_init(&acl->ruleset_ht, &mlxsw_sp_acl_ruleset_ht_params); if (err) @@ -792,8 +705,6 @@ err_acl_ops_init: err_fid_get: rhashtable_destroy(&acl->ruleset_ht); err_rhashtable_init: - mlxsw_afa_destroy(acl->afa); -err_afa_create: mlxsw_afk_destroy(acl->afk); err_afk_create: kfree(acl); @@ -810,7 +721,6 @@ void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp) WARN_ON(!list_empty(&acl->rules)); mlxsw_sp_fid_put(acl->dummy_fid); rhashtable_destroy(&acl->ruleset_ht); - mlxsw_afa_destroy(acl->afa); mlxsw_afk_destroy(acl->afk); kfree(acl); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c new file mode 100644 index 000000000000..4d3340ed0291 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c @@ -0,0 +1,129 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com> + * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spectrum_acl_flex_actions.h" +#include "core_acl_flex_actions.h" + +#define MLXSW_SP_KVDL_ACT_EXT_SIZE 1 + +static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index, + char *enc_actions, bool is_first) +{ + struct mlxsw_sp *mlxsw_sp = priv; + char pefa_pl[MLXSW_REG_PEFA_LEN]; + u32 kvdl_index; + int err; + + /* The first action set of a TCAM entry is stored directly in TCAM, + * not KVD linear area. + */ + if (is_first) + return 0; + + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ACT_EXT_SIZE, + &kvdl_index); + if (err) + return err; + mlxsw_reg_pefa_pack(pefa_pl, kvdl_index, enc_actions); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pefa), pefa_pl); + if (err) + goto err_pefa_write; + *p_kvdl_index = kvdl_index; + return 0; + +err_pefa_write: + mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); + return err; +} + +static void mlxsw_sp_act_kvdl_set_del(void *priv, u32 kvdl_index, + bool is_first) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + if (is_first) + return; + mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); +} + +static int mlxsw_sp_act_kvdl_fwd_entry_add(void *priv, u32 *p_kvdl_index, + u8 local_port) +{ + struct mlxsw_sp *mlxsw_sp = priv; + char ppbs_pl[MLXSW_REG_PPBS_LEN]; + u32 kvdl_index; + int err; + + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &kvdl_index); + if (err) + return err; + mlxsw_reg_ppbs_pack(ppbs_pl, kvdl_index, local_port); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ppbs), ppbs_pl); + if (err) + goto err_ppbs_write; + *p_kvdl_index = kvdl_index; + return 0; + +err_ppbs_write: + mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); + return err; +} + +static void mlxsw_sp_act_kvdl_fwd_entry_del(void *priv, u32 kvdl_index) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); +} + +static const struct mlxsw_afa_ops mlxsw_sp_act_afa_ops = { + .kvdl_set_add = mlxsw_sp_act_kvdl_set_add, + .kvdl_set_del = mlxsw_sp_act_kvdl_set_del, + .kvdl_fwd_entry_add = mlxsw_sp_act_kvdl_fwd_entry_add, + .kvdl_fwd_entry_del = mlxsw_sp_act_kvdl_fwd_entry_del, +}; + +int mlxsw_sp_afa_init(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp->afa = mlxsw_afa_create(MLXSW_CORE_RES_GET(mlxsw_sp->core, + ACL_ACTIONS_PER_SET), + &mlxsw_sp_act_afa_ops, mlxsw_sp); + return PTR_ERR_OR_ZERO(mlxsw_sp->afa); +} + +void mlxsw_sp_afa_fini(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_afa_destroy(mlxsw_sp->afa); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h new file mode 100644 index 000000000000..2726192836ad --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h @@ -0,0 +1,44 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com> + * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXSW_SPECTRUM_ACL_FLEX_KEYS_H +#define _MLXSW_SPECTRUM_ACL_FLEX_KEYS_H + +#include "spectrum.h" + +int mlxsw_sp_afa_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_afa_fini(struct mlxsw_sp *mlxsw_sp); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 50b40de1fb91..7e8284b46968 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -608,7 +608,10 @@ mlxsw_sp_acl_tcam_region_catchall_add(struct mlxsw_sp *mlxsw_sp, goto err_rulei_create; } - mlxsw_sp_acl_rulei_act_continue(rulei); + err = mlxsw_sp_acl_rulei_act_continue(rulei); + if (WARN_ON(err)) + goto err_rulei_act_continue; + err = mlxsw_sp_acl_rulei_commit(rulei); if (err) goto err_rulei_commit; @@ -623,6 +626,7 @@ mlxsw_sp_acl_tcam_region_catchall_add(struct mlxsw_sp *mlxsw_sp, err_rule_insert: err_rulei_commit: +err_rulei_act_continue: mlxsw_sp_acl_rulei_destroy(rulei); err_rulei_create: parman_item_remove(region->parman, parman_prio, parman_item); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index 51e6846da72b..96fdba78acab 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -43,21 +43,42 @@ enum mlxsw_sp_field_metadata_id { MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT, MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD, MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP, + MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX, + MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_SIZE, + MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX, }; static struct devlink_dpipe_field mlxsw_sp_dpipe_fields_metadata[] = { - { .name = "erif_port", - .id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT, - .bitwidth = 32, - .mapping_type = DEVLINK_DPIPE_FIELD_MAPPING_TYPE_IFINDEX, + { + .name = "erif_port", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT, + .bitwidth = 32, + .mapping_type = DEVLINK_DPIPE_FIELD_MAPPING_TYPE_IFINDEX, }, - { .name = "l3_forward", - .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD, - .bitwidth = 1, + { + .name = "l3_forward", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD, + .bitwidth = 1, }, - { .name = "l3_drop", - .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP, - .bitwidth = 1, + { + .name = "l3_drop", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP, + .bitwidth = 1, + }, + { + .name = "adj_index", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX, + .bitwidth = 32, + }, + { + .name = "adj_size", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_SIZE, + .bitwidth = 32, + }, + { + .name = "adj_hash_index", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX, + .bitwidth = 32, }, }; @@ -826,6 +847,390 @@ static void mlxsw_sp_dpipe_host6_table_fini(struct mlxsw_sp *mlxsw_sp) MLXSW_SP_DPIPE_TABLE_NAME_HOST6); } +static int mlxsw_sp_dpipe_table_adj_matches_dump(void *priv, + struct sk_buff *skb) +{ + struct devlink_dpipe_match match = {0}; + int err; + + match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match.header = &mlxsw_sp_dpipe_header_metadata; + match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX; + + err = devlink_dpipe_match_put(skb, &match); + if (err) + return err; + + match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match.header = &mlxsw_sp_dpipe_header_metadata; + match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_SIZE; + + err = devlink_dpipe_match_put(skb, &match); + if (err) + return err; + + match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match.header = &mlxsw_sp_dpipe_header_metadata; + match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX; + + return devlink_dpipe_match_put(skb, &match); +} + +static int mlxsw_sp_dpipe_table_adj_actions_dump(void *priv, + struct sk_buff *skb) +{ + struct devlink_dpipe_action action = {0}; + int err; + + action.type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action.header = &devlink_dpipe_header_ethernet; + action.field_id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC; + + err = devlink_dpipe_action_put(skb, &action); + if (err) + return err; + + action.type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action.header = &mlxsw_sp_dpipe_header_metadata; + action.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT; + + return devlink_dpipe_action_put(skb, &action); +} + +static u64 mlxsw_sp_dpipe_table_adj_size(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_nexthop *nh; + u64 size = 0; + + mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) + if (mlxsw_sp_nexthop_offload(nh) && + !mlxsw_sp_nexthop_group_has_ipip(nh)) + size++; + return size; +} + +enum mlxsw_sp_dpipe_table_adj_match { + MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX, + MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_SIZE, + MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX, + MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT, +}; + +enum mlxsw_sp_dpipe_table_adj_action { + MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC, + MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT, + MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT, +}; + +static void +mlxsw_sp_dpipe_table_adj_match_action_prepare(struct devlink_dpipe_match *matches, + struct devlink_dpipe_action *actions) +{ + struct devlink_dpipe_action *action; + struct devlink_dpipe_match *match; + + match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX]; + match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match->header = &mlxsw_sp_dpipe_header_metadata; + match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX; + + match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_SIZE]; + match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match->header = &mlxsw_sp_dpipe_header_metadata; + match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_SIZE; + + match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX]; + match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match->header = &mlxsw_sp_dpipe_header_metadata; + match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX; + + action = &actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC]; + action->type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action->header = &devlink_dpipe_header_ethernet; + action->field_id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC; + + action = &actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT]; + action->type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action->header = &mlxsw_sp_dpipe_header_metadata; + action->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT; +} + +static int +mlxsw_sp_dpipe_table_adj_entry_prepare(struct devlink_dpipe_entry *entry, + struct devlink_dpipe_value *match_values, + struct devlink_dpipe_match *matches, + struct devlink_dpipe_value *action_values, + struct devlink_dpipe_action *actions) +{ struct devlink_dpipe_value *action_value; + struct devlink_dpipe_value *match_value; + struct devlink_dpipe_action *action; + struct devlink_dpipe_match *match; + + entry->match_values = match_values; + entry->match_values_count = MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT; + + entry->action_values = action_values; + entry->action_values_count = MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT; + + match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX]; + match_value = &match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX]; + + match_value->match = match; + match_value->value_size = sizeof(u32); + match_value->value = kmalloc(match_value->value_size, GFP_KERNEL); + if (!match_value->value) + return -ENOMEM; + + match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_SIZE]; + match_value = &match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_SIZE]; + + match_value->match = match; + match_value->value_size = sizeof(u32); + match_value->value = kmalloc(match_value->value_size, GFP_KERNEL); + if (!match_value->value) + return -ENOMEM; + + match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX]; + match_value = &match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX]; + + match_value->match = match; + match_value->value_size = sizeof(u32); + match_value->value = kmalloc(match_value->value_size, GFP_KERNEL); + if (!match_value->value) + return -ENOMEM; + + action = &actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC]; + action_value = &action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC]; + + action_value->action = action; + action_value->value_size = sizeof(u64); + action_value->value = kmalloc(action_value->value_size, GFP_KERNEL); + if (!action_value->value) + return -ENOMEM; + + action = &actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT]; + action_value = &action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT]; + + action_value->action = action; + action_value->value_size = sizeof(u32); + action_value->value = kmalloc(action_value->value_size, GFP_KERNEL); + if (!action_value->value) + return -ENOMEM; + + return 0; +} + +static void +__mlxsw_sp_dpipe_table_adj_entry_fill(struct devlink_dpipe_entry *entry, + u32 adj_index, u32 adj_size, + u32 adj_hash_index, unsigned char *ha, + struct mlxsw_sp_rif *rif) +{ + struct devlink_dpipe_value *value; + u32 *p_rif_value; + u32 *p_index; + + value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX]; + p_index = value->value; + *p_index = adj_index; + + value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_SIZE]; + p_index = value->value; + *p_index = adj_size; + + value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX]; + p_index = value->value; + *p_index = adj_hash_index; + + value = &entry->action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC]; + ether_addr_copy(value->value, ha); + + value = &entry->action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT]; + p_rif_value = value->value; + *p_rif_value = mlxsw_sp_rif_index(rif); + value->mapping_value = mlxsw_sp_rif_dev_ifindex(rif); + value->mapping_valid = true; +} + +static void mlxsw_sp_dpipe_table_adj_entry_fill(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh, + struct devlink_dpipe_entry *entry) +{ + struct mlxsw_sp_rif *rif = mlxsw_sp_nexthop_rif(nh); + unsigned char *ha = mlxsw_sp_nexthop_ha(nh); + u32 adj_hash_index = 0; + u32 adj_index = 0; + u32 adj_size = 0; + int err; + + mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_size, &adj_hash_index); + __mlxsw_sp_dpipe_table_adj_entry_fill(entry, adj_index, adj_size, + adj_hash_index, ha, rif); + err = mlxsw_sp_nexthop_counter_get(mlxsw_sp, nh, &entry->counter); + if (!err) + entry->counter_valid = true; +} + +static int +mlxsw_sp_dpipe_table_adj_entries_get(struct mlxsw_sp *mlxsw_sp, + struct devlink_dpipe_entry *entry, + bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx) +{ + struct mlxsw_sp_nexthop *nh; + int entry_index = 0; + int nh_count_max; + int nh_count = 0; + int nh_skip; + int j; + int err; + + rtnl_lock(); + nh_count_max = mlxsw_sp_dpipe_table_adj_size(mlxsw_sp); +start_again: + err = devlink_dpipe_entry_ctx_prepare(dump_ctx); + if (err) + goto err_ctx_prepare; + j = 0; + nh_skip = nh_count; + nh_count = 0; + mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) { + if (!mlxsw_sp_nexthop_offload(nh) || + mlxsw_sp_nexthop_group_has_ipip(nh)) + continue; + + if (nh_count < nh_skip) + goto skip; + + mlxsw_sp_dpipe_table_adj_entry_fill(mlxsw_sp, nh, entry); + entry->index = entry_index; + err = devlink_dpipe_entry_ctx_append(dump_ctx, entry); + if (err) { + if (err == -EMSGSIZE) { + if (!j) + goto err_entry_append; + break; + } + goto err_entry_append; + } + entry_index++; + j++; +skip: + nh_count++; + } + + devlink_dpipe_entry_ctx_close(dump_ctx); + if (nh_count != nh_count_max) + goto start_again; + rtnl_unlock(); + + return 0; + +err_ctx_prepare: +err_entry_append: + rtnl_unlock(); + return err; +} + +static int +mlxsw_sp_dpipe_table_adj_entries_dump(void *priv, bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx) +{ + struct devlink_dpipe_value action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT]; + struct devlink_dpipe_value match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT]; + struct devlink_dpipe_action actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT]; + struct devlink_dpipe_match matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT]; + struct devlink_dpipe_entry entry = {0}; + struct mlxsw_sp *mlxsw_sp = priv; + int err; + + memset(matches, 0, MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT * + sizeof(matches[0])); + memset(match_values, 0, MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT * + sizeof(match_values[0])); + memset(actions, 0, MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT * + sizeof(actions[0])); + memset(action_values, 0, MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT * + sizeof(action_values[0])); + + mlxsw_sp_dpipe_table_adj_match_action_prepare(matches, actions); + err = mlxsw_sp_dpipe_table_adj_entry_prepare(&entry, + match_values, matches, + action_values, actions); + if (err) + goto out; + + err = mlxsw_sp_dpipe_table_adj_entries_get(mlxsw_sp, &entry, + counters_enabled, dump_ctx); +out: + devlink_dpipe_entry_clear(&entry); + return err; +} + +static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable) +{ + struct mlxsw_sp *mlxsw_sp = priv; + struct mlxsw_sp_nexthop *nh; + u32 adj_hash_index = 0; + u32 adj_index = 0; + u32 adj_size = 0; + + mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) { + if (!mlxsw_sp_nexthop_offload(nh) || + mlxsw_sp_nexthop_group_has_ipip(nh)) + continue; + + mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_size, + &adj_hash_index); + if (enable) + mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); + else + mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); + mlxsw_sp_nexthop_update(mlxsw_sp, + adj_index + adj_hash_index, nh); + } + return 0; +} + +static u64 +mlxsw_sp_dpipe_table_adj_size_get(void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + u64 size; + + rtnl_lock(); + size = mlxsw_sp_dpipe_table_adj_size(mlxsw_sp); + rtnl_unlock(); + + return size; +} + +static struct devlink_dpipe_table_ops mlxsw_sp_dpipe_table_adj_ops = { + .matches_dump = mlxsw_sp_dpipe_table_adj_matches_dump, + .actions_dump = mlxsw_sp_dpipe_table_adj_actions_dump, + .entries_dump = mlxsw_sp_dpipe_table_adj_entries_dump, + .counters_set_update = mlxsw_sp_dpipe_table_adj_counters_update, + .size_get = mlxsw_sp_dpipe_table_adj_size_get, +}; + +static int mlxsw_sp_dpipe_adj_table_init(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + return devlink_dpipe_table_register(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_ADJ, + &mlxsw_sp_dpipe_table_adj_ops, + mlxsw_sp, false); +} + +static void mlxsw_sp_dpipe_adj_table_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + devlink_dpipe_table_unregister(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_ADJ); +} + int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp) { struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); @@ -846,8 +1251,14 @@ int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp) err = mlxsw_sp_dpipe_host6_table_init(mlxsw_sp); if (err) goto err_host6_table_init; - return 0; + err = mlxsw_sp_dpipe_adj_table_init(mlxsw_sp); + if (err) + goto err_adj_table_init; + + return 0; +err_adj_table_init: + mlxsw_sp_dpipe_host6_table_fini(mlxsw_sp); err_host6_table_init: mlxsw_sp_dpipe_host4_table_fini(mlxsw_sp); err_host4_table_init: @@ -861,6 +1272,7 @@ void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp) { struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + mlxsw_sp_dpipe_adj_table_fini(mlxsw_sp); mlxsw_sp_dpipe_host6_table_fini(mlxsw_sp); mlxsw_sp_dpipe_host4_table_fini(mlxsw_sp); mlxsw_sp_dpipe_erif_table_fini(mlxsw_sp); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h index 283fde4e6783..815d543cf114 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h @@ -56,5 +56,6 @@ static inline void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp) #define MLXSW_SP_DPIPE_TABLE_NAME_ERIF "mlxsw_erif" #define MLXSW_SP_DPIPE_TABLE_NAME_HOST4 "mlxsw_host4" #define MLXSW_SP_DPIPE_TABLE_NAME_HOST6 "mlxsw_host6" +#define MLXSW_SP_DPIPE_TABLE_NAME_ADJ "mlxsw_adj" #endif /* _MLXSW_PIPELINE_H_*/ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 8aace9a06a5d..2f0e57857ea4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -63,7 +63,11 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, tcf_exts_to_list(exts, &actions); list_for_each_entry(a, &actions, list) { - if (is_tcf_gact_shot(a)) { + if (is_tcf_gact_ok(a)) { + err = mlxsw_sp_acl_rulei_act_continue(rulei); + if (err) + return err; + } else if (is_tcf_gact_shot(a)) { err = mlxsw_sp_acl_rulei_act_drop(rulei); if (err) return err; @@ -84,7 +88,9 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, return PTR_ERR(ruleset); group_id = mlxsw_sp_acl_ruleset_group_id(ruleset); - mlxsw_sp_acl_rulei_act_jump(rulei, group_id); + err = mlxsw_sp_acl_rulei_act_jump(rulei, group_id); + if (err) + return err; } else if (is_tcf_mirred_egress_redirect(a)) { int ifindex = tcf_mirred_ifindex(a); struct net_device *out_dev; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c index 702fe945227c..7502e53447bd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@ -36,36 +36,123 @@ #include "spectrum_ipip.h" -static bool -mlxsw_sp_ipip_netdev_has_ikey(const struct net_device *ol_dev) +struct ip_tunnel_parm +mlxsw_sp_ipip_netdev_parms(const struct net_device *ol_dev) { struct ip_tunnel *tun = netdev_priv(ol_dev); - return !!(tun->parms.i_flags & TUNNEL_KEY); + return tun->parms; } -static bool -mlxsw_sp_ipip_netdev_has_okey(const struct net_device *ol_dev) +static bool mlxsw_sp_ipip_parms_has_ikey(struct ip_tunnel_parm parms) { - struct ip_tunnel *tun = netdev_priv(ol_dev); + return !!(parms.i_flags & TUNNEL_KEY); +} - return !!(tun->parms.o_flags & TUNNEL_KEY); +static bool mlxsw_sp_ipip_parms_has_okey(struct ip_tunnel_parm parms) +{ + return !!(parms.o_flags & TUNNEL_KEY); } -static u32 mlxsw_sp_ipip_netdev_ikey(const struct net_device *ol_dev) +static u32 mlxsw_sp_ipip_parms_ikey(struct ip_tunnel_parm parms) { - struct ip_tunnel *tun = netdev_priv(ol_dev); + return mlxsw_sp_ipip_parms_has_ikey(parms) ? + be32_to_cpu(parms.i_key) : 0; +} + +static u32 mlxsw_sp_ipip_parms_okey(struct ip_tunnel_parm parms) +{ + return mlxsw_sp_ipip_parms_has_okey(parms) ? + be32_to_cpu(parms.o_key) : 0; +} - return mlxsw_sp_ipip_netdev_has_ikey(ol_dev) ? - be32_to_cpu(tun->parms.i_key) : 0; +static __be32 mlxsw_sp_ipip_parms_saddr4(struct ip_tunnel_parm parms) +{ + return parms.iph.saddr; +} + +static union mlxsw_sp_l3addr +mlxsw_sp_ipip_parms_saddr(enum mlxsw_sp_l3proto proto, + struct ip_tunnel_parm parms) +{ + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return (union mlxsw_sp_l3addr) { + .addr4 = mlxsw_sp_ipip_parms_saddr4(parms), + }; + case MLXSW_SP_L3_PROTO_IPV6: + break; + } + + WARN_ON(1); + return (union mlxsw_sp_l3addr) { + .addr4 = 0, + }; +} + +static __be32 mlxsw_sp_ipip_parms_daddr4(struct ip_tunnel_parm parms) +{ + return parms.iph.daddr; +} + +static union mlxsw_sp_l3addr +mlxsw_sp_ipip_parms_daddr(enum mlxsw_sp_l3proto proto, + struct ip_tunnel_parm parms) +{ + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return (union mlxsw_sp_l3addr) { + .addr4 = mlxsw_sp_ipip_parms_daddr4(parms), + }; + case MLXSW_SP_L3_PROTO_IPV6: + break; + } + + WARN_ON(1); + return (union mlxsw_sp_l3addr) { + .addr4 = 0, + }; +} + +static bool mlxsw_sp_ipip_netdev_has_ikey(const struct net_device *ol_dev) +{ + return mlxsw_sp_ipip_parms_has_ikey(mlxsw_sp_ipip_netdev_parms(ol_dev)); +} + +static bool mlxsw_sp_ipip_netdev_has_okey(const struct net_device *ol_dev) +{ + return mlxsw_sp_ipip_parms_has_okey(mlxsw_sp_ipip_netdev_parms(ol_dev)); +} + +static u32 mlxsw_sp_ipip_netdev_ikey(const struct net_device *ol_dev) +{ + return mlxsw_sp_ipip_parms_ikey(mlxsw_sp_ipip_netdev_parms(ol_dev)); } static u32 mlxsw_sp_ipip_netdev_okey(const struct net_device *ol_dev) { - struct ip_tunnel *tun = netdev_priv(ol_dev); + return mlxsw_sp_ipip_parms_okey(mlxsw_sp_ipip_netdev_parms(ol_dev)); +} + +union mlxsw_sp_l3addr +mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto, + const struct net_device *ol_dev) +{ + return mlxsw_sp_ipip_parms_saddr(proto, + mlxsw_sp_ipip_netdev_parms(ol_dev)); +} + +static __be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev) +{ + return mlxsw_sp_ipip_parms_daddr4(mlxsw_sp_ipip_netdev_parms(ol_dev)); +} - return mlxsw_sp_ipip_netdev_has_okey(ol_dev) ? - be32_to_cpu(tun->parms.o_key) : 0; +static union mlxsw_sp_l3addr +mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto, + const struct net_device *ol_dev) +{ + return mlxsw_sp_ipip_parms_daddr(proto, + mlxsw_sp_ipip_netdev_parms(ol_dev)); } static int @@ -200,6 +287,73 @@ mlxsw_sp_ipip_ol_loopback_config_gre4(struct mlxsw_sp *mlxsw_sp, }; } +static int +mlxsw_sp_ipip_ol_netdev_change_gre4(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + struct netlink_ext_ack *extack) +{ + union mlxsw_sp_l3addr old_saddr, new_saddr; + union mlxsw_sp_l3addr old_daddr, new_daddr; + struct ip_tunnel_parm new_parms; + bool update_tunnel = false; + bool update_decap = false; + bool update_nhs = false; + int err = 0; + + new_parms = mlxsw_sp_ipip_netdev_parms(ipip_entry->ol_dev); + + new_saddr = mlxsw_sp_ipip_parms_saddr(MLXSW_SP_L3_PROTO_IPV4, + new_parms); + old_saddr = mlxsw_sp_ipip_parms_saddr(MLXSW_SP_L3_PROTO_IPV4, + ipip_entry->parms); + new_daddr = mlxsw_sp_ipip_parms_daddr(MLXSW_SP_L3_PROTO_IPV4, + new_parms); + old_daddr = mlxsw_sp_ipip_parms_daddr(MLXSW_SP_L3_PROTO_IPV4, + ipip_entry->parms); + + if (!mlxsw_sp_l3addr_eq(&new_saddr, &old_saddr)) { + u16 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev); + + /* Since the local address has changed, if there is another + * tunnel with a matching saddr, both need to be demoted. + */ + if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, + MLXSW_SP_L3_PROTO_IPV4, + new_saddr, ul_tb_id, + ipip_entry)) { + mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry); + return 0; + } + + update_tunnel = true; + } else if ((mlxsw_sp_ipip_parms_okey(ipip_entry->parms) != + mlxsw_sp_ipip_parms_okey(new_parms)) || + ipip_entry->parms.link != new_parms.link) { + update_tunnel = true; + } else if (!mlxsw_sp_l3addr_eq(&new_daddr, &old_daddr)) { + update_nhs = true; + } else if (mlxsw_sp_ipip_parms_ikey(ipip_entry->parms) != + mlxsw_sp_ipip_parms_ikey(new_parms)) { + update_decap = true; + } + + if (update_tunnel) + err = __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry, + true, true, true, + extack); + else if (update_nhs) + err = __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry, + false, false, true, + extack); + else if (update_decap) + err = __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry, + false, false, false, + extack); + + ipip_entry->parms = new_parms; + return err; +} + static const struct mlxsw_sp_ipip_ops mlxsw_sp_ipip_gre4_ops = { .dev_type = ARPHRD_IPGRE, .ul_proto = MLXSW_SP_L3_PROTO_IPV4, @@ -207,6 +361,7 @@ static const struct mlxsw_sp_ipip_ops mlxsw_sp_ipip_gre4_ops = { .fib_entry_op = mlxsw_sp_ipip_fib_entry_op_gre4, .can_offload = mlxsw_sp_ipip_can_offload_gre4, .ol_loopback_config = mlxsw_sp_ipip_ol_loopback_config_gre4, + .ol_netdev_change = mlxsw_sp_ipip_ol_netdev_change_gre4, }; const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[] = { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h index 1c2db831d83b..04b08d9d76e9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h @@ -38,6 +38,13 @@ #include "spectrum_router.h" #include <net/ip_fib.h> +struct ip_tunnel_parm +mlxsw_sp_ipip_netdev_parms(const struct net_device *ol_dev); + +union mlxsw_sp_l3addr +mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto, + const struct net_device *ol_dev); + enum mlxsw_sp_ipip_type { MLXSW_SP_IPIP_TYPE_GRE4, MLXSW_SP_IPIP_TYPE_MAX, @@ -47,9 +54,9 @@ struct mlxsw_sp_ipip_entry { enum mlxsw_sp_ipip_type ipipt; struct net_device *ol_dev; /* Overlay. */ struct mlxsw_sp_rif_ipip_lb *ol_lb; - unsigned int ref_count; /* Number of next hops using the tunnel. */ struct mlxsw_sp_fib_entry *decap_fib_entry; struct list_head ipip_list_node; + struct ip_tunnel_parm parms; }; struct mlxsw_sp_ipip_ops { @@ -72,6 +79,10 @@ struct mlxsw_sp_ipip_ops { struct mlxsw_sp_ipip_entry *ipip_entry, enum mlxsw_reg_ralue_op op, u32 tunnel_index); + + int (*ol_netdev_change)(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + struct netlink_ext_ack *extack); }; extern const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[]; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c index 26c26cd30c3d..310c38247b5c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c @@ -39,55 +39,276 @@ #define MLXSW_SP_KVDL_SINGLE_BASE 0 #define MLXSW_SP_KVDL_SINGLE_SIZE 16384 +#define MLXSW_SP_KVDL_SINGLE_END \ + (MLXSW_SP_KVDL_SINGLE_SIZE + MLXSW_SP_KVDL_SINGLE_BASE - 1) + #define MLXSW_SP_KVDL_CHUNKS_BASE \ (MLXSW_SP_KVDL_SINGLE_BASE + MLXSW_SP_KVDL_SINGLE_SIZE) -#define MLXSW_SP_KVDL_CHUNKS_SIZE \ - (MLXSW_SP_KVD_LINEAR_SIZE - MLXSW_SP_KVDL_CHUNKS_BASE) +#define MLXSW_SP_KVDL_CHUNKS_SIZE 49152 +#define MLXSW_SP_KVDL_CHUNKS_END \ + (MLXSW_SP_KVDL_CHUNKS_SIZE + MLXSW_SP_KVDL_CHUNKS_BASE - 1) + +#define MLXSW_SP_KVDL_LARGE_CHUNKS_BASE \ + (MLXSW_SP_KVDL_CHUNKS_BASE + MLXSW_SP_KVDL_CHUNKS_SIZE) +#define MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE \ + (MLXSW_SP_KVD_LINEAR_SIZE - MLXSW_SP_KVDL_LARGE_CHUNKS_BASE) +#define MLXSW_SP_KVDL_LARGE_CHUNKS_END \ + (MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE + MLXSW_SP_KVDL_LARGE_CHUNKS_BASE - 1) + #define MLXSW_SP_CHUNK_MAX 32 +#define MLXSW_SP_LARGE_CHUNK_MAX 512 + +struct mlxsw_sp_kvdl_part_info { + unsigned int part_index; + unsigned int start_index; + unsigned int end_index; + unsigned int alloc_size; +}; + +struct mlxsw_sp_kvdl_part { + struct list_head list; + const struct mlxsw_sp_kvdl_part_info *info; + unsigned long usage[0]; /* Entries */ +}; + +struct mlxsw_sp_kvdl { + struct list_head parts_list; +}; + +static struct mlxsw_sp_kvdl_part * +mlxsw_sp_kvdl_alloc_size_part(struct mlxsw_sp_kvdl *kvdl, + unsigned int alloc_size) +{ + struct mlxsw_sp_kvdl_part *part, *min_part = NULL; + + list_for_each_entry(part, &kvdl->parts_list, list) { + if (alloc_size <= part->info->alloc_size && + (!min_part || + part->info->alloc_size <= min_part->info->alloc_size)) + min_part = part; + } + + return min_part ?: ERR_PTR(-ENOBUFS); +} + +static struct mlxsw_sp_kvdl_part * +mlxsw_sp_kvdl_index_part(struct mlxsw_sp_kvdl *kvdl, u32 kvdl_index) +{ + struct mlxsw_sp_kvdl_part *part; + + list_for_each_entry(part, &kvdl->parts_list, list) { + if (kvdl_index >= part->info->start_index && + kvdl_index <= part->info->end_index) + return part; + } + + return ERR_PTR(-EINVAL); +} + +static u32 +mlxsw_sp_entry_index_kvdl_index(const struct mlxsw_sp_kvdl_part_info *info, + unsigned int entry_index) +{ + return info->start_index + entry_index * info->alloc_size; +} + +static unsigned int +mlxsw_sp_kvdl_index_entry_index(const struct mlxsw_sp_kvdl_part_info *info, + u32 kvdl_index) +{ + return (kvdl_index - info->start_index) / info->alloc_size; +} + +static int mlxsw_sp_kvdl_part_alloc(struct mlxsw_sp_kvdl_part *part, + u32 *p_kvdl_index) +{ + const struct mlxsw_sp_kvdl_part_info *info = part->info; + unsigned int entry_index, nr_entries; + + nr_entries = (info->end_index - info->start_index + 1) / + info->alloc_size; + entry_index = find_first_zero_bit(part->usage, nr_entries); + if (entry_index == nr_entries) + return -ENOBUFS; + __set_bit(entry_index, part->usage); + + *p_kvdl_index = mlxsw_sp_entry_index_kvdl_index(part->info, + entry_index); + + return 0; +} + +static void mlxsw_sp_kvdl_part_free(struct mlxsw_sp_kvdl_part *part, + u32 kvdl_index) +{ + unsigned int entry_index; + + entry_index = mlxsw_sp_kvdl_index_entry_index(part->info, + kvdl_index); + __clear_bit(entry_index, part->usage); +} int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count, u32 *p_entry_index) { - int entry_index; - int size; - int type_base; - int type_size; - int type_entries; - - if (entry_count == 0 || entry_count > MLXSW_SP_CHUNK_MAX) { - return -EINVAL; - } else if (entry_count == 1) { - type_base = MLXSW_SP_KVDL_SINGLE_BASE; - type_size = MLXSW_SP_KVDL_SINGLE_SIZE; - type_entries = 1; - } else { - type_base = MLXSW_SP_KVDL_CHUNKS_BASE; - type_size = MLXSW_SP_KVDL_CHUNKS_SIZE; - type_entries = MLXSW_SP_CHUNK_MAX; + struct mlxsw_sp_kvdl_part *part; + + /* Find partition with smallest allocation size satisfying the + * requested size. + */ + part = mlxsw_sp_kvdl_alloc_size_part(mlxsw_sp->kvdl, entry_count); + if (IS_ERR(part)) + return PTR_ERR(part); + + return mlxsw_sp_kvdl_part_alloc(part, p_entry_index); +} + +void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index) +{ + struct mlxsw_sp_kvdl_part *part; + + part = mlxsw_sp_kvdl_index_part(mlxsw_sp->kvdl, entry_index); + if (IS_ERR(part)) + return; + mlxsw_sp_kvdl_part_free(part, entry_index); +} + +int mlxsw_sp_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp, + unsigned int entry_count, + unsigned int *p_alloc_size) +{ + struct mlxsw_sp_kvdl_part *part; + + part = mlxsw_sp_kvdl_alloc_size_part(mlxsw_sp->kvdl, entry_count); + if (IS_ERR(part)) + return PTR_ERR(part); + + *p_alloc_size = part->info->alloc_size; + + return 0; +} + +static const struct mlxsw_sp_kvdl_part_info kvdl_parts_info[] = { + { + .part_index = 0, + .start_index = MLXSW_SP_KVDL_SINGLE_BASE, + .end_index = MLXSW_SP_KVDL_SINGLE_END, + .alloc_size = 1, + }, + { + .part_index = 1, + .start_index = MLXSW_SP_KVDL_CHUNKS_BASE, + .end_index = MLXSW_SP_KVDL_CHUNKS_END, + .alloc_size = MLXSW_SP_CHUNK_MAX, + }, + { + .part_index = 2, + .start_index = MLXSW_SP_KVDL_LARGE_CHUNKS_BASE, + .end_index = MLXSW_SP_KVDL_LARGE_CHUNKS_END, + .alloc_size = MLXSW_SP_LARGE_CHUNK_MAX, + }, +}; + +static struct mlxsw_sp_kvdl_part * +mlxsw_sp_kvdl_part_find(struct mlxsw_sp *mlxsw_sp, unsigned int part_index) +{ + struct mlxsw_sp_kvdl_part *part; + + list_for_each_entry(part, &mlxsw_sp->kvdl->parts_list, list) { + if (part->info->part_index == part_index) + return part; } - entry_index = type_base; - size = type_base + type_size; - for_each_clear_bit_from(entry_index, mlxsw_sp->kvdl.usage, size) { - int i; + return NULL; +} + +static int mlxsw_sp_kvdl_part_init(struct mlxsw_sp *mlxsw_sp, + unsigned int part_index) +{ + const struct mlxsw_sp_kvdl_part_info *info; + struct mlxsw_sp_kvdl_part *part; + unsigned int nr_entries; + size_t usage_size; + + info = &kvdl_parts_info[part_index]; + + nr_entries = (info->end_index - info->start_index + 1) / + info->alloc_size; + usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long); + part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL); + if (!part) + return -ENOMEM; + + part->info = info; + list_add(&part->list, &mlxsw_sp->kvdl->parts_list); + + return 0; +} + +static void mlxsw_sp_kvdl_part_fini(struct mlxsw_sp *mlxsw_sp, + unsigned int part_index) +{ + struct mlxsw_sp_kvdl_part *part; + + part = mlxsw_sp_kvdl_part_find(mlxsw_sp, part_index); + if (!part) + return; + + list_del(&part->list); + kfree(part); +} + +static int mlxsw_sp_kvdl_parts_init(struct mlxsw_sp *mlxsw_sp) +{ + int err, i; + + INIT_LIST_HEAD(&mlxsw_sp->kvdl->parts_list); - for (i = 0; i < type_entries; i++) - set_bit(entry_index + i, mlxsw_sp->kvdl.usage); - *p_entry_index = entry_index; - return 0; + for (i = 0; i < ARRAY_SIZE(kvdl_parts_info); i++) { + err = mlxsw_sp_kvdl_part_init(mlxsw_sp, i); + if (err) + goto err_kvdl_part_init; } - return -ENOBUFS; + + return 0; + +err_kvdl_part_init: + for (i--; i >= 0; i--) + mlxsw_sp_kvdl_part_fini(mlxsw_sp, i); + return err; } -void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index) +static void mlxsw_sp_kvdl_parts_fini(struct mlxsw_sp *mlxsw_sp) { - int type_entries; int i; - if (entry_index < MLXSW_SP_KVDL_CHUNKS_BASE) - type_entries = 1; - else - type_entries = MLXSW_SP_CHUNK_MAX; - for (i = 0; i < type_entries; i++) - clear_bit(entry_index + i, mlxsw_sp->kvdl.usage); + for (i = ARRAY_SIZE(kvdl_parts_info) - 1; i >= 0; i--) + mlxsw_sp_kvdl_part_fini(mlxsw_sp, i); +} + +int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_kvdl *kvdl; + int err; + + kvdl = kzalloc(sizeof(*mlxsw_sp->kvdl), GFP_KERNEL); + if (!kvdl) + return -ENOMEM; + mlxsw_sp->kvdl = kvdl; + + err = mlxsw_sp_kvdl_parts_init(mlxsw_sp); + if (err) + goto err_kvdl_parts_init; + + return 0; + +err_kvdl_parts_init: + kfree(mlxsw_sp->kvdl); + return err; +} + +void mlxsw_sp_kvdl_fini(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp_kvdl_parts_fini(mlxsw_sp); + kfree(mlxsw_sp->kvdl); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c new file mode 100644 index 000000000000..d20b143de3b4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c @@ -0,0 +1,1012 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/rhashtable.h> + +#include "spectrum_mr.h" +#include "spectrum_router.h" + +struct mlxsw_sp_mr { + const struct mlxsw_sp_mr_ops *mr_ops; + void *catchall_route_priv; + struct delayed_work stats_update_dw; + struct list_head table_list; +#define MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL 5000 /* ms */ + unsigned long priv[0]; + /* priv has to be always the last item */ +}; + +struct mlxsw_sp_mr_vif { + struct net_device *dev; + const struct mlxsw_sp_rif *rif; + unsigned long vif_flags; + + /* A list of route_vif_entry structs that point to routes that the VIF + * instance is used as one of the egress VIFs + */ + struct list_head route_evif_list; + + /* A list of route_vif_entry structs that point to routes that the VIF + * instance is used as an ingress VIF + */ + struct list_head route_ivif_list; +}; + +struct mlxsw_sp_mr_route_vif_entry { + struct list_head vif_node; + struct list_head route_node; + struct mlxsw_sp_mr_vif *mr_vif; + struct mlxsw_sp_mr_route *mr_route; +}; + +struct mlxsw_sp_mr_table { + struct list_head node; + enum mlxsw_sp_l3proto proto; + struct mlxsw_sp *mlxsw_sp; + u32 vr_id; + struct mlxsw_sp_mr_vif vifs[MAXVIFS]; + struct list_head route_list; + struct rhashtable route_ht; + char catchall_route_priv[0]; + /* catchall_route_priv has to be always the last item */ +}; + +struct mlxsw_sp_mr_route { + struct list_head node; + struct rhash_head ht_node; + struct mlxsw_sp_mr_route_key key; + enum mlxsw_sp_mr_route_action route_action; + u16 min_mtu; + struct mfc_cache *mfc4; + void *route_priv; + const struct mlxsw_sp_mr_table *mr_table; + /* A list of route_vif_entry structs that point to the egress VIFs */ + struct list_head evif_list; + /* A route_vif_entry struct that point to the ingress VIF */ + struct mlxsw_sp_mr_route_vif_entry ivif; +}; + +static const struct rhashtable_params mlxsw_sp_mr_route_ht_params = { + .key_len = sizeof(struct mlxsw_sp_mr_route_key), + .key_offset = offsetof(struct mlxsw_sp_mr_route, key), + .head_offset = offsetof(struct mlxsw_sp_mr_route, ht_node), + .automatic_shrinking = true, +}; + +static bool mlxsw_sp_mr_vif_regular(const struct mlxsw_sp_mr_vif *vif) +{ + return !(vif->vif_flags & (VIFF_TUNNEL | VIFF_REGISTER)); +} + +static bool mlxsw_sp_mr_vif_valid(const struct mlxsw_sp_mr_vif *vif) +{ + return mlxsw_sp_mr_vif_regular(vif) && vif->dev && vif->rif; +} + +static bool mlxsw_sp_mr_vif_exists(const struct mlxsw_sp_mr_vif *vif) +{ + return vif->dev; +} + +static bool +mlxsw_sp_mr_route_ivif_in_evifs(const struct mlxsw_sp_mr_route *mr_route) +{ + vifi_t ivif; + + switch (mr_route->mr_table->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + ivif = mr_route->mfc4->mfc_parent; + return mr_route->mfc4->mfc_un.res.ttls[ivif] != 255; + case MLXSW_SP_L3_PROTO_IPV6: + /* fall through */ + default: + WARN_ON_ONCE(1); + } + return false; +} + +static int +mlxsw_sp_mr_route_valid_evifs_num(const struct mlxsw_sp_mr_route *mr_route) +{ + struct mlxsw_sp_mr_route_vif_entry *rve; + int valid_evifs; + + valid_evifs = 0; + list_for_each_entry(rve, &mr_route->evif_list, route_node) + if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) + valid_evifs++; + return valid_evifs; +} + +static bool mlxsw_sp_mr_route_starg(const struct mlxsw_sp_mr_route *mr_route) +{ + switch (mr_route->mr_table->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return mr_route->key.source_mask.addr4 == htonl(INADDR_ANY); + case MLXSW_SP_L3_PROTO_IPV6: + /* fall through */ + default: + WARN_ON_ONCE(1); + } + return false; +} + +static enum mlxsw_sp_mr_route_action +mlxsw_sp_mr_route_action(const struct mlxsw_sp_mr_route *mr_route) +{ + struct mlxsw_sp_mr_route_vif_entry *rve; + + /* If the ingress port is not regular and resolved, trap the route */ + if (!mlxsw_sp_mr_vif_valid(mr_route->ivif.mr_vif)) + return MLXSW_SP_MR_ROUTE_ACTION_TRAP; + + /* The kernel does not match a (*,G) route that the ingress interface is + * not one of the egress interfaces, so trap these kind of routes. + */ + if (mlxsw_sp_mr_route_starg(mr_route) && + !mlxsw_sp_mr_route_ivif_in_evifs(mr_route)) + return MLXSW_SP_MR_ROUTE_ACTION_TRAP; + + /* If the route has no valid eVIFs, trap it. */ + if (!mlxsw_sp_mr_route_valid_evifs_num(mr_route)) + return MLXSW_SP_MR_ROUTE_ACTION_TRAP; + + /* If one of the eVIFs has no RIF, trap-and-forward the route as there + * is some more routing to do in software too. + */ + list_for_each_entry(rve, &mr_route->evif_list, route_node) + if (mlxsw_sp_mr_vif_exists(rve->mr_vif) && !rve->mr_vif->rif) + return MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD; + + return MLXSW_SP_MR_ROUTE_ACTION_FORWARD; +} + +static enum mlxsw_sp_mr_route_prio +mlxsw_sp_mr_route_prio(const struct mlxsw_sp_mr_route *mr_route) +{ + return mlxsw_sp_mr_route_starg(mr_route) ? + MLXSW_SP_MR_ROUTE_PRIO_STARG : MLXSW_SP_MR_ROUTE_PRIO_SG; +} + +static void mlxsw_sp_mr_route4_key(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route_key *key, + const struct mfc_cache *mfc) +{ + bool starg = (mfc->mfc_origin == htonl(INADDR_ANY)); + + memset(key, 0, sizeof(*key)); + key->vrid = mr_table->vr_id; + key->proto = mr_table->proto; + key->group.addr4 = mfc->mfc_mcastgrp; + key->group_mask.addr4 = htonl(0xffffffff); + key->source.addr4 = mfc->mfc_origin; + key->source_mask.addr4 = htonl(starg ? 0 : 0xffffffff); +} + +static int mlxsw_sp_mr_route_evif_link(struct mlxsw_sp_mr_route *mr_route, + struct mlxsw_sp_mr_vif *mr_vif) +{ + struct mlxsw_sp_mr_route_vif_entry *rve; + + rve = kzalloc(sizeof(*rve), GFP_KERNEL); + if (!rve) + return -ENOMEM; + rve->mr_route = mr_route; + rve->mr_vif = mr_vif; + list_add_tail(&rve->route_node, &mr_route->evif_list); + list_add_tail(&rve->vif_node, &mr_vif->route_evif_list); + return 0; +} + +static void +mlxsw_sp_mr_route_evif_unlink(struct mlxsw_sp_mr_route_vif_entry *rve) +{ + list_del(&rve->route_node); + list_del(&rve->vif_node); + kfree(rve); +} + +static void mlxsw_sp_mr_route_ivif_link(struct mlxsw_sp_mr_route *mr_route, + struct mlxsw_sp_mr_vif *mr_vif) +{ + mr_route->ivif.mr_route = mr_route; + mr_route->ivif.mr_vif = mr_vif; + list_add_tail(&mr_route->ivif.vif_node, &mr_vif->route_ivif_list); +} + +static void mlxsw_sp_mr_route_ivif_unlink(struct mlxsw_sp_mr_route *mr_route) +{ + list_del(&mr_route->ivif.vif_node); +} + +static int +mlxsw_sp_mr_route_info_create(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route *mr_route, + struct mlxsw_sp_mr_route_info *route_info) +{ + struct mlxsw_sp_mr_route_vif_entry *rve; + u16 *erif_indices; + u16 irif_index; + u16 erif = 0; + + erif_indices = kmalloc_array(MAXVIFS, sizeof(*erif_indices), + GFP_KERNEL); + if (!erif_indices) + return -ENOMEM; + + list_for_each_entry(rve, &mr_route->evif_list, route_node) { + if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) { + u16 rifi = mlxsw_sp_rif_index(rve->mr_vif->rif); + + erif_indices[erif++] = rifi; + } + } + + if (mlxsw_sp_mr_vif_valid(mr_route->ivif.mr_vif)) + irif_index = mlxsw_sp_rif_index(mr_route->ivif.mr_vif->rif); + else + irif_index = 0; + + route_info->irif_index = irif_index; + route_info->erif_indices = erif_indices; + route_info->min_mtu = mr_route->min_mtu; + route_info->route_action = mr_route->route_action; + route_info->erif_num = erif; + return 0; +} + +static void +mlxsw_sp_mr_route_info_destroy(struct mlxsw_sp_mr_route_info *route_info) +{ + kfree(route_info->erif_indices); +} + +static int mlxsw_sp_mr_route_write(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route *mr_route, + bool replace) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + struct mlxsw_sp_mr_route_info route_info; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + int err; + + err = mlxsw_sp_mr_route_info_create(mr_table, mr_route, &route_info); + if (err) + return err; + + if (!replace) { + struct mlxsw_sp_mr_route_params route_params; + + mr_route->route_priv = kzalloc(mr->mr_ops->route_priv_size, + GFP_KERNEL); + if (!mr_route->route_priv) { + err = -ENOMEM; + goto out; + } + + route_params.key = mr_route->key; + route_params.value = route_info; + route_params.prio = mlxsw_sp_mr_route_prio(mr_route); + err = mr->mr_ops->route_create(mlxsw_sp, mr->priv, + mr_route->route_priv, + &route_params); + if (err) + kfree(mr_route->route_priv); + } else { + err = mr->mr_ops->route_update(mlxsw_sp, mr_route->route_priv, + &route_info); + } +out: + mlxsw_sp_mr_route_info_destroy(&route_info); + return err; +} + +static void mlxsw_sp_mr_route_erase(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route *mr_route) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + + mr->mr_ops->route_destroy(mlxsw_sp, mr->priv, mr_route->route_priv); + kfree(mr_route->route_priv); +} + +static struct mlxsw_sp_mr_route * +mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table, + struct mfc_cache *mfc) +{ + struct mlxsw_sp_mr_route_vif_entry *rve, *tmp; + struct mlxsw_sp_mr_route *mr_route; + int err = 0; + int i; + + /* Allocate and init a new route and fill it with parameters */ + mr_route = kzalloc(sizeof(*mr_route), GFP_KERNEL); + if (!mr_route) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&mr_route->evif_list); + mlxsw_sp_mr_route4_key(mr_table, &mr_route->key, mfc); + + /* Find min_mtu and link iVIF and eVIFs */ + mr_route->min_mtu = ETH_MAX_MTU; + ipmr_cache_hold(mfc); + mr_route->mfc4 = mfc; + mr_route->mr_table = mr_table; + for (i = 0; i < MAXVIFS; i++) { + if (mfc->mfc_un.res.ttls[i] != 255) { + err = mlxsw_sp_mr_route_evif_link(mr_route, + &mr_table->vifs[i]); + if (err) + goto err; + if (mr_table->vifs[i].dev && + mr_table->vifs[i].dev->mtu < mr_route->min_mtu) + mr_route->min_mtu = mr_table->vifs[i].dev->mtu; + } + } + mlxsw_sp_mr_route_ivif_link(mr_route, &mr_table->vifs[mfc->mfc_parent]); + + mr_route->route_action = mlxsw_sp_mr_route_action(mr_route); + return mr_route; +err: + ipmr_cache_put(mfc); + list_for_each_entry_safe(rve, tmp, &mr_route->evif_list, route_node) + mlxsw_sp_mr_route_evif_unlink(rve); + kfree(mr_route); + return ERR_PTR(err); +} + +static void mlxsw_sp_mr_route4_destroy(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route *mr_route) +{ + struct mlxsw_sp_mr_route_vif_entry *rve, *tmp; + + mlxsw_sp_mr_route_ivif_unlink(mr_route); + ipmr_cache_put(mr_route->mfc4); + list_for_each_entry_safe(rve, tmp, &mr_route->evif_list, route_node) + mlxsw_sp_mr_route_evif_unlink(rve); + kfree(mr_route); +} + +static void mlxsw_sp_mr_route_destroy(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route *mr_route) +{ + switch (mr_table->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + mlxsw_sp_mr_route4_destroy(mr_table, mr_route); + break; + case MLXSW_SP_L3_PROTO_IPV6: + /* fall through */ + default: + WARN_ON_ONCE(1); + } +} + +static void mlxsw_sp_mr_mfc_offload_set(struct mlxsw_sp_mr_route *mr_route, + bool offload) +{ + switch (mr_route->mr_table->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + if (offload) + mr_route->mfc4->mfc_flags |= MFC_OFFLOAD; + else + mr_route->mfc4->mfc_flags &= ~MFC_OFFLOAD; + break; + case MLXSW_SP_L3_PROTO_IPV6: + /* fall through */ + default: + WARN_ON_ONCE(1); + } +} + +static void mlxsw_sp_mr_mfc_offload_update(struct mlxsw_sp_mr_route *mr_route) +{ + bool offload; + + offload = mr_route->route_action != MLXSW_SP_MR_ROUTE_ACTION_TRAP; + mlxsw_sp_mr_mfc_offload_set(mr_route, offload); +} + +static void __mlxsw_sp_mr_route_del(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route *mr_route) +{ + mlxsw_sp_mr_mfc_offload_set(mr_route, false); + mlxsw_sp_mr_route_erase(mr_table, mr_route); + rhashtable_remove_fast(&mr_table->route_ht, &mr_route->ht_node, + mlxsw_sp_mr_route_ht_params); + list_del(&mr_route->node); + mlxsw_sp_mr_route_destroy(mr_table, mr_route); +} + +int mlxsw_sp_mr_route4_add(struct mlxsw_sp_mr_table *mr_table, + struct mfc_cache *mfc, bool replace) +{ + struct mlxsw_sp_mr_route *mr_orig_route = NULL; + struct mlxsw_sp_mr_route *mr_route; + int err; + + /* If the route is a (*,*) route, abort, as these kind of routes are + * used for proxy routes. + */ + if (mfc->mfc_origin == htonl(INADDR_ANY) && + mfc->mfc_mcastgrp == htonl(INADDR_ANY)) { + dev_warn(mr_table->mlxsw_sp->bus_info->dev, + "Offloading proxy routes is not supported.\n"); + return -EINVAL; + } + + /* Create a new route */ + mr_route = mlxsw_sp_mr_route4_create(mr_table, mfc); + if (IS_ERR(mr_route)) + return PTR_ERR(mr_route); + + /* Find any route with a matching key */ + mr_orig_route = rhashtable_lookup_fast(&mr_table->route_ht, + &mr_route->key, + mlxsw_sp_mr_route_ht_params); + if (replace) { + /* On replace case, make the route point to the new route_priv. + */ + if (WARN_ON(!mr_orig_route)) { + err = -ENOENT; + goto err_no_orig_route; + } + mr_route->route_priv = mr_orig_route->route_priv; + } else if (mr_orig_route) { + /* On non replace case, if another route with the same key was + * found, abort, as duplicate routes are used for proxy routes. + */ + dev_warn(mr_table->mlxsw_sp->bus_info->dev, + "Offloading proxy routes is not supported.\n"); + err = -EINVAL; + goto err_duplicate_route; + } + + /* Put it in the table data-structures */ + list_add_tail(&mr_route->node, &mr_table->route_list); + err = rhashtable_insert_fast(&mr_table->route_ht, + &mr_route->ht_node, + mlxsw_sp_mr_route_ht_params); + if (err) + goto err_rhashtable_insert; + + /* Write the route to the hardware */ + err = mlxsw_sp_mr_route_write(mr_table, mr_route, replace); + if (err) + goto err_mr_route_write; + + /* Destroy the original route */ + if (replace) { + rhashtable_remove_fast(&mr_table->route_ht, + &mr_orig_route->ht_node, + mlxsw_sp_mr_route_ht_params); + list_del(&mr_orig_route->node); + mlxsw_sp_mr_route4_destroy(mr_table, mr_orig_route); + } + + mlxsw_sp_mr_mfc_offload_update(mr_route); + return 0; + +err_mr_route_write: + rhashtable_remove_fast(&mr_table->route_ht, &mr_route->ht_node, + mlxsw_sp_mr_route_ht_params); +err_rhashtable_insert: + list_del(&mr_route->node); +err_no_orig_route: +err_duplicate_route: + mlxsw_sp_mr_route4_destroy(mr_table, mr_route); + return err; +} + +void mlxsw_sp_mr_route4_del(struct mlxsw_sp_mr_table *mr_table, + struct mfc_cache *mfc) +{ + struct mlxsw_sp_mr_route *mr_route; + struct mlxsw_sp_mr_route_key key; + + mlxsw_sp_mr_route4_key(mr_table, &key, mfc); + mr_route = rhashtable_lookup_fast(&mr_table->route_ht, &key, + mlxsw_sp_mr_route_ht_params); + if (mr_route) + __mlxsw_sp_mr_route_del(mr_table, mr_route); +} + +/* Should be called after the VIF struct is updated */ +static int +mlxsw_sp_mr_route_ivif_resolve(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route_vif_entry *rve) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + enum mlxsw_sp_mr_route_action route_action; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + u16 irif_index; + int err; + + route_action = mlxsw_sp_mr_route_action(rve->mr_route); + if (route_action == MLXSW_SP_MR_ROUTE_ACTION_TRAP) + return 0; + + /* rve->mr_vif->rif is guaranteed to be valid at this stage */ + irif_index = mlxsw_sp_rif_index(rve->mr_vif->rif); + err = mr->mr_ops->route_irif_update(mlxsw_sp, rve->mr_route->route_priv, + irif_index); + if (err) + return err; + + err = mr->mr_ops->route_action_update(mlxsw_sp, + rve->mr_route->route_priv, + route_action); + if (err) + /* No need to rollback here because the iRIF change only takes + * place after the action has been updated. + */ + return err; + + rve->mr_route->route_action = route_action; + mlxsw_sp_mr_mfc_offload_update(rve->mr_route); + return 0; +} + +static void +mlxsw_sp_mr_route_ivif_unresolve(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route_vif_entry *rve) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + + mr->mr_ops->route_action_update(mlxsw_sp, rve->mr_route->route_priv, + MLXSW_SP_MR_ROUTE_ACTION_TRAP); + rve->mr_route->route_action = MLXSW_SP_MR_ROUTE_ACTION_TRAP; + mlxsw_sp_mr_mfc_offload_update(rve->mr_route); +} + +/* Should be called after the RIF struct is updated */ +static int +mlxsw_sp_mr_route_evif_resolve(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route_vif_entry *rve) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + enum mlxsw_sp_mr_route_action route_action; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + u16 erif_index = 0; + int err; + + /* Update the route action, as the new eVIF can be a tunnel or a pimreg + * device which will require updating the action. + */ + route_action = mlxsw_sp_mr_route_action(rve->mr_route); + if (route_action != rve->mr_route->route_action) { + err = mr->mr_ops->route_action_update(mlxsw_sp, + rve->mr_route->route_priv, + route_action); + if (err) + return err; + } + + /* Add the eRIF */ + if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) { + erif_index = mlxsw_sp_rif_index(rve->mr_vif->rif); + err = mr->mr_ops->route_erif_add(mlxsw_sp, + rve->mr_route->route_priv, + erif_index); + if (err) + goto err_route_erif_add; + } + + /* Update the minimum MTU */ + if (rve->mr_vif->dev->mtu < rve->mr_route->min_mtu) { + rve->mr_route->min_mtu = rve->mr_vif->dev->mtu; + err = mr->mr_ops->route_min_mtu_update(mlxsw_sp, + rve->mr_route->route_priv, + rve->mr_route->min_mtu); + if (err) + goto err_route_min_mtu_update; + } + + rve->mr_route->route_action = route_action; + mlxsw_sp_mr_mfc_offload_update(rve->mr_route); + return 0; + +err_route_min_mtu_update: + if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) + mr->mr_ops->route_erif_del(mlxsw_sp, rve->mr_route->route_priv, + erif_index); +err_route_erif_add: + if (route_action != rve->mr_route->route_action) + mr->mr_ops->route_action_update(mlxsw_sp, + rve->mr_route->route_priv, + rve->mr_route->route_action); + return err; +} + +/* Should be called before the RIF struct is updated */ +static void +mlxsw_sp_mr_route_evif_unresolve(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route_vif_entry *rve) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + enum mlxsw_sp_mr_route_action route_action; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + u16 rifi; + + /* If the unresolved RIF was not valid, no need to delete it */ + if (!mlxsw_sp_mr_vif_valid(rve->mr_vif)) + return; + + /* Update the route action: if there is only one valid eVIF in the + * route, set the action to trap as the VIF deletion will lead to zero + * valid eVIFs. On any other case, use the mlxsw_sp_mr_route_action to + * determine the route action. + */ + if (mlxsw_sp_mr_route_valid_evifs_num(rve->mr_route) == 1) + route_action = MLXSW_SP_MR_ROUTE_ACTION_TRAP; + else + route_action = mlxsw_sp_mr_route_action(rve->mr_route); + if (route_action != rve->mr_route->route_action) + mr->mr_ops->route_action_update(mlxsw_sp, + rve->mr_route->route_priv, + route_action); + + /* Delete the erif from the route */ + rifi = mlxsw_sp_rif_index(rve->mr_vif->rif); + mr->mr_ops->route_erif_del(mlxsw_sp, rve->mr_route->route_priv, rifi); + rve->mr_route->route_action = route_action; + mlxsw_sp_mr_mfc_offload_update(rve->mr_route); +} + +static int mlxsw_sp_mr_vif_resolve(struct mlxsw_sp_mr_table *mr_table, + struct net_device *dev, + struct mlxsw_sp_mr_vif *mr_vif, + unsigned long vif_flags, + const struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_mr_route_vif_entry *irve, *erve; + int err; + + /* Update the VIF */ + mr_vif->dev = dev; + mr_vif->rif = rif; + mr_vif->vif_flags = vif_flags; + + /* Update all routes where this VIF is used as an unresolved iRIF */ + list_for_each_entry(irve, &mr_vif->route_ivif_list, vif_node) { + err = mlxsw_sp_mr_route_ivif_resolve(mr_table, irve); + if (err) + goto err_irif_unresolve; + } + + /* Update all routes where this VIF is used as an unresolved eRIF */ + list_for_each_entry(erve, &mr_vif->route_evif_list, vif_node) { + err = mlxsw_sp_mr_route_evif_resolve(mr_table, erve); + if (err) + goto err_erif_unresolve; + } + return 0; + +err_erif_unresolve: + list_for_each_entry_from_reverse(erve, &mr_vif->route_evif_list, + vif_node) + mlxsw_sp_mr_route_evif_unresolve(mr_table, erve); +err_irif_unresolve: + list_for_each_entry_from_reverse(irve, &mr_vif->route_ivif_list, + vif_node) + mlxsw_sp_mr_route_ivif_unresolve(mr_table, irve); + mr_vif->rif = NULL; + return err; +} + +static void mlxsw_sp_mr_vif_unresolve(struct mlxsw_sp_mr_table *mr_table, + struct net_device *dev, + struct mlxsw_sp_mr_vif *mr_vif) +{ + struct mlxsw_sp_mr_route_vif_entry *rve; + + /* Update all routes where this VIF is used as an unresolved eRIF */ + list_for_each_entry(rve, &mr_vif->route_evif_list, vif_node) + mlxsw_sp_mr_route_evif_unresolve(mr_table, rve); + + /* Update all routes where this VIF is used as an unresolved iRIF */ + list_for_each_entry(rve, &mr_vif->route_ivif_list, vif_node) + mlxsw_sp_mr_route_ivif_unresolve(mr_table, rve); + + /* Update the VIF */ + mr_vif->dev = dev; + mr_vif->rif = NULL; +} + +int mlxsw_sp_mr_vif_add(struct mlxsw_sp_mr_table *mr_table, + struct net_device *dev, vifi_t vif_index, + unsigned long vif_flags, const struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_mr_vif *mr_vif = &mr_table->vifs[vif_index]; + + if (WARN_ON(vif_index >= MAXVIFS)) + return -EINVAL; + if (mr_vif->dev) + return -EEXIST; + return mlxsw_sp_mr_vif_resolve(mr_table, dev, mr_vif, vif_flags, rif); +} + +void mlxsw_sp_mr_vif_del(struct mlxsw_sp_mr_table *mr_table, vifi_t vif_index) +{ + struct mlxsw_sp_mr_vif *mr_vif = &mr_table->vifs[vif_index]; + + if (WARN_ON(vif_index >= MAXVIFS)) + return; + if (WARN_ON(!mr_vif->dev)) + return; + mlxsw_sp_mr_vif_unresolve(mr_table, NULL, mr_vif); +} + +static struct mlxsw_sp_mr_vif * +mlxsw_sp_mr_dev_vif_lookup(struct mlxsw_sp_mr_table *mr_table, + const struct net_device *dev) +{ + vifi_t vif_index; + + for (vif_index = 0; vif_index < MAXVIFS; vif_index++) + if (mr_table->vifs[vif_index].dev == dev) + return &mr_table->vifs[vif_index]; + return NULL; +} + +int mlxsw_sp_mr_rif_add(struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_rif *rif) +{ + const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif); + struct mlxsw_sp_mr_vif *mr_vif; + + if (!rif_dev) + return 0; + + mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev); + if (!mr_vif) + return 0; + return mlxsw_sp_mr_vif_resolve(mr_table, mr_vif->dev, mr_vif, + mr_vif->vif_flags, rif); +} + +void mlxsw_sp_mr_rif_del(struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_rif *rif) +{ + const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif); + struct mlxsw_sp_mr_vif *mr_vif; + + if (!rif_dev) + return; + + mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev); + if (!mr_vif) + return; + mlxsw_sp_mr_vif_unresolve(mr_table, mr_vif->dev, mr_vif); +} + +void mlxsw_sp_mr_rif_mtu_update(struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_rif *rif, int mtu) +{ + const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif); + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + struct mlxsw_sp_mr_route_vif_entry *rve; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + struct mlxsw_sp_mr_vif *mr_vif; + + if (!rif_dev) + return; + + /* Search for a VIF that use that RIF */ + mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev); + if (!mr_vif) + return; + + /* Update all the routes that uses that VIF as eVIF */ + list_for_each_entry(rve, &mr_vif->route_evif_list, vif_node) { + if (mtu < rve->mr_route->min_mtu) { + rve->mr_route->min_mtu = mtu; + mr->mr_ops->route_min_mtu_update(mlxsw_sp, + rve->mr_route->route_priv, + mtu); + } + } +} + +struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp, + u32 vr_id, + enum mlxsw_sp_l3proto proto) +{ + struct mlxsw_sp_mr_route_params catchall_route_params = { + .prio = MLXSW_SP_MR_ROUTE_PRIO_CATCHALL, + .key = { + .vrid = vr_id, + }, + .value = { + .route_action = MLXSW_SP_MR_ROUTE_ACTION_TRAP, + } + }; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + struct mlxsw_sp_mr_table *mr_table; + int err; + int i; + + mr_table = kzalloc(sizeof(*mr_table) + mr->mr_ops->route_priv_size, + GFP_KERNEL); + if (!mr_table) + return ERR_PTR(-ENOMEM); + + mr_table->vr_id = vr_id; + mr_table->mlxsw_sp = mlxsw_sp; + mr_table->proto = proto; + INIT_LIST_HEAD(&mr_table->route_list); + + err = rhashtable_init(&mr_table->route_ht, + &mlxsw_sp_mr_route_ht_params); + if (err) + goto err_route_rhashtable_init; + + for (i = 0; i < MAXVIFS; i++) { + INIT_LIST_HEAD(&mr_table->vifs[i].route_evif_list); + INIT_LIST_HEAD(&mr_table->vifs[i].route_ivif_list); + } + + err = mr->mr_ops->route_create(mlxsw_sp, mr->priv, + mr_table->catchall_route_priv, + &catchall_route_params); + if (err) + goto err_ops_route_create; + list_add_tail(&mr_table->node, &mr->table_list); + return mr_table; + +err_ops_route_create: + rhashtable_destroy(&mr_table->route_ht); +err_route_rhashtable_init: + kfree(mr_table); + return ERR_PTR(err); +} + +void mlxsw_sp_mr_table_destroy(struct mlxsw_sp_mr_table *mr_table) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + + WARN_ON(!mlxsw_sp_mr_table_empty(mr_table)); + list_del(&mr_table->node); + mr->mr_ops->route_destroy(mlxsw_sp, mr->priv, + &mr_table->catchall_route_priv); + rhashtable_destroy(&mr_table->route_ht); + kfree(mr_table); +} + +void mlxsw_sp_mr_table_flush(struct mlxsw_sp_mr_table *mr_table) +{ + struct mlxsw_sp_mr_route *mr_route, *tmp; + int i; + + list_for_each_entry_safe(mr_route, tmp, &mr_table->route_list, node) + __mlxsw_sp_mr_route_del(mr_table, mr_route); + + for (i = 0; i < MAXVIFS; i++) { + mr_table->vifs[i].dev = NULL; + mr_table->vifs[i].rif = NULL; + } +} + +bool mlxsw_sp_mr_table_empty(const struct mlxsw_sp_mr_table *mr_table) +{ + int i; + + for (i = 0; i < MAXVIFS; i++) + if (mr_table->vifs[i].dev) + return false; + return list_empty(&mr_table->route_list); +} + +static void mlxsw_sp_mr_route_stats_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_route *mr_route) +{ + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + u64 packets, bytes; + + if (mr_route->route_action == MLXSW_SP_MR_ROUTE_ACTION_TRAP) + return; + + mr->mr_ops->route_stats(mlxsw_sp, mr_route->route_priv, &packets, + &bytes); + + switch (mr_route->mr_table->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + if (mr_route->mfc4->mfc_un.res.pkt != packets) + mr_route->mfc4->mfc_un.res.lastuse = jiffies; + mr_route->mfc4->mfc_un.res.pkt = packets; + mr_route->mfc4->mfc_un.res.bytes = bytes; + break; + case MLXSW_SP_L3_PROTO_IPV6: + /* fall through */ + default: + WARN_ON_ONCE(1); + } +} + +static void mlxsw_sp_mr_stats_update(struct work_struct *work) +{ + struct mlxsw_sp_mr *mr = container_of(work, struct mlxsw_sp_mr, + stats_update_dw.work); + struct mlxsw_sp_mr_table *mr_table; + struct mlxsw_sp_mr_route *mr_route; + unsigned long interval; + + rtnl_lock(); + list_for_each_entry(mr_table, &mr->table_list, node) + list_for_each_entry(mr_route, &mr_table->route_list, node) + mlxsw_sp_mr_route_stats_update(mr_table->mlxsw_sp, + mr_route); + rtnl_unlock(); + + interval = msecs_to_jiffies(MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL); + mlxsw_core_schedule_dw(&mr->stats_update_dw, interval); +} + +int mlxsw_sp_mr_init(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_mr_ops *mr_ops) +{ + struct mlxsw_sp_mr *mr; + unsigned long interval; + int err; + + mr = kzalloc(sizeof(*mr) + mr_ops->priv_size, GFP_KERNEL); + if (!mr) + return -ENOMEM; + mr->mr_ops = mr_ops; + mlxsw_sp->mr = mr; + INIT_LIST_HEAD(&mr->table_list); + + err = mr_ops->init(mlxsw_sp, mr->priv); + if (err) + goto err; + + /* Create the delayed work for counter updates */ + INIT_DELAYED_WORK(&mr->stats_update_dw, mlxsw_sp_mr_stats_update); + interval = msecs_to_jiffies(MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL); + mlxsw_core_schedule_dw(&mr->stats_update_dw, interval); + return 0; +err: + kfree(mr); + return err; +} + +void mlxsw_sp_mr_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + + cancel_delayed_work_sync(&mr->stats_update_dw); + mr->mr_ops->fini(mr->priv); + kfree(mr); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h new file mode 100644 index 000000000000..5d26a122af49 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h @@ -0,0 +1,134 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXSW_SPECTRUM_MCROUTER_H +#define _MLXSW_SPECTRUM_MCROUTER_H + +#include <linux/mroute.h> +#include "spectrum_router.h" +#include "spectrum.h" + +enum mlxsw_sp_mr_route_action { + MLXSW_SP_MR_ROUTE_ACTION_FORWARD, + MLXSW_SP_MR_ROUTE_ACTION_TRAP, + MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD, +}; + +enum mlxsw_sp_mr_route_prio { + MLXSW_SP_MR_ROUTE_PRIO_SG, + MLXSW_SP_MR_ROUTE_PRIO_STARG, + MLXSW_SP_MR_ROUTE_PRIO_CATCHALL, + __MLXSW_SP_MR_ROUTE_PRIO_MAX +}; + +#define MLXSW_SP_MR_ROUTE_PRIO_MAX (__MLXSW_SP_MR_ROUTE_PRIO_MAX - 1) + +struct mlxsw_sp_mr_route_key { + int vrid; + enum mlxsw_sp_l3proto proto; + union mlxsw_sp_l3addr group; + union mlxsw_sp_l3addr group_mask; + union mlxsw_sp_l3addr source; + union mlxsw_sp_l3addr source_mask; +}; + +struct mlxsw_sp_mr_route_info { + enum mlxsw_sp_mr_route_action route_action; + u16 irif_index; + u16 *erif_indices; + size_t erif_num; + u16 min_mtu; +}; + +struct mlxsw_sp_mr_route_params { + struct mlxsw_sp_mr_route_key key; + struct mlxsw_sp_mr_route_info value; + enum mlxsw_sp_mr_route_prio prio; +}; + +struct mlxsw_sp_mr_ops { + int priv_size; + int route_priv_size; + int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv); + int (*route_create)(struct mlxsw_sp *mlxsw_sp, void *priv, + void *route_priv, + struct mlxsw_sp_mr_route_params *route_params); + int (*route_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + struct mlxsw_sp_mr_route_info *route_info); + int (*route_stats)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + u64 *packets, u64 *bytes); + int (*route_action_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + enum mlxsw_sp_mr_route_action route_action); + int (*route_min_mtu_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + u16 min_mtu); + int (*route_irif_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + u16 irif_index); + int (*route_erif_add)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + u16 erif_index); + int (*route_erif_del)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + u16 erif_index); + void (*route_destroy)(struct mlxsw_sp *mlxsw_sp, void *priv, + void *route_priv); + void (*fini)(void *priv); +}; + +struct mlxsw_sp_mr; +struct mlxsw_sp_mr_table; + +int mlxsw_sp_mr_init(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_mr_ops *mr_ops); +void mlxsw_sp_mr_fini(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_mr_route4_add(struct mlxsw_sp_mr_table *mr_table, + struct mfc_cache *mfc, bool replace); +void mlxsw_sp_mr_route4_del(struct mlxsw_sp_mr_table *mr_table, + struct mfc_cache *mfc); +int mlxsw_sp_mr_vif_add(struct mlxsw_sp_mr_table *mr_table, + struct net_device *dev, vifi_t vif_index, + unsigned long vif_flags, + const struct mlxsw_sp_rif *rif); +void mlxsw_sp_mr_vif_del(struct mlxsw_sp_mr_table *mr_table, vifi_t vif_index); +int mlxsw_sp_mr_rif_add(struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_rif *rif); +void mlxsw_sp_mr_rif_del(struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_rif *rif); +void mlxsw_sp_mr_rif_mtu_update(struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_rif *rif, int mtu); +struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp, + u32 tb_id, + enum mlxsw_sp_l3proto proto); +void mlxsw_sp_mr_table_destroy(struct mlxsw_sp_mr_table *mr_table); +void mlxsw_sp_mr_table_flush(struct mlxsw_sp_mr_table *mr_table); +bool mlxsw_sp_mr_table_empty(const struct mlxsw_sp_mr_table *mr_table); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c new file mode 100644 index 000000000000..34a0b632e5dd --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c @@ -0,0 +1,839 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/parman.h> + +#include "spectrum_mr_tcam.h" +#include "reg.h" +#include "spectrum.h" +#include "core_acl_flex_actions.h" +#include "spectrum_mr.h" + +struct mlxsw_sp_mr_tcam_region { + struct mlxsw_sp *mlxsw_sp; + enum mlxsw_reg_rtar_key_type rtar_key_type; + struct parman *parman; + struct parman_prio *parman_prios; +}; + +struct mlxsw_sp_mr_tcam { + struct mlxsw_sp_mr_tcam_region ipv4_tcam_region; +}; + +/* This struct maps to one RIGR2 register entry */ +struct mlxsw_sp_mr_erif_sublist { + struct list_head list; + u32 rigr2_kvdl_index; + int num_erifs; + u16 erif_indices[MLXSW_REG_RIGR2_MAX_ERIFS]; + bool synced; +}; + +struct mlxsw_sp_mr_tcam_erif_list { + struct list_head erif_sublists; + u32 kvdl_index; +}; + +static bool +mlxsw_sp_mr_erif_sublist_full(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_erif_sublist *erif_sublist) +{ + int erif_list_entries = MLXSW_CORE_RES_GET(mlxsw_sp->core, + MC_ERIF_LIST_ENTRIES); + + return erif_sublist->num_erifs == erif_list_entries; +} + +static void +mlxsw_sp_mr_erif_list_init(struct mlxsw_sp_mr_tcam_erif_list *erif_list) +{ + INIT_LIST_HEAD(&erif_list->erif_sublists); +} + +#define MLXSW_SP_KVDL_RIGR2_SIZE 1 + +static struct mlxsw_sp_mr_erif_sublist * +mlxsw_sp_mr_erif_sublist_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_tcam_erif_list *erif_list) +{ + struct mlxsw_sp_mr_erif_sublist *erif_sublist; + int err; + + erif_sublist = kzalloc(sizeof(*erif_sublist), GFP_KERNEL); + if (!erif_sublist) + return ERR_PTR(-ENOMEM); + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_RIGR2_SIZE, + &erif_sublist->rigr2_kvdl_index); + if (err) { + kfree(erif_sublist); + return ERR_PTR(err); + } + + list_add_tail(&erif_sublist->list, &erif_list->erif_sublists); + return erif_sublist; +} + +static void +mlxsw_sp_mr_erif_sublist_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_erif_sublist *erif_sublist) +{ + list_del(&erif_sublist->list); + mlxsw_sp_kvdl_free(mlxsw_sp, erif_sublist->rigr2_kvdl_index); + kfree(erif_sublist); +} + +static int +mlxsw_sp_mr_erif_list_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_tcam_erif_list *erif_list, + u16 erif_index) +{ + struct mlxsw_sp_mr_erif_sublist *sublist; + + /* If either there is no erif_entry or the last one is full, allocate a + * new one. + */ + if (list_empty(&erif_list->erif_sublists)) { + sublist = mlxsw_sp_mr_erif_sublist_create(mlxsw_sp, erif_list); + if (IS_ERR(sublist)) + return PTR_ERR(sublist); + erif_list->kvdl_index = sublist->rigr2_kvdl_index; + } else { + sublist = list_last_entry(&erif_list->erif_sublists, + struct mlxsw_sp_mr_erif_sublist, + list); + sublist->synced = false; + if (mlxsw_sp_mr_erif_sublist_full(mlxsw_sp, sublist)) { + sublist = mlxsw_sp_mr_erif_sublist_create(mlxsw_sp, + erif_list); + if (IS_ERR(sublist)) + return PTR_ERR(sublist); + } + } + + /* Add the eRIF to the last entry's last index */ + sublist->erif_indices[sublist->num_erifs++] = erif_index; + return 0; +} + +static void +mlxsw_sp_mr_erif_list_flush(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_tcam_erif_list *erif_list) +{ + struct mlxsw_sp_mr_erif_sublist *erif_sublist, *tmp; + + list_for_each_entry_safe(erif_sublist, tmp, &erif_list->erif_sublists, + list) + mlxsw_sp_mr_erif_sublist_destroy(mlxsw_sp, erif_sublist); +} + +static int +mlxsw_sp_mr_erif_list_commit(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_tcam_erif_list *erif_list) +{ + struct mlxsw_sp_mr_erif_sublist *curr_sublist; + char rigr2_pl[MLXSW_REG_RIGR2_LEN]; + int err; + int i; + + list_for_each_entry(curr_sublist, &erif_list->erif_sublists, list) { + if (curr_sublist->synced) + continue; + + /* If the sublist is not the last one, pack the next index */ + if (list_is_last(&curr_sublist->list, + &erif_list->erif_sublists)) { + mlxsw_reg_rigr2_pack(rigr2_pl, + curr_sublist->rigr2_kvdl_index, + false, 0); + } else { + struct mlxsw_sp_mr_erif_sublist *next_sublist; + + next_sublist = list_next_entry(curr_sublist, list); + mlxsw_reg_rigr2_pack(rigr2_pl, + curr_sublist->rigr2_kvdl_index, + true, + next_sublist->rigr2_kvdl_index); + } + + /* Pack all the erifs */ + for (i = 0; i < curr_sublist->num_erifs; i++) { + u16 erif_index = curr_sublist->erif_indices[i]; + + mlxsw_reg_rigr2_erif_entry_pack(rigr2_pl, i, true, + erif_index); + } + + /* Write the entry */ + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rigr2), + rigr2_pl); + if (err) + /* No need of a rollback here because this + * hardware entry should not be pointed yet. + */ + return err; + curr_sublist->synced = true; + } + return 0; +} + +static void mlxsw_sp_mr_erif_list_move(struct mlxsw_sp_mr_tcam_erif_list *to, + struct mlxsw_sp_mr_tcam_erif_list *from) +{ + list_splice(&from->erif_sublists, &to->erif_sublists); + to->kvdl_index = from->kvdl_index; +} + +struct mlxsw_sp_mr_tcam_route { + struct mlxsw_sp_mr_tcam_erif_list erif_list; + struct mlxsw_afa_block *afa_block; + u32 counter_index; + struct parman_item parman_item; + struct parman_prio *parman_prio; + enum mlxsw_sp_mr_route_action action; + struct mlxsw_sp_mr_route_key key; + u16 irif_index; + u16 min_mtu; +}; + +static struct mlxsw_afa_block * +mlxsw_sp_mr_tcam_afa_block_create(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_mr_route_action route_action, + u16 irif_index, u32 counter_index, + u16 min_mtu, + struct mlxsw_sp_mr_tcam_erif_list *erif_list) +{ + struct mlxsw_afa_block *afa_block; + int err; + + afa_block = mlxsw_afa_block_create(mlxsw_sp->afa); + if (!afa_block) + return ERR_PTR(-ENOMEM); + + err = mlxsw_afa_block_append_counter(afa_block, counter_index); + if (err) + goto err; + + switch (route_action) { + case MLXSW_SP_MR_ROUTE_ACTION_TRAP: + err = mlxsw_afa_block_append_trap(afa_block, + MLXSW_TRAP_ID_ACL1); + if (err) + goto err; + break; + case MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD: + case MLXSW_SP_MR_ROUTE_ACTION_FORWARD: + /* If we are about to append a multicast router action, commit + * the erif_list. + */ + err = mlxsw_sp_mr_erif_list_commit(mlxsw_sp, erif_list); + if (err) + goto err; + + err = mlxsw_afa_block_append_mcrouter(afa_block, irif_index, + min_mtu, false, + erif_list->kvdl_index); + if (err) + goto err; + + if (route_action == MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD) { + err = mlxsw_afa_block_append_trap_and_forward(afa_block, + MLXSW_TRAP_ID_ACL2); + if (err) + goto err; + } + break; + default: + err = -EINVAL; + goto err; + } + + err = mlxsw_afa_block_commit(afa_block); + if (err) + goto err; + return afa_block; +err: + mlxsw_afa_block_destroy(afa_block); + return ERR_PTR(err); +} + +static void +mlxsw_sp_mr_tcam_afa_block_destroy(struct mlxsw_afa_block *afa_block) +{ + mlxsw_afa_block_destroy(afa_block); +} + +static int mlxsw_sp_mr_tcam_route_replace(struct mlxsw_sp *mlxsw_sp, + struct parman_item *parman_item, + struct mlxsw_sp_mr_route_key *key, + struct mlxsw_afa_block *afa_block) +{ + char rmft2_pl[MLXSW_REG_RMFT2_LEN]; + + switch (key->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, true, parman_item->index, + key->vrid, + MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, 0, + ntohl(key->group.addr4), + ntohl(key->group_mask.addr4), + ntohl(key->source.addr4), + ntohl(key->source_mask.addr4), + mlxsw_afa_block_first_set(afa_block)); + break; + case MLXSW_SP_L3_PROTO_IPV6: + default: + WARN_ON_ONCE(1); + } + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl); +} + +static int mlxsw_sp_mr_tcam_route_remove(struct mlxsw_sp *mlxsw_sp, int vrid, + struct parman_item *parman_item) +{ + char rmft2_pl[MLXSW_REG_RMFT2_LEN]; + + mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, false, parman_item->index, vrid, + 0, 0, 0, 0, 0, 0, NULL); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl); +} + +static int +mlxsw_sp_mr_tcam_erif_populate(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_tcam_erif_list *erif_list, + struct mlxsw_sp_mr_route_info *route_info) +{ + int err; + int i; + + for (i = 0; i < route_info->erif_num; i++) { + u16 erif_index = route_info->erif_indices[i]; + + err = mlxsw_sp_mr_erif_list_add(mlxsw_sp, erif_list, + erif_index); + if (err) + return err; + } + return 0; +} + +static int +mlxsw_sp_mr_tcam_route_parman_item_add(struct mlxsw_sp_mr_tcam *mr_tcam, + struct mlxsw_sp_mr_tcam_route *route, + enum mlxsw_sp_mr_route_prio prio) +{ + struct parman_prio *parman_prio = NULL; + int err; + + switch (route->key.proto) { + case MLXSW_SP_L3_PROTO_IPV4: + parman_prio = &mr_tcam->ipv4_tcam_region.parman_prios[prio]; + err = parman_item_add(mr_tcam->ipv4_tcam_region.parman, + parman_prio, &route->parman_item); + if (err) + return err; + break; + case MLXSW_SP_L3_PROTO_IPV6: + default: + WARN_ON_ONCE(1); + } + route->parman_prio = parman_prio; + return 0; +} + +static void +mlxsw_sp_mr_tcam_route_parman_item_remove(struct mlxsw_sp_mr_tcam *mr_tcam, + struct mlxsw_sp_mr_tcam_route *route) +{ + switch (route->key.proto) { + case MLXSW_SP_L3_PROTO_IPV4: + parman_item_remove(mr_tcam->ipv4_tcam_region.parman, + route->parman_prio, &route->parman_item); + break; + case MLXSW_SP_L3_PROTO_IPV6: + default: + WARN_ON_ONCE(1); + } +} + +static int +mlxsw_sp_mr_tcam_route_create(struct mlxsw_sp *mlxsw_sp, void *priv, + void *route_priv, + struct mlxsw_sp_mr_route_params *route_params) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + struct mlxsw_sp_mr_tcam *mr_tcam = priv; + int err; + + route->key = route_params->key; + route->irif_index = route_params->value.irif_index; + route->min_mtu = route_params->value.min_mtu; + route->action = route_params->value.route_action; + + /* Create the egress RIFs list */ + mlxsw_sp_mr_erif_list_init(&route->erif_list); + err = mlxsw_sp_mr_tcam_erif_populate(mlxsw_sp, &route->erif_list, + &route_params->value); + if (err) + goto err_erif_populate; + + /* Create the flow counter */ + err = mlxsw_sp_flow_counter_alloc(mlxsw_sp, &route->counter_index); + if (err) + goto err_counter_alloc; + + /* Create the flexible action block */ + route->afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp, + route->action, + route->irif_index, + route->counter_index, + route->min_mtu, + &route->erif_list); + if (IS_ERR(route->afa_block)) { + err = PTR_ERR(route->afa_block); + goto err_afa_block_create; + } + + /* Allocate place in the TCAM */ + err = mlxsw_sp_mr_tcam_route_parman_item_add(mr_tcam, route, + route_params->prio); + if (err) + goto err_parman_item_add; + + /* Write the route to the TCAM */ + err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item, + &route->key, route->afa_block); + if (err) + goto err_route_replace; + return 0; + +err_route_replace: + mlxsw_sp_mr_tcam_route_parman_item_remove(mr_tcam, route); +err_parman_item_add: + mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block); +err_afa_block_create: + mlxsw_sp_flow_counter_free(mlxsw_sp, route->counter_index); +err_erif_populate: +err_counter_alloc: + mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list); + return err; +} + +static void mlxsw_sp_mr_tcam_route_destroy(struct mlxsw_sp *mlxsw_sp, + void *priv, void *route_priv) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + struct mlxsw_sp_mr_tcam *mr_tcam = priv; + + mlxsw_sp_mr_tcam_route_remove(mlxsw_sp, route->key.vrid, + &route->parman_item); + mlxsw_sp_mr_tcam_route_parman_item_remove(mr_tcam, route); + mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block); + mlxsw_sp_flow_counter_free(mlxsw_sp, route->counter_index); + mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list); +} + +static int mlxsw_sp_mr_tcam_route_stats(struct mlxsw_sp *mlxsw_sp, + void *route_priv, u64 *packets, + u64 *bytes) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + + return mlxsw_sp_flow_counter_get(mlxsw_sp, route->counter_index, + packets, bytes); +} + +static int +mlxsw_sp_mr_tcam_route_action_update(struct mlxsw_sp *mlxsw_sp, + void *route_priv, + enum mlxsw_sp_mr_route_action route_action) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + struct mlxsw_afa_block *afa_block; + int err; + + /* Create a new flexible action block */ + afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp, route_action, + route->irif_index, + route->counter_index, + route->min_mtu, + &route->erif_list); + if (IS_ERR(afa_block)) + return PTR_ERR(afa_block); + + /* Update the TCAM route entry */ + err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item, + &route->key, afa_block); + if (err) + goto err; + + /* Delete the old one */ + mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block); + route->afa_block = afa_block; + route->action = route_action; + return 0; +err: + mlxsw_sp_mr_tcam_afa_block_destroy(afa_block); + return err; +} + +static int mlxsw_sp_mr_tcam_route_min_mtu_update(struct mlxsw_sp *mlxsw_sp, + void *route_priv, u16 min_mtu) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + struct mlxsw_afa_block *afa_block; + int err; + + /* Create a new flexible action block */ + afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp, + route->action, + route->irif_index, + route->counter_index, + min_mtu, + &route->erif_list); + if (IS_ERR(afa_block)) + return PTR_ERR(afa_block); + + /* Update the TCAM route entry */ + err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item, + &route->key, afa_block); + if (err) + goto err; + + /* Delete the old one */ + mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block); + route->afa_block = afa_block; + route->min_mtu = min_mtu; + return 0; +err: + mlxsw_sp_mr_tcam_afa_block_destroy(afa_block); + return err; +} + +static int mlxsw_sp_mr_tcam_route_irif_update(struct mlxsw_sp *mlxsw_sp, + void *route_priv, u16 irif_index) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + + if (route->action != MLXSW_SP_MR_ROUTE_ACTION_TRAP) + return -EINVAL; + route->irif_index = irif_index; + return 0; +} + +static int mlxsw_sp_mr_tcam_route_erif_add(struct mlxsw_sp *mlxsw_sp, + void *route_priv, u16 erif_index) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + int err; + + err = mlxsw_sp_mr_erif_list_add(mlxsw_sp, &route->erif_list, + erif_index); + if (err) + return err; + + /* Commit the action only if the route action is not TRAP */ + if (route->action != MLXSW_SP_MR_ROUTE_ACTION_TRAP) + return mlxsw_sp_mr_erif_list_commit(mlxsw_sp, + &route->erif_list); + return 0; +} + +static int mlxsw_sp_mr_tcam_route_erif_del(struct mlxsw_sp *mlxsw_sp, + void *route_priv, u16 erif_index) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + struct mlxsw_sp_mr_erif_sublist *erif_sublist; + struct mlxsw_sp_mr_tcam_erif_list erif_list; + struct mlxsw_afa_block *afa_block; + int err; + int i; + + /* Create a copy of the original erif_list without the deleted entry */ + mlxsw_sp_mr_erif_list_init(&erif_list); + list_for_each_entry(erif_sublist, &route->erif_list.erif_sublists, list) { + for (i = 0; i < erif_sublist->num_erifs; i++) { + u16 curr_erif = erif_sublist->erif_indices[i]; + + if (curr_erif == erif_index) + continue; + err = mlxsw_sp_mr_erif_list_add(mlxsw_sp, &erif_list, + curr_erif); + if (err) + goto err_erif_list_add; + } + } + + /* Create the flexible action block pointing to the new erif_list */ + afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp, route->action, + route->irif_index, + route->counter_index, + route->min_mtu, + &erif_list); + if (IS_ERR(afa_block)) { + err = PTR_ERR(afa_block); + goto err_afa_block_create; + } + + /* Update the TCAM route entry */ + err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item, + &route->key, afa_block); + if (err) + goto err_route_write; + + mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block); + mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list); + route->afa_block = afa_block; + mlxsw_sp_mr_erif_list_move(&route->erif_list, &erif_list); + return 0; + +err_route_write: + mlxsw_sp_mr_tcam_afa_block_destroy(afa_block); +err_afa_block_create: +err_erif_list_add: + mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &erif_list); + return err; +} + +static int +mlxsw_sp_mr_tcam_route_update(struct mlxsw_sp *mlxsw_sp, void *route_priv, + struct mlxsw_sp_mr_route_info *route_info) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + struct mlxsw_sp_mr_tcam_erif_list erif_list; + struct mlxsw_afa_block *afa_block; + int err; + + /* Create a new erif_list */ + mlxsw_sp_mr_erif_list_init(&erif_list); + err = mlxsw_sp_mr_tcam_erif_populate(mlxsw_sp, &erif_list, route_info); + if (err) + goto err_erif_populate; + + /* Create the flexible action block pointing to the new erif_list */ + afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp, + route_info->route_action, + route_info->irif_index, + route->counter_index, + route_info->min_mtu, + &erif_list); + if (IS_ERR(afa_block)) { + err = PTR_ERR(afa_block); + goto err_afa_block_create; + } + + /* Update the TCAM route entry */ + err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item, + &route->key, afa_block); + if (err) + goto err_route_write; + + mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block); + mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list); + route->afa_block = afa_block; + mlxsw_sp_mr_erif_list_move(&route->erif_list, &erif_list); + route->action = route_info->route_action; + route->irif_index = route_info->irif_index; + route->min_mtu = route_info->min_mtu; + return 0; + +err_route_write: + mlxsw_sp_mr_tcam_afa_block_destroy(afa_block); +err_afa_block_create: +err_erif_populate: + mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &erif_list); + return err; +} + +#define MLXSW_SP_MR_TCAM_REGION_BASE_COUNT 16 +#define MLXSW_SP_MR_TCAM_REGION_RESIZE_STEP 16 + +static int +mlxsw_sp_mr_tcam_region_alloc(struct mlxsw_sp_mr_tcam_region *mr_tcam_region) +{ + struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp; + char rtar_pl[MLXSW_REG_RTAR_LEN]; + + mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_ALLOCATE, + mr_tcam_region->rtar_key_type, + MLXSW_SP_MR_TCAM_REGION_BASE_COUNT); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl); +} + +static void +mlxsw_sp_mr_tcam_region_free(struct mlxsw_sp_mr_tcam_region *mr_tcam_region) +{ + struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp; + char rtar_pl[MLXSW_REG_RTAR_LEN]; + + mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_DEALLOCATE, + mr_tcam_region->rtar_key_type, 0); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl); +} + +static int mlxsw_sp_mr_tcam_region_parman_resize(void *priv, + unsigned long new_count) +{ + struct mlxsw_sp_mr_tcam_region *mr_tcam_region = priv; + struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp; + char rtar_pl[MLXSW_REG_RTAR_LEN]; + u64 max_tcam_rules; + + max_tcam_rules = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_TCAM_RULES); + if (new_count > max_tcam_rules) + return -EINVAL; + mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_RESIZE, + mr_tcam_region->rtar_key_type, new_count); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl); +} + +static void mlxsw_sp_mr_tcam_region_parman_move(void *priv, + unsigned long from_index, + unsigned long to_index, + unsigned long count) +{ + struct mlxsw_sp_mr_tcam_region *mr_tcam_region = priv; + struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp; + char rrcr_pl[MLXSW_REG_RRCR_LEN]; + + mlxsw_reg_rrcr_pack(rrcr_pl, MLXSW_REG_RRCR_OP_MOVE, + from_index, count, + mr_tcam_region->rtar_key_type, to_index); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rrcr), rrcr_pl); +} + +static const struct parman_ops mlxsw_sp_mr_tcam_region_parman_ops = { + .base_count = MLXSW_SP_MR_TCAM_REGION_BASE_COUNT, + .resize_step = MLXSW_SP_MR_TCAM_REGION_RESIZE_STEP, + .resize = mlxsw_sp_mr_tcam_region_parman_resize, + .move = mlxsw_sp_mr_tcam_region_parman_move, + .algo = PARMAN_ALGO_TYPE_LSORT, +}; + +static int +mlxsw_sp_mr_tcam_region_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_tcam_region *mr_tcam_region, + enum mlxsw_reg_rtar_key_type rtar_key_type) +{ + struct parman_prio *parman_prios; + struct parman *parman; + int err; + int i; + + mr_tcam_region->rtar_key_type = rtar_key_type; + mr_tcam_region->mlxsw_sp = mlxsw_sp; + + err = mlxsw_sp_mr_tcam_region_alloc(mr_tcam_region); + if (err) + return err; + + parman = parman_create(&mlxsw_sp_mr_tcam_region_parman_ops, + mr_tcam_region); + if (!parman) { + err = -ENOMEM; + goto err_parman_create; + } + mr_tcam_region->parman = parman; + + parman_prios = kmalloc_array(MLXSW_SP_MR_ROUTE_PRIO_MAX + 1, + sizeof(*parman_prios), GFP_KERNEL); + if (!parman_prios) { + err = -ENOMEM; + goto err_parman_prios_alloc; + } + mr_tcam_region->parman_prios = parman_prios; + + for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++) + parman_prio_init(mr_tcam_region->parman, + &mr_tcam_region->parman_prios[i], i); + return 0; + +err_parman_prios_alloc: + parman_destroy(parman); +err_parman_create: + mlxsw_sp_mr_tcam_region_free(mr_tcam_region); + return err; +} + +static void +mlxsw_sp_mr_tcam_region_fini(struct mlxsw_sp_mr_tcam_region *mr_tcam_region) +{ + int i; + + for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++) + parman_prio_fini(&mr_tcam_region->parman_prios[i]); + kfree(mr_tcam_region->parman_prios); + parman_destroy(mr_tcam_region->parman); + mlxsw_sp_mr_tcam_region_free(mr_tcam_region); +} + +static int mlxsw_sp_mr_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv) +{ + struct mlxsw_sp_mr_tcam *mr_tcam = priv; + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MC_ERIF_LIST_ENTRIES) || + !MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_TCAM_RULES)) + return -EIO; + + return mlxsw_sp_mr_tcam_region_init(mlxsw_sp, + &mr_tcam->ipv4_tcam_region, + MLXSW_REG_RTAR_KEY_TYPE_IPV4_MULTICAST); +} + +static void mlxsw_sp_mr_tcam_fini(void *priv) +{ + struct mlxsw_sp_mr_tcam *mr_tcam = priv; + + mlxsw_sp_mr_tcam_region_fini(&mr_tcam->ipv4_tcam_region); +} + +const struct mlxsw_sp_mr_ops mlxsw_sp_mr_tcam_ops = { + .priv_size = sizeof(struct mlxsw_sp_mr_tcam), + .route_priv_size = sizeof(struct mlxsw_sp_mr_tcam_route), + .init = mlxsw_sp_mr_tcam_init, + .route_create = mlxsw_sp_mr_tcam_route_create, + .route_update = mlxsw_sp_mr_tcam_route_update, + .route_stats = mlxsw_sp_mr_tcam_route_stats, + .route_action_update = mlxsw_sp_mr_tcam_route_action_update, + .route_min_mtu_update = mlxsw_sp_mr_tcam_route_min_mtu_update, + .route_irif_update = mlxsw_sp_mr_tcam_route_irif_update, + .route_erif_add = mlxsw_sp_mr_tcam_route_erif_add, + .route_erif_del = mlxsw_sp_mr_tcam_route_erif_del, + .route_destroy = mlxsw_sp_mr_tcam_route_destroy, + .fini = mlxsw_sp_mr_tcam_fini, +}; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h new file mode 100644 index 000000000000..f9b59ee25406 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h @@ -0,0 +1,43 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXSW_SPECTRUM_MCROUTER_TCAM_H +#define _MLXSW_SPECTRUM_MCROUTER_TCAM_H + +#include "spectrum.h" +#include "spectrum_mr.h" + +extern const struct mlxsw_sp_mr_ops mlxsw_sp_mr_tcam_ops; + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c new file mode 100644 index 000000000000..c33beac5def0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -0,0 +1,276 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Nogah Frankel <nogahf@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/netdevice.h> +#include <net/pkt_cls.h> +#include <net/red.h> + +#include "spectrum.h" +#include "reg.h" + +static int +mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port, + int tclass_num, u32 min, u32 max, + u32 probability, bool is_ecn) +{ + char cwtp_cmd[max_t(u8, MLXSW_REG_CWTP_LEN, MLXSW_REG_CWTPM_LEN)]; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + int err; + + mlxsw_reg_cwtp_pack(cwtp_cmd, mlxsw_sp_port->local_port, tclass_num); + mlxsw_reg_cwtp_profile_pack(cwtp_cmd, MLXSW_REG_CWTP_DEFAULT_PROFILE, + roundup(min, MLXSW_REG_CWTP_MIN_VALUE), + roundup(max, MLXSW_REG_CWTP_MIN_VALUE), + probability); + + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtp), cwtp_cmd); + if (err) + return err; + + mlxsw_reg_cwtpm_pack(cwtp_cmd, mlxsw_sp_port->local_port, tclass_num, + MLXSW_REG_CWTP_DEFAULT_PROFILE, true, is_ecn); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtpm), cwtp_cmd); +} + +static int +mlxsw_sp_tclass_congestion_disable(struct mlxsw_sp_port *mlxsw_sp_port, + int tclass_num) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char cwtpm_cmd[MLXSW_REG_CWTPM_LEN]; + + mlxsw_reg_cwtpm_pack(cwtpm_cmd, mlxsw_sp_port->local_port, tclass_num, + MLXSW_REG_CWTPM_RESET_PROFILE, false, false); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtpm), cwtpm_cmd); +} + +static void +mlxsw_sp_setup_tc_qdisc_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + int tclass_num) +{ + struct red_stats *xstats_base = &mlxsw_sp_qdisc->xstats_base; + struct mlxsw_sp_port_xstats *xstats; + struct rtnl_link_stats64 *stats; + + xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; + stats = &mlxsw_sp_port->periodic_hw_stats.stats; + + mlxsw_sp_qdisc->tx_packets = stats->tx_packets; + mlxsw_sp_qdisc->tx_bytes = stats->tx_bytes; + + switch (mlxsw_sp_qdisc->type) { + case MLXSW_SP_QDISC_RED: + xstats_base->prob_mark = xstats->ecn; + xstats_base->prob_drop = xstats->wred_drop[tclass_num]; + xstats_base->pdrop = xstats->tail_drop[tclass_num]; + + mlxsw_sp_qdisc->overlimits = xstats_base->prob_drop + + xstats_base->prob_mark; + mlxsw_sp_qdisc->drops = xstats_base->prob_drop + + xstats_base->pdrop; + break; + default: + break; + } +} + +static int +mlxsw_sp_qdisc_red_destroy(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + int tclass_num) +{ + int err; + + if (mlxsw_sp_qdisc->handle != handle) + return 0; + + err = mlxsw_sp_tclass_congestion_disable(mlxsw_sp_port, tclass_num); + mlxsw_sp_qdisc->handle = TC_H_UNSPEC; + mlxsw_sp_qdisc->type = MLXSW_SP_QDISC_NO_QDISC; + + return err; +} + +static int +mlxsw_sp_qdisc_red_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + int tclass_num, + struct tc_red_qopt_offload_params *p) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u32 min, max; + u64 prob; + int err = 0; + + if (p->min > p->max) { + dev_err(mlxsw_sp->bus_info->dev, + "spectrum: RED: min %u is bigger then max %u\n", p->min, + p->max); + goto err_bad_param; + } + if (p->max > MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE)) { + dev_err(mlxsw_sp->bus_info->dev, + "spectrum: RED: max value %u is too big\n", p->max); + goto err_bad_param; + } + if (p->min == 0 || p->max == 0) { + dev_err(mlxsw_sp->bus_info->dev, + "spectrum: RED: 0 value is illegal for min and max\n"); + goto err_bad_param; + } + + /* calculate probability in percentage */ + prob = p->probability; + prob *= 100; + prob = DIV_ROUND_UP(prob, 1 << 16); + prob = DIV_ROUND_UP(prob, 1 << 16); + min = mlxsw_sp_bytes_cells(mlxsw_sp, p->min); + max = mlxsw_sp_bytes_cells(mlxsw_sp, p->max); + err = mlxsw_sp_tclass_congestion_enable(mlxsw_sp_port, tclass_num, min, + max, prob, p->is_ecn); + if (err) + goto err_config; + + mlxsw_sp_qdisc->type = MLXSW_SP_QDISC_RED; + if (mlxsw_sp_qdisc->handle != handle) + mlxsw_sp_setup_tc_qdisc_clean_stats(mlxsw_sp_port, + mlxsw_sp_qdisc, + tclass_num); + + mlxsw_sp_qdisc->handle = handle; + return 0; + +err_bad_param: + err = -EINVAL; +err_config: + mlxsw_sp_qdisc_red_destroy(mlxsw_sp_port, mlxsw_sp_qdisc->handle, + mlxsw_sp_qdisc, tclass_num); + return err; +} + +static int +mlxsw_sp_qdisc_get_red_xstats(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + int tclass_num, struct red_stats *res) +{ + struct red_stats *xstats_base = &mlxsw_sp_qdisc->xstats_base; + struct mlxsw_sp_port_xstats *xstats; + + if (mlxsw_sp_qdisc->handle != handle || + mlxsw_sp_qdisc->type != MLXSW_SP_QDISC_RED) + return -EOPNOTSUPP; + + xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; + + res->prob_drop = xstats->wred_drop[tclass_num] - xstats_base->prob_drop; + res->prob_mark = xstats->ecn - xstats_base->prob_mark; + res->pdrop = xstats->tail_drop[tclass_num] - xstats_base->pdrop; + return 0; +} + +static int +mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + int tclass_num, + struct tc_red_qopt_offload_stats *res) +{ + u64 tx_bytes, tx_packets, overlimits, drops; + struct mlxsw_sp_port_xstats *xstats; + struct rtnl_link_stats64 *stats; + + if (mlxsw_sp_qdisc->handle != handle || + mlxsw_sp_qdisc->type != MLXSW_SP_QDISC_RED) + return -EOPNOTSUPP; + + xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; + stats = &mlxsw_sp_port->periodic_hw_stats.stats; + + tx_bytes = stats->tx_bytes - mlxsw_sp_qdisc->tx_bytes; + tx_packets = stats->tx_packets - mlxsw_sp_qdisc->tx_packets; + overlimits = xstats->wred_drop[tclass_num] + xstats->ecn - + mlxsw_sp_qdisc->overlimits; + drops = xstats->wred_drop[tclass_num] + xstats->tail_drop[tclass_num] - + mlxsw_sp_qdisc->drops; + + _bstats_update(res->bstats, tx_bytes, tx_packets); + res->qstats->overlimits += overlimits; + res->qstats->drops += drops; + res->qstats->backlog += mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, + xstats->backlog[tclass_num]); + + mlxsw_sp_qdisc->drops += drops; + mlxsw_sp_qdisc->overlimits += overlimits; + mlxsw_sp_qdisc->tx_bytes += tx_bytes; + mlxsw_sp_qdisc->tx_packets += tx_packets; + return 0; +} + +#define MLXSW_SP_PORT_DEFAULT_TCLASS 0 + +int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_red_qopt_offload *p) +{ + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; + int tclass_num; + + if (p->parent != TC_H_ROOT) + return -EOPNOTSUPP; + + mlxsw_sp_qdisc = &mlxsw_sp_port->root_qdisc; + tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS; + + switch (p->command) { + case TC_RED_REPLACE: + return mlxsw_sp_qdisc_red_replace(mlxsw_sp_port, p->handle, + mlxsw_sp_qdisc, tclass_num, + &p->set); + case TC_RED_DESTROY: + return mlxsw_sp_qdisc_red_destroy(mlxsw_sp_port, p->handle, + mlxsw_sp_qdisc, tclass_num); + case TC_RED_XSTATS: + return mlxsw_sp_qdisc_get_red_xstats(mlxsw_sp_port, p->handle, + mlxsw_sp_qdisc, tclass_num, + p->xstats); + case TC_RED_STATS: + return mlxsw_sp_qdisc_get_red_stats(mlxsw_sp_port, p->handle, + mlxsw_sp_qdisc, tclass_num, + &p->stats); + default: + return -EOPNOTSUPP; + } +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 5189022a1c8c..632c7b229054 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -46,6 +46,8 @@ #include <linux/if_bridge.h> #include <linux/socket.h> #include <linux/route.h> +#include <linux/gcd.h> +#include <linux/random.h> #include <net/netevent.h> #include <net/neighbour.h> #include <net/arp.h> @@ -65,6 +67,8 @@ #include "spectrum_cnt.h" #include "spectrum_dpipe.h" #include "spectrum_ipip.h" +#include "spectrum_mr.h" +#include "spectrum_mr_tcam.h" #include "spectrum_router.h" struct mlxsw_sp_vr; @@ -78,6 +82,7 @@ struct mlxsw_sp_router { struct rhashtable neigh_ht; struct rhashtable nexthop_group_ht; struct rhashtable nexthop_ht; + struct list_head nexthop_list; struct { struct mlxsw_sp_lpm_tree *trees; unsigned int tree_count; @@ -92,6 +97,7 @@ struct mlxsw_sp_router { struct list_head ipip_list; bool aborted; struct notifier_block fib_nb; + struct notifier_block netevent_nb; const struct mlxsw_sp_rif_ops **rif_ops_arr; const struct mlxsw_sp_ipip_ops **ipip_ops_arr; }; @@ -458,6 +464,7 @@ struct mlxsw_sp_vr { unsigned int rif_count; struct mlxsw_sp_fib *fib4; struct mlxsw_sp_fib *fib6; + struct mlxsw_sp_mr_table *mr4_table; }; static const struct rhashtable_params mlxsw_sp_fib_ht_params; @@ -652,7 +659,7 @@ static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp) static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr) { - return !!vr->fib4 || !!vr->fib6; + return !!vr->fib4 || !!vr->fib6 || !!vr->mr4_table; } static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp) @@ -692,8 +699,8 @@ static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp, static u32 mlxsw_sp_fix_tb_id(u32 tb_id) { - /* For our purpose, squash main and local table into one */ - if (tb_id == RT_TABLE_LOCAL) + /* For our purpose, squash main, default and local tables into one */ + if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT) tb_id = RT_TABLE_MAIN; return tb_id; } @@ -727,14 +734,17 @@ static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr, } static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp, - u32 tb_id) + u32 tb_id, + struct netlink_ext_ack *extack) { struct mlxsw_sp_vr *vr; int err; vr = mlxsw_sp_vr_find_unused(mlxsw_sp); - if (!vr) + if (!vr) { + NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported virtual routers"); return ERR_PTR(-EBUSY); + } vr->fib4 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV4); if (IS_ERR(vr->fib4)) return ERR_CAST(vr->fib4); @@ -743,9 +753,18 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp, err = PTR_ERR(vr->fib6); goto err_fib6_create; } + vr->mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id, + MLXSW_SP_L3_PROTO_IPV4); + if (IS_ERR(vr->mr4_table)) { + err = PTR_ERR(vr->mr4_table); + goto err_mr_table_create; + } vr->tb_id = tb_id; return vr; +err_mr_table_create: + mlxsw_sp_fib_destroy(vr->fib6); + vr->fib6 = NULL; err_fib6_create: mlxsw_sp_fib_destroy(vr->fib4); vr->fib4 = NULL; @@ -754,27 +773,31 @@ err_fib6_create: static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr) { + mlxsw_sp_mr_table_destroy(vr->mr4_table); + vr->mr4_table = NULL; mlxsw_sp_fib_destroy(vr->fib6); vr->fib6 = NULL; mlxsw_sp_fib_destroy(vr->fib4); vr->fib4 = NULL; } -static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id) +static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, + struct netlink_ext_ack *extack) { struct mlxsw_sp_vr *vr; tb_id = mlxsw_sp_fix_tb_id(tb_id); vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id); if (!vr) - vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id); + vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack); return vr; } static void mlxsw_sp_vr_put(struct mlxsw_sp_vr *vr) { if (!vr->rif_count && list_empty(&vr->fib4->node_list) && - list_empty(&vr->fib6->node_list)) + list_empty(&vr->fib6->node_list) && + mlxsw_sp_mr_table_empty(vr->mr4_table)) mlxsw_sp_vr_destroy(vr); } @@ -920,7 +943,7 @@ __mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev) return __dev_get_by_index(net, tun->parms.link); } -static u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev) +u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev) { struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev); @@ -932,12 +955,14 @@ static u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev) static struct mlxsw_sp_rif * mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_rif_params *params); + const struct mlxsw_sp_rif_params *params, + struct netlink_ext_ack *extack); static struct mlxsw_sp_rif_ipip_lb * mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp, enum mlxsw_sp_ipip_type ipipt, - struct net_device *ol_dev) + struct net_device *ol_dev, + struct netlink_ext_ack *extack) { struct mlxsw_sp_rif_params_ipip_lb lb_params; const struct mlxsw_sp_ipip_ops *ipip_ops; @@ -950,7 +975,7 @@ mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp, .lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev), }; - rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common); + rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack); if (IS_ERR(rif)) return ERR_CAST(rif); return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common); @@ -969,7 +994,7 @@ mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp, return ERR_PTR(-ENOMEM); ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt, - ol_dev); + ol_dev, NULL); if (IS_ERR(ipip_entry->ol_lb)) { ret = ERR_CAST(ipip_entry->ol_lb); goto err_ol_ipip_lb_create; @@ -977,6 +1002,7 @@ mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp, ipip_entry->ipipt = ipipt; ipip_entry->ol_dev = ol_dev; + ipip_entry->parms = mlxsw_sp_ipip_netdev_parms(ol_dev); return ipip_entry; @@ -986,72 +1012,12 @@ err_ol_ipip_lb_create: } static void -mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp_ipip_entry *ipip_entry) +mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry) { - WARN_ON(ipip_entry->ref_count > 0); mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common); kfree(ipip_entry); } -static __be32 -mlxsw_sp_ipip_netdev_saddr4(const struct net_device *ol_dev) -{ - struct ip_tunnel *tun = netdev_priv(ol_dev); - - return tun->parms.iph.saddr; -} - -union mlxsw_sp_l3addr -mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto, - const struct net_device *ol_dev) -{ - switch (proto) { - case MLXSW_SP_L3_PROTO_IPV4: - return (union mlxsw_sp_l3addr) { - .addr4 = mlxsw_sp_ipip_netdev_saddr4(ol_dev), - }; - case MLXSW_SP_L3_PROTO_IPV6: - break; - }; - - WARN_ON(1); - return (union mlxsw_sp_l3addr) { - .addr4 = 0, - }; -} - -__be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev) -{ - struct ip_tunnel *tun = netdev_priv(ol_dev); - - return tun->parms.iph.daddr; -} - -union mlxsw_sp_l3addr -mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto, - const struct net_device *ol_dev) -{ - switch (proto) { - case MLXSW_SP_L3_PROTO_IPV4: - return (union mlxsw_sp_l3addr) { - .addr4 = mlxsw_sp_ipip_netdev_daddr4(ol_dev), - }; - case MLXSW_SP_L3_PROTO_IPV6: - break; - }; - - WARN_ON(1); - return (union mlxsw_sp_l3addr) { - .addr4 = 0, - }; -} - -static bool mlxsw_sp_l3addr_eq(const union mlxsw_sp_l3addr *addr1, - const union mlxsw_sp_l3addr *addr2) -{ - return !memcmp(addr1, addr2, sizeof(*addr1)); -} - static bool mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp, const enum mlxsw_sp_l3proto ul_proto, @@ -1184,60 +1150,28 @@ mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp, } static struct mlxsw_sp_ipip_entry * -mlxsw_sp_ipip_entry_get(struct mlxsw_sp *mlxsw_sp, - enum mlxsw_sp_ipip_type ipipt, - struct net_device *ol_dev) +mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_ipip_type ipipt, + struct net_device *ol_dev) { - u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev); - struct mlxsw_sp_router *router = mlxsw_sp->router; - struct mlxsw_sp_fib_entry *decap_fib_entry; struct mlxsw_sp_ipip_entry *ipip_entry; - enum mlxsw_sp_l3proto ul_proto; - union mlxsw_sp_l3addr saddr; - - list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list, - ipip_list_node) { - if (ipip_entry->ol_dev == ol_dev) - goto inc_ref_count; - - /* The configuration where several tunnels have the same local - * address in the same underlay table needs special treatment in - * the HW. That is currently not implemented in the driver. - */ - ul_proto = router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto; - saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev); - if (mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr, - ul_tb_id, ipip_entry)) - return ERR_PTR(-EEXIST); - } ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev); if (IS_ERR(ipip_entry)) return ipip_entry; - decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry); - if (decap_fib_entry) - mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry, - decap_fib_entry); - list_add_tail(&ipip_entry->ipip_list_node, &mlxsw_sp->router->ipip_list); -inc_ref_count: - ++ipip_entry->ref_count; return ipip_entry; } static void -mlxsw_sp_ipip_entry_put(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_ipip_entry *ipip_entry) +mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry) { - if (--ipip_entry->ref_count == 0) { - list_del(&ipip_entry->ipip_list_node); - if (ipip_entry->decap_fib_entry) - mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); - mlxsw_sp_ipip_entry_destroy(ipip_entry); - } + list_del(&ipip_entry->ipip_list_node); + mlxsw_sp_ipip_entry_dealloc(ipip_entry); } static bool @@ -1279,6 +1213,455 @@ mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp, return NULL; } +static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev, + enum mlxsw_sp_ipip_type *p_type) +{ + struct mlxsw_sp_router *router = mlxsw_sp->router; + const struct mlxsw_sp_ipip_ops *ipip_ops; + enum mlxsw_sp_ipip_type ipipt; + + for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) { + ipip_ops = router->ipip_ops_arr[ipipt]; + if (dev->type == ipip_ops->dev_type) { + if (p_type) + *p_type = ipipt; + return true; + } + } + return false; +} + +bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev) +{ + return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL); +} + +static struct mlxsw_sp_ipip_entry * +mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp, + const struct net_device *ol_dev) +{ + struct mlxsw_sp_ipip_entry *ipip_entry; + + list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list, + ipip_list_node) + if (ipip_entry->ol_dev == ol_dev) + return ipip_entry; + + return NULL; +} + +static struct mlxsw_sp_ipip_entry * +mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *ul_dev, + struct mlxsw_sp_ipip_entry *start) +{ + struct mlxsw_sp_ipip_entry *ipip_entry; + + ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list, + ipip_list_node); + list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list, + ipip_list_node) { + struct net_device *ipip_ul_dev = + __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev); + + if (ipip_ul_dev == ul_dev) + return ipip_entry; + } + + return NULL; +} + +bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev) +{ + return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL); +} + +static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp, + const struct net_device *ol_dev, + enum mlxsw_sp_ipip_type ipipt) +{ + const struct mlxsw_sp_ipip_ops *ops + = mlxsw_sp->router->ipip_ops_arr[ipipt]; + + /* For deciding whether decap should be offloaded, we don't care about + * overlay protocol, so ask whether either one is supported. + */ + return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) || + ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6); +} + +static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev) +{ + struct mlxsw_sp_ipip_entry *ipip_entry; + enum mlxsw_sp_l3proto ul_proto; + enum mlxsw_sp_ipip_type ipipt; + union mlxsw_sp_l3addr saddr; + u32 ul_tb_id; + + mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt); + if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) { + ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev); + ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto; + saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev); + if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto, + saddr, ul_tb_id, + NULL)) { + ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt, + ol_dev); + if (IS_ERR(ipip_entry)) + return PTR_ERR(ipip_entry); + } + } + + return 0; +} + +static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev) +{ + struct mlxsw_sp_ipip_entry *ipip_entry; + + ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); + if (ipip_entry) + mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry); +} + +static void +mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + struct mlxsw_sp_fib_entry *decap_fib_entry; + + decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry); + if (decap_fib_entry) + mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry, + decap_fib_entry); +} + +static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev) +{ + struct mlxsw_sp_ipip_entry *ipip_entry; + + ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); + if (ipip_entry) + mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry); +} + +static void +mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + if (ipip_entry->decap_fib_entry) + mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); +} + +static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev) +{ + struct mlxsw_sp_ipip_entry *ipip_entry; + + ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); + if (ipip_entry) + mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry); +} + +static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif); +static int +mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + bool keep_encap, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb; + struct mlxsw_sp_rif_ipip_lb *new_lb_rif; + + new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, + ipip_entry->ipipt, + ipip_entry->ol_dev, + extack); + if (IS_ERR(new_lb_rif)) + return PTR_ERR(new_lb_rif); + ipip_entry->ol_lb = new_lb_rif; + + if (keep_encap) { + list_splice_init(&old_lb_rif->common.nexthop_list, + &new_lb_rif->common.nexthop_list); + mlxsw_sp_nexthop_rif_update(mlxsw_sp, &new_lb_rif->common); + } + + mlxsw_sp_rif_destroy(&old_lb_rif->common); + + return 0; +} + +/** + * Update the offload related to an IPIP entry. This always updates decap, and + * in addition to that it also: + * @recreate_loopback: recreates the associated loopback RIF + * @keep_encap: updates next hops that use the tunnel netdevice. This is only + * relevant when recreate_loopback is true. + * @update_nexthops: updates next hops, keeping the current loopback RIF. This + * is only relevant when recreate_loopback is false. + */ +int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + bool recreate_loopback, + bool keep_encap, + bool update_nexthops, + struct netlink_ext_ack *extack) +{ + int err; + + /* RIFs can't be edited, so to update loopback, we need to destroy and + * recreate it. That creates a window of opportunity where RALUE and + * RATR registers end up referencing a RIF that's already gone. RATRs + * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care + * of RALUE, demote the decap route back. + */ + if (ipip_entry->decap_fib_entry) + mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); + + if (recreate_loopback) { + err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry, + keep_encap, extack); + if (err) + return err; + } else if (update_nexthops) { + mlxsw_sp_nexthop_rif_update(mlxsw_sp, + &ipip_entry->ol_lb->common); + } + + if (ipip_entry->ol_dev->flags & IFF_UP) + mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry); + + return 0; +} + +static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_ipip_entry *ipip_entry = + mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); + + if (!ipip_entry) + return 0; + return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry, + true, false, false, extack); +} + +static int +mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + struct net_device *ul_dev, + struct netlink_ext_ack *extack) +{ + return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry, + true, true, false, extack); +} + +static int +mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + struct net_device *ul_dev) +{ + return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry, + false, false, true, NULL); +} + +static int +mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + struct net_device *ul_dev) +{ + /* A down underlay device causes encapsulated packets to not be + * forwarded, but decap still works. So refresh next hops without + * touching anything else. + */ + return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry, + false, false, true, NULL); +} + +static int +mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev, + struct netlink_ext_ack *extack) +{ + const struct mlxsw_sp_ipip_ops *ipip_ops; + struct mlxsw_sp_ipip_entry *ipip_entry; + int err; + + ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); + if (!ipip_entry) + /* A change might make a tunnel eligible for offloading, but + * that is currently not implemented. What falls to slow path + * stays there. + */ + return 0; + + /* A change might make a tunnel not eligible for offloading. */ + if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, + ipip_entry->ipipt)) { + mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry); + return 0; + } + + ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]; + err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack); + return err; +} + +void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + struct net_device *ol_dev = ipip_entry->ol_dev; + + if (ol_dev->flags & IFF_UP) + mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry); + mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry); +} + +/* The configuration where several tunnels have the same local address in the + * same underlay table needs special treatment in the HW. That is currently not + * implemented in the driver. This function finds and demotes the first tunnel + * with a given source address, except the one passed in in the argument + * `except'. + */ +bool +mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_l3proto ul_proto, + union mlxsw_sp_l3addr saddr, + u32 ul_tb_id, + const struct mlxsw_sp_ipip_entry *except) +{ + struct mlxsw_sp_ipip_entry *ipip_entry, *tmp; + + list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list, + ipip_list_node) { + if (ipip_entry != except && + mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr, + ul_tb_id, ipip_entry)) { + mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry); + return true; + } + } + + return false; +} + +static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp, + struct net_device *ul_dev) +{ + struct mlxsw_sp_ipip_entry *ipip_entry, *tmp; + + list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list, + ipip_list_node) { + struct net_device *ipip_ul_dev = + __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev); + + if (ipip_ul_dev == ul_dev) + mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry); + } +} + +int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev, + unsigned long event, + struct netdev_notifier_info *info) +{ + struct netdev_notifier_changeupper_info *chup; + struct netlink_ext_ack *extack; + + switch (event) { + case NETDEV_REGISTER: + return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev); + case NETDEV_UNREGISTER: + mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev); + return 0; + case NETDEV_UP: + mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev); + return 0; + case NETDEV_DOWN: + mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev); + return 0; + case NETDEV_CHANGEUPPER: + chup = container_of(info, typeof(*chup), info); + extack = info->extack; + if (netif_is_l3_master(chup->upper_dev)) + return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp, + ol_dev, + extack); + return 0; + case NETDEV_CHANGE: + extack = info->extack; + return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp, + ol_dev, extack); + } + return 0; +} + +static int +__mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + struct net_device *ul_dev, + unsigned long event, + struct netdev_notifier_info *info) +{ + struct netdev_notifier_changeupper_info *chup; + struct netlink_ext_ack *extack; + + switch (event) { + case NETDEV_CHANGEUPPER: + chup = container_of(info, typeof(*chup), info); + extack = info->extack; + if (netif_is_l3_master(chup->upper_dev)) + return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp, + ipip_entry, + ul_dev, + extack); + break; + + case NETDEV_UP: + return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry, + ul_dev); + case NETDEV_DOWN: + return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp, + ipip_entry, + ul_dev); + } + return 0; +} + +int +mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ul_dev, + unsigned long event, + struct netdev_notifier_info *info) +{ + struct mlxsw_sp_ipip_entry *ipip_entry = NULL; + int err; + + while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, + ul_dev, + ipip_entry))) { + err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry, + ul_dev, event, info); + if (err) { + mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp, + ul_dev); + return err; + } + } + + return 0; +} + struct mlxsw_sp_neigh_key { struct neighbour *n; }; @@ -1316,7 +1699,7 @@ mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif, typeof(*neigh_entry), rif_list_node); } - if (neigh_entry->rif_list_node.next == &rif->neigh_list) + if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list)) return NULL; return list_next_entry(neigh_entry, rif_list_node); } @@ -1664,7 +2047,7 @@ __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp, err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd), rauhtd_pl); if (err) { - dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour talbe\n"); + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n"); break; } num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl); @@ -1857,7 +2240,7 @@ mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true); } -struct mlxsw_sp_neigh_event_work { +struct mlxsw_sp_netevent_work { struct work_struct work; struct mlxsw_sp *mlxsw_sp; struct neighbour *n; @@ -1865,11 +2248,11 @@ struct mlxsw_sp_neigh_event_work { static void mlxsw_sp_router_neigh_event_work(struct work_struct *work) { - struct mlxsw_sp_neigh_event_work *neigh_work = - container_of(work, struct mlxsw_sp_neigh_event_work, work); - struct mlxsw_sp *mlxsw_sp = neigh_work->mlxsw_sp; + struct mlxsw_sp_netevent_work *net_work = + container_of(work, struct mlxsw_sp_netevent_work, work); + struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp; struct mlxsw_sp_neigh_entry *neigh_entry; - struct neighbour *n = neigh_work->n; + struct neighbour *n = net_work->n; unsigned char ha[ETH_ALEN]; bool entry_connected; u8 nud_state, dead; @@ -1905,18 +2288,32 @@ static void mlxsw_sp_router_neigh_event_work(struct work_struct *work) out: rtnl_unlock(); neigh_release(n); - kfree(neigh_work); + kfree(net_work); } -int mlxsw_sp_router_netevent_event(struct notifier_block *unused, - unsigned long event, void *ptr) +static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp); + +static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work) +{ + struct mlxsw_sp_netevent_work *net_work = + container_of(work, struct mlxsw_sp_netevent_work, work); + struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp; + + mlxsw_sp_mp_hash_init(mlxsw_sp); + kfree(net_work); +} + +static int mlxsw_sp_router_netevent_event(struct notifier_block *nb, + unsigned long event, void *ptr) { - struct mlxsw_sp_neigh_event_work *neigh_work; + struct mlxsw_sp_netevent_work *net_work; struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp_router *router; struct mlxsw_sp *mlxsw_sp; unsigned long interval; struct neigh_parms *p; struct neighbour *n; + struct net *net; switch (event) { case NETEVENT_DELAY_PROBE_TIME_UPDATE: @@ -1950,24 +2347,39 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused, if (!mlxsw_sp_port) return NOTIFY_DONE; - neigh_work = kzalloc(sizeof(*neigh_work), GFP_ATOMIC); - if (!neigh_work) { + net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC); + if (!net_work) { mlxsw_sp_port_dev_put(mlxsw_sp_port); return NOTIFY_BAD; } - INIT_WORK(&neigh_work->work, mlxsw_sp_router_neigh_event_work); - neigh_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - neigh_work->n = n; + INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work); + net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + net_work->n = n; /* Take a reference to ensure the neighbour won't be * destructed until we drop the reference in delayed * work. */ neigh_clone(n); - mlxsw_core_schedule_work(&neigh_work->work); + mlxsw_core_schedule_work(&net_work->work); mlxsw_sp_port_dev_put(mlxsw_sp_port); break; + case NETEVENT_MULTIPATH_HASH_UPDATE: + net = ptr; + + if (!net_eq(net, &init_net)) + return NOTIFY_DONE; + + net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC); + if (!net_work) + return NOTIFY_BAD; + + router = container_of(nb, struct mlxsw_sp_router, netevent_nb); + INIT_WORK(&net_work->work, mlxsw_sp_router_mp_hash_event_work); + net_work->mlxsw_sp = router->mlxsw_sp; + mlxsw_core_schedule_work(&net_work->work); + break; } return NOTIFY_DONE; @@ -2004,16 +2416,25 @@ static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp) rhashtable_destroy(&mlxsw_sp->router->neigh_ht); } +static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_rif *rif) +{ + char rauht_pl[MLXSW_REG_RAUHT_LEN]; + + mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL, + rif->rif_index, rif->addr); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl); +} + static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif) { struct mlxsw_sp_neigh_entry *neigh_entry, *tmp; + mlxsw_sp_neigh_rif_flush(mlxsw_sp, rif); list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list, - rif_list_node) { - mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false); + rif_list_node) mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); - } } enum mlxsw_sp_nexthop_type { @@ -2028,6 +2449,7 @@ struct mlxsw_sp_nexthop_key { struct mlxsw_sp_nexthop { struct list_head neigh_list_node; /* member of neigh entry list */ struct list_head rif_list_node; + struct list_head router_list_node; struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group * this belongs to */ @@ -2035,6 +2457,9 @@ struct mlxsw_sp_nexthop { struct mlxsw_sp_nexthop_key key; unsigned char gw_addr[sizeof(struct in6_addr)]; int ifindex; + int nh_weight; + int norm_nh_weight; + int num_adj_entries; struct mlxsw_sp_rif *rif; u8 should_offload:1, /* set indicates this neigh is connected and * should be put to KVD linear area of this group. @@ -2050,6 +2475,8 @@ struct mlxsw_sp_nexthop { struct mlxsw_sp_neigh_entry *neigh_entry; struct mlxsw_sp_ipip_entry *ipip_entry; }; + unsigned int counter_index; + bool counter_valid; }; struct mlxsw_sp_nexthop_group { @@ -2062,10 +2489,118 @@ struct mlxsw_sp_nexthop_group { u32 adj_index; u16 ecmp_size; u16 count; + int sum_norm_weight; struct mlxsw_sp_nexthop nexthops[0]; #define nh_rif nexthops[0].rif }; +void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + struct devlink *devlink; + + devlink = priv_to_devlink(mlxsw_sp->core); + if (!devlink_dpipe_table_counter_enabled(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_ADJ)) + return; + + if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index)) + return; + + nh->counter_valid = true; +} + +void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + if (!nh->counter_valid) + return; + mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index); + nh->counter_valid = false; +} + +int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh, u64 *p_counter) +{ + if (!nh->counter_valid) + return -EINVAL; + + return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index, + p_counter, NULL); +} + +struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router, + struct mlxsw_sp_nexthop *nh) +{ + if (!nh) { + if (list_empty(&router->nexthop_list)) + return NULL; + else + return list_first_entry(&router->nexthop_list, + typeof(*nh), router_list_node); + } + if (list_is_last(&nh->router_list_node, &router->nexthop_list)) + return NULL; + return list_next_entry(nh, router_list_node); +} + +bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh) +{ + return nh->offloaded; +} + +unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh) +{ + if (!nh->offloaded) + return NULL; + return nh->neigh_entry->ha; +} + +int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index, + u32 *p_adj_size, u32 *p_adj_hash_index) +{ + struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp; + u32 adj_hash_index = 0; + int i; + + if (!nh->offloaded || !nh_grp->adj_index_valid) + return -EINVAL; + + *p_adj_index = nh_grp->adj_index; + *p_adj_size = nh_grp->ecmp_size; + + for (i = 0; i < nh_grp->count; i++) { + struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i]; + + if (nh_iter == nh) + break; + if (nh_iter->offloaded) + adj_hash_index += nh_iter->num_adj_entries; + } + + *p_adj_hash_index = adj_hash_index; + return 0; +} + +struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh) +{ + return nh->rif; +} + +bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh) +{ + struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp; + int i; + + for (i = 0; i < nh_grp->count; i++) { + struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i]; + + if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP) + return true; + } + return false; +} + static struct fib_info * mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp) { @@ -2323,8 +2858,8 @@ static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp, return 0; } -static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_nexthop *nh) +static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_nexthop *nh) { struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry; char ratr_pl[MLXSW_REG_RATR_LEN]; @@ -2333,12 +2868,33 @@ static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, true, MLXSW_REG_RATR_TYPE_ETHERNET, adj_index, neigh_entry->rif); mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha); + if (nh->counter_valid) + mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true); + else + mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl); } -static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp, - u32 adj_index, - struct mlxsw_sp_nexthop *nh) +int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_nexthop *nh) +{ + int i; + + for (i = 0; i < nh->num_adj_entries; i++) { + int err; + + err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh); + if (err) + return err; + } + + return 0; +} + +static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp, + u32 adj_index, + struct mlxsw_sp_nexthop *nh) { const struct mlxsw_sp_ipip_ops *ipip_ops; @@ -2346,6 +2902,24 @@ static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp, return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry); } +static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp, + u32 adj_index, + struct mlxsw_sp_nexthop *nh) +{ + int i; + + for (i = 0; i < nh->num_adj_entries; i++) { + int err; + + err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i, + nh); + if (err) + return err; + } + + return 0; +} + static int mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp, @@ -2367,7 +2941,7 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp, if (nh->update || reallocate) { switch (nh->type) { case MLXSW_SP_NEXTHOP_TYPE_ETH: - err = mlxsw_sp_nexthop_mac_update + err = mlxsw_sp_nexthop_update (mlxsw_sp, adj_index, nh); break; case MLXSW_SP_NEXTHOP_TYPE_IPIP: @@ -2380,7 +2954,7 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp, nh->update = 0; nh->offloaded = 1; } - adj_index++; + adj_index += nh->num_adj_entries; } return 0; } @@ -2425,17 +2999,118 @@ mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp) } } +static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size) +{ + /* Valid sizes for an adjacency group are: + * 1-64, 512, 1024, 2048 and 4096. + */ + if (*p_adj_grp_size <= 64) + return; + else if (*p_adj_grp_size <= 512) + *p_adj_grp_size = 512; + else if (*p_adj_grp_size <= 1024) + *p_adj_grp_size = 1024; + else if (*p_adj_grp_size <= 2048) + *p_adj_grp_size = 2048; + else + *p_adj_grp_size = 4096; +} + +static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size, + unsigned int alloc_size) +{ + if (alloc_size >= 4096) + *p_adj_grp_size = 4096; + else if (alloc_size >= 2048) + *p_adj_grp_size = 2048; + else if (alloc_size >= 1024) + *p_adj_grp_size = 1024; + else if (alloc_size >= 512) + *p_adj_grp_size = 512; +} + +static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp, + u16 *p_adj_grp_size) +{ + unsigned int alloc_size; + int err; + + /* Round up the requested group size to the next size supported + * by the device and make sure the request can be satisfied. + */ + mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size); + err = mlxsw_sp_kvdl_alloc_size_query(mlxsw_sp, *p_adj_grp_size, + &alloc_size); + if (err) + return err; + /* It is possible the allocation results in more allocated + * entries than requested. Try to use as much of them as + * possible. + */ + mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size); + + return 0; +} + +static void +mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp) +{ + int i, g = 0, sum_norm_weight = 0; + struct mlxsw_sp_nexthop *nh; + + for (i = 0; i < nh_grp->count; i++) { + nh = &nh_grp->nexthops[i]; + + if (!nh->should_offload) + continue; + if (g > 0) + g = gcd(nh->nh_weight, g); + else + g = nh->nh_weight; + } + + for (i = 0; i < nh_grp->count; i++) { + nh = &nh_grp->nexthops[i]; + + if (!nh->should_offload) + continue; + nh->norm_nh_weight = nh->nh_weight / g; + sum_norm_weight += nh->norm_nh_weight; + } + + nh_grp->sum_norm_weight = sum_norm_weight; +} + +static void +mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp) +{ + int total = nh_grp->sum_norm_weight; + u16 ecmp_size = nh_grp->ecmp_size; + int i, weight = 0, lower_bound = 0; + + for (i = 0; i < nh_grp->count; i++) { + struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; + int upper_bound; + + if (!nh->should_offload) + continue; + weight += nh->norm_nh_weight; + upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total); + nh->num_adj_entries = upper_bound - lower_bound; + lower_bound = upper_bound; + } +} + static void mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) { + u16 ecmp_size, old_ecmp_size; struct mlxsw_sp_nexthop *nh; bool offload_change = false; u32 adj_index; - u16 ecmp_size = 0; bool old_adj_index_valid; u32 old_adj_index; - u16 old_ecmp_size; int i; int err; @@ -2452,8 +3127,6 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, if (nh->should_offload) nh->update = 1; } - if (nh->should_offload) - ecmp_size++; } if (!offload_change) { /* Nothing was added or removed, so no need to reallocate. Just @@ -2466,12 +3139,19 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, } return; } - if (!ecmp_size) + mlxsw_sp_nexthop_group_normalize(nh_grp); + if (!nh_grp->sum_norm_weight) /* No neigh of this group is connected so we just set * the trap and let everthing flow through kernel. */ goto set_trap; + ecmp_size = nh_grp->sum_norm_weight; + err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size); + if (err) + /* No valid allocation size available. */ + goto set_trap; + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size, &adj_index); if (err) { /* We ran out of KVD linear space, just set the @@ -2486,6 +3166,7 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, nh_grp->adj_index_valid = 1; nh_grp->adj_index = adj_index; nh_grp->ecmp_size = ecmp_size; + mlxsw_sp_nexthop_group_rebalance(nh_grp); err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true); if (err) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n"); @@ -2655,38 +3336,28 @@ static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp, neigh_release(n); } -static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp, - const struct net_device *dev, - enum mlxsw_sp_ipip_type *p_type) +static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev) { - struct mlxsw_sp_router *router = mlxsw_sp->router; - const struct mlxsw_sp_ipip_ops *ipip_ops; - enum mlxsw_sp_ipip_type ipipt; + struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev); - for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) { - ipip_ops = router->ipip_ops_arr[ipipt]; - if (dev->type == ipip_ops->dev_type) { - if (p_type) - *p_type = ipipt; - return true; - } - } - return false; + return ul_dev ? (ul_dev->flags & IFF_UP) : true; } static int mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp, - enum mlxsw_sp_ipip_type ipipt, struct mlxsw_sp_nexthop *nh, struct net_device *ol_dev) { + bool removing; + if (!nh->nh_grp->gateway || nh->ipip_entry) return 0; - nh->ipip_entry = mlxsw_sp_ipip_entry_get(mlxsw_sp, ipipt, ol_dev); - if (IS_ERR(nh->ipip_entry)) - return PTR_ERR(nh->ipip_entry); + nh->ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); + if (!nh->ipip_entry) + return -ENOENT; - __mlxsw_sp_nexthop_neigh_update(nh, false); + removing = !mlxsw_sp_ipip_netdev_ul_up(ol_dev); + __mlxsw_sp_nexthop_neigh_update(nh, removing); return 0; } @@ -2699,7 +3370,6 @@ static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp, return; __mlxsw_sp_nexthop_neigh_update(nh, true); - mlxsw_sp_ipip_entry_put(mlxsw_sp, ipip_entry); nh->ipip_entry = NULL; } @@ -2743,7 +3413,7 @@ static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp, router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev, MLXSW_SP_L3_PROTO_IPV4)) { nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; - err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, ipipt, nh, dev); + err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, dev); if (err) return err; mlxsw_sp_nexthop_rif_init(nh, &nh->ipip_entry->ol_lb->common); @@ -2784,11 +3454,19 @@ static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, nh->nh_grp = nh_grp; nh->key.fib_nh = fib_nh; +#ifdef CONFIG_IP_ROUTE_MULTIPATH + nh->nh_weight = fib_nh->nh_weight; +#else + nh->nh_weight = 1; +#endif memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw)); err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh); if (err) return err; + mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); + list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list); + if (!dev) return 0; @@ -2812,6 +3490,8 @@ static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh); + list_del(&nh->router_list_node); + mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); mlxsw_sp_nexthop_remove(mlxsw_sp, nh); } @@ -2841,6 +3521,30 @@ static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); } +static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_nexthop *nh; + bool removing; + + list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) { + switch (nh->type) { + case MLXSW_SP_NEXTHOP_TYPE_ETH: + removing = false; + break; + case MLXSW_SP_NEXTHOP_TYPE_IPIP: + removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev); + break; + default: + WARN_ON(1); + continue; + } + + __mlxsw_sp_nexthop_neigh_update(nh, removing); + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); + } +} + static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif) { @@ -3121,7 +3825,7 @@ mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry, return; if (mlxsw_sp_fib_entry_should_offload(fib_entry)) mlxsw_sp_fib_entry_offload_set(fib_entry); - else if (!mlxsw_sp_fib_entry_should_offload(fib_entry)) + else mlxsw_sp_fib_entry_offload_unset(fib_entry); return; default: @@ -3576,7 +4280,7 @@ mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr, struct mlxsw_sp_vr *vr; int err; - vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id); + vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL); if (IS_ERR(vr)) return ERR_CAST(vr); fib = mlxsw_sp_vr_fib(vr, proto); @@ -4000,7 +4704,7 @@ static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp, router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev, MLXSW_SP_L3_PROTO_IPV6)) { nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; - err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, ipipt, nh, dev); + err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, dev); if (err) return err; mlxsw_sp_nexthop_rif_init(nh, &nh->ipip_entry->ol_lb->common); @@ -4038,7 +4742,11 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, struct net_device *dev = rt->dst.dev; nh->nh_grp = nh_grp; + nh->nh_weight = 1; memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr)); + mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); + + list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list); if (!dev) return 0; @@ -4051,6 +4759,8 @@ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh); + list_del(&nh->router_list_node); + mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); } static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp, @@ -4601,6 +5311,75 @@ static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp, return 0; } +static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp, + struct mfc_entry_notifier_info *men_info, + bool replace) +{ + struct mlxsw_sp_vr *vr; + + if (mlxsw_sp->router->aborted) + return 0; + + vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL); + if (IS_ERR(vr)) + return PTR_ERR(vr); + + return mlxsw_sp_mr_route4_add(vr->mr4_table, men_info->mfc, replace); +} + +static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp, + struct mfc_entry_notifier_info *men_info) +{ + struct mlxsw_sp_vr *vr; + + if (mlxsw_sp->router->aborted) + return; + + vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id); + if (WARN_ON(!vr)) + return; + + mlxsw_sp_mr_route4_del(vr->mr4_table, men_info->mfc); + mlxsw_sp_vr_put(vr); +} + +static int +mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp, + struct vif_entry_notifier_info *ven_info) +{ + struct mlxsw_sp_rif *rif; + struct mlxsw_sp_vr *vr; + + if (mlxsw_sp->router->aborted) + return 0; + + vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL); + if (IS_ERR(vr)) + return PTR_ERR(vr); + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev); + return mlxsw_sp_mr_vif_add(vr->mr4_table, ven_info->dev, + ven_info->vif_index, + ven_info->vif_flags, rif); +} + +static void +mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp, + struct vif_entry_notifier_info *ven_info) +{ + struct mlxsw_sp_vr *vr; + + if (mlxsw_sp->router->aborted) + return; + + vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id); + if (WARN_ON(!vr)) + return; + + mlxsw_sp_mr_vif_del(vr->mr4_table, ven_info->vif_index); + mlxsw_sp_vr_put(vr); +} + static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) { enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4; @@ -4611,6 +5390,10 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) if (err) return err; + /* The multicast router code does not need an abort trap as by default, + * packets that don't match any routes are trapped to the CPU. + */ + proto = MLXSW_REG_RALXX_PROTOCOL_IPV6; return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto, MLXSW_SP_LPM_TREE_MIN + 1); @@ -4692,6 +5475,8 @@ static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp) if (!mlxsw_sp_vr_is_used(vr)) continue; + + mlxsw_sp_mr_table_flush(vr->mr4_table); mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4); /* If virtual router was only used for IPv4, then it's no @@ -4724,6 +5509,8 @@ struct mlxsw_sp_fib_event_work { struct fib_entry_notifier_info fen_info; struct fib_rule_notifier_info fr_info; struct fib_nh_notifier_info fnh_info; + struct mfc_entry_notifier_info men_info; + struct vif_entry_notifier_info ven_info; }; struct mlxsw_sp *mlxsw_sp; unsigned long event; @@ -4734,7 +5521,6 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) struct mlxsw_sp_fib_event_work *fib_work = container_of(work, struct mlxsw_sp_fib_event_work, work); struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; - struct fib_rule *rule; bool replace, append; int err; @@ -4756,12 +5542,11 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info); fib_info_put(fib_work->fen_info.fi); break; - case FIB_EVENT_RULE_ADD: /* fall through */ - case FIB_EVENT_RULE_DEL: - rule = fib_work->fr_info.rule; - if (!fib4_rule_default(rule) && !rule->l3mdev) - mlxsw_sp_router_fib_abort(mlxsw_sp); - fib_rule_put(rule); + case FIB_EVENT_RULE_ADD: + /* if we get here, a rule was added that we do not support. + * just do the fib_abort + */ + mlxsw_sp_router_fib_abort(mlxsw_sp); break; case FIB_EVENT_NH_ADD: /* fall through */ case FIB_EVENT_NH_DEL: @@ -4779,7 +5564,6 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) struct mlxsw_sp_fib_event_work *fib_work = container_of(work, struct mlxsw_sp_fib_event_work, work); struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; - struct fib_rule *rule; bool replace; int err; @@ -4798,12 +5582,58 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt); mlxsw_sp_rt6_release(fib_work->fen6_info.rt); break; - case FIB_EVENT_RULE_ADD: /* fall through */ - case FIB_EVENT_RULE_DEL: - rule = fib_work->fr_info.rule; - if (!fib6_rule_default(rule) && !rule->l3mdev) + case FIB_EVENT_RULE_ADD: + /* if we get here, a rule was added that we do not support. + * just do the fib_abort + */ + mlxsw_sp_router_fib_abort(mlxsw_sp); + break; + } + rtnl_unlock(); + kfree(fib_work); +} + +static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work) +{ + struct mlxsw_sp_fib_event_work *fib_work = + container_of(work, struct mlxsw_sp_fib_event_work, work); + struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; + bool replace; + int err; + + rtnl_lock(); + switch (fib_work->event) { + case FIB_EVENT_ENTRY_REPLACE: /* fall through */ + case FIB_EVENT_ENTRY_ADD: + replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE; + + err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info, + replace); + if (err) + mlxsw_sp_router_fib_abort(mlxsw_sp); + ipmr_cache_put(fib_work->men_info.mfc); + break; + case FIB_EVENT_ENTRY_DEL: + mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info); + ipmr_cache_put(fib_work->men_info.mfc); + break; + case FIB_EVENT_VIF_ADD: + err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp, + &fib_work->ven_info); + if (err) mlxsw_sp_router_fib_abort(mlxsw_sp); - fib_rule_put(rule); + dev_put(fib_work->ven_info.dev); + break; + case FIB_EVENT_VIF_DEL: + mlxsw_sp_router_fibmr_vif_del(mlxsw_sp, + &fib_work->ven_info); + dev_put(fib_work->ven_info.dev); + break; + case FIB_EVENT_RULE_ADD: + /* if we get here, a rule was added that we do not support. + * just do the fib_abort + */ + mlxsw_sp_router_fib_abort(mlxsw_sp); break; } rtnl_unlock(); @@ -4813,25 +5643,27 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work, struct fib_notifier_info *info) { + struct fib_entry_notifier_info *fen_info; + struct fib_nh_notifier_info *fnh_info; + switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ case FIB_EVENT_ENTRY_APPEND: /* fall through */ case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: - memcpy(&fib_work->fen_info, info, sizeof(fib_work->fen_info)); - /* Take referece on fib_info to prevent it from being + fen_info = container_of(info, struct fib_entry_notifier_info, + info); + fib_work->fen_info = *fen_info; + /* Take reference on fib_info to prevent it from being * freed while work is queued. Release it afterwards. */ fib_info_hold(fib_work->fen_info.fi); break; - case FIB_EVENT_RULE_ADD: /* fall through */ - case FIB_EVENT_RULE_DEL: - memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info)); - fib_rule_get(fib_work->fr_info.rule); - break; case FIB_EVENT_NH_ADD: /* fall through */ case FIB_EVENT_NH_DEL: - memcpy(&fib_work->fnh_info, info, sizeof(fib_work->fnh_info)); + fnh_info = container_of(info, struct fib_nh_notifier_info, + info); + fib_work->fnh_info = *fnh_info; fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent); break; } @@ -4840,21 +5672,79 @@ static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work, static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, struct fib_notifier_info *info) { + struct fib6_entry_notifier_info *fen6_info; + switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: - memcpy(&fib_work->fen6_info, info, sizeof(fib_work->fen6_info)); + fen6_info = container_of(info, struct fib6_entry_notifier_info, + info); + fib_work->fen6_info = *fen6_info; rt6_hold(fib_work->fen6_info.rt); break; - case FIB_EVENT_RULE_ADD: /* fall through */ - case FIB_EVENT_RULE_DEL: - memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info)); - fib_rule_get(fib_work->fr_info.rule); + } +} + +static void +mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work, + struct fib_notifier_info *info) +{ + switch (fib_work->event) { + case FIB_EVENT_ENTRY_REPLACE: /* fall through */ + case FIB_EVENT_ENTRY_ADD: /* fall through */ + case FIB_EVENT_ENTRY_DEL: + memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info)); + ipmr_cache_hold(fib_work->men_info.mfc); + break; + case FIB_EVENT_VIF_ADD: /* fall through */ + case FIB_EVENT_VIF_DEL: + memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info)); + dev_hold(fib_work->ven_info.dev); break; } } +static int mlxsw_sp_router_fib_rule_event(unsigned long event, + struct fib_notifier_info *info, + struct mlxsw_sp *mlxsw_sp) +{ + struct netlink_ext_ack *extack = info->extack; + struct fib_rule_notifier_info *fr_info; + struct fib_rule *rule; + int err = 0; + + /* nothing to do at the moment */ + if (event == FIB_EVENT_RULE_DEL) + return 0; + + if (mlxsw_sp->router->aborted) + return 0; + + fr_info = container_of(info, struct fib_rule_notifier_info, info); + rule = fr_info->rule; + + switch (info->family) { + case AF_INET: + if (!fib4_rule_default(rule) && !rule->l3mdev) + err = -1; + break; + case AF_INET6: + if (!fib6_rule_default(rule) && !rule->l3mdev) + err = -1; + break; + case RTNL_FAMILY_IPMR: + if (!ipmr_rule_default(rule) && !rule->l3mdev) + err = -1; + break; + } + + if (err < 0) + NL_SET_ERR_MSG(extack, "spectrum: FIB rules not supported. Aborting offload"); + + return err; +} + /* Called with rcu_read_lock() */ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, unsigned long event, void *ptr) @@ -4862,16 +5752,28 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, struct mlxsw_sp_fib_event_work *fib_work; struct fib_notifier_info *info = ptr; struct mlxsw_sp_router *router; + int err; if (!net_eq(info->net, &init_net) || - (info->family != AF_INET && info->family != AF_INET6)) + (info->family != AF_INET && info->family != AF_INET6 && + info->family != RTNL_FAMILY_IPMR)) return NOTIFY_DONE; + router = container_of(nb, struct mlxsw_sp_router, fib_nb); + + switch (event) { + case FIB_EVENT_RULE_ADD: /* fall through */ + case FIB_EVENT_RULE_DEL: + err = mlxsw_sp_router_fib_rule_event(event, info, + router->mlxsw_sp); + if (!err) + return NOTIFY_DONE; + } + fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC); if (WARN_ON(!fib_work)) return NOTIFY_BAD; - router = container_of(nb, struct mlxsw_sp_router, fib_nb); fib_work->mlxsw_sp = router->mlxsw_sp; fib_work->event = event; @@ -4884,6 +5786,10 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work); mlxsw_sp_router_fib6_event(fib_work, info); break; + case RTNL_FAMILY_IPMR: + INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work); + mlxsw_sp_router_fibmr_event(fib_work, info); + break; } mlxsw_core_schedule_work(&fib_work->work); @@ -5044,9 +5950,15 @@ int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif) return rif->dev->ifindex; } +const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif) +{ + return rif->dev; +} + static struct mlxsw_sp_rif * mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_rif_params *params) + const struct mlxsw_sp_rif_params *params, + struct netlink_ext_ack *extack) { u32 tb_id = l3mdev_fib_table(params->dev); const struct mlxsw_sp_rif_ops *ops; @@ -5060,14 +5972,16 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev); ops = mlxsw_sp->router->rif_ops_arr[type]; - vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN); + vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack); if (IS_ERR(vr)) return ERR_CAST(vr); vr->rif_count++; err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index); - if (err) + if (err) { + NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported router interfaces"); goto err_rif_index_alloc; + } rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev); if (!rif) { @@ -5093,11 +6007,17 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, if (err) goto err_configure; + err = mlxsw_sp_mr_rif_add(vr->mr4_table, rif); + if (err) + goto err_mr_rif_add; + mlxsw_sp_rif_counters_alloc(rif); mlxsw_sp->router->rifs[rif_index] = rif; return rif; +err_mr_rif_add: + ops->deconfigure(rif); err_configure: if (fid) mlxsw_sp_fid_put(fid); @@ -5122,6 +6042,7 @@ void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) mlxsw_sp->router->rifs[rif->rif_index] = NULL; mlxsw_sp_rif_counters_free(rif); + mlxsw_sp_mr_rif_del(vr->mr4_table, rif); ops->deconfigure(rif); if (fid) /* Loopback RIFs are not associated with a FID. */ @@ -5147,7 +6068,8 @@ mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params, static int mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, - struct net_device *l3_dev) + struct net_device *l3_dev, + struct netlink_ext_ack *extack) { struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port; struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; @@ -5163,7 +6085,7 @@ mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, }; mlxsw_sp_rif_subport_params_init(¶ms, mlxsw_sp_port_vlan); - rif = mlxsw_sp_rif_create(mlxsw_sp, ¶ms); + rif = mlxsw_sp_rif_create(mlxsw_sp, ¶ms, extack); if (IS_ERR(rif)) return PTR_ERR(rif); } @@ -5218,7 +6140,8 @@ mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev, struct net_device *port_dev, - unsigned long event, u16 vid) + unsigned long event, u16 vid, + struct netlink_ext_ack *extack) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev); struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; @@ -5230,7 +6153,7 @@ static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev, switch (event) { case NETDEV_UP: return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan, - l3_dev); + l3_dev, extack); case NETDEV_DOWN: mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan); break; @@ -5240,19 +6163,22 @@ static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev, } static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev, - unsigned long event) + unsigned long event, + struct netlink_ext_ack *extack) { if (netif_is_bridge_port(port_dev) || netif_is_lag_port(port_dev) || netif_is_ovs_port(port_dev)) return 0; - return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1); + return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1, + extack); } static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev, struct net_device *lag_dev, - unsigned long event, u16 vid) + unsigned long event, u16 vid, + struct netlink_ext_ack *extack) { struct net_device *port_dev; struct list_head *iter; @@ -5262,7 +6188,8 @@ static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev, if (mlxsw_sp_port_dev_check(port_dev)) { err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev, port_dev, - event, vid); + event, vid, + extack); if (err) return err; } @@ -5272,16 +6199,19 @@ static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev, } static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev, - unsigned long event) + unsigned long event, + struct netlink_ext_ack *extack) { if (netif_is_bridge_port(lag_dev)) return 0; - return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1); + return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1, + extack); } static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev, - unsigned long event) + unsigned long event, + struct netlink_ext_ack *extack) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev); struct mlxsw_sp_rif_params params = { @@ -5291,7 +6221,7 @@ static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev, switch (event) { case NETDEV_UP: - rif = mlxsw_sp_rif_create(mlxsw_sp, ¶ms); + rif = mlxsw_sp_rif_create(mlxsw_sp, ¶ms, extack); if (IS_ERR(rif)) return PTR_ERR(rif); break; @@ -5305,7 +6235,8 @@ static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev, } static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev, - unsigned long event) + unsigned long event, + struct netlink_ext_ack *extack) { struct net_device *real_dev = vlan_dev_real_dev(vlan_dev); u16 vid = vlan_dev_vlan_id(vlan_dev); @@ -5315,27 +6246,28 @@ static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev, if (mlxsw_sp_port_dev_check(real_dev)) return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev, - event, vid); + event, vid, extack); else if (netif_is_lag_master(real_dev)) return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event, - vid); + vid, extack); else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev)) - return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event); + return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event, extack); return 0; } static int __mlxsw_sp_inetaddr_event(struct net_device *dev, - unsigned long event) + unsigned long event, + struct netlink_ext_ack *extack) { if (mlxsw_sp_port_dev_check(dev)) - return mlxsw_sp_inetaddr_port_event(dev, event); + return mlxsw_sp_inetaddr_port_event(dev, event, extack); else if (netif_is_lag_master(dev)) - return mlxsw_sp_inetaddr_lag_event(dev, event); + return mlxsw_sp_inetaddr_lag_event(dev, event, extack); else if (netif_is_bridge_master(dev)) - return mlxsw_sp_inetaddr_bridge_event(dev, event); + return mlxsw_sp_inetaddr_bridge_event(dev, event, extack); else if (is_vlan_dev(dev)) - return mlxsw_sp_inetaddr_vlan_event(dev, event); + return mlxsw_sp_inetaddr_vlan_event(dev, event, extack); else return 0; } @@ -5349,6 +6281,32 @@ int mlxsw_sp_inetaddr_event(struct notifier_block *unused, struct mlxsw_sp_rif *rif; int err = 0; + /* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */ + if (event == NETDEV_UP) + goto out; + + mlxsw_sp = mlxsw_sp_lower_get(dev); + if (!mlxsw_sp) + goto out; + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!mlxsw_sp_rif_should_config(rif, dev, event)) + goto out; + + err = __mlxsw_sp_inetaddr_event(dev, event, NULL); +out: + return notifier_from_errno(err); +} + +int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct in_validator_info *ivi = (struct in_validator_info *) ptr; + struct net_device *dev = ivi->ivi_dev->dev; + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_rif *rif; + int err = 0; + mlxsw_sp = mlxsw_sp_lower_get(dev); if (!mlxsw_sp) goto out; @@ -5357,7 +6315,7 @@ int mlxsw_sp_inetaddr_event(struct notifier_block *unused, if (!mlxsw_sp_rif_should_config(rif, dev, event)) goto out; - err = __mlxsw_sp_inetaddr_event(dev, event); + err = __mlxsw_sp_inetaddr_event(dev, event, ivi->extack); out: return notifier_from_errno(err); } @@ -5386,7 +6344,7 @@ static void mlxsw_sp_inet6addr_event_work(struct work_struct *work) if (!mlxsw_sp_rif_should_config(rif, dev, event)) goto out; - __mlxsw_sp_inetaddr_event(dev, event); + __mlxsw_sp_inetaddr_event(dev, event, NULL); out: rtnl_unlock(); dev_put(dev); @@ -5401,6 +6359,10 @@ int mlxsw_sp_inet6addr_event(struct notifier_block *unused, struct mlxsw_sp_inet6addr_event_work *inet6addr_work; struct net_device *dev = if6->idev->dev; + /* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */ + if (event == NETDEV_UP) + return NOTIFY_DONE; + if (!mlxsw_sp_port_dev_lower_find_rcu(dev)) return NOTIFY_DONE; @@ -5417,6 +6379,28 @@ int mlxsw_sp_inet6addr_event(struct notifier_block *unused, return NOTIFY_DONE; } +int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr; + struct net_device *dev = i6vi->i6vi_dev->dev; + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_rif *rif; + int err = 0; + + mlxsw_sp = mlxsw_sp_lower_get(dev); + if (!mlxsw_sp) + goto out; + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!mlxsw_sp_rif_should_config(rif, dev, event)) + goto out; + + err = __mlxsw_sp_inetaddr_event(dev, event, i6vi->extack); +out: + return notifier_from_errno(err); +} + static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index, const char *mac, int mtu) { @@ -5463,6 +6447,17 @@ int mlxsw_sp_netdevice_router_port_event(struct net_device *dev) if (err) goto err_rif_fdb_op; + if (rif->mtu != dev->mtu) { + struct mlxsw_sp_vr *vr; + + /* The RIF is relevant only to its mr_table instance, as unlike + * unicast routing, in multicast routing a RIF cannot be shared + * between several multicast routing tables. + */ + vr = &mlxsw_sp->router->vrs[rif->vr_id]; + mlxsw_sp_mr_rif_mtu_update(vr->mr4_table, rif, dev->mtu); + } + ether_addr_copy(rif->addr, dev->dev_addr); rif->mtu = dev->mtu; @@ -5478,7 +6473,8 @@ err_rif_edit: } static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp, - struct net_device *l3_dev) + struct net_device *l3_dev, + struct netlink_ext_ack *extack) { struct mlxsw_sp_rif *rif; @@ -5487,9 +6483,9 @@ static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp, */ rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev); if (rif) - __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN); + __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, extack); - return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP); + return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP, extack); } static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp, @@ -5500,7 +6496,7 @@ static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp, rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev); if (!rif) return; - __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN); + __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, NULL); } int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, @@ -5516,10 +6512,14 @@ int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, case NETDEV_PRECHANGEUPPER: return 0; case NETDEV_CHANGEUPPER: - if (info->linking) - err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev); - else + if (info->linking) { + struct netlink_ext_ack *extack; + + extack = netdev_notifier_info_to_extack(&info->info); + err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack); + } else { mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev); + } break; } @@ -5625,7 +6625,7 @@ static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); } -static u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp) +u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp) { return mlxsw_core_max_ports(mlxsw_sp->core) + 1; } @@ -5826,7 +6826,7 @@ mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif) struct mlxsw_sp_vr *ul_vr; int err; - ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id); + ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL); if (IS_ERR(ul_vr)) return PTR_ERR(ul_vr); @@ -5930,6 +6930,64 @@ static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb) mlxsw_sp_router_fib_flush(router->mlxsw_sp); } +#ifdef CONFIG_IP_ROUTE_MULTIPATH +static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header) +{ + mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true); +} + +static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field) +{ + mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true); +} + +static void mlxsw_sp_mp4_hash_init(char *recr2_pl) +{ + bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy; + + mlxsw_sp_mp_hash_header_set(recr2_pl, + MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP); + mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP); + mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl); + mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl); + if (only_l3) + return; + mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4); + mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL); + mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT); + mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT); +} + +static void mlxsw_sp_mp6_hash_init(char *recr2_pl) +{ + mlxsw_sp_mp_hash_header_set(recr2_pl, + MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP); + mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP); + mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl); + mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl); + mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_FLOW_LABEL); + mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER); +} + +static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp) +{ + char recr2_pl[MLXSW_REG_RECR2_LEN]; + u32 seed; + + get_random_bytes(&seed, sizeof(seed)); + mlxsw_reg_recr2_pack(recr2_pl, seed); + mlxsw_sp_mp4_hash_init(recr2_pl); + mlxsw_sp_mp6_hash_init(recr2_pl); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl); +} +#else +static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp) +{ + return 0; +} +#endif + static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) { char rgcr_pl[MLXSW_REG_RGCR_LEN]; @@ -5990,10 +7048,15 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) if (err) goto err_nexthop_group_ht_init; + INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list); err = mlxsw_sp_lpm_init(mlxsw_sp); if (err) goto err_lpm_init; + err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops); + if (err) + goto err_mr_init; + err = mlxsw_sp_vrs_init(mlxsw_sp); if (err) goto err_vrs_init; @@ -6002,6 +7065,16 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) if (err) goto err_neigh_init; + mlxsw_sp->router->netevent_nb.notifier_call = + mlxsw_sp_router_netevent_event; + err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb); + if (err) + goto err_register_netevent_notifier; + + err = mlxsw_sp_mp_hash_init(mlxsw_sp); + if (err) + goto err_mp_hash_init; + mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event; err = register_fib_notifier(&mlxsw_sp->router->fib_nb, mlxsw_sp_router_fib_dump_flush); @@ -6011,10 +7084,15 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) return 0; err_register_fib_notifier: +err_mp_hash_init: + unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb); +err_register_netevent_notifier: mlxsw_sp_neigh_fini(mlxsw_sp); err_neigh_init: mlxsw_sp_vrs_fini(mlxsw_sp); err_vrs_init: + mlxsw_sp_mr_fini(mlxsw_sp); +err_mr_init: mlxsw_sp_lpm_fini(mlxsw_sp); err_lpm_init: rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht); @@ -6034,8 +7112,10 @@ err_router_init: void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) { unregister_fib_notifier(&mlxsw_sp->router->fib_nb); + unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb); mlxsw_sp_neigh_fini(mlxsw_sp); mlxsw_sp_vrs_fini(mlxsw_sp); + mlxsw_sp_mr_fini(mlxsw_sp); mlxsw_sp_lpm_fini(mlxsw_sp); rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht); rhashtable_destroy(&mlxsw_sp->router->nexthop_ht); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 345fcc4f38e9..1fb82246ce96 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -62,13 +62,18 @@ enum mlxsw_sp_rif_counter_dir { }; struct mlxsw_sp_neigh_entry; +struct mlxsw_sp_nexthop; +struct mlxsw_sp_ipip_entry; struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp, u16 rif_index); u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif); u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif); u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif); +u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev); int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif); +u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp); +const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif); int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif, enum mlxsw_sp_rif_counter_dir dir, @@ -100,12 +105,44 @@ mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry, bool adding); bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry); -union mlxsw_sp_l3addr -mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto, - const struct net_device *ol_dev); -union mlxsw_sp_l3addr -mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto, - const struct net_device *ol_dev); -__be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev); +int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + bool recreate_loopback, + bool keep_encap, + bool update_nexthops, + struct netlink_ext_ack *extack); +void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry); +bool +mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_l3proto ul_proto, + union mlxsw_sp_l3addr saddr, + u32 ul_tb_id, + const struct mlxsw_sp_ipip_entry *except); +struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router, + struct mlxsw_sp_nexthop *nh); +bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh); +unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh); +int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index, + u32 *p_adj_size, u32 *p_adj_hash_index); +struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh); +bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh); +#define mlxsw_sp_nexthop_for_each(nh, router) \ + for (nh = mlxsw_sp_nexthop_next(router, NULL); nh; \ + nh = mlxsw_sp_nexthop_next(router, nh)) +int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh, u64 *p_counter); +int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_nexthop *nh); +void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh); +void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh); + +static inline bool mlxsw_sp_l3addr_eq(const union mlxsw_sp_l3addr *addr1, + const union mlxsw_sp_l3addr *addr2) +{ + return !memcmp(addr1, addr2, sizeof(*addr1)); +} #endif /* _MLXSW_ROUTER_H_*/ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index d39ffbfcc436..7b8548e25ae7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -46,8 +46,10 @@ #include <linux/workqueue.h> #include <linux/jiffies.h> #include <linux/rtnetlink.h> +#include <linux/netlink.h> #include <net/switchdev.h> +#include "spectrum_router.h" #include "spectrum.h" #include "core.h" #include "reg.h" @@ -67,7 +69,6 @@ struct mlxsw_sp_bridge { u32 ageing_time; bool vlan_enabled_exists; struct list_head bridges_list; - struct list_head mids_list; DECLARE_BITMAP(mids_bitmap, MLXSW_SP_MID_MAX); const struct mlxsw_sp_bridge_ops *bridge_8021q_ops; const struct mlxsw_sp_bridge_ops *bridge_8021d_ops; @@ -77,8 +78,10 @@ struct mlxsw_sp_bridge_device { struct net_device *dev; struct list_head list; struct list_head ports_list; + struct list_head mids_list; u8 vlan_enabled:1, - multicast_enabled:1; + multicast_enabled:1, + mrouter:1; const struct mlxsw_sp_bridge_ops *ops; }; @@ -107,7 +110,8 @@ struct mlxsw_sp_bridge_vlan { struct mlxsw_sp_bridge_ops { int (*port_join)(struct mlxsw_sp_bridge_device *bridge_device, struct mlxsw_sp_bridge_port *bridge_port, - struct mlxsw_sp_port *mlxsw_sp_port); + struct mlxsw_sp_port *mlxsw_sp_port, + struct netlink_ext_ack *extack); void (*port_leave)(struct mlxsw_sp_bridge_device *bridge_device, struct mlxsw_sp_bridge_port *bridge_port, struct mlxsw_sp_port *mlxsw_sp_port); @@ -121,6 +125,20 @@ mlxsw_sp_bridge_port_fdb_flush(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_bridge_port *bridge_port, u16 fid_index); +static void +mlxsw_sp_bridge_port_mdb_flush(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_port *bridge_port); + +static void +mlxsw_sp_bridge_mdb_mc_enable_sync(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_device + *bridge_device); + +static void +mlxsw_sp_port_mrouter_update_mdb(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_port *bridge_port, + bool add); + static struct mlxsw_sp_bridge_device * mlxsw_sp_bridge_device_find(const struct mlxsw_sp_bridge *bridge, const struct net_device *br_dev) @@ -154,6 +172,7 @@ mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge, bridge_device->dev = br_dev; bridge_device->vlan_enabled = vlan_enabled; bridge_device->multicast_enabled = br_multicast_enabled(br_dev); + bridge_device->mrouter = br_multicast_router(br_dev); INIT_LIST_HEAD(&bridge_device->ports_list); if (vlan_enabled) { bridge->vlan_enabled_exists = true; @@ -161,6 +180,7 @@ mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge, } else { bridge_device->ops = bridge->bridge_8021d_ops; } + INIT_LIST_HEAD(&bridge_device->mids_list); list_add(&bridge_device->list, &bridge->bridges_list); return bridge_device; @@ -174,6 +194,7 @@ mlxsw_sp_bridge_device_destroy(struct mlxsw_sp_bridge *bridge, if (bridge_device->vlan_enabled) bridge->vlan_enabled_exists = false; WARN_ON(!list_empty(&bridge_device->ports_list)); + WARN_ON(!list_empty(&bridge_device->mids_list)); kfree(bridge_device); } @@ -249,7 +270,8 @@ mlxsw_sp_bridge_port_create(struct mlxsw_sp_bridge_device *bridge_device, bridge_port->dev = brport_dev; bridge_port->bridge_device = bridge_device; bridge_port->stp_state = BR_STATE_DISABLED; - bridge_port->flags = BR_LEARNING | BR_FLOOD | BR_LEARNING_SYNC; + bridge_port->flags = BR_LEARNING | BR_FLOOD | BR_LEARNING_SYNC | + BR_MCAST_FLOOD; INIT_LIST_HEAD(&bridge_port->vlans_list); list_add(&bridge_port->list, &bridge_device->ports_list); bridge_port->ref_count = 1; @@ -455,7 +477,8 @@ static int mlxsw_sp_port_attr_get(struct net_device *dev, &attr->u.brport_flags); break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT: - attr->u.brport_flags_support = BR_LEARNING | BR_FLOOD; + attr->u.brport_flags_support = BR_LEARNING | BR_FLOOD | + BR_MCAST_FLOOD; break; default: return -EOPNOTSUPP; @@ -640,8 +663,18 @@ static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port, if (err) return err; - memcpy(&bridge_port->flags, &brport_flags, sizeof(brport_flags)); + if (bridge_port->bridge_device->multicast_enabled) + goto out; + err = mlxsw_sp_bridge_port_flood_table_set(mlxsw_sp_port, bridge_port, + MLXSW_SP_FLOOD_TYPE_MC, + brport_flags & + BR_MCAST_FLOOD); + if (err) + return err; + +out: + memcpy(&bridge_port->flags, &brport_flags, sizeof(brport_flags)); return 0; } @@ -699,10 +732,10 @@ static int mlxsw_sp_port_attr_br_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, return -EINVAL; } -static int mlxsw_sp_port_attr_mc_router_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct switchdev_trans *trans, - struct net_device *orig_dev, - bool is_port_mc_router) +static int mlxsw_sp_port_attr_mrouter_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct switchdev_trans *trans, + struct net_device *orig_dev, + bool is_port_mrouter) { struct mlxsw_sp_bridge_port *bridge_port; int err; @@ -720,15 +753,26 @@ static int mlxsw_sp_port_attr_mc_router_set(struct mlxsw_sp_port *mlxsw_sp_port, err = mlxsw_sp_bridge_port_flood_table_set(mlxsw_sp_port, bridge_port, MLXSW_SP_FLOOD_TYPE_MC, - is_port_mc_router); + is_port_mrouter); if (err) return err; + mlxsw_sp_port_mrouter_update_mdb(mlxsw_sp_port, bridge_port, + is_port_mrouter); out: - bridge_port->mrouter = is_port_mc_router; + bridge_port->mrouter = is_port_mrouter; return 0; } +static bool mlxsw_sp_mc_flood(const struct mlxsw_sp_bridge_port *bridge_port) +{ + const struct mlxsw_sp_bridge_device *bridge_device; + + bridge_device = bridge_port->bridge_device; + return bridge_device->multicast_enabled ? bridge_port->mrouter : + bridge_port->flags & BR_MCAST_FLOOD; +} + static int mlxsw_sp_port_mc_disabled_set(struct mlxsw_sp_port *mlxsw_sp_port, struct switchdev_trans *trans, struct net_device *orig_dev, @@ -749,9 +793,15 @@ static int mlxsw_sp_port_mc_disabled_set(struct mlxsw_sp_port *mlxsw_sp_port, if (!bridge_device) return 0; + if (bridge_device->multicast_enabled != !mc_disabled) { + bridge_device->multicast_enabled = !mc_disabled; + mlxsw_sp_bridge_mdb_mc_enable_sync(mlxsw_sp_port, + bridge_device); + } + list_for_each_entry(bridge_port, &bridge_device->ports_list, list) { enum mlxsw_sp_flood_type packet_type = MLXSW_SP_FLOOD_TYPE_MC; - bool member = mc_disabled ? true : bridge_port->mrouter; + bool member = mlxsw_sp_mc_flood(bridge_port); err = mlxsw_sp_bridge_port_flood_table_set(mlxsw_sp_port, bridge_port, @@ -765,6 +815,60 @@ static int mlxsw_sp_port_mc_disabled_set(struct mlxsw_sp_port *mlxsw_sp_port, return 0; } +static int mlxsw_sp_smid_router_port_set(struct mlxsw_sp *mlxsw_sp, + u16 mid_idx, bool add) +{ + char *smid_pl; + int err; + + smid_pl = kmalloc(MLXSW_REG_SMID_LEN, GFP_KERNEL); + if (!smid_pl) + return -ENOMEM; + + mlxsw_reg_smid_pack(smid_pl, mid_idx, + mlxsw_sp_router_port(mlxsw_sp), add); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(smid), smid_pl); + kfree(smid_pl); + return err; +} + +static void +mlxsw_sp_bridge_mrouter_update_mdb(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_bridge_device *bridge_device, + bool add) +{ + struct mlxsw_sp_mid *mid; + + list_for_each_entry(mid, &bridge_device->mids_list, list) + mlxsw_sp_smid_router_port_set(mlxsw_sp, mid->mid, add); +} + +static int +mlxsw_sp_port_attr_br_mrouter_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct switchdev_trans *trans, + struct net_device *orig_dev, + bool is_mrouter) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_bridge_device *bridge_device; + + if (switchdev_trans_ph_prepare(trans)) + return 0; + + /* It's possible we failed to enslave the port, yet this + * operation is executed due to it being deferred. + */ + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, orig_dev); + if (!bridge_device) + return 0; + + if (bridge_device->mrouter != is_mrouter) + mlxsw_sp_bridge_mrouter_update_mdb(mlxsw_sp, bridge_device, + is_mrouter); + bridge_device->mrouter = is_mrouter; + return 0; +} + static int mlxsw_sp_port_attr_set(struct net_device *dev, const struct switchdev_attr *attr, struct switchdev_trans *trans) @@ -793,15 +897,20 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev, attr->u.vlan_filtering); break; case SWITCHDEV_ATTR_ID_PORT_MROUTER: - err = mlxsw_sp_port_attr_mc_router_set(mlxsw_sp_port, trans, - attr->orig_dev, - attr->u.mrouter); + err = mlxsw_sp_port_attr_mrouter_set(mlxsw_sp_port, trans, + attr->orig_dev, + attr->u.mrouter); break; case SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED: err = mlxsw_sp_port_mc_disabled_set(mlxsw_sp_port, trans, attr->orig_dev, attr->u.mc_disabled); break; + case SWITCHDEV_ATTR_ID_BRIDGE_MROUTER: + err = mlxsw_sp_port_attr_br_mrouter_set(mlxsw_sp_port, trans, + attr->orig_dev, + attr->u.mrouter); + break; default: err = -EOPNOTSUPP; break; @@ -810,14 +919,6 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev, return err; } -static bool mlxsw_sp_mc_flood(const struct mlxsw_sp_bridge_port *bridge_port) -{ - const struct mlxsw_sp_bridge_device *bridge_device; - - bridge_device = bridge_port->bridge_device; - return !bridge_device->multicast_enabled ? true : bridge_port->mrouter; -} - static int mlxsw_sp_port_vlan_fid_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, struct mlxsw_sp_bridge_port *bridge_port) @@ -955,24 +1056,28 @@ mlxsw_sp_port_vlan_bridge_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) struct mlxsw_sp_bridge_vlan *bridge_vlan; struct mlxsw_sp_bridge_port *bridge_port; u16 vid = mlxsw_sp_port_vlan->vid; - bool last; + bool last_port, last_vlan; if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_8021Q && mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_8021D)) return; bridge_port = mlxsw_sp_port_vlan->bridge_port; + last_vlan = list_is_singular(&bridge_port->vlans_list); bridge_vlan = mlxsw_sp_bridge_vlan_find(bridge_port, vid); - last = list_is_singular(&bridge_vlan->port_vlan_list); + last_port = list_is_singular(&bridge_vlan->port_vlan_list); list_del(&mlxsw_sp_port_vlan->bridge_vlan_node); mlxsw_sp_bridge_vlan_put(bridge_vlan); mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_DISABLED); mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false); - if (last) + if (last_port) mlxsw_sp_bridge_port_fdb_flush(mlxsw_sp_port->mlxsw_sp, bridge_port, mlxsw_sp_fid_index(fid)); + if (last_vlan) + mlxsw_sp_bridge_port_mdb_flush(mlxsw_sp_port, bridge_port); + mlxsw_sp_port_vlan_fid_leave(mlxsw_sp_port_vlan); mlxsw_sp_bridge_port_put(mlxsw_sp_port->mlxsw_sp->bridge, bridge_port); @@ -1182,7 +1287,7 @@ mlxsw_sp_port_fdb_set(struct mlxsw_sp_port *mlxsw_sp_port, } static int mlxsw_sp_port_mdb_op(struct mlxsw_sp *mlxsw_sp, const char *addr, - u16 fid, u16 mid, bool adding) + u16 fid, u16 mid_idx, bool adding) { char *sfd_pl; int err; @@ -1193,16 +1298,16 @@ static int mlxsw_sp_port_mdb_op(struct mlxsw_sp *mlxsw_sp, const char *addr, mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); mlxsw_reg_sfd_mc_pack(sfd_pl, 0, addr, fid, - MLXSW_REG_SFD_REC_ACTION_NOP, mid); + MLXSW_REG_SFD_REC_ACTION_NOP, mid_idx); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); kfree(sfd_pl); return err; } -static int mlxsw_sp_port_smid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mid, - bool add, bool clear_all_ports) +static int mlxsw_sp_port_smid_full_entry(struct mlxsw_sp *mlxsw_sp, u16 mid_idx, + long *ports_bitmap, + bool set_router_port) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char *smid_pl; int err, i; @@ -1210,66 +1315,208 @@ static int mlxsw_sp_port_smid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mid, if (!smid_pl) return -ENOMEM; - mlxsw_reg_smid_pack(smid_pl, mid, mlxsw_sp_port->local_port, add); - if (clear_all_ports) { - for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++) - if (mlxsw_sp->ports[i]) - mlxsw_reg_smid_port_mask_set(smid_pl, i, 1); + mlxsw_reg_smid_pack(smid_pl, mid_idx, 0, false); + for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++) { + if (mlxsw_sp->ports[i]) + mlxsw_reg_smid_port_mask_set(smid_pl, i, 1); } + + mlxsw_reg_smid_port_mask_set(smid_pl, + mlxsw_sp_router_port(mlxsw_sp), 1); + + for_each_set_bit(i, ports_bitmap, mlxsw_core_max_ports(mlxsw_sp->core)) + mlxsw_reg_smid_port_set(smid_pl, i, 1); + + mlxsw_reg_smid_port_set(smid_pl, mlxsw_sp_router_port(mlxsw_sp), + set_router_port); + + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(smid), smid_pl); + kfree(smid_pl); + return err; +} + +static int mlxsw_sp_port_smid_set(struct mlxsw_sp_port *mlxsw_sp_port, + u16 mid_idx, bool add) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char *smid_pl; + int err; + + smid_pl = kmalloc(MLXSW_REG_SMID_LEN, GFP_KERNEL); + if (!smid_pl) + return -ENOMEM; + + mlxsw_reg_smid_pack(smid_pl, mid_idx, mlxsw_sp_port->local_port, add); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(smid), smid_pl); kfree(smid_pl); return err; } -static struct mlxsw_sp_mid *__mlxsw_sp_mc_get(struct mlxsw_sp *mlxsw_sp, - const unsigned char *addr, - u16 fid) +static struct +mlxsw_sp_mid *__mlxsw_sp_mc_get(struct mlxsw_sp_bridge_device *bridge_device, + const unsigned char *addr, + u16 fid) { struct mlxsw_sp_mid *mid; - list_for_each_entry(mid, &mlxsw_sp->bridge->mids_list, list) { + list_for_each_entry(mid, &bridge_device->mids_list, list) { if (ether_addr_equal(mid->addr, addr) && mid->fid == fid) return mid; } return NULL; } -static struct mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, - const unsigned char *addr, - u16 fid) +static void +mlxsw_sp_bridge_port_get_ports_bitmap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_bridge_port *bridge_port, + unsigned long *ports_bitmap) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + u64 max_lag_members, i; + int lag_id; + + if (!bridge_port->lagged) { + set_bit(bridge_port->system_port, ports_bitmap); + } else { + max_lag_members = MLXSW_CORE_RES_GET(mlxsw_sp->core, + MAX_LAG_MEMBERS); + lag_id = bridge_port->lag_id; + for (i = 0; i < max_lag_members; i++) { + mlxsw_sp_port = mlxsw_sp_port_lagged_get(mlxsw_sp, + lag_id, i); + if (mlxsw_sp_port) + set_bit(mlxsw_sp_port->local_port, + ports_bitmap); + } + } +} + +static void +mlxsw_sp_mc_get_mrouters_bitmap(unsigned long *flood_bitmap, + struct mlxsw_sp_bridge_device *bridge_device, + struct mlxsw_sp *mlxsw_sp) { - struct mlxsw_sp_mid *mid; + struct mlxsw_sp_bridge_port *bridge_port; + + list_for_each_entry(bridge_port, &bridge_device->ports_list, list) { + if (bridge_port->mrouter) { + mlxsw_sp_bridge_port_get_ports_bitmap(mlxsw_sp, + bridge_port, + flood_bitmap); + } + } +} + +static bool +mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mid *mid, + struct mlxsw_sp_bridge_device *bridge_device) +{ + long *flood_bitmap; + int num_of_ports; + int alloc_size; u16 mid_idx; + int err; mid_idx = find_first_zero_bit(mlxsw_sp->bridge->mids_bitmap, MLXSW_SP_MID_MAX); if (mid_idx == MLXSW_SP_MID_MAX) - return NULL; + return false; + + num_of_ports = mlxsw_core_max_ports(mlxsw_sp->core); + alloc_size = sizeof(long) * BITS_TO_LONGS(num_of_ports); + flood_bitmap = kzalloc(alloc_size, GFP_KERNEL); + if (!flood_bitmap) + return false; + + bitmap_copy(flood_bitmap, mid->ports_in_mid, num_of_ports); + mlxsw_sp_mc_get_mrouters_bitmap(flood_bitmap, bridge_device, mlxsw_sp); + + mid->mid = mid_idx; + err = mlxsw_sp_port_smid_full_entry(mlxsw_sp, mid_idx, flood_bitmap, + bridge_device->mrouter); + kfree(flood_bitmap); + if (err) + return false; + + err = mlxsw_sp_port_mdb_op(mlxsw_sp, mid->addr, mid->fid, mid_idx, + true); + if (err) + return false; + + set_bit(mid_idx, mlxsw_sp->bridge->mids_bitmap); + mid->in_hw = true; + return true; +} + +static int mlxsw_sp_mc_remove_mdb_entry(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mid *mid) +{ + if (!mid->in_hw) + return 0; + + clear_bit(mid->mid, mlxsw_sp->bridge->mids_bitmap); + mid->in_hw = false; + return mlxsw_sp_port_mdb_op(mlxsw_sp, mid->addr, mid->fid, mid->mid, + false); +} + +static struct +mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_bridge_device *bridge_device, + const unsigned char *addr, + u16 fid) +{ + struct mlxsw_sp_mid *mid; + size_t alloc_size; mid = kzalloc(sizeof(*mid), GFP_KERNEL); if (!mid) return NULL; - set_bit(mid_idx, mlxsw_sp->bridge->mids_bitmap); + alloc_size = sizeof(unsigned long) * + BITS_TO_LONGS(mlxsw_core_max_ports(mlxsw_sp->core)); + + mid->ports_in_mid = kzalloc(alloc_size, GFP_KERNEL); + if (!mid->ports_in_mid) + goto err_ports_in_mid_alloc; + ether_addr_copy(mid->addr, addr); mid->fid = fid; - mid->mid = mid_idx; - mid->ref_count = 0; - list_add_tail(&mid->list, &mlxsw_sp->bridge->mids_list); + mid->in_hw = false; + + if (!bridge_device->multicast_enabled) + goto out; + if (!mlxsw_sp_mc_write_mdb_entry(mlxsw_sp, mid, bridge_device)) + goto err_write_mdb_entry; + +out: + list_add_tail(&mid->list, &bridge_device->mids_list); return mid; + +err_write_mdb_entry: + kfree(mid->ports_in_mid); +err_ports_in_mid_alloc: + kfree(mid); + return NULL; } -static int __mlxsw_sp_mc_dec_ref(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_mid *mid) +static int mlxsw_sp_port_remove_from_mid(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_mid *mid) { - if (--mid->ref_count == 0) { + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + int err = 0; + + clear_bit(mlxsw_sp_port->local_port, mid->ports_in_mid); + if (bitmap_empty(mid->ports_in_mid, + mlxsw_core_max_ports(mlxsw_sp->core))) { + err = mlxsw_sp_mc_remove_mdb_entry(mlxsw_sp, mid); list_del(&mid->list); - clear_bit(mid->mid, mlxsw_sp->bridge->mids_bitmap); + kfree(mid->ports_in_mid); kfree(mid); - return 1; } - return 0; + return err; } static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, @@ -1302,39 +1549,72 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, fid_index = mlxsw_sp_fid_index(mlxsw_sp_port_vlan->fid); - mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, fid_index); + mid = __mlxsw_sp_mc_get(bridge_device, mdb->addr, fid_index); if (!mid) { - mid = __mlxsw_sp_mc_alloc(mlxsw_sp, mdb->addr, fid_index); + mid = __mlxsw_sp_mc_alloc(mlxsw_sp, bridge_device, mdb->addr, + fid_index); if (!mid) { netdev_err(dev, "Unable to allocate MC group\n"); return -ENOMEM; } } - mid->ref_count++; + set_bit(mlxsw_sp_port->local_port, mid->ports_in_mid); + + if (!bridge_device->multicast_enabled) + return 0; - err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, true, - mid->ref_count == 1); + if (bridge_port->mrouter) + return 0; + + err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, true); if (err) { netdev_err(dev, "Unable to set SMID\n"); goto err_out; } - if (mid->ref_count == 1) { - err = mlxsw_sp_port_mdb_op(mlxsw_sp, mdb->addr, fid_index, - mid->mid, true); - if (err) { - netdev_err(dev, "Unable to set MC SFD\n"); - goto err_out; - } - } - return 0; err_out: - __mlxsw_sp_mc_dec_ref(mlxsw_sp, mid); + mlxsw_sp_port_remove_from_mid(mlxsw_sp_port, mid); return err; } +static void +mlxsw_sp_bridge_mdb_mc_enable_sync(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_device + *bridge_device) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_mid *mid; + bool mc_enabled; + + mc_enabled = bridge_device->multicast_enabled; + + list_for_each_entry(mid, &bridge_device->mids_list, list) { + if (mc_enabled) + mlxsw_sp_mc_write_mdb_entry(mlxsw_sp, mid, + bridge_device); + else + mlxsw_sp_mc_remove_mdb_entry(mlxsw_sp, mid); + } +} + +static void +mlxsw_sp_port_mrouter_update_mdb(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_port *bridge_port, + bool add) +{ + struct mlxsw_sp_bridge_device *bridge_device; + struct mlxsw_sp_mid *mid; + + bridge_device = bridge_port->bridge_device; + + list_for_each_entry(mid, &bridge_device->mids_list, list) { + if (!test_bit(mlxsw_sp_port->local_port, mid->ports_in_mid)) + mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, add); + } +} + static int mlxsw_sp_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, struct switchdev_trans *trans) @@ -1399,6 +1679,30 @@ static int mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, return 0; } +static int +__mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_port *bridge_port, + struct mlxsw_sp_mid *mid) +{ + struct net_device *dev = mlxsw_sp_port->dev; + int err; + + if (bridge_port->bridge_device->multicast_enabled) { + if (bridge_port->bridge_device->multicast_enabled) { + err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, + false); + if (err) + netdev_err(dev, "Unable to remove port from SMID\n"); + } + } + + err = mlxsw_sp_port_remove_from_mid(mlxsw_sp_port, mid); + if (err) + netdev_err(dev, "Unable to remove MC SFD\n"); + + return err; +} + static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, const struct switchdev_obj_port_mdb *mdb) { @@ -1410,8 +1714,6 @@ static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_bridge_port *bridge_port; struct mlxsw_sp_mid *mid; u16 fid_index; - u16 mid_idx; - int err = 0; bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev); if (!bridge_port) @@ -1426,25 +1728,33 @@ static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, fid_index = mlxsw_sp_fid_index(mlxsw_sp_port_vlan->fid); - mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, fid_index); + mid = __mlxsw_sp_mc_get(bridge_device, mdb->addr, fid_index); if (!mid) { netdev_err(dev, "Unable to remove port from MC DB\n"); return -EINVAL; } - err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, false, false); - if (err) - netdev_err(dev, "Unable to remove port from SMID\n"); + return __mlxsw_sp_port_mdb_del(mlxsw_sp_port, bridge_port, mid); +} - mid_idx = mid->mid; - if (__mlxsw_sp_mc_dec_ref(mlxsw_sp, mid)) { - err = mlxsw_sp_port_mdb_op(mlxsw_sp, mdb->addr, fid_index, - mid_idx, false); - if (err) - netdev_err(dev, "Unable to remove MC SFD\n"); - } +static void +mlxsw_sp_bridge_port_mdb_flush(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_port *bridge_port) +{ + struct mlxsw_sp_bridge_device *bridge_device; + struct mlxsw_sp_mid *mid, *tmp; - return err; + bridge_device = bridge_port->bridge_device; + + list_for_each_entry_safe(mid, tmp, &bridge_device->mids_list, list) { + if (test_bit(mlxsw_sp_port->local_port, mid->ports_in_mid)) { + __mlxsw_sp_port_mdb_del(mlxsw_sp_port, bridge_port, + mid); + } else if (bridge_device->multicast_enabled && + bridge_port->mrouter) { + mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, false); + } + } } static int mlxsw_sp_port_obj_del(struct net_device *dev, @@ -1497,12 +1807,15 @@ static const struct switchdev_ops mlxsw_sp_port_switchdev_ops = { static int mlxsw_sp_bridge_8021q_port_join(struct mlxsw_sp_bridge_device *bridge_device, struct mlxsw_sp_bridge_port *bridge_port, - struct mlxsw_sp_port *mlxsw_sp_port) + struct mlxsw_sp_port *mlxsw_sp_port, + struct netlink_ext_ack *extack) { struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; - if (is_vlan_dev(bridge_port->dev)) + if (is_vlan_dev(bridge_port->dev)) { + NL_SET_ERR_MSG(extack, "spectrum: Can not enslave a VLAN device to a VLAN-aware bridge"); return -EINVAL; + } mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, 1); if (WARN_ON(!mlxsw_sp_port_vlan)) @@ -1559,13 +1872,16 @@ mlxsw_sp_port_is_br_member(const struct mlxsw_sp_port *mlxsw_sp_port, static int mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device, struct mlxsw_sp_bridge_port *bridge_port, - struct mlxsw_sp_port *mlxsw_sp_port) + struct mlxsw_sp_port *mlxsw_sp_port, + struct netlink_ext_ack *extack) { struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; u16 vid; - if (!is_vlan_dev(bridge_port->dev)) + if (!is_vlan_dev(bridge_port->dev)) { + NL_SET_ERR_MSG(extack, "spectrum: Only VLAN devices can be enslaved to a VLAN-unaware bridge"); return -EINVAL; + } vid = vlan_dev_vlan_id(bridge_port->dev); mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid); @@ -1573,7 +1889,7 @@ mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device, return -EINVAL; if (mlxsw_sp_port_is_br_member(mlxsw_sp_port, bridge_device->dev)) { - netdev_err(mlxsw_sp_port->dev, "Can't bridge VLAN uppers of the same port\n"); + NL_SET_ERR_MSG(extack, "spectrum: Can not bridge VLAN uppers of the same port"); return -EINVAL; } @@ -1616,7 +1932,8 @@ static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021d_ops = { int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *brport_dev, - struct net_device *br_dev) + struct net_device *br_dev, + struct netlink_ext_ack *extack) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_bridge_device *bridge_device; @@ -1629,7 +1946,7 @@ int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port, bridge_device = bridge_port->bridge_device; err = bridge_device->ops->port_join(bridge_device, bridge_port, - mlxsw_sp_port); + mlxsw_sp_port, extack); if (err) goto err_port_join; @@ -1981,17 +2298,6 @@ static void mlxsw_sp_fdb_fini(struct mlxsw_sp *mlxsw_sp) } -static void mlxsw_sp_mids_fini(struct mlxsw_sp *mlxsw_sp) -{ - struct mlxsw_sp_mid *mid, *tmp; - - list_for_each_entry_safe(mid, tmp, &mlxsw_sp->bridge->mids_list, list) { - list_del(&mid->list); - clear_bit(mid->mid, mlxsw_sp->bridge->mids_bitmap); - kfree(mid); - } -} - int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp) { struct mlxsw_sp_bridge *bridge; @@ -2003,7 +2309,6 @@ int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp) bridge->mlxsw_sp = mlxsw_sp; INIT_LIST_HEAD(&mlxsw_sp->bridge->bridges_list); - INIT_LIST_HEAD(&mlxsw_sp->bridge->mids_list); bridge->bridge_8021q_ops = &mlxsw_sp_bridge_8021q_ops; bridge->bridge_8021d_ops = &mlxsw_sp_bridge_8021d_ops; @@ -2014,7 +2319,6 @@ int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp) void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp) { mlxsw_sp_fdb_fini(mlxsw_sp); - mlxsw_sp_mids_fini(mlxsw_sp); WARN_ON(!list_empty(&mlxsw_sp->bridge->bridges_list)); kfree(mlxsw_sp->bridge); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index f396a1fef633..ec6cef8267ae 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -62,6 +62,8 @@ enum { MLXSW_TRAP_ID_TTLERROR = 0x53, MLXSW_TRAP_ID_LBERROR = 0x54, MLXSW_TRAP_ID_IPV4_OSPF = 0x55, + MLXSW_TRAP_ID_IPV4_PIM = 0x58, + MLXSW_TRAP_ID_RPF = 0x5C, MLXSW_TRAP_ID_IP2ME = 0x5F, MLXSW_TRAP_ID_IPV6_UNSPECIFIED_ADDRESS = 0x60, MLXSW_TRAP_ID_IPV6_LINK_LOCAL_DEST = 0x61, @@ -89,6 +91,10 @@ enum { MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6, MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7, MLXSW_TRAP_ID_ACL0 = 0x1C0, + /* Multicast trap used for routes with trap action */ + MLXSW_TRAP_ID_ACL1 = 0x1C1, + /* Multicast trap used for routes with trap-and-forward action */ + MLXSW_TRAP_ID_ACL2 = 0x1C2, MLXSW_TRAP_ID_MAX = 0x1FF }; diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c index e798fbe08600..52207508744c 100644 --- a/drivers/net/ethernet/micrel/ksz884x.c +++ b/drivers/net/ethernet/micrel/ksz884x.c @@ -4338,11 +4338,11 @@ static void ksz_stop_timer(struct ksz_timer_info *info) } static void ksz_init_timer(struct ksz_timer_info *info, int period, - void (*function)(unsigned long), void *data) + void (*function)(struct timer_list *)) { info->max = 0; info->period = period; - setup_timer(&info->timer, function, (unsigned long)data); + timer_setup(&info->timer, function, 0); } static void ksz_update_timer(struct ksz_timer_info *info) @@ -6689,9 +6689,9 @@ static void mib_read_work(struct work_struct *work) } } -static void mib_monitor(unsigned long ptr) +static void mib_monitor(struct timer_list *t) { - struct dev_info *hw_priv = (struct dev_info *) ptr; + struct dev_info *hw_priv = from_timer(hw_priv, t, mib_timer_info.timer); mib_read_work(&hw_priv->mib_read); @@ -6716,10 +6716,10 @@ static void mib_monitor(unsigned long ptr) * * This routine is run in a kernel timer to monitor the network device. */ -static void dev_monitor(unsigned long ptr) +static void dev_monitor(struct timer_list *t) { - struct net_device *dev = (struct net_device *) ptr; - struct dev_priv *priv = netdev_priv(dev); + struct dev_priv *priv = from_timer(priv, t, monitor_timer_info.timer); + struct net_device *dev = priv->mii_if.dev; struct dev_info *hw_priv = priv->adapter; struct ksz_hw *hw = &hw_priv->hw; struct ksz_port *port = &priv->port; @@ -6789,7 +6789,7 @@ static int __init netdev_init(struct net_device *dev) /* 500 ms timeout */ ksz_init_timer(&priv->monitor_timer_info, 500 * HZ / 1000, - dev_monitor, dev); + dev_monitor); /* 500 ms timeout */ dev->watchdog_timeo = HZ / 2; @@ -7065,7 +7065,7 @@ static int pcidev_init(struct pci_dev *pdev, const struct pci_device_id *id) /* 500 ms timeout */ ksz_init_timer(&hw_priv->mib_timer_info, 500 * HZ / 1000, - mib_monitor, hw_priv); + mib_monitor); for (i = 0; i < hw->dev_count; i++) { dev = alloc_etherdev(sizeof(struct dev_priv)); diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c index 18af2a23a933..b9a1a9f999ea 100644 --- a/drivers/net/ethernet/natsemi/natsemi.c +++ b/drivers/net/ethernet/natsemi/natsemi.c @@ -610,7 +610,7 @@ static int netdev_open(struct net_device *dev); static void do_cable_magic(struct net_device *dev); static void undo_cable_magic(struct net_device *dev); static void check_link(struct net_device *dev); -static void netdev_timer(unsigned long data); +static void netdev_timer(struct timer_list *t); static void dump_ring(struct net_device *dev); static void ns_tx_timeout(struct net_device *dev); static int alloc_ring(struct net_device *dev); @@ -1571,10 +1571,8 @@ static int netdev_open(struct net_device *dev) dev->name, (int)readl(ioaddr + ChipCmd)); /* Set the timer to check for link beat. */ - init_timer(&np->timer); + timer_setup(&np->timer, netdev_timer, 0); np->timer.expires = round_jiffies(jiffies + NATSEMI_TIMER_FREQ); - np->timer.data = (unsigned long)dev; - np->timer.function = netdev_timer; /* timer handler */ add_timer(&np->timer); return 0; @@ -1789,10 +1787,10 @@ static void init_registers(struct net_device *dev) * this check via dspcfg_workaround sysfs option. * 3) check of death of the RX path due to OOM */ -static void netdev_timer(unsigned long data) +static void netdev_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct netdev_private *np = netdev_priv(dev); + struct netdev_private *np = from_timer(np, t, timer); + struct net_device *dev = np->dev; void __iomem * ioaddr = ns_ioaddr(dev); int next_tick = NATSEMI_TIMER_FREQ; const int irq = np->pci_dev->irq; diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c index 729095db3e08..958fced4dacf 100644 --- a/drivers/net/ethernet/natsemi/ns83820.c +++ b/drivers/net/ethernet/natsemi/ns83820.c @@ -1600,10 +1600,10 @@ static void ns83820_tx_timeout(struct net_device *ndev) spin_unlock_irqrestore(&dev->tx_lock, flags); } -static void ns83820_tx_watch(unsigned long data) +static void ns83820_tx_watch(struct timer_list *t) { - struct net_device *ndev = (void *)data; - struct ns83820 *dev = PRIV(ndev); + struct ns83820 *dev = from_timer(dev, t, tx_watchdog); + struct net_device *ndev = dev->ndev; #if defined(DEBUG) printk("ns83820_tx_watch: %u %u %d\n", @@ -1652,9 +1652,7 @@ static int ns83820_open(struct net_device *ndev) writel(0, dev->base + TXDP_HI); writel(desc, dev->base + TXDP); - init_timer(&dev->tx_watchdog); - dev->tx_watchdog.data = (unsigned long)ndev; - dev->tx_watchdog.function = ns83820_tx_watch; + timer_setup(&dev->tx_watchdog, ns83820_tx_watch, 0); mod_timer(&dev->tx_watchdog, jiffies + 2*HZ); netif_start_queue(ndev); /* FIXME: wait for phy to come up */ diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index 462eda926b1c..b8983e73265a 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -337,12 +337,6 @@ static const char ethtool_driver_stats_keys[][ETH_GSTRING_LEN] = { #define S2IO_TEST_LEN ARRAY_SIZE(s2io_gstrings) #define S2IO_STRINGS_LEN (S2IO_TEST_LEN * ETH_GSTRING_LEN) -#define S2IO_TIMER_CONF(timer, handle, arg, exp) \ - init_timer(&timer); \ - timer.function = handle; \ - timer.data = (unsigned long)arg; \ - mod_timer(&timer, (jiffies + exp)) \ - /* copy mac addr to def_mac_addr array */ static void do_s2io_copy_mac_addr(struct s2io_nic *sp, int offset, u64 mac_addr) { @@ -4193,9 +4187,9 @@ pci_map_failed: } static void -s2io_alarm_handle(unsigned long data) +s2io_alarm_handle(struct timer_list *t) { - struct s2io_nic *sp = (struct s2io_nic *)data; + struct s2io_nic *sp = from_timer(sp, t, alarm_timer); struct net_device *dev = sp->dev; s2io_handle_errors(dev); @@ -7186,7 +7180,8 @@ static int s2io_card_up(struct s2io_nic *sp) return -ENODEV; } - S2IO_TIMER_CONF(sp->alarm_timer, s2io_alarm_handle, sp, (HZ/2)); + timer_setup(&sp->alarm_timer, s2io_alarm_handle, 0); + mod_timer(&sp->alarm_timer, jiffies + HZ / 2); set_bit(__S2IO_STATE_CARD_UP, &sp->state); diff --git a/drivers/net/ethernet/neterion/s2io.h b/drivers/net/ethernet/neterion/s2io.h index 6c5997dc8afc..1a24a7218794 100644 --- a/drivers/net/ethernet/neterion/s2io.h +++ b/drivers/net/ethernet/neterion/s2io.h @@ -1094,7 +1094,7 @@ static int s2io_poll_msix(struct napi_struct *napi, int budget); static int s2io_poll_inta(struct napi_struct *napi, int budget); static void s2io_init_pci(struct s2io_nic * sp); static int do_s2io_prog_unicast(struct net_device *dev, u8 *addr); -static void s2io_alarm_handle(unsigned long data); +static void s2io_alarm_handle(struct timer_list *t); static irqreturn_t s2io_msix_ring_handle(int irq, void *dev_id); static irqreturn_t diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index 5dd5f61e1114..fe7e0e1dd01d 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -1122,7 +1122,6 @@ static void vxge_set_multicast(struct net_device *dev) struct netdev_hw_addr *ha; struct vxgedev *vdev; int i, mcast_cnt = 0; - struct __vxge_hw_device *hldev; struct vxge_vpath *vpath; enum vxge_hw_status status = VXGE_HW_OK; struct macInfo mac_info; @@ -1136,7 +1135,6 @@ static void vxge_set_multicast(struct net_device *dev) "%s:%d", __func__, __LINE__); vdev = netdev_priv(dev); - hldev = vdev->devh; if (unlikely(!is_vxge_card_up(vdev))) return; @@ -1283,7 +1281,6 @@ static int vxge_set_mac_addr(struct net_device *dev, void *p) { struct sockaddr *addr = p; struct vxgedev *vdev; - struct __vxge_hw_device *hldev; enum vxge_hw_status status = VXGE_HW_OK; struct macInfo mac_info_new, mac_info_old; int vpath_idx = 0; @@ -1291,7 +1288,6 @@ static int vxge_set_mac_addr(struct net_device *dev, void *p) vxge_debug_entryexit(VXGE_TRACE, "%s:%d", __func__, __LINE__); vdev = netdev_priv(dev); - hldev = vdev->devh; if (!is_valid_ether_addr(addr->sa_data)) return -EINVAL; @@ -2177,7 +2173,6 @@ static void adaptive_coalesce_rx_interrupts(struct vxge_ring *ring) */ static irqreturn_t vxge_isr_napi(int irq, void *dev_id) { - struct net_device *dev; struct __vxge_hw_device *hldev; u64 reason; enum vxge_hw_status status; @@ -2185,7 +2180,6 @@ static irqreturn_t vxge_isr_napi(int irq, void *dev_id) vxge_debug_intr(VXGE_TRACE, "%s:%d", __func__, __LINE__); - dev = vdev->ndev; hldev = pci_get_drvdata(vdev->pdev); if (pci_channel_offline(vdev->pdev)) @@ -2597,9 +2591,9 @@ INTA_MODE: return VXGE_HW_OK; } -static void vxge_poll_vp_reset(unsigned long data) +static void vxge_poll_vp_reset(struct timer_list *t) { - struct vxgedev *vdev = (struct vxgedev *)data; + struct vxgedev *vdev = from_timer(vdev, t, vp_reset_timer); int i, j = 0; for (i = 0; i < vdev->no_of_vpath; i++) { @@ -2616,9 +2610,9 @@ static void vxge_poll_vp_reset(unsigned long data) mod_timer(&vdev->vp_reset_timer, jiffies + HZ / 2); } -static void vxge_poll_vp_lockup(unsigned long data) +static void vxge_poll_vp_lockup(struct timer_list *t) { - struct vxgedev *vdev = (struct vxgedev *)data; + struct vxgedev *vdev = from_timer(vdev, t, vp_lockup_timer); enum vxge_hw_status status = VXGE_HW_OK; struct vxge_vpath *vpath; struct vxge_ring *ring; @@ -2713,14 +2707,13 @@ static int vxge_open(struct net_device *dev) struct vxge_vpath *vpath; int ret = 0; int i; - u64 val64, function_mode; + u64 val64; vxge_debug_entryexit(VXGE_TRACE, "%s: %s:%d", dev->name, __func__, __LINE__); vdev = netdev_priv(dev); hldev = pci_get_drvdata(vdev->pdev); - function_mode = vdev->config.device_hw_info.function_mode; /* make sure you have link off by default every time Nic is * initialized */ @@ -2858,12 +2851,12 @@ static int vxge_open(struct net_device *dev) vdev->config.rx_pause_enable); if (vdev->vp_reset_timer.function == NULL) - vxge_os_timer(&vdev->vp_reset_timer, vxge_poll_vp_reset, vdev, + vxge_os_timer(&vdev->vp_reset_timer, vxge_poll_vp_reset, HZ / 2); /* There is no need to check for RxD leak and RxD lookup on Titan1A */ if (vdev->titan1 && vdev->vp_lockup_timer.function == NULL) - vxge_os_timer(&vdev->vp_lockup_timer, vxge_poll_vp_lockup, vdev, + vxge_os_timer(&vdev->vp_lockup_timer, vxge_poll_vp_lockup, HZ / 2); set_bit(__VXGE_STATE_CARD_UP, &vdev->state); diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.h b/drivers/net/ethernet/neterion/vxge/vxge-main.h index 3a79d93b8445..59a57ff5e96a 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.h +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.h @@ -417,12 +417,10 @@ struct vxge_tx_priv { module_param(p, int, 0) static inline -void vxge_os_timer(struct timer_list *timer, void (*func)(unsigned long data), - struct vxgedev *vdev, unsigned long timeout) +void vxge_os_timer(struct timer_list *timer, void (*func)(struct timer_list *), + unsigned long timeout) { - init_timer(timer); - timer->function = func; - timer->data = (unsigned long)vdev; + timer_setup(timer, func, 0); mod_timer(timer, jiffies + timeout); } diff --git a/drivers/net/ethernet/neterion/vxge/vxge-traffic.c b/drivers/net/ethernet/neterion/vxge/vxge-traffic.c index 5f630a24e491..0c3b5dea2858 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-traffic.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-traffic.c @@ -1209,9 +1209,6 @@ void vxge_hw_ring_rxd_pre_post(struct __vxge_hw_ring *ring, void *rxdh) void vxge_hw_ring_rxd_post_post(struct __vxge_hw_ring *ring, void *rxdh) { struct vxge_hw_ring_rxd_1 *rxdp = (struct vxge_hw_ring_rxd_1 *)rxdh; - struct __vxge_hw_channel *channel; - - channel = &ring->channel; rxdp->control_0 = VXGE_HW_RING_RXD_LIST_OWN_ADAPTER; @@ -1359,11 +1356,8 @@ exit: enum vxge_hw_status vxge_hw_ring_handle_tcode( struct __vxge_hw_ring *ring, void *rxdh, u8 t_code) { - struct __vxge_hw_channel *channel; enum vxge_hw_status status = VXGE_HW_OK; - channel = &ring->channel; - /* If the t_code is not supported and if the * t_code is other than 0x5 (unparseable packet * such as unknown UPV6 header), Drop it !!! @@ -1399,10 +1393,6 @@ exit: static void __vxge_hw_non_offload_db_post(struct __vxge_hw_fifo *fifo, u64 txdl_ptr, u32 num_txds, u32 no_snoop) { - struct __vxge_hw_channel *channel; - - channel = &fifo->channel; - writeq(VXGE_HW_NODBW_TYPE(VXGE_HW_NODBW_TYPE_NODBW) | VXGE_HW_NODBW_LAST_TXD_NUMBER(num_txds) | VXGE_HW_NODBW_GET_NO_SNOOP(no_snoop), @@ -1506,9 +1496,6 @@ void vxge_hw_fifo_txdl_buffer_set(struct __vxge_hw_fifo *fifo, { struct __vxge_hw_fifo_txdl_priv *txdl_priv; struct vxge_hw_fifo_txd *txdp, *txdp_last; - struct __vxge_hw_channel *channel; - - channel = &fifo->channel; txdl_priv = __vxge_hw_fifo_txdl_priv(fifo, txdlh); txdp = (struct vxge_hw_fifo_txd *)txdlh + txdl_priv->frags; @@ -1554,9 +1541,6 @@ void vxge_hw_fifo_txdl_post(struct __vxge_hw_fifo *fifo, void *txdlh) struct __vxge_hw_fifo_txdl_priv *txdl_priv; struct vxge_hw_fifo_txd *txdp_last; struct vxge_hw_fifo_txd *txdp_first; - struct __vxge_hw_channel *channel; - - channel = &fifo->channel; txdl_priv = __vxge_hw_fifo_txdl_priv(fifo, txdlh); txdp_first = txdlh; @@ -1672,10 +1656,7 @@ enum vxge_hw_status vxge_hw_fifo_handle_tcode(struct __vxge_hw_fifo *fifo, void *txdlh, enum vxge_hw_fifo_tcode t_code) { - struct __vxge_hw_channel *channel; - enum vxge_hw_status status = VXGE_HW_OK; - channel = &fifo->channel; if (((t_code & 0x7) < 0) || ((t_code & 0x7) > 0x4)) { status = VXGE_HW_ERR_INVALID_TCODE; diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile index 3cafa3d15082..24c4408b5734 100644 --- a/drivers/net/ethernet/netronome/nfp/Makefile +++ b/drivers/net/ethernet/netronome/nfp/Makefile @@ -15,6 +15,7 @@ nfp-objs := \ nfpcore/nfp_resource.o \ nfpcore/nfp_rtsym.o \ nfpcore/nfp_target.o \ + nfp_asm.o \ nfp_app.o \ nfp_app_nic.o \ nfp_devlink.o \ @@ -27,8 +28,6 @@ nfp-objs := \ nfp_net_sriov.o \ nfp_netvf_main.o \ nfp_port.o \ - bpf/main.o \ - bpf/offload.o \ nic/main.o ifeq ($(CONFIG_NFP_APP_FLOWER),y) @@ -38,11 +37,14 @@ nfp-objs += \ flower/main.o \ flower/match.o \ flower/metadata.o \ - flower/offload.o + flower/offload.o \ + flower/tunnel_conf.o endif ifeq ($(CONFIG_BPF_SYSCALL),y) nfp-objs += \ + bpf/main.o \ + bpf/offload.o \ bpf/verifier.o \ bpf/jit.o endif diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 239dfbe8a0a1..995e95410b11 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -77,17 +77,6 @@ nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return meta->l.prev != &nfp_prog->insns; } -static void nfp_prog_free(struct nfp_prog *nfp_prog) -{ - struct nfp_insn_meta *meta, *tmp; - - list_for_each_entry_safe(meta, tmp, &nfp_prog->insns, l) { - list_del(&meta->l); - kfree(meta); - } - kfree(nfp_prog); -} - static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn) { if (nfp_prog->__prog_alloc_len == nfp_prog->prog_len) { @@ -110,150 +99,7 @@ nfp_prog_offset_to_index(struct nfp_prog *nfp_prog, unsigned int offset) return offset - nfp_prog->start_off; } -/* --- SW reg --- */ -struct nfp_insn_ur_regs { - enum alu_dst_ab dst_ab; - u16 dst; - u16 areg, breg; - bool swap; - bool wr_both; -}; - -struct nfp_insn_re_regs { - enum alu_dst_ab dst_ab; - u8 dst; - u8 areg, breg; - bool swap; - bool wr_both; - bool i8; -}; - -static u16 nfp_swreg_to_unreg(u32 swreg, bool is_dst) -{ - u16 val = FIELD_GET(NN_REG_VAL, swreg); - - switch (FIELD_GET(NN_REG_TYPE, swreg)) { - case NN_REG_GPR_A: - case NN_REG_GPR_B: - case NN_REG_GPR_BOTH: - return val; - case NN_REG_NNR: - return UR_REG_NN | val; - case NN_REG_XFER: - return UR_REG_XFR | val; - case NN_REG_IMM: - if (val & ~0xff) { - pr_err("immediate too large\n"); - return 0; - } - return UR_REG_IMM_encode(val); - case NN_REG_NONE: - return is_dst ? UR_REG_NO_DST : REG_NONE; - default: - pr_err("unrecognized reg encoding %08x\n", swreg); - return 0; - } -} - -static int -swreg_to_unrestricted(u32 dst, u32 lreg, u32 rreg, struct nfp_insn_ur_regs *reg) -{ - memset(reg, 0, sizeof(*reg)); - - /* Decode destination */ - if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM) - return -EFAULT; - - if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_B) - reg->dst_ab = ALU_DST_B; - if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_BOTH) - reg->wr_both = true; - reg->dst = nfp_swreg_to_unreg(dst, true); - - /* Decode source operands */ - if (FIELD_GET(NN_REG_TYPE, lreg) == FIELD_GET(NN_REG_TYPE, rreg)) - return -EFAULT; - - if (FIELD_GET(NN_REG_TYPE, lreg) == NN_REG_GPR_B || - FIELD_GET(NN_REG_TYPE, rreg) == NN_REG_GPR_A) { - reg->areg = nfp_swreg_to_unreg(rreg, false); - reg->breg = nfp_swreg_to_unreg(lreg, false); - reg->swap = true; - } else { - reg->areg = nfp_swreg_to_unreg(lreg, false); - reg->breg = nfp_swreg_to_unreg(rreg, false); - } - - return 0; -} - -static u16 nfp_swreg_to_rereg(u32 swreg, bool is_dst, bool has_imm8, bool *i8) -{ - u16 val = FIELD_GET(NN_REG_VAL, swreg); - - switch (FIELD_GET(NN_REG_TYPE, swreg)) { - case NN_REG_GPR_A: - case NN_REG_GPR_B: - case NN_REG_GPR_BOTH: - return val; - case NN_REG_XFER: - return RE_REG_XFR | val; - case NN_REG_IMM: - if (val & ~(0x7f | has_imm8 << 7)) { - pr_err("immediate too large\n"); - return 0; - } - *i8 = val & 0x80; - return RE_REG_IMM_encode(val & 0x7f); - case NN_REG_NONE: - return is_dst ? RE_REG_NO_DST : REG_NONE; - default: - pr_err("unrecognized reg encoding\n"); - return 0; - } -} - -static int -swreg_to_restricted(u32 dst, u32 lreg, u32 rreg, struct nfp_insn_re_regs *reg, - bool has_imm8) -{ - memset(reg, 0, sizeof(*reg)); - - /* Decode destination */ - if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM) - return -EFAULT; - - if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_B) - reg->dst_ab = ALU_DST_B; - if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_BOTH) - reg->wr_both = true; - reg->dst = nfp_swreg_to_rereg(dst, true, false, NULL); - - /* Decode source operands */ - if (FIELD_GET(NN_REG_TYPE, lreg) == FIELD_GET(NN_REG_TYPE, rreg)) - return -EFAULT; - - if (FIELD_GET(NN_REG_TYPE, lreg) == NN_REG_GPR_B || - FIELD_GET(NN_REG_TYPE, rreg) == NN_REG_GPR_A) { - reg->areg = nfp_swreg_to_rereg(rreg, false, has_imm8, ®->i8); - reg->breg = nfp_swreg_to_rereg(lreg, false, has_imm8, ®->i8); - reg->swap = true; - } else { - reg->areg = nfp_swreg_to_rereg(lreg, false, has_imm8, ®->i8); - reg->breg = nfp_swreg_to_rereg(rreg, false, has_imm8, ®->i8); - } - - return 0; -} - /* --- Emitters --- */ -static const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = { - [CMD_TGT_WRITE8] = { 0x00, 0x42 }, - [CMD_TGT_READ8] = { 0x01, 0x43 }, - [CMD_TGT_READ_LE] = { 0x01, 0x40 }, - [CMD_TGT_READ_SWAP_LE] = { 0x03, 0x40 }, -}; - static void __emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, bool sync) @@ -281,7 +127,7 @@ __emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, static void emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, - u8 mode, u8 xfer, u32 lreg, u32 rreg, u8 size, bool sync) + u8 mode, u8 xfer, swreg lreg, swreg rreg, u8 size, bool sync) { struct nfp_insn_re_regs reg; int err; @@ -296,6 +142,11 @@ emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, nfp_prog->error = -EFAULT; return; } + if (reg.dst_lmextn || reg.src_lmextn) { + pr_err("cmd can't use LMextn\n"); + nfp_prog->error = -EFAULT; + return; + } __emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, sync); } @@ -340,49 +191,10 @@ emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer) } static void -__emit_br_byte(struct nfp_prog *nfp_prog, u8 areg, u8 breg, bool imm8, - u8 byte, bool equal, u16 addr, u8 defer) -{ - u16 addr_lo, addr_hi; - u64 insn; - - addr_lo = addr & (OP_BB_ADDR_LO >> __bf_shf(OP_BB_ADDR_LO)); - addr_hi = addr != addr_lo; - - insn = OP_BBYTE_BASE | - FIELD_PREP(OP_BB_A_SRC, areg) | - FIELD_PREP(OP_BB_BYTE, byte) | - FIELD_PREP(OP_BB_B_SRC, breg) | - FIELD_PREP(OP_BB_I8, imm8) | - FIELD_PREP(OP_BB_EQ, equal) | - FIELD_PREP(OP_BB_DEFBR, defer) | - FIELD_PREP(OP_BB_ADDR_LO, addr_lo) | - FIELD_PREP(OP_BB_ADDR_HI, addr_hi); - - nfp_prog_push(nfp_prog, insn); -} - -static void -emit_br_byte_neq(struct nfp_prog *nfp_prog, - u32 dst, u8 imm, u8 byte, u16 addr, u8 defer) -{ - struct nfp_insn_re_regs reg; - int err; - - err = swreg_to_restricted(reg_none(), dst, reg_imm(imm), ®, true); - if (err) { - nfp_prog->error = err; - return; - } - - __emit_br_byte(nfp_prog, reg.areg, reg.breg, reg.i8, byte, false, addr, - defer); -} - -static void __emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi, enum immed_width width, bool invert, - enum immed_shift shift, bool wr_both) + enum immed_shift shift, bool wr_both, + bool dst_lmextn, bool src_lmextn) { u64 insn; @@ -393,19 +205,21 @@ __emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi, FIELD_PREP(OP_IMMED_WIDTH, width) | FIELD_PREP(OP_IMMED_INV, invert) | FIELD_PREP(OP_IMMED_SHIFT, shift) | - FIELD_PREP(OP_IMMED_WR_AB, wr_both); + FIELD_PREP(OP_IMMED_WR_AB, wr_both) | + FIELD_PREP(OP_IMMED_SRC_LMEXTN, src_lmextn) | + FIELD_PREP(OP_IMMED_DST_LMEXTN, dst_lmextn); nfp_prog_push(nfp_prog, insn); } static void -emit_immed(struct nfp_prog *nfp_prog, u32 dst, u16 imm, +emit_immed(struct nfp_prog *nfp_prog, swreg dst, u16 imm, enum immed_width width, bool invert, enum immed_shift shift) { struct nfp_insn_ur_regs reg; int err; - if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM) { + if (swreg_type(dst) == NN_REG_IMM) { nfp_prog->error = -EFAULT; return; } @@ -417,13 +231,15 @@ emit_immed(struct nfp_prog *nfp_prog, u32 dst, u16 imm, } __emit_immed(nfp_prog, reg.areg, reg.breg, imm >> 8, width, - invert, shift, reg.wr_both); + invert, shift, reg.wr_both, + reg.dst_lmextn, reg.src_lmextn); } static void __emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab, enum shf_sc sc, u8 shift, - u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both) + u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both, + bool dst_lmextn, bool src_lmextn) { u64 insn; @@ -445,14 +261,16 @@ __emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab, FIELD_PREP(OP_SHF_SHIFT, shift) | FIELD_PREP(OP_SHF_OP, op) | FIELD_PREP(OP_SHF_DST_AB, dst_ab) | - FIELD_PREP(OP_SHF_WR_AB, wr_both); + FIELD_PREP(OP_SHF_WR_AB, wr_both) | + FIELD_PREP(OP_SHF_SRC_LMEXTN, src_lmextn) | + FIELD_PREP(OP_SHF_DST_LMEXTN, dst_lmextn); nfp_prog_push(nfp_prog, insn); } static void -emit_shf(struct nfp_prog *nfp_prog, u32 dst, u32 lreg, enum shf_op op, u32 rreg, - enum shf_sc sc, u8 shift) +emit_shf(struct nfp_prog *nfp_prog, swreg dst, + swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc, u8 shift) { struct nfp_insn_re_regs reg; int err; @@ -464,12 +282,14 @@ emit_shf(struct nfp_prog *nfp_prog, u32 dst, u32 lreg, enum shf_op op, u32 rreg, } __emit_shf(nfp_prog, reg.dst, reg.dst_ab, sc, shift, - reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both); + reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both, + reg.dst_lmextn, reg.src_lmextn); } static void __emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab, - u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both) + u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both, + bool dst_lmextn, bool src_lmextn) { u64 insn; @@ -480,13 +300,16 @@ __emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab, FIELD_PREP(OP_ALU_SW, swap) | FIELD_PREP(OP_ALU_OP, op) | FIELD_PREP(OP_ALU_DST_AB, dst_ab) | - FIELD_PREP(OP_ALU_WR_AB, wr_both); + FIELD_PREP(OP_ALU_WR_AB, wr_both) | + FIELD_PREP(OP_ALU_SRC_LMEXTN, src_lmextn) | + FIELD_PREP(OP_ALU_DST_LMEXTN, dst_lmextn); nfp_prog_push(nfp_prog, insn); } static void -emit_alu(struct nfp_prog *nfp_prog, u32 dst, u32 lreg, enum alu_op op, u32 rreg) +emit_alu(struct nfp_prog *nfp_prog, swreg dst, + swreg lreg, enum alu_op op, swreg rreg) { struct nfp_insn_ur_regs reg; int err; @@ -498,13 +321,15 @@ emit_alu(struct nfp_prog *nfp_prog, u32 dst, u32 lreg, enum alu_op op, u32 rreg) } __emit_alu(nfp_prog, reg.dst, reg.dst_ab, - reg.areg, op, reg.breg, reg.swap, reg.wr_both); + reg.areg, op, reg.breg, reg.swap, reg.wr_both, + reg.dst_lmextn, reg.src_lmextn); } static void __emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc, u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8, - bool zero, bool swap, bool wr_both) + bool zero, bool swap, bool wr_both, + bool dst_lmextn, bool src_lmextn) { u64 insn; @@ -517,33 +342,84 @@ __emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc, FIELD_PREP(OP_LDF_ZF, zero) | FIELD_PREP(OP_LDF_BMASK, bmask) | FIELD_PREP(OP_LDF_SHF, shift) | - FIELD_PREP(OP_LDF_WR_AB, wr_both); + FIELD_PREP(OP_LDF_WR_AB, wr_both) | + FIELD_PREP(OP_LDF_SRC_LMEXTN, src_lmextn) | + FIELD_PREP(OP_LDF_DST_LMEXTN, dst_lmextn); nfp_prog_push(nfp_prog, insn); } static void -emit_ld_field_any(struct nfp_prog *nfp_prog, enum shf_sc sc, u8 shift, - u32 dst, u8 bmask, u32 src, bool zero) +emit_ld_field_any(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src, + enum shf_sc sc, u8 shift, bool zero) { struct nfp_insn_re_regs reg; int err; - err = swreg_to_restricted(reg_none(), dst, src, ®, true); + /* Note: ld_field is special as it uses one of the src regs as dst */ + err = swreg_to_restricted(dst, dst, src, ®, true); if (err) { nfp_prog->error = err; return; } __emit_ld_field(nfp_prog, sc, reg.areg, bmask, reg.breg, shift, - reg.i8, zero, reg.swap, reg.wr_both); + reg.i8, zero, reg.swap, reg.wr_both, + reg.dst_lmextn, reg.src_lmextn); } static void -emit_ld_field(struct nfp_prog *nfp_prog, u32 dst, u8 bmask, u32 src, +emit_ld_field(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src, enum shf_sc sc, u8 shift) { - emit_ld_field_any(nfp_prog, sc, shift, dst, bmask, src, false); + emit_ld_field_any(nfp_prog, dst, bmask, src, sc, shift, false); +} + +static void +__emit_lcsr(struct nfp_prog *nfp_prog, u16 areg, u16 breg, bool wr, u16 addr, + bool dst_lmextn, bool src_lmextn) +{ + u64 insn; + + insn = OP_LCSR_BASE | + FIELD_PREP(OP_LCSR_A_SRC, areg) | + FIELD_PREP(OP_LCSR_B_SRC, breg) | + FIELD_PREP(OP_LCSR_WRITE, wr) | + FIELD_PREP(OP_LCSR_ADDR, addr) | + FIELD_PREP(OP_LCSR_SRC_LMEXTN, src_lmextn) | + FIELD_PREP(OP_LCSR_DST_LMEXTN, dst_lmextn); + + nfp_prog_push(nfp_prog, insn); +} + +static void emit_csr_wr(struct nfp_prog *nfp_prog, swreg src, u16 addr) +{ + struct nfp_insn_ur_regs reg; + int err; + + /* This instruction takes immeds instead of reg_none() for the ignored + * operand, but we can't encode 2 immeds in one instr with our normal + * swreg infra so if param is an immed, we encode as reg_none() and + * copy the immed to both operands. + */ + if (swreg_type(src) == NN_REG_IMM) { + err = swreg_to_unrestricted(reg_none(), src, reg_none(), ®); + reg.breg = reg.areg; + } else { + err = swreg_to_unrestricted(reg_none(), src, reg_imm(0), ®); + } + if (err) { + nfp_prog->error = err; + return; + } + + __emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr / 4, + false, reg.src_lmextn); +} + +static void emit_nop(struct nfp_prog *nfp_prog) +{ + __emit_immed(nfp_prog, UR_REG_IMM, UR_REG_IMM, 0, 0, 0, 0, 0, 0, 0); } /* --- Wrappers --- */ @@ -565,7 +441,7 @@ static bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift) return true; } -static void wrp_immed(struct nfp_prog *nfp_prog, u32 dst, u32 imm) +static void wrp_immed(struct nfp_prog *nfp_prog, swreg dst, u32 imm) { enum immed_shift shift; u16 val; @@ -586,7 +462,7 @@ static void wrp_immed(struct nfp_prog *nfp_prog, u32 dst, u32 imm) * If the @imm is small enough encode it directly in operand and return * otherwise load @imm to a spare register and return its encoding. */ -static u32 ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, u32 tmp_reg) +static swreg ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg) { if (FIELD_FIT(UR_REG_IMM_MAX, imm)) return reg_imm(imm); @@ -599,7 +475,7 @@ static u32 ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, u32 tmp_reg) * If the @imm is small enough encode it directly in operand and return * otherwise load @imm to a spare register and return its encoding. */ -static u32 re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, u32 tmp_reg) +static swreg re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg) { if (FIELD_FIT(RE_REG_IMM_MAX, imm)) return reg_imm(imm); @@ -608,6 +484,12 @@ static u32 re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, u32 tmp_reg) return tmp_reg; } +static void wrp_nops(struct nfp_prog *nfp_prog, unsigned int count) +{ + while (count--) + emit_nop(nfp_prog); +} + static void wrp_br_special(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_special special) @@ -618,78 +500,374 @@ wrp_br_special(struct nfp_prog *nfp_prog, enum br_mask mask, FIELD_PREP(OP_BR_SPECIAL, special); } +static void wrp_mov(struct nfp_prog *nfp_prog, swreg dst, swreg src) +{ + emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, src); +} + static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src) { - emit_alu(nfp_prog, reg_both(dst), reg_none(), ALU_OP_NONE, reg_b(src)); + wrp_mov(nfp_prog, reg_both(dst), reg_b(src)); } static int -construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, - u16 src, bool src_valid, u8 size) +data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size) { unsigned int i; u16 shift, sz; - u32 tmp_reg; /* We load the value from the address indicated in @offset and then * shift out the data we don't need. Note: this is big endian! */ - sz = size < 4 ? 4 : size; + sz = max(size, 4); shift = size < 4 ? 4 - size : 0; - if (src_valid) { - /* Calculate the true offset (src_reg + imm) */ - tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); - emit_alu(nfp_prog, imm_both(nfp_prog), - reg_a(src), ALU_OP_ADD, tmp_reg); - /* Check packet length (size guaranteed to fit b/c it's u8) */ - emit_alu(nfp_prog, imm_a(nfp_prog), - imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size)); - emit_alu(nfp_prog, reg_none(), - NFP_BPF_ABI_LEN, ALU_OP_SUB, imm_a(nfp_prog)); - wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT); - /* Load data */ - emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0, - pkt_reg(nfp_prog), imm_b(nfp_prog), sz - 1, true); - } else { - /* Check packet length */ - tmp_reg = ur_load_imm_any(nfp_prog, offset + size, - imm_a(nfp_prog)); - emit_alu(nfp_prog, reg_none(), - NFP_BPF_ABI_LEN, ALU_OP_SUB, tmp_reg); - wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT); - /* Load data */ - tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); - emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0, - pkt_reg(nfp_prog), tmp_reg, sz - 1, true); - } + emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0, + pptr_reg(nfp_prog), offset, sz - 1, true); i = 0; if (shift) - emit_shf(nfp_prog, reg_both(0), reg_none(), SHF_OP_NONE, + emit_shf(nfp_prog, reg_both(dst_gpr), reg_none(), SHF_OP_NONE, reg_xfer(0), SHF_SC_R_SHF, shift * 8); else for (; i * 4 < size; i++) - emit_alu(nfp_prog, reg_both(i), - reg_none(), ALU_OP_NONE, reg_xfer(i)); + wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i)); + + if (i < 2) + wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0); + + return 0; +} + +static int +data_ld_host_order(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset, + u8 dst_gpr, int size) +{ + unsigned int i; + u8 mask, sz; + + /* We load the value from the address indicated in @offset and then + * mask out the data we don't need. Note: this is little endian! + */ + sz = max(size, 4); + mask = size < 4 ? GENMASK(size - 1, 0) : 0; + + emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, + reg_a(src_gpr), offset, sz / 4 - 1, true); + + i = 0; + if (mask) + emit_ld_field_any(nfp_prog, reg_both(dst_gpr), mask, + reg_xfer(0), SHF_SC_NONE, 0, true); + else + for (; i * 4 < size; i++) + wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i)); if (i < 2) - wrp_immed(nfp_prog, reg_both(1), 0); + wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0); return 0; } +static int +construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size) +{ + swreg tmp_reg; + + /* Calculate the true offset (src_reg + imm) */ + tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); + emit_alu(nfp_prog, imm_both(nfp_prog), reg_a(src), ALU_OP_ADD, tmp_reg); + + /* Check packet length (size guaranteed to fit b/c it's u8) */ + emit_alu(nfp_prog, imm_a(nfp_prog), + imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size)); + emit_alu(nfp_prog, reg_none(), + plen_reg(nfp_prog), ALU_OP_SUB, imm_a(nfp_prog)); + wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT); + + /* Load data */ + return data_ld(nfp_prog, imm_b(nfp_prog), 0, size); +} + static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size) { - return construct_data_ind_ld(nfp_prog, offset, 0, false, size); + swreg tmp_reg; + + /* Check packet length */ + tmp_reg = ur_load_imm_any(nfp_prog, offset + size, imm_a(nfp_prog)); + emit_alu(nfp_prog, reg_none(), plen_reg(nfp_prog), ALU_OP_SUB, tmp_reg); + wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT); + + /* Load data */ + tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); + return data_ld(nfp_prog, tmp_reg, 0, size); +} + +static int +data_stx_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset, + u8 src_gpr, u8 size) +{ + unsigned int i; + + for (i = 0; i * 4 < size; i++) + wrp_mov(nfp_prog, reg_xfer(i), reg_a(src_gpr + i)); + + emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, + reg_a(dst_gpr), offset, size - 1, true); + + return 0; } -static int wrp_set_mark(struct nfp_prog *nfp_prog, u8 src) +static int +data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset, + u64 imm, u8 size) { - emit_alu(nfp_prog, NFP_BPF_ABI_MARK, - reg_none(), ALU_OP_NONE, reg_b(src)); - emit_alu(nfp_prog, NFP_BPF_ABI_FLAGS, - NFP_BPF_ABI_FLAGS, ALU_OP_OR, reg_imm(NFP_BPF_ABI_FLAG_MARK)); + wrp_immed(nfp_prog, reg_xfer(0), imm); + if (size == 8) + wrp_immed(nfp_prog, reg_xfer(1), imm >> 32); + + emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, + reg_a(dst_gpr), offset, size - 1, true); + + return 0; +} + +typedef int +(*lmem_step)(struct nfp_prog *nfp_prog, u8 gpr, u8 gpr_byte, s32 off, + unsigned int size, bool first, bool new_gpr, bool last, bool lm3, + bool needs_inc); + +static int +wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off, + unsigned int size, bool first, bool new_gpr, bool last, bool lm3, + bool needs_inc) +{ + bool should_inc = needs_inc && new_gpr && !last; + u32 idx, src_byte; + enum shf_sc sc; + swreg reg; + int shf; + u8 mask; + + if (WARN_ON_ONCE(dst_byte + size > 4 || off % 4 + size > 4)) + return -EOPNOTSUPP; + + idx = off / 4; + + /* Move the entire word */ + if (size == 4) { + wrp_mov(nfp_prog, reg_both(dst), + should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx)); + return 0; + } + + if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX)) + return -EOPNOTSUPP; + + src_byte = off % 4; + + mask = (1 << size) - 1; + mask <<= dst_byte; + + if (WARN_ON_ONCE(mask > 0xf)) + return -EOPNOTSUPP; + + shf = abs(src_byte - dst_byte) * 8; + if (src_byte == dst_byte) { + sc = SHF_SC_NONE; + } else if (src_byte < dst_byte) { + shf = 32 - shf; + sc = SHF_SC_L_SHF; + } else { + sc = SHF_SC_R_SHF; + } + + /* ld_field can address fewer indexes, if offset too large do RMW. + * Because we RMV twice we waste 2 cycles on unaligned 8 byte writes. + */ + if (idx <= RE_REG_LM_IDX_MAX) { + reg = reg_lm(lm3 ? 3 : 0, idx); + } else { + reg = imm_a(nfp_prog); + /* If it's not the first part of the load and we start a new GPR + * that means we are loading a second part of the LMEM word into + * a new GPR. IOW we've already looked that LMEM word and + * therefore it has been loaded into imm_a(). + */ + if (first || !new_gpr) + wrp_mov(nfp_prog, reg, reg_lm(0, idx)); + } + + emit_ld_field_any(nfp_prog, reg_both(dst), mask, reg, sc, shf, new_gpr); + + if (should_inc) + wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3)); + + return 0; +} + +static int +wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off, + unsigned int size, bool first, bool new_gpr, bool last, bool lm3, + bool needs_inc) +{ + bool should_inc = needs_inc && new_gpr && !last; + u32 idx, dst_byte; + enum shf_sc sc; + swreg reg; + int shf; + u8 mask; + + if (WARN_ON_ONCE(src_byte + size > 4 || off % 4 + size > 4)) + return -EOPNOTSUPP; + + idx = off / 4; + + /* Move the entire word */ + if (size == 4) { + wrp_mov(nfp_prog, + should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx), + reg_b(src)); + return 0; + } + + if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX)) + return -EOPNOTSUPP; + + dst_byte = off % 4; + + mask = (1 << size) - 1; + mask <<= dst_byte; + + if (WARN_ON_ONCE(mask > 0xf)) + return -EOPNOTSUPP; + + shf = abs(src_byte - dst_byte) * 8; + if (src_byte == dst_byte) { + sc = SHF_SC_NONE; + } else if (src_byte < dst_byte) { + shf = 32 - shf; + sc = SHF_SC_L_SHF; + } else { + sc = SHF_SC_R_SHF; + } + + /* ld_field can address fewer indexes, if offset too large do RMW. + * Because we RMV twice we waste 2 cycles on unaligned 8 byte writes. + */ + if (idx <= RE_REG_LM_IDX_MAX) { + reg = reg_lm(lm3 ? 3 : 0, idx); + } else { + reg = imm_a(nfp_prog); + /* Only first and last LMEM locations are going to need RMW, + * the middle location will be overwritten fully. + */ + if (first || last) + wrp_mov(nfp_prog, reg, reg_lm(0, idx)); + } + + emit_ld_field(nfp_prog, reg, mask, reg_b(src), sc, shf); + + if (new_gpr || last) { + if (idx > RE_REG_LM_IDX_MAX) + wrp_mov(nfp_prog, reg_lm(0, idx), reg); + if (should_inc) + wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3)); + } + + return 0; +} + +static int +mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size, unsigned int ptr_off, u8 gpr, u8 ptr_gpr, + bool clr_gpr, lmem_step step) +{ + s32 off = nfp_prog->stack_depth + meta->insn.off + ptr_off; + bool first = true, last; + bool needs_inc = false; + swreg stack_off_reg; + u8 prev_gpr = 255; + u32 gpr_byte = 0; + bool lm3 = true; + int ret; + + if (meta->ptr_not_const) { + /* Use of the last encountered ptr_off is OK, they all have + * the same alignment. Depend on low bits of value being + * discarded when written to LMaddr register. + */ + stack_off_reg = ur_load_imm_any(nfp_prog, meta->insn.off, + stack_imm(nfp_prog)); + + emit_alu(nfp_prog, imm_b(nfp_prog), + reg_a(ptr_gpr), ALU_OP_ADD, stack_off_reg); + + needs_inc = true; + } else if (off + size <= 64) { + /* We can reach bottom 64B with LMaddr0 */ + lm3 = false; + } else if (round_down(off, 32) == round_down(off + size - 1, 32)) { + /* We have to set up a new pointer. If we know the offset + * and the entire access falls into a single 32 byte aligned + * window we won't have to increment the LM pointer. + * The 32 byte alignment is imporant because offset is ORed in + * not added when doing *l$indexN[off]. + */ + stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 32), + stack_imm(nfp_prog)); + emit_alu(nfp_prog, imm_b(nfp_prog), + stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg); + + off %= 32; + } else { + stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 4), + stack_imm(nfp_prog)); + + emit_alu(nfp_prog, imm_b(nfp_prog), + stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg); + + needs_inc = true; + } + if (lm3) { + emit_csr_wr(nfp_prog, imm_b(nfp_prog), NFP_CSR_ACT_LM_ADDR3); + /* For size < 4 one slot will be filled by zeroing of upper. */ + wrp_nops(nfp_prog, clr_gpr && size < 8 ? 2 : 3); + } + + if (clr_gpr && size < 8) + wrp_immed(nfp_prog, reg_both(gpr + 1), 0); + + while (size) { + u32 slice_end; + u8 slice_size; + + slice_size = min(size, 4 - gpr_byte); + slice_end = min(off + slice_size, round_up(off + 1, 4)); + slice_size = slice_end - off; + + last = slice_size == size; + + if (needs_inc) + off %= 4; + + ret = step(nfp_prog, gpr, gpr_byte, off, slice_size, + first, gpr != prev_gpr, last, lm3, needs_inc); + if (ret) + return ret; + + prev_gpr = gpr; + first = false; + + gpr_byte += slice_size; + if (gpr_byte >= 4) { + gpr_byte -= 4; + gpr++; + } + + size -= slice_size; + off += slice_size; + } return 0; } @@ -697,7 +875,7 @@ static int wrp_set_mark(struct nfp_prog *nfp_prog, u8 src) static void wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm) { - u32 tmp_reg; + swreg tmp_reg; if (alu_op == ALU_OP_AND) { if (!imm) @@ -714,7 +892,7 @@ wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm) if (alu_op == ALU_OP_XOR) { if (!~imm) emit_alu(nfp_prog, reg_both(dst), reg_none(), - ALU_OP_NEG, reg_b(dst)); + ALU_OP_NOT, reg_b(dst)); if (!imm || !~imm) return; } @@ -815,7 +993,7 @@ wrp_cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, const struct bpf_insn *insn = &meta->insn; u64 imm = insn->imm; /* sign extend */ u8 reg = insn->dst_reg * 2; - u32 tmp_reg; + swreg tmp_reg; if (insn->off < 0) /* TODO */ return -EOPNOTSUPP; @@ -844,7 +1022,10 @@ wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, enum br_mask br_mask, bool swap) { const struct bpf_insn *insn = &meta->insn; - u8 areg = insn->src_reg * 2, breg = insn->dst_reg * 2; + u8 areg, breg; + + areg = insn->dst_reg * 2; + breg = insn->src_reg * 2; if (insn->off < 0) /* TODO */ return -EOPNOTSUPP; @@ -863,13 +1044,34 @@ wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, return 0; } +static void wrp_end32(struct nfp_prog *nfp_prog, swreg reg_in, u8 gpr_out) +{ + emit_ld_field(nfp_prog, reg_both(gpr_out), 0xf, reg_in, + SHF_SC_R_ROT, 8); + emit_ld_field(nfp_prog, reg_both(gpr_out), 0x5, reg_a(gpr_out), + SHF_SC_R_ROT, 16); +} + /* --- Callbacks --- */ static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; - - wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2); - wrp_reg_mov(nfp_prog, insn->dst_reg * 2 + 1, insn->src_reg * 2 + 1); + u8 dst = insn->dst_reg * 2; + u8 src = insn->src_reg * 2; + + if (insn->src_reg == BPF_REG_10) { + swreg stack_depth_reg; + + stack_depth_reg = ur_load_imm_any(nfp_prog, + nfp_prog->stack_depth, + stack_imm(nfp_prog)); + emit_alu(nfp_prog, reg_both(dst), + stack_reg(nfp_prog), ALU_OP_ADD, stack_depth_reg); + wrp_immed(nfp_prog, reg_both(dst + 1), 0); + } else { + wrp_reg_mov(nfp_prog, dst, src); + wrp_reg_mov(nfp_prog, dst + 1, src + 1); + } return 0; } @@ -964,28 +1166,64 @@ static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return 0; } -static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; - if (insn->imm != 32) - return 1; /* TODO */ - - wrp_reg_mov(nfp_prog, insn->dst_reg * 2 + 1, insn->dst_reg * 2); - wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), 0); + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), reg_imm(0), + ALU_OP_SUB, reg_b(insn->dst_reg * 2)); + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), reg_imm(0), + ALU_OP_SUB_C, reg_b(insn->dst_reg * 2 + 1)); return 0; } -static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; + u8 dst = insn->dst_reg * 2; + + if (insn->imm < 32) { + emit_shf(nfp_prog, reg_both(dst + 1), + reg_a(dst + 1), SHF_OP_NONE, reg_b(dst), + SHF_SC_R_DSHF, 32 - insn->imm); + emit_shf(nfp_prog, reg_both(dst), + reg_none(), SHF_OP_NONE, reg_b(dst), + SHF_SC_L_SHF, insn->imm); + } else if (insn->imm == 32) { + wrp_reg_mov(nfp_prog, dst + 1, dst); + wrp_immed(nfp_prog, reg_both(dst), 0); + } else if (insn->imm > 32) { + emit_shf(nfp_prog, reg_both(dst + 1), + reg_none(), SHF_OP_NONE, reg_b(dst), + SHF_SC_L_SHF, insn->imm - 32); + wrp_immed(nfp_prog, reg_both(dst), 0); + } - if (insn->imm != 32) - return 1; /* TODO */ + return 0; +} - wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->dst_reg * 2 + 1); - wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); +static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u8 dst = insn->dst_reg * 2; + + if (insn->imm < 32) { + emit_shf(nfp_prog, reg_both(dst), + reg_a(dst + 1), SHF_OP_NONE, reg_b(dst), + SHF_SC_R_DSHF, insn->imm); + emit_shf(nfp_prog, reg_both(dst + 1), + reg_none(), SHF_OP_NONE, reg_b(dst + 1), + SHF_SC_R_SHF, insn->imm); + } else if (insn->imm == 32) { + wrp_reg_mov(nfp_prog, dst, dst + 1); + wrp_immed(nfp_prog, reg_both(dst + 1), 0); + } else if (insn->imm > 32) { + emit_shf(nfp_prog, reg_both(dst), + reg_none(), SHF_OP_NONE, reg_b(dst + 1), + SHF_SC_R_SHF, insn->imm - 32); + wrp_immed(nfp_prog, reg_both(dst + 1), 0); + } return 0; } @@ -1060,6 +1298,16 @@ static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB, !meta->insn.imm); } +static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + u8 dst = meta->insn.dst_reg * 2; + + emit_alu(nfp_prog, reg_both(dst), reg_imm(0), ALU_OP_SUB, reg_b(dst)); + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); + + return 0; +} + static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; @@ -1075,21 +1323,59 @@ static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return 0; } +static int end_reg32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u8 gpr = insn->dst_reg * 2; + + switch (insn->imm) { + case 16: + emit_ld_field(nfp_prog, reg_both(gpr), 0x9, reg_b(gpr), + SHF_SC_R_ROT, 8); + emit_ld_field(nfp_prog, reg_both(gpr), 0xe, reg_a(gpr), + SHF_SC_R_SHF, 16); + + wrp_immed(nfp_prog, reg_both(gpr + 1), 0); + break; + case 32: + wrp_end32(nfp_prog, reg_a(gpr), gpr); + wrp_immed(nfp_prog, reg_both(gpr + 1), 0); + break; + case 64: + wrp_mov(nfp_prog, imm_a(nfp_prog), reg_b(gpr + 1)); + + wrp_end32(nfp_prog, reg_a(gpr), gpr + 1); + wrp_end32(nfp_prog, imm_a(nfp_prog), gpr); + break; + } + + return 0; +} + static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - wrp_immed(nfp_prog, reg_both(nfp_meta_prev(meta)->insn.dst_reg * 2 + 1), - meta->insn.imm); + struct nfp_insn_meta *prev = nfp_meta_prev(meta); + u32 imm_lo, imm_hi; + u8 dst; + + dst = prev->insn.dst_reg * 2; + imm_lo = prev->insn.imm; + imm_hi = meta->insn.imm; + + wrp_immed(nfp_prog, reg_both(dst), imm_lo); + + /* mov is always 1 insn, load imm may be two, so try to use mov */ + if (imm_hi == imm_lo) + wrp_mov(nfp_prog, reg_both(dst + 1), reg_a(dst)); + else + wrp_immed(nfp_prog, reg_both(dst + 1), imm_hi); return 0; } static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - const struct bpf_insn *insn = &meta->insn; - meta->double_cb = imm_ld8_part2; - wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm); - return 0; } @@ -1111,82 +1397,235 @@ static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { return construct_data_ind_ld(nfp_prog, meta->insn.imm, - meta->insn.src_reg * 2, true, 1); + meta->insn.src_reg * 2, 1); } static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { return construct_data_ind_ld(nfp_prog, meta->insn.imm, - meta->insn.src_reg * 2, true, 2); + meta->insn.src_reg * 2, 2); } static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { return construct_data_ind_ld(nfp_prog, meta->insn.imm, - meta->insn.src_reg * 2, true, 4); + meta->insn.src_reg * 2, 4); } -static int mem_ldx4_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +static int +mem_ldx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size, unsigned int ptr_off) { - if (meta->insn.off == offsetof(struct sk_buff, len)) - emit_alu(nfp_prog, reg_both(meta->insn.dst_reg * 2), - reg_none(), ALU_OP_NONE, NFP_BPF_ABI_LEN); - else + return mem_op_stack(nfp_prog, meta, size, ptr_off, + meta->insn.dst_reg * 2, meta->insn.src_reg * 2, + true, wrp_lmem_load); +} + +static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + u8 size) +{ + swreg dst = reg_both(meta->insn.dst_reg * 2); + + switch (meta->insn.off) { + case offsetof(struct __sk_buff, len): + if (size != FIELD_SIZEOF(struct __sk_buff, len)) + return -EOPNOTSUPP; + wrp_mov(nfp_prog, dst, plen_reg(nfp_prog)); + break; + case offsetof(struct __sk_buff, data): + if (size != FIELD_SIZEOF(struct __sk_buff, data)) + return -EOPNOTSUPP; + wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog)); + break; + case offsetof(struct __sk_buff, data_end): + if (size != FIELD_SIZEOF(struct __sk_buff, data_end)) + return -EOPNOTSUPP; + emit_alu(nfp_prog, dst, + plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog)); + break; + default: return -EOPNOTSUPP; + } + + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); return 0; } -static int mem_ldx4_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +static int mem_ldx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + u8 size) { - u32 dst = reg_both(meta->insn.dst_reg * 2); + swreg dst = reg_both(meta->insn.dst_reg * 2); - if (meta->insn.off != offsetof(struct xdp_md, data) && - meta->insn.off != offsetof(struct xdp_md, data_end)) + switch (meta->insn.off) { + case offsetof(struct xdp_md, data): + if (size != FIELD_SIZEOF(struct xdp_md, data)) + return -EOPNOTSUPP; + wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog)); + break; + case offsetof(struct xdp_md, data_end): + if (size != FIELD_SIZEOF(struct xdp_md, data_end)) + return -EOPNOTSUPP; + emit_alu(nfp_prog, dst, + plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog)); + break; + default: return -EOPNOTSUPP; + } - emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, NFP_BPF_ABI_PKT); + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); - if (meta->insn.off == offsetof(struct xdp_md, data)) - return 0; + return 0; +} - emit_alu(nfp_prog, dst, dst, ALU_OP_ADD, NFP_BPF_ABI_LEN); +static int +mem_ldx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size) +{ + swreg tmp_reg; - return 0; + tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); + + return data_ld_host_order(nfp_prog, meta->insn.src_reg * 2, tmp_reg, + meta->insn.dst_reg * 2, size); +} + +static int +mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size) +{ + if (meta->ptr.type == PTR_TO_CTX) { + if (nfp_prog->type == BPF_PROG_TYPE_XDP) + return mem_ldx_xdp(nfp_prog, meta, size); + else + return mem_ldx_skb(nfp_prog, meta, size); + } + + if (meta->ptr.type == PTR_TO_PACKET) + return mem_ldx_data(nfp_prog, meta, size); + + if (meta->ptr.type == PTR_TO_STACK) + return mem_ldx_stack(nfp_prog, meta, size, + meta->ptr.off + meta->ptr.var_off.value); + + return -EOPNOTSUPP; +} + +static int mem_ldx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_ldx(nfp_prog, meta, 1); +} + +static int mem_ldx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_ldx(nfp_prog, meta, 2); } static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - int ret; + return mem_ldx(nfp_prog, meta, 4); +} - if (nfp_prog->act == NN_ACT_XDP) - ret = mem_ldx4_xdp(nfp_prog, meta); - else - ret = mem_ldx4_skb(nfp_prog, meta); +static int mem_ldx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_ldx(nfp_prog, meta, 8); +} - wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); +static int +mem_st_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size) +{ + u64 imm = meta->insn.imm; /* sign extend */ + swreg off_reg; + + off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); - return ret; + return data_st_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg, + imm, size); } -static int mem_stx4_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +static int mem_st(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size) { - if (meta->insn.off == offsetof(struct sk_buff, mark)) - return wrp_set_mark(nfp_prog, meta->insn.src_reg * 2); + if (meta->ptr.type == PTR_TO_PACKET) + return mem_st_data(nfp_prog, meta, size); return -EOPNOTSUPP; } -static int mem_stx4_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +static int mem_st1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_st(nfp_prog, meta, 1); +} + +static int mem_st2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_st(nfp_prog, meta, 2); +} + +static int mem_st4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_st(nfp_prog, meta, 4); +} + +static int mem_st8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { + return mem_st(nfp_prog, meta, 8); +} + +static int +mem_stx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size) +{ + swreg off_reg; + + off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); + + return data_stx_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg, + meta->insn.src_reg * 2, size); +} + +static int +mem_stx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size, unsigned int ptr_off) +{ + return mem_op_stack(nfp_prog, meta, size, ptr_off, + meta->insn.src_reg * 2, meta->insn.dst_reg * 2, + false, wrp_lmem_store); +} + +static int +mem_stx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int size) +{ + if (meta->ptr.type == PTR_TO_PACKET) + return mem_stx_data(nfp_prog, meta, size); + + if (meta->ptr.type == PTR_TO_STACK) + return mem_stx_stack(nfp_prog, meta, size, + meta->ptr.off + meta->ptr.var_off.value); + return -EOPNOTSUPP; } +static int mem_stx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_stx(nfp_prog, meta, 1); +} + +static int mem_stx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_stx(nfp_prog, meta, 2); +} + static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - if (nfp_prog->act == NN_ACT_XDP) - return mem_stx4_xdp(nfp_prog, meta); - return mem_stx4_skb(nfp_prog, meta); + return mem_stx(nfp_prog, meta, 4); +} + +static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_stx(nfp_prog, meta, 8); } static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) @@ -1202,8 +1641,10 @@ static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; u64 imm = insn->imm; /* sign extend */ - u32 or1 = reg_a(insn->dst_reg * 2), or2 = reg_b(insn->dst_reg * 2 + 1); - u32 tmp_reg; + swreg or1, or2, tmp_reg; + + or1 = reg_a(insn->dst_reg * 2); + or2 = reg_b(insn->dst_reg * 2 + 1); if (insn->off < 0) /* TODO */ return -EOPNOTSUPP; @@ -1230,29 +1671,29 @@ static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int jgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return wrp_cmp_imm(nfp_prog, meta, BR_BLO, false); + return wrp_cmp_imm(nfp_prog, meta, BR_BLO, true); } static int jge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true); + return wrp_cmp_imm(nfp_prog, meta, BR_BHS, false); } static int jlt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return wrp_cmp_imm(nfp_prog, meta, BR_BHS, false); + return wrp_cmp_imm(nfp_prog, meta, BR_BLO, false); } static int jle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return wrp_cmp_imm(nfp_prog, meta, BR_BLO, true); + return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true); } static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; u64 imm = insn->imm; /* sign extend */ - u32 tmp_reg; + swreg tmp_reg; if (insn->off < 0) /* TODO */ return -EOPNOTSUPP; @@ -1283,7 +1724,7 @@ static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; u64 imm = insn->imm; /* sign extend */ - u32 tmp_reg; + swreg tmp_reg; if (insn->off < 0) /* TODO */ return -EOPNOTSUPP; @@ -1292,6 +1733,7 @@ static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2), ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1)); emit_br(nfp_prog, BR_BNE, insn->off, 0); + return 0; } tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); @@ -1327,22 +1769,22 @@ static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int jgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return wrp_cmp_reg(nfp_prog, meta, BR_BLO, false); + return wrp_cmp_reg(nfp_prog, meta, BR_BLO, true); } static int jge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true); + return wrp_cmp_reg(nfp_prog, meta, BR_BHS, false); } static int jlt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return wrp_cmp_reg(nfp_prog, meta, BR_BHS, false); + return wrp_cmp_reg(nfp_prog, meta, BR_BLO, false); } static int jle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return wrp_cmp_reg(nfp_prog, meta, BR_BLO, true); + return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true); } static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) @@ -1375,6 +1817,7 @@ static const instr_cb_t instr_cb[256] = { [BPF_ALU64 | BPF_ADD | BPF_K] = add_imm64, [BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64, [BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64, + [BPF_ALU64 | BPF_NEG] = neg_reg64, [BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64, [BPF_ALU64 | BPF_RSH | BPF_K] = shr_imm64, [BPF_ALU | BPF_MOV | BPF_X] = mov_reg, @@ -1389,7 +1832,9 @@ static const instr_cb_t instr_cb[256] = { [BPF_ALU | BPF_ADD | BPF_K] = add_imm, [BPF_ALU | BPF_SUB | BPF_X] = sub_reg, [BPF_ALU | BPF_SUB | BPF_K] = sub_imm, + [BPF_ALU | BPF_NEG] = neg_reg, [BPF_ALU | BPF_LSH | BPF_K] = shl_imm, + [BPF_ALU | BPF_END | BPF_X] = end_reg32, [BPF_LD | BPF_IMM | BPF_DW] = imm_ld8, [BPF_LD | BPF_ABS | BPF_B] = data_ld1, [BPF_LD | BPF_ABS | BPF_H] = data_ld2, @@ -1397,8 +1842,18 @@ static const instr_cb_t instr_cb[256] = { [BPF_LD | BPF_IND | BPF_B] = data_ind_ld1, [BPF_LD | BPF_IND | BPF_H] = data_ind_ld2, [BPF_LD | BPF_IND | BPF_W] = data_ind_ld4, + [BPF_LDX | BPF_MEM | BPF_B] = mem_ldx1, + [BPF_LDX | BPF_MEM | BPF_H] = mem_ldx2, [BPF_LDX | BPF_MEM | BPF_W] = mem_ldx4, + [BPF_LDX | BPF_MEM | BPF_DW] = mem_ldx8, + [BPF_STX | BPF_MEM | BPF_B] = mem_stx1, + [BPF_STX | BPF_MEM | BPF_H] = mem_stx2, [BPF_STX | BPF_MEM | BPF_W] = mem_stx4, + [BPF_STX | BPF_MEM | BPF_DW] = mem_stx8, + [BPF_ST | BPF_MEM | BPF_B] = mem_st1, + [BPF_ST | BPF_MEM | BPF_H] = mem_st2, + [BPF_ST | BPF_MEM | BPF_W] = mem_st4, + [BPF_ST | BPF_MEM | BPF_DW] = mem_st8, [BPF_JMP | BPF_JA | BPF_K] = jump, [BPF_JMP | BPF_JEQ | BPF_K] = jeq_imm, [BPF_JMP | BPF_JGT | BPF_K] = jgt_imm, @@ -1510,37 +1965,9 @@ static int nfp_fixup_branches(struct nfp_prog *nfp_prog) static void nfp_intro(struct nfp_prog *nfp_prog) { - emit_alu(nfp_prog, pkt_reg(nfp_prog), - reg_none(), ALU_OP_NONE, NFP_BPF_ABI_PKT); -} - -static void nfp_outro_tc_legacy(struct nfp_prog *nfp_prog) -{ - const u8 act2code[] = { - [NN_ACT_TC_DROP] = 0x22, - [NN_ACT_TC_REDIR] = 0x24 - }; - /* Target for aborts */ - nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); - wrp_immed(nfp_prog, reg_both(0), 0); - - /* Target for normal exits */ - nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog); - /* Legacy TC mode: - * 0 0x11 -> pass, count as stat0 - * -1 drop 0x22 -> drop, count as stat1 - * redir 0x24 -> redir, count as stat1 - * ife mark 0x21 -> pass, count as stat1 - * ife + tx 0x24 -> redir, count as stat1 - */ - emit_br_byte_neq(nfp_prog, reg_b(0), 0xff, 0, nfp_prog->tgt_done, 2); - emit_alu(nfp_prog, reg_a(0), - reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS); - emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16); - - emit_br(nfp_prog, BR_UNC, nfp_prog->tgt_done, 1); - emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(act2code[nfp_prog->act]), - SHF_SC_L_SHF, 16); + wrp_immed(nfp_prog, plen_reg(nfp_prog), GENMASK(13, 0)); + emit_alu(nfp_prog, plen_reg(nfp_prog), + plen_reg(nfp_prog), ALU_OP_AND, pv_len(nfp_prog)); } static void nfp_outro_tc_da(struct nfp_prog *nfp_prog) @@ -1562,8 +1989,7 @@ static void nfp_outro_tc_da(struct nfp_prog *nfp_prog) emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); - emit_alu(nfp_prog, reg_a(0), - reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS); + wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16); /* Target for normal exits */ @@ -1572,8 +1998,7 @@ static void nfp_outro_tc_da(struct nfp_prog *nfp_prog) /* if R0 > 7 jump to abort */ emit_alu(nfp_prog, reg_none(), reg_imm(7), ALU_OP_SUB, reg_b(0)); emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0); - emit_alu(nfp_prog, reg_a(0), - reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS); + wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); wrp_immed(nfp_prog, reg_b(2), 0x41221211); wrp_immed(nfp_prog, reg_b(3), 0x41001211); @@ -1610,8 +2035,7 @@ static void nfp_outro_xdp(struct nfp_prog *nfp_prog) emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); - emit_alu(nfp_prog, reg_a(0), - reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS); + wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF, 16); /* Target for normal exits */ @@ -1632,24 +2056,21 @@ static void nfp_outro_xdp(struct nfp_prog *nfp_prog) emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); - emit_alu(nfp_prog, reg_a(0), - reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS); + wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); } static void nfp_outro(struct nfp_prog *nfp_prog) { - switch (nfp_prog->act) { - case NN_ACT_DIRECT: + switch (nfp_prog->type) { + case BPF_PROG_TYPE_SCHED_CLS: nfp_outro_tc_da(nfp_prog); break; - case NN_ACT_TC_DROP: - case NN_ACT_TC_REDIR: - nfp_outro_tc_legacy(nfp_prog); - break; - case NN_ACT_XDP: + case BPF_PROG_TYPE_XDP: nfp_outro_xdp(nfp_prog); break; + default: + WARN_ON(1); } } @@ -1688,29 +2109,11 @@ static int nfp_translate(struct nfp_prog *nfp_prog) if (nfp_prog->error) return nfp_prog->error; - return nfp_fixup_branches(nfp_prog); -} - -static int -nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, - unsigned int cnt) -{ - unsigned int i; - - for (i = 0; i < cnt; i++) { - struct nfp_insn_meta *meta; - - meta = kzalloc(sizeof(*meta), GFP_KERNEL); - if (!meta) - return -ENOMEM; - - meta->insn = prog[i]; - meta->n = i; - - list_add_tail(&meta->l, &nfp_prog->insns); - } + wrp_nops(nfp_prog, NFP_USTORE_PREFETCH_WINDOW); + if (nfp_prog->error) + return nfp_prog->error; - return 0; + return nfp_fixup_branches(nfp_prog); } /* --- Optimizations --- */ @@ -1737,38 +2140,6 @@ static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog) } } -/* Try to rename registers so that program uses only low ones */ -static int nfp_bpf_opt_reg_rename(struct nfp_prog *nfp_prog) -{ - bool reg_used[MAX_BPF_REG] = {}; - u8 tgt_reg[MAX_BPF_REG] = {}; - struct nfp_insn_meta *meta; - unsigned int i, j; - - list_for_each_entry(meta, &nfp_prog->insns, l) { - if (meta->skip) - continue; - - reg_used[meta->insn.src_reg] = true; - reg_used[meta->insn.dst_reg] = true; - } - - for (i = 0, j = 0; i < ARRAY_SIZE(tgt_reg); i++) { - if (!reg_used[i]) - continue; - - tgt_reg[i] = j++; - } - nfp_prog->num_regs = j; - - list_for_each_entry(meta, &nfp_prog->insns, l) { - meta->insn.src_reg = tgt_reg[meta->insn.src_reg]; - meta->insn.dst_reg = tgt_reg[meta->insn.dst_reg]; - } - - return 0; -} - /* Remove masking after load since our load guarantees this is not needed */ static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog) { @@ -1845,79 +2216,47 @@ static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog) static int nfp_bpf_optimize(struct nfp_prog *nfp_prog) { - int ret; - nfp_bpf_opt_reg_init(nfp_prog); - ret = nfp_bpf_opt_reg_rename(nfp_prog); - if (ret) - return ret; - nfp_bpf_opt_ld_mask(nfp_prog); nfp_bpf_opt_ld_shift(nfp_prog); return 0; } -/** - * nfp_bpf_jit() - translate BPF code into NFP assembly - * @filter: kernel BPF filter struct - * @prog_mem: memory to store assembler instructions - * @act: action attached to this eBPF program - * @prog_start: offset of the first instruction when loaded - * @prog_done: where to jump on exit - * @prog_sz: size of @prog_mem in instructions - * @res: achieved parameters of translation results - */ -int -nfp_bpf_jit(struct bpf_prog *filter, void *prog_mem, - enum nfp_bpf_action_type act, - unsigned int prog_start, unsigned int prog_done, - unsigned int prog_sz, struct nfp_bpf_result *res) +static int nfp_bpf_ustore_calc(struct nfp_prog *nfp_prog, __le64 *ustore) { - struct nfp_prog *nfp_prog; - int ret; + int i; - nfp_prog = kzalloc(sizeof(*nfp_prog), GFP_KERNEL); - if (!nfp_prog) - return -ENOMEM; + for (i = 0; i < nfp_prog->prog_len; i++) { + int err; - INIT_LIST_HEAD(&nfp_prog->insns); - nfp_prog->act = act; - nfp_prog->start_off = prog_start; - nfp_prog->tgt_done = prog_done; + err = nfp_ustore_check_valid_no_ecc(nfp_prog->prog[i]); + if (err) + return err; - ret = nfp_prog_prepare(nfp_prog, filter->insnsi, filter->len); - if (ret) - goto out; + nfp_prog->prog[i] = nfp_ustore_calc_ecc_insn(nfp_prog->prog[i]); - ret = nfp_prog_verify(nfp_prog, filter); - if (ret) - goto out; + ustore[i] = cpu_to_le64(nfp_prog->prog[i]); + } - ret = nfp_bpf_optimize(nfp_prog); - if (ret) - goto out; + return 0; +} - if (nfp_prog->num_regs <= 7) - nfp_prog->regs_per_thread = 16; - else - nfp_prog->regs_per_thread = 32; +int nfp_bpf_jit(struct nfp_prog *nfp_prog) +{ + int ret; - nfp_prog->prog = prog_mem; - nfp_prog->__prog_alloc_len = prog_sz; + ret = nfp_bpf_optimize(nfp_prog); + if (ret) + return ret; ret = nfp_translate(nfp_prog); if (ret) { pr_err("Translation failed with error %d (translated: %u)\n", ret, nfp_prog->n_translated); - ret = -EINVAL; + return -EINVAL; } - res->n_instr = nfp_prog->prog_len; - res->dense_mode = nfp_prog->num_regs <= 7; -out: - nfp_prog_free(nfp_prog); - - return ret; + return nfp_bpf_ustore_calc(nfp_prog, (__force __le64 *)nfp_prog->prog); } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index be2cf10a2cd7..e379b78e86ef 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -42,9 +42,11 @@ static bool nfp_net_ebpf_capable(struct nfp_net *nn) { +#ifdef __LITTLE_ENDIAN if (nn->cap & NFP_NET_CFG_CTRL_BPF && nn_readb(nn, NFP_NET_CFG_BPF_ABI) == NFP_NET_BPF_ABI) return true; +#endif return false; } @@ -52,28 +54,25 @@ static int nfp_bpf_xdp_offload(struct nfp_app *app, struct nfp_net *nn, struct bpf_prog *prog) { - struct tc_cls_bpf_offload cmd = { - .prog = prog, - }; + bool running, xdp_running; int ret; if (!nfp_net_ebpf_capable(nn)) return -EINVAL; - if (nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) { - if (!nn->dp.bpf_offload_xdp) - return prog ? -EBUSY : 0; - cmd.command = prog ? TC_CLSBPF_REPLACE : TC_CLSBPF_DESTROY; - } else { - if (!prog) - return 0; - cmd.command = TC_CLSBPF_ADD; - } + running = nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF; + xdp_running = running && nn->dp.bpf_offload_xdp; + + if (!prog && !xdp_running) + return 0; + if (prog && running && !xdp_running) + return -EBUSY; - ret = nfp_net_bpf_offload(nn, &cmd); + ret = nfp_net_bpf_offload(nn, prog, running); /* Stop offload if replace not possible */ - if (ret && cmd.command == TC_CLSBPF_REPLACE) + if (ret && prog) nfp_bpf_xdp_offload(app, nn, NULL); + nn->dp.bpf_offload_xdp = prog && !ret; return ret; } @@ -83,59 +82,78 @@ static const char *nfp_bpf_extra_cap(struct nfp_app *app, struct nfp_net *nn) return nfp_net_ebpf_capable(nn) ? "BPF" : ""; } -static int -nfp_bpf_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id) -{ - struct nfp_net_bpf_priv *priv; - int ret; - - /* Limit to single port, otherwise it's just a NIC */ - if (id > 0) { - nfp_warn(app->cpp, - "BPF NIC doesn't support more than one port right now\n"); - nn->port = nfp_port_alloc(app, NFP_PORT_INVALID, nn->dp.netdev); - return PTR_ERR_OR_ZERO(nn->port); - } - - priv = kmalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; - - nn->app_priv = priv; - spin_lock_init(&priv->rx_filter_lock); - setup_timer(&priv->rx_filter_stats_timer, - nfp_net_filter_stats_timer, (unsigned long)nn); - - ret = nfp_app_nic_vnic_alloc(app, nn, id); - if (ret) - kfree(priv); - - return ret; -} - static void nfp_bpf_vnic_free(struct nfp_app *app, struct nfp_net *nn) { if (nn->dp.bpf_offload_xdp) nfp_bpf_xdp_offload(app, nn, NULL); - kfree(nn->app_priv); } -static int nfp_bpf_setup_tc(struct nfp_app *app, struct net_device *netdev, - enum tc_setup_type type, void *type_data) +static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type, + void *type_data, void *cb_priv) { struct tc_cls_bpf_offload *cls_bpf = type_data; - struct nfp_net *nn = netdev_priv(netdev); + struct nfp_net *nn = cb_priv; - if (type != TC_SETUP_CLSBPF || !nfp_net_ebpf_capable(nn) || - !is_classid_clsact_ingress(cls_bpf->common.classid) || + if (type != TC_SETUP_CLSBPF || + !tc_can_offload(nn->dp.netdev) || + !nfp_net_ebpf_capable(nn) || cls_bpf->common.protocol != htons(ETH_P_ALL) || cls_bpf->common.chain_index) return -EOPNOTSUPP; - if (nn->dp.bpf_offload_xdp) return -EBUSY; - return nfp_net_bpf_offload(nn, cls_bpf); + /* Only support TC direct action */ + if (!cls_bpf->exts_integrated || + tcf_exts_has_actions(cls_bpf->exts)) { + nn_err(nn, "only direct action with no legacy actions supported\n"); + return -EOPNOTSUPP; + } + + switch (cls_bpf->command) { + case TC_CLSBPF_REPLACE: + return nfp_net_bpf_offload(nn, cls_bpf->prog, true); + case TC_CLSBPF_ADD: + return nfp_net_bpf_offload(nn, cls_bpf->prog, false); + case TC_CLSBPF_DESTROY: + return nfp_net_bpf_offload(nn, NULL, true); + default: + return -EOPNOTSUPP; + } +} + +static int nfp_bpf_setup_tc_block(struct net_device *netdev, + struct tc_block_offload *f) +{ + struct nfp_net *nn = netdev_priv(netdev); + + if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + return tcf_block_cb_register(f->block, + nfp_bpf_setup_tc_block_cb, + nn, nn); + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, + nfp_bpf_setup_tc_block_cb, + nn); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int nfp_bpf_setup_tc(struct nfp_app *app, struct net_device *netdev, + enum tc_setup_type type, void *type_data) +{ + switch (type) { + case TC_SETUP_BLOCK: + return nfp_bpf_setup_tc_block(netdev, type_data); + default: + return -EOPNOTSUPP; + } } static bool nfp_bpf_tc_busy(struct nfp_app *app, struct nfp_net *nn) @@ -149,10 +167,14 @@ const struct nfp_app_type app_bpf = { .extra_cap = nfp_bpf_extra_cap, - .vnic_alloc = nfp_bpf_vnic_alloc, + .vnic_alloc = nfp_app_nic_vnic_alloc, .vnic_free = nfp_bpf_vnic_free, .setup_tc = nfp_bpf_setup_tc, .tc_busy = nfp_bpf_tc_busy, .xdp_offload = nfp_bpf_xdp_offload, + + .bpf_verifier_prep = nfp_bpf_verifier_prep, + .bpf_translate = nfp_bpf_translate, + .bpf_destroy = nfp_bpf_destroy, }; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 4051e943f363..082a15f6dfb5 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -36,10 +36,11 @@ #include <linux/bitfield.h> #include <linux/bpf.h> +#include <linux/bpf_verifier.h> #include <linux/list.h> #include <linux/types.h> -#include "../nfp_net.h" +#include "../nfp_asm.h" /* For branch fixup logic use up-most byte of branch instruction as scratch * area. Remember to clear this before sending instructions to HW! @@ -53,51 +54,29 @@ enum br_special { }; enum static_regs { - STATIC_REG_PKT = 1, -#define REG_PKT_BANK ALU_DST_A - STATIC_REG_IMM = 2, /* Bank AB */ + STATIC_REG_IMM = 21, /* Bank AB */ + STATIC_REG_STACK = 22, /* Bank A */ + STATIC_REG_PKT_LEN = 22, /* Bank B */ }; -enum nfp_bpf_action_type { - NN_ACT_TC_DROP, - NN_ACT_TC_REDIR, - NN_ACT_DIRECT, - NN_ACT_XDP, +enum pkt_vec { + PKT_VEC_PKT_LEN = 0, + PKT_VEC_PKT_PTR = 2, }; -/* Software register representation, hardware encoding in asm.h */ -#define NN_REG_TYPE GENMASK(31, 24) -#define NN_REG_VAL GENMASK(7, 0) - -enum nfp_bpf_reg_type { - NN_REG_GPR_A = BIT(0), - NN_REG_GPR_B = BIT(1), - NN_REG_NNR = BIT(2), - NN_REG_XFER = BIT(3), - NN_REG_IMM = BIT(4), - NN_REG_NONE = BIT(5), -}; - -#define NN_REG_GPR_BOTH (NN_REG_GPR_A | NN_REG_GPR_B) - -#define reg_both(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_BOTH)) -#define reg_a(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_A)) -#define reg_b(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_B)) -#define reg_nnr(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_NNR)) -#define reg_xfer(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_XFER)) -#define reg_imm(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_IMM)) -#define reg_none() (FIELD_PREP(NN_REG_TYPE, NN_REG_NONE)) +#define pv_len(np) reg_lm(1, PKT_VEC_PKT_LEN) +#define pv_ctm_ptr(np) reg_lm(1, PKT_VEC_PKT_PTR) -#define pkt_reg(np) reg_a((np)->regs_per_thread - STATIC_REG_PKT) -#define imm_a(np) reg_a((np)->regs_per_thread - STATIC_REG_IMM) -#define imm_b(np) reg_b((np)->regs_per_thread - STATIC_REG_IMM) -#define imm_both(np) reg_both((np)->regs_per_thread - STATIC_REG_IMM) +#define stack_reg(np) reg_a(STATIC_REG_STACK) +#define stack_imm(np) imm_b(np) +#define plen_reg(np) reg_b(STATIC_REG_PKT_LEN) +#define pptr_reg(np) pv_ctm_ptr(np) +#define imm_a(np) reg_a(STATIC_REG_IMM) +#define imm_b(np) reg_b(STATIC_REG_IMM) +#define imm_both(np) reg_both(STATIC_REG_IMM) -#define NFP_BPF_ABI_FLAGS reg_nnr(0) +#define NFP_BPF_ABI_FLAGS reg_imm(0) #define NFP_BPF_ABI_FLAG_MARK 1 -#define NFP_BPF_ABI_MARK reg_nnr(1) -#define NFP_BPF_ABI_PKT reg_nnr(2) -#define NFP_BPF_ABI_LEN reg_nnr(3) struct nfp_prog; struct nfp_insn_meta; @@ -113,6 +92,8 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *); /** * struct nfp_insn_meta - BPF instruction wrapper * @insn: BPF instruction + * @ptr: pointer type for memory operations + * @ptr_not_const: pointer is not always constant * @off: index of first generated machine instruction (in nfp_prog.prog) * @n: eBPF instruction number * @skip: skip this instruction (optimized out) @@ -121,6 +102,8 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *); */ struct nfp_insn_meta { struct bpf_insn insn; + struct bpf_reg_state ptr; + bool ptr_not_const; unsigned int off; unsigned short n; bool skip; @@ -156,15 +139,15 @@ static inline u8 mbpf_mode(const struct nfp_insn_meta *meta) * @prog: machine code * @prog_len: number of valid instructions in @prog array * @__prog_alloc_len: alloc size of @prog array - * @act: BPF program/action type (TC DA, TC with action, XDP etc.) - * @num_regs: number of registers used by this program - * @regs_per_thread: number of basic registers allocated per thread + * @verifier_meta: temporary storage for verifier's insn meta + * @type: BPF program type * @start_off: address of the first instruction in the memory * @tgt_out: jump target for normal exit * @tgt_abort: jump target for abort (e.g. access outside of packet buffer) * @tgt_done: jump target to get the next packet * @n_translated: number of successfully translated instructions (for errors) * @error: error code if something went wrong + * @stack_depth: max stack depth from the verifier * @insns: list of BPF instruction wrappers (struct nfp_insn_meta) */ struct nfp_prog { @@ -172,10 +155,9 @@ struct nfp_prog { unsigned int prog_len; unsigned int __prog_alloc_len; - enum nfp_bpf_action_type act; + struct nfp_insn_meta *verifier_meta; - unsigned int num_regs; - unsigned int regs_per_thread; + enum bpf_prog_type type; unsigned int start_off; unsigned int tgt_out; @@ -185,40 +167,26 @@ struct nfp_prog { unsigned int n_translated; int error; - struct list_head insns; -}; + unsigned int stack_depth; -struct nfp_bpf_result { - unsigned int n_instr; - bool dense_mode; + struct list_head insns; }; -int -nfp_bpf_jit(struct bpf_prog *filter, void *prog, enum nfp_bpf_action_type act, - unsigned int prog_start, unsigned int prog_done, - unsigned int prog_sz, struct nfp_bpf_result *res); +int nfp_bpf_jit(struct nfp_prog *prog); -int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog); +extern const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops; +struct netdev_bpf; +struct nfp_app; struct nfp_net; -struct tc_cls_bpf_offload; - -/** - * struct nfp_net_bpf_priv - per-vNIC BPF private data - * @rx_filter: Filter offload statistics - dropped packets/bytes - * @rx_filter_prev: Filter offload statistics - values from previous update - * @rx_filter_change: Jiffies when statistics last changed - * @rx_filter_stats_timer: Timer for polling filter offload statistics - * @rx_filter_lock: Lock protecting timer state changes (teardown) - */ -struct nfp_net_bpf_priv { - struct nfp_stat_pair rx_filter, rx_filter_prev; - unsigned long rx_filter_change; - struct timer_list rx_filter_stats_timer; - spinlock_t rx_filter_lock; -}; -int nfp_net_bpf_offload(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf); -void nfp_net_filter_stats_timer(unsigned long data); +int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, + bool old_prog); +int nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn, + struct netdev_bpf *bpf); +int nfp_bpf_translate(struct nfp_app *app, struct nfp_net *nn, + struct bpf_prog *prog); +int nfp_bpf_destroy(struct nfp_app *app, struct nfp_net *nn, + struct bpf_prog *prog); #endif diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index a88bb5bc0082..b6cee71f49d3 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -51,112 +51,114 @@ #include "../nfp_net_ctrl.h" #include "../nfp_net.h" -void nfp_net_filter_stats_timer(unsigned long data) +static int +nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, + unsigned int cnt) { - struct nfp_net *nn = (void *)data; - struct nfp_net_bpf_priv *priv; - struct nfp_stat_pair latest; - - priv = nn->app_priv; - - spin_lock_bh(&priv->rx_filter_lock); + unsigned int i; - if (nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) - mod_timer(&priv->rx_filter_stats_timer, - jiffies + NFP_NET_STAT_POLL_IVL); + for (i = 0; i < cnt; i++) { + struct nfp_insn_meta *meta; - spin_unlock_bh(&priv->rx_filter_lock); + meta = kzalloc(sizeof(*meta), GFP_KERNEL); + if (!meta) + return -ENOMEM; - latest.pkts = nn_readq(nn, NFP_NET_CFG_STATS_APP1_FRAMES); - latest.bytes = nn_readq(nn, NFP_NET_CFG_STATS_APP1_BYTES); + meta->insn = prog[i]; + meta->n = i; - if (latest.pkts != priv->rx_filter.pkts) - priv->rx_filter_change = jiffies; + list_add_tail(&meta->l, &nfp_prog->insns); + } - priv->rx_filter = latest; + return 0; } -static void nfp_net_bpf_stats_reset(struct nfp_net *nn) +static void nfp_prog_free(struct nfp_prog *nfp_prog) { - struct nfp_net_bpf_priv *priv = nn->app_priv; + struct nfp_insn_meta *meta, *tmp; - priv->rx_filter.pkts = nn_readq(nn, NFP_NET_CFG_STATS_APP1_FRAMES); - priv->rx_filter.bytes = nn_readq(nn, NFP_NET_CFG_STATS_APP1_BYTES); - priv->rx_filter_prev = priv->rx_filter; - priv->rx_filter_change = jiffies; + list_for_each_entry_safe(meta, tmp, &nfp_prog->insns, l) { + list_del(&meta->l); + kfree(meta); + } + kfree(nfp_prog); } -static int -nfp_net_bpf_stats_update(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) +int nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn, + struct netdev_bpf *bpf) { - struct nfp_net_bpf_priv *priv = nn->app_priv; - u64 bytes, pkts; + struct bpf_prog *prog = bpf->verifier.prog; + struct nfp_prog *nfp_prog; + int ret; - pkts = priv->rx_filter.pkts - priv->rx_filter_prev.pkts; - bytes = priv->rx_filter.bytes - priv->rx_filter_prev.bytes; - bytes -= pkts * ETH_HLEN; + nfp_prog = kzalloc(sizeof(*nfp_prog), GFP_KERNEL); + if (!nfp_prog) + return -ENOMEM; + prog->aux->offload->dev_priv = nfp_prog; - priv->rx_filter_prev = priv->rx_filter; + INIT_LIST_HEAD(&nfp_prog->insns); + nfp_prog->type = prog->type; - tcf_exts_stats_update(cls_bpf->exts, - bytes, pkts, priv->rx_filter_change); + ret = nfp_prog_prepare(nfp_prog, prog->insnsi, prog->len); + if (ret) + goto err_free; - return 0; -} + nfp_prog->verifier_meta = nfp_prog_first_meta(nfp_prog); + bpf->verifier.ops = &nfp_bpf_analyzer_ops; -static int -nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) -{ - const struct tc_action *a; - LIST_HEAD(actions); + return 0; - if (!cls_bpf->exts) - return NN_ACT_XDP; +err_free: + nfp_prog_free(nfp_prog); - /* TC direct action */ - if (cls_bpf->exts_integrated) { - if (!tcf_exts_has_actions(cls_bpf->exts)) - return NN_ACT_DIRECT; + return ret; +} +int nfp_bpf_translate(struct nfp_app *app, struct nfp_net *nn, + struct bpf_prog *prog) +{ + struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; + unsigned int stack_size; + unsigned int max_instr; + + stack_size = nn_readb(nn, NFP_NET_CFG_BPF_STACK_SZ) * 64; + if (prog->aux->stack_depth > stack_size) { + nn_info(nn, "stack too large: program %dB > FW stack %dB\n", + prog->aux->stack_depth, stack_size); return -EOPNOTSUPP; } - /* TC legacy mode */ - if (!tcf_exts_has_one_action(cls_bpf->exts)) - return -EOPNOTSUPP; + nfp_prog->stack_depth = prog->aux->stack_depth; + nfp_prog->start_off = nn_readw(nn, NFP_NET_CFG_BPF_START); + nfp_prog->tgt_done = nn_readw(nn, NFP_NET_CFG_BPF_DONE); - tcf_exts_to_list(cls_bpf->exts, &actions); - list_for_each_entry(a, &actions, list) { - if (is_tcf_gact_shot(a)) - return NN_ACT_TC_DROP; + max_instr = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN); + nfp_prog->__prog_alloc_len = max_instr * sizeof(u64); - if (is_tcf_mirred_egress_redirect(a) && - tcf_mirred_ifindex(a) == nn->dp.netdev->ifindex) - return NN_ACT_TC_REDIR; - } + nfp_prog->prog = kmalloc(nfp_prog->__prog_alloc_len, GFP_KERNEL); + if (!nfp_prog->prog) + return -ENOMEM; - return -EOPNOTSUPP; + return nfp_bpf_jit(nfp_prog); } -static int -nfp_net_bpf_offload_prepare(struct nfp_net *nn, - struct tc_cls_bpf_offload *cls_bpf, - struct nfp_bpf_result *res, - void **code, dma_addr_t *dma_addr, u16 max_instr) +int nfp_bpf_destroy(struct nfp_app *app, struct nfp_net *nn, + struct bpf_prog *prog) { - unsigned int code_sz = max_instr * sizeof(u64); - enum nfp_bpf_action_type act; - u16 start_off, done_off; - unsigned int max_mtu; - int ret; + struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; - if (!IS_ENABLED(CONFIG_BPF_SYSCALL)) - return -EOPNOTSUPP; + kfree(nfp_prog->prog); + nfp_prog_free(nfp_prog); - ret = nfp_net_bpf_get_act(nn, cls_bpf); - if (ret < 0) - return ret; - act = ret; + return 0; +} + +static int nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog) +{ + struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; + unsigned int max_mtu; + dma_addr_t dma_addr; + int err; max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; if (max_mtu < nn->dp.netdev->mtu) { @@ -164,134 +166,80 @@ nfp_net_bpf_offload_prepare(struct nfp_net *nn, return -EOPNOTSUPP; } - start_off = nn_readw(nn, NFP_NET_CFG_BPF_START); - done_off = nn_readw(nn, NFP_NET_CFG_BPF_DONE); - - *code = dma_zalloc_coherent(nn->dp.dev, code_sz, dma_addr, GFP_KERNEL); - if (!*code) + dma_addr = dma_map_single(nn->dp.dev, nfp_prog->prog, + nfp_prog->prog_len * sizeof(u64), + DMA_TO_DEVICE); + if (dma_mapping_error(nn->dp.dev, dma_addr)) return -ENOMEM; - ret = nfp_bpf_jit(cls_bpf->prog, *code, act, start_off, done_off, - max_instr, res); - if (ret) - goto out; + nn_writew(nn, NFP_NET_CFG_BPF_SIZE, nfp_prog->prog_len); + nn_writeq(nn, NFP_NET_CFG_BPF_ADDR, dma_addr); - return 0; + /* Load up the JITed code */ + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_BPF); + if (err) + nn_err(nn, "FW command error while loading BPF: %d\n", err); -out: - dma_free_coherent(nn->dp.dev, code_sz, *code, *dma_addr); - return ret; + dma_unmap_single(nn->dp.dev, dma_addr, nfp_prog->prog_len * sizeof(u64), + DMA_TO_DEVICE); + + return err; } -static void -nfp_net_bpf_load_and_start(struct nfp_net *nn, u32 tc_flags, - void *code, dma_addr_t dma_addr, - unsigned int code_sz, unsigned int n_instr, - bool dense_mode) +static void nfp_net_bpf_start(struct nfp_net *nn) { - struct nfp_net_bpf_priv *priv = nn->app_priv; - u64 bpf_addr = dma_addr; int err; - nn->dp.bpf_offload_skip_sw = !!(tc_flags & TCA_CLS_FLAGS_SKIP_SW); - - if (dense_mode) - bpf_addr |= NFP_NET_CFG_BPF_CFG_8CTX; - - nn_writew(nn, NFP_NET_CFG_BPF_SIZE, n_instr); - nn_writeq(nn, NFP_NET_CFG_BPF_ADDR, bpf_addr); - - /* Load up the JITed code */ - err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_BPF); - if (err) - nn_err(nn, "FW command error while loading BPF: %d\n", err); - /* Enable passing packets through BPF function */ nn->dp.ctrl |= NFP_NET_CFG_CTRL_BPF; nn_writel(nn, NFP_NET_CFG_CTRL, nn->dp.ctrl); err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); if (err) nn_err(nn, "FW command error while enabling BPF: %d\n", err); - - dma_free_coherent(nn->dp.dev, code_sz, code, dma_addr); - - nfp_net_bpf_stats_reset(nn); - mod_timer(&priv->rx_filter_stats_timer, - jiffies + NFP_NET_STAT_POLL_IVL); } static int nfp_net_bpf_stop(struct nfp_net *nn) { - struct nfp_net_bpf_priv *priv = nn->app_priv; - if (!(nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF)) return 0; - spin_lock_bh(&priv->rx_filter_lock); nn->dp.ctrl &= ~NFP_NET_CFG_CTRL_BPF; - spin_unlock_bh(&priv->rx_filter_lock); nn_writel(nn, NFP_NET_CFG_CTRL, nn->dp.ctrl); - del_timer_sync(&priv->rx_filter_stats_timer); - nn->dp.bpf_offload_skip_sw = 0; - return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); } -int nfp_net_bpf_offload(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) +int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, + bool old_prog) { - struct nfp_bpf_result res; - dma_addr_t dma_addr; - u16 max_instr; - void *code; int err; - max_instr = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN); + if (prog && !prog->aux->offload) + return -EINVAL; - switch (cls_bpf->command) { - case TC_CLSBPF_REPLACE: - /* There is nothing stopping us from implementing seamless - * replace but the simple method of loading I adopted in - * the firmware does not handle atomic replace (i.e. we have to - * stop the BPF offload and re-enable it). Leaking-in a few - * frames which didn't have BPF applied in the hardware should - * be fine if software fallback is available, though. - */ - if (nn->dp.bpf_offload_skip_sw) - return -EBUSY; - - err = nfp_net_bpf_offload_prepare(nn, cls_bpf, &res, &code, - &dma_addr, max_instr); - if (err) - return err; - - nfp_net_bpf_stop(nn); - nfp_net_bpf_load_and_start(nn, cls_bpf->gen_flags, code, - dma_addr, max_instr * sizeof(u64), - res.n_instr, res.dense_mode); - return 0; + if (prog && old_prog) { + u8 cap; - case TC_CLSBPF_ADD: - if (nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) + cap = nn_readb(nn, NFP_NET_CFG_BPF_CAP); + if (!(cap & NFP_NET_BPF_CAP_RELO)) { + nn_err(nn, "FW does not support live reload\n"); return -EBUSY; + } + } - err = nfp_net_bpf_offload_prepare(nn, cls_bpf, &res, &code, - &dma_addr, max_instr); - if (err) - return err; - - nfp_net_bpf_load_and_start(nn, cls_bpf->gen_flags, code, - dma_addr, max_instr * sizeof(u64), - res.n_instr, res.dense_mode); - return 0; + /* Something else is loaded, different program type? */ + if (!old_prog && nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) + return -EBUSY; - case TC_CLSBPF_DESTROY: + if (old_prog && !prog) return nfp_net_bpf_stop(nn); - case TC_CLSBPF_STATS: - return nfp_net_bpf_stats_update(nn, cls_bpf); + err = nfp_net_bpf_load(nn, prog); + if (err) + return err; + + if (!old_prog) + nfp_net_bpf_start(nn); - default: - return -EOPNOTSUPP; - } + return 0; } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index 5b783a91b115..8d43491ddd6b 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -40,12 +40,6 @@ #include "main.h" -/* Analyzer/verifier definitions */ -struct nfp_bpf_analyzer_priv { - struct nfp_prog *prog; - struct nfp_insn_meta *meta; -}; - static struct nfp_insn_meta * nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, unsigned int insn_idx, unsigned int n_insns) @@ -76,12 +70,12 @@ nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, static int nfp_bpf_check_exit(struct nfp_prog *nfp_prog, - const struct bpf_verifier_env *env) + struct bpf_verifier_env *env) { - const struct bpf_reg_state *reg0 = &env->cur_state.regs[0]; + const struct bpf_reg_state *reg0 = cur_regs(env) + BPF_REG_0; u64 imm; - if (nfp_prog->act == NN_ACT_XDP) + if (nfp_prog->type == BPF_PROG_TYPE_XDP) return 0; if (!(reg0->type == SCALAR_VALUE && tnum_is_const(reg0->var_off))) { @@ -94,13 +88,8 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog, } imm = reg0->var_off.value; - if (nfp_prog->act != NN_ACT_DIRECT && imm != 0 && (imm & ~0U) != ~0U) { - pr_info("unsupported exit state: %d, imm: %llx\n", - reg0->type, imm); - return -EINVAL; - } - - if (nfp_prog->act == NN_ACT_DIRECT && imm <= TC_ACT_REDIRECT && + if (nfp_prog->type == BPF_PROG_TYPE_SCHED_CLS && + imm <= TC_ACT_REDIRECT && imm != TC_ACT_SHOT && imm != TC_ACT_STOLEN && imm != TC_ACT_QUEUED) { pr_info("unsupported exit state: %d, imm: %llx\n", @@ -112,29 +101,76 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog, } static int -nfp_bpf_check_ctx_ptr(struct nfp_prog *nfp_prog, - const struct bpf_verifier_env *env, u8 reg) +nfp_bpf_check_stack_access(struct nfp_prog *nfp_prog, + struct nfp_insn_meta *meta, + const struct bpf_reg_state *reg) { - if (env->cur_state.regs[reg].type != PTR_TO_CTX) + s32 old_off, new_off; + + if (!tnum_is_const(reg->var_off)) { + pr_info("variable ptr stack access\n"); return -EINVAL; + } - return 0; + if (meta->ptr.type == NOT_INIT) + return 0; + + old_off = meta->ptr.off + meta->ptr.var_off.value; + new_off = reg->off + reg->var_off.value; + + meta->ptr_not_const |= old_off != new_off; + + if (!meta->ptr_not_const) + return 0; + + if (old_off % 4 == new_off % 4) + return 0; + + pr_info("stack access changed location was:%d is:%d\n", + old_off, new_off); + return -EINVAL; } static int -nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) +nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + struct bpf_verifier_env *env, u8 reg_no) { - struct nfp_bpf_analyzer_priv *priv = env->analyzer_priv; - struct nfp_insn_meta *meta = priv->meta; + const struct bpf_reg_state *reg = cur_regs(env) + reg_no; + int err; - meta = nfp_bpf_goto_meta(priv->prog, meta, insn_idx, env->prog->len); - priv->meta = meta; + if (reg->type != PTR_TO_CTX && + reg->type != PTR_TO_STACK && + reg->type != PTR_TO_PACKET) { + pr_info("unsupported ptr type: %d\n", reg->type); + return -EINVAL; + } - if (meta->insn.src_reg == BPF_REG_10 || - meta->insn.dst_reg == BPF_REG_10) { - pr_err("stack not yet supported\n"); + if (reg->type == PTR_TO_STACK) { + err = nfp_bpf_check_stack_access(nfp_prog, meta, reg); + if (err) + return err; + } + + if (meta->ptr.type != NOT_INIT && meta->ptr.type != reg->type) { + pr_info("ptr type changed for instruction %d -> %d\n", + meta->ptr.type, reg->type); return -EINVAL; } + + meta->ptr = *reg; + + return 0; +} + +static int +nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) +{ + struct nfp_prog *nfp_prog = env->prog->aux->offload->dev_priv; + struct nfp_insn_meta *meta = nfp_prog->verifier_meta; + + meta = nfp_bpf_goto_meta(nfp_prog, meta, insn_idx, env->prog->len); + nfp_prog->verifier_meta = meta; + if (meta->insn.src_reg >= MAX_BPF_REG || meta->insn.dst_reg >= MAX_BPF_REG) { pr_err("program uses extended registers - jit hardening?\n"); @@ -142,37 +178,18 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) } if (meta->insn.code == (BPF_JMP | BPF_EXIT)) - return nfp_bpf_check_exit(priv->prog, env); + return nfp_bpf_check_exit(nfp_prog, env); if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM)) - return nfp_bpf_check_ctx_ptr(priv->prog, env, - meta->insn.src_reg); + return nfp_bpf_check_ptr(nfp_prog, meta, env, + meta->insn.src_reg); if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM)) - return nfp_bpf_check_ctx_ptr(priv->prog, env, - meta->insn.dst_reg); + return nfp_bpf_check_ptr(nfp_prog, meta, env, + meta->insn.dst_reg); return 0; } -static const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops = { +const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops = { .insn_hook = nfp_verify_insn, }; - -int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog) -{ - struct nfp_bpf_analyzer_priv *priv; - int ret; - - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; - - priv->prog = nfp_prog; - priv->meta = nfp_prog_first_meta(nfp_prog); - - ret = bpf_analyzer(prog, &nfp_bpf_analyzer_ops, priv); - - kfree(priv); - - return ret; -} diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 8ea9320014ee..c1c595f8bb87 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -36,7 +36,9 @@ #include <net/switchdev.h> #include <net/tc_act/tc_gact.h> #include <net/tc_act/tc_mirred.h> +#include <net/tc_act/tc_pedit.h> #include <net/tc_act/tc_vlan.h> +#include <net/tc_act/tc_tunnel_key.h> #include "cmsg.h" #include "main.h" @@ -45,13 +47,9 @@ static void nfp_fl_pop_vlan(struct nfp_fl_pop_vlan *pop_vlan) { size_t act_size = sizeof(struct nfp_fl_pop_vlan); - u16 tmp_pop_vlan_op; - tmp_pop_vlan_op = - FIELD_PREP(NFP_FL_ACT_LEN_LW, act_size >> NFP_FL_LW_SIZ) | - FIELD_PREP(NFP_FL_ACT_JMP_ID, NFP_FL_ACTION_OPCODE_POP_VLAN); - - pop_vlan->a_op = cpu_to_be16(tmp_pop_vlan_op); + pop_vlan->head.jump_id = NFP_FL_ACTION_OPCODE_POP_VLAN; + pop_vlan->head.len_lw = act_size >> NFP_FL_LW_SIZ; pop_vlan->reserved = 0; } @@ -60,64 +58,373 @@ nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan, const struct tc_action *action) { size_t act_size = sizeof(struct nfp_fl_push_vlan); - struct tcf_vlan *vlan = to_vlan(action); u16 tmp_push_vlan_tci; - u16 tmp_push_vlan_op; - - tmp_push_vlan_op = - FIELD_PREP(NFP_FL_ACT_LEN_LW, act_size >> NFP_FL_LW_SIZ) | - FIELD_PREP(NFP_FL_ACT_JMP_ID, NFP_FL_ACTION_OPCODE_PUSH_VLAN); - push_vlan->a_op = cpu_to_be16(tmp_push_vlan_op); - /* Set action push vlan parameters. */ + push_vlan->head.jump_id = NFP_FL_ACTION_OPCODE_PUSH_VLAN; + push_vlan->head.len_lw = act_size >> NFP_FL_LW_SIZ; push_vlan->reserved = 0; push_vlan->vlan_tpid = tcf_vlan_push_proto(action); tmp_push_vlan_tci = - FIELD_PREP(NFP_FL_PUSH_VLAN_PRIO, vlan->tcfv_push_prio) | - FIELD_PREP(NFP_FL_PUSH_VLAN_VID, vlan->tcfv_push_vid) | + FIELD_PREP(NFP_FL_PUSH_VLAN_PRIO, tcf_vlan_push_prio(action)) | + FIELD_PREP(NFP_FL_PUSH_VLAN_VID, tcf_vlan_push_vid(action)) | NFP_FL_PUSH_VLAN_CFI; push_vlan->vlan_tci = cpu_to_be16(tmp_push_vlan_tci); } +static bool nfp_fl_netdev_is_tunnel_type(struct net_device *out_dev, + enum nfp_flower_tun_type tun_type) +{ + if (!out_dev->rtnl_link_ops) + return false; + + if (!strcmp(out_dev->rtnl_link_ops->kind, "vxlan")) + return tun_type == NFP_FL_TUNNEL_VXLAN; + + return false; +} + static int nfp_fl_output(struct nfp_fl_output *output, const struct tc_action *action, struct nfp_fl_payload *nfp_flow, bool last, - struct net_device *in_dev) + struct net_device *in_dev, enum nfp_flower_tun_type tun_type, + int *tun_out_cnt) { size_t act_size = sizeof(struct nfp_fl_output); struct net_device *out_dev; - u16 tmp_output_op; + u16 tmp_flags; int ifindex; - /* Set action opcode to output action. */ - tmp_output_op = - FIELD_PREP(NFP_FL_ACT_LEN_LW, act_size >> NFP_FL_LW_SIZ) | - FIELD_PREP(NFP_FL_ACT_JMP_ID, NFP_FL_ACTION_OPCODE_OUTPUT); - - output->a_op = cpu_to_be16(tmp_output_op); - - /* Set action output parameters. */ - output->flags = cpu_to_be16(last ? NFP_FL_OUT_FLAGS_LAST : 0); + output->head.jump_id = NFP_FL_ACTION_OPCODE_OUTPUT; + output->head.len_lw = act_size >> NFP_FL_LW_SIZ; ifindex = tcf_mirred_ifindex(action); out_dev = __dev_get_by_index(dev_net(in_dev), ifindex); if (!out_dev) return -EOPNOTSUPP; - /* Only offload egress ports are on the same device as the ingress - * port. + tmp_flags = last ? NFP_FL_OUT_FLAGS_LAST : 0; + + if (tun_type) { + /* Verify the egress netdev matches the tunnel type. */ + if (!nfp_fl_netdev_is_tunnel_type(out_dev, tun_type)) + return -EOPNOTSUPP; + + if (*tun_out_cnt) + return -EOPNOTSUPP; + (*tun_out_cnt)++; + + output->flags = cpu_to_be16(tmp_flags | + NFP_FL_OUT_FLAGS_USE_TUN); + output->port = cpu_to_be32(NFP_FL_PORT_TYPE_TUN | tun_type); + } else { + /* Set action output parameters. */ + output->flags = cpu_to_be16(tmp_flags); + + /* Only offload if egress ports are on the same device as the + * ingress port. + */ + if (!switchdev_port_same_parent_id(in_dev, out_dev)) + return -EOPNOTSUPP; + if (!nfp_netdev_is_nfp_repr(out_dev)) + return -EOPNOTSUPP; + + output->port = cpu_to_be32(nfp_repr_get_port_id(out_dev)); + if (!output->port) + return -EOPNOTSUPP; + } + nfp_flow->meta.shortcut = output->port; + + return 0; +} + +static bool nfp_fl_supported_tun_port(const struct tc_action *action) +{ + struct ip_tunnel_info *tun = tcf_tunnel_info(action); + + return tun->key.tp_dst == htons(NFP_FL_VXLAN_PORT); +} + +static struct nfp_fl_pre_tunnel *nfp_fl_pre_tunnel(char *act_data, int act_len) +{ + size_t act_size = sizeof(struct nfp_fl_pre_tunnel); + struct nfp_fl_pre_tunnel *pre_tun_act; + + /* Pre_tunnel action must be first on action list. + * If other actions already exist they need pushed forward. */ - if (!switchdev_port_same_parent_id(in_dev, out_dev)) + if (act_len) + memmove(act_data + act_size, act_data, act_len); + + pre_tun_act = (struct nfp_fl_pre_tunnel *)act_data; + + memset(pre_tun_act, 0, act_size); + + pre_tun_act->head.jump_id = NFP_FL_ACTION_OPCODE_PRE_TUNNEL; + pre_tun_act->head.len_lw = act_size >> NFP_FL_LW_SIZ; + + return pre_tun_act; +} + +static int +nfp_fl_set_vxlan(struct nfp_fl_set_vxlan *set_vxlan, + const struct tc_action *action, + struct nfp_fl_pre_tunnel *pre_tun) +{ + struct ip_tunnel_info *vxlan = tcf_tunnel_info(action); + size_t act_size = sizeof(struct nfp_fl_set_vxlan); + u32 tmp_set_vxlan_type_index = 0; + /* Currently support one pre-tunnel so index is always 0. */ + int pretun_idx = 0; + + if (vxlan->options_len) { + /* Do not support options e.g. vxlan gpe. */ return -EOPNOTSUPP; - if (!nfp_netdev_is_nfp_repr(out_dev)) + } + + set_vxlan->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL; + set_vxlan->head.len_lw = act_size >> NFP_FL_LW_SIZ; + + /* Set tunnel type and pre-tunnel index. */ + tmp_set_vxlan_type_index |= + FIELD_PREP(NFP_FL_IPV4_TUNNEL_TYPE, NFP_FL_TUNNEL_VXLAN) | + FIELD_PREP(NFP_FL_IPV4_PRE_TUN_INDEX, pretun_idx); + + set_vxlan->tun_type_index = cpu_to_be32(tmp_set_vxlan_type_index); + + set_vxlan->tun_id = vxlan->key.tun_id; + set_vxlan->tun_flags = vxlan->key.tun_flags; + set_vxlan->ipv4_ttl = vxlan->key.ttl; + set_vxlan->ipv4_tos = vxlan->key.tos; + + /* Complete pre_tunnel action. */ + pre_tun->ipv4_dst = vxlan->key.u.ipv4.dst; + + return 0; +} + +static void nfp_fl_set_helper32(u32 value, u32 mask, u8 *p_exact, u8 *p_mask) +{ + u32 oldvalue = get_unaligned((u32 *)p_exact); + u32 oldmask = get_unaligned((u32 *)p_mask); + + value &= mask; + value |= oldvalue & ~mask; + + put_unaligned(oldmask | mask, (u32 *)p_mask); + put_unaligned(value, (u32 *)p_exact); +} + +static int +nfp_fl_set_eth(const struct tc_action *action, int idx, u32 off, + struct nfp_fl_set_eth *set_eth) +{ + u32 exact, mask; + + if (off + 4 > ETH_ALEN * 2) return -EOPNOTSUPP; - output->port = cpu_to_be32(nfp_repr_get_port_id(out_dev)); - if (!output->port) + mask = ~tcf_pedit_mask(action, idx); + exact = tcf_pedit_val(action, idx); + + if (exact & ~mask) return -EOPNOTSUPP; - nfp_flow->meta.shortcut = output->port; + nfp_fl_set_helper32(exact, mask, &set_eth->eth_addr_val[off], + &set_eth->eth_addr_mask[off]); + + set_eth->reserved = cpu_to_be16(0); + set_eth->head.jump_id = NFP_FL_ACTION_OPCODE_SET_ETHERNET; + set_eth->head.len_lw = sizeof(*set_eth) >> NFP_FL_LW_SIZ; + + return 0; +} + +static int +nfp_fl_set_ip4(const struct tc_action *action, int idx, u32 off, + struct nfp_fl_set_ip4_addrs *set_ip_addr) +{ + __be32 exact, mask; + + /* We are expecting tcf_pedit to return a big endian value */ + mask = (__force __be32)~tcf_pedit_mask(action, idx); + exact = (__force __be32)tcf_pedit_val(action, idx); + + if (exact & ~mask) + return -EOPNOTSUPP; + + switch (off) { + case offsetof(struct iphdr, daddr): + set_ip_addr->ipv4_dst_mask = mask; + set_ip_addr->ipv4_dst = exact; + break; + case offsetof(struct iphdr, saddr): + set_ip_addr->ipv4_src_mask = mask; + set_ip_addr->ipv4_src = exact; + break; + default: + return -EOPNOTSUPP; + } + + set_ip_addr->reserved = cpu_to_be16(0); + set_ip_addr->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS; + set_ip_addr->head.len_lw = sizeof(*set_ip_addr) >> NFP_FL_LW_SIZ; + + return 0; +} + +static void +nfp_fl_set_ip6_helper(int opcode_tag, int idx, __be32 exact, __be32 mask, + struct nfp_fl_set_ipv6_addr *ip6) +{ + ip6->ipv6[idx % 4].mask = mask; + ip6->ipv6[idx % 4].exact = exact; + + ip6->reserved = cpu_to_be16(0); + ip6->head.jump_id = opcode_tag; + ip6->head.len_lw = sizeof(*ip6) >> NFP_FL_LW_SIZ; +} + +static int +nfp_fl_set_ip6(const struct tc_action *action, int idx, u32 off, + struct nfp_fl_set_ipv6_addr *ip_dst, + struct nfp_fl_set_ipv6_addr *ip_src) +{ + __be32 exact, mask; + + /* We are expecting tcf_pedit to return a big endian value */ + mask = (__force __be32)~tcf_pedit_mask(action, idx); + exact = (__force __be32)tcf_pedit_val(action, idx); + + if (exact & ~mask) + return -EOPNOTSUPP; + + if (off < offsetof(struct ipv6hdr, saddr)) + return -EOPNOTSUPP; + else if (off < offsetof(struct ipv6hdr, daddr)) + nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_SRC, idx, + exact, mask, ip_src); + else if (off < offsetof(struct ipv6hdr, daddr) + + sizeof(struct in6_addr)) + nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_DST, idx, + exact, mask, ip_dst); + else + return -EOPNOTSUPP; + + return 0; +} + +static int +nfp_fl_set_tport(const struct tc_action *action, int idx, u32 off, + struct nfp_fl_set_tport *set_tport, int opcode) +{ + u32 exact, mask; + + if (off) + return -EOPNOTSUPP; + + mask = ~tcf_pedit_mask(action, idx); + exact = tcf_pedit_val(action, idx); + + if (exact & ~mask) + return -EOPNOTSUPP; + + nfp_fl_set_helper32(exact, mask, set_tport->tp_port_val, + set_tport->tp_port_mask); + + set_tport->reserved = cpu_to_be16(0); + set_tport->head.jump_id = opcode; + set_tport->head.len_lw = sizeof(*set_tport) >> NFP_FL_LW_SIZ; + + return 0; +} + +static int +nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len) +{ + struct nfp_fl_set_ipv6_addr set_ip6_dst, set_ip6_src; + struct nfp_fl_set_ip4_addrs set_ip_addr; + struct nfp_fl_set_tport set_tport; + struct nfp_fl_set_eth set_eth; + enum pedit_header_type htype; + int idx, nkeys, err; + size_t act_size; + u32 offset, cmd; + + memset(&set_ip6_dst, 0, sizeof(set_ip6_dst)); + memset(&set_ip6_src, 0, sizeof(set_ip6_src)); + memset(&set_ip_addr, 0, sizeof(set_ip_addr)); + memset(&set_tport, 0, sizeof(set_tport)); + memset(&set_eth, 0, sizeof(set_eth)); + nkeys = tcf_pedit_nkeys(action); + + for (idx = 0; idx < nkeys; idx++) { + cmd = tcf_pedit_cmd(action, idx); + htype = tcf_pedit_htype(action, idx); + offset = tcf_pedit_offset(action, idx); + + if (cmd != TCA_PEDIT_KEY_EX_CMD_SET) + return -EOPNOTSUPP; + + switch (htype) { + case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH: + err = nfp_fl_set_eth(action, idx, offset, &set_eth); + break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4: + err = nfp_fl_set_ip4(action, idx, offset, &set_ip_addr); + break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6: + err = nfp_fl_set_ip6(action, idx, offset, &set_ip6_dst, + &set_ip6_src); + break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP: + err = nfp_fl_set_tport(action, idx, offset, &set_tport, + NFP_FL_ACTION_OPCODE_SET_TCP); + break; + case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP: + err = nfp_fl_set_tport(action, idx, offset, &set_tport, + NFP_FL_ACTION_OPCODE_SET_UDP); + break; + default: + return -EOPNOTSUPP; + } + if (err) + return err; + } + + if (set_eth.head.len_lw) { + act_size = sizeof(set_eth); + memcpy(nfp_action, &set_eth, act_size); + *a_len += act_size; + } else if (set_ip_addr.head.len_lw) { + act_size = sizeof(set_ip_addr); + memcpy(nfp_action, &set_ip_addr, act_size); + *a_len += act_size; + } else if (set_ip6_dst.head.len_lw && set_ip6_src.head.len_lw) { + /* TC compiles set src and dst IPv6 address as a single action, + * the hardware requires this to be 2 separate actions. + */ + act_size = sizeof(set_ip6_src); + memcpy(nfp_action, &set_ip6_src, act_size); + *a_len += act_size; + + act_size = sizeof(set_ip6_dst); + memcpy(&nfp_action[sizeof(set_ip6_src)], &set_ip6_dst, + act_size); + *a_len += act_size; + } else if (set_ip6_dst.head.len_lw) { + act_size = sizeof(set_ip6_dst); + memcpy(nfp_action, &set_ip6_dst, act_size); + *a_len += act_size; + } else if (set_ip6_src.head.len_lw) { + act_size = sizeof(set_ip6_src); + memcpy(nfp_action, &set_ip6_src, act_size); + *a_len += act_size; + } else if (set_tport.head.len_lw) { + act_size = sizeof(set_tport); + memcpy(nfp_action, &set_tport, act_size); + *a_len += act_size; + } return 0; } @@ -125,8 +432,11 @@ nfp_fl_output(struct nfp_fl_output *output, const struct tc_action *action, static int nfp_flower_loop_action(const struct tc_action *a, struct nfp_fl_payload *nfp_fl, int *a_len, - struct net_device *netdev) + struct net_device *netdev, + enum nfp_flower_tun_type *tun_type, int *tun_out_cnt) { + struct nfp_fl_pre_tunnel *pre_tun; + struct nfp_fl_set_vxlan *s_vxl; struct nfp_fl_push_vlan *psh_v; struct nfp_fl_pop_vlan *pop_v; struct nfp_fl_output *output; @@ -139,7 +449,8 @@ nfp_flower_loop_action(const struct tc_action *a, return -EOPNOTSUPP; output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len]; - err = nfp_fl_output(output, a, nfp_fl, true, netdev); + err = nfp_fl_output(output, a, nfp_fl, true, netdev, *tun_type, + tun_out_cnt); if (err) return err; @@ -149,7 +460,8 @@ nfp_flower_loop_action(const struct tc_action *a, return -EOPNOTSUPP; output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len]; - err = nfp_fl_output(output, a, nfp_fl, false, netdev); + err = nfp_fl_output(output, a, nfp_fl, false, netdev, *tun_type, + tun_out_cnt); if (err) return err; @@ -172,6 +484,32 @@ nfp_flower_loop_action(const struct tc_action *a, nfp_fl_push_vlan(psh_v, a); *a_len += sizeof(struct nfp_fl_push_vlan); + } else if (is_tcf_tunnel_set(a) && nfp_fl_supported_tun_port(a)) { + /* Pre-tunnel action is required for tunnel encap. + * This checks for next hop entries on NFP. + * If none, the packet falls back before applying other actions. + */ + if (*a_len + sizeof(struct nfp_fl_pre_tunnel) + + sizeof(struct nfp_fl_set_vxlan) > NFP_FL_MAX_A_SIZ) + return -EOPNOTSUPP; + + *tun_type = NFP_FL_TUNNEL_VXLAN; + pre_tun = nfp_fl_pre_tunnel(nfp_fl->action_data, *a_len); + nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); + *a_len += sizeof(struct nfp_fl_pre_tunnel); + + s_vxl = (struct nfp_fl_set_vxlan *)&nfp_fl->action_data[*a_len]; + err = nfp_fl_set_vxlan(s_vxl, a, pre_tun); + if (err) + return err; + + *a_len += sizeof(struct nfp_fl_set_vxlan); + } else if (is_tcf_tunnel_release(a)) { + /* Tunnel decap is handled by default so accept action. */ + return 0; + } else if (is_tcf_pedit(a)) { + if (nfp_fl_pedit(a, &nfp_fl->action_data[*a_len], a_len)) + return -EOPNOTSUPP; } else { /* Currently we do not handle any other actions. */ return -EOPNOTSUPP; @@ -184,18 +522,22 @@ int nfp_flower_compile_action(struct tc_cls_flower_offload *flow, struct net_device *netdev, struct nfp_fl_payload *nfp_flow) { - int act_len, act_cnt, err; + int act_len, act_cnt, err, tun_out_cnt; + enum nfp_flower_tun_type tun_type; const struct tc_action *a; LIST_HEAD(actions); memset(nfp_flow->action_data, 0, NFP_FL_MAX_A_SIZ); nfp_flow->meta.act_len = 0; + tun_type = NFP_FL_TUNNEL_NONE; act_len = 0; act_cnt = 0; + tun_out_cnt = 0; tcf_exts_to_list(flow->exts, &actions); list_for_each_entry(a, &actions, list) { - err = nfp_flower_loop_action(a, nfp_flow, &act_len, netdev); + err = nfp_flower_loop_action(a, nfp_flow, &act_len, netdev, + &tun_type, &tun_out_cnt); if (err) return err; act_cnt++; diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c index c3ca05d10fe1..e98bb9cdb6a3 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c @@ -38,17 +38,10 @@ #include <net/dst_metadata.h> #include "main.h" -#include "../nfpcore/nfp_cpp.h" #include "../nfp_net.h" #include "../nfp_net_repr.h" #include "./cmsg.h" -#define nfp_flower_cmsg_warn(app, fmt, args...) \ - do { \ - if (net_ratelimit()) \ - nfp_warn((app)->cpp, fmt, ## args); \ - } while (0) - static struct nfp_flower_cmsg_hdr * nfp_flower_cmsg_get_hdr(struct sk_buff *skb) { @@ -57,14 +50,14 @@ nfp_flower_cmsg_get_hdr(struct sk_buff *skb) struct sk_buff * nfp_flower_cmsg_alloc(struct nfp_app *app, unsigned int size, - enum nfp_flower_cmsg_type_port type) + enum nfp_flower_cmsg_type_port type, gfp_t flag) { struct nfp_flower_cmsg_hdr *ch; struct sk_buff *skb; size += NFP_FLOWER_CMSG_HLEN; - skb = nfp_app_ctrl_msg_alloc(app, size, GFP_KERNEL); + skb = nfp_app_ctrl_msg_alloc(app, size, flag); if (!skb) return NULL; @@ -85,7 +78,8 @@ nfp_flower_cmsg_mac_repr_start(struct nfp_app *app, unsigned int num_ports) unsigned int size; size = sizeof(*msg) + num_ports * sizeof(msg->ports[0]); - skb = nfp_flower_cmsg_alloc(app, size, NFP_FLOWER_CMSG_TYPE_MAC_REPR); + skb = nfp_flower_cmsg_alloc(app, size, NFP_FLOWER_CMSG_TYPE_MAC_REPR, + GFP_KERNEL); if (!skb) return NULL; @@ -116,7 +110,7 @@ int nfp_flower_cmsg_portmod(struct nfp_repr *repr, bool carrier_ok) struct sk_buff *skb; skb = nfp_flower_cmsg_alloc(repr->app, sizeof(*msg), - NFP_FLOWER_CMSG_TYPE_PORT_MOD); + NFP_FLOWER_CMSG_TYPE_PORT_MOD, GFP_KERNEL); if (!skb) return -ENOMEM; @@ -188,6 +182,15 @@ nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb) case NFP_FLOWER_CMSG_TYPE_FLOW_STATS: nfp_flower_rx_flow_stats(app, skb); break; + case NFP_FLOWER_CMSG_TYPE_NO_NEIGH: + nfp_tunnel_request_route(app, skb); + break; + case NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS: + nfp_tunnel_keep_alive(app, skb); + break; + case NFP_FLOWER_CMSG_TYPE_TUN_NEIGH: + /* Acks from the NFP that the route is added - ignore. */ + break; default: nfp_flower_cmsg_warn(app, "Cannot handle invalid repr control type %u\n", type); diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index a2ec60344236..66070741d55f 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -39,6 +39,7 @@ #include <linux/types.h> #include "../nfp_app.h" +#include "../nfpcore/nfp_cpp.h" #define NFP_FLOWER_LAYER_META BIT(0) #define NFP_FLOWER_LAYER_PORT BIT(1) @@ -56,6 +57,11 @@ #define NFP_FLOWER_MASK_VLAN_CFI BIT(12) #define NFP_FLOWER_MASK_VLAN_VID GENMASK(11, 0) +#define NFP_FLOWER_MASK_MPLS_LB GENMASK(31, 12) +#define NFP_FLOWER_MASK_MPLS_TC GENMASK(11, 9) +#define NFP_FLOWER_MASK_MPLS_BOS BIT(8) +#define NFP_FLOWER_MASK_MPLS_Q BIT(0) + #define NFP_FL_SC_ACT_DROP 0x80000000 #define NFP_FL_SC_ACT_USER 0x7D000000 #define NFP_FL_SC_ACT_POPV 0x6A000000 @@ -67,13 +73,18 @@ #define NFP_FL_LW_SIZ 2 /* Action opcodes */ -#define NFP_FL_ACTION_OPCODE_OUTPUT 0 -#define NFP_FL_ACTION_OPCODE_PUSH_VLAN 1 -#define NFP_FL_ACTION_OPCODE_POP_VLAN 2 -#define NFP_FL_ACTION_OPCODE_NUM 32 - -#define NFP_FL_ACT_JMP_ID GENMASK(15, 8) -#define NFP_FL_ACT_LEN_LW GENMASK(7, 0) +#define NFP_FL_ACTION_OPCODE_OUTPUT 0 +#define NFP_FL_ACTION_OPCODE_PUSH_VLAN 1 +#define NFP_FL_ACTION_OPCODE_POP_VLAN 2 +#define NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL 6 +#define NFP_FL_ACTION_OPCODE_SET_ETHERNET 7 +#define NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS 9 +#define NFP_FL_ACTION_OPCODE_SET_IPV6_SRC 11 +#define NFP_FL_ACTION_OPCODE_SET_IPV6_DST 12 +#define NFP_FL_ACTION_OPCODE_SET_UDP 14 +#define NFP_FL_ACTION_OPCODE_SET_TCP 15 +#define NFP_FL_ACTION_OPCODE_PRE_TUNNEL 17 +#define NFP_FL_ACTION_OPCODE_NUM 32 #define NFP_FL_OUT_FLAGS_LAST BIT(15) #define NFP_FL_OUT_FLAGS_USE_TUN BIT(4) @@ -83,21 +94,74 @@ #define NFP_FL_PUSH_VLAN_CFI BIT(12) #define NFP_FL_PUSH_VLAN_VID GENMASK(11, 0) +/* Tunnel ports */ +#define NFP_FL_PORT_TYPE_TUN 0x50000000 +#define NFP_FL_IPV4_TUNNEL_TYPE GENMASK(7, 4) +#define NFP_FL_IPV4_PRE_TUN_INDEX GENMASK(2, 0) + +#define nfp_flower_cmsg_warn(app, fmt, args...) \ + do { \ + if (net_ratelimit()) \ + nfp_warn((app)->cpp, fmt, ## args); \ + } while (0) + +enum nfp_flower_tun_type { + NFP_FL_TUNNEL_NONE = 0, + NFP_FL_TUNNEL_VXLAN = 2, +}; + +struct nfp_fl_act_head { + u8 jump_id; + u8 len_lw; +}; + +struct nfp_fl_set_eth { + struct nfp_fl_act_head head; + __be16 reserved; + u8 eth_addr_mask[ETH_ALEN * 2]; + u8 eth_addr_val[ETH_ALEN * 2]; +}; + +struct nfp_fl_set_ip4_addrs { + struct nfp_fl_act_head head; + __be16 reserved; + __be32 ipv4_src_mask; + __be32 ipv4_src; + __be32 ipv4_dst_mask; + __be32 ipv4_dst; +}; + +struct nfp_fl_set_ipv6_addr { + struct nfp_fl_act_head head; + __be16 reserved; + struct { + __be32 mask; + __be32 exact; + } ipv6[4]; +}; + +struct nfp_fl_set_tport { + struct nfp_fl_act_head head; + __be16 reserved; + u8 tp_port_mask[4]; + u8 tp_port_val[4]; +}; + struct nfp_fl_output { - __be16 a_op; + struct nfp_fl_act_head head; __be16 flags; __be32 port; }; struct nfp_fl_push_vlan { - __be16 a_op; + struct nfp_fl_act_head head; __be16 reserved; __be16 vlan_tpid; __be16 vlan_tci; }; struct nfp_fl_pop_vlan { - __be16 a_op; + struct nfp_fl_act_head head; __be16 reserved; }; @@ -115,6 +179,25 @@ struct nfp_flower_meta_one { u16 reserved; }; +struct nfp_fl_pre_tunnel { + struct nfp_fl_act_head head; + __be16 reserved; + __be32 ipv4_dst; + /* reserved for use with IPv6 addresses */ + __be32 extra[3]; +}; + +struct nfp_fl_set_vxlan { + struct nfp_fl_act_head head; + __be16 reserved; + __be64 tun_id; + __be32 tun_type_index; + __be16 tun_flags; + u8 ipv4_ttl; + u8 ipv4_tos; + __be32 extra[2]; +} __packed; + /* Metadata with L2 (1W/4B) * ---------------------------------------------------------------- * 3 2 1 @@ -230,6 +313,36 @@ struct nfp_flower_ipv6 { struct in6_addr ipv6_dst; }; +/* Flow Frame VXLAN --> Tunnel details (4W/16B) + * ----------------------------------------------------------------- + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv4_addr_src | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv4_addr_dst | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | tun_flags | tos | ttl | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | gpe_flags | Reserved | Next Protocol | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | VNI | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_vxlan { + __be32 ip_src; + __be32 ip_dst; + __be16 tun_flags; + u8 tos; + u8 ttl; + u8 gpe_flags; + u8 reserved[2]; + u8 nxt_proto; + __be32 tun_id; +}; + +#define NFP_FL_TUN_VNI_OFFSET 8 + /* The base header for a control message packet. * Defines an 8-bit version, and an 8-bit type, padded * to a 32-bit word. Rest of the packet is type-specific. @@ -249,6 +362,11 @@ enum nfp_flower_cmsg_type_port { NFP_FLOWER_CMSG_TYPE_FLOW_DEL = 2, NFP_FLOWER_CMSG_TYPE_MAC_REPR = 7, NFP_FLOWER_CMSG_TYPE_PORT_MOD = 8, + NFP_FLOWER_CMSG_TYPE_NO_NEIGH = 10, + NFP_FLOWER_CMSG_TYPE_TUN_MAC = 11, + NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS = 12, + NFP_FLOWER_CMSG_TYPE_TUN_NEIGH = 13, + NFP_FLOWER_CMSG_TYPE_TUN_IPS = 14, NFP_FLOWER_CMSG_TYPE_FLOW_STATS = 15, NFP_FLOWER_CMSG_TYPE_PORT_ECHO = 16, NFP_FLOWER_CMSG_TYPE_MAX = 32, @@ -282,6 +400,7 @@ enum nfp_flower_cmsg_port_type { NFP_FLOWER_CMSG_PORT_TYPE_UNSPEC = 0x0, NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT = 0x1, NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT = 0x2, + NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT = 0x3, }; enum nfp_flower_cmsg_port_vnic_type { @@ -323,6 +442,11 @@ static inline void *nfp_flower_cmsg_get_data(struct sk_buff *skb) return (unsigned char *)skb->data + NFP_FLOWER_CMSG_HLEN; } +static inline int nfp_flower_cmsg_get_data_len(struct sk_buff *skb) +{ + return skb->len - NFP_FLOWER_CMSG_HLEN; +} + struct sk_buff * nfp_flower_cmsg_mac_repr_start(struct nfp_app *app, unsigned int num_ports); void @@ -334,6 +458,6 @@ void nfp_flower_cmsg_process_rx(struct work_struct *work); void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb); struct sk_buff * nfp_flower_cmsg_alloc(struct nfp_app *app, unsigned int size, - enum nfp_flower_cmsg_type_port type); + enum nfp_flower_cmsg_type_port type, gfp_t flag); #endif diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c index 91fe03617106..e0283bb24f06 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.c +++ b/drivers/net/ethernet/netronome/nfp/flower/main.c @@ -142,8 +142,8 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app, { u8 nfp_pcie = nfp_cppcore_pcie_unit(app->pf->cpp); struct nfp_flower_priv *priv = app->priv; - struct nfp_reprs *reprs, *old_reprs; enum nfp_port_type port_type; + struct nfp_reprs *reprs; const u8 queue = 0; int i, err; @@ -194,11 +194,7 @@ nfp_flower_spawn_vnic_reprs(struct nfp_app *app, reprs->reprs[i]->name); } - old_reprs = nfp_app_reprs_set(app, repr_type, reprs); - if (IS_ERR(old_reprs)) { - err = PTR_ERR(old_reprs); - goto err_reprs_clean; - } + nfp_app_reprs_set(app, repr_type, reprs); return 0; err_reprs_clean: @@ -222,8 +218,8 @@ static int nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv) { struct nfp_eth_table *eth_tbl = app->pf->eth_tbl; - struct nfp_reprs *reprs, *old_reprs; struct sk_buff *ctrl_skb; + struct nfp_reprs *reprs; unsigned int i; int err; @@ -280,11 +276,7 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv) phys_port, reprs->reprs[phys_port]->name); } - old_reprs = nfp_app_reprs_set(app, NFP_REPR_TYPE_PHYS_PORT, reprs); - if (IS_ERR(old_reprs)) { - err = PTR_ERR(old_reprs); - goto err_reprs_clean; - } + nfp_app_reprs_set(app, NFP_REPR_TYPE_PHYS_PORT, reprs); /* The MAC_REPR control message should be sent after the MAC * representors are registered using nfp_app_reprs_set(). This is @@ -436,6 +428,16 @@ static void nfp_flower_clean(struct nfp_app *app) app->priv = NULL; } +static int nfp_flower_start(struct nfp_app *app) +{ + return nfp_tunnel_config_start(app); +} + +static void nfp_flower_stop(struct nfp_app *app) +{ + nfp_tunnel_config_stop(app); +} + const struct nfp_app_type app_flower = { .id = NFP_APP_FLOWER_NIC, .name = "flower", @@ -453,6 +455,9 @@ const struct nfp_app_type app_flower = { .repr_open = nfp_flower_repr_netdev_open, .repr_stop = nfp_flower_repr_netdev_stop, + .start = nfp_flower_start, + .stop = nfp_flower_stop, + .ctrl_msg_rx = nfp_flower_cmsg_rx, .sriov_enable = nfp_flower_sriov_enable, diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index c20dd00a1cae..c90e72b7ff5a 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -58,6 +58,8 @@ struct nfp_app; #define NFP_FL_MASK_REUSE_TIME_NS 40000 #define NFP_FL_MASK_ID_LOCATION 1 +#define NFP_FL_VXLAN_PORT 4789 + struct nfp_fl_mask_id { struct circ_buf mask_id_free_list; struct timespec64 *last_used; @@ -82,6 +84,18 @@ struct nfp_fl_stats_id { * @flow_table: Hash table used to store flower rules * @cmsg_work: Workqueue for control messages processing * @cmsg_skbs: List of skbs for control message processing + * @nfp_mac_off_list: List of MAC addresses to offload + * @nfp_mac_index_list: List of unique 8-bit indexes for non NFP netdevs + * @nfp_ipv4_off_list: List of IPv4 addresses to offload + * @nfp_neigh_off_list: List of neighbour offloads + * @nfp_mac_off_lock: Lock for the MAC address list + * @nfp_mac_index_lock: Lock for the MAC index list + * @nfp_ipv4_off_lock: Lock for the IPv4 address list + * @nfp_neigh_off_lock: Lock for the neighbour address list + * @nfp_mac_off_ids: IDA to manage id assignment for offloaded macs + * @nfp_mac_off_count: Number of MACs in address list + * @nfp_tun_mac_nb: Notifier to monitor link state + * @nfp_tun_neigh_nb: Notifier to monitor neighbour state */ struct nfp_flower_priv { struct nfp_app *app; @@ -94,6 +108,18 @@ struct nfp_flower_priv { DECLARE_HASHTABLE(flow_table, NFP_FLOWER_HASH_BITS); struct work_struct cmsg_work; struct sk_buff_head cmsg_skbs; + struct list_head nfp_mac_off_list; + struct list_head nfp_mac_index_list; + struct list_head nfp_ipv4_off_list; + struct list_head nfp_neigh_off_list; + struct mutex nfp_mac_off_lock; + struct mutex nfp_mac_index_lock; + struct mutex nfp_ipv4_off_lock; + spinlock_t nfp_neigh_off_lock; + struct ida nfp_mac_off_ids; + int nfp_mac_off_count; + struct notifier_block nfp_tun_mac_nb; + struct notifier_block nfp_tun_neigh_nb; }; struct nfp_fl_key_ls { @@ -126,6 +152,7 @@ struct nfp_fl_payload { struct rcu_head rcu; spinlock_t lock; /* lock stats */ struct nfp_fl_stats stats; + __be32 nfp_tun_ipv4_addr; char *unmasked_data; char *mask_data; char *action_data; @@ -163,4 +190,12 @@ nfp_flower_remove_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie); void nfp_flower_rx_flow_stats(struct nfp_app *app, struct sk_buff *skb); +int nfp_tunnel_config_start(struct nfp_app *app); +void nfp_tunnel_config_stop(struct nfp_app *app); +void nfp_tunnel_write_macs(struct nfp_app *app); +void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4); +void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4); +void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb); +void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb); + #endif diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index d25b5038c3a2..60614d4f0e22 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -77,14 +77,17 @@ nfp_flower_compile_meta(struct nfp_flower_meta_one *frame, u8 key_type) static int nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port, - bool mask_version) + bool mask_version, enum nfp_flower_tun_type tun_type) { if (mask_version) { frame->in_port = cpu_to_be32(~0); return 0; } - frame->in_port = cpu_to_be32(cmsg_port); + if (tun_type) + frame->in_port = cpu_to_be32(NFP_FL_PORT_TYPE_TUN | tun_type); + else + frame->in_port = cpu_to_be32(cmsg_port); return 0; } @@ -108,8 +111,21 @@ nfp_flower_compile_mac(struct nfp_flower_mac_mpls *frame, ether_addr_copy(frame->mac_src, &addr->src[0]); } - if (mask_version) - frame->mpls_lse = cpu_to_be32(~0); + if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_MPLS)) { + struct flow_dissector_key_mpls *mpls; + u32 t_mpls; + + mpls = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_MPLS, + target); + + t_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB, mpls->mpls_label) | + FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC, mpls->mpls_tc) | + FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS, mpls->mpls_bos) | + NFP_FLOWER_MASK_MPLS_Q; + + frame->mpls_lse = cpu_to_be32(t_mpls); + } } static void @@ -140,7 +156,6 @@ nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *frame, struct flow_dissector_key_ipv4_addrs *addr; struct flow_dissector_key_basic *basic; - /* Wildcard TOS/TTL for now. */ memset(frame, 0, sizeof(struct nfp_flower_ipv4)); if (dissector_uses_key(flow->dissector, @@ -158,6 +173,16 @@ nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *frame, target); frame->proto = basic->ip_proto; } + + if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_IP)) { + struct flow_dissector_key_ip *flow_ip; + + flow_ip = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_IP, + target); + frame->tos = flow_ip->tos; + frame->ttl = flow_ip->ttl; + } } static void @@ -169,7 +194,6 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame, struct flow_dissector_key_ipv6_addrs *addr; struct flow_dissector_key_basic *basic; - /* Wildcard LABEL/TOS/TTL for now. */ memset(frame, 0, sizeof(struct nfp_flower_ipv6)); if (dissector_uses_key(flow->dissector, @@ -187,6 +211,51 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame, target); frame->proto = basic->ip_proto; } + + if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_IP)) { + struct flow_dissector_key_ip *flow_ip; + + flow_ip = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_IP, + target); + frame->tos = flow_ip->tos; + frame->ttl = flow_ip->ttl; + } +} + +static void +nfp_flower_compile_vxlan(struct nfp_flower_vxlan *frame, + struct tc_cls_flower_offload *flow, + bool mask_version, __be32 *tun_dst) +{ + struct fl_flow_key *target = mask_version ? flow->mask : flow->key; + struct flow_dissector_key_ipv4_addrs *vxlan_ips; + struct flow_dissector_key_keyid *vni; + + /* Wildcard TOS/TTL/GPE_FLAGS/NXT_PROTO for now. */ + memset(frame, 0, sizeof(struct nfp_flower_vxlan)); + + if (dissector_uses_key(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID)) { + u32 temp_vni; + + vni = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID, + target); + temp_vni = be32_to_cpu(vni->keyid) << NFP_FL_TUN_VNI_OFFSET; + frame->tun_id = cpu_to_be32(temp_vni); + } + + if (dissector_uses_key(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { + vxlan_ips = + skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, + target); + frame->ip_src = vxlan_ips->src; + frame->ip_dst = vxlan_ips->dst; + *tun_dst = vxlan_ips->dst; + } } int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow, @@ -194,10 +263,16 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow, struct net_device *netdev, struct nfp_fl_payload *nfp_flow) { + enum nfp_flower_tun_type tun_type = NFP_FL_TUNNEL_NONE; + __be32 tun_dst, tun_dst_mask = 0; + struct nfp_repr *netdev_repr; int err; u8 *ext; u8 *msk; + if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN) + tun_type = NFP_FL_TUNNEL_VXLAN; + memset(nfp_flow->unmasked_data, 0, key_ls->key_size); memset(nfp_flow->mask_data, 0, key_ls->key_size); @@ -216,14 +291,14 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow, /* Populate Exact Port data. */ err = nfp_flower_compile_port((struct nfp_flower_in_port *)ext, nfp_repr_get_port_id(netdev), - false); + false, tun_type); if (err) return err; /* Populate Mask Port Data. */ err = nfp_flower_compile_port((struct nfp_flower_in_port *)msk, nfp_repr_get_port_id(netdev), - true); + true, tun_type); if (err) return err; @@ -291,5 +366,28 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow, msk += sizeof(struct nfp_flower_ipv6); } + if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN) { + /* Populate Exact VXLAN Data. */ + nfp_flower_compile_vxlan((struct nfp_flower_vxlan *)ext, + flow, false, &tun_dst); + /* Populate Mask VXLAN Data. */ + nfp_flower_compile_vxlan((struct nfp_flower_vxlan *)msk, + flow, true, &tun_dst_mask); + ext += sizeof(struct nfp_flower_vxlan); + msk += sizeof(struct nfp_flower_vxlan); + + /* Configure tunnel end point MAC. */ + if (nfp_netdev_is_nfp_repr(netdev)) { + netdev_repr = netdev_priv(netdev); + nfp_tunnel_write_macs(netdev_repr->app); + + /* Store the tunnel destination in the rule data. + * This must be present and be an exact match. + */ + nfp_flow->nfp_tun_ipv4_addr = tun_dst; + nfp_tunnel_add_ipv4_off(netdev_repr->app, tun_dst); + } + } + return 0; } diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c index 3226ddc55f99..193520ef23f0 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c +++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c @@ -140,7 +140,7 @@ exit_rcu_unlock: void nfp_flower_rx_flow_stats(struct nfp_app *app, struct sk_buff *skb) { - unsigned int msg_len = skb->len - NFP_FLOWER_CMSG_HLEN; + unsigned int msg_len = nfp_flower_cmsg_get_data_len(skb); struct nfp_fl_stats_frame *stats_frame; unsigned char *msg; int i; diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index a18b4d2b1d3e..cdbb5464b790 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -52,8 +52,26 @@ BIT(FLOW_DISSECTOR_KEY_PORTS) | \ BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | \ BIT(FLOW_DISSECTOR_KEY_VLAN) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | \ + BIT(FLOW_DISSECTOR_KEY_MPLS) | \ BIT(FLOW_DISSECTOR_KEY_IP)) +#define NFP_FLOWER_WHITELIST_TUN_DISSECTOR \ + (BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_PORTS)) + +#define NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R \ + (BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_PORTS)) + static int nfp_flower_xmit_flow(struct net_device *netdev, struct nfp_fl_payload *nfp_flow, u8 mtype) @@ -77,7 +95,7 @@ nfp_flower_xmit_flow(struct net_device *netdev, nfp_flow->meta.mask_len >>= NFP_FL_LW_SIZ; nfp_flow->meta.act_len >>= NFP_FL_LW_SIZ; - skb = nfp_flower_cmsg_alloc(priv->app, tot_len, mtype); + skb = nfp_flower_cmsg_alloc(priv->app, tot_len, mtype, GFP_KERNEL); if (!skb) return -ENOMEM; @@ -117,7 +135,6 @@ nfp_flower_calculate_key_layers(struct nfp_fl_key_ls *ret_key_ls, { struct flow_dissector_key_basic *mask_basic = NULL; struct flow_dissector_key_basic *key_basic = NULL; - struct flow_dissector_key_ip *mask_ip = NULL; u32 key_layer_two; u8 key_layer; int key_size; @@ -125,15 +142,58 @@ nfp_flower_calculate_key_layers(struct nfp_fl_key_ls *ret_key_ls, if (flow->dissector->used_keys & ~NFP_FLOWER_WHITELIST_DISSECTOR) return -EOPNOTSUPP; + /* If any tun dissector is used then the required set must be used. */ + if (flow->dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR && + (flow->dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R) + != NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R) + return -EOPNOTSUPP; + + key_layer_two = 0; + key_layer = NFP_FLOWER_LAYER_PORT | NFP_FLOWER_LAYER_MAC; + key_size = sizeof(struct nfp_flower_meta_one) + + sizeof(struct nfp_flower_in_port) + + sizeof(struct nfp_flower_mac_mpls); + if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { + struct flow_dissector_key_ipv4_addrs *mask_ipv4 = NULL; + struct flow_dissector_key_ports *mask_enc_ports = NULL; + struct flow_dissector_key_ports *enc_ports = NULL; struct flow_dissector_key_control *mask_enc_ctl = skb_flow_dissector_target(flow->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL, flow->mask); - /* We are expecting a tunnel. For now we ignore offloading. */ - if (mask_enc_ctl->addr_type) + struct flow_dissector_key_control *enc_ctl = + skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_CONTROL, + flow->key); + if (mask_enc_ctl->addr_type != 0xffff || + enc_ctl->addr_type != FLOW_DISSECTOR_KEY_IPV4_ADDRS) return -EOPNOTSUPP; + + /* These fields are already verified as used. */ + mask_ipv4 = + skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, + flow->mask); + if (mask_ipv4->dst != cpu_to_be32(~0)) + return -EOPNOTSUPP; + + mask_enc_ports = + skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_PORTS, + flow->mask); + enc_ports = + skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_PORTS, + flow->key); + + if (mask_enc_ports->dst != cpu_to_be16(~0) || + enc_ports->dst != htons(NFP_FL_VXLAN_PORT)) + return -EOPNOTSUPP; + + key_layer |= NFP_FLOWER_LAYER_VXLAN; + key_size += sizeof(struct nfp_flower_vxlan); } if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_BASIC)) { @@ -146,34 +206,15 @@ nfp_flower_calculate_key_layers(struct nfp_fl_key_ls *ret_key_ls, flow->key); } - if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_IP)) - mask_ip = skb_flow_dissector_target(flow->dissector, - FLOW_DISSECTOR_KEY_IP, - flow->mask); - - key_layer_two = 0; - key_layer = NFP_FLOWER_LAYER_PORT | NFP_FLOWER_LAYER_MAC; - key_size = sizeof(struct nfp_flower_meta_one) + - sizeof(struct nfp_flower_in_port) + - sizeof(struct nfp_flower_mac_mpls); - if (mask_basic && mask_basic->n_proto) { /* Ethernet type is present in the key. */ switch (key_basic->n_proto) { case cpu_to_be16(ETH_P_IP): - if (mask_ip && mask_ip->tos) - return -EOPNOTSUPP; - if (mask_ip && mask_ip->ttl) - return -EOPNOTSUPP; key_layer |= NFP_FLOWER_LAYER_IPV4; key_size += sizeof(struct nfp_flower_ipv4); break; case cpu_to_be16(ETH_P_IPV6): - if (mask_ip && mask_ip->tos) - return -EOPNOTSUPP; - if (mask_ip && mask_ip->ttl) - return -EOPNOTSUPP; key_layer |= NFP_FLOWER_LAYER_IPV6; key_size += sizeof(struct nfp_flower_ipv6); break; @@ -184,11 +225,6 @@ nfp_flower_calculate_key_layers(struct nfp_fl_key_ls *ret_key_ls, case cpu_to_be16(ETH_P_ARP): return -EOPNOTSUPP; - /* Currently we do not offload MPLS. */ - case cpu_to_be16(ETH_P_MPLS_UC): - case cpu_to_be16(ETH_P_MPLS_MC): - return -EOPNOTSUPP; - /* Will be included in layer 2. */ case cpu_to_be16(ETH_P_8021Q): break; @@ -252,6 +288,7 @@ nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer) if (!flow_pay->action_data) goto err_free_mask; + flow_pay->nfp_tun_ipv4_addr = 0; flow_pay->meta.flags = 0; spin_lock_init(&flow_pay->lock); @@ -361,6 +398,9 @@ nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev, if (err) goto err_free_flow; + if (nfp_flow->nfp_tun_ipv4_addr) + nfp_tunnel_del_ipv4_off(app, nfp_flow->nfp_tun_ipv4_addr); + err = nfp_flower_xmit_flow(netdev, nfp_flow, NFP_FLOWER_CMSG_TYPE_FLOW_DEL); if (err) @@ -409,6 +449,10 @@ static int nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev, struct tc_cls_flower_offload *flower) { + if (!eth_proto_is_802_3(flower->common.protocol) || + flower->common.chain_index) + return -EOPNOTSUPP; + switch (flower->command) { case TC_CLSFLOWER_REPLACE: return nfp_flower_add_offload(app, netdev, flower); @@ -421,16 +465,53 @@ nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev, return -EOPNOTSUPP; } -int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev, - enum tc_setup_type type, void *type_data) +static int nfp_flower_setup_tc_block_cb(enum tc_setup_type type, + void *type_data, void *cb_priv) { - struct tc_cls_flower_offload *cls_flower = type_data; + struct nfp_repr *repr = cb_priv; - if (type != TC_SETUP_CLSFLOWER || - !is_classid_clsact_ingress(cls_flower->common.classid) || - !eth_proto_is_802_3(cls_flower->common.protocol) || - cls_flower->common.chain_index) + if (!tc_can_offload(repr->netdev)) return -EOPNOTSUPP; - return nfp_flower_repr_offload(app, netdev, cls_flower); + switch (type) { + case TC_SETUP_CLSFLOWER: + return nfp_flower_repr_offload(repr->app, repr->netdev, + type_data); + default: + return -EOPNOTSUPP; + } +} + +static int nfp_flower_setup_tc_block(struct net_device *netdev, + struct tc_block_offload *f) +{ + struct nfp_repr *repr = netdev_priv(netdev); + + if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + return tcf_block_cb_register(f->block, + nfp_flower_setup_tc_block_cb, + repr, repr); + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, + nfp_flower_setup_tc_block_cb, + repr); + return 0; + default: + return -EOPNOTSUPP; + } +} + +int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev, + enum tc_setup_type type, void *type_data) +{ + switch (type) { + case TC_SETUP_BLOCK: + return nfp_flower_setup_tc_block(netdev, type_data); + default: + return -EOPNOTSUPP; + } } diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c new file mode 100644 index 000000000000..b03f22f29612 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -0,0 +1,804 @@ +/* + * Copyright (C) 2017 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/etherdevice.h> +#include <linux/inetdevice.h> +#include <net/netevent.h> +#include <linux/idr.h> +#include <net/dst_metadata.h> +#include <net/arp.h> + +#include "cmsg.h" +#include "main.h" +#include "../nfp_net_repr.h" +#include "../nfp_net.h" + +#define NFP_FL_MAX_ROUTES 32 + +/** + * struct nfp_tun_active_tuns - periodic message of active tunnels + * @seq: sequence number of the message + * @count: number of tunnels report in message + * @flags: options part of the request + * @ipv4: dest IPv4 address of active route + * @egress_port: port the encapsulated packet egressed + * @extra: reserved for future use + * @tun_info: tunnels that have sent traffic in reported period + */ +struct nfp_tun_active_tuns { + __be32 seq; + __be32 count; + __be32 flags; + struct route_ip_info { + __be32 ipv4; + __be32 egress_port; + __be32 extra[2]; + } tun_info[]; +}; + +/** + * struct nfp_tun_neigh - neighbour/route entry on the NFP + * @dst_ipv4: destination IPv4 address + * @src_ipv4: source IPv4 address + * @dst_addr: destination MAC address + * @src_addr: source MAC address + * @port_id: NFP port to output packet on - associated with source IPv4 + */ +struct nfp_tun_neigh { + __be32 dst_ipv4; + __be32 src_ipv4; + u8 dst_addr[ETH_ALEN]; + u8 src_addr[ETH_ALEN]; + __be32 port_id; +}; + +/** + * struct nfp_tun_req_route_ipv4 - NFP requests a route/neighbour lookup + * @ingress_port: ingress port of packet that signalled request + * @ipv4_addr: destination ipv4 address for route + * @reserved: reserved for future use + */ +struct nfp_tun_req_route_ipv4 { + __be32 ingress_port; + __be32 ipv4_addr; + __be32 reserved[2]; +}; + +/** + * struct nfp_ipv4_route_entry - routes that are offloaded to the NFP + * @ipv4_addr: destination of route + * @list: list pointer + */ +struct nfp_ipv4_route_entry { + __be32 ipv4_addr; + struct list_head list; +}; + +#define NFP_FL_IPV4_ADDRS_MAX 32 + +/** + * struct nfp_tun_ipv4_addr - set the IP address list on the NFP + * @count: number of IPs populated in the array + * @ipv4_addr: array of IPV4_ADDRS_MAX 32 bit IPv4 addresses + */ +struct nfp_tun_ipv4_addr { + __be32 count; + __be32 ipv4_addr[NFP_FL_IPV4_ADDRS_MAX]; +}; + +/** + * struct nfp_ipv4_addr_entry - cached IPv4 addresses + * @ipv4_addr: IP address + * @ref_count: number of rules currently using this IP + * @list: list pointer + */ +struct nfp_ipv4_addr_entry { + __be32 ipv4_addr; + int ref_count; + struct list_head list; +}; + +/** + * struct nfp_tun_mac_addr - configure MAC address of tunnel EP on NFP + * @reserved: reserved for future use + * @count: number of MAC addresses in the message + * @index: index of MAC address in the lookup table + * @addr: interface MAC address + * @addresses: series of MACs to offload + */ +struct nfp_tun_mac_addr { + __be16 reserved; + __be16 count; + struct index_mac_addr { + __be16 index; + u8 addr[ETH_ALEN]; + } addresses[]; +}; + +/** + * struct nfp_tun_mac_offload_entry - list of MACs to offload + * @index: index of MAC address for offloading + * @addr: interface MAC address + * @list: list pointer + */ +struct nfp_tun_mac_offload_entry { + __be16 index; + u8 addr[ETH_ALEN]; + struct list_head list; +}; + +#define NFP_MAX_MAC_INDEX 0xff + +/** + * struct nfp_tun_mac_non_nfp_idx - converts non NFP netdev ifindex to 8-bit id + * @ifindex: netdev ifindex of the device + * @index: index of netdevs mac on NFP + * @list: list pointer + */ +struct nfp_tun_mac_non_nfp_idx { + int ifindex; + u8 index; + struct list_head list; +}; + +void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_tun_active_tuns *payload; + struct net_device *netdev; + int count, i, pay_len; + struct neighbour *n; + __be32 ipv4_addr; + u32 port; + + payload = nfp_flower_cmsg_get_data(skb); + count = be32_to_cpu(payload->count); + if (count > NFP_FL_MAX_ROUTES) { + nfp_flower_cmsg_warn(app, "Tunnel keep-alive request exceeds max routes.\n"); + return; + } + + pay_len = nfp_flower_cmsg_get_data_len(skb); + if (pay_len != sizeof(struct nfp_tun_active_tuns) + + sizeof(struct route_ip_info) * count) { + nfp_flower_cmsg_warn(app, "Corruption in tunnel keep-alive message.\n"); + return; + } + + for (i = 0; i < count; i++) { + ipv4_addr = payload->tun_info[i].ipv4; + port = be32_to_cpu(payload->tun_info[i].egress_port); + netdev = nfp_app_repr_get(app, port); + if (!netdev) + continue; + + n = neigh_lookup(&arp_tbl, &ipv4_addr, netdev); + if (!n) + continue; + + /* Update the used timestamp of neighbour */ + neigh_event_send(n, NULL); + neigh_release(n); + } +} + +static bool nfp_tun_is_netdev_to_offload(struct net_device *netdev) +{ + if (!netdev->rtnl_link_ops) + return false; + if (!strcmp(netdev->rtnl_link_ops->kind, "openvswitch")) + return true; + if (!strcmp(netdev->rtnl_link_ops->kind, "vxlan")) + return true; + + return false; +} + +static int +nfp_flower_xmit_tun_conf(struct nfp_app *app, u8 mtype, u16 plen, void *pdata, + gfp_t flag) +{ + struct sk_buff *skb; + unsigned char *msg; + + skb = nfp_flower_cmsg_alloc(app, plen, mtype, flag); + if (!skb) + return -ENOMEM; + + msg = nfp_flower_cmsg_get_data(skb); + memcpy(msg, pdata, nfp_flower_cmsg_get_data_len(skb)); + + nfp_ctrl_tx(app->ctrl, skb); + return 0; +} + +static bool nfp_tun_has_route(struct nfp_app *app, __be32 ipv4_addr) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv4_route_entry *entry; + struct list_head *ptr, *storage; + + spin_lock_bh(&priv->nfp_neigh_off_lock); + list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) { + entry = list_entry(ptr, struct nfp_ipv4_route_entry, list); + if (entry->ipv4_addr == ipv4_addr) { + spin_unlock_bh(&priv->nfp_neigh_off_lock); + return true; + } + } + spin_unlock_bh(&priv->nfp_neigh_off_lock); + return false; +} + +static void nfp_tun_add_route_to_cache(struct nfp_app *app, __be32 ipv4_addr) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv4_route_entry *entry; + struct list_head *ptr, *storage; + + spin_lock_bh(&priv->nfp_neigh_off_lock); + list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) { + entry = list_entry(ptr, struct nfp_ipv4_route_entry, list); + if (entry->ipv4_addr == ipv4_addr) { + spin_unlock_bh(&priv->nfp_neigh_off_lock); + return; + } + } + entry = kmalloc(sizeof(*entry), GFP_ATOMIC); + if (!entry) { + spin_unlock_bh(&priv->nfp_neigh_off_lock); + nfp_flower_cmsg_warn(app, "Mem error when storing new route.\n"); + return; + } + + entry->ipv4_addr = ipv4_addr; + list_add_tail(&entry->list, &priv->nfp_neigh_off_list); + spin_unlock_bh(&priv->nfp_neigh_off_lock); +} + +static void nfp_tun_del_route_from_cache(struct nfp_app *app, __be32 ipv4_addr) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv4_route_entry *entry; + struct list_head *ptr, *storage; + + spin_lock_bh(&priv->nfp_neigh_off_lock); + list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) { + entry = list_entry(ptr, struct nfp_ipv4_route_entry, list); + if (entry->ipv4_addr == ipv4_addr) { + list_del(&entry->list); + kfree(entry); + break; + } + } + spin_unlock_bh(&priv->nfp_neigh_off_lock); +} + +static void +nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app, + struct flowi4 *flow, struct neighbour *neigh, gfp_t flag) +{ + struct nfp_tun_neigh payload; + + /* Only offload representor IPv4s for now. */ + if (!nfp_netdev_is_nfp_repr(netdev)) + return; + + memset(&payload, 0, sizeof(struct nfp_tun_neigh)); + payload.dst_ipv4 = flow->daddr; + + /* If entry has expired send dst IP with all other fields 0. */ + if (!(neigh->nud_state & NUD_VALID)) { + nfp_tun_del_route_from_cache(app, payload.dst_ipv4); + /* Trigger ARP to verify invalid neighbour state. */ + neigh_event_send(neigh, NULL); + goto send_msg; + } + + /* Have a valid neighbour so populate rest of entry. */ + payload.src_ipv4 = flow->saddr; + ether_addr_copy(payload.src_addr, netdev->dev_addr); + neigh_ha_snapshot(payload.dst_addr, neigh, netdev); + payload.port_id = cpu_to_be32(nfp_repr_get_port_id(netdev)); + /* Add destination of new route to NFP cache. */ + nfp_tun_add_route_to_cache(app, payload.dst_ipv4); + +send_msg: + nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH, + sizeof(struct nfp_tun_neigh), + (unsigned char *)&payload, flag); +} + +static int +nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, + void *ptr) +{ + struct nfp_flower_priv *app_priv; + struct netevent_redirect *redir; + struct flowi4 flow = {}; + struct neighbour *n; + struct nfp_app *app; + struct rtable *rt; + int err; + + switch (event) { + case NETEVENT_REDIRECT: + redir = (struct netevent_redirect *)ptr; + n = redir->neigh; + break; + case NETEVENT_NEIGH_UPDATE: + n = (struct neighbour *)ptr; + break; + default: + return NOTIFY_DONE; + } + + flow.daddr = *(__be32 *)n->primary_key; + + /* Only concerned with route changes for representors. */ + if (!nfp_netdev_is_nfp_repr(n->dev)) + return NOTIFY_DONE; + + app_priv = container_of(nb, struct nfp_flower_priv, nfp_tun_neigh_nb); + app = app_priv->app; + + /* Only concerned with changes to routes already added to NFP. */ + if (!nfp_tun_has_route(app, flow.daddr)) + return NOTIFY_DONE; + +#if IS_ENABLED(CONFIG_INET) + /* Do a route lookup to populate flow data. */ + rt = ip_route_output_key(dev_net(n->dev), &flow); + err = PTR_ERR_OR_ZERO(rt); + if (err) + return NOTIFY_DONE; +#else + return NOTIFY_DONE; +#endif + + flow.flowi4_proto = IPPROTO_UDP; + nfp_tun_write_neigh(n->dev, app, &flow, n, GFP_ATOMIC); + + return NOTIFY_OK; +} + +void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_tun_req_route_ipv4 *payload; + struct net_device *netdev; + struct flowi4 flow = {}; + struct neighbour *n; + struct rtable *rt; + int err; + + payload = nfp_flower_cmsg_get_data(skb); + + netdev = nfp_app_repr_get(app, be32_to_cpu(payload->ingress_port)); + if (!netdev) + goto route_fail_warning; + + flow.daddr = payload->ipv4_addr; + flow.flowi4_proto = IPPROTO_UDP; + +#if IS_ENABLED(CONFIG_INET) + /* Do a route lookup on same namespace as ingress port. */ + rt = ip_route_output_key(dev_net(netdev), &flow); + err = PTR_ERR_OR_ZERO(rt); + if (err) + goto route_fail_warning; +#else + goto route_fail_warning; +#endif + + /* Get the neighbour entry for the lookup */ + n = dst_neigh_lookup(&rt->dst, &flow.daddr); + ip_rt_put(rt); + if (!n) + goto route_fail_warning; + nfp_tun_write_neigh(n->dev, app, &flow, n, GFP_KERNEL); + neigh_release(n); + return; + +route_fail_warning: + nfp_flower_cmsg_warn(app, "Requested route not found.\n"); +} + +static void nfp_tun_write_ipv4_list(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv4_addr_entry *entry; + struct nfp_tun_ipv4_addr payload; + struct list_head *ptr, *storage; + int count; + + memset(&payload, 0, sizeof(struct nfp_tun_ipv4_addr)); + mutex_lock(&priv->nfp_ipv4_off_lock); + count = 0; + list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) { + if (count >= NFP_FL_IPV4_ADDRS_MAX) { + mutex_unlock(&priv->nfp_ipv4_off_lock); + nfp_flower_cmsg_warn(app, "IPv4 offload exceeds limit.\n"); + return; + } + entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list); + payload.ipv4_addr[count++] = entry->ipv4_addr; + } + payload.count = cpu_to_be32(count); + mutex_unlock(&priv->nfp_ipv4_off_lock); + + nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_IPS, + sizeof(struct nfp_tun_ipv4_addr), + &payload, GFP_KERNEL); +} + +void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv4_addr_entry *entry; + struct list_head *ptr, *storage; + + mutex_lock(&priv->nfp_ipv4_off_lock); + list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) { + entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list); + if (entry->ipv4_addr == ipv4) { + entry->ref_count++; + mutex_unlock(&priv->nfp_ipv4_off_lock); + return; + } + } + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + mutex_unlock(&priv->nfp_ipv4_off_lock); + nfp_flower_cmsg_warn(app, "Mem error when offloading IP address.\n"); + return; + } + entry->ipv4_addr = ipv4; + entry->ref_count = 1; + list_add_tail(&entry->list, &priv->nfp_ipv4_off_list); + mutex_unlock(&priv->nfp_ipv4_off_lock); + + nfp_tun_write_ipv4_list(app); +} + +void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv4_addr_entry *entry; + struct list_head *ptr, *storage; + + mutex_lock(&priv->nfp_ipv4_off_lock); + list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) { + entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list); + if (entry->ipv4_addr == ipv4) { + entry->ref_count--; + if (!entry->ref_count) { + list_del(&entry->list); + kfree(entry); + } + break; + } + } + mutex_unlock(&priv->nfp_ipv4_off_lock); + + nfp_tun_write_ipv4_list(app); +} + +void nfp_tunnel_write_macs(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_tun_mac_offload_entry *entry; + struct nfp_tun_mac_addr *payload; + struct list_head *ptr, *storage; + int mac_count, err, pay_size; + + mutex_lock(&priv->nfp_mac_off_lock); + if (!priv->nfp_mac_off_count) { + mutex_unlock(&priv->nfp_mac_off_lock); + return; + } + + pay_size = sizeof(struct nfp_tun_mac_addr) + + sizeof(struct index_mac_addr) * priv->nfp_mac_off_count; + + payload = kzalloc(pay_size, GFP_KERNEL); + if (!payload) { + mutex_unlock(&priv->nfp_mac_off_lock); + return; + } + + payload->count = cpu_to_be16(priv->nfp_mac_off_count); + + mac_count = 0; + list_for_each_safe(ptr, storage, &priv->nfp_mac_off_list) { + entry = list_entry(ptr, struct nfp_tun_mac_offload_entry, + list); + payload->addresses[mac_count].index = entry->index; + ether_addr_copy(payload->addresses[mac_count].addr, + entry->addr); + mac_count++; + } + + err = nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_MAC, + pay_size, payload, GFP_KERNEL); + + kfree(payload); + + if (err) { + mutex_unlock(&priv->nfp_mac_off_lock); + /* Write failed so retain list for future retry. */ + return; + } + + /* If list was successfully offloaded, flush it. */ + list_for_each_safe(ptr, storage, &priv->nfp_mac_off_list) { + entry = list_entry(ptr, struct nfp_tun_mac_offload_entry, + list); + list_del(&entry->list); + kfree(entry); + } + + priv->nfp_mac_off_count = 0; + mutex_unlock(&priv->nfp_mac_off_lock); +} + +static int nfp_tun_get_mac_idx(struct nfp_app *app, int ifindex) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_tun_mac_non_nfp_idx *entry; + struct list_head *ptr, *storage; + int idx; + + mutex_lock(&priv->nfp_mac_index_lock); + list_for_each_safe(ptr, storage, &priv->nfp_mac_index_list) { + entry = list_entry(ptr, struct nfp_tun_mac_non_nfp_idx, list); + if (entry->ifindex == ifindex) { + idx = entry->index; + mutex_unlock(&priv->nfp_mac_index_lock); + return idx; + } + } + + idx = ida_simple_get(&priv->nfp_mac_off_ids, 0, + NFP_MAX_MAC_INDEX, GFP_KERNEL); + if (idx < 0) { + mutex_unlock(&priv->nfp_mac_index_lock); + return idx; + } + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + mutex_unlock(&priv->nfp_mac_index_lock); + return -ENOMEM; + } + entry->ifindex = ifindex; + entry->index = idx; + list_add_tail(&entry->list, &priv->nfp_mac_index_list); + mutex_unlock(&priv->nfp_mac_index_lock); + + return idx; +} + +static void nfp_tun_del_mac_idx(struct nfp_app *app, int ifindex) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_tun_mac_non_nfp_idx *entry; + struct list_head *ptr, *storage; + + mutex_lock(&priv->nfp_mac_index_lock); + list_for_each_safe(ptr, storage, &priv->nfp_mac_index_list) { + entry = list_entry(ptr, struct nfp_tun_mac_non_nfp_idx, list); + if (entry->ifindex == ifindex) { + ida_simple_remove(&priv->nfp_mac_off_ids, + entry->index); + list_del(&entry->list); + kfree(entry); + break; + } + } + mutex_unlock(&priv->nfp_mac_index_lock); +} + +static void nfp_tun_add_to_mac_offload_list(struct net_device *netdev, + struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_tun_mac_offload_entry *entry; + u16 nfp_mac_idx; + int port = 0; + + /* Check if MAC should be offloaded. */ + if (!is_valid_ether_addr(netdev->dev_addr)) + return; + + if (nfp_netdev_is_nfp_repr(netdev)) + port = nfp_repr_get_port_id(netdev); + else if (!nfp_tun_is_netdev_to_offload(netdev)) + return; + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + nfp_flower_cmsg_warn(app, "Mem fail when offloading MAC.\n"); + return; + } + + if (FIELD_GET(NFP_FLOWER_CMSG_PORT_TYPE, port) == + NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT) { + nfp_mac_idx = port << 8 | NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT; + } else if (FIELD_GET(NFP_FLOWER_CMSG_PORT_TYPE, port) == + NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT) { + port = FIELD_GET(NFP_FLOWER_CMSG_PORT_VNIC, port); + nfp_mac_idx = port << 8 | NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT; + } else { + /* Must assign our own unique 8-bit index. */ + int idx = nfp_tun_get_mac_idx(app, netdev->ifindex); + + if (idx < 0) { + nfp_flower_cmsg_warn(app, "Can't assign non-repr MAC index.\n"); + kfree(entry); + return; + } + nfp_mac_idx = idx << 8 | NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT; + } + + entry->index = cpu_to_be16(nfp_mac_idx); + ether_addr_copy(entry->addr, netdev->dev_addr); + + mutex_lock(&priv->nfp_mac_off_lock); + priv->nfp_mac_off_count++; + list_add_tail(&entry->list, &priv->nfp_mac_off_list); + mutex_unlock(&priv->nfp_mac_off_lock); +} + +static int nfp_tun_mac_event_handler(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct nfp_flower_priv *app_priv; + struct net_device *netdev; + struct nfp_app *app; + + if (event == NETDEV_DOWN || event == NETDEV_UNREGISTER) { + app_priv = container_of(nb, struct nfp_flower_priv, + nfp_tun_mac_nb); + app = app_priv->app; + netdev = netdev_notifier_info_to_dev(ptr); + + /* If non-nfp netdev then free its offload index. */ + if (nfp_tun_is_netdev_to_offload(netdev)) + nfp_tun_del_mac_idx(app, netdev->ifindex); + } else if (event == NETDEV_UP || event == NETDEV_CHANGEADDR || + event == NETDEV_REGISTER) { + app_priv = container_of(nb, struct nfp_flower_priv, + nfp_tun_mac_nb); + app = app_priv->app; + netdev = netdev_notifier_info_to_dev(ptr); + + nfp_tun_add_to_mac_offload_list(netdev, app); + + /* Force a list write to keep NFP up to date. */ + nfp_tunnel_write_macs(app); + } + return NOTIFY_OK; +} + +int nfp_tunnel_config_start(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + struct net_device *netdev; + int err; + + /* Initialise priv data for MAC offloading. */ + priv->nfp_mac_off_count = 0; + mutex_init(&priv->nfp_mac_off_lock); + INIT_LIST_HEAD(&priv->nfp_mac_off_list); + priv->nfp_tun_mac_nb.notifier_call = nfp_tun_mac_event_handler; + mutex_init(&priv->nfp_mac_index_lock); + INIT_LIST_HEAD(&priv->nfp_mac_index_list); + ida_init(&priv->nfp_mac_off_ids); + + /* Initialise priv data for IPv4 offloading. */ + mutex_init(&priv->nfp_ipv4_off_lock); + INIT_LIST_HEAD(&priv->nfp_ipv4_off_list); + + /* Initialise priv data for neighbour offloading. */ + spin_lock_init(&priv->nfp_neigh_off_lock); + INIT_LIST_HEAD(&priv->nfp_neigh_off_list); + priv->nfp_tun_neigh_nb.notifier_call = nfp_tun_neigh_event_handler; + + err = register_netdevice_notifier(&priv->nfp_tun_mac_nb); + if (err) + goto err_free_mac_ida; + + err = register_netevent_notifier(&priv->nfp_tun_neigh_nb); + if (err) + goto err_unreg_mac_nb; + + /* Parse netdevs already registered for MACs that need offloaded. */ + rtnl_lock(); + for_each_netdev(&init_net, netdev) + nfp_tun_add_to_mac_offload_list(netdev, app); + rtnl_unlock(); + + return 0; + +err_unreg_mac_nb: + unregister_netdevice_notifier(&priv->nfp_tun_mac_nb); +err_free_mac_ida: + ida_destroy(&priv->nfp_mac_off_ids); + return err; +} + +void nfp_tunnel_config_stop(struct nfp_app *app) +{ + struct nfp_tun_mac_offload_entry *mac_entry; + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv4_route_entry *route_entry; + struct nfp_tun_mac_non_nfp_idx *mac_idx; + struct nfp_ipv4_addr_entry *ip_entry; + struct list_head *ptr, *storage; + + unregister_netdevice_notifier(&priv->nfp_tun_mac_nb); + unregister_netevent_notifier(&priv->nfp_tun_neigh_nb); + + /* Free any memory that may be occupied by MAC list. */ + list_for_each_safe(ptr, storage, &priv->nfp_mac_off_list) { + mac_entry = list_entry(ptr, struct nfp_tun_mac_offload_entry, + list); + list_del(&mac_entry->list); + kfree(mac_entry); + } + + /* Free any memory that may be occupied by MAC index list. */ + list_for_each_safe(ptr, storage, &priv->nfp_mac_index_list) { + mac_idx = list_entry(ptr, struct nfp_tun_mac_non_nfp_idx, + list); + list_del(&mac_idx->list); + kfree(mac_idx); + } + + ida_destroy(&priv->nfp_mac_off_ids); + + /* Free any memory that may be occupied by ipv4 list. */ + list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) { + ip_entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list); + list_del(&ip_entry->list); + kfree(ip_entry); + } + + /* Free any memory that may be occupied by the route list. */ + list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) { + route_entry = list_entry(ptr, struct nfp_ipv4_route_entry, + list); + list_del(&route_entry->list); + kfree(route_entry); + } +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c index 82c290763529..955a9f44d244 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c @@ -31,6 +31,7 @@ * SOFTWARE. */ +#include <linux/bug.h> #include <linux/skbuff.h> #include <linux/slab.h> @@ -42,10 +43,14 @@ #include "nfp_net_repr.h" static const struct nfp_app_type *apps[] = { - &app_nic, - &app_bpf, + [NFP_APP_CORE_NIC] = &app_nic, +#ifdef CONFIG_BPF_SYSCALL + [NFP_APP_BPF_NIC] = &app_bpf, +#else + [NFP_APP_BPF_NIC] = &app_nic, +#endif #ifdef CONFIG_NFP_APP_FLOWER - &app_flower, + [NFP_APP_FLOWER_NIC] = &app_flower, #endif }; @@ -101,31 +106,21 @@ nfp_app_reprs_set(struct nfp_app *app, enum nfp_repr_type type, old = rcu_dereference_protected(app->reprs[type], lockdep_is_held(&app->pf->lock)); - if (reprs && old) { - old = ERR_PTR(-EBUSY); - goto exit_unlock; - } - rcu_assign_pointer(app->reprs[type], reprs); -exit_unlock: return old; } struct nfp_app *nfp_app_alloc(struct nfp_pf *pf, enum nfp_app_id id) { struct nfp_app *app; - unsigned int i; - for (i = 0; i < ARRAY_SIZE(apps); i++) - if (apps[i]->id == id) - break; - if (i == ARRAY_SIZE(apps)) { + if (id >= ARRAY_SIZE(apps) || !apps[id]) { nfp_err(pf->cpp, "failed to find app with ID 0x%02hhx\n", id); return ERR_PTR(-EINVAL); } - if (WARN_ON(!apps[i]->name || !apps[i]->vnic_alloc)) + if (WARN_ON(!apps[id]->name || !apps[id]->vnic_alloc)) return ERR_PTR(-EINVAL); app = kzalloc(sizeof(*app), GFP_KERNEL); @@ -135,7 +130,7 @@ struct nfp_app *nfp_app_alloc(struct nfp_pf *pf, enum nfp_app_id id) app->pf = pf; app->cpp = pf->cpp; app->pdev = pf->pdev; - app->type = apps[i]; + app->type = apps[id]; return app; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h index af640b5c2108..54b67c9b8d5b 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h @@ -36,10 +36,13 @@ #include <net/devlink.h> +#include <trace/events/devlink.h> + #include "nfp_net_repr.h" struct bpf_prog; struct net_device; +struct netdev_bpf; struct pci_dev; struct sk_buff; struct sk_buff; @@ -81,6 +84,9 @@ extern const struct nfp_app_type app_flower; * @setup_tc: setup TC ndo * @tc_busy: TC HW offload busy (rules loaded) * @xdp_offload: offload an XDP program + * @bpf_verifier_prep: verifier prep for dev-specific BPF programs + * @bpf_translate: translate call for dev-specific BPF programs + * @bpf_destroy: destroy for dev-specific BPF programs * @eswitch_mode_get: get SR-IOV eswitch mode * @sriov_enable: app-specific sriov initialisation * @sriov_disable: app-specific sriov clean-up @@ -116,6 +122,12 @@ struct nfp_app_type { bool (*tc_busy)(struct nfp_app *app, struct nfp_net *nn); int (*xdp_offload)(struct nfp_app *app, struct nfp_net *nn, struct bpf_prog *prog); + int (*bpf_verifier_prep)(struct nfp_app *app, struct nfp_net *nn, + struct netdev_bpf *bpf); + int (*bpf_translate)(struct nfp_app *app, struct nfp_net *nn, + struct bpf_prog *prog); + int (*bpf_destroy)(struct nfp_app *app, struct nfp_net *nn, + struct bpf_prog *prog); int (*sriov_enable)(struct nfp_app *app, int num_vfs); void (*sriov_disable)(struct nfp_app *app); @@ -269,13 +281,46 @@ static inline int nfp_app_xdp_offload(struct nfp_app *app, struct nfp_net *nn, return app->type->xdp_offload(app, nn, prog); } +static inline int +nfp_app_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn, + struct netdev_bpf *bpf) +{ + if (!app || !app->type->bpf_verifier_prep) + return -EOPNOTSUPP; + return app->type->bpf_verifier_prep(app, nn, bpf); +} + +static inline int +nfp_app_bpf_translate(struct nfp_app *app, struct nfp_net *nn, + struct bpf_prog *prog) +{ + if (!app || !app->type->bpf_translate) + return -EOPNOTSUPP; + return app->type->bpf_translate(app, nn, prog); +} + +static inline int +nfp_app_bpf_destroy(struct nfp_app *app, struct nfp_net *nn, + struct bpf_prog *prog) +{ + if (!app || !app->type->bpf_destroy) + return -EOPNOTSUPP; + return app->type->bpf_destroy(app, nn, prog); +} + static inline bool nfp_app_ctrl_tx(struct nfp_app *app, struct sk_buff *skb) { + trace_devlink_hwmsg(priv_to_devlink(app->pf), false, 0, + skb->data, skb->len); + return nfp_ctrl_tx(app->ctrl, skb); } static inline void nfp_app_ctrl_rx(struct nfp_app *app, struct sk_buff *skb) { + trace_devlink_hwmsg(priv_to_devlink(app->pf), true, 0, + skb->data, skb->len); + app->type->ctrl_msg_rx(app, skb); } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.c b/drivers/net/ethernet/netronome/nfp/nfp_asm.c new file mode 100644 index 000000000000..830f6de25f47 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.c @@ -0,0 +1,257 @@ +/* + * Copyright (C) 2016-2017 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/bitops.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/types.h> + +#include "nfp_asm.h" + +const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = { + [CMD_TGT_WRITE8_SWAP] = { 0x02, 0x42 }, + [CMD_TGT_READ8] = { 0x01, 0x43 }, + [CMD_TGT_READ32] = { 0x00, 0x5c }, + [CMD_TGT_READ32_LE] = { 0x01, 0x5c }, + [CMD_TGT_READ32_SWAP] = { 0x02, 0x5c }, + [CMD_TGT_READ_LE] = { 0x01, 0x40 }, + [CMD_TGT_READ_SWAP_LE] = { 0x03, 0x40 }, +}; + +static u16 nfp_swreg_to_unreg(swreg reg, bool is_dst) +{ + bool lm_id, lm_dec = false; + u16 val = swreg_value(reg); + + switch (swreg_type(reg)) { + case NN_REG_GPR_A: + case NN_REG_GPR_B: + case NN_REG_GPR_BOTH: + return val; + case NN_REG_NNR: + return UR_REG_NN | val; + case NN_REG_XFER: + return UR_REG_XFR | val; + case NN_REG_LMEM: + lm_id = swreg_lm_idx(reg); + + switch (swreg_lm_mode(reg)) { + case NN_LM_MOD_NONE: + if (val & ~UR_REG_LM_IDX_MAX) { + pr_err("LM offset too large\n"); + return 0; + } + return UR_REG_LM | FIELD_PREP(UR_REG_LM_IDX, lm_id) | + val; + case NN_LM_MOD_DEC: + lm_dec = true; + /* fall through */ + case NN_LM_MOD_INC: + if (val) { + pr_err("LM offset in inc/dev mode\n"); + return 0; + } + return UR_REG_LM | UR_REG_LM_POST_MOD | + FIELD_PREP(UR_REG_LM_IDX, lm_id) | + FIELD_PREP(UR_REG_LM_POST_MOD_DEC, lm_dec); + default: + pr_err("bad LM mode for unrestricted operands %d\n", + swreg_lm_mode(reg)); + return 0; + } + case NN_REG_IMM: + if (val & ~0xff) { + pr_err("immediate too large\n"); + return 0; + } + return UR_REG_IMM_encode(val); + case NN_REG_NONE: + return is_dst ? UR_REG_NO_DST : REG_NONE; + } + + pr_err("unrecognized reg encoding %08x\n", reg); + return 0; +} + +int swreg_to_unrestricted(swreg dst, swreg lreg, swreg rreg, + struct nfp_insn_ur_regs *reg) +{ + memset(reg, 0, sizeof(*reg)); + + /* Decode destination */ + if (swreg_type(dst) == NN_REG_IMM) + return -EFAULT; + + if (swreg_type(dst) == NN_REG_GPR_B) + reg->dst_ab = ALU_DST_B; + if (swreg_type(dst) == NN_REG_GPR_BOTH) + reg->wr_both = true; + reg->dst = nfp_swreg_to_unreg(dst, true); + + /* Decode source operands */ + if (swreg_type(lreg) == swreg_type(rreg)) + return -EFAULT; + + if (swreg_type(lreg) == NN_REG_GPR_B || + swreg_type(rreg) == NN_REG_GPR_A) { + reg->areg = nfp_swreg_to_unreg(rreg, false); + reg->breg = nfp_swreg_to_unreg(lreg, false); + reg->swap = true; + } else { + reg->areg = nfp_swreg_to_unreg(lreg, false); + reg->breg = nfp_swreg_to_unreg(rreg, false); + } + + reg->dst_lmextn = swreg_lmextn(dst); + reg->src_lmextn = swreg_lmextn(lreg) | swreg_lmextn(rreg); + + return 0; +} + +static u16 nfp_swreg_to_rereg(swreg reg, bool is_dst, bool has_imm8, bool *i8) +{ + u16 val = swreg_value(reg); + bool lm_id; + + switch (swreg_type(reg)) { + case NN_REG_GPR_A: + case NN_REG_GPR_B: + case NN_REG_GPR_BOTH: + return val; + case NN_REG_XFER: + return RE_REG_XFR | val; + case NN_REG_LMEM: + lm_id = swreg_lm_idx(reg); + + if (swreg_lm_mode(reg) != NN_LM_MOD_NONE) { + pr_err("bad LM mode for restricted operands %d\n", + swreg_lm_mode(reg)); + return 0; + } + + if (val & ~RE_REG_LM_IDX_MAX) { + pr_err("LM offset too large\n"); + return 0; + } + + return RE_REG_LM | FIELD_PREP(RE_REG_LM_IDX, lm_id) | val; + case NN_REG_IMM: + if (val & ~(0x7f | has_imm8 << 7)) { + pr_err("immediate too large\n"); + return 0; + } + *i8 = val & 0x80; + return RE_REG_IMM_encode(val & 0x7f); + case NN_REG_NONE: + return is_dst ? RE_REG_NO_DST : REG_NONE; + case NN_REG_NNR: + pr_err("NNRs used with restricted encoding\n"); + return 0; + } + + pr_err("unrecognized reg encoding\n"); + return 0; +} + +int swreg_to_restricted(swreg dst, swreg lreg, swreg rreg, + struct nfp_insn_re_regs *reg, bool has_imm8) +{ + memset(reg, 0, sizeof(*reg)); + + /* Decode destination */ + if (swreg_type(dst) == NN_REG_IMM) + return -EFAULT; + + if (swreg_type(dst) == NN_REG_GPR_B) + reg->dst_ab = ALU_DST_B; + if (swreg_type(dst) == NN_REG_GPR_BOTH) + reg->wr_both = true; + reg->dst = nfp_swreg_to_rereg(dst, true, false, NULL); + + /* Decode source operands */ + if (swreg_type(lreg) == swreg_type(rreg)) + return -EFAULT; + + if (swreg_type(lreg) == NN_REG_GPR_B || + swreg_type(rreg) == NN_REG_GPR_A) { + reg->areg = nfp_swreg_to_rereg(rreg, false, has_imm8, ®->i8); + reg->breg = nfp_swreg_to_rereg(lreg, false, has_imm8, ®->i8); + reg->swap = true; + } else { + reg->areg = nfp_swreg_to_rereg(lreg, false, has_imm8, ®->i8); + reg->breg = nfp_swreg_to_rereg(rreg, false, has_imm8, ®->i8); + } + + reg->dst_lmextn = swreg_lmextn(dst); + reg->src_lmextn = swreg_lmextn(lreg) | swreg_lmextn(rreg); + + return 0; +} + +#define NFP_USTORE_ECC_POLY_WORDS 7 +#define NFP_USTORE_OP_BITS 45 + +static const u64 nfp_ustore_ecc_polynomials[NFP_USTORE_ECC_POLY_WORDS] = { + 0x0ff800007fffULL, + 0x11f801ff801fULL, + 0x1e387e0781e1ULL, + 0x17cb8e388e22ULL, + 0x1af5b2c93244ULL, + 0x1f56d5525488ULL, + 0x0daf69a46910ULL, +}; + +static bool parity(u64 value) +{ + return hweight64(value) & 1; +} + +int nfp_ustore_check_valid_no_ecc(u64 insn) +{ + if (insn & ~GENMASK_ULL(NFP_USTORE_OP_BITS, 0)) + return -EINVAL; + + return 0; +} + +u64 nfp_ustore_calc_ecc_insn(u64 insn) +{ + u8 ecc = 0; + int i; + + for (i = 0; i < NFP_USTORE_ECC_POLY_WORDS; i++) + ecc |= parity(nfp_ustore_ecc_polynomials[i] & insn) << i; + + return insn | (u64)ecc << NFP_USTORE_OP_BITS; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h index d2b535739d2b..74d0c11ab2f9 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h @@ -34,6 +34,8 @@ #ifndef __NFP_ASM_H__ #define __NFP_ASM_H__ 1 +#include <linux/bitfield.h> +#include <linux/bug.h> #include <linux/types.h> #define REG_NONE 0 @@ -43,23 +45,31 @@ #define RE_REG_IMM_encode(x) \ (RE_REG_IMM | ((x) & 0x1f) | (((x) & 0x60) << 1)) #define RE_REG_IMM_MAX 0x07fULL +#define RE_REG_LM 0x050 +#define RE_REG_LM_IDX 0x008 +#define RE_REG_LM_IDX_MAX 0x7 #define RE_REG_XFR 0x080 #define UR_REG_XFR 0x180 +#define UR_REG_LM 0x200 +#define UR_REG_LM_IDX 0x020 +#define UR_REG_LM_POST_MOD 0x010 +#define UR_REG_LM_POST_MOD_DEC 0x001 +#define UR_REG_LM_IDX_MAX 0xf #define UR_REG_NN 0x280 #define UR_REG_NO_DST 0x300 #define UR_REG_IMM UR_REG_NO_DST #define UR_REG_IMM_encode(x) (UR_REG_IMM | (x)) #define UR_REG_IMM_MAX 0x0ffULL -#define OP_BR_BASE 0x0d800000020ULL -#define OP_BR_BASE_MASK 0x0f8000c3ce0ULL -#define OP_BR_MASK 0x0000000001fULL -#define OP_BR_EV_PIP 0x00000000300ULL -#define OP_BR_CSS 0x0000003c000ULL -#define OP_BR_DEFBR 0x00000300000ULL -#define OP_BR_ADDR_LO 0x007ffc00000ULL -#define OP_BR_ADDR_HI 0x10000000000ULL +#define OP_BR_BASE 0x0d800000020ULL +#define OP_BR_BASE_MASK 0x0f8000c3ce0ULL +#define OP_BR_MASK 0x0000000001fULL +#define OP_BR_EV_PIP 0x00000000300ULL +#define OP_BR_CSS 0x0000003c000ULL +#define OP_BR_DEFBR 0x00000300000ULL +#define OP_BR_ADDR_LO 0x007ffc00000ULL +#define OP_BR_ADDR_HI 0x10000000000ULL #define nfp_is_br(_insn) \ (((_insn) & OP_BR_BASE_MASK) == OP_BR_BASE) @@ -82,30 +92,33 @@ enum br_ctx_signal_state { BR_CSS_NONE = 2, }; -#define OP_BBYTE_BASE 0x0c800000000ULL -#define OP_BB_A_SRC 0x000000000ffULL -#define OP_BB_BYTE 0x00000000300ULL -#define OP_BB_B_SRC 0x0000003fc00ULL -#define OP_BB_I8 0x00000040000ULL -#define OP_BB_EQ 0x00000080000ULL -#define OP_BB_DEFBR 0x00000300000ULL -#define OP_BB_ADDR_LO 0x007ffc00000ULL -#define OP_BB_ADDR_HI 0x10000000000ULL - -#define OP_BALU_BASE 0x0e800000000ULL -#define OP_BA_A_SRC 0x000000003ffULL -#define OP_BA_B_SRC 0x000000ffc00ULL -#define OP_BA_DEFBR 0x00000300000ULL -#define OP_BA_ADDR_HI 0x0007fc00000ULL - -#define OP_IMMED_A_SRC 0x000000003ffULL -#define OP_IMMED_B_SRC 0x000000ffc00ULL -#define OP_IMMED_IMM 0x0000ff00000ULL -#define OP_IMMED_WIDTH 0x00060000000ULL -#define OP_IMMED_INV 0x00080000000ULL -#define OP_IMMED_SHIFT 0x00600000000ULL -#define OP_IMMED_BASE 0x0f000000000ULL -#define OP_IMMED_WR_AB 0x20000000000ULL +#define OP_BBYTE_BASE 0x0c800000000ULL +#define OP_BB_A_SRC 0x000000000ffULL +#define OP_BB_BYTE 0x00000000300ULL +#define OP_BB_B_SRC 0x0000003fc00ULL +#define OP_BB_I8 0x00000040000ULL +#define OP_BB_EQ 0x00000080000ULL +#define OP_BB_DEFBR 0x00000300000ULL +#define OP_BB_ADDR_LO 0x007ffc00000ULL +#define OP_BB_ADDR_HI 0x10000000000ULL +#define OP_BB_SRC_LMEXTN 0x40000000000ULL + +#define OP_BALU_BASE 0x0e800000000ULL +#define OP_BA_A_SRC 0x000000003ffULL +#define OP_BA_B_SRC 0x000000ffc00ULL +#define OP_BA_DEFBR 0x00000300000ULL +#define OP_BA_ADDR_HI 0x0007fc00000ULL + +#define OP_IMMED_A_SRC 0x000000003ffULL +#define OP_IMMED_B_SRC 0x000000ffc00ULL +#define OP_IMMED_IMM 0x0000ff00000ULL +#define OP_IMMED_WIDTH 0x00060000000ULL +#define OP_IMMED_INV 0x00080000000ULL +#define OP_IMMED_SHIFT 0x00600000000ULL +#define OP_IMMED_BASE 0x0f000000000ULL +#define OP_IMMED_WR_AB 0x20000000000ULL +#define OP_IMMED_SRC_LMEXTN 0x40000000000ULL +#define OP_IMMED_DST_LMEXTN 0x80000000000ULL enum immed_width { IMMED_WIDTH_ALL = 0, @@ -119,17 +132,19 @@ enum immed_shift { IMMED_SHIFT_2B = 2, }; -#define OP_SHF_BASE 0x08000000000ULL -#define OP_SHF_A_SRC 0x000000000ffULL -#define OP_SHF_SC 0x00000000300ULL -#define OP_SHF_B_SRC 0x0000003fc00ULL -#define OP_SHF_I8 0x00000040000ULL -#define OP_SHF_SW 0x00000080000ULL -#define OP_SHF_DST 0x0000ff00000ULL -#define OP_SHF_SHIFT 0x001f0000000ULL -#define OP_SHF_OP 0x00e00000000ULL -#define OP_SHF_DST_AB 0x01000000000ULL -#define OP_SHF_WR_AB 0x20000000000ULL +#define OP_SHF_BASE 0x08000000000ULL +#define OP_SHF_A_SRC 0x000000000ffULL +#define OP_SHF_SC 0x00000000300ULL +#define OP_SHF_B_SRC 0x0000003fc00ULL +#define OP_SHF_I8 0x00000040000ULL +#define OP_SHF_SW 0x00000080000ULL +#define OP_SHF_DST 0x0000ff00000ULL +#define OP_SHF_SHIFT 0x001f0000000ULL +#define OP_SHF_OP 0x00e00000000ULL +#define OP_SHF_DST_AB 0x01000000000ULL +#define OP_SHF_WR_AB 0x20000000000ULL +#define OP_SHF_SRC_LMEXTN 0x40000000000ULL +#define OP_SHF_DST_LMEXTN 0x80000000000ULL enum shf_op { SHF_OP_NONE = 0, @@ -139,24 +154,27 @@ enum shf_op { enum shf_sc { SHF_SC_R_ROT = 0, + SHF_SC_NONE = SHF_SC_R_ROT, SHF_SC_R_SHF = 1, SHF_SC_L_SHF = 2, SHF_SC_R_DSHF = 3, }; -#define OP_ALU_A_SRC 0x000000003ffULL -#define OP_ALU_B_SRC 0x000000ffc00ULL -#define OP_ALU_DST 0x0003ff00000ULL -#define OP_ALU_SW 0x00040000000ULL -#define OP_ALU_OP 0x00f80000000ULL -#define OP_ALU_DST_AB 0x01000000000ULL -#define OP_ALU_BASE 0x0a000000000ULL -#define OP_ALU_WR_AB 0x20000000000ULL +#define OP_ALU_A_SRC 0x000000003ffULL +#define OP_ALU_B_SRC 0x000000ffc00ULL +#define OP_ALU_DST 0x0003ff00000ULL +#define OP_ALU_SW 0x00040000000ULL +#define OP_ALU_OP 0x00f80000000ULL +#define OP_ALU_DST_AB 0x01000000000ULL +#define OP_ALU_BASE 0x0a000000000ULL +#define OP_ALU_WR_AB 0x20000000000ULL +#define OP_ALU_SRC_LMEXTN 0x40000000000ULL +#define OP_ALU_DST_LMEXTN 0x80000000000ULL enum alu_op { ALU_OP_NONE = 0x00, ALU_OP_ADD = 0x01, - ALU_OP_NEG = 0x04, + ALU_OP_NOT = 0x04, ALU_OP_AND = 0x08, ALU_OP_SUB_C = 0x0d, ALU_OP_ADD_C = 0x11, @@ -170,26 +188,28 @@ enum alu_dst_ab { ALU_DST_B = 1, }; -#define OP_LDF_BASE 0x0c000000000ULL -#define OP_LDF_A_SRC 0x000000000ffULL -#define OP_LDF_SC 0x00000000300ULL -#define OP_LDF_B_SRC 0x0000003fc00ULL -#define OP_LDF_I8 0x00000040000ULL -#define OP_LDF_SW 0x00000080000ULL -#define OP_LDF_ZF 0x00000100000ULL -#define OP_LDF_BMASK 0x0000f000000ULL -#define OP_LDF_SHF 0x001f0000000ULL -#define OP_LDF_WR_AB 0x20000000000ULL - -#define OP_CMD_A_SRC 0x000000000ffULL -#define OP_CMD_CTX 0x00000000300ULL -#define OP_CMD_B_SRC 0x0000003fc00ULL -#define OP_CMD_TOKEN 0x000000c0000ULL -#define OP_CMD_XFER 0x00001f00000ULL -#define OP_CMD_CNT 0x0000e000000ULL -#define OP_CMD_SIG 0x000f0000000ULL -#define OP_CMD_TGT_CMD 0x07f00000000ULL -#define OP_CMD_MODE 0x1c0000000000ULL +#define OP_LDF_BASE 0x0c000000000ULL +#define OP_LDF_A_SRC 0x000000000ffULL +#define OP_LDF_SC 0x00000000300ULL +#define OP_LDF_B_SRC 0x0000003fc00ULL +#define OP_LDF_I8 0x00000040000ULL +#define OP_LDF_SW 0x00000080000ULL +#define OP_LDF_ZF 0x00000100000ULL +#define OP_LDF_BMASK 0x0000f000000ULL +#define OP_LDF_SHF 0x001f0000000ULL +#define OP_LDF_WR_AB 0x20000000000ULL +#define OP_LDF_SRC_LMEXTN 0x40000000000ULL +#define OP_LDF_DST_LMEXTN 0x80000000000ULL + +#define OP_CMD_A_SRC 0x000000000ffULL +#define OP_CMD_CTX 0x00000000300ULL +#define OP_CMD_B_SRC 0x0000003fc00ULL +#define OP_CMD_TOKEN 0x000000c0000ULL +#define OP_CMD_XFER 0x00001f00000ULL +#define OP_CMD_CNT 0x0000e000000ULL +#define OP_CMD_SIG 0x000f0000000ULL +#define OP_CMD_TGT_CMD 0x07f00000000ULL +#define OP_CMD_MODE 0x1c0000000000ULL struct cmd_tgt_act { u8 token; @@ -198,12 +218,17 @@ struct cmd_tgt_act { enum cmd_tgt_map { CMD_TGT_READ8, - CMD_TGT_WRITE8, + CMD_TGT_WRITE8_SWAP, + CMD_TGT_READ32, + CMD_TGT_READ32_LE, + CMD_TGT_READ32_SWAP, CMD_TGT_READ_LE, CMD_TGT_READ_SWAP_LE, __CMD_TGT_MAP_SIZE, }; +extern const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE]; + enum cmd_mode { CMD_MODE_40b_AB = 0, CMD_MODE_40b_BA = 1, @@ -215,11 +240,13 @@ enum cmd_ctx_swap { CMD_CTX_NO_SWAP = 3, }; -#define OP_LCSR_BASE 0x0fc00000000ULL -#define OP_LCSR_A_SRC 0x000000003ffULL -#define OP_LCSR_B_SRC 0x000000ffc00ULL -#define OP_LCSR_WRITE 0x00000200000ULL -#define OP_LCSR_ADDR 0x001ffc00000ULL +#define OP_LCSR_BASE 0x0fc00000000ULL +#define OP_LCSR_A_SRC 0x000000003ffULL +#define OP_LCSR_B_SRC 0x000000ffc00ULL +#define OP_LCSR_WRITE 0x00000200000ULL +#define OP_LCSR_ADDR 0x001ffc00000ULL +#define OP_LCSR_SRC_LMEXTN 0x40000000000ULL +#define OP_LCSR_DST_LMEXTN 0x80000000000ULL enum lcsr_wr_src { LCSR_WR_AREG, @@ -227,7 +254,127 @@ enum lcsr_wr_src { LCSR_WR_IMM, }; -#define OP_CARB_BASE 0x0e000000000ULL -#define OP_CARB_OR 0x00000010000ULL +#define OP_CARB_BASE 0x0e000000000ULL +#define OP_CARB_OR 0x00000010000ULL + +#define NFP_CSR_ACT_LM_ADDR0 0x64 +#define NFP_CSR_ACT_LM_ADDR1 0x6c +#define NFP_CSR_ACT_LM_ADDR2 0x94 +#define NFP_CSR_ACT_LM_ADDR3 0x9c + +/* Software register representation, independent of operand type */ +#define NN_REG_TYPE GENMASK(31, 24) +#define NN_REG_LM_IDX GENMASK(23, 22) +#define NN_REG_LM_IDX_HI BIT(23) +#define NN_REG_LM_IDX_LO BIT(22) +#define NN_REG_LM_MOD GENMASK(21, 20) +#define NN_REG_VAL GENMASK(7, 0) + +enum nfp_bpf_reg_type { + NN_REG_GPR_A = BIT(0), + NN_REG_GPR_B = BIT(1), + NN_REG_GPR_BOTH = NN_REG_GPR_A | NN_REG_GPR_B, + NN_REG_NNR = BIT(2), + NN_REG_XFER = BIT(3), + NN_REG_IMM = BIT(4), + NN_REG_NONE = BIT(5), + NN_REG_LMEM = BIT(6), +}; + +enum nfp_bpf_lm_mode { + NN_LM_MOD_NONE = 0, + NN_LM_MOD_INC, + NN_LM_MOD_DEC, +}; + +#define reg_both(x) __enc_swreg((x), NN_REG_GPR_BOTH) +#define reg_a(x) __enc_swreg((x), NN_REG_GPR_A) +#define reg_b(x) __enc_swreg((x), NN_REG_GPR_B) +#define reg_nnr(x) __enc_swreg((x), NN_REG_NNR) +#define reg_xfer(x) __enc_swreg((x), NN_REG_XFER) +#define reg_imm(x) __enc_swreg((x), NN_REG_IMM) +#define reg_none() __enc_swreg(0, NN_REG_NONE) +#define reg_lm(x, off) __enc_swreg_lm((x), NN_LM_MOD_NONE, (off)) +#define reg_lm_inc(x) __enc_swreg_lm((x), NN_LM_MOD_INC, 0) +#define reg_lm_dec(x) __enc_swreg_lm((x), NN_LM_MOD_DEC, 0) +#define __reg_lm(x, mod, off) __enc_swreg_lm((x), (mod), (off)) + +typedef __u32 __bitwise swreg; + +static inline swreg __enc_swreg(u16 id, u8 type) +{ + return (__force swreg)(id | FIELD_PREP(NN_REG_TYPE, type)); +} + +static inline swreg __enc_swreg_lm(u8 id, enum nfp_bpf_lm_mode mode, u8 off) +{ + WARN_ON(id > 3 || (off && mode != NN_LM_MOD_NONE)); + + return (__force swreg)(FIELD_PREP(NN_REG_TYPE, NN_REG_LMEM) | + FIELD_PREP(NN_REG_LM_IDX, id) | + FIELD_PREP(NN_REG_LM_MOD, mode) | + off); +} + +static inline u32 swreg_raw(swreg reg) +{ + return (__force u32)reg; +} + +static inline enum nfp_bpf_reg_type swreg_type(swreg reg) +{ + return FIELD_GET(NN_REG_TYPE, swreg_raw(reg)); +} + +static inline u16 swreg_value(swreg reg) +{ + return FIELD_GET(NN_REG_VAL, swreg_raw(reg)); +} + +static inline bool swreg_lm_idx(swreg reg) +{ + return FIELD_GET(NN_REG_LM_IDX_LO, swreg_raw(reg)); +} + +static inline bool swreg_lmextn(swreg reg) +{ + return FIELD_GET(NN_REG_LM_IDX_HI, swreg_raw(reg)); +} + +static inline enum nfp_bpf_lm_mode swreg_lm_mode(swreg reg) +{ + return FIELD_GET(NN_REG_LM_MOD, swreg_raw(reg)); +} + +struct nfp_insn_ur_regs { + enum alu_dst_ab dst_ab; + u16 dst; + u16 areg, breg; + bool swap; + bool wr_both; + bool dst_lmextn; + bool src_lmextn; +}; + +struct nfp_insn_re_regs { + enum alu_dst_ab dst_ab; + u8 dst; + u8 areg, breg; + bool swap; + bool wr_both; + bool i8; + bool dst_lmextn; + bool src_lmextn; +}; + +int swreg_to_unrestricted(swreg dst, swreg lreg, swreg rreg, + struct nfp_insn_ur_regs *reg); +int swreg_to_restricted(swreg dst, swreg lreg, swreg rreg, + struct nfp_insn_re_regs *reg, bool has_imm8); + +#define NFP_USTORE_PREFETCH_WINDOW 8 + +int nfp_ustore_check_valid_no_ecc(u64 insn); +u64 nfp_ustore_calc_ecc_insn(u64 insn); #endif diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c index f8fa63b66739..35eaccbece36 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c @@ -346,6 +346,32 @@ exit_release_fw: return err < 0 ? err : 1; } +static void +nfp_nsp_init_ports(struct pci_dev *pdev, struct nfp_pf *pf, + struct nfp_nsp *nsp) +{ + bool needs_reinit = false; + int i; + + pf->eth_tbl = __nfp_eth_read_ports(pf->cpp, nsp); + if (!pf->eth_tbl) + return; + + if (!nfp_nsp_has_mac_reinit(nsp)) + return; + + for (i = 0; i < pf->eth_tbl->count; i++) + needs_reinit |= pf->eth_tbl->ports[i].override_changed; + if (!needs_reinit) + return; + + kfree(pf->eth_tbl); + if (nfp_nsp_mac_reinit(nsp)) + dev_warn(&pdev->dev, "MAC reinit failed\n"); + + pf->eth_tbl = __nfp_eth_read_ports(pf->cpp, nsp); +} + static int nfp_nsp_init(struct pci_dev *pdev, struct nfp_pf *pf) { struct nfp_nsp *nsp; @@ -366,7 +392,7 @@ static int nfp_nsp_init(struct pci_dev *pdev, struct nfp_pf *pf) if (err < 0) goto exit_close_nsp; - pf->eth_tbl = __nfp_eth_read_ports(pf->cpp, nsp); + nfp_nsp_init_ports(pdev, pf, nsp); pf->nspi = __nfp_nsp_identify(nsp); if (pf->nspi) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index d51d8237b984..7f9857c276b1 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -394,6 +394,7 @@ struct nfp_net_rx_ring { * @tx_lso: Counter of LSO packets sent * @tx_errors: How many TX errors were encountered * @tx_busy: How often was TX busy (no space)? + * @rx_replace_buf_alloc_fail: Counter of RX buffer allocation failures * @irq_vector: Interrupt vector number (use for talking to the OS) * @handler: Interrupt handler for this ring vector * @name: Name of the interrupt vector @@ -437,6 +438,8 @@ struct nfp_net_r_vector { u64 hw_csum_tx_inner; u64 tx_gather; u64 tx_lso; + + u64 rx_replace_buf_alloc_fail; u64 tx_errors; u64 tx_busy; @@ -473,7 +476,6 @@ struct nfp_stat_pair { * @dev: Backpointer to struct device * @netdev: Backpointer to net_device structure * @is_vf: Is the driver attached to a VF? - * @bpf_offload_skip_sw: Offloaded BPF program will not be rerun by cls_bpf * @bpf_offload_xdp: Offloaded BPF program is XDP * @chained_metadata_format: Firemware will use new metadata format * @rx_dma_dir: Mapping direction for RX buffers @@ -499,7 +501,6 @@ struct nfp_net_dp { struct net_device *netdev; u8 is_vf:1; - u8 bpf_offload_skip_sw:1; u8 bpf_offload_xdp:1; u8 chained_metadata_format:1; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index e118b5f23996..1a603fdd9e80 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -177,9 +177,9 @@ static int nfp_net_reconfig_wait(struct nfp_net *nn, unsigned long deadline) return timed_out ? -EIO : 0; } -static void nfp_net_reconfig_timer(unsigned long data) +static void nfp_net_reconfig_timer(struct timer_list *t) { - struct nfp_net *nn = (void *)data; + struct nfp_net *nn = from_timer(nn, t, reconfig_timer); spin_lock_bh(&nn->reconfig_lock); @@ -1185,7 +1185,7 @@ static void *nfp_net_rx_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr) } else { struct page *page; - page = alloc_page(GFP_KERNEL | __GFP_COLD); + page = alloc_page(GFP_KERNEL); frag = page ? page_address(page) : NULL; } if (!frag) { @@ -1209,15 +1209,15 @@ static void *nfp_net_napi_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr) if (!dp->xdp_prog) { frag = napi_alloc_frag(dp->fl_bufsz); + if (unlikely(!frag)) + return NULL; } else { struct page *page; - page = alloc_page(GFP_ATOMIC | __GFP_COLD); - frag = page ? page_address(page) : NULL; - } - if (!frag) { - nn_dp_warn(dp, "Failed to alloc receive page frag\n"); - return NULL; + page = dev_alloc_page(); + if (unlikely(!page)) + return NULL; + frag = page_address(page); } *dma_addr = nfp_net_dma_map_rx(dp, frag); @@ -1514,6 +1514,11 @@ nfp_net_rx_drop(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, { u64_stats_update_begin(&r_vec->rx_sync); r_vec->rx_drops++; + /* If we have both skb and rxbuf the replacement buffer allocation + * must have failed, count this as an alloc failure. + */ + if (skb && rxbuf) + r_vec->rx_replace_buf_alloc_fail++; u64_stats_update_end(&r_vec->rx_sync); /* skb is build based on the frag, free_skb() would free the frag @@ -1582,26 +1587,6 @@ nfp_net_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring, return true; } -static int nfp_net_run_xdp(struct bpf_prog *prog, void *data, void *hard_start, - unsigned int *off, unsigned int *len) -{ - struct xdp_buff xdp; - void *orig_data; - int ret; - - xdp.data_hard_start = hard_start; - xdp.data = data + *off; - xdp.data_end = data + *off + *len; - - orig_data = xdp.data; - ret = bpf_prog_run_xdp(prog, &xdp); - - *len -= xdp.data - orig_data; - *off += xdp.data - orig_data; - - return ret; -} - /** * nfp_net_rx() - receive up to @budget packets on @rx_ring * @rx_ring: RX ring to receive from @@ -1637,6 +1622,7 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) struct nfp_meta_parsed meta; struct net_device *netdev; dma_addr_t new_dma_addr; + u32 meta_len_xdp = 0; void *new_frag; idx = D_IDX(rx_ring, rx_ring->rd_p); @@ -1715,16 +1701,24 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) if (xdp_prog && !(rxd->rxd.flags & PCIE_DESC_RX_BPF && dp->bpf_offload_xdp) && !meta.portid) { + void *orig_data = rxbuf->frag + pkt_off; unsigned int dma_off; - void *hard_start; + struct xdp_buff xdp; int act; - hard_start = rxbuf->frag + NFP_NET_RX_BUF_HEADROOM; + xdp.data_hard_start = rxbuf->frag + NFP_NET_RX_BUF_HEADROOM; + xdp.data = orig_data; + xdp.data_meta = orig_data; + xdp.data_end = orig_data + pkt_len; + + act = bpf_prog_run_xdp(xdp_prog, &xdp); + + pkt_len -= xdp.data - orig_data; + pkt_off += xdp.data - orig_data; - act = nfp_net_run_xdp(xdp_prog, rxbuf->frag, hard_start, - &pkt_off, &pkt_len); switch (act) { case XDP_PASS: + meta_len_xdp = xdp.data - xdp.data_meta; break; case XDP_TX: dma_off = pkt_off - NFP_NET_RX_BUF_HEADROOM; @@ -1792,6 +1786,8 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) if (rxd->rxd.flags & PCIE_DESC_RX_VLAN) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), le16_to_cpu(rxd->rxd.vlan)); + if (meta_len_xdp) + skb_metadata_set(skb, meta_len_xdp); napi_gro_receive(&rx_ring->r_vec->napi, skb); } @@ -3382,7 +3378,7 @@ nfp_net_xdp_setup(struct nfp_net *nn, struct bpf_prog *prog, u32 flags, return 0; } -static int nfp_net_xdp(struct net_device *netdev, struct netdev_xdp *xdp) +static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp) { struct nfp_net *nn = netdev_priv(netdev); @@ -3397,6 +3393,14 @@ static int nfp_net_xdp(struct net_device *netdev, struct netdev_xdp *xdp) xdp->prog_attached = XDP_ATTACHED_HW; xdp->prog_id = nn->xdp_prog ? nn->xdp_prog->aux->id : 0; return 0; + case BPF_OFFLOAD_VERIFIER_PREP: + return nfp_app_bpf_verifier_prep(nn->app, nn, xdp); + case BPF_OFFLOAD_TRANSLATE: + return nfp_app_bpf_translate(nn->app, nn, + xdp->offload.prog); + case BPF_OFFLOAD_DESTROY: + return nfp_app_bpf_destroy(nn->app, nn, + xdp->offload.prog); default: return -EINVAL; } @@ -3445,7 +3449,7 @@ const struct net_device_ops nfp_net_netdev_ops = { .ndo_get_phys_port_name = nfp_port_get_phys_port_name, .ndo_udp_tunnel_add = nfp_net_add_vxlan_port, .ndo_udp_tunnel_del = nfp_net_del_vxlan_port, - .ndo_xdp = nfp_net_xdp, + .ndo_bpf = nfp_net_xdp, }; /** @@ -3546,8 +3550,7 @@ struct nfp_net *nfp_net_alloc(struct pci_dev *pdev, bool needs_netdev, spin_lock_init(&nn->reconfig_lock); spin_lock_init(&nn->link_status_lock); - setup_timer(&nn->reconfig_timer, - nfp_net_reconfig_timer, (unsigned long)nn); + timer_setup(&nn->reconfig_timer, nfp_net_reconfig_timer, 0); return nn; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index b0a452ba9039..782d452e0fc2 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -255,7 +255,7 @@ * @NFP_NET_CFG_BPF_ADDR: DMA address of the buffer with JITed BPF code */ #define NFP_NET_CFG_BPF_ABI 0x0080 -#define NFP_NET_BPF_ABI 1 +#define NFP_NET_BPF_ABI 2 #define NFP_NET_CFG_BPF_CAP 0x0081 #define NFP_NET_BPF_CAP_RELO (1 << 0) /* seamless reload */ #define NFP_NET_CFG_BPF_MAX_LEN 0x0082 diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index dc016dfec64d..60c8d733a37d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -181,7 +181,8 @@ static const struct nfp_et_stat nfp_mac_et_stats[] = { #define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats) #define NN_ET_SWITCH_STATS_LEN 9 -#define NN_ET_RVEC_GATHER_STATS 7 +#define NN_RVEC_GATHER_STATS 8 +#define NN_RVEC_PER_Q_STATS 3 static void nfp_net_get_nspinfo(struct nfp_app *app, char *version) { @@ -243,6 +244,30 @@ nfp_app_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) nfp_get_drvinfo(app, app->pdev, "*", drvinfo); } +static void +nfp_net_set_fec_link_mode(struct nfp_eth_table_port *eth_port, + struct ethtool_link_ksettings *c) +{ + unsigned int modes; + + ethtool_link_ksettings_add_link_mode(c, supported, FEC_NONE); + if (!nfp_eth_can_support_fec(eth_port)) { + ethtool_link_ksettings_add_link_mode(c, advertising, FEC_NONE); + return; + } + + modes = nfp_eth_supported_fec_modes(eth_port); + if (modes & NFP_FEC_BASER) { + ethtool_link_ksettings_add_link_mode(c, supported, FEC_BASER); + ethtool_link_ksettings_add_link_mode(c, advertising, FEC_BASER); + } + + if (modes & NFP_FEC_REED_SOLOMON) { + ethtool_link_ksettings_add_link_mode(c, supported, FEC_RS); + ethtool_link_ksettings_add_link_mode(c, advertising, FEC_RS); + } +} + /** * nfp_net_get_link_ksettings - Get Link Speed settings * @netdev: network interface device structure @@ -277,9 +302,11 @@ nfp_net_get_link_ksettings(struct net_device *netdev, port = nfp_port_from_netdev(netdev); eth_port = nfp_port_get_eth_port(port); - if (eth_port) + if (eth_port) { cmd->base.autoneg = eth_port->aneg != NFP_ANEG_DISABLED ? AUTONEG_ENABLE : AUTONEG_DISABLE; + nfp_net_set_fec_link_mode(eth_port, cmd); + } if (!netif_carrier_ok(netdev)) return 0; @@ -327,7 +354,7 @@ nfp_net_set_link_ksettings(struct net_device *netdev, return -EOPNOTSUPP; if (netif_running(netdev)) { - netdev_warn(netdev, "Changing settings not allowed on an active interface. It may cause the port to be disabled until reboot.\n"); + netdev_warn(netdev, "Changing settings not allowed on an active interface. It may cause the port to be disabled until driver reload.\n"); return -EBUSY; } @@ -427,7 +454,7 @@ static unsigned int nfp_vnic_get_sw_stats_count(struct net_device *netdev) { struct nfp_net *nn = netdev_priv(netdev); - return NN_ET_RVEC_GATHER_STATS + nn->dp.num_r_vecs * 3; + return NN_RVEC_GATHER_STATS + nn->dp.num_r_vecs * NN_RVEC_PER_Q_STATS; } static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data) @@ -444,6 +471,7 @@ static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data) data = nfp_pr_et(data, "hw_rx_csum_ok"); data = nfp_pr_et(data, "hw_rx_csum_inner_ok"); data = nfp_pr_et(data, "hw_rx_csum_err"); + data = nfp_pr_et(data, "rx_replace_buf_alloc_fail"); data = nfp_pr_et(data, "hw_tx_csum"); data = nfp_pr_et(data, "hw_tx_inner_csum"); data = nfp_pr_et(data, "tx_gather"); @@ -454,9 +482,9 @@ static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data) static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) { - u64 gathered_stats[NN_ET_RVEC_GATHER_STATS] = {}; + u64 gathered_stats[NN_RVEC_GATHER_STATS] = {}; struct nfp_net *nn = netdev_priv(netdev); - u64 tmp[NN_ET_RVEC_GATHER_STATS]; + u64 tmp[NN_RVEC_GATHER_STATS]; unsigned int i, j; for (i = 0; i < nn->dp.num_r_vecs; i++) { @@ -468,25 +496,26 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) tmp[0] = nn->r_vecs[i].hw_csum_rx_ok; tmp[1] = nn->r_vecs[i].hw_csum_rx_inner_ok; tmp[2] = nn->r_vecs[i].hw_csum_rx_error; + tmp[3] = nn->r_vecs[i].rx_replace_buf_alloc_fail; } while (u64_stats_fetch_retry(&nn->r_vecs[i].rx_sync, start)); do { start = u64_stats_fetch_begin(&nn->r_vecs[i].tx_sync); data[1] = nn->r_vecs[i].tx_pkts; data[2] = nn->r_vecs[i].tx_busy; - tmp[3] = nn->r_vecs[i].hw_csum_tx; - tmp[4] = nn->r_vecs[i].hw_csum_tx_inner; - tmp[5] = nn->r_vecs[i].tx_gather; - tmp[6] = nn->r_vecs[i].tx_lso; + tmp[4] = nn->r_vecs[i].hw_csum_tx; + tmp[5] = nn->r_vecs[i].hw_csum_tx_inner; + tmp[6] = nn->r_vecs[i].tx_gather; + tmp[7] = nn->r_vecs[i].tx_lso; } while (u64_stats_fetch_retry(&nn->r_vecs[i].tx_sync, start)); - data += 3; + data += NN_RVEC_PER_Q_STATS; - for (j = 0; j < NN_ET_RVEC_GATHER_STATS; j++) + for (j = 0; j < NN_RVEC_GATHER_STATS; j++) gathered_stats[j] += tmp[j]; } - for (j = 0; j < NN_ET_RVEC_GATHER_STATS; j++) + for (j = 0; j < NN_RVEC_GATHER_STATS; j++) *data++ = gathered_stats[j]; return data; @@ -683,6 +712,91 @@ static int nfp_port_get_sset_count(struct net_device *netdev, int sset) } } +static int nfp_port_fec_ethtool_to_nsp(u32 fec) +{ + switch (fec) { + case ETHTOOL_FEC_AUTO: + return NFP_FEC_AUTO_BIT; + case ETHTOOL_FEC_OFF: + return NFP_FEC_DISABLED_BIT; + case ETHTOOL_FEC_RS: + return NFP_FEC_REED_SOLOMON_BIT; + case ETHTOOL_FEC_BASER: + return NFP_FEC_BASER_BIT; + default: + /* NSP only supports a single mode at a time */ + return -EOPNOTSUPP; + } +} + +static u32 nfp_port_fec_nsp_to_ethtool(u32 fec) +{ + u32 result = 0; + + if (fec & NFP_FEC_AUTO) + result |= ETHTOOL_FEC_AUTO; + if (fec & NFP_FEC_BASER) + result |= ETHTOOL_FEC_BASER; + if (fec & NFP_FEC_REED_SOLOMON) + result |= ETHTOOL_FEC_RS; + if (fec & NFP_FEC_DISABLED) + result |= ETHTOOL_FEC_OFF; + + return result ?: ETHTOOL_FEC_NONE; +} + +static int +nfp_port_get_fecparam(struct net_device *netdev, + struct ethtool_fecparam *param) +{ + struct nfp_eth_table_port *eth_port; + struct nfp_port *port; + + param->active_fec = ETHTOOL_FEC_NONE_BIT; + param->fec = ETHTOOL_FEC_NONE_BIT; + + port = nfp_port_from_netdev(netdev); + eth_port = nfp_port_get_eth_port(port); + if (!eth_port) + return -EOPNOTSUPP; + + if (!nfp_eth_can_support_fec(eth_port)) + return 0; + + param->fec = nfp_port_fec_nsp_to_ethtool(eth_port->fec_modes_supported); + param->active_fec = nfp_port_fec_nsp_to_ethtool(eth_port->fec); + + return 0; +} + +static int +nfp_port_set_fecparam(struct net_device *netdev, + struct ethtool_fecparam *param) +{ + struct nfp_eth_table_port *eth_port; + struct nfp_port *port; + int err, fec; + + port = nfp_port_from_netdev(netdev); + eth_port = nfp_port_get_eth_port(port); + if (!eth_port) + return -EOPNOTSUPP; + + if (!nfp_eth_can_support_fec(eth_port)) + return -EOPNOTSUPP; + + fec = nfp_port_fec_ethtool_to_nsp(param->fec); + if (fec < 0) + return fec; + + err = nfp_eth_set_fec(port->app->cpp, eth_port->index, fec); + if (!err) + /* Only refresh if we did something */ + nfp_net_refresh_port_table(port); + + return err < 0 ? err : 0; +} + /* RX network flow classification (RSS, filters, etc) */ static u32 ethtool_flow_to_nfp_flag(u32 flow_type) @@ -1141,6 +1255,8 @@ static const struct ethtool_ops nfp_net_ethtool_ops = { .set_channels = nfp_net_set_channels, .get_link_ksettings = nfp_net_get_link_ksettings, .set_link_ksettings = nfp_net_set_link_ksettings, + .get_fecparam = nfp_port_get_fecparam, + .set_fecparam = nfp_port_set_fecparam, }; const struct ethtool_ops nfp_port_ethtool_ops = { @@ -1152,6 +1268,10 @@ const struct ethtool_ops nfp_port_ethtool_ops = { .set_dump = nfp_app_set_dump, .get_dump_flag = nfp_app_get_dump_flag, .get_dump_data = nfp_app_get_dump_data, + .get_link_ksettings = nfp_net_get_link_ksettings, + .set_link_ksettings = nfp_net_set_link_ksettings, + .get_fecparam = nfp_port_get_fecparam, + .set_fecparam = nfp_port_set_fecparam, }; void nfp_net_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c index ff373acd28f3..c505014121c4 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c @@ -597,7 +597,7 @@ nfp_net_eth_port_update(struct nfp_cpp *cpp, struct nfp_port *port, return -EIO; } if (eth_port->override_changed) { - nfp_warn(cpp, "Port #%d config changed, unregistering. Reboot required before port will be operational again.\n", port->eth_id); + nfp_warn(cpp, "Port #%d config changed, unregistering. Driver reload required before port will be operational again.\n", port->eth_id); port->type = NFP_PORT_INVALID; } @@ -611,6 +611,7 @@ int nfp_net_refresh_port_table_sync(struct nfp_pf *pf) struct nfp_eth_table *eth_table; struct nfp_net *nn, *next; struct nfp_port *port; + int err; lockdep_assert_held(&pf->lock); @@ -640,6 +641,11 @@ int nfp_net_refresh_port_table_sync(struct nfp_pf *pf) kfree(eth_table); + /* Resync repr state. This may cause reprs to be removed. */ + err = nfp_reprs_resync_phys_ports(pf->app); + if (err) + return err; + /* Shoot off the ports which became invalid */ list_for_each_entry_safe(nn, next, &pf->vnics, vnic_list) { if (!nn->port || nn->port->type != NFP_PORT_INVALID) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c index d540a9dc77b3..1bce8c131bb9 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c @@ -390,3 +390,50 @@ struct nfp_reprs *nfp_reprs_alloc(unsigned int num_reprs) return reprs; } + +int nfp_reprs_resync_phys_ports(struct nfp_app *app) +{ + struct nfp_reprs *reprs, *old_reprs; + struct nfp_repr *repr; + int i; + + old_reprs = + rcu_dereference_protected(app->reprs[NFP_REPR_TYPE_PHYS_PORT], + lockdep_is_held(&app->pf->lock)); + if (!old_reprs) + return 0; + + reprs = nfp_reprs_alloc(old_reprs->num_reprs); + if (!reprs) + return -ENOMEM; + + for (i = 0; i < old_reprs->num_reprs; i++) { + if (!old_reprs->reprs[i]) + continue; + + repr = netdev_priv(old_reprs->reprs[i]); + if (repr->port->type == NFP_PORT_INVALID) + continue; + + reprs->reprs[i] = old_reprs->reprs[i]; + } + + old_reprs = nfp_app_reprs_set(app, NFP_REPR_TYPE_PHYS_PORT, reprs); + synchronize_rcu(); + + /* Now we free up removed representors */ + for (i = 0; i < old_reprs->num_reprs; i++) { + if (!old_reprs->reprs[i]) + continue; + + repr = netdev_priv(old_reprs->reprs[i]); + if (repr->port->type != NFP_PORT_INVALID) + continue; + + nfp_app_repr_stop(app, repr); + nfp_repr_clean(repr); + } + + kfree(old_reprs); + return 0; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h index 32179cad062a..5d4d897bc9c6 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h @@ -124,5 +124,6 @@ void nfp_reprs_clean_and_free_by_type(struct nfp_app *app, enum nfp_repr_type type); struct nfp_reprs *nfp_reprs_alloc(unsigned int num_reprs); +int nfp_reprs_resync_phys_ports(struct nfp_app *app); #endif /* NFP_NET_REPR_H */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c index e6d2e06b050c..8b1b962cf1d1 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c @@ -112,7 +112,13 @@ int nfp_app_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) writew(get_unaligned_be16(mac + 4), app->pf->vfcfg_tbl2 + vf_offset + NFP_NET_VF_CFG_MAC_LO); - return nfp_net_sriov_update(app, vf, NFP_NET_VF_CFG_MB_UPD_MAC, "MAC"); + err = nfp_net_sriov_update(app, vf, NFP_NET_VF_CFG_MB_UPD_MAC, "MAC"); + if (!err) + nfp_info(app->pf->cpp, + "MAC %pM set on VF %d, reload the VF driver to make this change effective.\n", + mac, vf); + + return err; } int nfp_app_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos, diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c index 37364555c42b..14a6d1ba51a9 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c @@ -477,6 +477,11 @@ int nfp_nsp_device_soft_reset(struct nfp_nsp *state) return nfp_nsp_command(state, SPCODE_SOFT_RESET, 0, 0, 0); } +int nfp_nsp_mac_reinit(struct nfp_nsp *state) +{ + return nfp_nsp_command(state, SPCODE_MAC_INIT, 0, 0, 0); +} + int nfp_nsp_load_fw(struct nfp_nsp *state, const struct firmware *fw) { return nfp_nsp_command_buf(state, SPCODE_FW_LOAD, fw->size, fw->data, diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h index e2f028027c6f..650ca1a5bd21 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h @@ -48,6 +48,12 @@ u16 nfp_nsp_get_abi_ver_minor(struct nfp_nsp *state); int nfp_nsp_wait(struct nfp_nsp *state); int nfp_nsp_device_soft_reset(struct nfp_nsp *state); int nfp_nsp_load_fw(struct nfp_nsp *state, const struct firmware *fw); +int nfp_nsp_mac_reinit(struct nfp_nsp *state); + +static inline bool nfp_nsp_has_mac_reinit(struct nfp_nsp *state) +{ + return nfp_nsp_get_abi_ver_minor(state) > 20; +} enum nfp_eth_interface { NFP_INTERFACE_NONE = 0, @@ -73,6 +79,18 @@ enum nfp_eth_aneg { NFP_ANEG_DISABLED, }; +enum nfp_eth_fec { + NFP_FEC_AUTO_BIT = 0, + NFP_FEC_BASER_BIT, + NFP_FEC_REED_SOLOMON_BIT, + NFP_FEC_DISABLED_BIT, +}; + +#define NFP_FEC_AUTO BIT(NFP_FEC_AUTO_BIT) +#define NFP_FEC_BASER BIT(NFP_FEC_BASER_BIT) +#define NFP_FEC_REED_SOLOMON BIT(NFP_FEC_REED_SOLOMON_BIT) +#define NFP_FEC_DISABLED BIT(NFP_FEC_DISABLED_BIT) + /** * struct nfp_eth_table - ETH table information * @count: number of table entries @@ -87,6 +105,7 @@ enum nfp_eth_aneg { * @speed: interface speed (in Mbps) * @interface: interface (module) plugged in * @media: media type of the @interface + * @fec: forward error correction mode * @aneg: auto negotiation mode * @mac_addr: interface MAC address * @label_port: port id @@ -99,6 +118,7 @@ enum nfp_eth_aneg { * @port_type: one of %PORT_* defines for ethtool * @port_lanes: total number of lanes on the port (sum of lanes of all subports) * @is_split: is interface part of a split port + * @fec_modes_supported: bitmap of FEC modes supported */ struct nfp_eth_table { unsigned int count; @@ -114,6 +134,7 @@ struct nfp_eth_table { unsigned int interface; enum nfp_eth_media media; + enum nfp_eth_fec fec; enum nfp_eth_aneg aneg; u8 mac_addr[ETH_ALEN]; @@ -133,6 +154,8 @@ struct nfp_eth_table { unsigned int port_lanes; bool is_split; + + unsigned int fec_modes_supported; } ports[0]; }; @@ -143,6 +166,19 @@ __nfp_eth_read_ports(struct nfp_cpp *cpp, struct nfp_nsp *nsp); int nfp_eth_set_mod_enable(struct nfp_cpp *cpp, unsigned int idx, bool enable); int nfp_eth_set_configured(struct nfp_cpp *cpp, unsigned int idx, bool configed); +int +nfp_eth_set_fec(struct nfp_cpp *cpp, unsigned int idx, enum nfp_eth_fec mode); + +static inline bool nfp_eth_can_support_fec(struct nfp_eth_table_port *eth_port) +{ + return !!eth_port->fec_modes_supported; +} + +static inline unsigned int +nfp_eth_supported_fec_modes(struct nfp_eth_table_port *eth_port) +{ + return eth_port->fec_modes_supported; +} struct nfp_nsp *nfp_eth_config_start(struct nfp_cpp *cpp, unsigned int idx); int nfp_eth_config_commit_end(struct nfp_nsp *nsp); diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c index f6f7c085f8e0..7ca589660e4d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c @@ -55,6 +55,8 @@ #define NSP_ETH_PORT_INDEX GENMASK_ULL(15, 8) #define NSP_ETH_PORT_LABEL GENMASK_ULL(53, 48) #define NSP_ETH_PORT_PHYLABEL GENMASK_ULL(59, 54) +#define NSP_ETH_PORT_FEC_SUPP_BASER BIT_ULL(60) +#define NSP_ETH_PORT_FEC_SUPP_RS BIT_ULL(61) #define NSP_ETH_PORT_LANES_MASK cpu_to_le64(NSP_ETH_PORT_LANES) @@ -67,6 +69,7 @@ #define NSP_ETH_STATE_MEDIA GENMASK_ULL(21, 20) #define NSP_ETH_STATE_OVRD_CHNG BIT_ULL(22) #define NSP_ETH_STATE_ANEG GENMASK_ULL(25, 23) +#define NSP_ETH_STATE_FEC GENMASK_ULL(27, 26) #define NSP_ETH_CTRL_CONFIGURED BIT_ULL(0) #define NSP_ETH_CTRL_ENABLED BIT_ULL(1) @@ -75,6 +78,7 @@ #define NSP_ETH_CTRL_SET_RATE BIT_ULL(4) #define NSP_ETH_CTRL_SET_LANES BIT_ULL(5) #define NSP_ETH_CTRL_SET_ANEG BIT_ULL(6) +#define NSP_ETH_CTRL_SET_FEC BIT_ULL(7) enum nfp_eth_raw { NSP_ETH_RAW_PORT = 0, @@ -152,6 +156,7 @@ nfp_eth_port_translate(struct nfp_nsp *nsp, const union eth_table_entry *src, unsigned int index, struct nfp_eth_table_port *dst) { unsigned int rate; + unsigned int fec; u64 port, state; port = le64_to_cpu(src->port); @@ -183,6 +188,18 @@ nfp_eth_port_translate(struct nfp_nsp *nsp, const union eth_table_entry *src, dst->override_changed = FIELD_GET(NSP_ETH_STATE_OVRD_CHNG, state); dst->aneg = FIELD_GET(NSP_ETH_STATE_ANEG, state); + + if (nfp_nsp_get_abi_ver_minor(nsp) < 22) + return; + + fec = FIELD_GET(NSP_ETH_PORT_FEC_SUPP_BASER, port); + dst->fec_modes_supported |= fec << NFP_FEC_BASER_BIT; + fec = FIELD_GET(NSP_ETH_PORT_FEC_SUPP_RS, port); + dst->fec_modes_supported |= fec << NFP_FEC_REED_SOLOMON_BIT; + if (dst->fec_modes_supported) + dst->fec_modes_supported |= NFP_FEC_AUTO | NFP_FEC_DISABLED; + + dst->fec = 1 << FIELD_GET(NSP_ETH_STATE_FEC, state); } static void @@ -469,10 +486,10 @@ int nfp_eth_set_configured(struct nfp_cpp *cpp, unsigned int idx, bool configed) return nfp_eth_config_commit_end(nsp); } -/* Force inline, FIELD_* macroes require masks to be compilation-time known */ -static __always_inline int +static int nfp_eth_set_bit_config(struct nfp_nsp *nsp, unsigned int raw_idx, - const u64 mask, unsigned int val, const u64 ctrl_bit) + const u64 mask, const unsigned int shift, + unsigned int val, const u64 ctrl_bit) { union eth_table_entry *entries = nfp_nsp_config_entries(nsp); unsigned int idx = nfp_nsp_config_idx(nsp); @@ -489,11 +506,11 @@ nfp_eth_set_bit_config(struct nfp_nsp *nsp, unsigned int raw_idx, /* Check if we are already in requested state */ reg = le64_to_cpu(entries[idx].raw[raw_idx]); - if (val == FIELD_GET(mask, reg)) + if (val == (reg & mask) >> shift) return 0; reg &= ~mask; - reg |= FIELD_PREP(mask, val); + reg |= (val << shift) & mask; entries[idx].raw[raw_idx] = cpu_to_le64(reg); entries[idx].control |= cpu_to_le64(ctrl_bit); @@ -503,6 +520,13 @@ nfp_eth_set_bit_config(struct nfp_nsp *nsp, unsigned int raw_idx, return 0; } +#define NFP_ETH_SET_BIT_CONFIG(nsp, raw_idx, mask, val, ctrl_bit) \ + ({ \ + __BF_FIELD_CHECK(mask, 0ULL, val, "NFP_ETH_SET_BIT_CONFIG: "); \ + nfp_eth_set_bit_config(nsp, raw_idx, mask, __bf_shf(mask), \ + val, ctrl_bit); \ + }) + /** * __nfp_eth_set_aneg() - set PHY autonegotiation control bit * @nsp: NFP NSP handle returned from nfp_eth_config_start() @@ -515,12 +539,59 @@ nfp_eth_set_bit_config(struct nfp_nsp *nsp, unsigned int raw_idx, */ int __nfp_eth_set_aneg(struct nfp_nsp *nsp, enum nfp_eth_aneg mode) { - return nfp_eth_set_bit_config(nsp, NSP_ETH_RAW_STATE, + return NFP_ETH_SET_BIT_CONFIG(nsp, NSP_ETH_RAW_STATE, NSP_ETH_STATE_ANEG, mode, NSP_ETH_CTRL_SET_ANEG); } /** + * __nfp_eth_set_fec() - set PHY forward error correction control bit + * @nsp: NFP NSP handle returned from nfp_eth_config_start() + * @mode: Desired fec mode + * + * Set the PHY module forward error correction mode. + * Will write to hwinfo overrides in the flash (persistent config). + * + * Return: 0 or -ERRNO. + */ +static int __nfp_eth_set_fec(struct nfp_nsp *nsp, enum nfp_eth_fec mode) +{ + return NFP_ETH_SET_BIT_CONFIG(nsp, NSP_ETH_RAW_STATE, + NSP_ETH_STATE_FEC, mode, + NSP_ETH_CTRL_SET_FEC); +} + +/** + * nfp_eth_set_fec() - set PHY forward error correction control mode + * @cpp: NFP CPP handle + * @idx: NFP chip-wide port index + * @mode: Desired fec mode + * + * Return: + * 0 - configuration successful; + * 1 - no changes were needed; + * -ERRNO - configuration failed. + */ +int +nfp_eth_set_fec(struct nfp_cpp *cpp, unsigned int idx, enum nfp_eth_fec mode) +{ + struct nfp_nsp *nsp; + int err; + + nsp = nfp_eth_config_start(cpp, idx); + if (IS_ERR(nsp)) + return PTR_ERR(nsp); + + err = __nfp_eth_set_fec(nsp, mode); + if (err) { + nfp_eth_config_cleanup_end(nsp); + return err; + } + + return nfp_eth_config_commit_end(nsp); +} + +/** * __nfp_eth_set_speed() - set interface speed/rate * @nsp: NFP NSP handle returned from nfp_eth_config_start() * @speed: Desired speed (per lane) @@ -544,7 +615,7 @@ int __nfp_eth_set_speed(struct nfp_nsp *nsp, unsigned int speed) return -EINVAL; } - return nfp_eth_set_bit_config(nsp, NSP_ETH_RAW_STATE, + return NFP_ETH_SET_BIT_CONFIG(nsp, NSP_ETH_RAW_STATE, NSP_ETH_STATE_RATE, rate, NSP_ETH_CTRL_SET_RATE); } @@ -561,6 +632,6 @@ int __nfp_eth_set_speed(struct nfp_nsp *nsp, unsigned int speed) */ int __nfp_eth_set_split(struct nfp_nsp *nsp, unsigned int lanes) { - return nfp_eth_set_bit_config(nsp, NSP_ETH_RAW_PORT, NSP_ETH_PORT_LANES, + return NFP_ETH_SET_BIT_CONFIG(nsp, NSP_ETH_RAW_PORT, NSP_ETH_PORT_LANES, lanes, NSP_ETH_CTRL_SET_LANES); } diff --git a/drivers/net/ethernet/nuvoton/w90p910_ether.c b/drivers/net/ethernet/nuvoton/w90p910_ether.c index 4a67c55aa9f1..052b3d2c07a1 100644 --- a/drivers/net/ethernet/nuvoton/w90p910_ether.c +++ b/drivers/net/ethernet/nuvoton/w90p910_ether.c @@ -253,10 +253,10 @@ static void update_linkspeed(struct net_device *dev) netif_carrier_on(dev); } -static void w90p910_check_link(unsigned long dev_id) +static void w90p910_check_link(struct timer_list *t) { - struct net_device *dev = (struct net_device *) dev_id; - struct w90p910_ether *ether = netdev_priv(dev); + struct w90p910_ether *ether = from_timer(ether, t, check_timer); + struct net_device *dev = ether->mii.dev; update_linkspeed(dev); mod_timer(ðer->check_timer, jiffies + msecs_to_jiffies(1000)); @@ -957,8 +957,7 @@ static int w90p910_ether_setup(struct net_device *dev) ether->mii.mdio_read = w90p910_mdio_read; ether->mii.mdio_write = w90p910_mdio_write; - setup_timer(ðer->check_timer, w90p910_check_link, - (unsigned long)dev); + timer_setup(ðer->check_timer, w90p910_check_link, 0); return 0; } diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index 994a83a1f0a5..ac8439ceea10 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -1024,12 +1024,18 @@ static void free_rings(struct net_device *dev) if (!nv_optimized(np)) { if (np->rx_ring.orig) - pci_free_consistent(np->pci_dev, sizeof(struct ring_desc) * (np->rx_ring_size + np->tx_ring_size), - np->rx_ring.orig, np->ring_addr); + dma_free_coherent(&np->pci_dev->dev, + sizeof(struct ring_desc) * + (np->rx_ring_size + + np->tx_ring_size), + np->rx_ring.orig, np->ring_addr); } else { if (np->rx_ring.ex) - pci_free_consistent(np->pci_dev, sizeof(struct ring_desc_ex) * (np->rx_ring_size + np->tx_ring_size), - np->rx_ring.ex, np->ring_addr); + dma_free_coherent(&np->pci_dev->dev, + sizeof(struct ring_desc_ex) * + (np->rx_ring_size + + np->tx_ring_size), + np->rx_ring.ex, np->ring_addr); } kfree(np->rx_skb); kfree(np->tx_skb); @@ -1813,12 +1819,12 @@ static int nv_alloc_rx(struct net_device *dev) struct sk_buff *skb = netdev_alloc_skb(dev, np->rx_buf_sz + NV_RX_ALLOC_PAD); if (skb) { np->put_rx_ctx->skb = skb; - np->put_rx_ctx->dma = pci_map_single(np->pci_dev, + np->put_rx_ctx->dma = dma_map_single(&np->pci_dev->dev, skb->data, skb_tailroom(skb), - PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(np->pci_dev, - np->put_rx_ctx->dma)) { + DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(&np->pci_dev->dev, + np->put_rx_ctx->dma))) { kfree_skb(skb); goto packet_dropped; } @@ -1854,12 +1860,12 @@ static int nv_alloc_rx_optimized(struct net_device *dev) struct sk_buff *skb = netdev_alloc_skb(dev, np->rx_buf_sz + NV_RX_ALLOC_PAD); if (skb) { np->put_rx_ctx->skb = skb; - np->put_rx_ctx->dma = pci_map_single(np->pci_dev, + np->put_rx_ctx->dma = dma_map_single(&np->pci_dev->dev, skb->data, skb_tailroom(skb), - PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(np->pci_dev, - np->put_rx_ctx->dma)) { + DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(&np->pci_dev->dev, + np->put_rx_ctx->dma))) { kfree_skb(skb); goto packet_dropped; } @@ -1884,10 +1890,9 @@ packet_dropped: } /* If rx bufs are exhausted called after 50ms to attempt to refresh */ -static void nv_do_rx_refill(unsigned long data) +static void nv_do_rx_refill(struct timer_list *t) { - struct net_device *dev = (struct net_device *) data; - struct fe_priv *np = netdev_priv(dev); + struct fe_priv *np = from_timer(np, t, oom_kick); /* Just reschedule NAPI rx processing */ napi_schedule(&np->napi); @@ -1977,9 +1982,9 @@ static void nv_unmap_txskb(struct fe_priv *np, struct nv_skb_map *tx_skb) { if (tx_skb->dma) { if (tx_skb->dma_single) - pci_unmap_single(np->pci_dev, tx_skb->dma, + dma_unmap_single(&np->pci_dev->dev, tx_skb->dma, tx_skb->dma_len, - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); else pci_unmap_page(np->pci_dev, tx_skb->dma, tx_skb->dma_len, @@ -2047,10 +2052,10 @@ static void nv_drain_rx(struct net_device *dev) } wmb(); if (np->rx_skb[i].skb) { - pci_unmap_single(np->pci_dev, np->rx_skb[i].dma, + dma_unmap_single(&np->pci_dev->dev, np->rx_skb[i].dma, (skb_end_pointer(np->rx_skb[i].skb) - - np->rx_skb[i].skb->data), - PCI_DMA_FROMDEVICE); + np->rx_skb[i].skb->data), + DMA_FROM_DEVICE); dev_kfree_skb(np->rx_skb[i].skb); np->rx_skb[i].skb = NULL; } @@ -2221,13 +2226,12 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev) /* setup the header buffer */ do { - prev_tx = put_tx; - prev_tx_ctx = np->put_tx_ctx; bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size; - np->put_tx_ctx->dma = pci_map_single(np->pci_dev, skb->data + offset, bcnt, - PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(np->pci_dev, - np->put_tx_ctx->dma)) { + np->put_tx_ctx->dma = dma_map_single(&np->pci_dev->dev, + skb->data + offset, bcnt, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(&np->pci_dev->dev, + np->put_tx_ctx->dma))) { /* on DMA mapping error - drop the packet */ dev_kfree_skb_any(skb); u64_stats_update_begin(&np->swstats_tx_syncp); @@ -2256,8 +2260,6 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev) offset = 0; do { - prev_tx = put_tx; - prev_tx_ctx = np->put_tx_ctx; if (!start_tx_ctx) start_tx_ctx = tmp_tx_ctx = np->put_tx_ctx; @@ -2267,7 +2269,8 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev) frag, offset, bcnt, DMA_TO_DEVICE); - if (dma_mapping_error(&np->pci_dev->dev, np->put_tx_ctx->dma)) { + if (unlikely(dma_mapping_error(&np->pci_dev->dev, + np->put_tx_ctx->dma))) { /* Unwind the mapped fragments */ do { @@ -2297,6 +2300,16 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev) } while (frag_size); } + if (unlikely(put_tx == np->first_tx.orig)) + prev_tx = np->last_tx.orig; + else + prev_tx = put_tx - 1; + + if (unlikely(np->put_tx_ctx == np->first_tx_ctx)) + prev_tx_ctx = np->last_tx_ctx; + else + prev_tx_ctx = np->put_tx_ctx - 1; + /* set last fragment flag */ prev_tx->flaglen |= cpu_to_le32(tx_flags_extra); @@ -2370,13 +2383,12 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb, /* setup the header buffer */ do { - prev_tx = put_tx; - prev_tx_ctx = np->put_tx_ctx; bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size; - np->put_tx_ctx->dma = pci_map_single(np->pci_dev, skb->data + offset, bcnt, - PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(np->pci_dev, - np->put_tx_ctx->dma)) { + np->put_tx_ctx->dma = dma_map_single(&np->pci_dev->dev, + skb->data + offset, bcnt, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(&np->pci_dev->dev, + np->put_tx_ctx->dma))) { /* on DMA mapping error - drop the packet */ dev_kfree_skb_any(skb); u64_stats_update_begin(&np->swstats_tx_syncp); @@ -2406,8 +2418,6 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb, offset = 0; do { - prev_tx = put_tx; - prev_tx_ctx = np->put_tx_ctx; bcnt = (frag_size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : frag_size; if (!start_tx_ctx) start_tx_ctx = tmp_tx_ctx = np->put_tx_ctx; @@ -2417,7 +2427,8 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb, bcnt, DMA_TO_DEVICE); - if (dma_mapping_error(&np->pci_dev->dev, np->put_tx_ctx->dma)) { + if (unlikely(dma_mapping_error(&np->pci_dev->dev, + np->put_tx_ctx->dma))) { /* Unwind the mapped fragments */ do { @@ -2447,6 +2458,16 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb, } while (frag_size); } + if (unlikely(put_tx == np->first_tx.ex)) + prev_tx = np->last_tx.ex; + else + prev_tx = put_tx - 1; + + if (unlikely(np->put_tx_ctx == np->first_tx_ctx)) + prev_tx_ctx = np->last_tx_ctx; + else + prev_tx_ctx = np->put_tx_ctx - 1; + /* set last fragment flag */ prev_tx->flaglen |= cpu_to_le32(NV_TX2_LASTPACKET); @@ -2810,9 +2831,9 @@ static int nv_rx_process(struct net_device *dev, int limit) * TODO: check if a prefetch of the first cacheline improves * the performance. */ - pci_unmap_single(np->pci_dev, np->get_rx_ctx->dma, - np->get_rx_ctx->dma_len, - PCI_DMA_FROMDEVICE); + dma_unmap_single(&np->pci_dev->dev, np->get_rx_ctx->dma, + np->get_rx_ctx->dma_len, + DMA_FROM_DEVICE); skb = np->get_rx_ctx->skb; np->get_rx_ctx->skb = NULL; @@ -2916,9 +2937,9 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit) * TODO: check if a prefetch of the first cacheline improves * the performance. */ - pci_unmap_single(np->pci_dev, np->get_rx_ctx->dma, - np->get_rx_ctx->dma_len, - PCI_DMA_FROMDEVICE); + dma_unmap_single(&np->pci_dev->dev, np->get_rx_ctx->dma, + np->get_rx_ctx->dma_len, + DMA_FROM_DEVICE); skb = np->get_rx_ctx->skb; np->get_rx_ctx->skb = NULL; @@ -4061,10 +4082,10 @@ static void nv_free_irq(struct net_device *dev) } } -static void nv_do_nic_poll(unsigned long data) +static void nv_do_nic_poll(struct timer_list *t) { - struct net_device *dev = (struct net_device *) data; - struct fe_priv *np = netdev_priv(dev); + struct fe_priv *np = from_timer(np, t, nic_poll); + struct net_device *dev = np->dev; u8 __iomem *base = get_hwbase(dev); u32 mask = 0; unsigned long flags; @@ -4172,16 +4193,18 @@ static void nv_do_nic_poll(unsigned long data) #ifdef CONFIG_NET_POLL_CONTROLLER static void nv_poll_controller(struct net_device *dev) { - nv_do_nic_poll((unsigned long) dev); + struct fe_priv *np = netdev_priv(dev); + + nv_do_nic_poll(&np->nic_poll); } #endif -static void nv_do_stats_poll(unsigned long data) +static void nv_do_stats_poll(struct timer_list *t) __acquires(&netdev_priv(dev)->hwstats_lock) __releases(&netdev_priv(dev)->hwstats_lock) { - struct net_device *dev = (struct net_device *) data; - struct fe_priv *np = netdev_priv(dev); + struct fe_priv *np = from_timer(np, t, stats_poll); + struct net_device *dev = np->dev; /* If lock is currently taken, the stats are being refreshed * and hence fresh enough */ @@ -4591,13 +4614,17 @@ static int nv_set_ringparam(struct net_device *dev, struct ethtool_ringparam* ri /* allocate new rings */ if (!nv_optimized(np)) { - rxtx_ring = pci_alloc_consistent(np->pci_dev, - sizeof(struct ring_desc) * (ring->rx_pending + ring->tx_pending), - &ring_addr); + rxtx_ring = dma_alloc_coherent(&np->pci_dev->dev, + sizeof(struct ring_desc) * + (ring->rx_pending + + ring->tx_pending), + &ring_addr, GFP_ATOMIC); } else { - rxtx_ring = pci_alloc_consistent(np->pci_dev, - sizeof(struct ring_desc_ex) * (ring->rx_pending + ring->tx_pending), - &ring_addr); + rxtx_ring = dma_alloc_coherent(&np->pci_dev->dev, + sizeof(struct ring_desc_ex) * + (ring->rx_pending + + ring->tx_pending), + &ring_addr, GFP_ATOMIC); } rx_skbuff = kmalloc(sizeof(struct nv_skb_map) * ring->rx_pending, GFP_KERNEL); tx_skbuff = kmalloc(sizeof(struct nv_skb_map) * ring->tx_pending, GFP_KERNEL); @@ -4605,12 +4632,18 @@ static int nv_set_ringparam(struct net_device *dev, struct ethtool_ringparam* ri /* fall back to old rings */ if (!nv_optimized(np)) { if (rxtx_ring) - pci_free_consistent(np->pci_dev, sizeof(struct ring_desc) * (ring->rx_pending + ring->tx_pending), - rxtx_ring, ring_addr); + dma_free_coherent(&np->pci_dev->dev, + sizeof(struct ring_desc) * + (ring->rx_pending + + ring->tx_pending), + rxtx_ring, ring_addr); } else { if (rxtx_ring) - pci_free_consistent(np->pci_dev, sizeof(struct ring_desc_ex) * (ring->rx_pending + ring->tx_pending), - rxtx_ring, ring_addr); + dma_free_coherent(&np->pci_dev->dev, + sizeof(struct ring_desc_ex) * + (ring->rx_pending + + ring->tx_pending), + rxtx_ring, ring_addr); } kfree(rx_skbuff); @@ -5070,11 +5103,11 @@ static int nv_loopback_test(struct net_device *dev) ret = 0; goto out; } - test_dma_addr = pci_map_single(np->pci_dev, tx_skb->data, + test_dma_addr = dma_map_single(&np->pci_dev->dev, tx_skb->data, skb_tailroom(tx_skb), - PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(np->pci_dev, - test_dma_addr)) { + DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(&np->pci_dev->dev, + test_dma_addr))) { dev_kfree_skb_any(tx_skb); goto out; } @@ -5129,9 +5162,9 @@ static int nv_loopback_test(struct net_device *dev) } } - pci_unmap_single(np->pci_dev, test_dma_addr, - (skb_end_pointer(tx_skb) - tx_skb->data), - PCI_DMA_TODEVICE); + dma_unmap_single(&np->pci_dev->dev, test_dma_addr, + (skb_end_pointer(tx_skb) - tx_skb->data), + DMA_TO_DEVICE); dev_kfree_skb_any(tx_skb); out: /* stop engines */ @@ -5627,10 +5660,9 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) u64_stats_init(&np->swstats_rx_syncp); u64_stats_init(&np->swstats_tx_syncp); - setup_timer(&np->oom_kick, nv_do_rx_refill, (unsigned long)dev); - setup_timer(&np->nic_poll, nv_do_nic_poll, (unsigned long)dev); - setup_deferrable_timer(&np->stats_poll, nv_do_stats_poll, - (unsigned long)dev); + timer_setup(&np->oom_kick, nv_do_rx_refill, 0); + timer_setup(&np->nic_poll, nv_do_nic_poll, 0); + timer_setup(&np->stats_poll, nv_do_stats_poll, TIMER_DEFERRABLE); err = pci_enable_device(pci_dev); if (err) @@ -5736,16 +5768,21 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) np->tx_ring_size = TX_RING_DEFAULT; if (!nv_optimized(np)) { - np->rx_ring.orig = pci_alloc_consistent(pci_dev, - sizeof(struct ring_desc) * (np->rx_ring_size + np->tx_ring_size), - &np->ring_addr); + np->rx_ring.orig = dma_alloc_coherent(&pci_dev->dev, + sizeof(struct ring_desc) * + (np->rx_ring_size + + np->tx_ring_size), + &np->ring_addr, + GFP_ATOMIC); if (!np->rx_ring.orig) goto out_unmap; np->tx_ring.orig = &np->rx_ring.orig[np->rx_ring_size]; } else { - np->rx_ring.ex = pci_alloc_consistent(pci_dev, - sizeof(struct ring_desc_ex) * (np->rx_ring_size + np->tx_ring_size), - &np->ring_addr); + np->rx_ring.ex = dma_alloc_coherent(&pci_dev->dev, + sizeof(struct ring_desc_ex) * + (np->rx_ring_size + + np->tx_ring_size), + &np->ring_addr, GFP_ATOMIC); if (!np->rx_ring.ex) goto out_unmap; np->tx_ring.ex = &np->rx_ring.ex[np->rx_ring_size]; diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h index 8d710a3b4db0..697e29dd4bd3 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h @@ -613,7 +613,6 @@ struct pch_gbe_privdata { * @rx_ring: Pointer of Rx descriptor ring structure * @rx_buffer_len: Receive buffer length * @tx_queue_len: Transmit queue length - * @have_msi: PCI MSI mode flag * @pch_gbe_privdata: PCI Device ID driver_data */ @@ -623,6 +622,7 @@ struct pch_gbe_adapter { atomic_t irq_sem; struct net_device *netdev; struct pci_dev *pdev; + int irq; struct net_device *polling_netdev; struct napi_struct napi; struct pch_gbe_hw hw; @@ -637,7 +637,6 @@ struct pch_gbe_adapter { struct pch_gbe_rx_ring *rx_ring; unsigned long rx_buffer_len; unsigned long tx_queue_len; - bool have_msi; bool rx_stop_flag; int hwts_tx_en; int hwts_rx_en; diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index 5ae9681a2da7..457ee80307ea 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -781,11 +781,8 @@ static void pch_gbe_free_irq(struct pch_gbe_adapter *adapter) { struct net_device *netdev = adapter->netdev; - free_irq(adapter->pdev->irq, netdev); - if (adapter->have_msi) { - pci_disable_msi(adapter->pdev); - netdev_dbg(netdev, "call pci_disable_msi\n"); - } + free_irq(adapter->irq, netdev); + pci_free_irq_vectors(adapter->pdev); } /** @@ -799,7 +796,7 @@ static void pch_gbe_irq_disable(struct pch_gbe_adapter *adapter) atomic_inc(&adapter->irq_sem); iowrite32(0, &hw->reg->INT_EN); ioread32(&hw->reg->INT_ST); - synchronize_irq(adapter->pdev->irq); + synchronize_irq(adapter->irq); netdev_dbg(adapter->netdev, "INT_EN reg : 0x%08x\n", ioread32(&hw->reg->INT_EN)); @@ -1903,30 +1900,23 @@ static int pch_gbe_request_irq(struct pch_gbe_adapter *adapter) { struct net_device *netdev = adapter->netdev; int err; - int flags; - flags = IRQF_SHARED; - adapter->have_msi = false; - err = pci_enable_msi(adapter->pdev); - netdev_dbg(netdev, "call pci_enable_msi\n"); - if (err) { - netdev_dbg(netdev, "call pci_enable_msi - Error: %d\n", err); - } else { - flags = 0; - adapter->have_msi = true; - } - err = request_irq(adapter->pdev->irq, &pch_gbe_intr, - flags, netdev->name, netdev); + err = pci_alloc_irq_vectors(adapter->pdev, 1, 1, PCI_IRQ_ALL_TYPES); + if (err < 0) + return err; + + adapter->irq = pci_irq_vector(adapter->pdev, 0); + + err = request_irq(adapter->irq, &pch_gbe_intr, IRQF_SHARED, + netdev->name, netdev); if (err) netdev_err(netdev, "Unable to allocate interrupt Error: %d\n", err); - netdev_dbg(netdev, - "adapter->have_msi : %d flags : 0x%04x return : 0x%04x\n", - adapter->have_msi, flags, err); + netdev_dbg(netdev, "have_msi : %d return : 0x%04x\n", + pci_dev_msi_enabled(adapter->pdev), err); return err; } - /** * pch_gbe_up - Up GbE network device * @adapter: Board private structure @@ -2399,9 +2389,9 @@ static void pch_gbe_netpoll(struct net_device *netdev) { struct pch_gbe_adapter *adapter = netdev_priv(netdev); - disable_irq(adapter->pdev->irq); - pch_gbe_intr(adapter->pdev->irq, netdev); - enable_irq(adapter->pdev->irq); + disable_irq(adapter->irq); + pch_gbe_intr(adapter->irq, netdev); + enable_irq(adapter->irq); } #endif diff --git a/drivers/net/ethernet/packetengines/hamachi.c b/drivers/net/ethernet/packetengines/hamachi.c index 482b85e4d665..c9529c29a0a7 100644 --- a/drivers/net/ethernet/packetengines/hamachi.c +++ b/drivers/net/ethernet/packetengines/hamachi.c @@ -413,13 +413,13 @@ that case. /* The rest of these values should never change. */ -static void hamachi_timer(unsigned long data); +static void hamachi_timer(struct timer_list *t); enum capability_flags {CanHaveMII=1, }; static const struct chip_info { u16 vendor_id, device_id, device_id_mask, pad; const char *name; - void (*media_timer)(unsigned long data); + void (*media_timer)(struct timer_list *t); int flags; } chip_tbl[] = { {0x1318, 0x0911, 0xffff, 0, "Hamachi GNIC-II", hamachi_timer, 0}, @@ -547,7 +547,7 @@ static int mdio_read(struct net_device *dev, int phy_id, int location); static void mdio_write(struct net_device *dev, int phy_id, int location, int value); static int hamachi_open(struct net_device *dev); static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); -static void hamachi_timer(unsigned long data); +static void hamachi_timer(struct timer_list *t); static void hamachi_tx_timeout(struct net_device *dev); static void hamachi_init_ring(struct net_device *dev); static netdev_tx_t hamachi_start_xmit(struct sk_buff *skb, @@ -979,10 +979,8 @@ static int hamachi_open(struct net_device *dev) dev->name, readw(ioaddr + RxStatus), readw(ioaddr + TxStatus)); } /* Set the timer to check for link beat. */ - init_timer(&hmp->timer); + timer_setup(&hmp->timer, hamachi_timer, 0); hmp->timer.expires = RUN_AT((24*HZ)/10); /* 2.4 sec. */ - hmp->timer.data = (unsigned long)dev; - hmp->timer.function = hamachi_timer; /* timer handler */ add_timer(&hmp->timer); return 0; @@ -1019,10 +1017,10 @@ static inline int hamachi_tx(struct net_device *dev) return 0; } -static void hamachi_timer(unsigned long data) +static void hamachi_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct hamachi_private *hmp = netdev_priv(dev); + struct hamachi_private *hmp = from_timer(hmp, t, timer); + struct net_device *dev = hmp->mii_if.dev; void __iomem *ioaddr = hmp->base; int next_tick = 10*HZ; diff --git a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c index fa7770da6ef8..54224d1822e3 100644 --- a/drivers/net/ethernet/packetengines/yellowfin.c +++ b/drivers/net/ethernet/packetengines/yellowfin.c @@ -343,7 +343,7 @@ static int mdio_read(void __iomem *ioaddr, int phy_id, int location); static void mdio_write(void __iomem *ioaddr, int phy_id, int location, int value); static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static int yellowfin_open(struct net_device *dev); -static void yellowfin_timer(unsigned long data); +static void yellowfin_timer(struct timer_list *t); static void yellowfin_tx_timeout(struct net_device *dev); static int yellowfin_init_ring(struct net_device *dev); static netdev_tx_t yellowfin_start_xmit(struct sk_buff *skb, @@ -632,10 +632,8 @@ static int yellowfin_open(struct net_device *dev) } /* Set the timer to check for link beat. */ - init_timer(&yp->timer); + timer_setup(&yp->timer, yellowfin_timer, 0); yp->timer.expires = jiffies + 3*HZ; - yp->timer.data = (unsigned long)dev; - yp->timer.function = yellowfin_timer; /* timer handler */ add_timer(&yp->timer); out: return rc; @@ -645,10 +643,10 @@ err_free_irq: goto out; } -static void yellowfin_timer(unsigned long data) +static void yellowfin_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct yellowfin_private *yp = netdev_priv(dev); + struct yellowfin_private *yp = from_timer(yp, t, timer); + struct net_device *dev = pci_get_drvdata(yp->pci_dev); void __iomem *ioaddr = yp->base; int next_tick = 60*HZ; diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig index c2e24afbaeb2..26ddf092e3ec 100644 --- a/drivers/net/ethernet/qlogic/Kconfig +++ b/drivers/net/ethernet/qlogic/Kconfig @@ -117,4 +117,7 @@ config QED_ISCSI config QED_FCOE bool +config QED_OOO + bool + endif # NET_VENDOR_QLOGIC diff --git a/drivers/net/ethernet/qlogic/qed/Makefile b/drivers/net/ethernet/qlogic/qed/Makefile index 82a0b90185df..c70cf2ad81c0 100644 --- a/drivers/net/ethernet/qlogic/qed/Makefile +++ b/drivers/net/ethernet/qlogic/qed/Makefile @@ -7,5 +7,6 @@ qed-y := qed_cxt.o qed_dev.o qed_hw.o qed_init_fw_funcs.o qed_init_ops.o \ qed-$(CONFIG_QED_SRIOV) += qed_sriov.o qed_vf.o qed-$(CONFIG_QED_LL2) += qed_ll2.o qed-$(CONFIG_QED_RDMA) += qed_roce.o qed_rdma.o qed_iwarp.o -qed-$(CONFIG_QED_ISCSI) += qed_iscsi.o qed_ooo.o +qed-$(CONFIG_QED_ISCSI) += qed_iscsi.o qed-$(CONFIG_QED_FCOE) += qed_fcoe.o +qed-$(CONFIG_QED_OOO) += qed_ooo.o diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index af106be8cc08..afd07ad91631 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -2069,6 +2069,12 @@ static void qed_rdma_set_pf_params(struct qed_hwfn *p_hwfn, num_srqs = min_t(u32, 32 * 1024, p_params->num_srqs); + if (p_hwfn->mcp_info->func_info.protocol == QED_PCI_ETH_RDMA) { + DP_NOTICE(p_hwfn, + "Current day drivers don't support RoCE & iWARP simultaneously on the same PF. Default to RoCE-only\n"); + p_hwfn->hw_info.personality = QED_PCI_ETH_ROCE; + } + switch (p_hwfn->hw_info.personality) { case QED_PCI_ETH_IWARP: /* Each QP requires one connection */ diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index 8f6ccc0c39e5..6e15d3c10ebf 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -2308,7 +2308,7 @@ static int qed_dcbnl_ieee_setapp(struct qed_dev *cdev, struct dcb_app *app) DP_VERBOSE(hwfn, QED_MSG_DCB, "selector = %d protocol = %d pri = %d\n", app->selector, app->protocol, app->priority); - if (app->priority < 0 || app->priority >= QED_MAX_PFC_PRIORITIES) { + if (app->priority >= QED_MAX_PFC_PRIORITIES) { DP_INFO(hwfn, "Invalid priority %d\n", app->priority); return -EINVAL; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index 9d989c96278c..409041eab189 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -41,6 +41,7 @@ #include "qed_rdma.h" #include "qed_reg_addr.h" #include "qed_sp.h" +#include "qed_ooo.h" #define QED_IWARP_ORD_DEFAULT 32 #define QED_IWARP_IRD_DEFAULT 32 @@ -119,6 +120,13 @@ static void qed_iwarp_cid_cleaned(struct qed_hwfn *p_hwfn, u32 cid) spin_unlock_bh(&p_hwfn->p_rdma_info->lock); } +void qed_iwarp_init_fw_ramrod(struct qed_hwfn *p_hwfn, + struct iwarp_init_func_params *p_ramrod) +{ + p_ramrod->ll2_ooo_q_index = RESC_START(p_hwfn, QED_LL2_QUEUE) + + p_hwfn->p_rdma_info->iwarp.ll2_ooo_handle; +} + static int qed_iwarp_alloc_cid(struct qed_hwfn *p_hwfn, u32 *cid) { int rc; @@ -1402,12 +1410,22 @@ int qed_iwarp_alloc(struct qed_hwfn *p_hwfn) INIT_LIST_HEAD(&p_hwfn->p_rdma_info->iwarp.ep_free_list); spin_lock_init(&p_hwfn->p_rdma_info->iwarp.iw_lock); - return qed_iwarp_prealloc_ep(p_hwfn, true); + rc = qed_iwarp_prealloc_ep(p_hwfn, true); + if (rc) + return rc; + + return qed_ooo_alloc(p_hwfn); } void qed_iwarp_resc_free(struct qed_hwfn *p_hwfn) { + struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp; + + qed_ooo_free(p_hwfn); qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->tcp_cid_map, 1); + kfree(iwarp_info->mpa_bufs); + kfree(iwarp_info->partial_fpdus); + kfree(iwarp_info->mpa_intermediate_buf); } int qed_iwarp_accept(void *rdma_cxt, struct qed_iwarp_accept_in *iparams) @@ -1705,6 +1723,569 @@ qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn, return 0; } +static struct qed_iwarp_fpdu *qed_iwarp_get_curr_fpdu(struct qed_hwfn *p_hwfn, + u16 cid) +{ + struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp; + struct qed_iwarp_fpdu *partial_fpdu; + u32 idx; + + idx = cid - qed_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_IWARP); + if (idx >= iwarp_info->max_num_partial_fpdus) { + DP_ERR(p_hwfn, "Invalid cid %x max_num_partial_fpdus=%x\n", cid, + iwarp_info->max_num_partial_fpdus); + return NULL; + } + + partial_fpdu = &iwarp_info->partial_fpdus[idx]; + + return partial_fpdu; +} + +enum qed_iwarp_mpa_pkt_type { + QED_IWARP_MPA_PKT_PACKED, + QED_IWARP_MPA_PKT_PARTIAL, + QED_IWARP_MPA_PKT_UNALIGNED +}; + +#define QED_IWARP_INVALID_FPDU_LENGTH 0xffff +#define QED_IWARP_MPA_FPDU_LENGTH_SIZE (2) +#define QED_IWARP_MPA_CRC32_DIGEST_SIZE (4) + +/* Pad to multiple of 4 */ +#define QED_IWARP_PDU_DATA_LEN_WITH_PAD(data_len) ALIGN(data_len, 4) +#define QED_IWARP_FPDU_LEN_WITH_PAD(_mpa_len) \ + (QED_IWARP_PDU_DATA_LEN_WITH_PAD((_mpa_len) + \ + QED_IWARP_MPA_FPDU_LENGTH_SIZE) + \ + QED_IWARP_MPA_CRC32_DIGEST_SIZE) + +/* fpdu can be fragmented over maximum 3 bds: header, partial mpa, unaligned */ +#define QED_IWARP_MAX_BDS_PER_FPDU 3 + +char *pkt_type_str[] = { + "QED_IWARP_MPA_PKT_PACKED", + "QED_IWARP_MPA_PKT_PARTIAL", + "QED_IWARP_MPA_PKT_UNALIGNED" +}; + +static int +qed_iwarp_recycle_pkt(struct qed_hwfn *p_hwfn, + struct qed_iwarp_fpdu *fpdu, + struct qed_iwarp_ll2_buff *buf); + +static enum qed_iwarp_mpa_pkt_type +qed_iwarp_mpa_classify(struct qed_hwfn *p_hwfn, + struct qed_iwarp_fpdu *fpdu, + u16 tcp_payload_len, u8 *mpa_data) +{ + enum qed_iwarp_mpa_pkt_type pkt_type; + u16 mpa_len; + + if (fpdu->incomplete_bytes) { + pkt_type = QED_IWARP_MPA_PKT_UNALIGNED; + goto out; + } + + /* special case of one byte remaining... + * lower byte will be read next packet + */ + if (tcp_payload_len == 1) { + fpdu->fpdu_length = *mpa_data << BITS_PER_BYTE; + pkt_type = QED_IWARP_MPA_PKT_PARTIAL; + goto out; + } + + mpa_len = ntohs(*((u16 *)(mpa_data))); + fpdu->fpdu_length = QED_IWARP_FPDU_LEN_WITH_PAD(mpa_len); + + if (fpdu->fpdu_length <= tcp_payload_len) + pkt_type = QED_IWARP_MPA_PKT_PACKED; + else + pkt_type = QED_IWARP_MPA_PKT_PARTIAL; + +out: + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "MPA_ALIGN: %s: fpdu_length=0x%x tcp_payload_len:0x%x\n", + pkt_type_str[pkt_type], fpdu->fpdu_length, tcp_payload_len); + + return pkt_type; +} + +static void +qed_iwarp_init_fpdu(struct qed_iwarp_ll2_buff *buf, + struct qed_iwarp_fpdu *fpdu, + struct unaligned_opaque_data *pkt_data, + u16 tcp_payload_size, u8 placement_offset) +{ + fpdu->mpa_buf = buf; + fpdu->pkt_hdr = buf->data_phys_addr + placement_offset; + fpdu->pkt_hdr_size = pkt_data->tcp_payload_offset; + fpdu->mpa_frag = buf->data_phys_addr + pkt_data->first_mpa_offset; + fpdu->mpa_frag_virt = (u8 *)(buf->data) + pkt_data->first_mpa_offset; + + if (tcp_payload_size == 1) + fpdu->incomplete_bytes = QED_IWARP_INVALID_FPDU_LENGTH; + else if (tcp_payload_size < fpdu->fpdu_length) + fpdu->incomplete_bytes = fpdu->fpdu_length - tcp_payload_size; + else + fpdu->incomplete_bytes = 0; /* complete fpdu */ + + fpdu->mpa_frag_len = fpdu->fpdu_length - fpdu->incomplete_bytes; +} + +static int +qed_iwarp_cp_pkt(struct qed_hwfn *p_hwfn, + struct qed_iwarp_fpdu *fpdu, + struct unaligned_opaque_data *pkt_data, + struct qed_iwarp_ll2_buff *buf, u16 tcp_payload_size) +{ + u8 *tmp_buf = p_hwfn->p_rdma_info->iwarp.mpa_intermediate_buf; + int rc; + + /* need to copy the data from the partial packet stored in fpdu + * to the new buf, for this we also need to move the data currently + * placed on the buf. The assumption is that the buffer is big enough + * since fpdu_length <= mss, we use an intermediate buffer since + * we may need to copy the new data to an overlapping location + */ + if ((fpdu->mpa_frag_len + tcp_payload_size) > (u16)buf->buff_size) { + DP_ERR(p_hwfn, + "MPA ALIGN: Unexpected: buffer is not large enough for split fpdu buff_size = %d mpa_frag_len = %d, tcp_payload_size = %d, incomplete_bytes = %d\n", + buf->buff_size, fpdu->mpa_frag_len, + tcp_payload_size, fpdu->incomplete_bytes); + return -EINVAL; + } + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "MPA ALIGN Copying fpdu: [%p, %d] [%p, %d]\n", + fpdu->mpa_frag_virt, fpdu->mpa_frag_len, + (u8 *)(buf->data) + pkt_data->first_mpa_offset, + tcp_payload_size); + + memcpy(tmp_buf, fpdu->mpa_frag_virt, fpdu->mpa_frag_len); + memcpy(tmp_buf + fpdu->mpa_frag_len, + (u8 *)(buf->data) + pkt_data->first_mpa_offset, + tcp_payload_size); + + rc = qed_iwarp_recycle_pkt(p_hwfn, fpdu, fpdu->mpa_buf); + if (rc) + return rc; + + /* If we managed to post the buffer copy the data to the new buffer + * o/w this will occur in the next round... + */ + memcpy((u8 *)(buf->data), tmp_buf, + fpdu->mpa_frag_len + tcp_payload_size); + + fpdu->mpa_buf = buf; + /* fpdu->pkt_hdr remains as is */ + /* fpdu->mpa_frag is overridden with new buf */ + fpdu->mpa_frag = buf->data_phys_addr; + fpdu->mpa_frag_virt = buf->data; + fpdu->mpa_frag_len += tcp_payload_size; + + fpdu->incomplete_bytes -= tcp_payload_size; + + DP_VERBOSE(p_hwfn, + QED_MSG_RDMA, + "MPA ALIGN: split fpdu buff_size = %d mpa_frag_len = %d, tcp_payload_size = %d, incomplete_bytes = %d\n", + buf->buff_size, fpdu->mpa_frag_len, tcp_payload_size, + fpdu->incomplete_bytes); + + return 0; +} + +static void +qed_iwarp_update_fpdu_length(struct qed_hwfn *p_hwfn, + struct qed_iwarp_fpdu *fpdu, u8 *mpa_data) +{ + u16 mpa_len; + + /* Update incomplete packets if needed */ + if (fpdu->incomplete_bytes == QED_IWARP_INVALID_FPDU_LENGTH) { + /* Missing lower byte is now available */ + mpa_len = fpdu->fpdu_length | *mpa_data; + fpdu->fpdu_length = QED_IWARP_FPDU_LEN_WITH_PAD(mpa_len); + fpdu->mpa_frag_len = fpdu->fpdu_length; + /* one byte of hdr */ + fpdu->incomplete_bytes = fpdu->fpdu_length - 1; + DP_VERBOSE(p_hwfn, + QED_MSG_RDMA, + "MPA_ALIGN: Partial header mpa_len=%x fpdu_length=%x incomplete_bytes=%x\n", + mpa_len, fpdu->fpdu_length, fpdu->incomplete_bytes); + } +} + +#define QED_IWARP_IS_RIGHT_EDGE(_curr_pkt) \ + (GET_FIELD((_curr_pkt)->flags, \ + UNALIGNED_OPAQUE_DATA_PKT_REACHED_WIN_RIGHT_EDGE)) + +/* This function is used to recycle a buffer using the ll2 drop option. It + * uses the mechanism to ensure that all buffers posted to tx before this one + * were completed. The buffer sent here will be sent as a cookie in the tx + * completion function and can then be reposted to rx chain when done. The flow + * that requires this is the flow where a FPDU splits over more than 3 tcp + * segments. In this case the driver needs to re-post a rx buffer instead of + * the one received, but driver can't simply repost a buffer it copied from + * as there is a case where the buffer was originally a packed FPDU, and is + * partially posted to FW. Driver needs to ensure FW is done with it. + */ +static int +qed_iwarp_recycle_pkt(struct qed_hwfn *p_hwfn, + struct qed_iwarp_fpdu *fpdu, + struct qed_iwarp_ll2_buff *buf) +{ + struct qed_ll2_tx_pkt_info tx_pkt; + u8 ll2_handle; + int rc; + + memset(&tx_pkt, 0, sizeof(tx_pkt)); + tx_pkt.num_of_bds = 1; + tx_pkt.tx_dest = QED_LL2_TX_DEST_DROP; + tx_pkt.l4_hdr_offset_w = fpdu->pkt_hdr_size >> 2; + tx_pkt.first_frag = fpdu->pkt_hdr; + tx_pkt.first_frag_len = fpdu->pkt_hdr_size; + buf->piggy_buf = NULL; + tx_pkt.cookie = buf; + + ll2_handle = p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle; + + rc = qed_ll2_prepare_tx_packet(p_hwfn, ll2_handle, &tx_pkt, true); + if (rc) + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Can't drop packet rc=%d\n", rc); + + DP_VERBOSE(p_hwfn, + QED_MSG_RDMA, + "MPA_ALIGN: send drop tx packet [%lx, 0x%x], buf=%p, rc=%d\n", + (unsigned long int)tx_pkt.first_frag, + tx_pkt.first_frag_len, buf, rc); + + return rc; +} + +static int +qed_iwarp_win_right_edge(struct qed_hwfn *p_hwfn, struct qed_iwarp_fpdu *fpdu) +{ + struct qed_ll2_tx_pkt_info tx_pkt; + u8 ll2_handle; + int rc; + + memset(&tx_pkt, 0, sizeof(tx_pkt)); + tx_pkt.num_of_bds = 1; + tx_pkt.tx_dest = QED_LL2_TX_DEST_LB; + tx_pkt.l4_hdr_offset_w = fpdu->pkt_hdr_size >> 2; + + tx_pkt.first_frag = fpdu->pkt_hdr; + tx_pkt.first_frag_len = fpdu->pkt_hdr_size; + tx_pkt.enable_ip_cksum = true; + tx_pkt.enable_l4_cksum = true; + tx_pkt.calc_ip_len = true; + /* vlan overload with enum iwarp_ll2_tx_queues */ + tx_pkt.vlan = IWARP_LL2_ALIGNED_RIGHT_TRIMMED_TX_QUEUE; + + ll2_handle = p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle; + + rc = qed_ll2_prepare_tx_packet(p_hwfn, ll2_handle, &tx_pkt, true); + if (rc) + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Can't send right edge rc=%d\n", rc); + DP_VERBOSE(p_hwfn, + QED_MSG_RDMA, + "MPA_ALIGN: Sent right edge FPDU num_bds=%d [%lx, 0x%x], rc=%d\n", + tx_pkt.num_of_bds, + (unsigned long int)tx_pkt.first_frag, + tx_pkt.first_frag_len, rc); + + return rc; +} + +static int +qed_iwarp_send_fpdu(struct qed_hwfn *p_hwfn, + struct qed_iwarp_fpdu *fpdu, + struct unaligned_opaque_data *curr_pkt, + struct qed_iwarp_ll2_buff *buf, + u16 tcp_payload_size, enum qed_iwarp_mpa_pkt_type pkt_type) +{ + struct qed_ll2_tx_pkt_info tx_pkt; + u8 ll2_handle; + int rc; + + memset(&tx_pkt, 0, sizeof(tx_pkt)); + + /* An unaligned packet means it's split over two tcp segments. So the + * complete packet requires 3 bds, one for the header, one for the + * part of the fpdu of the first tcp segment, and the last fragment + * will point to the remainder of the fpdu. A packed pdu, requires only + * two bds, one for the header and one for the data. + */ + tx_pkt.num_of_bds = (pkt_type == QED_IWARP_MPA_PKT_UNALIGNED) ? 3 : 2; + tx_pkt.tx_dest = QED_LL2_TX_DEST_LB; + tx_pkt.l4_hdr_offset_w = fpdu->pkt_hdr_size >> 2; /* offset in words */ + + /* Send the mpa_buf only with the last fpdu (in case of packed) */ + if (pkt_type == QED_IWARP_MPA_PKT_UNALIGNED || + tcp_payload_size <= fpdu->fpdu_length) + tx_pkt.cookie = fpdu->mpa_buf; + + tx_pkt.first_frag = fpdu->pkt_hdr; + tx_pkt.first_frag_len = fpdu->pkt_hdr_size; + tx_pkt.enable_ip_cksum = true; + tx_pkt.enable_l4_cksum = true; + tx_pkt.calc_ip_len = true; + /* vlan overload with enum iwarp_ll2_tx_queues */ + tx_pkt.vlan = IWARP_LL2_ALIGNED_TX_QUEUE; + + /* special case of unaligned packet and not packed, need to send + * both buffers as cookie to release. + */ + if (tcp_payload_size == fpdu->incomplete_bytes) + fpdu->mpa_buf->piggy_buf = buf; + + ll2_handle = p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle; + + /* Set first fragment to header */ + rc = qed_ll2_prepare_tx_packet(p_hwfn, ll2_handle, &tx_pkt, true); + if (rc) + goto out; + + /* Set second fragment to first part of packet */ + rc = qed_ll2_set_fragment_of_tx_packet(p_hwfn, ll2_handle, + fpdu->mpa_frag, + fpdu->mpa_frag_len); + if (rc) + goto out; + + if (!fpdu->incomplete_bytes) + goto out; + + /* Set third fragment to second part of the packet */ + rc = qed_ll2_set_fragment_of_tx_packet(p_hwfn, + ll2_handle, + buf->data_phys_addr + + curr_pkt->first_mpa_offset, + fpdu->incomplete_bytes); +out: + DP_VERBOSE(p_hwfn, + QED_MSG_RDMA, + "MPA_ALIGN: Sent FPDU num_bds=%d first_frag_len=%x, mpa_frag_len=0x%x, incomplete_bytes:0x%x rc=%d\n", + tx_pkt.num_of_bds, + tx_pkt.first_frag_len, + fpdu->mpa_frag_len, + fpdu->incomplete_bytes, rc); + + return rc; +} + +static void +qed_iwarp_mpa_get_data(struct qed_hwfn *p_hwfn, + struct unaligned_opaque_data *curr_pkt, + u32 opaque_data0, u32 opaque_data1) +{ + u64 opaque_data; + + opaque_data = HILO_64(opaque_data1, opaque_data0); + *curr_pkt = *((struct unaligned_opaque_data *)&opaque_data); + + curr_pkt->first_mpa_offset = curr_pkt->tcp_payload_offset + + le16_to_cpu(curr_pkt->first_mpa_offset); + curr_pkt->cid = le32_to_cpu(curr_pkt->cid); +} + +/* This function is called when an unaligned or incomplete MPA packet arrives + * driver needs to align the packet, perhaps using previous data and send + * it down to FW once it is aligned. + */ +static int +qed_iwarp_process_mpa_pkt(struct qed_hwfn *p_hwfn, + struct qed_iwarp_ll2_mpa_buf *mpa_buf) +{ + struct unaligned_opaque_data *curr_pkt = &mpa_buf->data; + struct qed_iwarp_ll2_buff *buf = mpa_buf->ll2_buf; + enum qed_iwarp_mpa_pkt_type pkt_type; + struct qed_iwarp_fpdu *fpdu; + int rc = -EINVAL; + u8 *mpa_data; + + fpdu = qed_iwarp_get_curr_fpdu(p_hwfn, curr_pkt->cid & 0xffff); + if (!fpdu) { /* something corrupt with cid, post rx back */ + DP_ERR(p_hwfn, "Invalid cid, drop and post back to rx cid=%x\n", + curr_pkt->cid); + goto err; + } + + do { + mpa_data = ((u8 *)(buf->data) + curr_pkt->first_mpa_offset); + + pkt_type = qed_iwarp_mpa_classify(p_hwfn, fpdu, + mpa_buf->tcp_payload_len, + mpa_data); + + switch (pkt_type) { + case QED_IWARP_MPA_PKT_PARTIAL: + qed_iwarp_init_fpdu(buf, fpdu, + curr_pkt, + mpa_buf->tcp_payload_len, + mpa_buf->placement_offset); + + if (!QED_IWARP_IS_RIGHT_EDGE(curr_pkt)) { + mpa_buf->tcp_payload_len = 0; + break; + } + + rc = qed_iwarp_win_right_edge(p_hwfn, fpdu); + + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Can't send FPDU:reset rc=%d\n", rc); + memset(fpdu, 0, sizeof(*fpdu)); + break; + } + + mpa_buf->tcp_payload_len = 0; + break; + case QED_IWARP_MPA_PKT_PACKED: + qed_iwarp_init_fpdu(buf, fpdu, + curr_pkt, + mpa_buf->tcp_payload_len, + mpa_buf->placement_offset); + + rc = qed_iwarp_send_fpdu(p_hwfn, fpdu, curr_pkt, buf, + mpa_buf->tcp_payload_len, + pkt_type); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Can't send FPDU:reset rc=%d\n", rc); + memset(fpdu, 0, sizeof(*fpdu)); + break; + } + + mpa_buf->tcp_payload_len -= fpdu->fpdu_length; + curr_pkt->first_mpa_offset += fpdu->fpdu_length; + break; + case QED_IWARP_MPA_PKT_UNALIGNED: + qed_iwarp_update_fpdu_length(p_hwfn, fpdu, mpa_data); + if (mpa_buf->tcp_payload_len < fpdu->incomplete_bytes) { + /* special handling of fpdu split over more + * than 2 segments + */ + if (QED_IWARP_IS_RIGHT_EDGE(curr_pkt)) { + rc = qed_iwarp_win_right_edge(p_hwfn, + fpdu); + /* packet will be re-processed later */ + if (rc) + return rc; + } + + rc = qed_iwarp_cp_pkt(p_hwfn, fpdu, curr_pkt, + buf, + mpa_buf->tcp_payload_len); + if (rc) /* packet will be re-processed later */ + return rc; + + mpa_buf->tcp_payload_len = 0; + break; + } + + rc = qed_iwarp_send_fpdu(p_hwfn, fpdu, curr_pkt, buf, + mpa_buf->tcp_payload_len, + pkt_type); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Can't send FPDU:delay rc=%d\n", rc); + /* don't reset fpdu -> we need it for next + * classify + */ + break; + } + + mpa_buf->tcp_payload_len -= fpdu->incomplete_bytes; + curr_pkt->first_mpa_offset += fpdu->incomplete_bytes; + /* The framed PDU was sent - no more incomplete bytes */ + fpdu->incomplete_bytes = 0; + break; + } + } while (mpa_buf->tcp_payload_len && !rc); + + return rc; + +err: + qed_iwarp_ll2_post_rx(p_hwfn, + buf, + p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle); + return rc; +} + +static void qed_iwarp_process_pending_pkts(struct qed_hwfn *p_hwfn) +{ + struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp; + struct qed_iwarp_ll2_mpa_buf *mpa_buf = NULL; + int rc; + + while (!list_empty(&iwarp_info->mpa_buf_pending_list)) { + mpa_buf = list_first_entry(&iwarp_info->mpa_buf_pending_list, + struct qed_iwarp_ll2_mpa_buf, + list_entry); + + rc = qed_iwarp_process_mpa_pkt(p_hwfn, mpa_buf); + + /* busy means break and continue processing later, don't + * remove the buf from the pending list. + */ + if (rc == -EBUSY) + break; + + list_del(&mpa_buf->list_entry); + list_add_tail(&mpa_buf->list_entry, &iwarp_info->mpa_buf_list); + + if (rc) { /* different error, don't continue */ + DP_NOTICE(p_hwfn, "process pkts failed rc=%d\n", rc); + break; + } + } +} + +static void +qed_iwarp_ll2_comp_mpa_pkt(void *cxt, struct qed_ll2_comp_rx_data *data) +{ + struct qed_iwarp_ll2_mpa_buf *mpa_buf; + struct qed_iwarp_info *iwarp_info; + struct qed_hwfn *p_hwfn = cxt; + + iwarp_info = &p_hwfn->p_rdma_info->iwarp; + mpa_buf = list_first_entry(&iwarp_info->mpa_buf_list, + struct qed_iwarp_ll2_mpa_buf, list_entry); + if (!mpa_buf) { + DP_ERR(p_hwfn, "No free mpa buf\n"); + goto err; + } + + list_del(&mpa_buf->list_entry); + qed_iwarp_mpa_get_data(p_hwfn, &mpa_buf->data, + data->opaque_data_0, data->opaque_data_1); + + DP_VERBOSE(p_hwfn, + QED_MSG_RDMA, + "LL2 MPA CompRx payload_len:0x%x\tfirst_mpa_offset:0x%x\ttcp_payload_offset:0x%x\tflags:0x%x\tcid:0x%x\n", + data->length.packet_length, mpa_buf->data.first_mpa_offset, + mpa_buf->data.tcp_payload_offset, mpa_buf->data.flags, + mpa_buf->data.cid); + + mpa_buf->ll2_buf = data->cookie; + mpa_buf->tcp_payload_len = data->length.packet_length - + mpa_buf->data.first_mpa_offset; + mpa_buf->data.first_mpa_offset += data->u.placement_offset; + mpa_buf->placement_offset = data->u.placement_offset; + + list_add_tail(&mpa_buf->list_entry, &iwarp_info->mpa_buf_pending_list); + + qed_iwarp_process_pending_pkts(p_hwfn); + return; +err: + qed_iwarp_ll2_post_rx(p_hwfn, data->cookie, + iwarp_info->ll2_mpa_handle); +} + static void qed_iwarp_ll2_comp_syn_pkt(void *cxt, struct qed_ll2_comp_rx_data *data) { @@ -1725,6 +2306,14 @@ qed_iwarp_ll2_comp_syn_pkt(void *cxt, struct qed_ll2_comp_rx_data *data) memset(&cm_info, 0, sizeof(cm_info)); ll2_syn_handle = p_hwfn->p_rdma_info->iwarp.ll2_syn_handle; + + /* Check if packet was received with errors... */ + if (data->err_flags) { + DP_NOTICE(p_hwfn, "Error received on SYN packet: 0x%x\n", + data->err_flags); + goto err; + } + if (GET_FIELD(data->parse_flags, PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED) && GET_FIELD(data->parse_flags, PARSING_AND_ERR_FLAGS_L4CHKSMERROR)) { @@ -1839,10 +2428,25 @@ static void qed_iwarp_ll2_comp_tx_pkt(void *cxt, u8 connection_handle, bool b_last_fragment, bool b_last_packet) { struct qed_iwarp_ll2_buff *buffer = cookie; + struct qed_iwarp_ll2_buff *piggy; struct qed_hwfn *p_hwfn = cxt; + if (!buffer) /* can happen in packed mpa unaligned... */ + return; + /* this was originally an rx packet, post it back */ + piggy = buffer->piggy_buf; + if (piggy) { + buffer->piggy_buf = NULL; + qed_iwarp_ll2_post_rx(p_hwfn, piggy, connection_handle); + } + qed_iwarp_ll2_post_rx(p_hwfn, buffer, connection_handle); + + if (connection_handle == p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle) + qed_iwarp_process_pending_pkts(p_hwfn); + + return; } static void qed_iwarp_ll2_rel_tx_pkt(void *cxt, u8 connection_handle, @@ -1855,12 +2459,44 @@ static void qed_iwarp_ll2_rel_tx_pkt(void *cxt, u8 connection_handle, if (!buffer) return; + if (buffer->piggy_buf) { + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + buffer->piggy_buf->buff_size, + buffer->piggy_buf->data, + buffer->piggy_buf->data_phys_addr); + + kfree(buffer->piggy_buf); + } + dma_free_coherent(&p_hwfn->cdev->pdev->dev, buffer->buff_size, buffer->data, buffer->data_phys_addr); kfree(buffer); } +/* The only slowpath for iwarp ll2 is unalign flush. When this completion + * is received, need to reset the FPDU. + */ +void +qed_iwarp_ll2_slowpath(void *cxt, + u8 connection_handle, + u32 opaque_data_0, u32 opaque_data_1) +{ + struct unaligned_opaque_data unalign_data; + struct qed_hwfn *p_hwfn = cxt; + struct qed_iwarp_fpdu *fpdu; + + qed_iwarp_mpa_get_data(p_hwfn, &unalign_data, + opaque_data_0, opaque_data_1); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "(0x%x) Flush fpdu\n", + unalign_data.cid); + + fpdu = qed_iwarp_get_curr_fpdu(p_hwfn, (u16)unalign_data.cid); + if (fpdu) + memset(fpdu, 0, sizeof(*fpdu)); +} + static int qed_iwarp_ll2_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp; @@ -1876,6 +2512,26 @@ static int qed_iwarp_ll2_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) iwarp_info->ll2_syn_handle = QED_IWARP_HANDLE_INVAL; } + if (iwarp_info->ll2_ooo_handle != QED_IWARP_HANDLE_INVAL) { + rc = qed_ll2_terminate_connection(p_hwfn, + iwarp_info->ll2_ooo_handle); + if (rc) + DP_INFO(p_hwfn, "Failed to terminate ooo connection\n"); + + qed_ll2_release_connection(p_hwfn, iwarp_info->ll2_ooo_handle); + iwarp_info->ll2_ooo_handle = QED_IWARP_HANDLE_INVAL; + } + + if (iwarp_info->ll2_mpa_handle != QED_IWARP_HANDLE_INVAL) { + rc = qed_ll2_terminate_connection(p_hwfn, + iwarp_info->ll2_mpa_handle); + if (rc) + DP_INFO(p_hwfn, "Failed to terminate mpa connection\n"); + + qed_ll2_release_connection(p_hwfn, iwarp_info->ll2_mpa_handle); + iwarp_info->ll2_mpa_handle = QED_IWARP_HANDLE_INVAL; + } + qed_llh_remove_mac_filter(p_hwfn, p_ptt, p_hwfn->p_rdma_info->iwarp.mac_addr); return rc; @@ -1927,10 +2583,15 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn, struct qed_iwarp_info *iwarp_info; struct qed_ll2_acquire_data data; struct qed_ll2_cbs cbs; + u32 mpa_buff_size; + u16 n_ooo_bufs; int rc = 0; + int i; iwarp_info = &p_hwfn->p_rdma_info->iwarp; iwarp_info->ll2_syn_handle = QED_IWARP_HANDLE_INVAL; + iwarp_info->ll2_ooo_handle = QED_IWARP_HANDLE_INVAL; + iwarp_info->ll2_mpa_handle = QED_IWARP_HANDLE_INVAL; iwarp_info->max_mtu = params->max_mtu; @@ -1978,6 +2639,91 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn, if (rc) goto err; + /* Start OOO connection */ + data.input.conn_type = QED_LL2_TYPE_OOO; + data.input.mtu = params->max_mtu; + + n_ooo_bufs = (QED_IWARP_MAX_OOO * QED_IWARP_RCV_WND_SIZE_DEF) / + iwarp_info->max_mtu; + n_ooo_bufs = min_t(u32, n_ooo_bufs, QED_IWARP_LL2_OOO_MAX_RX_SIZE); + + data.input.rx_num_desc = n_ooo_bufs; + data.input.rx_num_ooo_buffers = n_ooo_bufs; + + data.input.tx_max_bds_per_packet = 1; /* will never be fragmented */ + data.input.tx_num_desc = QED_IWARP_LL2_OOO_DEF_TX_SIZE; + data.p_connection_handle = &iwarp_info->ll2_ooo_handle; + + rc = qed_ll2_acquire_connection(p_hwfn, &data); + if (rc) + goto err; + + rc = qed_ll2_establish_connection(p_hwfn, iwarp_info->ll2_ooo_handle); + if (rc) + goto err; + + /* Start Unaligned MPA connection */ + cbs.rx_comp_cb = qed_iwarp_ll2_comp_mpa_pkt; + cbs.slowpath_cb = qed_iwarp_ll2_slowpath; + + memset(&data, 0, sizeof(data)); + data.input.conn_type = QED_LL2_TYPE_IWARP; + data.input.mtu = params->max_mtu; + /* FW requires that once a packet arrives OOO, it must have at + * least 2 rx buffers available on the unaligned connection + * for handling the case that it is a partial fpdu. + */ + data.input.rx_num_desc = n_ooo_bufs * 2; + data.input.tx_num_desc = data.input.rx_num_desc; + data.input.tx_max_bds_per_packet = QED_IWARP_MAX_BDS_PER_FPDU; + data.p_connection_handle = &iwarp_info->ll2_mpa_handle; + data.input.secondary_queue = true; + data.cbs = &cbs; + + rc = qed_ll2_acquire_connection(p_hwfn, &data); + if (rc) + goto err; + + rc = qed_ll2_establish_connection(p_hwfn, iwarp_info->ll2_mpa_handle); + if (rc) + goto err; + + mpa_buff_size = QED_IWARP_MAX_BUF_SIZE(params->max_mtu); + rc = qed_iwarp_ll2_alloc_buffers(p_hwfn, + data.input.rx_num_desc, + mpa_buff_size, + iwarp_info->ll2_mpa_handle); + if (rc) + goto err; + + iwarp_info->partial_fpdus = kcalloc((u16)p_hwfn->p_rdma_info->num_qps, + sizeof(*iwarp_info->partial_fpdus), + GFP_KERNEL); + if (!iwarp_info->partial_fpdus) + goto err; + + iwarp_info->max_num_partial_fpdus = (u16)p_hwfn->p_rdma_info->num_qps; + + iwarp_info->mpa_intermediate_buf = kzalloc(mpa_buff_size, GFP_KERNEL); + if (!iwarp_info->mpa_intermediate_buf) + goto err; + + /* The mpa_bufs array serves for pending RX packets received on the + * mpa ll2 that don't have place on the tx ring and require later + * processing. We can't fail on allocation of such a struct therefore + * we allocate enough to take care of all rx packets + */ + iwarp_info->mpa_bufs = kcalloc(data.input.rx_num_desc, + sizeof(*iwarp_info->mpa_bufs), + GFP_KERNEL); + if (!iwarp_info->mpa_bufs) + goto err; + + INIT_LIST_HEAD(&iwarp_info->mpa_buf_pending_list); + INIT_LIST_HEAD(&iwarp_info->mpa_buf_list); + for (i = 0; i < data.input.rx_num_desc; i++) + list_add_tail(&iwarp_info->mpa_bufs[i].list_entry, + &iwarp_info->mpa_buf_list); return rc; err: qed_iwarp_ll2_stop(p_hwfn, p_ptt); @@ -2014,6 +2760,7 @@ int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, qed_spq_register_async_cb(p_hwfn, PROTOCOLID_IWARP, qed_iwarp_async_event); + qed_ooo_setup(p_hwfn); return qed_iwarp_ll2_start(p_hwfn, params, p_ptt); } diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h index 148ef3c33a5d..c1ecd743305f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h @@ -47,18 +47,51 @@ enum qed_iwarp_qp_state qed_roce2iwarp_state(enum qed_roce_qp_state state); #define QED_IWARP_LL2_SYN_TX_SIZE (128) #define QED_IWARP_LL2_SYN_RX_SIZE (256) #define QED_IWARP_MAX_SYN_PKT_SIZE (128) -#define QED_IWARP_HANDLE_INVAL (0xff) + +#define QED_IWARP_LL2_OOO_DEF_TX_SIZE (256) +#define QED_IWARP_MAX_OOO (16) +#define QED_IWARP_LL2_OOO_MAX_RX_SIZE (16384) + +#define QED_IWARP_HANDLE_INVAL (0xff) struct qed_iwarp_ll2_buff { + struct qed_iwarp_ll2_buff *piggy_buf; void *data; dma_addr_t data_phys_addr; u32 buff_size; }; +struct qed_iwarp_ll2_mpa_buf { + struct list_head list_entry; + struct qed_iwarp_ll2_buff *ll2_buf; + struct unaligned_opaque_data data; + u16 tcp_payload_len; + u8 placement_offset; +}; + +/* In some cases a fpdu will arrive with only one byte of the header, in this + * case the fpdu_length will be partial (contain only higher byte and + * incomplete bytes will contain the invalid value + */ +#define QED_IWARP_INVALID_INCOMPLETE_BYTES 0xffff + +struct qed_iwarp_fpdu { + struct qed_iwarp_ll2_buff *mpa_buf; + void *mpa_frag_virt; + dma_addr_t mpa_frag; + dma_addr_t pkt_hdr; + u16 mpa_frag_len; + u16 fpdu_length; + u16 incomplete_bytes; + u8 pkt_hdr_size; +}; + struct qed_iwarp_info { struct list_head listen_list; /* qed_iwarp_listener */ struct list_head ep_list; /* qed_iwarp_ep */ struct list_head ep_free_list; /* pre-allocated ep's */ + struct list_head mpa_buf_list; /* list of mpa_bufs */ + struct list_head mpa_buf_pending_list; spinlock_t iw_lock; /* for iwarp resources */ spinlock_t qp_lock; /* for teardown races */ u32 rcv_wnd_scale; @@ -67,9 +100,15 @@ struct qed_iwarp_info { u8 crc_needed; u8 tcp_flags; u8 ll2_syn_handle; + u8 ll2_ooo_handle; + u8 ll2_mpa_handle; u8 peer2peer; enum mpa_negotiation_mode mpa_rev; enum mpa_rtr_type rtr_type; + struct qed_iwarp_fpdu *partial_fpdus; + struct qed_iwarp_ll2_mpa_buf *mpa_bufs; + u8 *mpa_intermediate_buf; + u16 max_num_partial_fpdus; }; enum qed_iwarp_ep_state { @@ -147,6 +186,9 @@ int qed_iwarp_alloc(struct qed_hwfn *p_hwfn); int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, struct qed_rdma_start_in_params *params); +void qed_iwarp_init_fw_ramrod(struct qed_hwfn *p_hwfn, + struct iwarp_init_func_params *p_ramrod); + int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); void qed_iwarp_resc_free(struct qed_hwfn *p_hwfn); diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index c06ad4f0758e..047f556ca62e 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -413,6 +413,7 @@ static void qed_ll2_rxq_parse_reg(struct qed_hwfn *p_hwfn, struct qed_ll2_comp_rx_data *data) { data->parse_flags = le16_to_cpu(p_cqe->rx_cqe_fp.parse_flags.flags); + data->err_flags = le16_to_cpu(p_cqe->rx_cqe_fp.err_flags.flags); data->length.packet_length = le16_to_cpu(p_cqe->rx_cqe_fp.packet_length); data->vlan = le16_to_cpu(p_cqe->rx_cqe_fp.vlan); @@ -422,6 +423,41 @@ static void qed_ll2_rxq_parse_reg(struct qed_hwfn *p_hwfn, } static int +qed_ll2_handle_slowpath(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_conn, + union core_rx_cqe_union *p_cqe, + unsigned long *p_lock_flags) +{ + struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue; + struct core_rx_slow_path_cqe *sp_cqe; + + sp_cqe = &p_cqe->rx_cqe_sp; + if (sp_cqe->ramrod_cmd_id != CORE_RAMROD_RX_QUEUE_FLUSH) { + DP_NOTICE(p_hwfn, + "LL2 - unexpected Rx CQE slowpath ramrod_cmd_id:%d\n", + sp_cqe->ramrod_cmd_id); + return -EINVAL; + } + + if (!p_ll2_conn->cbs.slowpath_cb) { + DP_NOTICE(p_hwfn, + "LL2 - received RX_QUEUE_FLUSH but no callback was provided\n"); + return -EINVAL; + } + + spin_unlock_irqrestore(&p_rx->lock, *p_lock_flags); + + p_ll2_conn->cbs.slowpath_cb(p_ll2_conn->cbs.cookie, + p_ll2_conn->my_id, + le32_to_cpu(sp_cqe->opaque_data.data[0]), + le32_to_cpu(sp_cqe->opaque_data.data[1])); + + spin_lock_irqsave(&p_rx->lock, *p_lock_flags); + + return 0; +} + +static int qed_ll2_rxq_handle_completion(struct qed_hwfn *p_hwfn, struct qed_ll2_info *p_ll2_conn, union core_rx_cqe_union *p_cqe, @@ -494,8 +530,8 @@ static int qed_ll2_rxq_completion(struct qed_hwfn *p_hwfn, void *cookie) switch (cqe->rx_cqe_sp.type) { case CORE_RX_CQE_TYPE_SLOW_PATH: - DP_NOTICE(p_hwfn, "LL2 - unexpected Rx CQE slowpath\n"); - rc = -EINVAL; + rc = qed_ll2_handle_slowpath(p_hwfn, p_ll2_conn, + cqe, &flags); break; case CORE_RX_CQE_TYPE_GSI_OFFLOAD: case CORE_RX_CQE_TYPE_REGULAR: @@ -893,7 +929,7 @@ static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn, p_ramrod->drop_ttl0_flg = p_ll2_conn->input.rx_drop_ttl0_flg; p_ramrod->inner_vlan_removal_en = p_ll2_conn->input.rx_vlan_removal_en; p_ramrod->queue_id = p_ll2_conn->queue_id; - p_ramrod->main_func_queue = (conn_type == QED_LL2_TYPE_OOO) ? 0 : 1; + p_ramrod->main_func_queue = p_ll2_conn->main_func_queue ? 1 : 0; if ((IS_MF_DEFAULT(p_hwfn) || IS_MF_SI(p_hwfn)) && p_ramrod->main_func_queue && (conn_type != QED_LL2_TYPE_ROCE) && @@ -1104,6 +1140,7 @@ static int qed_ll2_acquire_connection_tx(struct qed_hwfn *p_hwfn, struct qed_ll2_info *p_ll2_info) { struct qed_ll2_tx_packet *p_descq; + u32 desc_size; u32 capacity; int rc = 0; @@ -1121,13 +1158,17 @@ static int qed_ll2_acquire_connection_tx(struct qed_hwfn *p_hwfn, goto out; capacity = qed_chain_get_capacity(&p_ll2_info->tx_queue.txq_chain); - p_descq = kcalloc(capacity, sizeof(struct qed_ll2_tx_packet), - GFP_KERNEL); + /* First element is part of the packet, rest are flexibly added */ + desc_size = (sizeof(*p_descq) + + (p_ll2_info->input.tx_max_bds_per_packet - 1) * + sizeof(p_descq->bds_set)); + + p_descq = kcalloc(capacity, desc_size, GFP_KERNEL); if (!p_descq) { rc = -ENOMEM; goto out; } - p_ll2_info->tx_queue.descq_array = p_descq; + p_ll2_info->tx_queue.descq_mem = p_descq; DP_VERBOSE(p_hwfn, QED_MSG_LL2, "Allocated LL2 Txq [Type %08x] with 0x%08x buffers\n", @@ -1208,6 +1249,7 @@ qed_ll2_set_cbs(struct qed_ll2_info *p_ll2_info, const struct qed_ll2_cbs *cbs) p_ll2_info->cbs.rx_release_cb = cbs->rx_release_cb; p_ll2_info->cbs.tx_comp_cb = cbs->tx_comp_cb; p_ll2_info->cbs.tx_release_cb = cbs->tx_release_cb; + p_ll2_info->cbs.slowpath_cb = cbs->slowpath_cb; p_ll2_info->cbs.cookie = cbs->cookie; return 0; @@ -1259,6 +1301,11 @@ int qed_ll2_acquire_connection(void *cxt, struct qed_ll2_acquire_data *data) p_ll2_info->tx_dest = (data->input.tx_dest == QED_LL2_TX_DEST_NW) ? CORE_TX_DEST_NW : CORE_TX_DEST_LB; + if (data->input.conn_type == QED_LL2_TYPE_OOO || + data->input.secondary_queue) + p_ll2_info->main_func_queue = false; + else + p_ll2_info->main_func_queue = true; /* Correct maximum number of Tx BDs */ p_tx_max = &p_ll2_info->input.tx_max_bds_per_packet; @@ -1358,11 +1405,13 @@ int qed_ll2_establish_connection(void *cxt, u8 connection_handle) { struct qed_hwfn *p_hwfn = cxt; struct qed_ll2_info *p_ll2_conn; + struct qed_ll2_tx_packet *p_pkt; struct qed_ll2_rx_queue *p_rx; struct qed_ll2_tx_queue *p_tx; struct qed_ptt *p_ptt; int rc = -EINVAL; u32 i, capacity; + u32 desc_size; u8 qid; p_ptt = qed_ptt_acquire(p_hwfn); @@ -1396,9 +1445,15 @@ int qed_ll2_establish_connection(void *cxt, u8 connection_handle) INIT_LIST_HEAD(&p_tx->sending_descq); spin_lock_init(&p_tx->lock); capacity = qed_chain_get_capacity(&p_tx->txq_chain); - for (i = 0; i < capacity; i++) - list_add_tail(&p_tx->descq_array[i].list_entry, - &p_tx->free_descq); + /* First element is part of the packet, rest are flexibly added */ + desc_size = (sizeof(*p_pkt) + + (p_ll2_conn->input.tx_max_bds_per_packet - 1) * + sizeof(p_pkt->bds_set)); + + for (i = 0; i < capacity; i++) { + p_pkt = p_tx->descq_mem + desc_size * i; + list_add_tail(&p_pkt->list_entry, &p_tx->free_descq); + } p_tx->cur_completing_bd_idx = 0; p_tx->bds_idx = 0; p_tx->b_completing_packet = false; @@ -1578,11 +1633,28 @@ qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn, roce_flavor = (pkt->qed_roce_flavor == QED_LL2_ROCE) ? CORE_ROCE : CORE_RROCE; - tx_dest = (pkt->tx_dest == QED_LL2_TX_DEST_NW) ? CORE_TX_DEST_NW - : CORE_TX_DEST_LB; + switch (pkt->tx_dest) { + case QED_LL2_TX_DEST_NW: + tx_dest = CORE_TX_DEST_NW; + break; + case QED_LL2_TX_DEST_LB: + tx_dest = CORE_TX_DEST_LB; + break; + case QED_LL2_TX_DEST_DROP: + tx_dest = CORE_TX_DEST_DROP; + break; + default: + tx_dest = CORE_TX_DEST_LB; + break; + } start_bd = (struct core_tx_bd *)qed_chain_produce(p_tx_chain); - start_bd->nw_vlan_or_lb_echo = cpu_to_le16(pkt->vlan); + if (QED_IS_IWARP_PERSONALITY(p_hwfn) && + p_ll2->input.conn_type == QED_LL2_TYPE_OOO) + start_bd->nw_vlan_or_lb_echo = + cpu_to_le16(IWARP_LL2_IN_ORDER_TX_QUEUE); + else + start_bd->nw_vlan_or_lb_echo = cpu_to_le16(pkt->vlan); SET_FIELD(start_bd->bitfield1, CORE_TX_BD_L4_HDR_OFFSET_W, cpu_to_le16(pkt->l4_hdr_offset_w)); SET_FIELD(start_bd->bitfield1, CORE_TX_BD_TX_DST, tx_dest); @@ -1590,6 +1662,9 @@ qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn, SET_FIELD(bd_data, CORE_TX_BD_DATA_START_BD, 0x1); SET_FIELD(bd_data, CORE_TX_BD_DATA_NBDS, pkt->num_of_bds); SET_FIELD(bd_data, CORE_TX_BD_DATA_ROCE_FLAV, roce_flavor); + SET_FIELD(bd_data, CORE_TX_BD_DATA_IP_CSUM, !!(pkt->enable_ip_cksum)); + SET_FIELD(bd_data, CORE_TX_BD_DATA_L4_CSUM, !!(pkt->enable_l4_cksum)); + SET_FIELD(bd_data, CORE_TX_BD_DATA_IP_LEN, !!(pkt->calc_ip_len)); start_bd->bd_data.as_bitfield = cpu_to_le16(bd_data); DMA_REGPAIR_LE(start_bd->addr, pkt->first_frag); start_bd->nbytes = cpu_to_le16(pkt->first_frag_len); @@ -1697,7 +1772,7 @@ int qed_ll2_prepare_tx_packet(void *cxt, p_tx = &p_ll2_conn->tx_queue; p_tx_chain = &p_tx->txq_chain; - if (pkt->num_of_bds > CORE_LL2_TX_MAX_BDS_PER_PACKET) + if (pkt->num_of_bds > p_ll2_conn->input.tx_max_bds_per_packet) return -EIO; spin_lock_irqsave(&p_tx->lock, flags); @@ -1857,7 +1932,7 @@ void qed_ll2_release_connection(void *cxt, u8 connection_handle) qed_int_unregister_cb(p_hwfn, p_ll2_conn->tx_queue.tx_sb_index); } - kfree(p_ll2_conn->tx_queue.descq_array); + kfree(p_ll2_conn->tx_queue.descq_mem); qed_chain_free(p_hwfn->cdev, &p_ll2_conn->tx_queue.txq_chain); kfree(p_ll2_conn->rx_queue.descq_array); diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.h b/drivers/net/ethernet/qlogic/qed/qed_ll2.h index a822528e9c63..f65817012e97 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.h +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.h @@ -63,17 +63,14 @@ struct qed_ll2_rx_packet { struct qed_ll2_tx_packet { struct list_head list_entry; u16 bd_used; - u16 vlan; - u16 l4_hdr_offset_w; - u8 bd_flags; bool notify_fw; void *cookie; - + /* Flexible Array of bds_set determined by max_bds_per_packet */ struct { struct core_tx_bd *txq_bd; dma_addr_t tx_frag; u16 frag_len; - } bds_set[ETH_TX_MAX_BDS_PER_NON_LSO_PACKET]; + } bds_set[1]; }; struct qed_ll2_rx_queue { @@ -101,7 +98,7 @@ struct qed_ll2_tx_queue { struct list_head active_descq; struct list_head free_descq; struct list_head sending_descq; - struct qed_ll2_tx_packet *descq_array; + void *descq_mem; /* memory for variable sized qed_ll2_tx_packet*/ struct qed_ll2_tx_packet *cur_send_packet; struct qed_ll2_tx_packet cur_completing_packet; u16 cur_completing_bd_idx; @@ -124,6 +121,7 @@ struct qed_ll2_info { bool b_active; enum core_tx_dest tx_dest; u8 tx_stats_en; + bool main_func_queue; struct qed_ll2_rx_queue rx_queue; struct qed_ll2_tx_queue tx_queue; struct qed_ll2_cbs cbs; diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 376485d99357..8b99c7d26f34 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -1691,12 +1691,12 @@ qed_mcp_get_shmem_proto_mfw(struct qed_hwfn *p_hwfn, case FW_MB_PARAM_GET_PF_RDMA_ROCE: *p_proto = QED_PCI_ETH_ROCE; break; + case FW_MB_PARAM_GET_PF_RDMA_IWARP: + *p_proto = QED_PCI_ETH_IWARP; + break; case FW_MB_PARAM_GET_PF_RDMA_BOTH: - DP_NOTICE(p_hwfn, - "Current day drivers don't support RoCE & iWARP. Default to RoCE-only\n"); - *p_proto = QED_PCI_ETH_ROCE; + *p_proto = QED_PCI_ETH_RDMA; break; - case FW_MB_PARAM_GET_PF_RDMA_IWARP: default: DP_NOTICE(p_hwfn, "MFW answers GET_PF_RDMA_PROTOCOL but param is %08x\n", diff --git a/drivers/net/ethernet/qlogic/qed/qed_ooo.c b/drivers/net/ethernet/qlogic/qed/qed_ooo.c index 000636530111..6172354b451c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ooo.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ooo.c @@ -103,18 +103,28 @@ int qed_ooo_alloc(struct qed_hwfn *p_hwfn) { u16 max_num_archipelagos = 0, cid_base; struct qed_ooo_info *p_ooo_info; + enum protocol_type proto; u16 max_num_isles = 0; u32 i; - if (p_hwfn->hw_info.personality != QED_PCI_ISCSI) { + switch (p_hwfn->hw_info.personality) { + case QED_PCI_ISCSI: + proto = PROTOCOLID_ISCSI; + break; + case QED_PCI_ETH_RDMA: + case QED_PCI_ETH_IWARP: + proto = PROTOCOLID_IWARP; + break; + default: DP_NOTICE(p_hwfn, "Failed to allocate qed_ooo_info: unknown personality\n"); return -EINVAL; } - max_num_archipelagos = p_hwfn->pf_params.iscsi_pf_params.num_cons; + max_num_archipelagos = (u16)qed_cxt_get_proto_cid_count(p_hwfn, proto, + NULL); max_num_isles = QED_MAX_NUM_ISLES + max_num_archipelagos; - cid_base = (u16)qed_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_ISCSI); + cid_base = (u16)qed_cxt_get_proto_cid_start(p_hwfn, proto); if (!max_num_archipelagos) { DP_NOTICE(p_hwfn, diff --git a/drivers/net/ethernet/qlogic/qed/qed_ooo.h b/drivers/net/ethernet/qlogic/qed/qed_ooo.h index e8ed40b848f5..49c4e75b15b1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ooo.h +++ b/drivers/net/ethernet/qlogic/qed/qed_ooo.h @@ -83,7 +83,7 @@ struct qed_ooo_info { u16 cid_base; }; -#if IS_ENABLED(CONFIG_QED_ISCSI) +#if IS_ENABLED(CONFIG_QED_OOO) void qed_ooo_save_history_entry(struct qed_hwfn *p_hwfn, struct qed_ooo_info *p_ooo_info, struct ooo_opaque *p_cqe); diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index 6fb99518a61f..c8c4b3940564 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -156,7 +156,10 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn, return rc; p_hwfn->p_rdma_info = p_rdma_info; - p_rdma_info->proto = PROTOCOLID_ROCE; + if (QED_IS_IWARP_PERSONALITY(p_hwfn)) + p_rdma_info->proto = PROTOCOLID_IWARP; + else + p_rdma_info->proto = PROTOCOLID_ROCE; num_cons = qed_cxt_get_proto_cid_count(p_hwfn, p_rdma_info->proto, NULL); @@ -206,11 +209,11 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn, goto free_pd_map; } - /* Allocate bitmap for cq's. The maximum number of CQs is bounded to - * twice the number of QPs. + /* Allocate bitmap for cq's. The maximum number of CQs is bound to + * the number of connections we support. (num_qps in iWARP or + * num_qps/2 in RoCE). */ - rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->cq_map, - p_rdma_info->num_qps * 2, "CQ"); + rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->cq_map, num_cons, "CQ"); if (rc) { DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Failed to allocate cq bitmap, rc = %d\n", rc); @@ -219,10 +222,10 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn, /* Allocate bitmap for toggle bit for cq icids * We toggle the bit every time we create or resize cq for a given icid. - * The maximum number of CQs is bounded to twice the number of QPs. + * Size needs to equal the size of the cq bmap. */ rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->toggle_bits, - p_rdma_info->num_qps * 2, "Toggle"); + num_cons, "Toggle"); if (rc) { DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Failed to allocate toogle bits, rc = %d\n", rc); @@ -548,10 +551,13 @@ static int qed_rdma_start_fw(struct qed_hwfn *p_hwfn, if (rc) return rc; - if (QED_IS_IWARP_PERSONALITY(p_hwfn)) + if (QED_IS_IWARP_PERSONALITY(p_hwfn)) { + qed_iwarp_init_fw_ramrod(p_hwfn, + &p_ent->ramrod.iwarp_init_func.iwarp); p_ramrod = &p_ent->ramrod.iwarp_init_func.rdma; - else + } else { p_ramrod = &p_ent->ramrod.roce_init_func.rdma; + } p_params_header = &p_ramrod->params_header; p_params_header->cnq_start_offset = (u8)RESC_START(p_hwfn, diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c index 46d0c3cb83a5..a1d33f35aad3 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c @@ -377,6 +377,7 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn, p_ramrod->personality = PERSONALITY_ISCSI; break; case QED_PCI_ETH_ROCE: + case QED_PCI_ETH_IWARP: p_ramrod->personality = PERSONALITY_RDMA_AND_ETH; break; default: diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index adb700512baa..a3a70ade411f 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -503,7 +503,7 @@ void qede_fill_rss_params(struct qede_dev *edev, void qede_udp_tunnel_add(struct net_device *dev, struct udp_tunnel_info *ti); void qede_udp_tunnel_del(struct net_device *dev, struct udp_tunnel_info *ti); -int qede_xdp(struct net_device *dev, struct netdev_xdp *xdp); +int qede_xdp(struct net_device *dev, struct netdev_bpf *xdp); #ifdef CONFIG_DCB void qede_set_dcbnl_ops(struct net_device *ndev); diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index f79e36e4060a..c1a0708a7d7c 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1065,7 +1065,7 @@ static int qede_xdp_set(struct qede_dev *edev, struct bpf_prog *prog) return 0; } -int qede_xdp(struct net_device *dev, struct netdev_xdp *xdp) +int qede_xdp(struct net_device *dev, struct netdev_bpf *xdp) { struct qede_dev *edev = netdev_priv(dev); diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c index 6fc854b120b0..48ec4c56cddf 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_fp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c @@ -1004,6 +1004,7 @@ static bool qede_rx_xdp(struct qede_dev *edev, xdp.data_hard_start = page_address(bd->data); xdp.data = xdp.data_hard_start + *data_offset; + xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + *len; /* Queues always have a full reset currently, so for the time diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index e5ee9f274a71..8f9b3eb82137 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -556,7 +556,7 @@ static const struct net_device_ops qede_netdev_ops = { .ndo_udp_tunnel_add = qede_udp_tunnel_add, .ndo_udp_tunnel_del = qede_udp_tunnel_del, .ndo_features_check = qede_features_check, - .ndo_xdp = qede_xdp, + .ndo_bpf = qede_xdp, #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = qede_rx_flow_steer, #endif @@ -594,7 +594,7 @@ static const struct net_device_ops qede_netdev_vf_xdp_ops = { .ndo_udp_tunnel_add = qede_udp_tunnel_add, .ndo_udp_tunnel_del = qede_udp_tunnel_del, .ndo_features_check = qede_features_check, - .ndo_xdp = qede_xdp, + .ndo_bpf = qede_xdp, }; /* ------------------------------------------------------------------------- diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 2991179c2fd0..05479d435469 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -3891,10 +3891,8 @@ static int ql3xxx_probe(struct pci_dev *pdev, INIT_DELAYED_WORK(&qdev->tx_timeout_work, ql_tx_timeout_work); INIT_DELAYED_WORK(&qdev->link_state_work, ql_link_state_machine_work); - init_timer(&qdev->adapter_timer); - qdev->adapter_timer.function = ql3xxx_timer; + setup_timer(&qdev->adapter_timer, ql3xxx_timer, (unsigned long)qdev); qdev->adapter_timer.expires = jiffies + HZ * 2; /* two second delay */ - qdev->adapter_timer.data = (unsigned long)qdev; if (!cards_found) { pr_alert("%s\n", DRV_STRING); diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index 29fea74bff2e..7b97a9969046 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -1092,8 +1092,7 @@ static int ql_get_next_chunk(struct ql_adapter *qdev, struct rx_ring *rx_ring, { if (!rx_ring->pg_chunk.page) { u64 map; - rx_ring->pg_chunk.page = alloc_pages(__GFP_COLD | __GFP_COMP | - GFP_ATOMIC, + rx_ring->pg_chunk.page = alloc_pages(__GFP_COMP | GFP_ATOMIC, qdev->lbq_buf_order); if (unlikely(!rx_ring->pg_chunk.page)) { netif_err(qdev, drv, qdev->ndev, diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_mpi.c b/drivers/net/ethernet/qlogic/qlge/qlge_mpi.c index 384c8bc874f3..4be65d6761b3 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_mpi.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_mpi.c @@ -213,7 +213,6 @@ static int ql_idc_req_aen(struct ql_adapter *qdev) /* Get the status data and start up a thread to * handle the request. */ - mbcp = &qdev->idc_mbc; mbcp->out_count = 4; status = ql_get_mb_sts(qdev, mbcp); if (status) { diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c index 3ed9033e56db..9cbb27263742 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c @@ -309,22 +309,12 @@ void emac_mac_mode_config(struct emac_adapter *adpt) /* Config descriptor rings */ static void emac_mac_dma_rings_config(struct emac_adapter *adpt) { - static const unsigned short tpd_q_offset[] = { - EMAC_DESC_CTRL_8, EMAC_H1TPD_BASE_ADDR_LO, - EMAC_H2TPD_BASE_ADDR_LO, EMAC_H3TPD_BASE_ADDR_LO}; - static const unsigned short rfd_q_offset[] = { - EMAC_DESC_CTRL_2, EMAC_DESC_CTRL_10, - EMAC_DESC_CTRL_12, EMAC_DESC_CTRL_13}; - static const unsigned short rrd_q_offset[] = { - EMAC_DESC_CTRL_5, EMAC_DESC_CTRL_14, - EMAC_DESC_CTRL_15, EMAC_DESC_CTRL_16}; - /* TPD (Transmit Packet Descriptor) */ writel(upper_32_bits(adpt->tx_q.tpd.dma_addr), adpt->base + EMAC_DESC_CTRL_1); writel(lower_32_bits(adpt->tx_q.tpd.dma_addr), - adpt->base + tpd_q_offset[0]); + adpt->base + EMAC_DESC_CTRL_8); writel(adpt->tx_q.tpd.count & TPD_RING_SIZE_BMSK, adpt->base + EMAC_DESC_CTRL_9); @@ -334,9 +324,9 @@ static void emac_mac_dma_rings_config(struct emac_adapter *adpt) adpt->base + EMAC_DESC_CTRL_0); writel(lower_32_bits(adpt->rx_q.rfd.dma_addr), - adpt->base + rfd_q_offset[0]); + adpt->base + EMAC_DESC_CTRL_2); writel(lower_32_bits(adpt->rx_q.rrd.dma_addr), - adpt->base + rrd_q_offset[0]); + adpt->base + EMAC_DESC_CTRL_5); writel(adpt->rx_q.rfd.count & RFD_RING_SIZE_BMSK, adpt->base + EMAC_DESC_CTRL_3); diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c index 29ba37a08372..e8ab512ee7e3 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c @@ -68,10 +68,10 @@ static void emac_sgmii_link_init(struct emac_adapter *adpt) writel(val, phy->base + EMAC_SGMII_PHY_AUTONEG_CFG2); } -static int emac_sgmii_irq_clear(struct emac_adapter *adpt, u32 irq_bits) +static int emac_sgmii_irq_clear(struct emac_adapter *adpt, u8 irq_bits) { struct emac_sgmii *phy = &adpt->phy; - u32 status; + u8 status; writel_relaxed(irq_bits, phy->base + EMAC_SGMII_PHY_INTERRUPT_CLEAR); writel_relaxed(IRQ_GLOBAL_CLEAR, phy->base + EMAC_SGMII_PHY_IRQ_CMD); @@ -86,9 +86,8 @@ static int emac_sgmii_irq_clear(struct emac_adapter *adpt, u32 irq_bits) EMAC_SGMII_PHY_INTERRUPT_STATUS, status, !(status & irq_bits), 1, SGMII_PHY_IRQ_CLR_WAIT_TIME)) { - netdev_err(adpt->netdev, - "error: failed clear SGMII irq: status:0x%x bits:0x%x\n", - status, irq_bits); + net_err_ratelimited("%s: failed to clear SGMII irq: status:0x%x bits:0x%x\n", + adpt->netdev->name, status, irq_bits); return -EIO; } @@ -109,7 +108,7 @@ static irqreturn_t emac_sgmii_interrupt(int irq, void *data) { struct emac_adapter *adpt = data; struct emac_sgmii *phy = &adpt->phy; - u32 status; + u8 status; status = readl(phy->base + EMAC_SGMII_PHY_INTERRUPT_STATUS); status &= SGMII_ISR_MASK; @@ -139,10 +138,8 @@ static irqreturn_t emac_sgmii_interrupt(int irq, void *data) atomic_set(&phy->decode_error_count, 0); } - if (emac_sgmii_irq_clear(adpt, status)) { - netdev_warn(adpt->netdev, "failed to clear SGMII interrupt\n"); + if (emac_sgmii_irq_clear(adpt, status)) schedule_work(&adpt->work_thread); - } return IRQ_HANDLED; } diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 759543512117..70c92b649b29 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -130,7 +130,7 @@ static int emac_start_xmit(struct sk_buff *skb, struct net_device *netdev) return emac_mac_tx_buf_send(adpt, &adpt->tx_q, skb); } -irqreturn_t emac_isr(int _irq, void *data) +static irqreturn_t emac_isr(int _irq, void *data) { struct emac_irq *irq = data; struct emac_adapter *adpt = @@ -148,9 +148,8 @@ irqreturn_t emac_isr(int _irq, void *data) goto exit; if (status & ISR_ERROR) { - netif_warn(adpt, intr, adpt->netdev, - "warning: error irq status 0x%lx\n", - status & ISR_ERROR); + net_err_ratelimited("%s: error interrupt 0x%lx\n", + adpt->netdev->name, status & ISR_ERROR); /* reset MAC */ schedule_work(&adpt->work_thread); } @@ -169,7 +168,8 @@ irqreturn_t emac_isr(int _irq, void *data) emac_mac_tx_process(adpt, &adpt->tx_q); if (status & ISR_OVER) - net_warn_ratelimited("warning: TX/RX overflow\n"); + net_warn_ratelimited("%s: TX/RX overflow interrupt\n", + adpt->netdev->name); exit: /* enable the interrupt */ @@ -615,20 +615,11 @@ static int emac_probe(struct platform_device *pdev) u32 reg; int ret; - /* The EMAC itself is capable of 64-bit DMA, so try that first. */ - ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + /* The TPD buffer address is limited to 45 bits. */ + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(45)); if (ret) { - /* Some platforms may restrict the EMAC's address bus to less - * then the size of DDR. In this case, we need to try a - * smaller mask. We could try every possible smaller mask, - * but that's overkill. Instead, just fall to 32-bit, which - * should always work. - */ - ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (ret) { - dev_err(&pdev->dev, "could not set DMA mask\n"); - return ret; - } + dev_err(&pdev->dev, "could not set DMA mask\n"); + return ret; } netdev = alloc_etherdev(sizeof(struct emac_adapter)); diff --git a/drivers/net/ethernet/qualcomm/rmnet/Kconfig b/drivers/net/ethernet/qualcomm/rmnet/Kconfig index 6e2587af47a4..9bb06d284644 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/Kconfig +++ b/drivers/net/ethernet/qualcomm/rmnet/Kconfig @@ -5,6 +5,7 @@ menuconfig RMNET tristate "RmNet MAP driver" default n + select GRO_CELLS ---help--- If you select this, you will enable the RMNET module which is used for handling data in the multiplexing and aggregation protocol (MAP) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 1e33aea59f50..71bee1af71ef 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -61,23 +61,6 @@ rmnet_get_port_rtnl(const struct net_device *real_dev) return rtnl_dereference(real_dev->rx_handler_data); } -static struct rmnet_endpoint* -rmnet_get_endpoint(struct net_device *dev, int config_id) -{ - struct rmnet_endpoint *ep; - struct rmnet_port *port; - - if (!rmnet_is_real_dev_registered(dev)) { - ep = rmnet_vnd_get_endpoint(dev); - } else { - port = rmnet_get_port_rtnl(dev); - - ep = &port->muxed_ep[config_id]; - } - - return ep; -} - static int rmnet_unregister_real_device(struct net_device *real_dev, struct rmnet_port *port) { @@ -98,7 +81,7 @@ static int rmnet_unregister_real_device(struct net_device *real_dev, static int rmnet_register_real_device(struct net_device *real_dev) { struct rmnet_port *port; - int rc; + int rc, entry; ASSERT_RTNL(); @@ -119,27 +102,41 @@ static int rmnet_register_real_device(struct net_device *real_dev) /* hold on to real dev for MAP data */ dev_hold(real_dev); + for (entry = 0; entry < RMNET_MAX_LOGICAL_EP; entry++) + INIT_HLIST_HEAD(&port->muxed_ep[entry]); + netdev_dbg(real_dev, "registered with rmnet\n"); return 0; } -static void rmnet_set_endpoint_config(struct net_device *dev, - u8 mux_id, u8 rmnet_mode, - struct net_device *egress_dev) +static void rmnet_unregister_bridge(struct net_device *dev, + struct rmnet_port *port) { - struct rmnet_endpoint *ep; + struct net_device *rmnet_dev, *bridge_dev; + struct rmnet_port *bridge_port; + + if (port->rmnet_mode != RMNET_EPMODE_BRIDGE) + return; - netdev_dbg(dev, "id %d mode %d dev %s\n", - mux_id, rmnet_mode, egress_dev->name); + /* bridge slave handling */ + if (!port->nr_rmnet_devs) { + rmnet_dev = netdev_master_upper_dev_get_rcu(dev); + netdev_upper_dev_unlink(dev, rmnet_dev); - ep = rmnet_get_endpoint(dev, mux_id); - /* This config is cleared on every set, so its ok to not - * clear it on a device delete. - */ - memset(ep, 0, sizeof(struct rmnet_endpoint)); - ep->rmnet_mode = rmnet_mode; - ep->egress_dev = egress_dev; - ep->mux_id = mux_id; + bridge_dev = port->bridge_ep; + + bridge_port = rmnet_get_port_rtnl(bridge_dev); + bridge_port->bridge_ep = NULL; + bridge_port->rmnet_mode = RMNET_EPMODE_VND; + } else { + bridge_dev = port->bridge_ep; + + bridge_port = rmnet_get_port_rtnl(bridge_dev); + rmnet_dev = netdev_master_upper_dev_get_rcu(bridge_dev); + netdev_upper_dev_unlink(bridge_dev, rmnet_dev); + + rmnet_unregister_real_device(bridge_dev, bridge_port); + } } static int rmnet_newlink(struct net *src_net, struct net_device *dev, @@ -153,6 +150,7 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, RMNET_EGRESS_FORMAT_MAP; struct net_device *real_dev; int mode = RMNET_EPMODE_VND; + struct rmnet_endpoint *ep; struct rmnet_port *port; int err = 0; u16 mux_id; @@ -164,6 +162,10 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, if (!data[IFLA_VLAN_ID]) return -EINVAL; + ep = kzalloc(sizeof(*ep), GFP_ATOMIC); + if (!ep) + return -ENOMEM; + mux_id = nla_get_u16(data[IFLA_VLAN_ID]); err = rmnet_register_real_device(real_dev); @@ -171,11 +173,11 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, goto err0; port = rmnet_get_port_rtnl(real_dev); - err = rmnet_vnd_newlink(mux_id, dev, port, real_dev); + err = rmnet_vnd_newlink(mux_id, dev, port, real_dev, ep); if (err) goto err1; - err = netdev_master_upper_dev_link(dev, real_dev, NULL, NULL); + err = netdev_master_upper_dev_link(dev, real_dev, NULL, NULL, extack); if (err) goto err2; @@ -183,13 +185,13 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, ingress_format, egress_format); port->egress_data_format = egress_format; port->ingress_data_format = ingress_format; + port->rmnet_mode = mode; - rmnet_set_endpoint_config(real_dev, mux_id, mode, dev); - rmnet_set_endpoint_config(dev, mux_id, mode, real_dev); + hlist_add_head_rcu(&ep->hlnode, &port->muxed_ep[mux_id]); return 0; err2: - rmnet_vnd_dellink(mux_id, port); + rmnet_vnd_dellink(mux_id, port, ep); err1: rmnet_unregister_real_device(real_dev, port); err0: @@ -199,6 +201,7 @@ err0: static void rmnet_dellink(struct net_device *dev, struct list_head *head) { struct net_device *real_dev; + struct rmnet_endpoint *ep; struct rmnet_port *port; u8 mux_id; @@ -212,8 +215,15 @@ static void rmnet_dellink(struct net_device *dev, struct list_head *head) port = rmnet_get_port_rtnl(real_dev); mux_id = rmnet_vnd_get_mux(dev); - rmnet_vnd_dellink(mux_id, port); netdev_upper_dev_unlink(dev, real_dev); + + ep = rmnet_get_endpoint(port, mux_id); + if (ep) { + hlist_del_init_rcu(&ep->hlnode); + rmnet_unregister_bridge(dev, port); + rmnet_vnd_dellink(mux_id, port, ep); + kfree(ep); + } rmnet_unregister_real_device(real_dev, port); unregister_netdevice_queue(dev, head); @@ -222,11 +232,16 @@ static void rmnet_dellink(struct net_device *dev, struct list_head *head) static int rmnet_dev_walk_unreg(struct net_device *rmnet_dev, void *data) { struct rmnet_walk_data *d = data; + struct rmnet_endpoint *ep; u8 mux_id; mux_id = rmnet_vnd_get_mux(rmnet_dev); - - rmnet_vnd_dellink(mux_id, d->port); + ep = rmnet_get_endpoint(d->port, mux_id); + if (ep) { + hlist_del_init_rcu(&ep->hlnode); + rmnet_vnd_dellink(mux_id, d->port, ep); + kfree(ep); + } netdev_upper_dev_unlink(rmnet_dev, d->real_dev); unregister_netdevice_queue(rmnet_dev, d->head); @@ -252,6 +267,8 @@ static void rmnet_force_unassociate_device(struct net_device *dev) d.port = port; rcu_read_lock(); + rmnet_unregister_bridge(dev, port); + netdev_walk_all_lower_dev_rcu(real_dev, rmnet_dev_walk_unreg, &d); rcu_read_unlock(); unregister_netdevice_many(&list); @@ -324,6 +341,77 @@ struct rmnet_port *rmnet_get_port(struct net_device *real_dev) return NULL; } +struct rmnet_endpoint *rmnet_get_endpoint(struct rmnet_port *port, u8 mux_id) +{ + struct rmnet_endpoint *ep; + + hlist_for_each_entry_rcu(ep, &port->muxed_ep[mux_id], hlnode) { + if (ep->mux_id == mux_id) + return ep; + } + + return NULL; +} + +int rmnet_add_bridge(struct net_device *rmnet_dev, + struct net_device *slave_dev, + struct netlink_ext_ack *extack) +{ + struct rmnet_priv *priv = netdev_priv(rmnet_dev); + struct net_device *real_dev = priv->real_dev; + struct rmnet_port *port, *slave_port; + int err; + + port = rmnet_get_port(real_dev); + + /* If there is more than one rmnet dev attached, its probably being + * used for muxing. Skip the briding in that case + */ + if (port->nr_rmnet_devs > 1) + return -EINVAL; + + if (rmnet_is_real_dev_registered(slave_dev)) + return -EBUSY; + + err = rmnet_register_real_device(slave_dev); + if (err) + return -EBUSY; + + err = netdev_master_upper_dev_link(slave_dev, rmnet_dev, NULL, NULL, + extack); + if (err) + return -EINVAL; + + slave_port = rmnet_get_port(slave_dev); + slave_port->rmnet_mode = RMNET_EPMODE_BRIDGE; + slave_port->bridge_ep = real_dev; + + port->rmnet_mode = RMNET_EPMODE_BRIDGE; + port->bridge_ep = slave_dev; + + netdev_dbg(slave_dev, "registered with rmnet as slave\n"); + return 0; +} + +int rmnet_del_bridge(struct net_device *rmnet_dev, + struct net_device *slave_dev) +{ + struct rmnet_priv *priv = netdev_priv(rmnet_dev); + struct net_device *real_dev = priv->real_dev; + struct rmnet_port *port, *slave_port; + + port = rmnet_get_port(real_dev); + port->rmnet_mode = RMNET_EPMODE_VND; + port->bridge_ep = NULL; + + netdev_upper_dev_unlink(slave_dev, rmnet_dev); + slave_port = rmnet_get_port(slave_dev); + rmnet_unregister_real_device(slave_dev, slave_port); + + netdev_dbg(slave_dev, "removed from rmnet as slave\n"); + return 0; +} + /* Startup/Shutdown */ static int __init rmnet_init(void) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h index dde4e9f14f4a..c19259eea99e 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h @@ -14,19 +14,17 @@ */ #include <linux/skbuff.h> +#include <net/gro_cells.h> #ifndef _RMNET_CONFIG_H_ #define _RMNET_CONFIG_H_ #define RMNET_MAX_LOGICAL_EP 255 -/* Information about the next device to deliver the packet to. - * Exact usage of this parameter depends on the rmnet_mode. - */ struct rmnet_endpoint { - u8 rmnet_mode; u8 mux_id; struct net_device *egress_dev; + struct hlist_node hlnode; }; /* One instance of this structure is instantiated for each real_dev associated @@ -34,22 +32,41 @@ struct rmnet_endpoint { */ struct rmnet_port { struct net_device *dev; - struct rmnet_endpoint local_ep; - struct rmnet_endpoint muxed_ep[RMNET_MAX_LOGICAL_EP]; u32 ingress_data_format; u32 egress_data_format; - struct net_device *rmnet_devices[RMNET_MAX_LOGICAL_EP]; u8 nr_rmnet_devs; + u8 rmnet_mode; + struct hlist_head muxed_ep[RMNET_MAX_LOGICAL_EP]; + struct net_device *bridge_ep; }; extern struct rtnl_link_ops rmnet_link_ops; +struct rmnet_vnd_stats { + u64 rx_pkts; + u64 rx_bytes; + u64 tx_pkts; + u64 tx_bytes; + u32 tx_drops; +}; + +struct rmnet_pcpu_stats { + struct rmnet_vnd_stats stats; + struct u64_stats_sync syncp; +}; + struct rmnet_priv { - struct rmnet_endpoint local_ep; u8 mux_id; struct net_device *real_dev; + struct rmnet_pcpu_stats __percpu *pcpu_stats; + struct gro_cells gro_cells; }; struct rmnet_port *rmnet_get_port(struct net_device *real_dev); - +struct rmnet_endpoint *rmnet_get_endpoint(struct rmnet_port *port, u8 mux_id); +int rmnet_add_bridge(struct net_device *rmnet_dev, + struct net_device *slave_dev, + struct netlink_ext_ack *extack); +int rmnet_del_bridge(struct net_device *rmnet_dev, + struct net_device *slave_dev); #endif /* _RMNET_CONFIG_H_ */ diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c index 540c7622dcb1..29842ccc91a9 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -43,60 +43,23 @@ static void rmnet_set_skb_proto(struct sk_buff *skb) /* Generic handler */ -static rx_handler_result_t -rmnet_bridge_handler(struct sk_buff *skb, struct rmnet_endpoint *ep) +static void +rmnet_deliver_skb(struct sk_buff *skb) { - if (!ep->egress_dev) - kfree_skb(skb); - else - rmnet_egress_handler(skb, ep); - - return RX_HANDLER_CONSUMED; -} - -static rx_handler_result_t -rmnet_deliver_skb(struct sk_buff *skb, struct rmnet_endpoint *ep) -{ - switch (ep->rmnet_mode) { - case RMNET_EPMODE_NONE: - return RX_HANDLER_PASS; - - case RMNET_EPMODE_BRIDGE: - return rmnet_bridge_handler(skb, ep); - - case RMNET_EPMODE_VND: - skb_reset_transport_header(skb); - skb_reset_network_header(skb); - rmnet_vnd_rx_fixup(skb, skb->dev); - - skb->pkt_type = PACKET_HOST; - skb_set_mac_header(skb, 0); - netif_receive_skb(skb); - return RX_HANDLER_CONSUMED; - - default: - kfree_skb(skb); - return RX_HANDLER_CONSUMED; - } -} - -static rx_handler_result_t -rmnet_ingress_deliver_packet(struct sk_buff *skb, - struct rmnet_port *port) -{ - if (!port) { - kfree_skb(skb); - return RX_HANDLER_CONSUMED; - } + struct rmnet_priv *priv = netdev_priv(skb->dev); - skb->dev = port->local_ep.egress_dev; + skb_reset_transport_header(skb); + skb_reset_network_header(skb); + rmnet_vnd_rx_fixup(skb, skb->dev); - return rmnet_deliver_skb(skb, &port->local_ep); + skb->pkt_type = PACKET_HOST; + skb_set_mac_header(skb, 0); + gro_cells_receive(&priv->gro_cells, skb); } /* MAP handler */ -static rx_handler_result_t +static void __rmnet_map_ingress_handler(struct sk_buff *skb, struct rmnet_port *port) { @@ -109,53 +72,50 @@ __rmnet_map_ingress_handler(struct sk_buff *skb, & RMNET_INGRESS_FORMAT_MAP_COMMANDS) return rmnet_map_command(skb, port); - kfree_skb(skb); - return RX_HANDLER_CONSUMED; + goto free_skb; } mux_id = RMNET_MAP_GET_MUX_ID(skb); len = RMNET_MAP_GET_LENGTH(skb) - RMNET_MAP_GET_PAD(skb); - if (mux_id >= RMNET_MAX_LOGICAL_EP) { - kfree_skb(skb); - return RX_HANDLER_CONSUMED; - } + if (mux_id >= RMNET_MAX_LOGICAL_EP) + goto free_skb; - ep = &port->muxed_ep[mux_id]; + ep = rmnet_get_endpoint(port, mux_id); + if (!ep) + goto free_skb; - if (port->ingress_data_format & RMNET_INGRESS_FORMAT_DEMUXING) - skb->dev = ep->egress_dev; + skb->dev = ep->egress_dev; /* Subtract MAP header */ skb_pull(skb, sizeof(struct rmnet_map_header)); skb_trim(skb, len); rmnet_set_skb_proto(skb); - return rmnet_deliver_skb(skb, ep); + rmnet_deliver_skb(skb); + return; + +free_skb: + kfree_skb(skb); } -static rx_handler_result_t +static void rmnet_map_ingress_handler(struct sk_buff *skb, struct rmnet_port *port) { struct sk_buff *skbn; - int rc; if (port->ingress_data_format & RMNET_INGRESS_FORMAT_DEAGGREGATION) { while ((skbn = rmnet_map_deaggregate(skb)) != NULL) __rmnet_map_ingress_handler(skbn, port); consume_skb(skb); - rc = RX_HANDLER_CONSUMED; } else { - rc = __rmnet_map_ingress_handler(skb, port); + __rmnet_map_ingress_handler(skb, port); } - - return rc; } static int rmnet_map_egress_handler(struct sk_buff *skb, - struct rmnet_port *port, - struct rmnet_endpoint *ep, + struct rmnet_port *port, u8 mux_id, struct net_device *orig_dev) { int required_headroom, additional_header_len; @@ -174,10 +134,10 @@ static int rmnet_map_egress_handler(struct sk_buff *skb, return RMNET_MAP_CONSUMED; if (port->egress_data_format & RMNET_EGRESS_FORMAT_MUXING) { - if (ep->mux_id == 0xff) + if (mux_id == 0xff) map_header->mux_id = 0; else - map_header->mux_id = ep->mux_id; + map_header->mux_id = mux_id; } skb->protocol = htons(ETH_P_MAP); @@ -185,6 +145,15 @@ static int rmnet_map_egress_handler(struct sk_buff *skb, return RMNET_MAP_SUCCESS; } +static void +rmnet_bridge_handler(struct sk_buff *skb, struct net_device *bridge_dev) +{ + if (bridge_dev) { + skb->dev = bridge_dev; + dev_queue_xmit(skb); + } +} + /* Ingress / Egress Entry Points */ /* Processes packet as per ingress data format for receiving device. Logical @@ -193,56 +162,45 @@ static int rmnet_map_egress_handler(struct sk_buff *skb, */ rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb) { - struct rmnet_port *port; struct sk_buff *skb = *pskb; + struct rmnet_port *port; struct net_device *dev; - int rc; if (!skb) - return RX_HANDLER_CONSUMED; + goto done; dev = skb->dev; port = rmnet_get_port(dev); - if (port->ingress_data_format & RMNET_INGRESS_FORMAT_MAP) { - rc = rmnet_map_ingress_handler(skb, port); - } else { - switch (ntohs(skb->protocol)) { - case ETH_P_MAP: - if (port->local_ep.rmnet_mode == - RMNET_EPMODE_BRIDGE) { - rc = rmnet_ingress_deliver_packet(skb, port); - } else { - kfree_skb(skb); - rc = RX_HANDLER_CONSUMED; - } - break; - - case ETH_P_IP: - case ETH_P_IPV6: - rc = rmnet_ingress_deliver_packet(skb, port); - break; - - default: - rc = RX_HANDLER_PASS; - } + switch (port->rmnet_mode) { + case RMNET_EPMODE_VND: + if (port->ingress_data_format & RMNET_INGRESS_FORMAT_MAP) + rmnet_map_ingress_handler(skb, port); + break; + case RMNET_EPMODE_BRIDGE: + rmnet_bridge_handler(skb, port->bridge_ep); + break; } - return rc; +done: + return RX_HANDLER_CONSUMED; } /* Modifies packet as per logical endpoint configuration and egress data format * for egress device configured in logical endpoint. Packet is then transmitted * on the egress device. */ -void rmnet_egress_handler(struct sk_buff *skb, - struct rmnet_endpoint *ep) +void rmnet_egress_handler(struct sk_buff *skb) { struct net_device *orig_dev; struct rmnet_port *port; + struct rmnet_priv *priv; + u8 mux_id; orig_dev = skb->dev; - skb->dev = ep->egress_dev; + priv = netdev_priv(orig_dev); + skb->dev = priv->real_dev; + mux_id = priv->mux_id; port = rmnet_get_port(skb->dev); if (!port) { @@ -251,7 +209,7 @@ void rmnet_egress_handler(struct sk_buff *skb, } if (port->egress_data_format & RMNET_EGRESS_FORMAT_MAP) { - switch (rmnet_map_egress_handler(skb, port, ep, orig_dev)) { + switch (rmnet_map_egress_handler(skb, port, mux_id, orig_dev)) { case RMNET_MAP_CONSUMED: return; @@ -264,8 +222,7 @@ void rmnet_egress_handler(struct sk_buff *skb, } } - if (ep->rmnet_mode == RMNET_EPMODE_VND) - rmnet_vnd_tx_fixup(skb, orig_dev); + rmnet_vnd_tx_fixup(skb, orig_dev); dev_queue_xmit(skb); } diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h index f2638cf5693c..3537e4ceedb3 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h @@ -18,8 +18,7 @@ #include "rmnet_config.h" -void rmnet_egress_handler(struct sk_buff *skb, - struct rmnet_endpoint *ep); +void rmnet_egress_handler(struct sk_buff *skb); rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb); diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h index ce2302c25b12..3af3fe7b5457 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h @@ -80,7 +80,6 @@ u8 rmnet_map_demultiplex(struct sk_buff *skb); struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb); struct rmnet_map_header *rmnet_map_add_map_header(struct sk_buff *skb, int hdrlen, int pad); -rx_handler_result_t rmnet_map_command(struct sk_buff *skb, - struct rmnet_port *port); +void rmnet_map_command(struct sk_buff *skb, struct rmnet_port *port); #endif /* _RMNET_MAP_H_ */ diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c index d1ea5e21b982..51e604923ac1 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c @@ -17,7 +17,7 @@ #include "rmnet_vnd.h" static u8 rmnet_map_do_flow_control(struct sk_buff *skb, - struct rmnet_port *rdinfo, + struct rmnet_port *port, int enable) { struct rmnet_map_control_command *cmd; @@ -37,7 +37,7 @@ static u8 rmnet_map_do_flow_control(struct sk_buff *skb, return RX_HANDLER_CONSUMED; } - ep = &rdinfo->muxed_ep[mux_id]; + ep = rmnet_get_endpoint(port, mux_id); vnd = ep->egress_dev; ip_family = cmd->flow_control.ip_family; @@ -76,8 +76,7 @@ static void rmnet_map_send_ack(struct sk_buff *skb, /* Process MAP command frame and send N/ACK message as appropriate. Message cmd * name is decoded here and appropriate handler is called. */ -rx_handler_result_t rmnet_map_command(struct sk_buff *skb, - struct rmnet_port *port) +void rmnet_map_command(struct sk_buff *skb, struct rmnet_port *port) { struct rmnet_map_control_command *cmd; unsigned char command_name; @@ -102,5 +101,4 @@ rx_handler_result_t rmnet_map_command(struct sk_buff *skb, } if (rc == RMNET_MAP_COMMAND_ACK) rmnet_map_send_ack(skb, rc); - return RX_HANDLER_CONSUMED; } diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h index 7967198fdd90..49102f922b31 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h @@ -19,23 +19,15 @@ #define RMNET_TX_QUEUE_LEN 1000 /* Constants */ -#define RMNET_EGRESS_FORMAT__RESERVED__ BIT(0) #define RMNET_EGRESS_FORMAT_MAP BIT(1) #define RMNET_EGRESS_FORMAT_AGGREGATION BIT(2) #define RMNET_EGRESS_FORMAT_MUXING BIT(3) -#define RMNET_EGRESS_FORMAT_MAP_CKSUMV3 BIT(4) -#define RMNET_EGRESS_FORMAT_MAP_CKSUMV4 BIT(5) -#define RMNET_INGRESS_FIX_ETHERNET BIT(0) #define RMNET_INGRESS_FORMAT_MAP BIT(1) #define RMNET_INGRESS_FORMAT_DEAGGREGATION BIT(2) #define RMNET_INGRESS_FORMAT_DEMUXING BIT(3) #define RMNET_INGRESS_FORMAT_MAP_COMMANDS BIT(4) -#define RMNET_INGRESS_FORMAT_MAP_CKSUMV3 BIT(5) -#define RMNET_INGRESS_FORMAT_MAP_CKSUMV4 BIT(6) -/* Pass the frame up the stack with no modifications to skb->dev */ -#define RMNET_EPMODE_NONE (0) /* Replace skb->dev to a virtual rmnet device and pass up the stack */ #define RMNET_EPMODE_VND (1) /* Pass the frame directly to another device with dev_queue_xmit() */ diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c index 7f90d5587653..9caa5e387450 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c @@ -27,14 +27,28 @@ void rmnet_vnd_rx_fixup(struct sk_buff *skb, struct net_device *dev) { - dev->stats.rx_packets++; - dev->stats.rx_bytes += skb->len; + struct rmnet_priv *priv = netdev_priv(dev); + struct rmnet_pcpu_stats *pcpu_ptr; + + pcpu_ptr = this_cpu_ptr(priv->pcpu_stats); + + u64_stats_update_begin(&pcpu_ptr->syncp); + pcpu_ptr->stats.rx_pkts++; + pcpu_ptr->stats.rx_bytes += skb->len; + u64_stats_update_end(&pcpu_ptr->syncp); } void rmnet_vnd_tx_fixup(struct sk_buff *skb, struct net_device *dev) { - dev->stats.tx_packets++; - dev->stats.tx_bytes += skb->len; + struct rmnet_priv *priv = netdev_priv(dev); + struct rmnet_pcpu_stats *pcpu_ptr; + + pcpu_ptr = this_cpu_ptr(priv->pcpu_stats); + + u64_stats_update_begin(&pcpu_ptr->syncp); + pcpu_ptr->stats.tx_pkts++; + pcpu_ptr->stats.tx_bytes += skb->len; + u64_stats_update_end(&pcpu_ptr->syncp); } /* Network Device Operations */ @@ -45,10 +59,10 @@ static netdev_tx_t rmnet_vnd_start_xmit(struct sk_buff *skb, struct rmnet_priv *priv; priv = netdev_priv(dev); - if (priv->local_ep.egress_dev) { - rmnet_egress_handler(skb, &priv->local_ep); + if (priv->real_dev) { + rmnet_egress_handler(skb); } else { - dev->stats.tx_dropped++; + this_cpu_inc(priv->pcpu_stats->stats.tx_drops); kfree_skb(skb); } return NETDEV_TX_OK; @@ -70,10 +84,72 @@ static int rmnet_vnd_get_iflink(const struct net_device *dev) return priv->real_dev->ifindex; } +static int rmnet_vnd_init(struct net_device *dev) +{ + struct rmnet_priv *priv = netdev_priv(dev); + int err; + + priv->pcpu_stats = alloc_percpu(struct rmnet_pcpu_stats); + if (!priv->pcpu_stats) + return -ENOMEM; + + err = gro_cells_init(&priv->gro_cells, dev); + if (err) { + free_percpu(priv->pcpu_stats); + return err; + } + + return 0; +} + +static void rmnet_vnd_uninit(struct net_device *dev) +{ + struct rmnet_priv *priv = netdev_priv(dev); + + gro_cells_destroy(&priv->gro_cells); + free_percpu(priv->pcpu_stats); +} + +static void rmnet_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *s) +{ + struct rmnet_priv *priv = netdev_priv(dev); + struct rmnet_vnd_stats total_stats; + struct rmnet_pcpu_stats *pcpu_ptr; + unsigned int cpu, start; + + memset(&total_stats, 0, sizeof(struct rmnet_vnd_stats)); + + for_each_possible_cpu(cpu) { + pcpu_ptr = this_cpu_ptr(priv->pcpu_stats); + + do { + start = u64_stats_fetch_begin_irq(&pcpu_ptr->syncp); + total_stats.rx_pkts += pcpu_ptr->stats.rx_pkts; + total_stats.rx_bytes += pcpu_ptr->stats.rx_bytes; + total_stats.tx_pkts += pcpu_ptr->stats.tx_pkts; + total_stats.tx_bytes += pcpu_ptr->stats.tx_bytes; + } while (u64_stats_fetch_retry_irq(&pcpu_ptr->syncp, start)); + + total_stats.tx_drops += pcpu_ptr->stats.tx_drops; + } + + s->rx_packets = total_stats.rx_pkts; + s->rx_bytes = total_stats.rx_bytes; + s->tx_packets = total_stats.tx_pkts; + s->tx_bytes = total_stats.tx_bytes; + s->tx_dropped = total_stats.tx_drops; +} + static const struct net_device_ops rmnet_vnd_ops = { .ndo_start_xmit = rmnet_vnd_start_xmit, .ndo_change_mtu = rmnet_vnd_change_mtu, .ndo_get_iflink = rmnet_vnd_get_iflink, + .ndo_add_slave = rmnet_add_bridge, + .ndo_del_slave = rmnet_del_bridge, + .ndo_init = rmnet_vnd_init, + .ndo_uninit = rmnet_vnd_uninit, + .ndo_get_stats64 = rmnet_get_stats64, }; /* Called by kernel whenever a new rmnet<n> device is created. Sets MTU, @@ -100,17 +176,19 @@ void rmnet_vnd_setup(struct net_device *rmnet_dev) int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev, struct rmnet_port *port, - struct net_device *real_dev) + struct net_device *real_dev, + struct rmnet_endpoint *ep) { struct rmnet_priv *priv; int rc; - if (port->rmnet_devices[id]) + if (ep->egress_dev) return -EINVAL; rc = register_netdevice(rmnet_dev); if (!rc) { - port->rmnet_devices[id] = rmnet_dev; + ep->egress_dev = rmnet_dev; + ep->mux_id = id; port->nr_rmnet_devs++; rmnet_dev->rtnl_link_ops = &rmnet_link_ops; @@ -125,12 +203,13 @@ int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev, return rc; } -int rmnet_vnd_dellink(u8 id, struct rmnet_port *port) +int rmnet_vnd_dellink(u8 id, struct rmnet_port *port, + struct rmnet_endpoint *ep) { - if (id >= RMNET_MAX_LOGICAL_EP || !port->rmnet_devices[id]) + if (id >= RMNET_MAX_LOGICAL_EP || !ep->egress_dev) return -EINVAL; - port->rmnet_devices[id] = NULL; + ep->egress_dev = NULL; port->nr_rmnet_devs--; return 0; } @@ -143,21 +222,6 @@ u8 rmnet_vnd_get_mux(struct net_device *rmnet_dev) return priv->mux_id; } -/* Gets the logical endpoint configuration for a RmNet virtual network device - * node. Caller should confirm that devices is a RmNet VND before calling. - */ -struct rmnet_endpoint *rmnet_vnd_get_endpoint(struct net_device *rmnet_dev) -{ - struct rmnet_priv *priv; - - if (!rmnet_dev) - return NULL; - - priv = netdev_priv(rmnet_dev); - - return &priv->local_ep; -} - int rmnet_vnd_do_flow_control(struct net_device *rmnet_dev, int enable) { netdev_dbg(rmnet_dev, "Setting VND TX queue state to %d\n", enable); diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h index 8a4042f0f6bf..71e4c3286951 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h @@ -17,11 +17,12 @@ #define _RMNET_VND_H_ int rmnet_vnd_do_flow_control(struct net_device *dev, int enable); -struct rmnet_endpoint *rmnet_vnd_get_endpoint(struct net_device *dev); int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev, struct rmnet_port *port, - struct net_device *real_dev); -int rmnet_vnd_dellink(u8 id, struct rmnet_port *port); + struct net_device *real_dev, + struct rmnet_endpoint *ep); +int rmnet_vnd_dellink(u8 id, struct rmnet_port *port, + struct rmnet_endpoint *ep); void rmnet_vnd_rx_fixup(struct sk_buff *skb, struct net_device *dev); void rmnet_vnd_tx_fixup(struct sk_buff *skb, struct net_device *dev); u8 rmnet_vnd_get_mux(struct net_device *rmnet_dev); diff --git a/drivers/net/ethernet/realtek/atp.c b/drivers/net/ethernet/realtek/atp.c index bed34684994f..7e011c1c1e6e 100644 --- a/drivers/net/ethernet/realtek/atp.c +++ b/drivers/net/ethernet/realtek/atp.c @@ -170,6 +170,7 @@ struct net_local { spinlock_t lock; struct net_device *next_module; struct timer_list timer; /* Media selection timer. */ + struct net_device *dev; /* Timer dev. */ unsigned long last_rx_time; /* Last Rx, in jiffies, to handle Rx hang. */ int saved_tx_size; unsigned int tx_unit_busy:1; @@ -184,7 +185,7 @@ struct net_local { #define TIMED_CHECKER (HZ/4) #ifdef TIMED_CHECKER #include <linux/timer.h> -static void atp_timed_checker(unsigned long ignored); +static void atp_timed_checker(struct timer_list *t); #endif /* Index to functions, as function prototypes. */ @@ -438,10 +439,9 @@ static int net_open(struct net_device *dev) hardware_init(dev); - init_timer(&lp->timer); + lp->dev = dev; + timer_setup(&lp->timer, atp_timed_checker, 0); lp->timer.expires = jiffies + TIMED_CHECKER; - lp->timer.data = (unsigned long)dev; - lp->timer.function = atp_timed_checker; /* timer handler */ add_timer(&lp->timer); netif_start_queue(dev); @@ -710,11 +710,11 @@ static irqreturn_t atp_interrupt(int irq, void *dev_instance) #ifdef TIMED_CHECKER /* This following code fixes a rare (and very difficult to track down) problem where the adapter forgets its ethernet address. */ -static void atp_timed_checker(unsigned long data) +static void atp_timed_checker(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; + struct net_local *lp = from_timer(lp, t, timer); + struct net_device *dev = lp->dev; long ioaddr = dev->base_addr; - struct net_local *lp = netdev_priv(dev); int tickssofar = jiffies - lp->last_rx_time; int i; diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index a3c949ea7d1a..dcb8c39382e7 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -399,6 +399,12 @@ enum rtl_registers { RxMaxSize = 0xda, CPlusCmd = 0xe0, IntrMitigate = 0xe2, + +#define RTL_COALESCE_MASK 0x0f +#define RTL_COALESCE_SHIFT 4 +#define RTL_COALESCE_T_MAX (RTL_COALESCE_MASK) +#define RTL_COALESCE_FRAME_MAX (RTL_COALESCE_MASK << 2) + RxDescAddrLow = 0xe4, RxDescAddrHigh = 0xe8, EarlyTxThres = 0xec, /* 8169. Unit of 32 bytes. */ @@ -795,6 +801,7 @@ struct rtl8169_private { u16 cp_cmd; u16 event_slow; + const struct rtl_coalesce_info *coalesce_info; struct mdio_ops { void (*write)(struct rtl8169_private *, int, int); @@ -1975,8 +1982,6 @@ static int rtl8169_set_speed_xmii(struct net_device *dev, rtl_writephy(tp, MII_ADVERTISE, auto_nego); rtl_writephy(tp, MII_CTRL1000, giga_ctrl); } else { - giga_ctrl = 0; - if (speed == SPEED_10) bmcr = 0; else if (speed == SPEED_100) @@ -2363,10 +2368,229 @@ static int rtl8169_nway_reset(struct net_device *dev) return mii_nway_restart(&tp->mii); } +/* + * Interrupt coalescing + * + * > 1 - the availability of the IntrMitigate (0xe2) register through the + * > 8169, 8168 and 810x line of chipsets + * + * 8169, 8168, and 8136(810x) serial chipsets support it. + * + * > 2 - the Tx timer unit at gigabit speed + * + * The unit of the timer depends on both the speed and the setting of CPlusCmd + * (0xe0) bit 1 and bit 0. + * + * For 8169 + * bit[1:0] \ speed 1000M 100M 10M + * 0 0 320ns 2.56us 40.96us + * 0 1 2.56us 20.48us 327.7us + * 1 0 5.12us 40.96us 655.4us + * 1 1 10.24us 81.92us 1.31ms + * + * For the other + * bit[1:0] \ speed 1000M 100M 10M + * 0 0 5us 2.56us 40.96us + * 0 1 40us 20.48us 327.7us + * 1 0 80us 40.96us 655.4us + * 1 1 160us 81.92us 1.31ms + */ + +/* rx/tx scale factors for one particular CPlusCmd[0:1] value */ +struct rtl_coalesce_scale { + /* Rx / Tx */ + u32 nsecs[2]; +}; + +/* rx/tx scale factors for all CPlusCmd[0:1] cases */ +struct rtl_coalesce_info { + u32 speed; + struct rtl_coalesce_scale scalev[4]; /* each CPlusCmd[0:1] case */ +}; + +/* produce (r,t) pairs with each being in series of *1, *8, *8*2, *8*2*2 */ +#define rxtx_x1822(r, t) { \ + {{(r), (t)}}, \ + {{(r)*8, (t)*8}}, \ + {{(r)*8*2, (t)*8*2}}, \ + {{(r)*8*2*2, (t)*8*2*2}}, \ +} +static const struct rtl_coalesce_info rtl_coalesce_info_8169[] = { + /* speed delays: rx00 tx00 */ + { SPEED_10, rxtx_x1822(40960, 40960) }, + { SPEED_100, rxtx_x1822( 2560, 2560) }, + { SPEED_1000, rxtx_x1822( 320, 320) }, + { 0 }, +}; + +static const struct rtl_coalesce_info rtl_coalesce_info_8168_8136[] = { + /* speed delays: rx00 tx00 */ + { SPEED_10, rxtx_x1822(40960, 40960) }, + { SPEED_100, rxtx_x1822( 2560, 2560) }, + { SPEED_1000, rxtx_x1822( 5000, 5000) }, + { 0 }, +}; +#undef rxtx_x1822 + +/* get rx/tx scale vector corresponding to current speed */ +static const struct rtl_coalesce_info *rtl_coalesce_info(struct net_device *dev) +{ + struct rtl8169_private *tp = netdev_priv(dev); + struct ethtool_link_ksettings ecmd; + const struct rtl_coalesce_info *ci; + int rc; + + rc = rtl8169_get_link_ksettings(dev, &ecmd); + if (rc < 0) + return ERR_PTR(rc); + + for (ci = tp->coalesce_info; ci->speed != 0; ci++) { + if (ecmd.base.speed == ci->speed) { + return ci; + } + } + + return ERR_PTR(-ELNRNG); +} + +static int rtl_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec) +{ + struct rtl8169_private *tp = netdev_priv(dev); + void __iomem *ioaddr = tp->mmio_addr; + const struct rtl_coalesce_info *ci; + const struct rtl_coalesce_scale *scale; + struct { + u32 *max_frames; + u32 *usecs; + } coal_settings [] = { + { &ec->rx_max_coalesced_frames, &ec->rx_coalesce_usecs }, + { &ec->tx_max_coalesced_frames, &ec->tx_coalesce_usecs } + }, *p = coal_settings; + int i; + u16 w; + + memset(ec, 0, sizeof(*ec)); + + /* get rx/tx scale corresponding to current speed and CPlusCmd[0:1] */ + ci = rtl_coalesce_info(dev); + if (IS_ERR(ci)) + return PTR_ERR(ci); + + scale = &ci->scalev[RTL_R16(CPlusCmd) & 3]; + + /* read IntrMitigate and adjust according to scale */ + for (w = RTL_R16(IntrMitigate); w; w >>= RTL_COALESCE_SHIFT, p++) { + *p->max_frames = (w & RTL_COALESCE_MASK) << 2; + w >>= RTL_COALESCE_SHIFT; + *p->usecs = w & RTL_COALESCE_MASK; + } + + for (i = 0; i < 2; i++) { + p = coal_settings + i; + *p->usecs = (*p->usecs * scale->nsecs[i]) / 1000; + + /* + * ethtool_coalesce says it is illegal to set both usecs and + * max_frames to 0. + */ + if (!*p->usecs && !*p->max_frames) + *p->max_frames = 1; + } + + return 0; +} + +/* choose appropriate scale factor and CPlusCmd[0:1] for (speed, nsec) */ +static const struct rtl_coalesce_scale *rtl_coalesce_choose_scale( + struct net_device *dev, u32 nsec, u16 *cp01) +{ + const struct rtl_coalesce_info *ci; + u16 i; + + ci = rtl_coalesce_info(dev); + if (IS_ERR(ci)) + return ERR_CAST(ci); + + for (i = 0; i < 4; i++) { + u32 rxtx_maxscale = max(ci->scalev[i].nsecs[0], + ci->scalev[i].nsecs[1]); + if (nsec <= rxtx_maxscale * RTL_COALESCE_T_MAX) { + *cp01 = i; + return &ci->scalev[i]; + } + } + + return ERR_PTR(-EINVAL); +} + +static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec) +{ + struct rtl8169_private *tp = netdev_priv(dev); + void __iomem *ioaddr = tp->mmio_addr; + const struct rtl_coalesce_scale *scale; + struct { + u32 frames; + u32 usecs; + } coal_settings [] = { + { ec->rx_max_coalesced_frames, ec->rx_coalesce_usecs }, + { ec->tx_max_coalesced_frames, ec->tx_coalesce_usecs } + }, *p = coal_settings; + u16 w = 0, cp01; + int i; + + scale = rtl_coalesce_choose_scale(dev, + max(p[0].usecs, p[1].usecs) * 1000, &cp01); + if (IS_ERR(scale)) + return PTR_ERR(scale); + + for (i = 0; i < 2; i++, p++) { + u32 units; + + /* + * accept max_frames=1 we returned in rtl_get_coalesce. + * accept it not only when usecs=0 because of e.g. the following scenario: + * + * - both rx_usecs=0 & rx_frames=0 in hardware (no delay on RX) + * - rtl_get_coalesce returns rx_usecs=0, rx_frames=1 + * - then user does `ethtool -C eth0 rx-usecs 100` + * + * since ethtool sends to kernel whole ethtool_coalesce + * settings, if we do not handle rx_usecs=!0, rx_frames=1 + * we'll reject it below in `frames % 4 != 0`. + */ + if (p->frames == 1) { + p->frames = 0; + } + + units = p->usecs * 1000 / scale->nsecs[i]; + if (p->frames > RTL_COALESCE_FRAME_MAX || p->frames % 4) + return -EINVAL; + + w <<= RTL_COALESCE_SHIFT; + w |= units; + w <<= RTL_COALESCE_SHIFT; + w |= p->frames >> 2; + } + + rtl_lock_work(tp); + + RTL_W16(IntrMitigate, swab16(w)); + + tp->cp_cmd = (tp->cp_cmd & ~3) | cp01; + RTL_W16(CPlusCmd, tp->cp_cmd); + RTL_R16(CPlusCmd); + + rtl_unlock_work(tp); + + return 0; +} + static const struct ethtool_ops rtl8169_ethtool_ops = { .get_drvinfo = rtl8169_get_drvinfo, .get_regs_len = rtl8169_get_regs_len, .get_link = ethtool_op_get_link, + .get_coalesce = rtl_get_coalesce, + .set_coalesce = rtl_set_coalesce, .set_settings = rtl8169_set_settings, .get_msglevel = rtl8169_get_msglevel, .set_msglevel = rtl8169_set_msglevel, @@ -4401,10 +4625,9 @@ static void rtl_schedule_task(struct rtl8169_private *tp, enum rtl_flag flag) schedule_work(&tp->wk.work); } -static void rtl8169_phy_timer(unsigned long __opaque) +static void rtl8169_phy_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)__opaque; - struct rtl8169_private *tp = netdev_priv(dev); + struct rtl8169_private *tp = from_timer(tp, t, timer); rtl_schedule_task(tp, RTL_FLAG_TASK_PHY_PENDING); } @@ -8062,6 +8285,7 @@ static const struct rtl_cfg_info { unsigned int align; u16 event_slow; unsigned features; + const struct rtl_coalesce_info *coalesce_info; u8 default_ver; } rtl_cfg_infos [] = { [RTL_CFG_0] = { @@ -8070,6 +8294,7 @@ static const struct rtl_cfg_info { .align = 0, .event_slow = SYSErr | LinkChg | RxOverflow | RxFIFOOver, .features = RTL_FEATURE_GMII, + .coalesce_info = rtl_coalesce_info_8169, .default_ver = RTL_GIGA_MAC_VER_01, }, [RTL_CFG_1] = { @@ -8078,6 +8303,7 @@ static const struct rtl_cfg_info { .align = 8, .event_slow = SYSErr | LinkChg | RxOverflow, .features = RTL_FEATURE_GMII | RTL_FEATURE_MSI, + .coalesce_info = rtl_coalesce_info_8168_8136, .default_ver = RTL_GIGA_MAC_VER_11, }, [RTL_CFG_2] = { @@ -8087,6 +8313,7 @@ static const struct rtl_cfg_info { .event_slow = SYSErr | LinkChg | RxOverflow | RxFIFOOver | PCSTimeout, .features = RTL_FEATURE_MSI, + .coalesce_info = rtl_coalesce_info_8168_8136, .default_ver = RTL_GIGA_MAC_VER_13, } }; @@ -8450,11 +8677,12 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp->hw_start = cfg->hw_start; tp->event_slow = cfg->event_slow; + tp->coalesce_info = cfg->coalesce_info; tp->opts1_mask = (tp->mac_version != RTL_GIGA_MAC_VER_01) ? ~(RxBOVF | RxFOVF) : ~0; - setup_timer(&tp->timer, rtl8169_phy_timer, (unsigned long)dev); + timer_setup(&tp->timer, rtl8169_phy_timer, 0); tp->rtl_fw = RTL_FIRMWARE_UNKNOWN; diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index fdf30bfa403b..2b962d349f5f 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -403,8 +403,9 @@ static void ravb_emac_init(struct net_device *ndev) /* Receive frame limit set register */ ravb_write(ndev, ndev->mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN, RFLR); - /* PAUSE prohibition */ + /* EMAC Mode: PAUSE prohibition; Duplex; RX Checksum; TX; RX */ ravb_write(ndev, ECMR_ZPF | (priv->duplex ? ECMR_DM : 0) | + (ndev->features & NETIF_F_RXCSUM ? ECMR_RCSC : 0) | ECMR_TE | ECMR_RE, ECMR); ravb_set_rate(ndev); @@ -520,6 +521,19 @@ static void ravb_get_tx_tstamp(struct net_device *ndev) } } +static void ravb_rx_csum(struct sk_buff *skb) +{ + u8 *hw_csum; + + /* The hardware checksum is 2 bytes appended to packet data */ + if (unlikely(skb->len < 2)) + return; + hw_csum = skb_tail_pointer(skb) - 2; + skb->csum = csum_unfold((__force __sum16)get_unaligned_le16(hw_csum)); + skb->ip_summed = CHECKSUM_COMPLETE; + skb_trim(skb, skb->len - 2); +} + /* Packet receive function for Ethernet AVB */ static bool ravb_rx(struct net_device *ndev, int *quota, int q) { @@ -587,8 +601,11 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q) ts.tv_nsec = le32_to_cpu(desc->ts_n); shhwtstamps->hwtstamp = timespec64_to_ktime(ts); } + skb_put(skb, pkt_len); skb->protocol = eth_type_trans(skb, ndev); + if (ndev->features & NETIF_F_RXCSUM) + ravb_rx_csum(skb); napi_gro_receive(&priv->napi[q], skb); stats->rx_packets++; stats->rx_bytes += pkt_len; @@ -1337,20 +1354,15 @@ static void ravb_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol) { struct ravb_private *priv = netdev_priv(ndev); - wol->supported = 0; - wol->wolopts = 0; - - if (priv->clk) { - wol->supported = WAKE_MAGIC; - wol->wolopts = priv->wol_enabled ? WAKE_MAGIC : 0; - } + wol->supported = WAKE_MAGIC; + wol->wolopts = priv->wol_enabled ? WAKE_MAGIC : 0; } static int ravb_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol) { struct ravb_private *priv = netdev_priv(ndev); - if (!priv->clk || wol->wolopts & ~WAKE_MAGIC) + if (wol->wolopts & ~WAKE_MAGIC) return -EOPNOTSUPP; priv->wol_enabled = !!(wol->wolopts & WAKE_MAGIC); @@ -1842,6 +1854,38 @@ static int ravb_do_ioctl(struct net_device *ndev, struct ifreq *req, int cmd) return phy_mii_ioctl(phydev, req, cmd); } +static void ravb_set_rx_csum(struct net_device *ndev, bool enable) +{ + struct ravb_private *priv = netdev_priv(ndev); + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + + /* Disable TX and RX */ + ravb_rcv_snd_disable(ndev); + + /* Modify RX Checksum setting */ + ravb_modify(ndev, ECMR, ECMR_RCSC, enable ? ECMR_RCSC : 0); + + /* Enable TX and RX */ + ravb_rcv_snd_enable(ndev); + + spin_unlock_irqrestore(&priv->lock, flags); +} + +static int ravb_set_features(struct net_device *ndev, + netdev_features_t features) +{ + netdev_features_t changed = ndev->features ^ features; + + if (changed & NETIF_F_RXCSUM) + ravb_set_rx_csum(ndev, features & NETIF_F_RXCSUM); + + ndev->features = features; + + return 0; +} + static const struct net_device_ops ravb_netdev_ops = { .ndo_open = ravb_open, .ndo_stop = ravb_close, @@ -1853,6 +1897,7 @@ static const struct net_device_ops ravb_netdev_ops = { .ndo_do_ioctl = ravb_do_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, + .ndo_set_features = ravb_set_features, }; /* MDIO bus init function */ @@ -1912,22 +1957,12 @@ MODULE_DEVICE_TABLE(of, ravb_match_table); static int ravb_set_gti(struct net_device *ndev) { - + struct ravb_private *priv = netdev_priv(ndev); struct device *dev = ndev->dev.parent; - struct device_node *np = dev->of_node; unsigned long rate; - struct clk *clk; uint64_t inc; - clk = of_clk_get(np, 0); - if (IS_ERR(clk)) { - dev_err(dev, "could not get clock\n"); - return PTR_ERR(clk); - } - - rate = clk_get_rate(clk); - clk_put(clk); - + rate = clk_get_rate(priv->clk); if (!rate) return -EINVAL; @@ -2004,6 +2039,9 @@ static int ravb_probe(struct platform_device *pdev) if (!ndev) return -ENOMEM; + ndev->features = NETIF_F_RXCSUM; + ndev->hw_features = NETIF_F_RXCSUM; + pm_runtime_enable(&pdev->dev); pm_runtime_get_sync(&pdev->dev); @@ -2073,10 +2111,11 @@ static int ravb_probe(struct platform_device *pdev) priv->chip_id = chip_id; - /* Get clock, if not found that's OK but Wake-On-Lan is unavailable */ priv->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(priv->clk)) - priv->clk = NULL; + if (IS_ERR(priv->clk)) { + error = PTR_ERR(priv->clk); + goto out_release; + } /* Set function */ ndev->netdev_ops = &ravb_netdev_ops; @@ -2144,8 +2183,7 @@ static int ravb_probe(struct platform_device *pdev) if (error) goto out_napi_del; - if (priv->clk) - device_set_wakeup_capable(&pdev->dev, 1); + device_set_wakeup_capable(&pdev->dev, 1); /* Print device information */ netdev_info(ndev, "Base address at %#x, %pM, IRQ %d.\n", diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index d2e88a30f57b..7e060aa9fbed 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -594,7 +594,7 @@ static struct sh_eth_cpu_data r8a7740_data = { }; /* There is CPU dependent code */ -static void sh_eth_set_rate_r8a777x(struct net_device *ndev) +static void sh_eth_set_rate_rcar(struct net_device *ndev) { struct sh_eth_private *mdp = netdev_priv(ndev); @@ -608,10 +608,10 @@ static void sh_eth_set_rate_r8a777x(struct net_device *ndev) } } -/* R8A7778/9 */ -static struct sh_eth_cpu_data r8a777x_data = { +/* R-Car Gen1 */ +static struct sh_eth_cpu_data rcar_gen1_data = { .set_duplex = sh_eth_set_duplex, - .set_rate = sh_eth_set_rate_r8a777x, + .set_rate = sh_eth_set_rate_rcar, .register_type = SH_ETH_REG_FAST_RCAR, @@ -635,10 +635,10 @@ static struct sh_eth_cpu_data r8a777x_data = { .hw_swap = 1, }; -/* R8A7790/1 */ -static struct sh_eth_cpu_data r8a779x_data = { +/* R-Car Gen2 and RZ/G1 */ +static struct sh_eth_cpu_data rcar_gen2_data = { .set_duplex = sh_eth_set_duplex, - .set_rate = sh_eth_set_rate_r8a777x, + .set_rate = sh_eth_set_rate_rcar, .register_type = SH_ETH_REG_FAST_RCAR, @@ -3086,15 +3086,17 @@ static struct sh_eth_plat_data *sh_eth_parse_dt(struct device *dev) static const struct of_device_id sh_eth_match_table[] = { { .compatible = "renesas,gether-r8a7740", .data = &r8a7740_data }, - { .compatible = "renesas,ether-r8a7743", .data = &r8a779x_data }, - { .compatible = "renesas,ether-r8a7745", .data = &r8a779x_data }, - { .compatible = "renesas,ether-r8a7778", .data = &r8a777x_data }, - { .compatible = "renesas,ether-r8a7779", .data = &r8a777x_data }, - { .compatible = "renesas,ether-r8a7790", .data = &r8a779x_data }, - { .compatible = "renesas,ether-r8a7791", .data = &r8a779x_data }, - { .compatible = "renesas,ether-r8a7793", .data = &r8a779x_data }, - { .compatible = "renesas,ether-r8a7794", .data = &r8a779x_data }, + { .compatible = "renesas,ether-r8a7743", .data = &rcar_gen2_data }, + { .compatible = "renesas,ether-r8a7745", .data = &rcar_gen2_data }, + { .compatible = "renesas,ether-r8a7778", .data = &rcar_gen1_data }, + { .compatible = "renesas,ether-r8a7779", .data = &rcar_gen1_data }, + { .compatible = "renesas,ether-r8a7790", .data = &rcar_gen2_data }, + { .compatible = "renesas,ether-r8a7791", .data = &rcar_gen2_data }, + { .compatible = "renesas,ether-r8a7793", .data = &rcar_gen2_data }, + { .compatible = "renesas,ether-r8a7794", .data = &rcar_gen2_data }, { .compatible = "renesas,ether-r7s72100", .data = &r7s72100_data }, + { .compatible = "renesas,rcar-gen1-ether", .data = &rcar_gen1_data }, + { .compatible = "renesas,rcar-gen2-ether", .data = &rcar_gen2_data }, { } }; MODULE_DEVICE_TABLE(of, sh_eth_match_table); diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c index 89831adb8eb7..fd35d8004a78 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c @@ -105,9 +105,9 @@ void sxgbe_disable_eee_mode(struct sxgbe_priv_data * const priv) * If there is no data transfer and if we are not in LPI state, * then MAC Transmitter can be moved to LPI state. */ -static void sxgbe_eee_ctrl_timer(unsigned long arg) +static void sxgbe_eee_ctrl_timer(struct timer_list *t) { - struct sxgbe_priv_data *priv = (struct sxgbe_priv_data *)arg; + struct sxgbe_priv_data *priv = from_timer(priv, t, eee_ctrl_timer); sxgbe_enable_eee_mode(priv); mod_timer(&priv->eee_ctrl_timer, SXGBE_LPI_TIMER(eee_timer)); @@ -134,8 +134,7 @@ bool sxgbe_eee_init(struct sxgbe_priv_data * const priv) return false; priv->eee_active = 1; - setup_timer(&priv->eee_ctrl_timer, sxgbe_eee_ctrl_timer, - (unsigned long)priv); + timer_setup(&priv->eee_ctrl_timer, sxgbe_eee_ctrl_timer, 0); priv->eee_ctrl_timer.expires = SXGBE_LPI_TIMER(eee_timer); add_timer(&priv->eee_ctrl_timer); @@ -1002,13 +1001,13 @@ static void sxgbe_disable_mtl_engine(struct sxgbe_priv_data *priv) /** * sxgbe_tx_timer: mitigation sw timer for tx. - * @data: data pointer + * @t: timer pointer * Description: * This is the timer handler to directly invoke the sxgbe_tx_clean. */ -static void sxgbe_tx_timer(unsigned long data) +static void sxgbe_tx_timer(struct timer_list *t) { - struct sxgbe_tx_queue *p = (struct sxgbe_tx_queue *)data; + struct sxgbe_tx_queue *p = from_timer(p, t, txtimer); sxgbe_tx_queue_clean(p); } @@ -1028,8 +1027,7 @@ static void sxgbe_tx_init_coalesce(struct sxgbe_priv_data *priv) struct sxgbe_tx_queue *p = priv->txq[queue_num]; p->tx_coal_frames = SXGBE_TX_FRAMES; p->tx_coal_timer = SXGBE_COAL_TX_TIMER; - setup_timer(&p->txtimer, sxgbe_tx_timer, - (unsigned long)&priv->txq[queue_num]); + timer_setup(&p->txtimer, sxgbe_tx_timer, 0); p->txtimer.expires = SXGBE_COAL_TIMER(p->tx_coal_timer); add_timer(&p->txtimer); } diff --git a/drivers/net/ethernet/seeq/ether3.c b/drivers/net/ethernet/seeq/ether3.c index 244c1e171017..c5bc124b41a9 100644 --- a/drivers/net/ethernet/seeq/ether3.c +++ b/drivers/net/ethernet/seeq/ether3.c @@ -170,9 +170,11 @@ ether3_setbuffer(struct net_device *dev, buffer_rw_t read, int start) /* * Switch LED off... */ -static void ether3_ledoff(unsigned long data) +static void ether3_ledoff(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; + struct dev_priv *private = from_timer(private, t, timer); + struct net_device *dev = private->dev; + ether3_outw(priv(dev)->regs.config2 |= CFG2_CTRLO, REG_CONFIG2); } @@ -183,8 +185,6 @@ static inline void ether3_ledon(struct net_device *dev) { del_timer(&priv(dev)->timer); priv(dev)->timer.expires = jiffies + HZ / 50; /* leave on for 1/50th second */ - priv(dev)->timer.data = (unsigned long)dev; - priv(dev)->timer.function = ether3_ledoff; add_timer(&priv(dev)->timer); if (priv(dev)->regs.config2 & CFG2_CTRLO) ether3_outw(priv(dev)->regs.config2 &= ~CFG2_CTRLO, REG_CONFIG2); @@ -783,7 +783,8 @@ ether3_probe(struct expansion_card *ec, const struct ecard_id *id) ether3_addr(dev->dev_addr, ec); - init_timer(&priv(dev)->timer); + priv(dev)->dev = dev; + timer_setup(&priv(dev)->timer, ether3_ledoff, 0); /* Reset card... */ diff --git a/drivers/net/ethernet/seeq/ether3.h b/drivers/net/ethernet/seeq/ether3.h index 2db63b08bdf3..be19e5fa5cf2 100644 --- a/drivers/net/ethernet/seeq/ether3.h +++ b/drivers/net/ethernet/seeq/ether3.h @@ -165,6 +165,7 @@ struct dev_priv { unsigned char tx_tail; /* buffer nr of transmitting packet */ unsigned int rx_head; /* address to fetch next packet from */ struct timer_list timer; + struct net_device *dev; int broken; /* 0 = ok, 1 = something went wrong */ }; diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index a95a46bcd339..e566dbb3343d 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -674,6 +674,10 @@ static int efx_ef10_probe(struct efx_nic *efx) efx->rx_packet_len_offset = ES_DZ_RX_PREFIX_PKTLEN_OFST - ES_DZ_RX_PREFIX_SIZE; + if (nic_data->datapath_caps & + (1 << MC_CMD_GET_CAPABILITIES_OUT_RX_INCLUDE_FCS_LBN)) + efx->net_dev->hw_features |= NETIF_F_RXFCS; + rc = efx_mcdi_port_get_number(efx); if (rc < 0) goto fail5; @@ -3199,11 +3203,15 @@ static u16 efx_ef10_handle_rx_event_errors(struct efx_channel *channel, const efx_qword_t *event) { struct efx_nic *efx = channel->efx; + bool handled = false; if (EFX_QWORD_FIELD(*event, ESF_DZ_RX_ECRC_ERR)) { - if (!efx->loopback_selftest) - channel->n_rx_eth_crc_err += n_packets; - return EFX_RX_PKT_DISCARD; + if (!(efx->net_dev->features & NETIF_F_RXALL)) { + if (!efx->loopback_selftest) + channel->n_rx_eth_crc_err += n_packets; + return EFX_RX_PKT_DISCARD; + } + handled = true; } if (EFX_QWORD_FIELD(*event, ESF_DZ_RX_IPCKSUM_ERR)) { if (unlikely(rx_encap_hdr != ESE_EZ_ENCAP_HDR_VXLAN && @@ -3274,7 +3282,7 @@ static u16 efx_ef10_handle_rx_event_errors(struct efx_channel *channel, return 0; } - WARN_ON(1); /* No error bits were recognised */ + WARN_ON(!handled); /* No error bits were recognised */ return 0; } @@ -5726,7 +5734,7 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx) * MCFW do not support VFs. */ rc = efx_ef10_vport_set_mac_address(efx); - } else { + } else if (rc) { efx_mcdi_display_error(efx, MC_CMD_VADAPTOR_SET_MAC, sizeof(inbuf), NULL, 0, rc); } diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 016616a63880..e3c492fcaff0 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -471,8 +471,7 @@ efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel) rx_queue = &channel->rx_queue; rx_queue->efx = efx; - setup_timer(&rx_queue->slow_fill, efx_rx_slow_fill, - (unsigned long)rx_queue); + timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0); return channel; } @@ -511,8 +510,7 @@ efx_copy_channel(const struct efx_channel *old_channel) rx_queue = &channel->rx_queue; rx_queue->buffer = NULL; memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd)); - setup_timer(&rx_queue->slow_fill, efx_rx_slow_fill, - (unsigned long)rx_queue); + timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0); return channel; } @@ -2317,8 +2315,11 @@ static int efx_set_features(struct net_device *net_dev, netdev_features_t data) return rc; } - /* If Rx VLAN filter is changed, update filters via mac_reconfigure */ - if ((net_dev->features ^ data) & NETIF_F_HW_VLAN_CTAG_FILTER) { + /* If Rx VLAN filter is changed, update filters via mac_reconfigure. + * If rx-fcs is changed, mac_reconfigure updates that too. + */ + if ((net_dev->features ^ data) & (NETIF_F_HW_VLAN_CTAG_FILTER | + NETIF_F_RXFCS)) { /* efx_set_rx_mode() will schedule MAC work to update filters * when a new features are finally set in net_dev. */ @@ -3244,7 +3245,7 @@ static int efx_pci_probe_post_io(struct efx_nic *efx) /* Determine netdevice features */ net_dev->features |= (efx->type->offload_features | NETIF_F_SG | - NETIF_F_TSO | NETIF_F_RXCSUM); + NETIF_F_TSO | NETIF_F_RXCSUM | NETIF_F_RXALL); if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) net_dev->features |= NETIF_F_TSO6; /* Check whether device supports TSO */ @@ -3255,7 +3256,10 @@ static int efx_pci_probe_post_io(struct efx_nic *efx) NETIF_F_HIGHDMA | NETIF_F_ALL_TSO | NETIF_F_RXCSUM); - net_dev->hw_features = net_dev->features & ~efx->fixed_features; + net_dev->hw_features |= net_dev->features & ~efx->fixed_features; + + /* Disable receiving frames with bad FCS, by default. */ + net_dev->features &= ~NETIF_F_RXALL; /* Disable VLAN filtering by default. It may be enforced if * the feature is fixed (i.e. VLAN filters are required to diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index d407adf59610..52c84b782901 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -46,7 +46,7 @@ void efx_remove_rx_queue(struct efx_rx_queue *rx_queue); void efx_init_rx_queue(struct efx_rx_queue *rx_queue); void efx_fini_rx_queue(struct efx_rx_queue *rx_queue); void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic); -void efx_rx_slow_fill(unsigned long context); +void efx_rx_slow_fill(struct timer_list *t); void __efx_rx_packet(struct efx_channel *channel); void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, unsigned int n_frags, unsigned int len, u16 flags); diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c index 7263275fde4a..3d6c91e96589 100644 --- a/drivers/net/ethernet/sfc/falcon/efx.c +++ b/drivers/net/ethernet/sfc/falcon/efx.c @@ -449,8 +449,7 @@ ef4_alloc_channel(struct ef4_nic *efx, int i, struct ef4_channel *old_channel) rx_queue = &channel->rx_queue; rx_queue->efx = efx; - setup_timer(&rx_queue->slow_fill, ef4_rx_slow_fill, - (unsigned long)rx_queue); + timer_setup(&rx_queue->slow_fill, ef4_rx_slow_fill, 0); return channel; } @@ -489,8 +488,7 @@ ef4_copy_channel(const struct ef4_channel *old_channel) rx_queue = &channel->rx_queue; rx_queue->buffer = NULL; memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd)); - setup_timer(&rx_queue->slow_fill, ef4_rx_slow_fill, - (unsigned long)rx_queue); + timer_setup(&rx_queue->slow_fill, ef4_rx_slow_fill, 0); return channel; } diff --git a/drivers/net/ethernet/sfc/falcon/efx.h b/drivers/net/ethernet/sfc/falcon/efx.h index 4f3bb30661ea..a4e4d8ea4078 100644 --- a/drivers/net/ethernet/sfc/falcon/efx.h +++ b/drivers/net/ethernet/sfc/falcon/efx.h @@ -45,7 +45,7 @@ void ef4_remove_rx_queue(struct ef4_rx_queue *rx_queue); void ef4_init_rx_queue(struct ef4_rx_queue *rx_queue); void ef4_fini_rx_queue(struct ef4_rx_queue *rx_queue); void ef4_fast_push_rx_descriptors(struct ef4_rx_queue *rx_queue, bool atomic); -void ef4_rx_slow_fill(unsigned long context); +void ef4_rx_slow_fill(struct timer_list *t); void __ef4_rx_packet(struct ef4_channel *channel); void ef4_rx_packet(struct ef4_rx_queue *rx_queue, unsigned int index, unsigned int n_frags, unsigned int len, u16 flags); diff --git a/drivers/net/ethernet/sfc/falcon/falcon.c b/drivers/net/ethernet/sfc/falcon/falcon.c index cd8bb472d758..6520d7bc8d21 100644 --- a/drivers/net/ethernet/sfc/falcon/falcon.c +++ b/drivers/net/ethernet/sfc/falcon/falcon.c @@ -1454,10 +1454,11 @@ static void falcon_stats_complete(struct ef4_nic *efx) } } -static void falcon_stats_timer_func(unsigned long context) +static void falcon_stats_timer_func(struct timer_list *t) { - struct ef4_nic *efx = (struct ef4_nic *)context; - struct falcon_nic_data *nic_data = efx->nic_data; + struct falcon_nic_data *nic_data = from_timer(nic_data, t, + stats_timer); + struct ef4_nic *efx = nic_data->efx; spin_lock(&efx->stats_lock); @@ -2295,6 +2296,7 @@ static int falcon_probe_nic(struct ef4_nic *efx) if (!nic_data) return -ENOMEM; efx->nic_data = nic_data; + nic_data->efx = efx; rc = -ENODEV; @@ -2402,8 +2404,7 @@ static int falcon_probe_nic(struct ef4_nic *efx) } nic_data->stats_disable_count = 1; - setup_timer(&nic_data->stats_timer, &falcon_stats_timer_func, - (unsigned long)efx); + timer_setup(&nic_data->stats_timer, falcon_stats_timer_func, 0); return 0; diff --git a/drivers/net/ethernet/sfc/falcon/nic.h b/drivers/net/ethernet/sfc/falcon/nic.h index 54ca457cdb15..07c62dc552cb 100644 --- a/drivers/net/ethernet/sfc/falcon/nic.h +++ b/drivers/net/ethernet/sfc/falcon/nic.h @@ -267,6 +267,7 @@ enum { /** * struct falcon_nic_data - Falcon NIC state * @pci_dev2: Secondary function of Falcon A + * @efx: ef4_nic pointer * @board: Board state and functions * @stats: Hardware statistics * @stats_disable_count: Nest count for disabling statistics fetches @@ -280,6 +281,7 @@ enum { */ struct falcon_nic_data { struct pci_dev *pci_dev2; + struct ef4_nic *efx; struct falcon_board board; u64 stats[FALCON_STAT_COUNT]; unsigned int stats_disable_count; diff --git a/drivers/net/ethernet/sfc/falcon/rx.c b/drivers/net/ethernet/sfc/falcon/rx.c index 6a8406dc0c2b..02456ed13a7d 100644 --- a/drivers/net/ethernet/sfc/falcon/rx.c +++ b/drivers/net/ethernet/sfc/falcon/rx.c @@ -163,7 +163,7 @@ static int ef4_init_rx_buffers(struct ef4_rx_queue *rx_queue, bool atomic) do { page = ef4_reuse_page(rx_queue); if (page == NULL) { - page = alloc_pages(__GFP_COLD | __GFP_COMP | + page = alloc_pages(__GFP_COMP | (atomic ? GFP_ATOMIC : GFP_KERNEL), efx->rx_buffer_order); if (unlikely(page == NULL)) @@ -376,9 +376,9 @@ void ef4_fast_push_rx_descriptors(struct ef4_rx_queue *rx_queue, bool atomic) ef4_nic_notify_rx_desc(rx_queue); } -void ef4_rx_slow_fill(unsigned long context) +void ef4_rx_slow_fill(struct timer_list *t) { - struct ef4_rx_queue *rx_queue = (struct ef4_rx_queue *)context; + struct ef4_rx_queue *rx_queue = from_timer(rx_queue, t, slow_fill); /* Post an event to cause NAPI to run and refill the queue */ ef4_nic_generate_fill_event(rx_queue); diff --git a/drivers/net/ethernet/sfc/falcon/tx.c b/drivers/net/ethernet/sfc/falcon/tx.c index 6486814e97dc..3409bbf5b19f 100644 --- a/drivers/net/ethernet/sfc/falcon/tx.c +++ b/drivers/net/ethernet/sfc/falcon/tx.c @@ -435,7 +435,7 @@ int ef4_setup_tc(struct net_device *net_dev, enum tc_setup_type type, unsigned tc, num_tc; int rc; - if (type != TC_SETUP_MQPRIO) + if (type != TC_SETUP_QDISC_MQPRIO) return -EOPNOTSUPP; num_tc = mqprio->num_tc; diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c index 86454d25a405..5334dc83d926 100644 --- a/drivers/net/ethernet/sfc/farch.c +++ b/drivers/net/ethernet/sfc/farch.c @@ -927,6 +927,10 @@ static u16 efx_farch_handle_rx_not_ok(struct efx_rx_queue *rx_queue, } #endif + if (efx->net_dev->features & NETIF_F_RXALL) + /* don't discard frame for CRC error */ + rx_ev_eth_crc_err = false; + /* The frame must be discarded if any of these are true. */ return (rx_ev_eth_crc_err | rx_ev_frm_trunc | rx_ev_tobe_disc | rx_ev_pause_frm) ? diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index 3df872f56289..9c2567b0d93e 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c @@ -48,7 +48,7 @@ struct efx_mcdi_async_param { /* followed by request/response buffer */ }; -static void efx_mcdi_timeout_async(unsigned long context); +static void efx_mcdi_timeout_async(struct timer_list *t); static int efx_mcdi_drv_attach(struct efx_nic *efx, bool driver_operating, bool *was_attached_out); static bool efx_mcdi_poll_once(struct efx_nic *efx); @@ -87,8 +87,7 @@ int efx_mcdi_init(struct efx_nic *efx) mcdi->mode = MCDI_MODE_POLL; spin_lock_init(&mcdi->async_lock); INIT_LIST_HEAD(&mcdi->async_list); - setup_timer(&mcdi->async_timer, efx_mcdi_timeout_async, - (unsigned long)mcdi); + timer_setup(&mcdi->async_timer, efx_mcdi_timeout_async, 0); (void) efx_mcdi_poll_reboot(efx); mcdi->new_epoch = true; @@ -608,9 +607,9 @@ static void efx_mcdi_ev_cpl(struct efx_nic *efx, unsigned int seqno, } } -static void efx_mcdi_timeout_async(unsigned long context) +static void efx_mcdi_timeout_async(struct timer_list *t) { - struct efx_mcdi_iface *mcdi = (struct efx_mcdi_iface *)context; + struct efx_mcdi_iface *mcdi = from_timer(mcdi, t, async_timer); efx_mcdi_complete_async(mcdi, true); } diff --git a/drivers/net/ethernet/sfc/mcdi_port.c b/drivers/net/ethernet/sfc/mcdi_port.c index c7407d129c7d..6e1f282b2976 100644 --- a/drivers/net/ethernet/sfc/mcdi_port.c +++ b/drivers/net/ethernet/sfc/mcdi_port.c @@ -1029,6 +1029,10 @@ int efx_mcdi_set_mac(struct efx_nic *efx) MCDI_POPULATE_DWORD_1(cmdbytes, SET_MAC_IN_REJECT, SET_MAC_IN_REJECT_UNCST, efx->unicast_filter); + MCDI_POPULATE_DWORD_1(cmdbytes, SET_MAC_IN_FLAGS, + SET_MAC_IN_FLAG_INCLUDE_FCS, + !!(efx->net_dev->features & NETIF_F_RXFCS)); + switch (efx->wanted_fc) { case EFX_FC_RX | EFX_FC_TX: fcntl = MC_CMD_FCNTL_BIDIR; diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index 56c2db398def..caa89bf7603e 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -648,11 +648,9 @@ static void efx_ptp_send_times(struct efx_nic *efx, struct pps_event_time now; struct timespec64 limit; struct efx_ptp_data *ptp = efx->ptp_data; - struct timespec64 start; int *mc_running = ptp->start.addr; pps_get_ts(&now); - start = now.ts_real; limit = now.ts_real; timespec64_add_ns(&limit, SYNCHRONISE_PERIOD_NS); diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index 42443f434569..cfe76aad79ee 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -163,7 +163,7 @@ static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic) do { page = efx_reuse_page(rx_queue); if (page == NULL) { - page = alloc_pages(__GFP_COLD | __GFP_COMP | + page = alloc_pages(__GFP_COMP | (atomic ? GFP_ATOMIC : GFP_KERNEL), efx->rx_buffer_order); if (unlikely(page == NULL)) @@ -376,9 +376,9 @@ void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic) efx_nic_notify_rx_desc(rx_queue); } -void efx_rx_slow_fill(unsigned long context) +void efx_rx_slow_fill(struct timer_list *t) { - struct efx_rx_queue *rx_queue = (struct efx_rx_queue *)context; + struct efx_rx_queue *rx_queue = from_timer(rx_queue, t, slow_fill); /* Post an event to cause NAPI to run and refill the queue */ efx_nic_generate_fill_event(rx_queue); diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index efb66ea21f27..0ea7e16f2e6e 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -663,7 +663,7 @@ int efx_setup_tc(struct net_device *net_dev, enum tc_setup_type type, unsigned tc, num_tc; int rc; - if (type != TC_SETUP_MQPRIO) + if (type != TC_SETUP_QDISC_MQPRIO) return -EOPNOTSUPP; num_tc = mqprio->num_tc; diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c index 9c0488e0f08e..18d533fdf14c 100644 --- a/drivers/net/ethernet/sgi/ioc3-eth.c +++ b/drivers/net/ethernet/sgi/ioc3-eth.c @@ -764,9 +764,9 @@ static inline void ioc3_setup_duplex(struct ioc3_private *ip) ioc3_w_emcr(ip->emcr); } -static void ioc3_timer(unsigned long data) +static void ioc3_timer(struct timer_list *t) { - struct ioc3_private *ip = (struct ioc3_private *) data; + struct ioc3_private *ip = from_timer(ip, t, ioc3_timer); /* Print the link status if it has changed */ mii_check_media(&ip->mii, 1, 0); @@ -818,8 +818,6 @@ out: static void ioc3_mii_start(struct ioc3_private *ip) { ip->ioc3_timer.expires = jiffies + (12 * HZ)/10; /* 1.2 sec. */ - ip->ioc3_timer.data = (unsigned long) ip; - ip->ioc3_timer.function = ioc3_timer; add_timer(&ip->ioc3_timer); } @@ -1291,7 +1289,7 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *ent) #endif spin_lock_init(&ip->ioc3_lock); - init_timer(&ip->ioc3_timer); + timer_setup(&ip->ioc3_timer, ioc3_timer, 0); ioc3_stop(ip); ioc3_init(dev); diff --git a/drivers/net/ethernet/sis/sis190.c b/drivers/net/ethernet/sis/sis190.c index 445109bd6910..c2c50522b96d 100644 --- a/drivers/net/ethernet/sis/sis190.c +++ b/drivers/net/ethernet/sis/sis190.c @@ -1018,10 +1018,10 @@ out_unlock: rtnl_unlock(); } -static void sis190_phy_timer(unsigned long __opaque) +static void sis190_phy_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)__opaque; - struct sis190_private *tp = netdev_priv(dev); + struct sis190_private *tp = from_timer(tp, t, timer); + struct net_device *dev = tp->dev; if (likely(netif_running(dev))) schedule_work(&tp->phy_task); @@ -1039,10 +1039,8 @@ static inline void sis190_request_timer(struct net_device *dev) struct sis190_private *tp = netdev_priv(dev); struct timer_list *timer = &tp->timer; - init_timer(timer); + timer_setup(timer, sis190_phy_timer, 0); timer->expires = jiffies + SIS190_PHY_TIMEOUT; - timer->data = (unsigned long)dev; - timer->function = sis190_phy_timer; add_timer(timer); } diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index 40bd88362e3d..4bb89f74742c 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -218,7 +218,7 @@ static void sis900_init_rxfilter (struct net_device * net_dev); static u16 read_eeprom(void __iomem *ioaddr, int location); static int mdio_read(struct net_device *net_dev, int phy_id, int location); static void mdio_write(struct net_device *net_dev, int phy_id, int location, int val); -static void sis900_timer(unsigned long data); +static void sis900_timer(struct timer_list *t); static void sis900_check_mode (struct net_device *net_dev, struct mii_phy *mii_phy); static void sis900_tx_timeout(struct net_device *net_dev); static void sis900_init_tx_ring(struct net_device *net_dev); @@ -1065,10 +1065,8 @@ sis900_open(struct net_device *net_dev) /* Set the timer to switch to check for link beat and perhaps switch to an alternate media type. */ - init_timer(&sis_priv->timer); + timer_setup(&sis_priv->timer, sis900_timer, 0); sis_priv->timer.expires = jiffies + HZ; - sis_priv->timer.data = (unsigned long)net_dev; - sis_priv->timer.function = sis900_timer; add_timer(&sis_priv->timer); return 0; @@ -1302,10 +1300,10 @@ static void sis630_set_eq(struct net_device *net_dev, u8 revision) * link status (ON/OFF) and link mode (10/100/Full/Half) */ -static void sis900_timer(unsigned long data) +static void sis900_timer(struct timer_list *t) { - struct net_device *net_dev = (struct net_device *)data; - struct sis900_private *sis_priv = netdev_priv(net_dev); + struct sis900_private *sis_priv = from_timer(sis_priv, t, timer); + struct net_device *net_dev = sis_priv->mii_info.dev; struct mii_phy *mii_phy = sis_priv->mii; static const int next_tick = 5*HZ; int speed = 0, duplex = 0; diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c index 6a0e1d4b597c..949aaef390b6 100644 --- a/drivers/net/ethernet/smsc/epic100.c +++ b/drivers/net/ethernet/smsc/epic100.c @@ -290,7 +290,7 @@ static int read_eeprom(struct epic_private *, int); static int mdio_read(struct net_device *dev, int phy_id, int location); static void mdio_write(struct net_device *dev, int phy_id, int loc, int val); static void epic_restart(struct net_device *dev); -static void epic_timer(unsigned long data); +static void epic_timer(struct timer_list *t); static void epic_tx_timeout(struct net_device *dev); static void epic_init_ring(struct net_device *dev); static netdev_tx_t epic_start_xmit(struct sk_buff *skb, @@ -739,10 +739,8 @@ static int epic_open(struct net_device *dev) /* Set the timer to switch to check for link beat and perhaps switch to an alternate media type. */ - init_timer(&ep->timer); + timer_setup(&ep->timer, epic_timer, 0); ep->timer.expires = jiffies + 3*HZ; - ep->timer.data = (unsigned long)dev; - ep->timer.function = epic_timer; /* timer handler */ add_timer(&ep->timer); return rc; @@ -845,10 +843,10 @@ static void check_media(struct net_device *dev) } } -static void epic_timer(unsigned long data) +static void epic_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *)data; - struct epic_private *ep = netdev_priv(dev); + struct epic_private *ep = from_timer(ep, t, timer); + struct net_device *dev = ep->mii.dev; void __iomem *ioaddr = ep->ioaddr; int next_tick = 5*HZ; diff --git a/drivers/net/ethernet/smsc/smc91c92_cs.c b/drivers/net/ethernet/smsc/smc91c92_cs.c index 92c927aec66d..a55f430f6a7b 100644 --- a/drivers/net/ethernet/smsc/smc91c92_cs.c +++ b/drivers/net/ethernet/smsc/smc91c92_cs.c @@ -280,7 +280,7 @@ static void set_rx_mode(struct net_device *dev); static int s9k_config(struct net_device *dev, struct ifmap *map); static void smc_set_xcvr(struct net_device *dev, int if_port); static void smc_reset(struct net_device *dev); -static void media_check(u_long arg); +static void media_check(struct timer_list *t); static void mdio_sync(unsigned int addr); static int mdio_read(struct net_device *dev, int phy_id, int loc); static void mdio_write(struct net_device *dev, int phy_id, int loc, int value); @@ -1070,7 +1070,7 @@ static int smc_open(struct net_device *dev) smc->packets_waiting = 0; smc_reset(dev); - setup_timer(&smc->media, media_check, (u_long)dev); + timer_setup(&smc->media, media_check, 0); mod_timer(&smc->media, jiffies + HZ); return 0; @@ -1708,10 +1708,10 @@ static void smc_reset(struct net_device *dev) ======================================================================*/ -static void media_check(u_long arg) +static void media_check(struct timer_list *t) { - struct net_device *dev = (struct net_device *) arg; - struct smc_private *smc = netdev_priv(dev); + struct smc_private *smc = from_timer(smc, t, media); + struct net_device *dev = smc->mii_if.dev; unsigned int ioaddr = dev->base_addr; u_short i, media, saved_bank; u_short link; diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 97035766c291..e28c0d2c58e9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -159,6 +159,7 @@ config DWMAC_SUN8I tristate "Allwinner sun8i GMAC support" default ARCH_SUNXI depends on OF && (ARCH_SUNXI || COMPILE_TEST) + select MDIO_BUS_MUX ---help--- Support for Allwinner H3 A83T A64 EMAC ethernet controllers. diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c index 6a9c954492f2..8b50afcdb52d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c +++ b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c @@ -118,10 +118,9 @@ int tse_pcs_init(void __iomem *base, struct tse_pcs *pcs) return ret; } -static void pcs_link_timer_callback(unsigned long data) +static void pcs_link_timer_callback(struct tse_pcs *pcs) { u16 val = 0; - struct tse_pcs *pcs = (struct tse_pcs *)data; void __iomem *tse_pcs_base = pcs->tse_pcs_base; void __iomem *sgmii_adapter_base = pcs->sgmii_adapter_base; @@ -138,12 +137,11 @@ static void pcs_link_timer_callback(unsigned long data) } } -static void auto_nego_timer_callback(unsigned long data) +static void auto_nego_timer_callback(struct tse_pcs *pcs) { u16 val = 0; u16 speed = 0; u16 duplex = 0; - struct tse_pcs *pcs = (struct tse_pcs *)data; void __iomem *tse_pcs_base = pcs->tse_pcs_base; void __iomem *sgmii_adapter_base = pcs->sgmii_adapter_base; @@ -201,14 +199,14 @@ static void auto_nego_timer_callback(unsigned long data) } } -static void aneg_link_timer_callback(unsigned long data) +static void aneg_link_timer_callback(struct timer_list *t) { - struct tse_pcs *pcs = (struct tse_pcs *)data; + struct tse_pcs *pcs = from_timer(pcs, t, aneg_link_timer); if (pcs->autoneg == AUTONEG_ENABLE) - auto_nego_timer_callback(data); + auto_nego_timer_callback(pcs); else if (pcs->autoneg == AUTONEG_DISABLE) - pcs_link_timer_callback(data); + pcs_link_timer_callback(pcs); } void tse_pcs_fix_mac_speed(struct tse_pcs *pcs, struct phy_device *phy_dev, @@ -237,8 +235,8 @@ void tse_pcs_fix_mac_speed(struct tse_pcs *pcs, struct phy_device *phy_dev, tse_pcs_reset(tse_pcs_base, pcs); - setup_timer(&pcs->aneg_link_timer, - aneg_link_timer_callback, (unsigned long)pcs); + timer_setup(&pcs->aneg_link_timer, aneg_link_timer_callback, + 0); mod_timer(&pcs->aneg_link_timer, jiffies + msecs_to_jiffies(AUTONEGO_LINK_TIMER)); } else if (phy_dev->autoneg == AUTONEG_DISABLE) { @@ -270,8 +268,8 @@ void tse_pcs_fix_mac_speed(struct tse_pcs *pcs, struct phy_device *phy_dev, tse_pcs_reset(tse_pcs_base, pcs); - setup_timer(&pcs->aneg_link_timer, - aneg_link_timer_callback, (unsigned long)pcs); + timer_setup(&pcs->aneg_link_timer, aneg_link_timer_callback, + 0); mod_timer(&pcs->aneg_link_timer, jiffies + msecs_to_jiffies(AUTONEGO_LINK_TIMER)); } diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index e82b4b70b7be..e1e5ac053760 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -442,8 +442,9 @@ struct stmmac_dma_ops { void (*dma_mode)(void __iomem *ioaddr, int txmode, int rxmode, int rxfifosz); void (*dma_rx_mode)(void __iomem *ioaddr, int mode, u32 channel, - int fifosz); - void (*dma_tx_mode)(void __iomem *ioaddr, int mode, u32 channel); + int fifosz, u8 qmode); + void (*dma_tx_mode)(void __iomem *ioaddr, int mode, u32 channel, + int fifosz, u8 qmode); /* To track extra statistic (if supported) */ void (*dma_diagnostic_fr) (void *data, struct stmmac_extra_stats *x, void __iomem *ioaddr); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c index 866444b6c82f..2c6d7c69c8f7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c @@ -51,15 +51,11 @@ #define NSS_COMMON_CLK_SRC_CTRL_RGMII(x) 1 #define NSS_COMMON_CLK_SRC_CTRL_SGMII(x) ((x >= 2) ? 1 : 0) -#define NSS_COMMON_MACSEC_CTL 0x28 -#define NSS_COMMON_MACSEC_CTL_EXT_BYPASS_EN(x) (1 << x) - #define NSS_COMMON_GMAC_CTL(x) (0x30 + (x * 4)) #define NSS_COMMON_GMAC_CTL_CSYS_REQ BIT(19) #define NSS_COMMON_GMAC_CTL_PHY_IFACE_SEL BIT(16) #define NSS_COMMON_GMAC_CTL_IFG_LIMIT_OFFSET 8 #define NSS_COMMON_GMAC_CTL_IFG_OFFSET 0 -#define NSS_COMMON_GMAC_CTL_IFG_MASK 0x3f #define NSS_COMMON_CLK_DIV_RGMII_1000 1 #define NSS_COMMON_CLK_DIV_RGMII_100 9 @@ -68,9 +64,6 @@ #define NSS_COMMON_CLK_DIV_SGMII_100 4 #define NSS_COMMON_CLK_DIV_SGMII_10 49 -#define QSGMII_PCS_MODE_CTL 0x68 -#define QSGMII_PCS_MODE_CTL_AUTONEG_EN(x) BIT((x * 8) + 7) - #define QSGMII_PCS_CAL_LCKDT_CTL 0x120 #define QSGMII_PCS_CAL_LCKDT_CTL_RST BIT(19) @@ -83,15 +76,10 @@ #define QSGMII_PHY_TX_DRIVER_EN BIT(3) #define QSGMII_PHY_QSGMII_EN BIT(7) #define QSGMII_PHY_PHASE_LOOP_GAIN_OFFSET 12 -#define QSGMII_PHY_PHASE_LOOP_GAIN_MASK 0x7 #define QSGMII_PHY_RX_DC_BIAS_OFFSET 18 -#define QSGMII_PHY_RX_DC_BIAS_MASK 0x3 #define QSGMII_PHY_RX_INPUT_EQU_OFFSET 20 -#define QSGMII_PHY_RX_INPUT_EQU_MASK 0x3 #define QSGMII_PHY_CDR_PI_SLEW_OFFSET 22 -#define QSGMII_PHY_CDR_PI_SLEW_MASK 0x3 #define QSGMII_PHY_TX_DRV_AMP_OFFSET 28 -#define QSGMII_PHY_TX_DRV_AMP_MASK 0xf struct ipq806x_gmac { struct platform_device *pdev; @@ -217,7 +205,7 @@ static int ipq806x_gmac_of_parse(struct ipq806x_gmac *gmac) * code and keep it consistent with the Linux convention, we'll number * them from 0 to 3 here. */ - if (gmac->id < 0 || gmac->id > 3) { + if (gmac->id > 3) { dev_err(dev, "invalid gmac id\n"); return -EINVAL; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 39c2122a4f26..e5ff734d4f9b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -17,6 +17,7 @@ #include <linux/clk.h> #include <linux/io.h> #include <linux/iopoll.h> +#include <linux/mdio-mux.h> #include <linux/mfd/syscon.h> #include <linux/module.h> #include <linux/of_device.h> @@ -41,14 +42,14 @@ * This value is used for disabling properly EMAC * and used as a good starting value in case of the * boot process(uboot) leave some stuff. - * @internal_phy: Does the MAC embed an internal PHY + * @soc_has_internal_phy: Does the MAC embed an internal PHY * @support_mii: Does the MAC handle MII * @support_rmii: Does the MAC handle RMII * @support_rgmii: Does the MAC handle RGMII */ struct emac_variant { u32 default_syscon_value; - int internal_phy; + bool soc_has_internal_phy; bool support_mii; bool support_rmii; bool support_rgmii; @@ -61,7 +62,8 @@ struct emac_variant { * @rst_ephy: reference to the optional EPHY reset for the internal PHY * @variant: reference to the current board variant * @regmap: regmap for using the syscon - * @use_internal_phy: Does the current PHY choice imply using the internal PHY + * @internal_phy_powered: Does the internal PHY is enabled + * @mux_handle: Internal pointer used by mdio-mux lib */ struct sunxi_priv_data { struct clk *tx_clk; @@ -70,12 +72,13 @@ struct sunxi_priv_data { struct reset_control *rst_ephy; const struct emac_variant *variant; struct regmap *regmap; - bool use_internal_phy; + bool internal_phy_powered; + void *mux_handle; }; static const struct emac_variant emac_variant_h3 = { .default_syscon_value = 0x58000, - .internal_phy = PHY_INTERFACE_MODE_MII, + .soc_has_internal_phy = true, .support_mii = true, .support_rmii = true, .support_rgmii = true @@ -83,20 +86,20 @@ static const struct emac_variant emac_variant_h3 = { static const struct emac_variant emac_variant_v3s = { .default_syscon_value = 0x38000, - .internal_phy = PHY_INTERFACE_MODE_MII, + .soc_has_internal_phy = true, .support_mii = true }; static const struct emac_variant emac_variant_a83t = { .default_syscon_value = 0, - .internal_phy = 0, + .soc_has_internal_phy = false, .support_mii = true, .support_rgmii = true }; static const struct emac_variant emac_variant_a64 = { .default_syscon_value = 0, - .internal_phy = 0, + .soc_has_internal_phy = false, .support_mii = true, .support_rmii = true, .support_rgmii = true @@ -195,6 +198,9 @@ static const struct emac_variant emac_variant_a64 = { #define H3_EPHY_LED_POL BIT(17) /* 1: active low, 0: active high */ #define H3_EPHY_SHUTDOWN BIT(16) /* 1: shutdown, 0: power up */ #define H3_EPHY_SELECT BIT(15) /* 1: internal PHY, 0: external PHY */ +#define H3_EPHY_MUX_MASK (H3_EPHY_SHUTDOWN | H3_EPHY_SELECT) +#define DWMAC_SUN8I_MDIO_MUX_INTERNAL_ID 1 +#define DWMAC_SUN8I_MDIO_MUX_EXTERNAL_ID 2 /* H3/A64 specific bits */ #define SYSCON_RMII_EN BIT(13) /* 1: enable RMII (overrides EPIT) */ @@ -634,6 +640,159 @@ static int sun8i_dwmac_reset(struct stmmac_priv *priv) return 0; } +/* Search in mdio-mux node for internal PHY node and get its clk/reset */ +static int get_ephy_nodes(struct stmmac_priv *priv) +{ + struct sunxi_priv_data *gmac = priv->plat->bsp_priv; + struct device_node *mdio_mux, *iphynode; + struct device_node *mdio_internal; + int ret; + + mdio_mux = of_get_child_by_name(priv->device->of_node, "mdio-mux"); + if (!mdio_mux) { + dev_err(priv->device, "Cannot get mdio-mux node\n"); + return -ENODEV; + } + + mdio_internal = of_find_compatible_node(mdio_mux, NULL, + "allwinner,sun8i-h3-mdio-internal"); + if (!mdio_internal) { + dev_err(priv->device, "Cannot get internal_mdio node\n"); + return -ENODEV; + } + + /* Seek for internal PHY */ + for_each_child_of_node(mdio_internal, iphynode) { + gmac->ephy_clk = of_clk_get(iphynode, 0); + if (IS_ERR(gmac->ephy_clk)) + continue; + gmac->rst_ephy = of_reset_control_get_exclusive(iphynode, NULL); + if (IS_ERR(gmac->rst_ephy)) { + ret = PTR_ERR(gmac->rst_ephy); + if (ret == -EPROBE_DEFER) + return ret; + continue; + } + dev_info(priv->device, "Found internal PHY node\n"); + return 0; + } + return -ENODEV; +} + +static int sun8i_dwmac_power_internal_phy(struct stmmac_priv *priv) +{ + struct sunxi_priv_data *gmac = priv->plat->bsp_priv; + int ret; + + if (gmac->internal_phy_powered) { + dev_warn(priv->device, "Internal PHY already powered\n"); + return 0; + } + + dev_info(priv->device, "Powering internal PHY\n"); + ret = clk_prepare_enable(gmac->ephy_clk); + if (ret) { + dev_err(priv->device, "Cannot enable internal PHY\n"); + return ret; + } + + /* Make sure the EPHY is properly reseted, as U-Boot may leave + * it at deasserted state, and thus it may fail to reset EMAC. + */ + reset_control_assert(gmac->rst_ephy); + + ret = reset_control_deassert(gmac->rst_ephy); + if (ret) { + dev_err(priv->device, "Cannot deassert internal phy\n"); + clk_disable_unprepare(gmac->ephy_clk); + return ret; + } + + gmac->internal_phy_powered = true; + + return 0; +} + +static int sun8i_dwmac_unpower_internal_phy(struct sunxi_priv_data *gmac) +{ + if (!gmac->internal_phy_powered) + return 0; + + clk_disable_unprepare(gmac->ephy_clk); + reset_control_assert(gmac->rst_ephy); + gmac->internal_phy_powered = false; + return 0; +} + +/* MDIO multiplexing switch function + * This function is called by the mdio-mux layer when it thinks the mdio bus + * multiplexer needs to switch. + * 'current_child' is the current value of the mux register + * 'desired_child' is the value of the 'reg' property of the target child MDIO + * node. + * The first time this function is called, current_child == -1. + * If current_child == desired_child, then the mux is already set to the + * correct bus. + */ +static int mdio_mux_syscon_switch_fn(int current_child, int desired_child, + void *data) +{ + struct stmmac_priv *priv = data; + struct sunxi_priv_data *gmac = priv->plat->bsp_priv; + u32 reg, val; + int ret = 0; + bool need_power_ephy = false; + + if (current_child ^ desired_child) { + regmap_read(gmac->regmap, SYSCON_EMAC_REG, ®); + switch (desired_child) { + case DWMAC_SUN8I_MDIO_MUX_INTERNAL_ID: + dev_info(priv->device, "Switch mux to internal PHY"); + val = (reg & ~H3_EPHY_MUX_MASK) | H3_EPHY_SELECT; + + need_power_ephy = true; + break; + case DWMAC_SUN8I_MDIO_MUX_EXTERNAL_ID: + dev_info(priv->device, "Switch mux to external PHY"); + val = (reg & ~H3_EPHY_MUX_MASK) | H3_EPHY_SHUTDOWN; + need_power_ephy = false; + break; + default: + dev_err(priv->device, "Invalid child ID %x\n", + desired_child); + return -EINVAL; + } + regmap_write(gmac->regmap, SYSCON_EMAC_REG, val); + if (need_power_ephy) { + ret = sun8i_dwmac_power_internal_phy(priv); + if (ret) + return ret; + } else { + sun8i_dwmac_unpower_internal_phy(gmac); + } + /* After changing syscon value, the MAC need reset or it will + * use the last value (and so the last PHY set). + */ + ret = sun8i_dwmac_reset(priv); + } + return ret; +} + +static int sun8i_dwmac_register_mdio_mux(struct stmmac_priv *priv) +{ + int ret; + struct device_node *mdio_mux; + struct sunxi_priv_data *gmac = priv->plat->bsp_priv; + + mdio_mux = of_get_child_by_name(priv->device->of_node, "mdio-mux"); + if (!mdio_mux) + return -ENODEV; + + ret = mdio_mux_init(priv->device, mdio_mux, mdio_mux_syscon_switch_fn, + &gmac->mux_handle, priv, priv->mii); + return ret; +} + static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv) { struct sunxi_priv_data *gmac = priv->plat->bsp_priv; @@ -648,35 +807,25 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv) "Current syscon value is not the default %x (expect %x)\n", val, reg); - if (gmac->variant->internal_phy) { - if (!gmac->use_internal_phy) { - /* switch to external PHY interface */ - reg &= ~H3_EPHY_SELECT; - } else { - reg |= H3_EPHY_SELECT; - reg &= ~H3_EPHY_SHUTDOWN; - dev_dbg(priv->device, "Select internal_phy %x\n", reg); - - if (of_property_read_bool(priv->plat->phy_node, - "allwinner,leds-active-low")) - reg |= H3_EPHY_LED_POL; - else - reg &= ~H3_EPHY_LED_POL; - - /* Force EPHY xtal frequency to 24MHz. */ - reg |= H3_EPHY_CLK_SEL; - - ret = of_mdio_parse_addr(priv->device, - priv->plat->phy_node); - if (ret < 0) { - dev_err(priv->device, "Could not parse MDIO addr\n"); - return ret; - } - /* of_mdio_parse_addr returns a valid (0 ~ 31) PHY - * address. No need to mask it again. - */ - reg |= ret << H3_EPHY_ADDR_SHIFT; + if (gmac->variant->soc_has_internal_phy) { + if (of_property_read_bool(priv->plat->phy_node, + "allwinner,leds-active-low")) + reg |= H3_EPHY_LED_POL; + else + reg &= ~H3_EPHY_LED_POL; + + /* Force EPHY xtal frequency to 24MHz. */ + reg |= H3_EPHY_CLK_SEL; + + ret = of_mdio_parse_addr(priv->device, priv->plat->phy_node); + if (ret < 0) { + dev_err(priv->device, "Could not parse MDIO addr\n"); + return ret; } + /* of_mdio_parse_addr returns a valid (0 ~ 31) PHY + * address. No need to mask it again. + */ + reg |= 1 << H3_EPHY_ADDR_SHIFT; } if (!of_property_read_u32(node, "allwinner,tx-delay-ps", &val)) { @@ -746,81 +895,21 @@ static void sun8i_dwmac_unset_syscon(struct sunxi_priv_data *gmac) regmap_write(gmac->regmap, SYSCON_EMAC_REG, reg); } -static int sun8i_dwmac_power_internal_phy(struct stmmac_priv *priv) +static void sun8i_dwmac_exit(struct platform_device *pdev, void *priv) { - struct sunxi_priv_data *gmac = priv->plat->bsp_priv; - int ret; - - if (!gmac->use_internal_phy) - return 0; - - ret = clk_prepare_enable(gmac->ephy_clk); - if (ret) { - dev_err(priv->device, "Cannot enable ephy\n"); - return ret; - } - - /* Make sure the EPHY is properly reseted, as U-Boot may leave - * it at deasserted state, and thus it may fail to reset EMAC. - */ - reset_control_assert(gmac->rst_ephy); + struct sunxi_priv_data *gmac = priv; - ret = reset_control_deassert(gmac->rst_ephy); - if (ret) { - dev_err(priv->device, "Cannot deassert ephy\n"); - clk_disable_unprepare(gmac->ephy_clk); - return ret; + if (gmac->variant->soc_has_internal_phy) { + /* sun8i_dwmac_exit could be called with mdiomux uninit */ + if (gmac->mux_handle) + mdio_mux_uninit(gmac->mux_handle); + if (gmac->internal_phy_powered) + sun8i_dwmac_unpower_internal_phy(gmac); } - return 0; -} - -static int sun8i_dwmac_unpower_internal_phy(struct sunxi_priv_data *gmac) -{ - if (!gmac->use_internal_phy) - return 0; - - clk_disable_unprepare(gmac->ephy_clk); - reset_control_assert(gmac->rst_ephy); - return 0; -} - -/* sun8i_power_phy() - Activate the PHY: - * In case of error, no need to call sun8i_unpower_phy(), - * it will be called anyway by sun8i_dwmac_exit() - */ -static int sun8i_power_phy(struct stmmac_priv *priv) -{ - int ret; - - ret = sun8i_dwmac_power_internal_phy(priv); - if (ret) - return ret; - - ret = sun8i_dwmac_set_syscon(priv); - if (ret) - return ret; - - /* After changing syscon value, the MAC need reset or it will use - * the last value (and so the last PHY set. - */ - ret = sun8i_dwmac_reset(priv); - if (ret) - return ret; - return 0; -} - -static void sun8i_unpower_phy(struct sunxi_priv_data *gmac) -{ sun8i_dwmac_unset_syscon(gmac); - sun8i_dwmac_unpower_internal_phy(gmac); -} - -static void sun8i_dwmac_exit(struct platform_device *pdev, void *priv) -{ - struct sunxi_priv_data *gmac = priv; - sun8i_unpower_phy(gmac); + reset_control_put(gmac->rst_ephy); clk_disable_unprepare(gmac->tx_clk); @@ -849,7 +938,7 @@ static struct mac_device_info *sun8i_dwmac_setup(void *ppriv) if (!mac) return NULL; - ret = sun8i_power_phy(priv); + ret = sun8i_dwmac_set_syscon(priv); if (ret) return NULL; @@ -889,6 +978,8 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) struct sunxi_priv_data *gmac; struct device *dev = &pdev->dev; int ret; + struct stmmac_priv *priv; + struct net_device *ndev; ret = stmmac_get_platform_resources(pdev, &stmmac_res); if (ret) @@ -932,29 +1023,6 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) } plat_dat->interface = of_get_phy_mode(dev->of_node); - if (plat_dat->interface == gmac->variant->internal_phy) { - dev_info(&pdev->dev, "Will use internal PHY\n"); - gmac->use_internal_phy = true; - gmac->ephy_clk = of_clk_get(plat_dat->phy_node, 0); - if (IS_ERR(gmac->ephy_clk)) { - ret = PTR_ERR(gmac->ephy_clk); - dev_err(&pdev->dev, "Cannot get EPHY clock: %d\n", ret); - return -EINVAL; - } - - gmac->rst_ephy = of_reset_control_get(plat_dat->phy_node, NULL); - if (IS_ERR(gmac->rst_ephy)) { - ret = PTR_ERR(gmac->rst_ephy); - if (ret == -EPROBE_DEFER) - return ret; - dev_err(&pdev->dev, "No EPHY reset control found %d\n", - ret); - return -EINVAL; - } - } else { - dev_info(&pdev->dev, "Will use external PHY\n"); - gmac->use_internal_phy = false; - } /* platform data specifying hardware features and callbacks. * hardware features were copied from Allwinner drivers. @@ -973,12 +1041,45 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (ret) - sun8i_dwmac_exit(pdev, plat_dat->bsp_priv); + goto dwmac_exit; + + ndev = dev_get_drvdata(&pdev->dev); + priv = netdev_priv(ndev); + /* The mux must be registered after parent MDIO + * so after stmmac_dvr_probe() + */ + if (gmac->variant->soc_has_internal_phy) { + ret = get_ephy_nodes(priv); + if (ret) + goto dwmac_exit; + ret = sun8i_dwmac_register_mdio_mux(priv); + if (ret) { + dev_err(&pdev->dev, "Failed to register mux\n"); + goto dwmac_mux; + } + } else { + ret = sun8i_dwmac_reset(priv); + if (ret) + goto dwmac_exit; + } return ret; +dwmac_mux: + sun8i_dwmac_unset_syscon(gmac); +dwmac_exit: + sun8i_dwmac_exit(pdev, plat_dat->bsp_priv); +return ret; } static const struct of_device_id sun8i_dwmac_match[] = { + { .compatible = "allwinner,sun8i-h3-emac", + .data = &emac_variant_h3 }, + { .compatible = "allwinner,sun8i-v3s-emac", + .data = &emac_variant_v3s }, + { .compatible = "allwinner,sun8i-a83t-emac", + .data = &emac_variant_a83t }, + { .compatible = "allwinner,sun50i-a64-emac", + .data = &emac_variant_a64 }, { } }; MODULE_DEVICE_TABLE(of, sun8i_dwmac_match); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index d74cedf2a397..789dad8a07b5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -98,7 +98,7 @@ #define GMAC_PCS_IRQ_DEFAULT (GMAC_INT_RGSMIIS | GMAC_INT_PCS_LINK | \ GMAC_INT_PCS_ANE) -#define GMAC_INT_DEFAULT_MASK GMAC_INT_PMT_EN +#define GMAC_INT_DEFAULT_MASK (GMAC_INT_PMT_EN | GMAC_INT_LPI_EN) enum dwmac4_irq_status { time_stamp_irq = 0x00001000, @@ -106,6 +106,7 @@ enum dwmac4_irq_status { mmc_tx_irq = 0x00000400, mmc_rx_irq = 0x00000200, mmc_irq = 0x00000100, + lpi_irq = 0x00000020, pmt_irq = 0x00000010, }; @@ -132,6 +133,10 @@ enum power_event { #define GMAC4_LPI_CTRL_STATUS_LPITXA BIT(19) /* Enable LPI TX Automate */ #define GMAC4_LPI_CTRL_STATUS_PLS BIT(17) /* PHY Link Status */ #define GMAC4_LPI_CTRL_STATUS_LPIEN BIT(16) /* LPI Enable */ +#define GMAC4_LPI_CTRL_STATUS_RLPIEX BIT(3) /* Receive LPI Exit */ +#define GMAC4_LPI_CTRL_STATUS_RLPIEN BIT(2) /* Receive LPI Entry */ +#define GMAC4_LPI_CTRL_STATUS_TLPIEX BIT(1) /* Transmit LPI Exit */ +#define GMAC4_LPI_CTRL_STATUS_TLPIEN BIT(0) /* Transmit LPI Entry */ /* MAC Debug bitmap */ #define GMAC_DEBUG_TFCSTS_MASK GENMASK(18, 17) @@ -225,6 +230,8 @@ enum power_event { #define MTL_CHAN_RX_DEBUG(x) (MTL_CHANX_BASE_ADDR(x) + 0x38) #define MTL_OP_MODE_RSF BIT(5) +#define MTL_OP_MODE_TXQEN_MASK GENMASK(3, 2) +#define MTL_OP_MODE_TXQEN_AV BIT(2) #define MTL_OP_MODE_TXQEN BIT(3) #define MTL_OP_MODE_TSF BIT(1) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 2f7d7ec59962..f3ed8f7853eb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -580,6 +580,25 @@ static int dwmac4_irq_status(struct mac_device_info *hw, x->irq_receive_pmt_irq_n++; } + /* MAC tx/rx EEE LPI entry/exit interrupts */ + if (intr_status & lpi_irq) { + /* Clear LPI interrupt by reading MAC_LPI_Control_Status */ + u32 status = readl(ioaddr + GMAC4_LPI_CTRL_STATUS); + + if (status & GMAC4_LPI_CTRL_STATUS_TLPIEN) { + ret |= CORE_IRQ_TX_PATH_IN_LPI_MODE; + x->irq_tx_path_in_lpi_mode_n++; + } + if (status & GMAC4_LPI_CTRL_STATUS_TLPIEX) { + ret |= CORE_IRQ_TX_PATH_EXIT_LPI_MODE; + x->irq_tx_path_exit_lpi_mode_n++; + } + if (status & GMAC4_LPI_CTRL_STATUS_RLPIEN) + x->irq_rx_path_in_lpi_mode_n++; + if (status & GMAC4_LPI_CTRL_STATUS_RLPIEX) + x->irq_rx_path_exit_lpi_mode_n++; + } + dwmac_pcs_isr(ioaddr, GMAC_PCS_BASE, intr_status, x); if (intr_status & PCS_RGSMIIIS_IRQ) dwmac4_phystatus(ioaddr, x); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c index e84831e1b63b..c110f6850ffa 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c @@ -191,7 +191,7 @@ static void dwmac4_rx_watchdog(void __iomem *ioaddr, u32 riwt, u32 number_chan) } static void dwmac4_dma_rx_chan_op_mode(void __iomem *ioaddr, int mode, - u32 channel, int fifosz) + u32 channel, int fifosz, u8 qmode) { unsigned int rqs = fifosz / 256 - 1; u32 mtl_rx_op, mtl_rx_int; @@ -218,8 +218,10 @@ static void dwmac4_dma_rx_chan_op_mode(void __iomem *ioaddr, int mode, mtl_rx_op &= ~MTL_OP_MODE_RQS_MASK; mtl_rx_op |= rqs << MTL_OP_MODE_RQS_SHIFT; - /* enable flow control only if each channel gets 4 KiB or more FIFO */ - if (fifosz >= 4096) { + /* Enable flow control only if each channel gets 4 KiB or more FIFO and + * only if channel is not an AVB channel. + */ + if ((fifosz >= 4096) && (qmode != MTL_QUEUE_AVB)) { unsigned int rfd, rfa; mtl_rx_op |= MTL_OP_MODE_EHFC; @@ -271,9 +273,10 @@ static void dwmac4_dma_rx_chan_op_mode(void __iomem *ioaddr, int mode, } static void dwmac4_dma_tx_chan_op_mode(void __iomem *ioaddr, int mode, - u32 channel) + u32 channel, int fifosz, u8 qmode) { u32 mtl_tx_op = readl(ioaddr + MTL_CHAN_TX_OP_MODE(channel)); + unsigned int tqs = fifosz / 256 - 1; if (mode == SF_DMA_MODE) { pr_debug("GMAC: enable TX store and forward mode\n"); @@ -306,12 +309,18 @@ static void dwmac4_dma_tx_chan_op_mode(void __iomem *ioaddr, int mode, * For an IP with DWC_EQOS_NUM_TXQ > 1, the fields TXQEN and TQS are R/W * with reset values: TXQEN off, TQS 256 bytes. * - * Write the bits in both cases, since it will have no effect when RO. - * For DWC_EQOS_NUM_TXQ > 1, the top bits in MTL_OP_MODE_TQS_MASK might - * be RO, however, writing the whole TQS field will result in a value - * equal to DWC_EQOS_TXFIFO_SIZE, just like for DWC_EQOS_NUM_TXQ == 1. + * TXQEN must be written for multi-channel operation and TQS must + * reflect the available fifo size per queue (total fifo size / number + * of enabled queues). */ - mtl_tx_op |= MTL_OP_MODE_TXQEN | MTL_OP_MODE_TQS_MASK; + mtl_tx_op &= ~MTL_OP_MODE_TXQEN_MASK; + if (qmode != MTL_QUEUE_AVB) + mtl_tx_op |= MTL_OP_MODE_TXQEN; + else + mtl_tx_op |= MTL_OP_MODE_TXQEN_AV; + mtl_tx_op &= ~MTL_OP_MODE_TQS_MASK; + mtl_tx_op |= tqs << MTL_OP_MODE_TQS_SHIFT; + writel(mtl_tx_op, ioaddr + MTL_CHAN_TX_OP_MODE(channel)); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 16bd50929084..ff4fb5eae1af 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1749,12 +1749,20 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) u32 rx_channels_count = priv->plat->rx_queues_to_use; u32 tx_channels_count = priv->plat->tx_queues_to_use; int rxfifosz = priv->plat->rx_fifo_size; + int txfifosz = priv->plat->tx_fifo_size; u32 txmode = 0; u32 rxmode = 0; u32 chan = 0; + u8 qmode = 0; if (rxfifosz == 0) rxfifosz = priv->dma_cap.rx_fifo_size; + if (txfifosz == 0) + txfifosz = priv->dma_cap.tx_fifo_size; + + /* Adjust for real per queue fifo size */ + rxfifosz /= rx_channels_count; + txfifosz /= tx_channels_count; if (priv->plat->force_thresh_dma_mode) { txmode = tc; @@ -1777,12 +1785,19 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) /* configure all channels */ if (priv->synopsys_id >= DWMAC_CORE_4_00) { - for (chan = 0; chan < rx_channels_count; chan++) + for (chan = 0; chan < rx_channels_count; chan++) { + qmode = priv->plat->rx_queues_cfg[chan].mode_to_use; + priv->hw->dma->dma_rx_mode(priv->ioaddr, rxmode, chan, - rxfifosz); + rxfifosz, qmode); + } - for (chan = 0; chan < tx_channels_count; chan++) - priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan); + for (chan = 0; chan < tx_channels_count; chan++) { + qmode = priv->plat->tx_queues_cfg[chan].mode_to_use; + + priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan, + txfifosz, qmode); + } } else { priv->hw->dma->dma_mode(priv->ioaddr, txmode, rxmode, rxfifosz); @@ -1946,15 +1961,27 @@ static void stmmac_tx_err(struct stmmac_priv *priv, u32 chan) static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode, u32 rxmode, u32 chan) { + u8 rxqmode = priv->plat->rx_queues_cfg[chan].mode_to_use; + u8 txqmode = priv->plat->tx_queues_cfg[chan].mode_to_use; + u32 rx_channels_count = priv->plat->rx_queues_to_use; + u32 tx_channels_count = priv->plat->tx_queues_to_use; int rxfifosz = priv->plat->rx_fifo_size; + int txfifosz = priv->plat->tx_fifo_size; if (rxfifosz == 0) rxfifosz = priv->dma_cap.rx_fifo_size; + if (txfifosz == 0) + txfifosz = priv->dma_cap.tx_fifo_size; + + /* Adjust for real per queue fifo size */ + rxfifosz /= rx_channels_count; + txfifosz /= tx_channels_count; if (priv->synopsys_id >= DWMAC_CORE_4_00) { priv->hw->dma->dma_rx_mode(priv->ioaddr, rxmode, chan, - rxfifosz); - priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan); + rxfifosz, rxqmode); + priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan, + txfifosz, txqmode); } else { priv->hw->dma->dma_mode(priv->ioaddr, txmode, rxmode, rxfifosz); @@ -2217,10 +2244,8 @@ static void stmmac_init_tx_coalesce(struct stmmac_priv *priv) { priv->tx_coal_frames = STMMAC_TX_FRAMES; priv->tx_coal_timer = STMMAC_COAL_TX_TIMER; - init_timer(&priv->txtimer); + setup_timer(&priv->txtimer, stmmac_tx_timer, (unsigned long)priv); priv->txtimer.expires = STMMAC_COAL_TIMER(priv->tx_coal_timer); - priv->txtimer.data = (unsigned long)priv; - priv->txtimer.function = stmmac_tx_timer; add_timer(&priv->txtimer); } @@ -3724,6 +3749,20 @@ static int stmmac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) return ret; } +static int stmmac_set_mac_address(struct net_device *ndev, void *addr) +{ + struct stmmac_priv *priv = netdev_priv(ndev); + int ret = 0; + + ret = eth_mac_addr(ndev, addr); + if (ret) + return ret; + + priv->hw->mac->set_umac_addr(priv->hw, ndev->dev_addr, 0); + + return ret; +} + #ifdef CONFIG_DEBUG_FS static struct dentry *stmmac_fs_dir; @@ -3951,7 +3990,7 @@ static const struct net_device_ops stmmac_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = stmmac_poll_controller, #endif - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = stmmac_set_mac_address, }; /** diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 195eb7e71473..05f122b8424a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -318,10 +318,6 @@ static int stmmac_dt_phy(struct plat_stmmacenet_data *plat, bool mdio = true; static const struct of_device_id need_mdio_ids[] = { { .compatible = "snps,dwc-qos-ethernet-4.10" }, - { .compatible = "allwinner,sun8i-a83t-emac" }, - { .compatible = "allwinner,sun8i-h3-emac" }, - { .compatible = "allwinner,sun8i-v3s-emac" }, - { .compatible = "allwinner,sun50i-a64-emac" }, {}, }; diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index 382993c1561c..113bd57e2ea0 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -4079,9 +4079,9 @@ done: #endif } -static void cas_link_timer(unsigned long data) +static void cas_link_timer(struct timer_list *t) { - struct cas *cp = (struct cas *) data; + struct cas *cp = from_timer(cp, t, link_timer); int mask, pending = 0, reset = 0; unsigned long flags; @@ -5039,9 +5039,7 @@ static int cas_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) spin_lock_init(&cp->stat_lock[N_TX_RINGS]); mutex_init(&cp->pm_mutex); - init_timer(&cp->link_timer); - cp->link_timer.function = cas_link_timer; - cp->link_timer.data = (unsigned long) cp; + timer_setup(&cp->link_timer, cas_link_timer, 0); #if 1 /* Just in case the implementation of atomic operations diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c index 5b56c24b6ed2..5ea037672e6f 100644 --- a/drivers/net/ethernet/sun/ldmvsw.c +++ b/drivers/net/ethernet/sun/ldmvsw.c @@ -307,7 +307,7 @@ static int vsw_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) /* Get (or create) the vnet associated with this port */ vp = vsw_get_vnet(hp, vdev->mp, &handle); - if (unlikely(IS_ERR(vp))) { + if (IS_ERR(vp)) { err = PTR_ERR(vp); pr_err("Failed to get vnet for vsw-port\n"); mdesc_release(hp); @@ -363,8 +363,7 @@ static int vsw_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) list_add_rcu(&port->list, &vp->port_list); spin_unlock_irqrestore(&vp->lock, flags); - setup_timer(&port->clean_timer, sunvnet_clean_timer_expire_common, - (unsigned long)port); + timer_setup(&port->clean_timer, sunvnet_clean_timer_expire_common, 0); err = register_netdev(dev); if (err) { diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 8ab0fb6892d5..06001bacbe0f 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -2221,9 +2221,9 @@ static int niu_link_status(struct niu *np, int *link_up_p) return err; } -static void niu_timer(unsigned long __opaque) +static void niu_timer(struct timer_list *t) { - struct niu *np = (struct niu *) __opaque; + struct niu *np = from_timer(np, t, timer); unsigned long off; int err, link_up; @@ -6123,10 +6123,8 @@ static int niu_open(struct net_device *dev) err = niu_init_hw(np); if (!err) { - init_timer(&np->timer); + timer_setup(&np->timer, niu_timer, 0); np->timer.expires = jiffies + HZ; - np->timer.data = (unsigned long) np; - np->timer.function = niu_timer; err = niu_enable_interrupts(np, 1); if (err) @@ -6775,10 +6773,8 @@ static int niu_change_mtu(struct net_device *dev, int new_mtu) err = niu_init_hw(np); if (!err) { - init_timer(&np->timer); + timer_setup(&np->timer, niu_timer, 0); np->timer.expires = jiffies + HZ; - np->timer.data = (unsigned long) np; - np->timer.function = niu_timer; err = niu_enable_interrupts(np, 1); if (err) diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c index 3189722110c2..0b1f41f6bceb 100644 --- a/drivers/net/ethernet/sun/sunbmac.c +++ b/drivers/net/ethernet/sun/sunbmac.c @@ -523,9 +523,9 @@ static int try_next_permutation(struct bigmac *bp, void __iomem *tregs) return -1; } -static void bigmac_timer(unsigned long data) +static void bigmac_timer(struct timer_list *t) { - struct bigmac *bp = (struct bigmac *) data; + struct bigmac *bp = from_timer(bp, t, bigmac_timer); void __iomem *tregs = bp->tregs; int restart_timer = 0; @@ -613,8 +613,6 @@ static void bigmac_begin_auto_negotiation(struct bigmac *bp) bp->timer_state = ltrywait; bp->timer_ticks = 0; bp->bigmac_timer.expires = jiffies + (12 * HZ) / 10; - bp->bigmac_timer.data = (unsigned long) bp; - bp->bigmac_timer.function = bigmac_timer; add_timer(&bp->bigmac_timer); } @@ -921,7 +919,7 @@ static int bigmac_open(struct net_device *dev) printk(KERN_ERR "BIGMAC: Can't order irq %d to go.\n", dev->irq); return ret; } - init_timer(&bp->bigmac_timer); + timer_setup(&bp->bigmac_timer, bigmac_timer, 0); ret = bigmac_init_hw(bp, 0); if (ret) free_irq(dev->irq, bp); @@ -1172,7 +1170,7 @@ static int bigmac_ether_init(struct platform_device *op, "board-version", 1); /* Init auto-negotiation timer state. */ - init_timer(&bp->bigmac_timer); + timer_setup(&bp->bigmac_timer, bigmac_timer, 0); bp->timer_state = asleep; bp->timer_ticks = 0; diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c index fa607d062cb3..a7afcee3c5ae 100644 --- a/drivers/net/ethernet/sun/sungem.c +++ b/drivers/net/ethernet/sun/sungem.c @@ -1496,9 +1496,9 @@ static int gem_mdio_link_not_up(struct gem *gp) } } -static void gem_link_timer(unsigned long data) +static void gem_link_timer(struct timer_list *t) { - struct gem *gp = (struct gem *) data; + struct gem *gp = from_timer(gp, t, link_timer); struct net_device *dev = gp->dev; int restart_aneg = 0; @@ -2910,9 +2910,7 @@ static int gem_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) gp->msg_enable = DEFAULT_MSG; - init_timer(&gp->link_timer); - gp->link_timer.function = gem_link_timer; - gp->link_timer.data = (unsigned long) gp; + timer_setup(&gp->link_timer, gem_link_timer, 0); INIT_WORK(&gp->reset_task, gem_reset_task); diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c index 9e983e1d8249..0431f1e5f511 100644 --- a/drivers/net/ethernet/sun/sunhme.c +++ b/drivers/net/ethernet/sun/sunhme.c @@ -685,9 +685,9 @@ static int is_lucent_phy(struct happy_meal *hp) return ret; } -static void happy_meal_timer(unsigned long data) +static void happy_meal_timer(struct timer_list *t) { - struct happy_meal *hp = (struct happy_meal *) data; + struct happy_meal *hp = from_timer(hp, t, happy_timer); void __iomem *tregs = hp->tcvregs; int restart_timer = 0; @@ -1413,8 +1413,6 @@ force_link: hp->timer_ticks = 0; hp->happy_timer.expires = jiffies + (12 * HZ)/10; /* 1.2 sec. */ - hp->happy_timer.data = (unsigned long) hp; - hp->happy_timer.function = happy_meal_timer; add_timer(&hp->happy_timer); } @@ -2819,7 +2817,7 @@ static int happy_meal_sbus_probe_one(struct platform_device *op, int is_qfe) hp->timer_state = asleep; hp->timer_ticks = 0; - init_timer(&hp->happy_timer); + timer_setup(&hp->happy_timer, happy_meal_timer, 0); hp->dev = dev; dev->netdev_ops = &hme_netdev_ops; @@ -3133,7 +3131,7 @@ static int happy_meal_pci_probe(struct pci_dev *pdev, hp->timer_state = asleep; hp->timer_ticks = 0; - init_timer(&hp->happy_timer); + timer_setup(&hp->happy_timer, happy_meal_timer, 0); hp->irq = pdev->irq; hp->dev = dev; diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index 0b95105f7060..27fb22638885 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -492,8 +492,7 @@ static int vnet_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) pr_info("%s: PORT ( remote-mac %pM%s )\n", vp->dev->name, port->raddr, switch_port ? " switch-port" : ""); - setup_timer(&port->clean_timer, sunvnet_clean_timer_expire_common, - (unsigned long)port); + timer_setup(&port->clean_timer, sunvnet_clean_timer_expire_common, 0); napi_enable(&port->napi); vio_port_up(&port->vio); diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c index ecf456c7b6d1..8aa3ce46bb81 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.c +++ b/drivers/net/ethernet/sun/sunvnet_common.c @@ -1040,9 +1040,9 @@ static inline void vnet_free_skbs(struct sk_buff *skb) } } -void sunvnet_clean_timer_expire_common(unsigned long port0) +void sunvnet_clean_timer_expire_common(struct timer_list *t) { - struct vnet_port *port = (struct vnet_port *)port0; + struct vnet_port *port = from_timer(port, t, clean_timer); struct sk_buff *freeskbs; unsigned pending; diff --git a/drivers/net/ethernet/sun/sunvnet_common.h b/drivers/net/ethernet/sun/sunvnet_common.h index 6a4dd1fb19bf..1ea0b016580a 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.h +++ b/drivers/net/ethernet/sun/sunvnet_common.h @@ -130,7 +130,7 @@ struct vnet { ((__port)->vsw ? (__port)->dev : (__port)->vp->dev) /* Common funcs */ -void sunvnet_clean_timer_expire_common(unsigned long port0); +void sunvnet_clean_timer_expire_common(struct timer_list *t); int sunvnet_open_common(struct net_device *dev); int sunvnet_close_common(struct net_device *dev); void sunvnet_set_rx_mode_common(struct net_device *dev, struct vnet *vp); diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-desc.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-desc.c index e9672b1f9968..031cf9c3435a 100644 --- a/drivers/net/ethernet/synopsys/dwc-xlgmac-desc.c +++ b/drivers/net/ethernet/synopsys/dwc-xlgmac-desc.c @@ -335,7 +335,7 @@ static int xlgmac_alloc_pages(struct xlgmac_pdata *pdata, dma_addr_t pages_dma; /* Try to obtain pages, decreasing order if necessary */ - gfp |= __GFP_COLD | __GFP_COMP | __GFP_NOWARN; + gfp |= __GFP_COMP | __GFP_NOWARN; while (order >= 0) { pages = alloc_pages(gfp, order); if (pages) diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c index ddd43e09111e..cd1185e66133 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.c +++ b/drivers/net/ethernet/ti/cpsw_ale.c @@ -859,9 +859,7 @@ void cpsw_ale_start(struct cpsw_ale *ale) cpsw_ale_control_set(ale, 0, ALE_ENABLE, 1); cpsw_ale_control_set(ale, 0, ALE_CLEAR, 1); - init_timer(&ale->timer); - ale->timer.data = (unsigned long)ale; - ale->timer.function = cpsw_ale_timer; + setup_timer(&ale->timer, cpsw_ale_timer, (unsigned long)ale); if (ale->ageout) { ale->timer.expires = jiffies + ale->ageout; add_timer(&ale->timer); diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 437d36289786..ed58c746e4af 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -906,7 +906,7 @@ static int netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq) sw_data[0] = (u32)bufptr; } else { /* Allocate a secondary receive queue entry */ - page = alloc_page(GFP_ATOMIC | GFP_DMA | __GFP_COLD); + page = alloc_page(GFP_ATOMIC | GFP_DMA); if (unlikely(!page)) { dev_warn_ratelimited(netcp->ndev_dev, "Secondary page alloc failed\n"); goto fail; @@ -1887,7 +1887,7 @@ static int netcp_setup_tc(struct net_device *dev, enum tc_setup_type type, /* setup tc must be called under rtnl lock */ ASSERT_RTNL(); - if (type != TC_SETUP_MQPRIO) + if (type != TC_SETUP_QDISC_MQPRIO) return -EOPNOTSUPP; mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index 28cb38af1a34..4ad821655e51 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -3616,9 +3616,8 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, } spin_unlock_bh(&gbe_dev->hw_stats_lock); - init_timer(&gbe_dev->timer); - gbe_dev->timer.data = (unsigned long)gbe_dev; - gbe_dev->timer.function = netcp_ethss_timer; + setup_timer(&gbe_dev->timer, netcp_ethss_timer, + (unsigned long)gbe_dev); gbe_dev->timer.expires = jiffies + GBE_TIMER_INTERVAL; add_timer(&gbe_dev->timer); *inst_priv = gbe_dev; diff --git a/drivers/net/ethernet/ti/tlan.c b/drivers/net/ethernet/ti/tlan.c index c8d53d8c83ee..8f53d762fbc4 100644 --- a/drivers/net/ethernet/ti/tlan.c +++ b/drivers/net/ethernet/ti/tlan.c @@ -172,7 +172,8 @@ static u32 tlan_handle_tx_eoc(struct net_device *, u16); static u32 tlan_handle_status_check(struct net_device *, u16); static u32 tlan_handle_rx_eoc(struct net_device *, u16); -static void tlan_timer(unsigned long); +static void tlan_timer(struct timer_list *t); +static void tlan_phy_monitor(struct timer_list *t); static void tlan_reset_lists(struct net_device *); static void tlan_free_lists(struct net_device *); @@ -190,7 +191,6 @@ static void tlan_phy_power_up(struct net_device *); static void tlan_phy_reset(struct net_device *); static void tlan_phy_start_link(struct net_device *); static void tlan_phy_finish_auto_neg(struct net_device *); -static void tlan_phy_monitor(unsigned long); /* static int tlan_phy_nop(struct net_device *); @@ -254,11 +254,10 @@ tlan_set_timer(struct net_device *dev, u32 ticks, u32 type) spin_unlock_irqrestore(&priv->lock, flags); return; } - priv->timer.function = tlan_timer; + priv->timer.function = (TIMER_FUNC_TYPE)tlan_timer; if (!in_irq()) spin_unlock_irqrestore(&priv->lock, flags); - priv->timer.data = (unsigned long) dev; priv->timer_set_at = jiffies; priv->timer_type = type; mod_timer(&priv->timer, jiffies + ticks); @@ -926,8 +925,8 @@ static int tlan_open(struct net_device *dev) return err; } - init_timer(&priv->timer); - init_timer(&priv->media_timer); + timer_setup(&priv->timer, NULL, 0); + timer_setup(&priv->media_timer, tlan_phy_monitor, 0); tlan_start(dev); @@ -1426,8 +1425,7 @@ static u32 tlan_handle_tx_eof(struct net_device *dev, u16 host_int) tlan_dio_write8(dev->base_addr, TLAN_LED_REG, TLAN_LED_LINK | TLAN_LED_ACT); if (priv->timer.function == NULL) { - priv->timer.function = tlan_timer; - priv->timer.data = (unsigned long) dev; + priv->timer.function = (TIMER_FUNC_TYPE)tlan_timer; priv->timer.expires = jiffies + TLAN_TIMER_ACT_DELAY; priv->timer_set_at = jiffies; priv->timer_type = TLAN_TIMER_ACTIVITY; @@ -1578,8 +1576,7 @@ drop_and_reuse: tlan_dio_write8(dev->base_addr, TLAN_LED_REG, TLAN_LED_LINK | TLAN_LED_ACT); if (priv->timer.function == NULL) { - priv->timer.function = tlan_timer; - priv->timer.data = (unsigned long) dev; + priv->timer.function = (TIMER_FUNC_TYPE)tlan_timer; priv->timer.expires = jiffies + TLAN_TIMER_ACT_DELAY; priv->timer_set_at = jiffies; priv->timer_type = TLAN_TIMER_ACTIVITY; @@ -1836,10 +1833,10 @@ ThunderLAN driver timer function * **************************************************************/ -static void tlan_timer(unsigned long data) +static void tlan_timer(struct timer_list *t) { - struct net_device *dev = (struct net_device *) data; - struct tlan_priv *priv = netdev_priv(dev); + struct tlan_priv *priv = from_timer(priv, t, timer); + struct net_device *dev = priv->dev; u32 elapsed; unsigned long flags = 0; @@ -1872,7 +1869,6 @@ static void tlan_timer(unsigned long data) tlan_dio_write8(dev->base_addr, TLAN_LED_REG, TLAN_LED_LINK); } else { - priv->timer.function = tlan_timer; priv->timer.expires = priv->timer_set_at + TLAN_TIMER_ACT_DELAY; spin_unlock_irqrestore(&priv->lock, flags); @@ -2317,8 +2313,6 @@ tlan_finish_reset(struct net_device *dev) } else netdev_info(dev, "Link active\n"); /* Enabling link beat monitoring */ - priv->media_timer.function = tlan_phy_monitor; - priv->media_timer.data = (unsigned long) dev; priv->media_timer.expires = jiffies + HZ; add_timer(&priv->media_timer); } @@ -2763,10 +2757,10 @@ static void tlan_phy_finish_auto_neg(struct net_device *dev) * *******************************************************************/ -static void tlan_phy_monitor(unsigned long data) +static void tlan_phy_monitor(struct timer_list *t) { - struct net_device *dev = (struct net_device *) data; - struct tlan_priv *priv = netdev_priv(dev); + struct tlan_priv *priv = from_timer(priv, t, media_timer); + struct net_device *dev = priv->dev; u16 phy; u16 phy_status; diff --git a/drivers/net/ethernet/toshiba/spider_net.c b/drivers/net/ethernet/toshiba/spider_net.c index cec9e70ab995..a913538d3213 100644 --- a/drivers/net/ethernet/toshiba/spider_net.c +++ b/drivers/net/ethernet/toshiba/spider_net.c @@ -2256,16 +2256,14 @@ spider_net_setup_netdev(struct spider_net_card *card) pci_set_drvdata(card->pdev, netdev); - init_timer(&card->tx_timer); - card->tx_timer.function = - (void (*)(unsigned long)) spider_net_cleanup_tx_ring; - card->tx_timer.data = (unsigned long) card; + setup_timer(&card->tx_timer, + (void(*)(unsigned long))spider_net_cleanup_tx_ring, + (unsigned long)card); netdev->irq = card->pdev->irq; card->aneg_count = 0; - init_timer(&card->aneg_timer); - card->aneg_timer.function = spider_net_link_phy; - card->aneg_timer.data = (unsigned long) card; + setup_timer(&card->aneg_timer, spider_net_link_phy, + (unsigned long)card); netif_napi_add(netdev, &card->napi, spider_net_poll, SPIDER_NET_NAPI_WEIGHT); diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c index c2d15d9c0c33..0624b71ab5d4 100644 --- a/drivers/net/ethernet/tundra/tsi108_eth.c +++ b/drivers/net/ethernet/tundra/tsi108_eth.c @@ -164,7 +164,7 @@ static struct platform_driver tsi_eth_driver = { }, }; -static void tsi108_timed_checker(unsigned long dev_ptr); +static void tsi108_timed_checker(struct timer_list *t); #ifdef DEBUG static void dump_eth_one(struct net_device *dev) @@ -1370,7 +1370,7 @@ static int tsi108_open(struct net_device *dev) napi_enable(&data->napi); - setup_timer(&data->timer, tsi108_timed_checker, (unsigned long)dev); + timer_setup(&data->timer, tsi108_timed_checker, 0); mod_timer(&data->timer, jiffies + 1); tsi108_restart_rx(data, dev); @@ -1666,10 +1666,10 @@ regs_fail: * Thus, we have to do it using a timer. */ -static void tsi108_timed_checker(unsigned long dev_ptr) +static void tsi108_timed_checker(struct timer_list *t) { - struct net_device *dev = (struct net_device *)dev_ptr; - struct tsi108_prv_data *data = netdev_priv(dev); + struct tsi108_prv_data *data = from_timer(data, t, timer); + struct net_device *dev = data->dev; tsi108_check_phy(dev); tsi108_check_rxring(dev); |